From 1b3c0b80d2bde24920e4f0ceaa4d458b4e94d9c5 Mon Sep 17 00:00:00 2001 From: ianchb Date: Mon, 13 Apr 2026 10:06:40 +0800 Subject: [PATCH] Refactor CUDA step buffers to remove loop-time allocations --- AMSS_NCKU_source/bssn_class.C | 16258 ++++++++++++++-------------- AMSS_NCKU_source/bssn_rhs_cuda.cu | 4272 ++++---- 2 files changed, 10265 insertions(+), 10265 deletions(-) diff --git a/AMSS_NCKU_source/bssn_class.C b/AMSS_NCKU_source/bssn_class.C index a69786d..7592cea 100644 --- a/AMSS_NCKU_source/bssn_class.C +++ b/AMSS_NCKU_source/bssn_class.C @@ -1,44 +1,44 @@ - -#ifdef newc -#include -#include -#include -#include -#include -#include -#include -using namespace std; -#else -#include -#include -#include -#endif - -#include - -#include "macrodef.h" -#include "misc.h" -#include "Ansorg.h" -#include "fmisc.h" -#include "Parallel.h" + +#ifdef newc +#include +#include +#include +#include +#include +#include +#include +using namespace std; +#else +#include +#include +#include +#endif + +#include + +#include "macrodef.h" +#include "misc.h" +#include "Ansorg.h" +#include "fmisc.h" +#include "Parallel.h" #include "bssn_class.h" #include "bssn_rhs.h" #if USE_CUDA_BSSN #include "bssn_rhs_cuda.h" #endif #include "initial_puncture.h" -#include "enforce_algebra.h" -#include "rungekutta4_rout.h" -#include "sommerfeld_rout.h" -#include "getnp4.h" -#include "shellfunctions.h" -#include "parameters.h" - -#ifdef With_AHF -#include "derivatives.h" -#include "myglobal.h" -#endif - +#include "enforce_algebra.h" +#include "rungekutta4_rout.h" +#include "sommerfeld_rout.h" +#include "getnp4.h" +#include "shellfunctions.h" +#include "parameters.h" + +#ifdef With_AHF +#include "derivatives.h" +#include "myglobal.h" +#endif + #include "perf.h" #include "derivatives.h" @@ -122,2190 +122,2190 @@ void bssn_cuda_download_level_state(MyList *PatL, MyList *vars, int #endif //================================================================================================ - -// define bssn_class - 
-//================================================================================================ - -bssn_class::bssn_class(double Couranti, double StartTimei, double TotalTimei, - double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei, - int Symmetryi, int checkruni, char *checkfilenamei, - double numepssi, double numepsbi, double numepshi, - int a_levi, int maxli, int decni, double maxrexi, double drexi) - : Courant(Couranti), StartTime(StartTimei), TotalTime(TotalTimei), - DumpTime(DumpTimei), d2DumpTime(d2DumpTimei), CheckTime(CheckTimei), AnasTime(AnasTimei), - Symmetry(Symmetryi), checkrun(checkruni), numepss(numepssi), numepsb(numepsbi), numepsh(numepshi), -#ifdef With_AHF - xc(0), yc(0), zc(0), xr(0), yr(0), zr(0), trigger(0), dTT(0), dumpid(0), -#endif - a_lev(a_levi), maxl(maxli), decn(decni), maxrex(maxrexi), drex(drexi), - CheckPoint(0) - // CheckPoint(0) -{ - MPI_Comm_size(MPI_COMM_WORLD, &nprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - - // setup Monitors - { - stringstream a_stream; - a_stream.setf(ios::left); - a_stream << "# Error log information"; - ErrorMonitor = new monitor("Error.log", myrank, a_stream.str()); - ErrorMonitor->print_message("Warning: we always assume intput parameter in cell center style."); - - a_stream.clear(); - a_stream.str(""); - a_stream << setw(15) << "# time"; - char str[50]; - for (int pl = 2; pl < maxl + 1; pl++) - for (int pm = -pl; pm < pl + 1; pm++) - { - sprintf(str, "R%02dm%03d", pl, pm); - a_stream << setw(16) << str; - sprintf(str, "I%02dm%03d", pl, pm); - a_stream << setw(16) << str; - } - Psi4Monitor = new monitor("bssn_psi4.dat", myrank, a_stream.str()); - - a_stream.clear(); - a_stream.str(""); - a_stream << setw(15) << "# time"; - BHMonitor = new monitor("bssn_BH.dat", myrank, a_stream.str()); - - a_stream.clear(); - a_stream.str(""); - a_stream << setw(15) << "# time ADMmass ADMPx ADMPy ADMPz ADMSx ADMSy ADMSz"; - MAPMonitor = new monitor("bssn_ADMQs.dat", myrank, a_stream.str()); 
- - a_stream.clear(); - a_stream.str(""); - a_stream << setw(15) << "# time Ham Px Py Pz Gx Gy Gz"; - ConVMonitor = new monitor("bssn_constraint.dat", myrank, a_stream.str()); - } - // setup sphere integration engine - Waveshell = new surface_integral(Symmetry); - - trfls = 0; - chitiny = 0; - // read parameter from file - { - char filename[50]; - { - map::iterator iter = parameters::str_par.find("inputpar"); - if (iter != parameters::str_par.end()) - { - strcpy(filename, (iter->second).c_str()); - } - else - { - cout << "Error inputpar" << endl; - exit(0); - } - } - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "error reading parameter file " << filename - << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "BSSN" && skey == "chitiny") - chitiny = atof(sval.c_str()); - else if (sgrp == "BSSN" && skey == "time refinement start from level") - trfls = atoi(sval.c_str()); -#ifdef With_AHF - else if (sgrp == "AHF" && skey == "AHfindevery") - AHfindevery = atoi(sval.c_str()); - else if (sgrp == "AHF" && skey == "AHdumptime") - AHdumptime = atof(sval.c_str()); -#endif - } - inf.close(); - } - if (myrank == 0) - { - // echo information of lower bound of chi - cout << " chitiny = " << chitiny << endl; - cout << " time refinement start from level #" << trfls << endl; -#ifdef With_AHF - cout << " parameters for AHF:" << endl; - cout << " AHfindevery = " << AHfindevery << endl; - cout 
<< " AHdumptime = " << AHdumptime << endl; -#endif - } - - chitiny = chitiny - 1; // because we have subtracted one from chi - - strcpy(checkfilename, checkfilenamei); - - ngfs = 0; - phio = new var("phio", ngfs++, 1, 1, 1); - trKo = new var("trKo", ngfs++, 1, 1, 1); - gxxo = new var("gxxo", ngfs++, 1, 1, 1); - gxyo = new var("gxyo", ngfs++, -1, -1, 1); - gxzo = new var("gxzo", ngfs++, -1, 1, -1); - gyyo = new var("gyyo", ngfs++, 1, 1, 1); - gyzo = new var("gyzo", ngfs++, 1, -1, -1); - gzzo = new var("gzzo", ngfs++, 1, 1, 1); - Axxo = new var("Axxo", ngfs++, 1, 1, 1); - Axyo = new var("Axyo", ngfs++, -1, -1, 1); - Axzo = new var("Axzo", ngfs++, -1, 1, -1); - Ayyo = new var("Ayyo", ngfs++, 1, 1, 1); - Ayzo = new var("Ayzo", ngfs++, 1, -1, -1); - Azzo = new var("Azzo", ngfs++, 1, 1, 1); - Gmxo = new var("Gmxo", ngfs++, -1, 1, 1); - Gmyo = new var("Gmyo", ngfs++, 1, -1, 1); - Gmzo = new var("Gmzo", ngfs++, 1, 1, -1); - Lapo = new var("Lapo", ngfs++, 1, 1, 1); - Sfxo = new var("Sfxo", ngfs++, -1, 1, 1); - Sfyo = new var("Sfyo", ngfs++, 1, -1, 1); - Sfzo = new var("Sfzo", ngfs++, 1, 1, -1); - dtSfxo = new var("dtSfxo", ngfs++, -1, 1, 1); - dtSfyo = new var("dtSfyo", ngfs++, 1, -1, 1); - dtSfzo = new var("dtSfzo", ngfs++, 1, 1, -1); - - phi0 = new var("phi0", ngfs++, 1, 1, 1); - trK0 = new var("trK0", ngfs++, 1, 1, 1); - gxx0 = new var("gxx0", ngfs++, 1, 1, 1); - gxy0 = new var("gxy0", ngfs++, -1, -1, 1); - gxz0 = new var("gxz0", ngfs++, -1, 1, -1); - gyy0 = new var("gyy0", ngfs++, 1, 1, 1); - gyz0 = new var("gyz0", ngfs++, 1, -1, -1); - gzz0 = new var("gzz0", ngfs++, 1, 1, 1); - Axx0 = new var("Axx0", ngfs++, 1, 1, 1); - Axy0 = new var("Axy0", ngfs++, -1, -1, 1); - Axz0 = new var("Axz0", ngfs++, -1, 1, -1); - Ayy0 = new var("Ayy0", ngfs++, 1, 1, 1); - Ayz0 = new var("Ayz0", ngfs++, 1, -1, -1); - Azz0 = new var("Azz0", ngfs++, 1, 1, 1); - Gmx0 = new var("Gmx0", ngfs++, -1, 1, 1); - Gmy0 = new var("Gmy0", ngfs++, 1, -1, 1); - Gmz0 = new var("Gmz0", ngfs++, 1, 1, -1); - 
Lap0 = new var("Lap0", ngfs++, 1, 1, 1); - Sfx0 = new var("Sfx0", ngfs++, -1, 1, 1); - Sfy0 = new var("Sfy0", ngfs++, 1, -1, 1); - Sfz0 = new var("Sfz0", ngfs++, 1, 1, -1); - dtSfx0 = new var("dtSfx0", ngfs++, -1, 1, 1); - dtSfy0 = new var("dtSfy0", ngfs++, 1, -1, 1); - dtSfz0 = new var("dtSfz0", ngfs++, 1, 1, -1); - - phi = new var("phi", ngfs++, 1, 1, 1); - trK = new var("trK", ngfs++, 1, 1, 1); - gxx = new var("gxx", ngfs++, 1, 1, 1); - gxy = new var("gxy", ngfs++, -1, -1, 1); - gxz = new var("gxz", ngfs++, -1, 1, -1); - gyy = new var("gyy", ngfs++, 1, 1, 1); - gyz = new var("gyz", ngfs++, 1, -1, -1); - gzz = new var("gzz", ngfs++, 1, 1, 1); - Axx = new var("Axx", ngfs++, 1, 1, 1); - Axy = new var("Axy", ngfs++, -1, -1, 1); - Axz = new var("Axz", ngfs++, -1, 1, -1); - Ayy = new var("Ayy", ngfs++, 1, 1, 1); - Ayz = new var("Ayz", ngfs++, 1, -1, -1); - Azz = new var("Azz", ngfs++, 1, 1, 1); - Gmx = new var("Gmx", ngfs++, -1, 1, 1); - Gmy = new var("Gmy", ngfs++, 1, -1, 1); - Gmz = new var("Gmz", ngfs++, 1, 1, -1); - Lap = new var("Lap", ngfs++, 1, 1, 1); - Sfx = new var("Sfx", ngfs++, -1, 1, 1); - Sfy = new var("Sfy", ngfs++, 1, -1, 1); - Sfz = new var("Sfz", ngfs++, 1, 1, -1); - dtSfx = new var("dtSfx", ngfs++, -1, 1, 1); - dtSfy = new var("dtSfy", ngfs++, 1, -1, 1); - dtSfz = new var("dtSfz", ngfs++, 1, 1, -1); - - phi1 = new var("phi1", ngfs++, 1, 1, 1); - trK1 = new var("trK1", ngfs++, 1, 1, 1); - gxx1 = new var("gxx1", ngfs++, 1, 1, 1); - gxy1 = new var("gxy1", ngfs++, -1, -1, 1); - gxz1 = new var("gxz1", ngfs++, -1, 1, -1); - gyy1 = new var("gyy1", ngfs++, 1, 1, 1); - gyz1 = new var("gyz1", ngfs++, 1, -1, -1); - gzz1 = new var("gzz1", ngfs++, 1, 1, 1); - Axx1 = new var("Axx1", ngfs++, 1, 1, 1); - Axy1 = new var("Axy1", ngfs++, -1, -1, 1); - Axz1 = new var("Axz1", ngfs++, -1, 1, -1); - Ayy1 = new var("Ayy1", ngfs++, 1, 1, 1); - Ayz1 = new var("Ayz1", ngfs++, 1, -1, -1); - Azz1 = new var("Azz1", ngfs++, 1, 1, 1); - Gmx1 = new var("Gmx1", ngfs++, -1, 1, 1); - 
Gmy1 = new var("Gmy1", ngfs++, 1, -1, 1); - Gmz1 = new var("Gmz1", ngfs++, 1, 1, -1); - Lap1 = new var("Lap1", ngfs++, 1, 1, 1); - Sfx1 = new var("Sfx1", ngfs++, -1, 1, 1); - Sfy1 = new var("Sfy1", ngfs++, 1, -1, 1); - Sfz1 = new var("Sfz1", ngfs++, 1, 1, -1); - dtSfx1 = new var("dtSfx1", ngfs++, -1, 1, 1); - dtSfy1 = new var("dtSfy1", ngfs++, 1, -1, 1); - dtSfz1 = new var("dtSfz1", ngfs++, 1, 1, -1); - - phi_rhs = new var("phi_rhs", ngfs++, 1, 1, 1); - trK_rhs = new var("trK_rhs", ngfs++, 1, 1, 1); - gxx_rhs = new var("gxx_rhs", ngfs++, 1, 1, 1); - gxy_rhs = new var("gxy_rhs", ngfs++, -1, -1, 1); - gxz_rhs = new var("gxz_rhs", ngfs++, -1, 1, -1); - gyy_rhs = new var("gyy_rhs", ngfs++, 1, 1, 1); - gyz_rhs = new var("gyz_rhs", ngfs++, 1, -1, -1); - gzz_rhs = new var("gzz_rhs", ngfs++, 1, 1, 1); - Axx_rhs = new var("Axx_rhs", ngfs++, 1, 1, 1); - Axy_rhs = new var("Axy_rhs", ngfs++, -1, -1, 1); - Axz_rhs = new var("Axz_rhs", ngfs++, -1, 1, -1); - Ayy_rhs = new var("Ayy_rhs", ngfs++, 1, 1, 1); - Ayz_rhs = new var("Ayz_rhs", ngfs++, 1, -1, -1); - Azz_rhs = new var("Azz_rhs", ngfs++, 1, 1, 1); - Gmx_rhs = new var("Gmx_rhs", ngfs++, -1, 1, 1); - Gmy_rhs = new var("Gmy_rhs", ngfs++, 1, -1, 1); - Gmz_rhs = new var("Gmz_rhs", ngfs++, 1, 1, -1); - Lap_rhs = new var("Lap_rhs", ngfs++, 1, 1, 1); - Sfx_rhs = new var("Sfx_rhs", ngfs++, -1, 1, 1); - Sfy_rhs = new var("Sfy_rhs", ngfs++, 1, -1, 1); - Sfz_rhs = new var("Sfz_rhs", ngfs++, 1, 1, -1); - dtSfx_rhs = new var("dtSfx_rhs", ngfs++, -1, 1, 1); - dtSfy_rhs = new var("dtSfy_rhs", ngfs++, 1, -1, 1); - dtSfz_rhs = new var("dtSfz_rhs", ngfs++, 1, 1, -1); - - rho = new var("rho", ngfs++, 1, 1, 1); - Sx = new var("Sx", ngfs++, -1, 1, 1); - Sy = new var("Sy", ngfs++, 1, -1, 1); - Sz = new var("Sz", ngfs++, 1, 1, -1); - Sxx = new var("Sxx", ngfs++, 1, 1, 1); - Sxy = new var("Sxy", ngfs++, -1, -1, 1); - Sxz = new var("Sxz", ngfs++, -1, 1, -1); - Syy = new var("Syy", ngfs++, 1, 1, 1); - Syz = new var("Syz", ngfs++, 1, -1, -1); - Szz = 
new var("Szz", ngfs++, 1, 1, 1); - - Gamxxx = new var("Gamxxx", ngfs++, -1, 1, 1); - Gamxxy = new var("Gamxxy", ngfs++, 1, -1, 1); - Gamxxz = new var("Gamxxz", ngfs++, 1, 1, -1); - Gamxyy = new var("Gamxyy", ngfs++, -1, 1, 1); - Gamxyz = new var("Gamxyz", ngfs++, -1, -1, -1); - Gamxzz = new var("Gamxzz", ngfs++, -1, 1, 1); - Gamyxx = new var("Gamyxx", ngfs++, 1, -1, 1); - Gamyxy = new var("Gamyxy", ngfs++, -1, 1, 1); - Gamyxz = new var("Gamyxz", ngfs++, -1, -1, -1); - Gamyyy = new var("Gamyyy", ngfs++, 1, -1, 1); - Gamyyz = new var("Gamyyz", ngfs++, 1, 1, -1); - Gamyzz = new var("Gamyzz", ngfs++, 1, -1, 1); - Gamzxx = new var("Gamzxx", ngfs++, 1, 1, -1); - Gamzxy = new var("Gamzxy", ngfs++, -1, -1, -1); - Gamzxz = new var("Gamzxz", ngfs++, -1, 1, 1); - Gamzyy = new var("Gamzyy", ngfs++, 1, 1, -1); - Gamzyz = new var("Gamzyz", ngfs++, 1, -1, 1); - Gamzzz = new var("Gamzzz", ngfs++, 1, 1, -1); - - Rxx = new var("Rxx", ngfs++, 1, 1, 1); - Rxy = new var("Rxy", ngfs++, -1, -1, 1); - Rxz = new var("Rxz", ngfs++, -1, 1, -1); - Ryy = new var("Ryy", ngfs++, 1, 1, 1); - Ryz = new var("Ryz", ngfs++, 1, -1, -1); - Rzz = new var("Rzz", ngfs++, 1, 1, 1); - - // refer to PRD, 77, 024027 (2008) - Rpsi4 = new var("Rpsi4", ngfs++, 1, 1, 1); - Ipsi4 = new var("Ipsi4", ngfs++, -1, -1, -1); - t1Rpsi4 = new var("t1Rpsi4", ngfs++, 1, 1, 1); - t1Ipsi4 = new var("t1Ipsi4", ngfs++, -1, -1, -1); - t2Rpsi4 = new var("t2Rpsi4", ngfs++, 1, 1, 1); - t2Ipsi4 = new var("t2Ipsi4", ngfs++, -1, -1, -1); - - // constraint violation monitor variables - Cons_Ham = new var("Cons_Ham", ngfs++, 1, 1, 1); - Cons_Px = new var("Cons_Px", ngfs++, -1, 1, 1); - Cons_Py = new var("Cons_Py", ngfs++, 1, -1, 1); - Cons_Pz = new var("Cons_Pz", ngfs++, 1, 1, -1); - Cons_Gx = new var("Cons_Gx", ngfs++, -1, 1, 1); - Cons_Gy = new var("Cons_Gy", ngfs++, 1, -1, 1); - Cons_Gz = new var("Cons_Gz", ngfs++, 1, 1, -1); - -#ifdef Point_Psi4 - phix = new var("phix", ngfs++, -1, 1, 1); - phiy = new var("phiy", ngfs++, 1, -1, 1); 
- phiz = new var("phiz", ngfs++, 1, 1, -1); - trKx = new var("trKx", ngfs++, -1, 1, 1); - trKy = new var("trKy", ngfs++, 1, -1, 1); - trKz = new var("trKz", ngfs++, 1, 1, -1); - Axxx = new var("Axxx", ngfs++, -1, 1, 1); - Axxy = new var("Axxy", ngfs++, 1, -1, 1); - Axxz = new var("Axxz", ngfs++, 1, 1, -1); - Axyx = new var("Axyx", ngfs++, 1, -1, 1); - Axyy = new var("Axyy", ngfs++, -1, 1, 1); - Axyz = new var("Axyz", ngfs++, -1, -1, -1); - Axzx = new var("Axzx", ngfs++, 1, 1, -1); - Axzy = new var("Axzy", ngfs++, -1, -1, -1); - Axzz = new var("Axzz", ngfs++, -1, 1, 1); - Ayyx = new var("Ayyx", ngfs++, -1, 1, 1); - Ayyy = new var("Ayyy", ngfs++, 1, -1, 1); - Ayyz = new var("Ayyz", ngfs++, 1, 1, -1); - Ayzx = new var("Ayzx", ngfs++, -1, -1, -1); - Ayzy = new var("Ayzy", ngfs++, 1, 1, -1); - Ayzz = new var("Ayzz", ngfs++, 1, -1, 1); - Azzx = new var("Azzx", ngfs++, -1, 1, 1); - Azzy = new var("Azzy", ngfs++, 1, -1, 1); - Azzz = new var("Azzz", ngfs++, 1, 1, -1); -#endif - - // specific properspeed for 1+log slice - { - const double vl = sqrt(2); - trKo->setpropspeed(vl); - trK0->setpropspeed(vl); - trK->setpropspeed(vl); - trK1->setpropspeed(vl); - trK_rhs->setpropspeed(vl); - - phio->setpropspeed(vl); - phi0->setpropspeed(vl); - phi->setpropspeed(vl); - phi1->setpropspeed(vl); - phi_rhs->setpropspeed(vl); - - Lapo->setpropspeed(vl); - Lap0->setpropspeed(vl); - Lap->setpropspeed(vl); - Lap1->setpropspeed(vl); - Lap_rhs->setpropspeed(vl); - } - - OldStateList = new MyList(phio); - OldStateList->insert(trKo); - OldStateList->insert(gxxo); - OldStateList->insert(gxyo); - OldStateList->insert(gxzo); - OldStateList->insert(gyyo); - OldStateList->insert(gyzo); - OldStateList->insert(gzzo); - OldStateList->insert(Axxo); - OldStateList->insert(Axyo); - OldStateList->insert(Axzo); - OldStateList->insert(Ayyo); - OldStateList->insert(Ayzo); - OldStateList->insert(Azzo); - OldStateList->insert(Gmxo); - OldStateList->insert(Gmyo); - OldStateList->insert(Gmzo); - 
OldStateList->insert(Lapo); - OldStateList->insert(Sfxo); - OldStateList->insert(Sfyo); - OldStateList->insert(Sfzo); - OldStateList->insert(dtSfxo); - OldStateList->insert(dtSfyo); - OldStateList->insert(dtSfzo); - - StateList = new MyList(phi0); - StateList->insert(trK0); - StateList->insert(gxx0); - StateList->insert(gxy0); - StateList->insert(gxz0); - StateList->insert(gyy0); - StateList->insert(gyz0); - StateList->insert(gzz0); - StateList->insert(Axx0); - StateList->insert(Axy0); - StateList->insert(Axz0); - StateList->insert(Ayy0); - StateList->insert(Ayz0); - StateList->insert(Azz0); - StateList->insert(Gmx0); - StateList->insert(Gmy0); - StateList->insert(Gmz0); - StateList->insert(Lap0); - StateList->insert(Sfx0); - StateList->insert(Sfy0); - StateList->insert(Sfz0); - StateList->insert(dtSfx0); - StateList->insert(dtSfy0); - StateList->insert(dtSfz0); - - RHSList = new MyList(phi_rhs); - RHSList->insert(trK_rhs); - RHSList->insert(gxx_rhs); - RHSList->insert(gxy_rhs); - RHSList->insert(gxz_rhs); - RHSList->insert(gyy_rhs); - RHSList->insert(gyz_rhs); - RHSList->insert(gzz_rhs); - RHSList->insert(Axx_rhs); - RHSList->insert(Axy_rhs); - RHSList->insert(Axz_rhs); - RHSList->insert(Ayy_rhs); - RHSList->insert(Ayz_rhs); - RHSList->insert(Azz_rhs); - RHSList->insert(Gmx_rhs); - RHSList->insert(Gmy_rhs); - RHSList->insert(Gmz_rhs); - RHSList->insert(Lap_rhs); - RHSList->insert(Sfx_rhs); - RHSList->insert(Sfy_rhs); - RHSList->insert(Sfz_rhs); - RHSList->insert(dtSfx_rhs); - RHSList->insert(dtSfy_rhs); - RHSList->insert(dtSfz_rhs); - - SynchList_pre = new MyList(phi); - SynchList_pre->insert(trK); - SynchList_pre->insert(gxx); - SynchList_pre->insert(gxy); - SynchList_pre->insert(gxz); - SynchList_pre->insert(gyy); - SynchList_pre->insert(gyz); - SynchList_pre->insert(gzz); - SynchList_pre->insert(Axx); - SynchList_pre->insert(Axy); - SynchList_pre->insert(Axz); - SynchList_pre->insert(Ayy); - SynchList_pre->insert(Ayz); - SynchList_pre->insert(Azz); - 
SynchList_pre->insert(Gmx); - SynchList_pre->insert(Gmy); - SynchList_pre->insert(Gmz); - SynchList_pre->insert(Lap); - SynchList_pre->insert(Sfx); - SynchList_pre->insert(Sfy); - SynchList_pre->insert(Sfz); - SynchList_pre->insert(dtSfx); - SynchList_pre->insert(dtSfy); - SynchList_pre->insert(dtSfz); - - SynchList_cor = new MyList(phi1); - SynchList_cor->insert(trK1); - SynchList_cor->insert(gxx1); - SynchList_cor->insert(gxy1); - SynchList_cor->insert(gxz1); - SynchList_cor->insert(gyy1); - SynchList_cor->insert(gyz1); - SynchList_cor->insert(gzz1); - SynchList_cor->insert(Axx1); - SynchList_cor->insert(Axy1); - SynchList_cor->insert(Axz1); - SynchList_cor->insert(Ayy1); - SynchList_cor->insert(Ayz1); - SynchList_cor->insert(Azz1); - SynchList_cor->insert(Gmx1); - SynchList_cor->insert(Gmy1); - SynchList_cor->insert(Gmz1); - SynchList_cor->insert(Lap1); - SynchList_cor->insert(Sfx1); - SynchList_cor->insert(Sfy1); - SynchList_cor->insert(Sfz1); - SynchList_cor->insert(dtSfx1); - SynchList_cor->insert(dtSfy1); - SynchList_cor->insert(dtSfz1); - - DumpList = new MyList(phi0); - DumpList->insert(trK0); - DumpList->insert(gxx0); - DumpList->insert(gxy0); - DumpList->insert(gxz0); - DumpList->insert(gyy0); - DumpList->insert(gyz0); - DumpList->insert(gzz0); - // DumpList->insert(Axx0); - // DumpList->insert(Axy0); - // DumpList->insert(Axz0); - // DumpList->insert(Ayy0); - // DumpList->insert(Ayz0); - // DumpList->insert(Azz0); - // DumpList->insert(Gmx0); - // DumpList->insert(Gmy0); - // DumpList->insert(Gmz0); - DumpList->insert(Lap0); - // DumpList->insert(Sfx0); - // DumpList->insert(Sfy0); - // DumpList->insert(Sfz0); - // DumpList->insert(dtSfx0); - // DumpList->insert(dtSfy0); - // DumpList->insert(dtSfz0); - // DumpList->insert(Rpsi4); - // DumpList->insert(Ipsi4); - DumpList->insert(Cons_Ham); - DumpList->insert(Cons_Px); - DumpList->insert(Cons_Py); - DumpList->insert(Cons_Pz); - // DumpList->insert(Cons_Gx); - // DumpList->insert(Cons_Gy); - // 
DumpList->insert(Cons_Gz); - - ConstraintList = new MyList(Cons_Ham); - ConstraintList->insert(Cons_Px); - ConstraintList->insert(Cons_Py); - ConstraintList->insert(Cons_Pz); - ConstraintList->insert(Cons_Gx); - ConstraintList->insert(Cons_Gy); - ConstraintList->insert(Cons_Gz); -#ifdef With_AHF - // setup kinds of var list - // List for AparentHorizonFinderDirect - // special attension is payed to symmetry type - // gij gij,x gij,y gij,z - AHList = new MyList(gxx0); - AHList->insert(Gamxxx); - AHList->insert(Gamyxx); - AHList->insert(Gamzxx); - AHList->insert(gxy0); - AHList->insert(Gamxxy); - AHList->insert(Gamyxy); - AHList->insert(Gamzxy); - AHList->insert(gxz0); - AHList->insert(Gamxxz); - AHList->insert(Gamyxz); - AHList->insert(Gamzxz); - AHList->insert(gyy0); - AHList->insert(Gamxyy); - AHList->insert(Gamyyy); - AHList->insert(Gamzyy); - AHList->insert(gyz0); - AHList->insert(Gamxyz); - AHList->insert(Gamyyz); - AHList->insert(Gamzyz); - AHList->insert(gzz0); - AHList->insert(Gamxzz); - AHList->insert(Gamyzz); - AHList->insert(Gamzzz); - // phi phi,x phi,y phi,z - AHList->insert(phi0); - AHList->insert(dtSfx_rhs); - AHList->insert(dtSfy_rhs); - AHList->insert(dtSfz_rhs); - // Aij - AHList->insert(Axx0); - AHList->insert(Axy0); - AHList->insert(Axz0); - AHList->insert(Ayy0); - AHList->insert(Ayz0); - AHList->insert(Azz0); - // trK - AHList->insert(trK0); - // gij,x gij,y gij,z - AHDList = new MyList(Gamxxx); - AHDList->insert(Gamyxx); - AHDList->insert(Gamzxx); - AHDList->insert(Gamxxy); - AHDList->insert(Gamyxy); - AHDList->insert(Gamzxy); - AHDList->insert(Gamxxz); - AHDList->insert(Gamyxz); - AHDList->insert(Gamzxz); - AHDList->insert(Gamxyy); - AHDList->insert(Gamyyy); - AHDList->insert(Gamzyy); - AHDList->insert(Gamxyz); - AHDList->insert(Gamyyz); - AHDList->insert(Gamzyz); - AHDList->insert(Gamxzz); - AHDList->insert(Gamyzz); - AHDList->insert(Gamzzz); - // phi,x phi,y phi,z - AHDList->insert(dtSfx_rhs); - AHDList->insert(dtSfy_rhs); - 
AHDList->insert(dtSfz_rhs); - - GaugeList = new MyList(Lap0); - GaugeList->insert(Sfx0); - GaugeList->insert(Sfy0); - GaugeList->insert(Sfz0); -#endif - - - - // Note: the first checkpoint-class variable is `bool` while the local variable is `int`; - // an explicit conversion may be required in some contexts. - // bool checkrun00 = checkrun; - // Note: the second checkpoint-class variable is `const char*` while the local variable is `char*`; - // an explicit conversion may be required. - // const char* checkfilename00 = checkfilename; - - CheckPoint = new checkpoint(checkrun, checkfilename, myrank); - - if (myrank==0) { - cout << " BSSN class successfully created " << endl; - } -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function initializes the class - -//================================================================================================ - -void bssn_class::Initialize() -{ - if (myrank == 0) - cout << " you have setted " << ngfs << " grid functions." 
<< endl; - - CheckPoint->addvariablelist(StateList); - CheckPoint->addvariablelist(OldStateList); - - char pname[50]; - { - map::iterator iter = parameters::str_par.find("inputpar"); - if (iter != parameters::str_par.end()) - { - strcpy(pname, (iter->second).c_str()); - } - else - { - cout << "Error inputpar" << endl; - exit(0); - } - } - GH = new cgh(0, ngfs, Symmetry, pname, checkrun, ErrorMonitor); - if (checkrun) - CheckPoint->readcheck_cgh(PhysTime, GH, myrank, nprocs, Symmetry); - else - GH->compose_cgh(nprocs); -#ifdef WithShell - SH = new ShellPatch(0, ngfs, pname, Symmetry, myrank, ErrorMonitor); - SH->matchcheck(GH->PatL[0]); - SH->compose_sh(nprocs); - // SH->compose_shr(nprocs); //sh is faster than shr - SH->setupcordtrans(); - SH->Dump_xyz(0, 0, 1); - SH->setupintintstuff(nprocs, GH->PatL[0], Symmetry); - - if (checkrun) - CheckPoint->readcheck_sh(SH, myrank); -#else - SH = 0; -#endif - - double h = GH->PatL[0]->data->blb->data->getdX(0); - for (int i = 1; i < dim; i++) - h = Mymin(h, GH->PatL[0]->data->blb->data->getdX(i)); - dT = Courant * h; - - if (checkrun) - { - CheckPoint->read_Black_Hole_position(BH_num_input, BH_num, Porg0, Pmom, Spin, Mass, Porgbr, Porg, Porg1, Porg_rhs); - setpbh(BH_num, Porg0, Mass, BH_num_input); - } - else - { - PhysTime = StartTime; - Setup_Black_Hole_position(); - } - - // Initialize sync caches (per-level, for predictor and corrector) - sync_cache_pre = new Parallel::SyncCache[GH->levels]; - sync_cache_cor = new Parallel::SyncCache[GH->levels]; - sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels]; - sync_cache_rp_fine = new Parallel::SyncCache[GH->levels]; - sync_cache_restrict = new Parallel::SyncCache[GH->levels]; - sync_cache_outbd = new Parallel::SyncCache[GH->levels]; -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function is 
the destructor; it releases allocated variables - -//================================================================================================ - -bssn_class::~bssn_class() -{ -#ifdef With_AHF - AHList->clearList(); - AHDList->clearList(); - GaugeList->clearList(); - if (lastahdumpid) - delete[] lastahdumpid; - if (findeveryl) - delete[] findeveryl; - - if (xc) - { - delete[] xc; - delete[] yc; - delete[] zc; - delete[] xr; - delete[] yr; - delete[] zr; - delete[] trigger; - delete[] dumpid; - delete[] dTT; - } - - AHFinderDirect::AHFinderDirect_cleanup(); -#endif - - StateList->clearList(); - RHSList->clearList(); - OldStateList->clearList(); - SynchList_pre->clearList(); - SynchList_cor->clearList(); - DumpList->clearList(); - ConstraintList->clearList(); - - delete phio; - delete trKo; - delete gxxo; - delete gxyo; - delete gxzo; - delete gyyo; - delete gyzo; - delete gzzo; - delete Axxo; - delete Axyo; - delete Axzo; - delete Ayyo; - delete Ayzo; - delete Azzo; - delete Gmxo; - delete Gmyo; - delete Gmzo; - delete Lapo; - delete Sfxo; - delete Sfyo; - delete Sfzo; - delete dtSfxo; - delete dtSfyo; - delete dtSfzo; - - delete phi0; - delete trK0; - delete gxx0; - delete gxy0; - delete gxz0; - delete gyy0; - delete gyz0; - delete gzz0; - delete Axx0; - delete Axy0; - delete Axz0; - delete Ayy0; - delete Ayz0; - delete Azz0; - delete Gmx0; - delete Gmy0; - delete Gmz0; - delete Lap0; - delete Sfx0; - delete Sfy0; - delete Sfz0; - delete dtSfx0; - delete dtSfy0; - delete dtSfz0; - - delete phi; - delete trK; - delete gxx; - delete gxy; - delete gxz; - delete gyy; - delete gyz; - delete gzz; - delete Axx; - delete Axy; - delete Axz; - delete Ayy; - delete Ayz; - delete Azz; - delete Gmx; - delete Gmy; - delete Gmz; - delete Lap; - delete Sfx; - delete Sfy; - delete Sfz; - delete dtSfx; - delete dtSfy; - delete dtSfz; - - delete phi1; - delete trK1; - delete gxx1; - delete gxy1; - delete gxz1; - delete gyy1; - delete gyz1; - delete gzz1; - delete Axx1; - delete 
Axy1; - delete Axz1; - delete Ayy1; - delete Ayz1; - delete Azz1; - delete Gmx1; - delete Gmy1; - delete Gmz1; - delete Lap1; - delete Sfx1; - delete Sfy1; - delete Sfz1; - delete dtSfx1; - delete dtSfy1; - delete dtSfz1; - - delete phi_rhs; - delete trK_rhs; - delete gxx_rhs; - delete gxy_rhs; - delete gxz_rhs; - delete gyy_rhs; - delete gyz_rhs; - delete gzz_rhs; - delete Axx_rhs; - delete Axy_rhs; - delete Axz_rhs; - delete Ayy_rhs; - delete Ayz_rhs; - delete Azz_rhs; - delete Gmx_rhs; - delete Gmy_rhs; - delete Gmz_rhs; - delete Lap_rhs; - delete Sfx_rhs; - delete Sfy_rhs; - delete Sfz_rhs; - delete dtSfx_rhs; - delete dtSfy_rhs; - delete dtSfz_rhs; - - delete rho; - delete Sx; - delete Sy; - delete Sz; - delete Sxx; - delete Sxy; - delete Sxz; - delete Syy; - delete Syz; - delete Szz; - - delete Gamxxx; - delete Gamxxy; - delete Gamxxz; - delete Gamxyy; - delete Gamxyz; - delete Gamxzz; - delete Gamyxx; - delete Gamyxy; - delete Gamyxz; - delete Gamyyy; - delete Gamyyz; - delete Gamyzz; - delete Gamzxx; - delete Gamzxy; - delete Gamzxz; - delete Gamzyy; - delete Gamzyz; - delete Gamzzz; - - delete Rxx; - delete Rxy; - delete Rxz; - delete Ryy; - delete Ryz; - delete Rzz; - - delete Rpsi4; - delete Ipsi4; - delete t1Rpsi4; - delete t1Ipsi4; - delete t2Rpsi4; - delete t2Ipsi4; - - delete Cons_Ham; - delete Cons_Px; - delete Cons_Py; - delete Cons_Pz; - delete Cons_Gx; - delete Cons_Gy; - delete Cons_Gz; - -#ifdef Point_Psi4 - delete phix; - delete phiy; - delete phiz; - delete trKx; - delete trKy; - delete trKz; - delete Axxx; - delete Axxy; - delete Axxz; - delete Axyx; - delete Axyy; - delete Axyz; - delete Axzx; - delete Axzy; - delete Axzz; - delete Ayyx; - delete Ayyy; - delete Ayyz; - delete Ayzx; - delete Ayzy; - delete Ayzz; - delete Azzx; - delete Azzy; - delete Azzz; -#endif - - // Destroy sync caches before GH - if (sync_cache_pre) - { - for (int i = 0; i < GH->levels; i++) - sync_cache_pre[i].destroy(); - delete[] sync_cache_pre; - } - if 
(sync_cache_cor) - { - for (int i = 0; i < GH->levels; i++) - sync_cache_cor[i].destroy(); - delete[] sync_cache_cor; - } - if (sync_cache_rp_coarse) - { - for (int i = 0; i < GH->levels; i++) - sync_cache_rp_coarse[i].destroy(); - delete[] sync_cache_rp_coarse; - } - if (sync_cache_rp_fine) - { - for (int i = 0; i < GH->levels; i++) - sync_cache_rp_fine[i].destroy(); - delete[] sync_cache_rp_fine; - } - - delete GH; -#ifdef WithShell - delete SH; -#endif - - for (int i = 0; i < BH_num; i++) - { - delete[] Porg0[i]; - delete[] Porgbr[i]; - delete[] Porg[i]; - delete[] Porg1[i]; - delete[] Porg_rhs[i]; - } - - delete[] Porg0; - delete[] Porgbr; - delete[] Porg; - delete[] Porg1; - delete[] Porg_rhs; - - delete[] Mass; - delete[] Spin; - delete[] Pmom; - - delete ErrorMonitor; - delete Psi4Monitor; - delete BHMonitor; - delete MAPMonitor; - delete ConVMonitor; - delete Waveshell; - - delete CheckPoint; -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function computes initial data using Lousto's analytic method - -//================================================================================================ - -void bssn_class::Setup_Initial_Data_Lousto() -{ - if (!checkrun) - { - if (myrank == 0) - { - cout << endl; - cout << " Setup initial data with Lousto's analytical formula. 
" << endl; - cout << endl; - } - char filename[50]; - { - map::iterator iter = parameters::str_par.find("inputpar"); - if (iter != parameters::str_par.end()) - { - strcpy(filename, (iter->second).c_str()); - } - else - { - cout << "Error inputpar" << endl; - exit(0); - } - } - int BH_NM; - double *Porg_here, *Pmom_here, *Spin_here, *Mass_here; - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "BSSN" && skey == "BH_num") - { - BH_NM = atoi(sval.c_str()); - break; - } - } - inf.close(); - } - - Porg_here = new double[3 * BH_NM]; - Pmom_here = new double[3 * BH_NM]; - Spin_here = new double[3 * BH_NM]; - Mass_here = new double[BH_NM]; - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "error 
reading parameter file " << filename << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "BSSN" && sind < BH_NM) - { - if (skey == "Mass") - Mass_here[sind] = atof(sval.c_str()); - else if (skey == "Porgx") - Porg_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Porgy") - Porg_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Porgz") - Porg_here[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Spinx") - Spin_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Spiny") - Spin_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Spinz") - Spin_here[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Pmomx") - Pmom_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Pmomy") - Pmom_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Pmomz") - Pmom_here[sind * 3 + 2] = atof(sval.c_str()); - } - } - inf.close(); - } - // set initial data - for (int lev = 0; lev < GH->levels; lev++) - { - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BL = Pp->data->blb; - while (BL) - { - Block *cg = BL->data; - if (myrank == cg->rank) - { - // Use Lousto's analytic formulas to compute initial data - f_get_lousto_nbhs(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM); - } - if (BL == Pp->data->ble) - break; - BL = BL->next; - } - Pp = Pp->next; - } - } - // dump read_in 
initial data - for (int lev = 0; lev < GH->levels; lev++) - Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT); -#ifdef WithShell - // ShellPatch part - MyList *Pp = SH->PatL; - while (Pp) - { - MyList *BL = Pp->data->blb; - while (BL) - { - Block *cg = BL->data; - if (myrank == cg->rank) - { - f_get_initial_nbhs_sh(cg->shape, - cg->fgfs[Pp->data->fngfs + ShellPatch::gx], - cg->fgfs[Pp->data->fngfs + ShellPatch::gy], - cg->fgfs[Pp->data->fngfs + ShellPatch::gz], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM); - } - if (BL == Pp->data->ble) - break; - BL = BL->next; - } - Pp = Pp->next; - } - // dump read_in initial data - SH->Dump_Data(StateList, 0, PhysTime, dT); -#endif - - delete[] Porg_here; - delete[] Mass_here; - delete[] Pmom_here; - delete[] Spin_here; - // SH->Synch(GH->PatL[0],StateList,Symmetry); - // exit(0); - } -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function computes initial data using Cao's analytic formulas - -//================================================================================================ - -void bssn_class::Setup_Initial_Data_Cao() -{ - if (!checkrun) - { - if (myrank == 0) - { - cout << endl; - cout << " Setup initial data with Cao's analytical formula. 
" << endl; - cout << endl; - } - char filename[50]; - { - map::iterator iter = parameters::str_par.find("inputpar"); - if (iter != parameters::str_par.end()) - { - strcpy(filename, (iter->second).c_str()); - } - else - { - cout << "Error inputpar" << endl; - exit(0); - } - } - int BH_NM; - double *Porg_here, *Pmom_here, *Spin_here, *Mass_here; - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "BSSN" && skey == "BH_num") - { - BH_NM = atoi(sval.c_str()); - break; - } - } - inf.close(); - } - - Porg_here = new double[3 * BH_NM]; - Pmom_here = new double[3 * BH_NM]; - Spin_here = new double[3 * BH_NM]; - Mass_here = new double[BH_NM]; - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "error 
reading parameter file " << filename << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "BSSN" && sind < BH_NM) - { - if (skey == "Mass") - Mass_here[sind] = atof(sval.c_str()); - else if (skey == "Porgx") - Porg_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Porgy") - Porg_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Porgz") - Porg_here[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Spinx") - Spin_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Spiny") - Spin_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Spinz") - Spin_here[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Pmomx") - Pmom_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Pmomy") - Pmom_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Pmomz") - Pmom_here[sind * 3 + 2] = atof(sval.c_str()); - } - } - inf.close(); - } - // set initial data - for (int lev = 0; lev < GH->levels; lev++) - { - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BL = Pp->data->blb; - while (BL) - { - Block *cg = BL->data; - if (myrank == cg->rank) - { - // Use Cao's analytic formulas to compute initial data - f_get_initial_nbhs(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM); - } - if (BL == Pp->data->ble) - break; - BL = BL->next; - } - Pp = Pp->next; - } - } - // dump read_in initial 
data - for (int lev = 0; lev < GH->levels; lev++) - Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT); -#ifdef WithShell - // ShellPatch part - MyList *Pp = SH->PatL; - while (Pp) - { - MyList *BL = Pp->data->blb; - while (BL) - { - Block *cg = BL->data; - if (myrank == cg->rank) - { - f_get_initial_nbhs_sh(cg->shape, - cg->fgfs[Pp->data->fngfs + ShellPatch::gx], - cg->fgfs[Pp->data->fngfs + ShellPatch::gy], - cg->fgfs[Pp->data->fngfs + ShellPatch::gz], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM); - } - if (BL == Pp->data->ble) - break; - BL = BL->next; - } - Pp = Pp->next; - } - // dump read_in initial data - SH->Dump_Data(StateList, 0, PhysTime, dT); -#endif - - delete[] Porg_here; - delete[] Mass_here; - delete[] Pmom_here; - delete[] Spin_here; - // SH->Synch(GH->PatL[0],StateList,Symmetry); - // exit(0); - } -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function computes Kerr-Schild initial data via an analytic method - -//================================================================================================ - -void bssn_class::Setup_KerrSchild() -{ - if (!checkrun) - { - if (myrank == 0) - { - cout << endl; - cout << " Setup initial data with Kerr-Schild formula. 
" << endl; - cout << endl; - } - // set initial data - for (int lev = 0; lev < GH->levels; lev++) - { - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BL = Pp->data->blb; - while (BL) - { - Block *cg = BL->data; - if (myrank == cg->rank) - { - f_get_initial_kerrschild(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn]); - } - if (BL == Pp->data->ble) - break; - BL = BL->next; - } - Pp = Pp->next; - } - } -#ifdef WithShell - // ShellPatch part - MyList *Pp = SH->PatL; - while (Pp) - { - int lev = 0, fngfs = Pp->data->fngfs; - - MyList *BL = Pp->data->blb; - while (BL) - { - Block *cg = BL->data; - if (myrank == cg->rank) - { - f_get_initial_kerrschild_ss(cg->shape, - cg->fgfs[Pp->data->fngfs + ShellPatch::gx], - cg->fgfs[Pp->data->fngfs + ShellPatch::gy], - cg->fgfs[Pp->data->fngfs + ShellPatch::gz], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn]); - /* - f_fderivs_shc(cg->shape,cg->fgfs[phi0->sgfn], - cg->fgfs[Sfx_rhs->sgfn], - 
cg->fgfs[Sfy_rhs->sgfn], - cg->fgfs[Sfz_rhs->sgfn], - cg->X[0],cg->X[1],cg->X[2], - phi0->SoA[0],phi0->SoA[1],phi0->SoA[2], - Symmetry,lev,Pp->data->sst, - cg->fgfs[fngfs+ShellPatch::drhodx], - cg->fgfs[fngfs+ShellPatch::drhody], - cg->fgfs[fngfs+ShellPatch::drhodz], - cg->fgfs[fngfs+ShellPatch::dsigmadx], - cg->fgfs[fngfs+ShellPatch::dsigmady], - cg->fgfs[fngfs+ShellPatch::dsigmadz], - cg->fgfs[fngfs+ShellPatch::dRdx], - cg->fgfs[fngfs+ShellPatch::dRdy], - cg->fgfs[fngfs+ShellPatch::dRdz]); - f_fdderivs_shc(cg->shape,cg->fgfs[phi0->sgfn], - cg->fgfs[Axx_rhs->sgfn],cg->fgfs[Axy_rhs->sgfn],cg->fgfs[Axz_rhs->sgfn], - cg->fgfs[Ayy_rhs->sgfn],cg->fgfs[Ayz_rhs->sgfn],cg->fgfs[Azz_rhs->sgfn], - cg->X[0],cg->X[1],cg->X[2], - phi0->SoA[0],phi0->SoA[1],phi0->SoA[2], - Symmetry,lev,Pp->data->sst, - cg->fgfs[fngfs+ShellPatch::drhodx], - cg->fgfs[fngfs+ShellPatch::drhody], - cg->fgfs[fngfs+ShellPatch::drhodz], - cg->fgfs[fngfs+ShellPatch::dsigmadx], - cg->fgfs[fngfs+ShellPatch::dsigmady], - cg->fgfs[fngfs+ShellPatch::dsigmadz], - cg->fgfs[fngfs+ShellPatch::dRdx], - cg->fgfs[fngfs+ShellPatch::dRdy], - cg->fgfs[fngfs+ShellPatch::dRdz], - cg->fgfs[fngfs+ShellPatch::drhodxx], - cg->fgfs[fngfs+ShellPatch::drhodxy], - cg->fgfs[fngfs+ShellPatch::drhodxz], - cg->fgfs[fngfs+ShellPatch::drhodyy], - cg->fgfs[fngfs+ShellPatch::drhodyz], - cg->fgfs[fngfs+ShellPatch::drhodzz], - cg->fgfs[fngfs+ShellPatch::dsigmadxx], - cg->fgfs[fngfs+ShellPatch::dsigmadxy], - cg->fgfs[fngfs+ShellPatch::dsigmadxz], - cg->fgfs[fngfs+ShellPatch::dsigmadyy], - cg->fgfs[fngfs+ShellPatch::dsigmadyz], - cg->fgfs[fngfs+ShellPatch::dsigmadzz], - cg->fgfs[fngfs+ShellPatch::dRdxx], - cg->fgfs[fngfs+ShellPatch::dRdxy], - cg->fgfs[fngfs+ShellPatch::dRdxz], - cg->fgfs[fngfs+ShellPatch::dRdyy], - cg->fgfs[fngfs+ShellPatch::dRdyz], - cg->fgfs[fngfs+ShellPatch::dRdzz]); - */ - } - if (BL == Pp->data->ble) - break; - BL = BL->next; - } - Pp = Pp->next; - } -#endif - - // dump read_in initial data - // 
SH->Synch(GH->PatL[0],StateList,Symmetry); - // for(int lev=0;levlevels;lev++) Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT); - // SH->Dump_Data(StateList,0,PhysTime,dT); - // exit(0); - - /* - { - MyList * DG_List=new MyList(Sfx_rhs); - DG_List->insert(Sfy_rhs); - DG_List->insert(Sfz_rhs); - DG_List->insert(Axx_rhs); - DG_List->insert(Axy_rhs); - DG_List->insert(Axz_rhs); - DG_List->insert(Ayy_rhs); - DG_List->insert(Ayz_rhs); - DG_List->insert(Azz_rhs); - SH->Synch(DG_List,Symmetry); - SH->Dump_Data(DG_List,0,PhysTime,dT); - DG_List->clearList(); - exit(0); - } - */ - } -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function reads initial data produced by Pablo Galaviz's Olliptic program - -//================================================================================================ - -// Read initial data solved by Pablo's Olliptic Phys.Rev.D 82 024005 (2010) - -//|---------------------------------------------------------------------------- -// read ASCII file with the style of Pablo -//|---------------------------------------------------------------------------- -bool bssn_class::read_Pablo_file(int *ext, double *datain, char *filename) -{ - if (myrank == 0) - { - cout << endl; - cout << " Setup initial data with Pablo_file. " << endl; - cout << endl; - } - - int nx = ext[0], ny = ext[1], nz = ext[2]; - int i, j, k; - double x, y, z; - //|--->open in put file - ifstream infile; - infile.open(filename); - if (!infile) - { - cout << "bssn_class: read_Pablo_file can't open " << filename << " for input." 
<< endl; - return false; - } - for (k = 0; k < nz; k++) - for (j = 0; j < ny; j++) - for (i = 0; i < nx; i++) - { - infile >> x >> y >> z >> datain[i + j * nx + k * nx * ny]; - } - - infile.close(); - - return true; -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function writes initial data file in the style of Pablo Galaviz's Olliptic program - -//================================================================================================ - -//|---------------------------------------------------------------------------- -// write ASCII file with the style of Pablo -//|---------------------------------------------------------------------------- -void bssn_class::write_Pablo_file(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax, - char *filename) -{ - int nx = ext[0], ny = ext[1], nz = ext[2]; - int i, j, k; - double *X, *Y, *Z; - X = new double[nx]; - Y = new double[ny]; - Z = new double[nz]; - double dX, dY, dZ; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - dX = (xmax - xmin) / (nx - 1); - for (i = 0; i < nx; i++) - X[i] = xmin + i * dX; - dY = (ymax - ymin) / (ny - 1); - for (j = 0; j < ny; j++) - Y[j] = ymin + j * dY; - dZ = (zmax - zmin) / (nz - 1); - for (k = 0; k < nz; k++) - Z[k] = zmin + k * dZ; -#else -#ifdef Cell - dX = (xmax - xmin) / nx; - for (i = 0; i < nx; i++) - X[i] = xmin + (i + 0.5) * dX; - dY = (ymax - ymin) / ny; - for (j = 0; j < ny; j++) - Y[j] = ymin + (j + 0.5) * dY; - dZ = (zmax - zmin) / nz; - for (k = 0; k < nz; k++) - Z[k] = zmin + (k + 0.5) * dZ; -#else -#error Not define Vertex nor Cell -#endif -#endif - //|--->open out put file - ofstream outfile; - outfile.open(filename); - if (!outfile) - { - cout << "bssn_class: write_Pablo_file can't open " << filename << " for output." 
<< endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - outfile.setf(ios::scientific, ios::floatfield); - outfile.precision(16); - for (k = 0; k < nz; k++) - for (j = 0; j < ny; j++) - for (i = 0; i < nx; i++) - { - outfile << X[i] << " " << Y[j] << " " << Z[k] << " " - << 0 << endl; - } - outfile.close(); - - delete[] X; - delete[] Y; - delete[] Z; -} - -//================================================================================================ - - - -//================================================================================================ - -// Read initial data solved by Ansorg, PRD 70, 064011 (2004) - -void bssn_class::Read_Ansorg() -{ - if (!checkrun) - { - if (myrank == 0) - { - cout << endl; - cout << " Read initial data from Ansorg's solver," - << " please be sure the input parameters for black holes are puncture parameters!! " << endl; - cout << endl; - } - char filename[50]; - { - map::iterator iter = parameters::str_par.find("inputpar"); - if (iter != parameters::str_par.end()) - { - strcpy(filename, (iter->second).c_str()); - } - else - { - cout << "Error inputpar" << endl; - exit(0); - } - } - int BH_NM; - double *Porg_here, *Pmom_here, *Spin_here, *Mass_here; - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "BSSN" && skey == 
"BH_num") - { - BH_NM = atoi(sval.c_str()); - break; - } - } - inf.close(); - } - - Porg_here = new double[3 * BH_NM]; - Pmom_here = new double[3 * BH_NM]; - Spin_here = new double[3 * BH_NM]; - Mass_here = new double[BH_NM]; - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "BSSN" && sind < BH_NM) - { - if (skey == "Mass") - Mass_here[sind] = atof(sval.c_str()); - else if (skey == "Porgx") - Porg_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Porgy") - Porg_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Porgz") - Porg_here[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Spinx") - Spin_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Spiny") - Spin_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Spinz") - Spin_here[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Pmomx") - Pmom_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Pmomy") - Pmom_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Pmomz") - Pmom_here[sind * 3 + 2] = atof(sval.c_str()); - } - } - inf.close(); - } - - int order = 6; - Ansorg read_ansorg("Ansorg.psid", order); - // set initial data - for (int lev = 0; lev < GH->levels; lev++) - { - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BL = 
Pp->data->blb; - while (BL) - { - Block *cg = BL->data; - if (myrank == cg->rank) - { - for (int k = 0; k < cg->shape[2]; k++) - for (int j = 0; j < cg->shape[1]; j++) - for (int i = 0; i < cg->shape[0]; i++) - cg->fgfs[phi0->sgfn][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]] = - read_ansorg.ps_u_at_xyz(cg->X[0][i], cg->X[1][j], cg->X[2][k]); - - f_get_ansorg_nbhs(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM); - } - if (BL == Pp->data->ble) - break; - BL = BL->next; - } - Pp = Pp->next; - } - } -#ifdef WithShell - // ShellPatch part - MyList *Pp = SH->PatL; - while (Pp) - { - MyList *BL = Pp->data->blb; - while (BL) - { - Block *cg = BL->data; - if (myrank == cg->rank) - { - for (int k = 0; k < cg->shape[2]; k++) - for (int j = 0; j < cg->shape[1]; j++) - for (int i = 0; i < cg->shape[0]; i++) - cg->fgfs[phi0->sgfn][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]] = - read_ansorg.ps_u_at_xyz(cg->fgfs[Pp->data->fngfs + ShellPatch::gx][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]], - cg->fgfs[Pp->data->fngfs + ShellPatch::gy][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]], - cg->fgfs[Pp->data->fngfs + ShellPatch::gz][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]]); - - f_get_ansorg_nbhs_ss(cg->shape, - cg->fgfs[Pp->data->fngfs + ShellPatch::gx], - cg->fgfs[Pp->data->fngfs + ShellPatch::gy], - cg->fgfs[Pp->data->fngfs + 
ShellPatch::gz], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM); -#if 0 -// for check fderivs_sh - f_fderivs_sh(cg->shape,cg->fgfs[Ayz0->sgfn], - cg->fgfs[Sfx0->sgfn],cg->fgfs[Sfy0->sgfn],cg->fgfs[Sfz0->sgfn], - cg->X[0],cg->X[1],cg->X[2], - Ayz0->SoA[0],Ayz0->SoA[1],Ayz0->SoA[2], - Symmetry,Pp->data->sst,Pp->data->sst); -#endif -#if 0 -// for check fderivs_shc - int fngfs = Pp->data->fngfs; - f_fderivs_shc(cg->shape,cg->fgfs[Ayz0->sgfn], - cg->fgfs[Sfx0->sgfn],cg->fgfs[Sfy0->sgfn],cg->fgfs[Sfz0->sgfn], - cg->X[0],cg->X[1],cg->X[2], - Ayz0->SoA[0],Ayz0->SoA[1],Ayz0->SoA[2], - Symmetry,Pp->data->sst,Pp->data->sst, - cg->fgfs[fngfs+ShellPatch::drhodx], - cg->fgfs[fngfs+ShellPatch::drhody], - cg->fgfs[fngfs+ShellPatch::drhodz], - cg->fgfs[fngfs+ShellPatch::dsigmadx], - cg->fgfs[fngfs+ShellPatch::dsigmady], - cg->fgfs[fngfs+ShellPatch::dsigmadz], - cg->fgfs[fngfs+ShellPatch::dRdx], - cg->fgfs[fngfs+ShellPatch::dRdy], - cg->fgfs[fngfs+ShellPatch::dRdz]); -#endif - } - if (BL == Pp->data->ble) - break; - BL = BL->next; - } - Pp = Pp->next; - } -#endif - - delete[] Porg_here; - delete[] Mass_here; - delete[] Pmom_here; - delete[] Spin_here; - - Compute_Constraint(); - // dump read_in initial data - for (int lev = 0; lev < GH->levels; lev++) - Parallel::Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT); -#ifdef WithShell - SH->Dump_Data(DumpList, 0, PhysTime, dT); -#endif - // if(myrank==0) MPI_Abort(MPI_COMM_WORLD,1); - } 
-} - -//================================================================================================ - - - -//================================================================================================ - -// This member function sets up the time evolution for the entire process - -//================================================================================================ - -void bssn_class::Evolve(int Steps) -{ - clock_t prev_clock, curr_clock; - double LastDump = 0.0, LastCheck = 0.0, Last2dDump = 0.0; - LastAnas = 0; -#if 0 -//initial checkpoint for special uasge - { - CheckPoint->write_Black_Hole_position(BH_num_input,BH_num,Porg0,Porgbr,Mass); - CheckPoint->writecheck_cgh(PhysTime,GH); -#ifdef WithShell - CheckPoint->writecheck_sh(PhysTime,SH); -#endif - CheckPoint->write_bssn(LastDump,Last2dDump,LastAnas); - misc::tillherecheck("complete initialization preparation"); // we need synchronization here - if(myrank==0) MPI_Abort(MPI_COMM_WORLD,1); - } -#endif - // for step 0 constraint interpolation - Interp_Constraint(true); - -#ifdef With_AHF - // setup apparent horizon finder direct of thornburg - { - HN_num = BH_num; - for (int ia = 0; ia < BH_num; ia++) - for (int ib = ia + 1; ib < BH_num; ib++) - HN_num++; - - AHFinderDirect::AHFinderDirect_setup(AHList, GaugeList, - this, - Symmetry, HN_num, &PhysTime); - - lastahdumpid = new int[HN_num]; - findeveryl = new int[HN_num]; - xc = new double[HN_num]; - yc = new double[HN_num]; - zc = new double[HN_num]; - xr = new double[HN_num]; - yr = new double[HN_num]; - zr = new double[HN_num]; - dTT = new double[HN_num]; - trigger = new bool[HN_num]; - dumpid = new int[HN_num]; - - for (int ihn = 0; ihn < HN_num; ihn++) - { - lastahdumpid[ihn] = 0; - findeveryl[ihn] = AHfindevery; - } - } -#endif - - if (checkrun) - CheckPoint->read_bssn(LastDump, Last2dDump, LastAnas); - - double dT_mon = dT * pow(0.5, Mymax(0, trfls)); - - /* - #ifdef With_AHF - //initial apparent horizon finding - { - double gam; - 
double massmin=Mass[0]; - for(int ihn=1;ihnprint_message("Warning: we always assume intput parameter in cell center style."); + + a_stream.clear(); + a_stream.str(""); + a_stream << setw(15) << "# time"; + char str[50]; + for (int pl = 2; pl < maxl + 1; pl++) + for (int pm = -pl; pm < pl + 1; pm++) + { + sprintf(str, "R%02dm%03d", pl, pm); + a_stream << setw(16) << str; + sprintf(str, "I%02dm%03d", pl, pm); + a_stream << setw(16) << str; + } + Psi4Monitor = new monitor("bssn_psi4.dat", myrank, a_stream.str()); + + a_stream.clear(); + a_stream.str(""); + a_stream << setw(15) << "# time"; + BHMonitor = new monitor("bssn_BH.dat", myrank, a_stream.str()); + + a_stream.clear(); + a_stream.str(""); + a_stream << setw(15) << "# time ADMmass ADMPx ADMPy ADMPz ADMSx ADMSy ADMSz"; + MAPMonitor = new monitor("bssn_ADMQs.dat", myrank, a_stream.str()); + + a_stream.clear(); + a_stream.str(""); + a_stream << setw(15) << "# time Ham Px Py Pz Gx Gy Gz"; + ConVMonitor = new monitor("bssn_constraint.dat", myrank, a_stream.str()); + } + // setup sphere integration engine + Waveshell = new surface_integral(Symmetry); + + trfls = 0; + chitiny = 0; + // read parameter from file + { + char filename[50]; + { + map::iterator iter = parameters::str_par.find("inputpar"); + if (iter != parameters::str_par.end()) + { + strcpy(filename, (iter->second).c_str()); + } + else + { + cout << "Error inputpar" << endl; + exit(0); + } + } + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + ifstream inf(filename, ifstream::in); + if (!inf.good() && myrank == 0) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "Can not open parameter file " << filename + << " for inputing information of black holes" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + if (ErrorMonitor->outfile) + 
ErrorMonitor->outfile << "error reading parameter file " << filename + << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "BSSN" && skey == "chitiny") + chitiny = atof(sval.c_str()); + else if (sgrp == "BSSN" && skey == "time refinement start from level") + trfls = atoi(sval.c_str()); +#ifdef With_AHF + else if (sgrp == "AHF" && skey == "AHfindevery") + AHfindevery = atoi(sval.c_str()); + else if (sgrp == "AHF" && skey == "AHdumptime") + AHdumptime = atof(sval.c_str()); +#endif + } + inf.close(); + } + if (myrank == 0) + { + // echo information of lower bound of chi + cout << " chitiny = " << chitiny << endl; + cout << " time refinement start from level #" << trfls << endl; +#ifdef With_AHF + cout << " parameters for AHF:" << endl; + cout << " AHfindevery = " << AHfindevery << endl; + cout << " AHdumptime = " << AHdumptime << endl; +#endif + } + + chitiny = chitiny - 1; // because we have subtracted one from chi + + strcpy(checkfilename, checkfilenamei); + + ngfs = 0; + phio = new var("phio", ngfs++, 1, 1, 1); + trKo = new var("trKo", ngfs++, 1, 1, 1); + gxxo = new var("gxxo", ngfs++, 1, 1, 1); + gxyo = new var("gxyo", ngfs++, -1, -1, 1); + gxzo = new var("gxzo", ngfs++, -1, 1, -1); + gyyo = new var("gyyo", ngfs++, 1, 1, 1); + gyzo = new var("gyzo", ngfs++, 1, -1, -1); + gzzo = new var("gzzo", ngfs++, 1, 1, 1); + Axxo = new var("Axxo", ngfs++, 1, 1, 1); + Axyo = new var("Axyo", ngfs++, -1, -1, 1); + Axzo = new var("Axzo", ngfs++, -1, 1, -1); + Ayyo = new var("Ayyo", ngfs++, 1, 1, 1); + Ayzo = new var("Ayzo", ngfs++, 1, -1, -1); + Azzo = new var("Azzo", ngfs++, 1, 1, 1); + Gmxo = new var("Gmxo", ngfs++, -1, 1, 1); + Gmyo = new var("Gmyo", ngfs++, 1, -1, 1); + Gmzo = new var("Gmzo", ngfs++, 1, 1, -1); + Lapo = new var("Lapo", ngfs++, 1, 1, 1); + Sfxo = new var("Sfxo", ngfs++, -1, 1, 1); + Sfyo = new var("Sfyo", ngfs++, 1, -1, 1); + Sfzo = new var("Sfzo", ngfs++, 1, 1, -1); + dtSfxo = new var("dtSfxo", 
ngfs++, -1, 1, 1); + dtSfyo = new var("dtSfyo", ngfs++, 1, -1, 1); + dtSfzo = new var("dtSfzo", ngfs++, 1, 1, -1); + + phi0 = new var("phi0", ngfs++, 1, 1, 1); + trK0 = new var("trK0", ngfs++, 1, 1, 1); + gxx0 = new var("gxx0", ngfs++, 1, 1, 1); + gxy0 = new var("gxy0", ngfs++, -1, -1, 1); + gxz0 = new var("gxz0", ngfs++, -1, 1, -1); + gyy0 = new var("gyy0", ngfs++, 1, 1, 1); + gyz0 = new var("gyz0", ngfs++, 1, -1, -1); + gzz0 = new var("gzz0", ngfs++, 1, 1, 1); + Axx0 = new var("Axx0", ngfs++, 1, 1, 1); + Axy0 = new var("Axy0", ngfs++, -1, -1, 1); + Axz0 = new var("Axz0", ngfs++, -1, 1, -1); + Ayy0 = new var("Ayy0", ngfs++, 1, 1, 1); + Ayz0 = new var("Ayz0", ngfs++, 1, -1, -1); + Azz0 = new var("Azz0", ngfs++, 1, 1, 1); + Gmx0 = new var("Gmx0", ngfs++, -1, 1, 1); + Gmy0 = new var("Gmy0", ngfs++, 1, -1, 1); + Gmz0 = new var("Gmz0", ngfs++, 1, 1, -1); + Lap0 = new var("Lap0", ngfs++, 1, 1, 1); + Sfx0 = new var("Sfx0", ngfs++, -1, 1, 1); + Sfy0 = new var("Sfy0", ngfs++, 1, -1, 1); + Sfz0 = new var("Sfz0", ngfs++, 1, 1, -1); + dtSfx0 = new var("dtSfx0", ngfs++, -1, 1, 1); + dtSfy0 = new var("dtSfy0", ngfs++, 1, -1, 1); + dtSfz0 = new var("dtSfz0", ngfs++, 1, 1, -1); + + phi = new var("phi", ngfs++, 1, 1, 1); + trK = new var("trK", ngfs++, 1, 1, 1); + gxx = new var("gxx", ngfs++, 1, 1, 1); + gxy = new var("gxy", ngfs++, -1, -1, 1); + gxz = new var("gxz", ngfs++, -1, 1, -1); + gyy = new var("gyy", ngfs++, 1, 1, 1); + gyz = new var("gyz", ngfs++, 1, -1, -1); + gzz = new var("gzz", ngfs++, 1, 1, 1); + Axx = new var("Axx", ngfs++, 1, 1, 1); + Axy = new var("Axy", ngfs++, -1, -1, 1); + Axz = new var("Axz", ngfs++, -1, 1, -1); + Ayy = new var("Ayy", ngfs++, 1, 1, 1); + Ayz = new var("Ayz", ngfs++, 1, -1, -1); + Azz = new var("Azz", ngfs++, 1, 1, 1); + Gmx = new var("Gmx", ngfs++, -1, 1, 1); + Gmy = new var("Gmy", ngfs++, 1, -1, 1); + Gmz = new var("Gmz", ngfs++, 1, 1, -1); + Lap = new var("Lap", ngfs++, 1, 1, 1); + Sfx = new var("Sfx", ngfs++, -1, 1, 1); + Sfy = new 
var("Sfy", ngfs++, 1, -1, 1); + Sfz = new var("Sfz", ngfs++, 1, 1, -1); + dtSfx = new var("dtSfx", ngfs++, -1, 1, 1); + dtSfy = new var("dtSfy", ngfs++, 1, -1, 1); + dtSfz = new var("dtSfz", ngfs++, 1, 1, -1); + + phi1 = new var("phi1", ngfs++, 1, 1, 1); + trK1 = new var("trK1", ngfs++, 1, 1, 1); + gxx1 = new var("gxx1", ngfs++, 1, 1, 1); + gxy1 = new var("gxy1", ngfs++, -1, -1, 1); + gxz1 = new var("gxz1", ngfs++, -1, 1, -1); + gyy1 = new var("gyy1", ngfs++, 1, 1, 1); + gyz1 = new var("gyz1", ngfs++, 1, -1, -1); + gzz1 = new var("gzz1", ngfs++, 1, 1, 1); + Axx1 = new var("Axx1", ngfs++, 1, 1, 1); + Axy1 = new var("Axy1", ngfs++, -1, -1, 1); + Axz1 = new var("Axz1", ngfs++, -1, 1, -1); + Ayy1 = new var("Ayy1", ngfs++, 1, 1, 1); + Ayz1 = new var("Ayz1", ngfs++, 1, -1, -1); + Azz1 = new var("Azz1", ngfs++, 1, 1, 1); + Gmx1 = new var("Gmx1", ngfs++, -1, 1, 1); + Gmy1 = new var("Gmy1", ngfs++, 1, -1, 1); + Gmz1 = new var("Gmz1", ngfs++, 1, 1, -1); + Lap1 = new var("Lap1", ngfs++, 1, 1, 1); + Sfx1 = new var("Sfx1", ngfs++, -1, 1, 1); + Sfy1 = new var("Sfy1", ngfs++, 1, -1, 1); + Sfz1 = new var("Sfz1", ngfs++, 1, 1, -1); + dtSfx1 = new var("dtSfx1", ngfs++, -1, 1, 1); + dtSfy1 = new var("dtSfy1", ngfs++, 1, -1, 1); + dtSfz1 = new var("dtSfz1", ngfs++, 1, 1, -1); + + phi_rhs = new var("phi_rhs", ngfs++, 1, 1, 1); + trK_rhs = new var("trK_rhs", ngfs++, 1, 1, 1); + gxx_rhs = new var("gxx_rhs", ngfs++, 1, 1, 1); + gxy_rhs = new var("gxy_rhs", ngfs++, -1, -1, 1); + gxz_rhs = new var("gxz_rhs", ngfs++, -1, 1, -1); + gyy_rhs = new var("gyy_rhs", ngfs++, 1, 1, 1); + gyz_rhs = new var("gyz_rhs", ngfs++, 1, -1, -1); + gzz_rhs = new var("gzz_rhs", ngfs++, 1, 1, 1); + Axx_rhs = new var("Axx_rhs", ngfs++, 1, 1, 1); + Axy_rhs = new var("Axy_rhs", ngfs++, -1, -1, 1); + Axz_rhs = new var("Axz_rhs", ngfs++, -1, 1, -1); + Ayy_rhs = new var("Ayy_rhs", ngfs++, 1, 1, 1); + Ayz_rhs = new var("Ayz_rhs", ngfs++, 1, -1, -1); + Azz_rhs = new var("Azz_rhs", ngfs++, 1, 1, 1); + Gmx_rhs = new 
var("Gmx_rhs", ngfs++, -1, 1, 1); + Gmy_rhs = new var("Gmy_rhs", ngfs++, 1, -1, 1); + Gmz_rhs = new var("Gmz_rhs", ngfs++, 1, 1, -1); + Lap_rhs = new var("Lap_rhs", ngfs++, 1, 1, 1); + Sfx_rhs = new var("Sfx_rhs", ngfs++, -1, 1, 1); + Sfy_rhs = new var("Sfy_rhs", ngfs++, 1, -1, 1); + Sfz_rhs = new var("Sfz_rhs", ngfs++, 1, 1, -1); + dtSfx_rhs = new var("dtSfx_rhs", ngfs++, -1, 1, 1); + dtSfy_rhs = new var("dtSfy_rhs", ngfs++, 1, -1, 1); + dtSfz_rhs = new var("dtSfz_rhs", ngfs++, 1, 1, -1); + + rho = new var("rho", ngfs++, 1, 1, 1); + Sx = new var("Sx", ngfs++, -1, 1, 1); + Sy = new var("Sy", ngfs++, 1, -1, 1); + Sz = new var("Sz", ngfs++, 1, 1, -1); + Sxx = new var("Sxx", ngfs++, 1, 1, 1); + Sxy = new var("Sxy", ngfs++, -1, -1, 1); + Sxz = new var("Sxz", ngfs++, -1, 1, -1); + Syy = new var("Syy", ngfs++, 1, 1, 1); + Syz = new var("Syz", ngfs++, 1, -1, -1); + Szz = new var("Szz", ngfs++, 1, 1, 1); + + Gamxxx = new var("Gamxxx", ngfs++, -1, 1, 1); + Gamxxy = new var("Gamxxy", ngfs++, 1, -1, 1); + Gamxxz = new var("Gamxxz", ngfs++, 1, 1, -1); + Gamxyy = new var("Gamxyy", ngfs++, -1, 1, 1); + Gamxyz = new var("Gamxyz", ngfs++, -1, -1, -1); + Gamxzz = new var("Gamxzz", ngfs++, -1, 1, 1); + Gamyxx = new var("Gamyxx", ngfs++, 1, -1, 1); + Gamyxy = new var("Gamyxy", ngfs++, -1, 1, 1); + Gamyxz = new var("Gamyxz", ngfs++, -1, -1, -1); + Gamyyy = new var("Gamyyy", ngfs++, 1, -1, 1); + Gamyyz = new var("Gamyyz", ngfs++, 1, 1, -1); + Gamyzz = new var("Gamyzz", ngfs++, 1, -1, 1); + Gamzxx = new var("Gamzxx", ngfs++, 1, 1, -1); + Gamzxy = new var("Gamzxy", ngfs++, -1, -1, -1); + Gamzxz = new var("Gamzxz", ngfs++, -1, 1, 1); + Gamzyy = new var("Gamzyy", ngfs++, 1, 1, -1); + Gamzyz = new var("Gamzyz", ngfs++, 1, -1, 1); + Gamzzz = new var("Gamzzz", ngfs++, 1, 1, -1); + + Rxx = new var("Rxx", ngfs++, 1, 1, 1); + Rxy = new var("Rxy", ngfs++, -1, -1, 1); + Rxz = new var("Rxz", ngfs++, -1, 1, -1); + Ryy = new var("Ryy", ngfs++, 1, 1, 1); + Ryz = new var("Ryz", ngfs++, 1, -1, -1); + 
Rzz = new var("Rzz", ngfs++, 1, 1, 1); + + // refer to PRD, 77, 024027 (2008) + Rpsi4 = new var("Rpsi4", ngfs++, 1, 1, 1); + Ipsi4 = new var("Ipsi4", ngfs++, -1, -1, -1); + t1Rpsi4 = new var("t1Rpsi4", ngfs++, 1, 1, 1); + t1Ipsi4 = new var("t1Ipsi4", ngfs++, -1, -1, -1); + t2Rpsi4 = new var("t2Rpsi4", ngfs++, 1, 1, 1); + t2Ipsi4 = new var("t2Ipsi4", ngfs++, -1, -1, -1); + + // constraint violation monitor variables + Cons_Ham = new var("Cons_Ham", ngfs++, 1, 1, 1); + Cons_Px = new var("Cons_Px", ngfs++, -1, 1, 1); + Cons_Py = new var("Cons_Py", ngfs++, 1, -1, 1); + Cons_Pz = new var("Cons_Pz", ngfs++, 1, 1, -1); + Cons_Gx = new var("Cons_Gx", ngfs++, -1, 1, 1); + Cons_Gy = new var("Cons_Gy", ngfs++, 1, -1, 1); + Cons_Gz = new var("Cons_Gz", ngfs++, 1, 1, -1); + +#ifdef Point_Psi4 + phix = new var("phix", ngfs++, -1, 1, 1); + phiy = new var("phiy", ngfs++, 1, -1, 1); + phiz = new var("phiz", ngfs++, 1, 1, -1); + trKx = new var("trKx", ngfs++, -1, 1, 1); + trKy = new var("trKy", ngfs++, 1, -1, 1); + trKz = new var("trKz", ngfs++, 1, 1, -1); + Axxx = new var("Axxx", ngfs++, -1, 1, 1); + Axxy = new var("Axxy", ngfs++, 1, -1, 1); + Axxz = new var("Axxz", ngfs++, 1, 1, -1); + Axyx = new var("Axyx", ngfs++, 1, -1, 1); + Axyy = new var("Axyy", ngfs++, -1, 1, 1); + Axyz = new var("Axyz", ngfs++, -1, -1, -1); + Axzx = new var("Axzx", ngfs++, 1, 1, -1); + Axzy = new var("Axzy", ngfs++, -1, -1, -1); + Axzz = new var("Axzz", ngfs++, -1, 1, 1); + Ayyx = new var("Ayyx", ngfs++, -1, 1, 1); + Ayyy = new var("Ayyy", ngfs++, 1, -1, 1); + Ayyz = new var("Ayyz", ngfs++, 1, 1, -1); + Ayzx = new var("Ayzx", ngfs++, -1, -1, -1); + Ayzy = new var("Ayzy", ngfs++, 1, 1, -1); + Ayzz = new var("Ayzz", ngfs++, 1, -1, 1); + Azzx = new var("Azzx", ngfs++, -1, 1, 1); + Azzy = new var("Azzy", ngfs++, 1, -1, 1); + Azzz = new var("Azzz", ngfs++, 1, 1, -1); +#endif + + // specific properspeed for 1+log slice + { + const double vl = sqrt(2); + trKo->setpropspeed(vl); + trK0->setpropspeed(vl); + 
trK->setpropspeed(vl); + trK1->setpropspeed(vl); + trK_rhs->setpropspeed(vl); + + phio->setpropspeed(vl); + phi0->setpropspeed(vl); + phi->setpropspeed(vl); + phi1->setpropspeed(vl); + phi_rhs->setpropspeed(vl); + + Lapo->setpropspeed(vl); + Lap0->setpropspeed(vl); + Lap->setpropspeed(vl); + Lap1->setpropspeed(vl); + Lap_rhs->setpropspeed(vl); + } + + OldStateList = new MyList(phio); + OldStateList->insert(trKo); + OldStateList->insert(gxxo); + OldStateList->insert(gxyo); + OldStateList->insert(gxzo); + OldStateList->insert(gyyo); + OldStateList->insert(gyzo); + OldStateList->insert(gzzo); + OldStateList->insert(Axxo); + OldStateList->insert(Axyo); + OldStateList->insert(Axzo); + OldStateList->insert(Ayyo); + OldStateList->insert(Ayzo); + OldStateList->insert(Azzo); + OldStateList->insert(Gmxo); + OldStateList->insert(Gmyo); + OldStateList->insert(Gmzo); + OldStateList->insert(Lapo); + OldStateList->insert(Sfxo); + OldStateList->insert(Sfyo); + OldStateList->insert(Sfzo); + OldStateList->insert(dtSfxo); + OldStateList->insert(dtSfyo); + OldStateList->insert(dtSfzo); + + StateList = new MyList(phi0); + StateList->insert(trK0); + StateList->insert(gxx0); + StateList->insert(gxy0); + StateList->insert(gxz0); + StateList->insert(gyy0); + StateList->insert(gyz0); + StateList->insert(gzz0); + StateList->insert(Axx0); + StateList->insert(Axy0); + StateList->insert(Axz0); + StateList->insert(Ayy0); + StateList->insert(Ayz0); + StateList->insert(Azz0); + StateList->insert(Gmx0); + StateList->insert(Gmy0); + StateList->insert(Gmz0); + StateList->insert(Lap0); + StateList->insert(Sfx0); + StateList->insert(Sfy0); + StateList->insert(Sfz0); + StateList->insert(dtSfx0); + StateList->insert(dtSfy0); + StateList->insert(dtSfz0); + + RHSList = new MyList(phi_rhs); + RHSList->insert(trK_rhs); + RHSList->insert(gxx_rhs); + RHSList->insert(gxy_rhs); + RHSList->insert(gxz_rhs); + RHSList->insert(gyy_rhs); + RHSList->insert(gyz_rhs); + RHSList->insert(gzz_rhs); + 
RHSList->insert(Axx_rhs); + RHSList->insert(Axy_rhs); + RHSList->insert(Axz_rhs); + RHSList->insert(Ayy_rhs); + RHSList->insert(Ayz_rhs); + RHSList->insert(Azz_rhs); + RHSList->insert(Gmx_rhs); + RHSList->insert(Gmy_rhs); + RHSList->insert(Gmz_rhs); + RHSList->insert(Lap_rhs); + RHSList->insert(Sfx_rhs); + RHSList->insert(Sfy_rhs); + RHSList->insert(Sfz_rhs); + RHSList->insert(dtSfx_rhs); + RHSList->insert(dtSfy_rhs); + RHSList->insert(dtSfz_rhs); + + SynchList_pre = new MyList(phi); + SynchList_pre->insert(trK); + SynchList_pre->insert(gxx); + SynchList_pre->insert(gxy); + SynchList_pre->insert(gxz); + SynchList_pre->insert(gyy); + SynchList_pre->insert(gyz); + SynchList_pre->insert(gzz); + SynchList_pre->insert(Axx); + SynchList_pre->insert(Axy); + SynchList_pre->insert(Axz); + SynchList_pre->insert(Ayy); + SynchList_pre->insert(Ayz); + SynchList_pre->insert(Azz); + SynchList_pre->insert(Gmx); + SynchList_pre->insert(Gmy); + SynchList_pre->insert(Gmz); + SynchList_pre->insert(Lap); + SynchList_pre->insert(Sfx); + SynchList_pre->insert(Sfy); + SynchList_pre->insert(Sfz); + SynchList_pre->insert(dtSfx); + SynchList_pre->insert(dtSfy); + SynchList_pre->insert(dtSfz); + + SynchList_cor = new MyList(phi1); + SynchList_cor->insert(trK1); + SynchList_cor->insert(gxx1); + SynchList_cor->insert(gxy1); + SynchList_cor->insert(gxz1); + SynchList_cor->insert(gyy1); + SynchList_cor->insert(gyz1); + SynchList_cor->insert(gzz1); + SynchList_cor->insert(Axx1); + SynchList_cor->insert(Axy1); + SynchList_cor->insert(Axz1); + SynchList_cor->insert(Ayy1); + SynchList_cor->insert(Ayz1); + SynchList_cor->insert(Azz1); + SynchList_cor->insert(Gmx1); + SynchList_cor->insert(Gmy1); + SynchList_cor->insert(Gmz1); + SynchList_cor->insert(Lap1); + SynchList_cor->insert(Sfx1); + SynchList_cor->insert(Sfy1); + SynchList_cor->insert(Sfz1); + SynchList_cor->insert(dtSfx1); + SynchList_cor->insert(dtSfy1); + SynchList_cor->insert(dtSfz1); + + DumpList = new MyList(phi0); + 
DumpList->insert(trK0); + DumpList->insert(gxx0); + DumpList->insert(gxy0); + DumpList->insert(gxz0); + DumpList->insert(gyy0); + DumpList->insert(gyz0); + DumpList->insert(gzz0); + // DumpList->insert(Axx0); + // DumpList->insert(Axy0); + // DumpList->insert(Axz0); + // DumpList->insert(Ayy0); + // DumpList->insert(Ayz0); + // DumpList->insert(Azz0); + // DumpList->insert(Gmx0); + // DumpList->insert(Gmy0); + // DumpList->insert(Gmz0); + DumpList->insert(Lap0); + // DumpList->insert(Sfx0); + // DumpList->insert(Sfy0); + // DumpList->insert(Sfz0); + // DumpList->insert(dtSfx0); + // DumpList->insert(dtSfy0); + // DumpList->insert(dtSfz0); + // DumpList->insert(Rpsi4); + // DumpList->insert(Ipsi4); + DumpList->insert(Cons_Ham); + DumpList->insert(Cons_Px); + DumpList->insert(Cons_Py); + DumpList->insert(Cons_Pz); + // DumpList->insert(Cons_Gx); + // DumpList->insert(Cons_Gy); + // DumpList->insert(Cons_Gz); + + ConstraintList = new MyList(Cons_Ham); + ConstraintList->insert(Cons_Px); + ConstraintList->insert(Cons_Py); + ConstraintList->insert(Cons_Pz); + ConstraintList->insert(Cons_Gx); + ConstraintList->insert(Cons_Gy); + ConstraintList->insert(Cons_Gz); +#ifdef With_AHF + // setup kinds of var list + // List for AparentHorizonFinderDirect + // special attension is payed to symmetry type + // gij gij,x gij,y gij,z + AHList = new MyList(gxx0); + AHList->insert(Gamxxx); + AHList->insert(Gamyxx); + AHList->insert(Gamzxx); + AHList->insert(gxy0); + AHList->insert(Gamxxy); + AHList->insert(Gamyxy); + AHList->insert(Gamzxy); + AHList->insert(gxz0); + AHList->insert(Gamxxz); + AHList->insert(Gamyxz); + AHList->insert(Gamzxz); + AHList->insert(gyy0); + AHList->insert(Gamxyy); + AHList->insert(Gamyyy); + AHList->insert(Gamzyy); + AHList->insert(gyz0); + AHList->insert(Gamxyz); + AHList->insert(Gamyyz); + AHList->insert(Gamzyz); + AHList->insert(gzz0); + AHList->insert(Gamxzz); + AHList->insert(Gamyzz); + AHList->insert(Gamzzz); + // phi phi,x phi,y phi,z + 
AHList->insert(phi0); + AHList->insert(dtSfx_rhs); + AHList->insert(dtSfy_rhs); + AHList->insert(dtSfz_rhs); + // Aij + AHList->insert(Axx0); + AHList->insert(Axy0); + AHList->insert(Axz0); + AHList->insert(Ayy0); + AHList->insert(Ayz0); + AHList->insert(Azz0); + // trK + AHList->insert(trK0); + // gij,x gij,y gij,z + AHDList = new MyList(Gamxxx); + AHDList->insert(Gamyxx); + AHDList->insert(Gamzxx); + AHDList->insert(Gamxxy); + AHDList->insert(Gamyxy); + AHDList->insert(Gamzxy); + AHDList->insert(Gamxxz); + AHDList->insert(Gamyxz); + AHDList->insert(Gamzxz); + AHDList->insert(Gamxyy); + AHDList->insert(Gamyyy); + AHDList->insert(Gamzyy); + AHDList->insert(Gamxyz); + AHDList->insert(Gamyyz); + AHDList->insert(Gamzyz); + AHDList->insert(Gamxzz); + AHDList->insert(Gamyzz); + AHDList->insert(Gamzzz); + // phi,x phi,y phi,z + AHDList->insert(dtSfx_rhs); + AHDList->insert(dtSfy_rhs); + AHDList->insert(dtSfz_rhs); + + GaugeList = new MyList(Lap0); + GaugeList->insert(Sfx0); + GaugeList->insert(Sfy0); + GaugeList->insert(Sfz0); +#endif + + + + // Note: the first checkpoint-class variable is `bool` while the local variable is `int`; + // an explicit conversion may be required in some contexts. + // bool checkrun00 = checkrun; + // Note: the second checkpoint-class variable is `const char*` while the local variable is `char*`; + // an explicit conversion may be required. 
+ // const char* checkfilename00 = checkfilename; + + CheckPoint = new checkpoint(checkrun, checkfilename, myrank); + + if (myrank==0) { + cout << " BSSN class successfully created " << endl; + } +} + +//================================================================================================ + + + +//================================================================================================ + +// This member function initializes the class + +//================================================================================================ + +void bssn_class::Initialize() +{ + if (myrank == 0) + cout << " you have setted " << ngfs << " grid functions." << endl; + + CheckPoint->addvariablelist(StateList); + CheckPoint->addvariablelist(OldStateList); + + char pname[50]; + { + map::iterator iter = parameters::str_par.find("inputpar"); + if (iter != parameters::str_par.end()) + { + strcpy(pname, (iter->second).c_str()); + } + else + { + cout << "Error inputpar" << endl; + exit(0); + } + } + GH = new cgh(0, ngfs, Symmetry, pname, checkrun, ErrorMonitor); + if (checkrun) + CheckPoint->readcheck_cgh(PhysTime, GH, myrank, nprocs, Symmetry); + else + GH->compose_cgh(nprocs); +#ifdef WithShell + SH = new ShellPatch(0, ngfs, pname, Symmetry, myrank, ErrorMonitor); + SH->matchcheck(GH->PatL[0]); + SH->compose_sh(nprocs); + // SH->compose_shr(nprocs); //sh is faster than shr + SH->setupcordtrans(); + SH->Dump_xyz(0, 0, 1); + SH->setupintintstuff(nprocs, GH->PatL[0], Symmetry); + + if (checkrun) + CheckPoint->readcheck_sh(SH, myrank); +#else + SH = 0; +#endif + + double h = GH->PatL[0]->data->blb->data->getdX(0); + for (int i = 1; i < dim; i++) + h = Mymin(h, GH->PatL[0]->data->blb->data->getdX(i)); + dT = Courant * h; + + if (checkrun) + { + CheckPoint->read_Black_Hole_position(BH_num_input, BH_num, Porg0, Pmom, Spin, Mass, Porgbr, Porg, Porg1, Porg_rhs); + setpbh(BH_num, Porg0, Mass, BH_num_input); + } + else + { + PhysTime = StartTime; + 
Setup_Black_Hole_position(); + } + + // Initialize sync caches (per-level, for predictor and corrector) + sync_cache_pre = new Parallel::SyncCache[GH->levels]; + sync_cache_cor = new Parallel::SyncCache[GH->levels]; + sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels]; + sync_cache_rp_fine = new Parallel::SyncCache[GH->levels]; + sync_cache_restrict = new Parallel::SyncCache[GH->levels]; + sync_cache_outbd = new Parallel::SyncCache[GH->levels]; +} + +//================================================================================================ + + + +//================================================================================================ + +// This member function is the destructor; it releases allocated variables + +//================================================================================================ + +bssn_class::~bssn_class() +{ +#ifdef With_AHF + AHList->clearList(); + AHDList->clearList(); + GaugeList->clearList(); + if (lastahdumpid) + delete[] lastahdumpid; + if (findeveryl) + delete[] findeveryl; + + if (xc) + { + delete[] xc; + delete[] yc; + delete[] zc; + delete[] xr; + delete[] yr; + delete[] zr; + delete[] trigger; + delete[] dumpid; + delete[] dTT; + } + + AHFinderDirect::AHFinderDirect_cleanup(); +#endif + + StateList->clearList(); + RHSList->clearList(); + OldStateList->clearList(); + SynchList_pre->clearList(); + SynchList_cor->clearList(); + DumpList->clearList(); + ConstraintList->clearList(); + + delete phio; + delete trKo; + delete gxxo; + delete gxyo; + delete gxzo; + delete gyyo; + delete gyzo; + delete gzzo; + delete Axxo; + delete Axyo; + delete Axzo; + delete Ayyo; + delete Ayzo; + delete Azzo; + delete Gmxo; + delete Gmyo; + delete Gmzo; + delete Lapo; + delete Sfxo; + delete Sfyo; + delete Sfzo; + delete dtSfxo; + delete dtSfyo; + delete dtSfzo; + + delete phi0; + delete trK0; + delete gxx0; + delete gxy0; + delete gxz0; + delete gyy0; + delete gyz0; + delete gzz0; + delete Axx0; + delete Axy0; + 
delete Axz0; + delete Ayy0; + delete Ayz0; + delete Azz0; + delete Gmx0; + delete Gmy0; + delete Gmz0; + delete Lap0; + delete Sfx0; + delete Sfy0; + delete Sfz0; + delete dtSfx0; + delete dtSfy0; + delete dtSfz0; + + delete phi; + delete trK; + delete gxx; + delete gxy; + delete gxz; + delete gyy; + delete gyz; + delete gzz; + delete Axx; + delete Axy; + delete Axz; + delete Ayy; + delete Ayz; + delete Azz; + delete Gmx; + delete Gmy; + delete Gmz; + delete Lap; + delete Sfx; + delete Sfy; + delete Sfz; + delete dtSfx; + delete dtSfy; + delete dtSfz; + + delete phi1; + delete trK1; + delete gxx1; + delete gxy1; + delete gxz1; + delete gyy1; + delete gyz1; + delete gzz1; + delete Axx1; + delete Axy1; + delete Axz1; + delete Ayy1; + delete Ayz1; + delete Azz1; + delete Gmx1; + delete Gmy1; + delete Gmz1; + delete Lap1; + delete Sfx1; + delete Sfy1; + delete Sfz1; + delete dtSfx1; + delete dtSfy1; + delete dtSfz1; + + delete phi_rhs; + delete trK_rhs; + delete gxx_rhs; + delete gxy_rhs; + delete gxz_rhs; + delete gyy_rhs; + delete gyz_rhs; + delete gzz_rhs; + delete Axx_rhs; + delete Axy_rhs; + delete Axz_rhs; + delete Ayy_rhs; + delete Ayz_rhs; + delete Azz_rhs; + delete Gmx_rhs; + delete Gmy_rhs; + delete Gmz_rhs; + delete Lap_rhs; + delete Sfx_rhs; + delete Sfy_rhs; + delete Sfz_rhs; + delete dtSfx_rhs; + delete dtSfy_rhs; + delete dtSfz_rhs; + + delete rho; + delete Sx; + delete Sy; + delete Sz; + delete Sxx; + delete Sxy; + delete Sxz; + delete Syy; + delete Syz; + delete Szz; + + delete Gamxxx; + delete Gamxxy; + delete Gamxxz; + delete Gamxyy; + delete Gamxyz; + delete Gamxzz; + delete Gamyxx; + delete Gamyxy; + delete Gamyxz; + delete Gamyyy; + delete Gamyyz; + delete Gamyzz; + delete Gamzxx; + delete Gamzxy; + delete Gamzxz; + delete Gamzyy; + delete Gamzyz; + delete Gamzzz; + + delete Rxx; + delete Rxy; + delete Rxz; + delete Ryy; + delete Ryz; + delete Rzz; + + delete Rpsi4; + delete Ipsi4; + delete t1Rpsi4; + delete t1Ipsi4; + delete t2Rpsi4; + delete 
t2Ipsi4; + + delete Cons_Ham; + delete Cons_Px; + delete Cons_Py; + delete Cons_Pz; + delete Cons_Gx; + delete Cons_Gy; + delete Cons_Gz; + +#ifdef Point_Psi4 + delete phix; + delete phiy; + delete phiz; + delete trKx; + delete trKy; + delete trKz; + delete Axxx; + delete Axxy; + delete Axxz; + delete Axyx; + delete Axyy; + delete Axyz; + delete Axzx; + delete Axzy; + delete Axzz; + delete Ayyx; + delete Ayyy; + delete Ayyz; + delete Ayzx; + delete Ayzy; + delete Ayzz; + delete Azzx; + delete Azzy; + delete Azzz; +#endif + + // Destroy sync caches before GH + if (sync_cache_pre) + { + for (int i = 0; i < GH->levels; i++) + sync_cache_pre[i].destroy(); + delete[] sync_cache_pre; + } + if (sync_cache_cor) + { + for (int i = 0; i < GH->levels; i++) + sync_cache_cor[i].destroy(); + delete[] sync_cache_cor; + } + if (sync_cache_rp_coarse) + { + for (int i = 0; i < GH->levels; i++) + sync_cache_rp_coarse[i].destroy(); + delete[] sync_cache_rp_coarse; + } + if (sync_cache_rp_fine) + { + for (int i = 0; i < GH->levels; i++) + sync_cache_rp_fine[i].destroy(); + delete[] sync_cache_rp_fine; + } + + delete GH; +#ifdef WithShell + delete SH; +#endif + + for (int i = 0; i < BH_num; i++) + { + delete[] Porg0[i]; + delete[] Porgbr[i]; + delete[] Porg[i]; + delete[] Porg1[i]; + delete[] Porg_rhs[i]; + } + + delete[] Porg0; + delete[] Porgbr; + delete[] Porg; + delete[] Porg1; + delete[] Porg_rhs; + + delete[] Mass; + delete[] Spin; + delete[] Pmom; + + delete ErrorMonitor; + delete Psi4Monitor; + delete BHMonitor; + delete MAPMonitor; + delete ConVMonitor; + delete Waveshell; + + delete CheckPoint; +} + +//================================================================================================ + + + +//================================================================================================ + +// This member function computes initial data using Lousto's analytic method + 
+//================================================================================================ + +void bssn_class::Setup_Initial_Data_Lousto() +{ + if (!checkrun) + { + if (myrank == 0) + { + cout << endl; + cout << " Setup initial data with Lousto's analytical formula. " << endl; + cout << endl; + } + char filename[50]; + { + map::iterator iter = parameters::str_par.find("inputpar"); + if (iter != parameters::str_par.end()) + { + strcpy(filename, (iter->second).c_str()); + } + else + { + cout << "Error inputpar" << endl; + exit(0); + } + } + int BH_NM; + double *Porg_here, *Pmom_here, *Spin_here, *Mass_here; + // read parameter from file + { + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + ifstream inf(filename, ifstream::in); + if (!inf.good() && myrank == 0) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "Can not open parameter file " << filename + << " for inputing information of black holes" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "BSSN" && skey == "BH_num") + { + BH_NM = atoi(sval.c_str()); + break; + } + } + inf.close(); + } + + Porg_here = new double[3 * BH_NM]; + Pmom_here = new double[3 * BH_NM]; + Spin_here = new double[3 * BH_NM]; + Mass_here = new double[BH_NM]; + // read parameter from file + { + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + ifstream inf(filename, ifstream::in); + if (!inf.good() && myrank == 0) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "Can not open parameter file " << filename + << " for inputing information of black holes" << endl; 
+ MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "BSSN" && sind < BH_NM) + { + if (skey == "Mass") + Mass_here[sind] = atof(sval.c_str()); + else if (skey == "Porgx") + Porg_here[sind * 3] = atof(sval.c_str()); + else if (skey == "Porgy") + Porg_here[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Porgz") + Porg_here[sind * 3 + 2] = atof(sval.c_str()); + else if (skey == "Spinx") + Spin_here[sind * 3] = atof(sval.c_str()); + else if (skey == "Spiny") + Spin_here[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Spinz") + Spin_here[sind * 3 + 2] = atof(sval.c_str()); + else if (skey == "Pmomx") + Pmom_here[sind * 3] = atof(sval.c_str()); + else if (skey == "Pmomy") + Pmom_here[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Pmomz") + Pmom_here[sind * 3 + 2] = atof(sval.c_str()); + } + } + inf.close(); + } + // set initial data + for (int lev = 0; lev < GH->levels; lev++) + { + MyList *Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BL = Pp->data->blb; + while (BL) + { + Block *cg = BL->data; + if (myrank == cg->rank) + { + // Use Lousto's analytic formulas to compute initial data + f_get_lousto_nbhs(cg->shape, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], 
cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM); + } + if (BL == Pp->data->ble) + break; + BL = BL->next; + } + Pp = Pp->next; + } + } + // dump read_in initial data + for (int lev = 0; lev < GH->levels; lev++) + Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT); +#ifdef WithShell + // ShellPatch part + MyList *Pp = SH->PatL; + while (Pp) + { + MyList *BL = Pp->data->blb; + while (BL) + { + Block *cg = BL->data; + if (myrank == cg->rank) + { + f_get_initial_nbhs_sh(cg->shape, + cg->fgfs[Pp->data->fngfs + ShellPatch::gx], + cg->fgfs[Pp->data->fngfs + ShellPatch::gy], + cg->fgfs[Pp->data->fngfs + ShellPatch::gz], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM); + } + if (BL == Pp->data->ble) + break; + BL = BL->next; + } + Pp = Pp->next; + } + // dump read_in initial data + SH->Dump_Data(StateList, 0, PhysTime, dT); +#endif + + delete[] Porg_here; + delete[] Mass_here; + delete[] Pmom_here; + delete[] Spin_here; + // SH->Synch(GH->PatL[0],StateList,Symmetry); + // exit(0); + } +} + +//================================================================================================ + + + +//================================================================================================ + +// This member function computes initial data using Cao's analytic formulas + 
+//================================================================================================ + +void bssn_class::Setup_Initial_Data_Cao() +{ + if (!checkrun) + { + if (myrank == 0) + { + cout << endl; + cout << " Setup initial data with Cao's analytical formula. " << endl; + cout << endl; + } + char filename[50]; + { + map::iterator iter = parameters::str_par.find("inputpar"); + if (iter != parameters::str_par.end()) + { + strcpy(filename, (iter->second).c_str()); + } + else + { + cout << "Error inputpar" << endl; + exit(0); + } + } + int BH_NM; + double *Porg_here, *Pmom_here, *Spin_here, *Mass_here; + // read parameter from file + { + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + ifstream inf(filename, ifstream::in); + if (!inf.good() && myrank == 0) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "Can not open parameter file " << filename + << " for inputing information of black holes" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "BSSN" && skey == "BH_num") + { + BH_NM = atoi(sval.c_str()); + break; + } + } + inf.close(); + } + + Porg_here = new double[3 * BH_NM]; + Pmom_here = new double[3 * BH_NM]; + Spin_here = new double[3 * BH_NM]; + Mass_here = new double[BH_NM]; + // read parameter from file + { + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + ifstream inf(filename, ifstream::in); + if (!inf.good() && myrank == 0) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "Can not open parameter file " << filename + << " for inputing information of black holes" << endl; + 
MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "BSSN" && sind < BH_NM) + { + if (skey == "Mass") + Mass_here[sind] = atof(sval.c_str()); + else if (skey == "Porgx") + Porg_here[sind * 3] = atof(sval.c_str()); + else if (skey == "Porgy") + Porg_here[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Porgz") + Porg_here[sind * 3 + 2] = atof(sval.c_str()); + else if (skey == "Spinx") + Spin_here[sind * 3] = atof(sval.c_str()); + else if (skey == "Spiny") + Spin_here[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Spinz") + Spin_here[sind * 3 + 2] = atof(sval.c_str()); + else if (skey == "Pmomx") + Pmom_here[sind * 3] = atof(sval.c_str()); + else if (skey == "Pmomy") + Pmom_here[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Pmomz") + Pmom_here[sind * 3 + 2] = atof(sval.c_str()); + } + } + inf.close(); + } + // set initial data + for (int lev = 0; lev < GH->levels; lev++) + { + MyList *Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BL = Pp->data->blb; + while (BL) + { + Block *cg = BL->data; + if (myrank == cg->rank) + { + // Use Cao's analytic formulas to compute initial data + f_get_initial_nbhs(cg->shape, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], 
cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM); + } + if (BL == Pp->data->ble) + break; + BL = BL->next; + } + Pp = Pp->next; + } + } + // dump read_in initial data + for (int lev = 0; lev < GH->levels; lev++) + Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT); +#ifdef WithShell + // ShellPatch part + MyList *Pp = SH->PatL; + while (Pp) + { + MyList *BL = Pp->data->blb; + while (BL) + { + Block *cg = BL->data; + if (myrank == cg->rank) + { + f_get_initial_nbhs_sh(cg->shape, + cg->fgfs[Pp->data->fngfs + ShellPatch::gx], + cg->fgfs[Pp->data->fngfs + ShellPatch::gy], + cg->fgfs[Pp->data->fngfs + ShellPatch::gz], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM); + } + if (BL == Pp->data->ble) + break; + BL = BL->next; + } + Pp = Pp->next; + } + // dump read_in initial data + SH->Dump_Data(StateList, 0, PhysTime, dT); +#endif + + delete[] Porg_here; + delete[] Mass_here; + delete[] Pmom_here; + delete[] Spin_here; + // SH->Synch(GH->PatL[0],StateList,Symmetry); + // exit(0); + } +} + +//================================================================================================ + + + +//================================================================================================ + +// This member function computes Kerr-Schild initial data via an analytic method + 
+//================================================================================================ + +void bssn_class::Setup_KerrSchild() +{ + if (!checkrun) + { + if (myrank == 0) + { + cout << endl; + cout << " Setup initial data with Kerr-Schild formula. " << endl; + cout << endl; + } + // set initial data + for (int lev = 0; lev < GH->levels; lev++) + { + MyList *Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BL = Pp->data->blb; + while (BL) + { + Block *cg = BL->data; + if (myrank == cg->rank) + { + f_get_initial_kerrschild(cg->shape, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn]); + } + if (BL == Pp->data->ble) + break; + BL = BL->next; + } + Pp = Pp->next; + } + } +#ifdef WithShell + // ShellPatch part + MyList *Pp = SH->PatL; + while (Pp) + { + int lev = 0, fngfs = Pp->data->fngfs; + + MyList *BL = Pp->data->blb; + while (BL) + { + Block *cg = BL->data; + if (myrank == cg->rank) + { + f_get_initial_kerrschild_ss(cg->shape, + cg->fgfs[Pp->data->fngfs + ShellPatch::gx], + cg->fgfs[Pp->data->fngfs + ShellPatch::gy], + cg->fgfs[Pp->data->fngfs + ShellPatch::gz], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], 
cg->fgfs[Gmz0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn]); + /* + f_fderivs_shc(cg->shape,cg->fgfs[phi0->sgfn], + cg->fgfs[Sfx_rhs->sgfn], + cg->fgfs[Sfy_rhs->sgfn], + cg->fgfs[Sfz_rhs->sgfn], + cg->X[0],cg->X[1],cg->X[2], + phi0->SoA[0],phi0->SoA[1],phi0->SoA[2], + Symmetry,lev,Pp->data->sst, + cg->fgfs[fngfs+ShellPatch::drhodx], + cg->fgfs[fngfs+ShellPatch::drhody], + cg->fgfs[fngfs+ShellPatch::drhodz], + cg->fgfs[fngfs+ShellPatch::dsigmadx], + cg->fgfs[fngfs+ShellPatch::dsigmady], + cg->fgfs[fngfs+ShellPatch::dsigmadz], + cg->fgfs[fngfs+ShellPatch::dRdx], + cg->fgfs[fngfs+ShellPatch::dRdy], + cg->fgfs[fngfs+ShellPatch::dRdz]); + f_fdderivs_shc(cg->shape,cg->fgfs[phi0->sgfn], + cg->fgfs[Axx_rhs->sgfn],cg->fgfs[Axy_rhs->sgfn],cg->fgfs[Axz_rhs->sgfn], + cg->fgfs[Ayy_rhs->sgfn],cg->fgfs[Ayz_rhs->sgfn],cg->fgfs[Azz_rhs->sgfn], + cg->X[0],cg->X[1],cg->X[2], + phi0->SoA[0],phi0->SoA[1],phi0->SoA[2], + Symmetry,lev,Pp->data->sst, + cg->fgfs[fngfs+ShellPatch::drhodx], + cg->fgfs[fngfs+ShellPatch::drhody], + cg->fgfs[fngfs+ShellPatch::drhodz], + cg->fgfs[fngfs+ShellPatch::dsigmadx], + cg->fgfs[fngfs+ShellPatch::dsigmady], + cg->fgfs[fngfs+ShellPatch::dsigmadz], + cg->fgfs[fngfs+ShellPatch::dRdx], + cg->fgfs[fngfs+ShellPatch::dRdy], + cg->fgfs[fngfs+ShellPatch::dRdz], + cg->fgfs[fngfs+ShellPatch::drhodxx], + cg->fgfs[fngfs+ShellPatch::drhodxy], + cg->fgfs[fngfs+ShellPatch::drhodxz], + cg->fgfs[fngfs+ShellPatch::drhodyy], + cg->fgfs[fngfs+ShellPatch::drhodyz], + cg->fgfs[fngfs+ShellPatch::drhodzz], + cg->fgfs[fngfs+ShellPatch::dsigmadxx], + cg->fgfs[fngfs+ShellPatch::dsigmadxy], + cg->fgfs[fngfs+ShellPatch::dsigmadxz], + cg->fgfs[fngfs+ShellPatch::dsigmadyy], + cg->fgfs[fngfs+ShellPatch::dsigmadyz], + cg->fgfs[fngfs+ShellPatch::dsigmadzz], + cg->fgfs[fngfs+ShellPatch::dRdxx], + cg->fgfs[fngfs+ShellPatch::dRdxy], + 
cg->fgfs[fngfs+ShellPatch::dRdxz], + cg->fgfs[fngfs+ShellPatch::dRdyy], + cg->fgfs[fngfs+ShellPatch::dRdyz], + cg->fgfs[fngfs+ShellPatch::dRdzz]); + */ + } + if (BL == Pp->data->ble) + break; + BL = BL->next; + } + Pp = Pp->next; + } +#endif + + // dump read_in initial data + // SH->Synch(GH->PatL[0],StateList,Symmetry); + // for(int lev=0;levlevels;lev++) Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT); + // SH->Dump_Data(StateList,0,PhysTime,dT); + // exit(0); + + /* + { + MyList * DG_List=new MyList(Sfx_rhs); + DG_List->insert(Sfy_rhs); + DG_List->insert(Sfz_rhs); + DG_List->insert(Axx_rhs); + DG_List->insert(Axy_rhs); + DG_List->insert(Axz_rhs); + DG_List->insert(Ayy_rhs); + DG_List->insert(Ayz_rhs); + DG_List->insert(Azz_rhs); + SH->Synch(DG_List,Symmetry); + SH->Dump_Data(DG_List,0,PhysTime,dT); + DG_List->clearList(); + exit(0); + } + */ + } +} + +//================================================================================================ + + + +//================================================================================================ + +// This member function reads initial data produced by Pablo Galaviz's Olliptic program + +//================================================================================================ + +// Read initial data solved by Pablo's Olliptic Phys.Rev.D 82 024005 (2010) + +//|---------------------------------------------------------------------------- +// read ASCII file with the style of Pablo +//|---------------------------------------------------------------------------- +bool bssn_class::read_Pablo_file(int *ext, double *datain, char *filename) +{ + if (myrank == 0) + { + cout << endl; + cout << " Setup initial data with Pablo_file. 
" << endl; + cout << endl; + } + + int nx = ext[0], ny = ext[1], nz = ext[2]; + int i, j, k; + double x, y, z; + //|--->open in put file + ifstream infile; + infile.open(filename); + if (!infile) + { + cout << "bssn_class: read_Pablo_file can't open " << filename << " for input." << endl; + return false; + } + for (k = 0; k < nz; k++) + for (j = 0; j < ny; j++) + for (i = 0; i < nx; i++) + { + infile >> x >> y >> z >> datain[i + j * nx + k * nx * ny]; + } + + infile.close(); + + return true; +} + +//================================================================================================ + + + +//================================================================================================ + +// This member function writes initial data file in the style of Pablo Galaviz's Olliptic program + +//================================================================================================ + +//|---------------------------------------------------------------------------- +// write ASCII file with the style of Pablo +//|---------------------------------------------------------------------------- +void bssn_class::write_Pablo_file(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax, + char *filename) +{ + int nx = ext[0], ny = ext[1], nz = ext[2]; + int i, j, k; + double *X, *Y, *Z; + X = new double[nx]; + Y = new double[ny]; + Z = new double[nz]; + double dX, dY, dZ; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + dX = (xmax - xmin) / (nx - 1); + for (i = 0; i < nx; i++) + X[i] = xmin + i * dX; + dY = (ymax - ymin) / (ny - 1); + for (j = 0; j < ny; j++) + Y[j] = ymin + j * dY; + dZ = (zmax - zmin) / (nz - 1); + for (k = 0; k < nz; k++) + Z[k] = zmin + k * dZ; +#else +#ifdef Cell + dX = (xmax - xmin) / nx; + for (i = 0; i < nx; i++) + X[i] = xmin + (i + 0.5) * dX; + dY = (ymax - ymin) / ny; + for (j = 0; j < ny; j++) + Y[j] = ymin + (j + 0.5) * dY; + dZ = (zmax - zmin) / nz; + for (k = 0; k < 
nz; k++) + Z[k] = zmin + (k + 0.5) * dZ; +#else +#error Not define Vertex nor Cell +#endif +#endif + //|--->open out put file + ofstream outfile; + outfile.open(filename); + if (!outfile) + { + cout << "bssn_class: write_Pablo_file can't open " << filename << " for output." << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + outfile.setf(ios::scientific, ios::floatfield); + outfile.precision(16); + for (k = 0; k < nz; k++) + for (j = 0; j < ny; j++) + for (i = 0; i < nx; i++) + { + outfile << X[i] << " " << Y[j] << " " << Z[k] << " " + << 0 << endl; + } + outfile.close(); + + delete[] X; + delete[] Y; + delete[] Z; +} + +//================================================================================================ + + + +//================================================================================================ + +// Read initial data solved by Ansorg, PRD 70, 064011 (2004) + +void bssn_class::Read_Ansorg() +{ + if (!checkrun) + { + if (myrank == 0) + { + cout << endl; + cout << " Read initial data from Ansorg's solver," + << " please be sure the input parameters for black holes are puncture parameters!! 
" << endl; + cout << endl; + } + char filename[50]; + { + map::iterator iter = parameters::str_par.find("inputpar"); + if (iter != parameters::str_par.end()) + { + strcpy(filename, (iter->second).c_str()); + } + else + { + cout << "Error inputpar" << endl; + exit(0); + } + } + int BH_NM; + double *Porg_here, *Pmom_here, *Spin_here, *Mass_here; + // read parameter from file + { + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + ifstream inf(filename, ifstream::in); + if (!inf.good() && myrank == 0) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "Can not open parameter file " << filename + << " for inputing information of black holes" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "BSSN" && skey == "BH_num") + { + BH_NM = atoi(sval.c_str()); + break; + } + } + inf.close(); + } + + Porg_here = new double[3 * BH_NM]; + Pmom_here = new double[3 * BH_NM]; + Spin_here = new double[3 * BH_NM]; + Mass_here = new double[BH_NM]; + // read parameter from file + { + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + ifstream inf(filename, ifstream::in); + if (!inf.good() && myrank == 0) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "Can not open parameter file " << filename + << " for inputing information of black holes" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "error 
reading parameter file " << filename << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "BSSN" && sind < BH_NM) + { + if (skey == "Mass") + Mass_here[sind] = atof(sval.c_str()); + else if (skey == "Porgx") + Porg_here[sind * 3] = atof(sval.c_str()); + else if (skey == "Porgy") + Porg_here[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Porgz") + Porg_here[sind * 3 + 2] = atof(sval.c_str()); + else if (skey == "Spinx") + Spin_here[sind * 3] = atof(sval.c_str()); + else if (skey == "Spiny") + Spin_here[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Spinz") + Spin_here[sind * 3 + 2] = atof(sval.c_str()); + else if (skey == "Pmomx") + Pmom_here[sind * 3] = atof(sval.c_str()); + else if (skey == "Pmomy") + Pmom_here[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Pmomz") + Pmom_here[sind * 3 + 2] = atof(sval.c_str()); + } + } + inf.close(); + } + + int order = 6; + Ansorg read_ansorg("Ansorg.psid", order); + // set initial data + for (int lev = 0; lev < GH->levels; lev++) + { + MyList *Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BL = Pp->data->blb; + while (BL) + { + Block *cg = BL->data; + if (myrank == cg->rank) + { + for (int k = 0; k < cg->shape[2]; k++) + for (int j = 0; j < cg->shape[1]; j++) + for (int i = 0; i < cg->shape[0]; i++) + cg->fgfs[phi0->sgfn][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]] = + read_ansorg.ps_u_at_xyz(cg->X[0][i], cg->X[1][j], cg->X[2][k]); + + f_get_ansorg_nbhs(cg->shape, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], 
cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM); + } + if (BL == Pp->data->ble) + break; + BL = BL->next; + } + Pp = Pp->next; + } + } +#ifdef WithShell + // ShellPatch part + MyList *Pp = SH->PatL; + while (Pp) + { + MyList *BL = Pp->data->blb; + while (BL) + { + Block *cg = BL->data; + if (myrank == cg->rank) + { + for (int k = 0; k < cg->shape[2]; k++) + for (int j = 0; j < cg->shape[1]; j++) + for (int i = 0; i < cg->shape[0]; i++) + cg->fgfs[phi0->sgfn][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]] = + read_ansorg.ps_u_at_xyz(cg->fgfs[Pp->data->fngfs + ShellPatch::gx][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]], + cg->fgfs[Pp->data->fngfs + ShellPatch::gy][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]], + cg->fgfs[Pp->data->fngfs + ShellPatch::gz][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]]); + + f_get_ansorg_nbhs_ss(cg->shape, + cg->fgfs[Pp->data->fngfs + ShellPatch::gx], + cg->fgfs[Pp->data->fngfs + ShellPatch::gy], + cg->fgfs[Pp->data->fngfs + ShellPatch::gz], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM); +#if 0 +// for check fderivs_sh + f_fderivs_sh(cg->shape,cg->fgfs[Ayz0->sgfn], + cg->fgfs[Sfx0->sgfn],cg->fgfs[Sfy0->sgfn],cg->fgfs[Sfz0->sgfn], + cg->X[0],cg->X[1],cg->X[2], + Ayz0->SoA[0],Ayz0->SoA[1],Ayz0->SoA[2], + 
Symmetry,Pp->data->sst,Pp->data->sst); +#endif +#if 0 +// for check fderivs_shc + int fngfs = Pp->data->fngfs; + f_fderivs_shc(cg->shape,cg->fgfs[Ayz0->sgfn], + cg->fgfs[Sfx0->sgfn],cg->fgfs[Sfy0->sgfn],cg->fgfs[Sfz0->sgfn], + cg->X[0],cg->X[1],cg->X[2], + Ayz0->SoA[0],Ayz0->SoA[1],Ayz0->SoA[2], + Symmetry,Pp->data->sst,Pp->data->sst, + cg->fgfs[fngfs+ShellPatch::drhodx], + cg->fgfs[fngfs+ShellPatch::drhody], + cg->fgfs[fngfs+ShellPatch::drhodz], + cg->fgfs[fngfs+ShellPatch::dsigmadx], + cg->fgfs[fngfs+ShellPatch::dsigmady], + cg->fgfs[fngfs+ShellPatch::dsigmadz], + cg->fgfs[fngfs+ShellPatch::dRdx], + cg->fgfs[fngfs+ShellPatch::dRdy], + cg->fgfs[fngfs+ShellPatch::dRdz]); +#endif + } + if (BL == Pp->data->ble) + break; + BL = BL->next; + } + Pp = Pp->next; + } +#endif + + delete[] Porg_here; + delete[] Mass_here; + delete[] Pmom_here; + delete[] Spin_here; + + Compute_Constraint(); + // dump read_in initial data + for (int lev = 0; lev < GH->levels; lev++) + Parallel::Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT); +#ifdef WithShell + SH->Dump_Data(DumpList, 0, PhysTime, dT); +#endif + // if(myrank==0) MPI_Abort(MPI_COMM_WORLD,1); + } +} + +//================================================================================================ + + + +//================================================================================================ + +// This member function sets up the time evolution for the entire process + +//================================================================================================ + +void bssn_class::Evolve(int Steps) +{ + clock_t prev_clock, curr_clock; + double LastDump = 0.0, LastCheck = 0.0, Last2dDump = 0.0; + LastAnas = 0; +#if 0 +//initial checkpoint for special uasge + { + CheckPoint->write_Black_Hole_position(BH_num_input,BH_num,Porg0,Porgbr,Mass); + CheckPoint->writecheck_cgh(PhysTime,GH); +#ifdef WithShell + CheckPoint->writecheck_sh(PhysTime,SH); +#endif + CheckPoint->write_bssn(LastDump,Last2dDump,LastAnas); + 
misc::tillherecheck("complete initialization preparation"); // we need synchronization here + if(myrank==0) MPI_Abort(MPI_COMM_WORLD,1); + } +#endif + // for step 0 constraint interpolation + Interp_Constraint(true); + +#ifdef With_AHF + // setup apparent horizon finder direct of thornburg + { + HN_num = BH_num; + for (int ia = 0; ia < BH_num; ia++) + for (int ib = ia + 1; ib < BH_num; ib++) + HN_num++; + + AHFinderDirect::AHFinderDirect_setup(AHList, GaugeList, + this, + Symmetry, HN_num, &PhysTime); + + lastahdumpid = new int[HN_num]; + findeveryl = new int[HN_num]; + xc = new double[HN_num]; + yc = new double[HN_num]; + zc = new double[HN_num]; + xr = new double[HN_num]; + yr = new double[HN_num]; + zr = new double[HN_num]; + dTT = new double[HN_num]; + trigger = new bool[HN_num]; + dumpid = new int[HN_num]; + + for (int ihn = 0; ihn < HN_num; ihn++) + { + lastahdumpid[ihn] = 0; + findeveryl[ihn] = AHfindevery; + } + } +#endif + + if (checkrun) + CheckPoint->read_bssn(LastDump, Last2dDump, LastAnas); + + double dT_mon = dT * pow(0.5, Mymax(0, trfls)); + + /* + #ifdef With_AHF + //initial apparent horizon finding + { + double gam; + double massmin=Mass[0]; + for(int ihn=1;ihnlevels; lev++) - GH->Lt[lev] = PhysTime; - - GH->settrfls(trfls); - - for (int ncount = 1; ncount < Steps + 1; ncount++) - { - // special for large mass ratio consideration - // if(fabs(Porg0[0][0]-Porg0[1][0])+fabs(Porg0[0][1]-Porg0[1][1])+fabs(Porg0[0][2]-Porg0[1][2])<1e-6) - // { GH->levels=GH->movls; } - - if (myrank == 0) - curr_clock = clock(); -#if (PSTR == 0) - RecursiveStep(0); -#elif (PSTR == 1 || PSTR == 2 || PSTR == 3) - // data analysis part - // Warning NOTE: the variables1 are used as temp storege room - AnalysisStuff(a_lev, dT_mon); - ParallelStep(); -#endif - - // misc::tillherecheck("before Constraint_Out"); - - Constraint_Out(); // this will affect the Dump_List - - LastDump += dT_mon; - Last2dDump += dT_mon; - LastCheck += dT_mon; - - // When LastDump >= DumpTime, output 
corresponding binary data - if (LastDump >= DumpTime) - { - // misc::tillherecheck("before Dump_Data"); - - for (int lev = 0; lev < GH->levels; lev++) - Parallel::Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT_mon); -#ifdef WithShell - SH->Dump_Data(DumpList, 0, PhysTime, dT_mon); -#endif - - LastDump = 0; - - if (myrank == 0) - { - cout << " Dump done. " << endl; - } - } - - // When Last2dDump >= d2DumpTime, output corresponding 2D data - if (Last2dDump >= d2DumpTime) - { - // misc::tillherecheck("before 2dDump_Data"); - - for (int lev = 0; lev < GH->levels; lev++) - Parallel::d2Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT_mon); - - Last2dDump = 0; - - if (myrank == 0) - { - cout << " 2d Dump done. " << endl; - } - } - - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << endl; - cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime << " " - << " Computer used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds! 
" << endl; - // cout << endl; - } - - if (PhysTime >= TotalTime) - break; - -#if (REGLEV == 1) - GH->Regrid(Symmetry, BH_num, Porgbr, Porg0, - SynchList_cor, OldStateList, StateList, SynchList_pre, - fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor); - for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } -#endif - -#if (REGLEV == 0 && (PSTR == 1 || PSTR == 2)) -// GH->Regrid_fake(Symmetry,BH_num,Porgbr,Porg0, -// SynchList_cor,OldStateList,StateList,SynchList_pre, -// fgt(PhysTime-dT_mon,StartTime,dT_mon/2),ErrorMonitor); -#endif - + + for (int lev = 0; lev < GH->levels; lev++) + GH->Lt[lev] = PhysTime; + + GH->settrfls(trfls); + + for (int ncount = 1; ncount < Steps + 1; ncount++) + { + // special for large mass ratio consideration + // if(fabs(Porg0[0][0]-Porg0[1][0])+fabs(Porg0[0][1]-Porg0[1][1])+fabs(Porg0[0][2]-Porg0[1][2])<1e-6) + // { GH->levels=GH->movls; } + + if (myrank == 0) + curr_clock = clock(); +#if (PSTR == 0) + RecursiveStep(0); +#elif (PSTR == 1 || PSTR == 2 || PSTR == 3) + // data analysis part + // Warning NOTE: the variables1 are used as temp storege room + AnalysisStuff(a_lev, dT_mon); + ParallelStep(); +#endif + + // misc::tillherecheck("before Constraint_Out"); + + Constraint_Out(); // this will affect the Dump_List + + LastDump += dT_mon; + Last2dDump += dT_mon; + LastCheck += dT_mon; + + // When LastDump >= DumpTime, output corresponding binary data + if (LastDump >= DumpTime) + { + // misc::tillherecheck("before Dump_Data"); + + for (int lev = 0; lev < GH->levels; lev++) + Parallel::Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT_mon); +#ifdef WithShell + SH->Dump_Data(DumpList, 0, PhysTime, dT_mon); +#endif + + LastDump = 0; + + if (myrank == 0) + { + cout << " Dump done. 
" << endl; + } + } + + // When Last2dDump >= d2DumpTime, output corresponding 2D data + if (Last2dDump >= d2DumpTime) + { + // misc::tillherecheck("before 2dDump_Data"); + + for (int lev = 0; lev < GH->levels; lev++) + Parallel::d2Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT_mon); + + Last2dDump = 0; + + if (myrank == 0) + { + cout << " 2d Dump done. " << endl; + } + } + + if (myrank == 0) + { + prev_clock = curr_clock; + curr_clock = clock(); + cout << endl; + cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime << " " + << " Computer used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) + << " seconds! " << endl; + // cout << endl; + } + + if (PhysTime >= TotalTime) + break; + +#if (REGLEV == 1) + GH->Regrid(Symmetry, BH_num, Porgbr, Porg0, + SynchList_cor, OldStateList, StateList, SynchList_pre, + fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor); + for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } +#endif + +#if (REGLEV == 0 && (PSTR == 1 || PSTR == 2)) +// GH->Regrid_fake(Symmetry,BH_num,Porgbr,Porg0, +// SynchList_cor,OldStateList,StateList,SynchList_pre, +// fgt(PhysTime-dT_mon,StartTime,dT_mon/2),ErrorMonitor); +#endif + #if BSSN_ENABLE_MEM_USAGE_LOG // Retrieve memory usage information used during computation; master process prints it bssn_perf.MemoryUsage(¤t_min, ¤t_avg, ¤t_max, @@ -2323,783 +2323,783 @@ void bssn_class::Evolve(int Steps) cout << endl; } #endif - - // Output puncture positions at each step - if (myrank == 0) - { - for (int i_count=0; i_count= CheckTime, perform runtime checks and output status data - if (LastCheck >= CheckTime) - { - LastCheck = 0; - - CheckPoint->write_Black_Hole_position(BH_num_input, BH_num, Porg0, Porgbr, Mass); - CheckPoint->writecheck_cgh(PhysTime, GH); 
-#ifdef WithShell - CheckPoint->writecheck_sh(PhysTime, SH); -#endif - CheckPoint->write_bssn(LastDump, Last2dDump, LastAnas); - } - } - /* - #ifdef With_AHF - // final apparent horizon finding - { - double gam; - for(int ihn=0;ihnCS_Inter(StateList, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << " CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds! " << endl; - } - } -#endif - -#endif - - // Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT_lev); - } - -#if 0 - if(lev>0) Parallel::Restrict_after(GH->PatL[lev-1],GH->PatL[lev],StateList,StateList,Symmetry); -#endif - -#if (REGLEV == 0) - if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0, - SynchList_cor, OldStateList, StateList, SynchList_pre, - fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor)) - for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } -#endif -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function implements recursive time-stepping across AMR levels -// This variant handles the cases PSTR == 1 and PSTR == 2 - -//================================================================================================ - -#elif (PSTR == 1 || PSTR == 2) -void bssn_class::RecursiveStep(int lev) -{ - double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); - - int NoIterations = 1, YN; - if (lev <= trfls) - NoIterations = 1; - else - NoIterations = 2; - - for (int i = 0; i < NoIterations; i++) - { - // if(myrank==0) cout<<"level now = "<mylev; - MPI_Status status; - // receive - if (lev < GH->levels - 1) - { - if (myrank == 
GH->start_rank[lev]) - { - MPI_Recv(tporgo, 3 * BH_num, MPI_DOUBLE, GH->start_rank[lev + 1], 1, MPI_COMM_WORLD, &status); - // cout<Commlev[lev]); - - for (int i = 0; i < BH_num; i++) - for (int j = 0; j < 3; j++) - Porg0[i][j] = tporg[3 * i + j]; - - // if(myrank==GH->start_rank[lev]) cout< 0 && myrank == GH->start_rank[lev]) - { - for (int i = 0; i < BH_num; i++) - for (int j = 0; j < 3; j++) - tporg[3 * i + j] = Porg0[i][j]; - - MPI_Send(tporg, 3 * BH_num, MPI_DOUBLE, GH->start_rank[lev - 1], 1, MPI_COMM_WORLD); - } - - // a_stream.clear(); - // a_stream.str(""); - // a_stream<Commlev[lev],GH->start_rank[lev],a_stream.str()); - } - delete[] tporg; - delete[] tporgo; -#if (REGLEV == 0) - if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0, - SynchList_cor, OldStateList, StateList, SynchList_pre, - fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor)) - for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } -#endif -} - -//================================================================================================ - - - -//================================================================================================ - -// ParallelStep performs time evolution across AMR levels (parallelized) -// This is an alternate implementation - -//================================================================================================ - -#else -void bssn_class::ParallelStep() -{ - // stringstream a_stream; - // a_stream.setf(ios::left); - - double *tporg, *tporgo; - tporg = new double[3 * BH_num]; - tporgo = new double[3 * BH_num]; - - int lev = GH->mylev; - double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); - double dT_levp1 = dT * pow(0.5, Mymax(lev + 1, trfls)); - double dT_levm1 = dT * pow(0.5, Mymax(lev - 1, trfls)); - - int 
NoIterations = 1, YN; - if (lev <= trfls) - NoIterations = 1; - else - NoIterations = int(pow(2.0, lev - trfls)); - - for (int i = 0; i < NoIterations; i++) - { - // if(myrank==GH->start_rank[lev]) cout<<"level now = "<Commlev[lev],GH->start_rank[lev],a_stream.str()); - Step(lev, YN); - - // a_stream.clear(); - // a_stream.str(""); - // a_stream<Commlev[lev],GH->start_rank[lev],a_stream.str()); - -#if (AGM == 2) - if (GH->levels == 1) - { - Enforce_algcon(lev, 0); - } -#endif - - GH->Lt[lev] += dT_lev; - - PhysTime += dT_lev; - -#if (AGM == 2) - if (lev > 0) - { - Enforce_algcon(lev, 0); - if (YN == 1) - Enforce_algcon(lev - 1, 0); - } -#endif - -#if (RPS == 1) - // mesh refinement boundary part - // - // till here the PhysTime has updated dT_lev - // a_stream.clear(); - // a_stream.str(""); - // a_stream<Commlev[lev],GH->start_rank[lev],a_stream.str()); - if (lev < GH->levels - 1) - { - if (lev + 1 <= trfls) - { - // RestrictProlong_aux(lev,1,fgt(PhysTime-dT_lev,StartTime,dT_levp1/2),StateList,OldStateList,SynchList_cor); - RestrictProlong(lev + 1, 1, fgt(PhysTime - dT_lev, StartTime, dT_levp1 / 2), StateList, OldStateList, SynchList_cor); - } - else - { - // if(myrank==GH->start_rank[lev]) cout<mylev<<", "<Commlev[lev],GH->start_rank[lev],"between RestrictProlong"); - - // RestrictProlong_aux(lev,0,fgt(PhysTime-dT_lev,StartTime,dT_levp1/2),StateList,OldStateList,SynchList_cor); - // RestrictProlong_aux(lev,1,fgt(PhysTime-dT_levp1,StartTime,dT_levp1/2),StateList,OldStateList,SynchList_cor); - RestrictProlong(lev + 1, 0, fgt(PhysTime - dT_lev, StartTime, dT_levp1 / 2), StateList, OldStateList, SynchList_cor); - RestrictProlong(lev + 1, 1, fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), StateList, OldStateList, SynchList_cor); - } - } - - // if(myrank==GH->start_rank[lev]) cout<mylev<<", "<Commlev[lev],GH->start_rank[lev],a_stream.str()); - - RestrictProlong(lev, YN, fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), StateList, OldStateList, SynchList_cor); - // 
RestrictProlong(lev,YN,false,StateList,OldStateList,SynchList_cor); - -// if(myrank==GH->start_rank[lev]) cout<mylev<Commlev[lev],GH->start_rank[lev],a_stream.str()); -#endif - - // Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT_lev); - - { - MPI_Status status; - // receive - if (lev < GH->levels - 1) - { - if (myrank == GH->start_rank[lev]) - { - MPI_Recv(tporgo, 3 * BH_num, MPI_DOUBLE, GH->start_rank[lev + 1], 1, MPI_COMM_WORLD, &status); - // cout<Commlev[lev]); - - for (int i = 0; i < BH_num; i++) - for (int j = 0; j < 3; j++) - Porg0[i][j] = tporg[3 * i + j]; - - // if(myrank==GH->start_rank[lev]) cout< 0 && YN == 1 && myrank == GH->start_rank[lev]) - { - for (int i = 0; i < BH_num; i++) - for (int j = 0; j < 3; j++) - tporg[3 * i + j] = Porg0[i][j]; - - MPI_Send(tporg, 3 * BH_num, MPI_DOUBLE, GH->start_rank[lev - 1], 1, MPI_COMM_WORLD); - } - - // a_stream.clear(); - // a_stream.str(""); - // a_stream<Commlev[lev],GH->start_rank[lev],a_stream.str()); - } -#if (REGLEV == 0) - // for higher level - if (lev < GH->levels - 1) - { - if (lev + 1 >= GH->movls) - { - // GH->Regrid_Onelevel_aux(lev,Symmetry,BH_num,Porgbr,Porg0, - if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0, - SynchList_cor, OldStateList, StateList, SynchList_pre, - fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor)) - for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } - - // a_stream.clear(); - // a_stream.str(""); - // a_stream<Regrid_Onelevel_aux for higher level"; - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str()); - } - } - - // for this level - if (YN == 1) - { - if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0, - SynchList_cor, OldStateList, StateList, SynchList_pre, - fgt(PhysTime - dT_lev, StartTime, 
dT_lev / 2), ErrorMonitor)) - for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } - - // a_stream.clear(); - // a_stream.str(""); - // a_stream<Regrid_Onelevel"; - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str()); - } - - // for lower level - if (lev - 1 >= GH->movls) - { - if (lev - 1 <= trfls) - { - if (YN == 1) - { - // GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0, - if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0, - SynchList_cor, OldStateList, StateList, SynchList_pre, - fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor)) - for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } - - // a_stream.clear(); - // a_stream.str(""); - // a_stream<Regrid_Onelevel_aux for lower level"; - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str()); - } - } - else - { - if (i % 4 == 3) - { - // GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0, - if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0, - SynchList_cor, OldStateList, StateList, SynchList_pre, - fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor)) - for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } - - // a_stream.clear(); - // a_stream.str(""); - // a_stream<Regrid_Onelevel_aux for lower level"; - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str()); - } - } - } 
-#endif - } - -#ifdef WithShell - SHStep(); - // a_stream.clear(); - // a_stream.str(""); - // a_stream<Commlev[lev],GH->start_rank[lev],a_stream.str()); - -#if (RPS == 1) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->CS_Inter(StateList, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << " CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds! " << endl; - } - // a_stream.clear(); - // a_stream.str(""); - // a_stream<Commlev[lev],GH->start_rank[lev],a_stream.str()); - } -#endif - -#endif - -#if 0 - if(lev>0) Parallel::Restrict_after(GH->PatL[lev-1],GH->PatL[lev],StateList,StateList,Symmetry); -#endif - - delete[] tporg; - delete[] tporgo; -} -#endif - -//================================================================================================ - - - -//================================================================================================ - -// ParallelStep performs time evolution across AMR levels (parallelized) -// This is another implementation, for the case PSTR == 3 - -//================================================================================================ - -#elif (PSTR == 3) -#warning "remember do not use Shell" -void bssn_class::ParallelStep() -{ - // stringstream a_stream; - // a_stream.setf(ios::left); - - double *tporg, *tporgo; - tporg = new double[3 * BH_num]; - tporgo = new double[3 * BH_num]; - - int lev = GH->mylev; - double dT_lev = dT * pow(0.5, Mymax(GH->levels - 1, trfls)); - if (lev == 1) - { - lev = GH->levels - 1; - for (int i = 0; i < misc::MYpow2(lev); i++) - { - Step(lev, i % 2); - PhysTime += dT_lev; - // if(myrank==nprocs-1) cout<<"OOO level now = "<levels - 2; - for (int i = 1; i < misc::MYpow2(lev + 1); i++) - { - RecursiveStep(lev, i); - PhysTime += dT_lev; - if (i % 2 == 0) - { - // if(myrank==0) cout<<"level now = "<mylev; - if (lev == -1) - lev = 0; - else - lev = GH->levels - 1; - - 
{ - MPI_Status status; - // receive - if (lev == 0) - { - if (myrank == GH->start_rank[lev]) - { - MPI_Recv(tporgo, 3 * BH_num, MPI_DOUBLE, GH->start_rank[GH->levels - 1], 1, MPI_COMM_WORLD, &status); - // cout<Commlev[lev]); - - for (int i = 0; i < BH_num; i++) - for (int j = 0; j < 3; j++) - Porg0[i][j] = tporg[3 * i + j]; - - // if(myrank==GH->start_rank[lev]) cout<start_rank[lev]) - { - for (int i = 0; i < BH_num; i++) - for (int j = 0; j < 3; j++) - tporg[3 * i + j] = Porg0[i][j]; - - MPI_Send(tporg, 3 * BH_num, MPI_DOUBLE, GH->start_rank[0], 1, MPI_COMM_WORLD); - } - } - - delete[] tporg; - delete[] tporgo; -} - -//================================================================================================ - - - - -//================================================================================================ - -// This member function implements recursive time-stepping across AMR levels - -//================================================================================================ - -void bssn_class::RecursiveStep(int lev, int num) // in all 2^(lev+1)-1 steps -{ - if (trfls > 0) - cout << "error: bssn_class::RecursiveStep does not support trfls > 0 yet" << endl; - - if (num / 2 * 2 == num) - RecursiveStep(lev - 1, num / 2); - else - { - Step(lev, 0); - double dT_lev = dT * pow(0.5, Mymax(lev + 1, trfls)); - if (myrank == 0) - cout << "level now = " << lev + 1 << ", " << (num - 1) % 2 << ", " - << fgt(PhysTime - dT_lev, StartTime, dT_lev / 2) << endl; - RestrictProlong(lev + 1, (num - 1) % 2, fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), StateList, OldStateList, SynchList_cor); - } -} -#endif - -//================================================================================================ - - - - -//================================================================================================ - -// This member function configures a single time-step evolution for each grid level. 
-// Applicable for the case PSTR == 0 - -//================================================================================================ - -#if (PSTR == 0) -#if 1 + + // Output puncture positions at each step + if (myrank == 0) + { + for (int i_count=0; i_count= CheckTime, perform runtime checks and output status data + if (LastCheck >= CheckTime) + { + LastCheck = 0; + + CheckPoint->write_Black_Hole_position(BH_num_input, BH_num, Porg0, Porgbr, Mass); + CheckPoint->writecheck_cgh(PhysTime, GH); +#ifdef WithShell + CheckPoint->writecheck_sh(PhysTime, SH); +#endif + CheckPoint->write_bssn(LastDump, Last2dDump, LastAnas); + } + } + /* + #ifdef With_AHF + // final apparent horizon finding + { + double gam; + for(int ihn=0;ihnCS_Inter(StateList, Symmetry); + if (myrank == 0) + { + prev_clock = curr_clock; + curr_clock = clock(); + cout << " CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds! " << endl; + } + } +#endif + +#endif + + // Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT_lev); + } + +#if 0 + if(lev>0) Parallel::Restrict_after(GH->PatL[lev-1],GH->PatL[lev],StateList,StateList,Symmetry); +#endif + +#if (REGLEV == 0) + if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0, + SynchList_cor, OldStateList, StateList, SynchList_pre, + fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor)) + for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } +#endif +} + +//================================================================================================ + + + +//================================================================================================ + +// This member function implements recursive time-stepping across AMR levels +// This variant handles the cases PSTR == 1 and PSTR == 2 + 
+//================================================================================================ + +#elif (PSTR == 1 || PSTR == 2) +void bssn_class::RecursiveStep(int lev) +{ + double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); + + int NoIterations = 1, YN; + if (lev <= trfls) + NoIterations = 1; + else + NoIterations = 2; + + for (int i = 0; i < NoIterations; i++) + { + // if(myrank==0) cout<<"level now = "<mylev; + MPI_Status status; + // receive + if (lev < GH->levels - 1) + { + if (myrank == GH->start_rank[lev]) + { + MPI_Recv(tporgo, 3 * BH_num, MPI_DOUBLE, GH->start_rank[lev + 1], 1, MPI_COMM_WORLD, &status); + // cout<Commlev[lev]); + + for (int i = 0; i < BH_num; i++) + for (int j = 0; j < 3; j++) + Porg0[i][j] = tporg[3 * i + j]; + + // if(myrank==GH->start_rank[lev]) cout< 0 && myrank == GH->start_rank[lev]) + { + for (int i = 0; i < BH_num; i++) + for (int j = 0; j < 3; j++) + tporg[3 * i + j] = Porg0[i][j]; + + MPI_Send(tporg, 3 * BH_num, MPI_DOUBLE, GH->start_rank[lev - 1], 1, MPI_COMM_WORLD); + } + + // a_stream.clear(); + // a_stream.str(""); + // a_stream<Commlev[lev],GH->start_rank[lev],a_stream.str()); + } + delete[] tporg; + delete[] tporgo; +#if (REGLEV == 0) + if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0, + SynchList_cor, OldStateList, StateList, SynchList_pre, + fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor)) + for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } +#endif +} + +//================================================================================================ + + + +//================================================================================================ + +// ParallelStep performs time evolution across AMR levels (parallelized) +// This is an alternate implementation + 
+//================================================================================================ + +#else +void bssn_class::ParallelStep() +{ + // stringstream a_stream; + // a_stream.setf(ios::left); + + double *tporg, *tporgo; + tporg = new double[3 * BH_num]; + tporgo = new double[3 * BH_num]; + + int lev = GH->mylev; + double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); + double dT_levp1 = dT * pow(0.5, Mymax(lev + 1, trfls)); + double dT_levm1 = dT * pow(0.5, Mymax(lev - 1, trfls)); + + int NoIterations = 1, YN; + if (lev <= trfls) + NoIterations = 1; + else + NoIterations = int(pow(2.0, lev - trfls)); + + for (int i = 0; i < NoIterations; i++) + { + // if(myrank==GH->start_rank[lev]) cout<<"level now = "<Commlev[lev],GH->start_rank[lev],a_stream.str()); + Step(lev, YN); + + // a_stream.clear(); + // a_stream.str(""); + // a_stream<Commlev[lev],GH->start_rank[lev],a_stream.str()); + +#if (AGM == 2) + if (GH->levels == 1) + { + Enforce_algcon(lev, 0); + } +#endif + + GH->Lt[lev] += dT_lev; + + PhysTime += dT_lev; + +#if (AGM == 2) + if (lev > 0) + { + Enforce_algcon(lev, 0); + if (YN == 1) + Enforce_algcon(lev - 1, 0); + } +#endif + +#if (RPS == 1) + // mesh refinement boundary part + // + // till here the PhysTime has updated dT_lev + // a_stream.clear(); + // a_stream.str(""); + // a_stream<Commlev[lev],GH->start_rank[lev],a_stream.str()); + if (lev < GH->levels - 1) + { + if (lev + 1 <= trfls) + { + // RestrictProlong_aux(lev,1,fgt(PhysTime-dT_lev,StartTime,dT_levp1/2),StateList,OldStateList,SynchList_cor); + RestrictProlong(lev + 1, 1, fgt(PhysTime - dT_lev, StartTime, dT_levp1 / 2), StateList, OldStateList, SynchList_cor); + } + else + { + // if(myrank==GH->start_rank[lev]) cout<mylev<<", "<Commlev[lev],GH->start_rank[lev],"between RestrictProlong"); + + // RestrictProlong_aux(lev,0,fgt(PhysTime-dT_lev,StartTime,dT_levp1/2),StateList,OldStateList,SynchList_cor); + // 
RestrictProlong_aux(lev,1,fgt(PhysTime-dT_levp1,StartTime,dT_levp1/2),StateList,OldStateList,SynchList_cor); + RestrictProlong(lev + 1, 0, fgt(PhysTime - dT_lev, StartTime, dT_levp1 / 2), StateList, OldStateList, SynchList_cor); + RestrictProlong(lev + 1, 1, fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), StateList, OldStateList, SynchList_cor); + } + } + + // if(myrank==GH->start_rank[lev]) cout<mylev<<", "<Commlev[lev],GH->start_rank[lev],a_stream.str()); + + RestrictProlong(lev, YN, fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), StateList, OldStateList, SynchList_cor); + // RestrictProlong(lev,YN,false,StateList,OldStateList,SynchList_cor); + +// if(myrank==GH->start_rank[lev]) cout<mylev<Commlev[lev],GH->start_rank[lev],a_stream.str()); +#endif + + // Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT_lev); + + { + MPI_Status status; + // receive + if (lev < GH->levels - 1) + { + if (myrank == GH->start_rank[lev]) + { + MPI_Recv(tporgo, 3 * BH_num, MPI_DOUBLE, GH->start_rank[lev + 1], 1, MPI_COMM_WORLD, &status); + // cout<Commlev[lev]); + + for (int i = 0; i < BH_num; i++) + for (int j = 0; j < 3; j++) + Porg0[i][j] = tporg[3 * i + j]; + + // if(myrank==GH->start_rank[lev]) cout< 0 && YN == 1 && myrank == GH->start_rank[lev]) + { + for (int i = 0; i < BH_num; i++) + for (int j = 0; j < 3; j++) + tporg[3 * i + j] = Porg0[i][j]; + + MPI_Send(tporg, 3 * BH_num, MPI_DOUBLE, GH->start_rank[lev - 1], 1, MPI_COMM_WORLD); + } + + // a_stream.clear(); + // a_stream.str(""); + // a_stream<Commlev[lev],GH->start_rank[lev],a_stream.str()); + } +#if (REGLEV == 0) + // for higher level + if (lev < GH->levels - 1) + { + if (lev + 1 >= GH->movls) + { + // GH->Regrid_Onelevel_aux(lev,Symmetry,BH_num,Porgbr,Porg0, + if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0, + SynchList_cor, OldStateList, StateList, SynchList_pre, + fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor)) + for (int il = 0; il < GH->levels; il++) { 
sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } + + // a_stream.clear(); + // a_stream.str(""); + // a_stream<Regrid_Onelevel_aux for higher level"; + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str()); + } + } + + // for this level + if (YN == 1) + { + if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0, + SynchList_cor, OldStateList, StateList, SynchList_pre, + fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor)) + for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } + + // a_stream.clear(); + // a_stream.str(""); + // a_stream<Regrid_Onelevel"; + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str()); + } + + // for lower level + if (lev - 1 >= GH->movls) + { + if (lev - 1 <= trfls) + { + if (YN == 1) + { + // GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0, + if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0, + SynchList_cor, OldStateList, StateList, SynchList_pre, + fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor)) + for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } + + // a_stream.clear(); + // a_stream.str(""); + // a_stream<Regrid_Onelevel_aux for lower level"; + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str()); + } + } + else + { + if (i % 4 == 3) + { + // GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0, + if (GH->Regrid_Onelevel(lev - 1, 
Symmetry, BH_num, Porgbr, Porg0, + SynchList_cor, OldStateList, StateList, SynchList_pre, + fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor)) + for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } + + // a_stream.clear(); + // a_stream.str(""); + // a_stream<Regrid_Onelevel_aux for lower level"; + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str()); + } + } + } +#endif + } + +#ifdef WithShell + SHStep(); + // a_stream.clear(); + // a_stream.str(""); + // a_stream<Commlev[lev],GH->start_rank[lev],a_stream.str()); + +#if (RPS == 1) + { + clock_t prev_clock, curr_clock; + if (myrank == 0) + curr_clock = clock(); + SH->CS_Inter(StateList, Symmetry); + if (myrank == 0) + { + prev_clock = curr_clock; + curr_clock = clock(); + cout << " CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) + << " seconds! 
" << endl; + } + // a_stream.clear(); + // a_stream.str(""); + // a_stream<Commlev[lev],GH->start_rank[lev],a_stream.str()); + } +#endif + +#endif + +#if 0 + if(lev>0) Parallel::Restrict_after(GH->PatL[lev-1],GH->PatL[lev],StateList,StateList,Symmetry); +#endif + + delete[] tporg; + delete[] tporgo; +} +#endif + +//================================================================================================ + + + +//================================================================================================ + +// ParallelStep performs time evolution across AMR levels (parallelized) +// This is another implementation, for the case PSTR == 3 + +//================================================================================================ + +#elif (PSTR == 3) +#warning "remember do not use Shell" +void bssn_class::ParallelStep() +{ + // stringstream a_stream; + // a_stream.setf(ios::left); + + double *tporg, *tporgo; + tporg = new double[3 * BH_num]; + tporgo = new double[3 * BH_num]; + + int lev = GH->mylev; + double dT_lev = dT * pow(0.5, Mymax(GH->levels - 1, trfls)); + if (lev == 1) + { + lev = GH->levels - 1; + for (int i = 0; i < misc::MYpow2(lev); i++) + { + Step(lev, i % 2); + PhysTime += dT_lev; + // if(myrank==nprocs-1) cout<<"OOO level now = "<levels - 2; + for (int i = 1; i < misc::MYpow2(lev + 1); i++) + { + RecursiveStep(lev, i); + PhysTime += dT_lev; + if (i % 2 == 0) + { + // if(myrank==0) cout<<"level now = "<mylev; + if (lev == -1) + lev = 0; + else + lev = GH->levels - 1; + + { + MPI_Status status; + // receive + if (lev == 0) + { + if (myrank == GH->start_rank[lev]) + { + MPI_Recv(tporgo, 3 * BH_num, MPI_DOUBLE, GH->start_rank[GH->levels - 1], 1, MPI_COMM_WORLD, &status); + // cout<Commlev[lev]); + + for (int i = 0; i < BH_num; i++) + for (int j = 0; j < 3; j++) + Porg0[i][j] = tporg[3 * i + j]; + + // if(myrank==GH->start_rank[lev]) cout<start_rank[lev]) + { + for (int i = 0; i < BH_num; i++) + for (int j = 0; j < 3; j++) + tporg[3 * i 
+ j] = Porg0[i][j]; + + MPI_Send(tporg, 3 * BH_num, MPI_DOUBLE, GH->start_rank[0], 1, MPI_COMM_WORLD); + } + } + + delete[] tporg; + delete[] tporgo; +} + +//================================================================================================ + + + + +//================================================================================================ + +// This member function implements recursive time-stepping across AMR levels + +//================================================================================================ + +void bssn_class::RecursiveStep(int lev, int num) // in all 2^(lev+1)-1 steps +{ + if (trfls > 0) + cout << "error: bssn_class::RecursiveStep does not support trfls > 0 yet" << endl; + + if (num / 2 * 2 == num) + RecursiveStep(lev - 1, num / 2); + else + { + Step(lev, 0); + double dT_lev = dT * pow(0.5, Mymax(lev + 1, trfls)); + if (myrank == 0) + cout << "level now = " << lev + 1 << ", " << (num - 1) % 2 << ", " + << fgt(PhysTime - dT_lev, StartTime, dT_lev / 2) << endl; + RestrictProlong(lev + 1, (num - 1) % 2, fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), StateList, OldStateList, SynchList_cor); + } +} +#endif + +//================================================================================================ + + + + +//================================================================================================ + +// This member function configures a single time-step evolution for each grid level. 
+// Applicable for the case PSTR == 0 + +//================================================================================================ + +#if (PSTR == 0) +#if 1 void bssn_class::Step(int lev, int YN) { setpbh(BH_num, Porg0, Mass, BH_num_input); @@ -3112,69 +3112,69 @@ void bssn_class::Step(int lev, int YN) #endif // new code 2013-2-15, zjcao -#if (MAPBH == 1) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - for (int ith = 0; ith < 3; ith++) - Porg1[ithBH][ith] = Porg0[ithBH][ith] + Porg_rhs[ithBH][ith] * dT_lev; - if (Symmetry > 0) - Porg1[ithBH][2] = fabs(Porg1[ithBH][2]); - if (Symmetry == 2) - { - Porg1[ithBH][0] = fabs(Porg1[ithBH][0]); - Porg1[ithBH][1] = fabs(Porg1[ithBH][1]); - } - if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "predictor step finds NaN for BH's position from (" - << Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } - - // data analysis part - // Warning NOTE: the variables1 are used as temp storege room - if (lev == a_lev) - { - AnalysisStuff(lev, dT_lev); - } -#endif - -#ifdef With_AHF - AH_Step_Find(lev, dT_lev); -#endif - bool BB = fgt(PhysTime, StartTime, dT_lev / 2); - double ndeps = numepss; - if (lev < GH->movls) - ndeps = numepsb; - double TRK4 = PhysTime; - int iter_count = 0; // count RK4 substeps - int pre = 0, cor = 1; - int ERROR = 0; - - MyList *sPp; - // Predictor - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { +#if (MAPBH == 1) + // for 
black hole position + if (BH_num > 0 && lev == GH->levels - 1) + { + compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev); + for (int ithBH = 0; ithBH < BH_num; ithBH++) + { + for (int ith = 0; ith < 3; ith++) + Porg1[ithBH][ith] = Porg0[ithBH][ith] + Porg_rhs[ithBH][ith] * dT_lev; + if (Symmetry > 0) + Porg1[ithBH][2] = fabs(Porg1[ithBH][2]); + if (Symmetry == 2) + { + Porg1[ithBH][0] = fabs(Porg1[ithBH][0]); + Porg1[ithBH][1] = fabs(Porg1[ithBH][1]); + } + if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2])) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "predictor step finds NaN for BH's position from (" + << Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] << ")" << endl; + + MyList *DG_List = new MyList(Sfx0); + DG_List->insert(Sfx0); + DG_List->insert(Sfy0); + DG_List->insert(Sfz0); + Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); + DG_List->clearList(); + } + } + } + + // data analysis part + // Warning NOTE: the variables1 are used as temp storege room + if (lev == a_lev) + { + AnalysisStuff(lev, dT_lev); + } +#endif + +#ifdef With_AHF + AH_Step_Find(lev, dT_lev); +#endif + bool BB = fgt(PhysTime, StartTime, dT_lev / 2); + double ndeps = numepss; + if (lev < GH->movls) + ndeps = numepsb; + double TRK4 = PhysTime; + int iter_count = 0; // count RK4 substeps + int pre = 0, cor = 1; + int ERROR = 0; + + MyList *sPp; + // Predictor + MyList *Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { #if (AGM == 0) if (!use_cuda_resident_sync) f_enforce_ga(cg->shape, @@ -3336,257 +3336,257 @@ void bssn_class::Step(int lev, int YN) if (!used_gpu_resident_state) f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - // NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls - -#ifdef 
WithShell - // evolve Shell Patches - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); -#endif - - if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[fngfs + ShellPatch::gx], - cg->fgfs[fngfs + ShellPatch::gy], - cg->fgfs[fngfs + ShellPatch::gz], - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz], - cg->fgfs[fngfs + ShellPatch::drhodxx], - cg->fgfs[fngfs + ShellPatch::drhodxy], - cg->fgfs[fngfs + ShellPatch::drhodxz], - cg->fgfs[fngfs + ShellPatch::drhodyy], - cg->fgfs[fngfs + ShellPatch::drhodyz], - cg->fgfs[fngfs + ShellPatch::drhodzz], - cg->fgfs[fngfs + ShellPatch::dsigmadxx], - cg->fgfs[fngfs + ShellPatch::dsigmadxy], - cg->fgfs[fngfs + ShellPatch::dsigmadxz], - cg->fgfs[fngfs + ShellPatch::dsigmadyy], - cg->fgfs[fngfs + ShellPatch::dsigmadyz], - cg->fgfs[fngfs + ShellPatch::dsigmadzz], - cg->fgfs[fngfs + ShellPatch::dRdxx], - cg->fgfs[fngfs + ShellPatch::dRdxy], - cg->fgfs[fngfs + ShellPatch::dRdxz], - cg->fgfs[fngfs + ShellPatch::dRdyy], - cg->fgfs[fngfs + ShellPatch::dRdyz], - cg->fgfs[fngfs + ShellPatch::dRdzz], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], 
cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], - cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], - cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], - cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], - cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], - cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], - cg->fgfs[Lap_rhs->sgfn], - cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], - cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], - cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, 
lev, numepsh, sPp->data->sst, pre)) - { - cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; - // we do not check the correspondence here - - while (varl0) - { - // sommerfeld indeed for outter boudary while fix BD for inner boundary - f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], - sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], - cg->fgfs[varlrhs->data->sgfn], - cg->fgfs[varl0->data->sgfn], - varl0->data->propspeed, varl0->data->SoA, - Symmetry); - - f_rungekutta4_rout(cg->shape, dT_lev, - cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); - - varl0 = varl0->next; - varl = varl->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } -#if 0 -// check rhs - { - SH->Dump_Data(RHSList,0,PhysTime,dT_lev); - if(myrank == 0) - { - cout<<"check rhs"<data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } + // NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls + +#ifdef WithShell + // evolve Shell Patches + if (lev == 0) + { + sPp = SH->PatL; + while (sPp) + { + MyList *BP = sPp->data->blb; + int fngfs = sPp->data->fngfs; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { +#if (AGM == 0) + f_enforce_ga(cg->shape, + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], 
cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); +#endif + + if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[fngfs + ShellPatch::gx], + cg->fgfs[fngfs + ShellPatch::gy], + cg->fgfs[fngfs + ShellPatch::gz], + cg->fgfs[fngfs + ShellPatch::drhodx], + cg->fgfs[fngfs + ShellPatch::drhody], + cg->fgfs[fngfs + ShellPatch::drhodz], + cg->fgfs[fngfs + ShellPatch::dsigmadx], + cg->fgfs[fngfs + ShellPatch::dsigmady], + cg->fgfs[fngfs + ShellPatch::dsigmadz], + cg->fgfs[fngfs + ShellPatch::dRdx], + cg->fgfs[fngfs + ShellPatch::dRdy], + cg->fgfs[fngfs + ShellPatch::dRdz], + cg->fgfs[fngfs + ShellPatch::drhodxx], + cg->fgfs[fngfs + ShellPatch::drhodxy], + cg->fgfs[fngfs + ShellPatch::drhodxz], + cg->fgfs[fngfs + ShellPatch::drhodyy], + cg->fgfs[fngfs + ShellPatch::drhodyz], + cg->fgfs[fngfs + ShellPatch::drhodzz], + cg->fgfs[fngfs + ShellPatch::dsigmadxx], + cg->fgfs[fngfs + ShellPatch::dsigmadxy], + cg->fgfs[fngfs + ShellPatch::dsigmadxz], + cg->fgfs[fngfs + ShellPatch::dsigmadyy], + cg->fgfs[fngfs + ShellPatch::dsigmadyz], + cg->fgfs[fngfs + ShellPatch::dsigmadzz], + cg->fgfs[fngfs + ShellPatch::dRdxx], + cg->fgfs[fngfs + ShellPatch::dRdxy], + cg->fgfs[fngfs + ShellPatch::dRdxz], + cg->fgfs[fngfs + ShellPatch::dRdyy], + cg->fgfs[fngfs + ShellPatch::dRdyz], + cg->fgfs[fngfs + ShellPatch::dRdzz], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], + cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], 
cg->fgfs[gxz_rhs->sgfn], + cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], + cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], + cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], + cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], + cg->fgfs[Lap_rhs->sgfn], + cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], + cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], + cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], + cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], + cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], + cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], + cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], + cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], + cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], + cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], + cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], + cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], + cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], + cg->fgfs[Cons_Ham->sgfn], + cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], + cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], + Symmetry, lev, numepsh, sPp->data->sst, pre)) + { + cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" + << cg->bbox[0] << ":" << cg->bbox[3] << "," + << cg->bbox[1] << ":" << cg->bbox[4] << "," + << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; + ERROR = 1; + } + + // rk4 substep and boundary + { + MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; + // we do not check the correspondence here + + while (varl0) + { + // sommerfeld indeed for outter boudary while 
fix BD for inner boundary + f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], + sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], + sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], + cg->fgfs[varlrhs->data->sgfn], + cg->fgfs[varl0->data->sgfn], + varl0->data->propspeed, varl0->data->SoA, + Symmetry); + + f_rungekutta4_rout(cg->shape, dT_lev, + cg->fgfs[varl0->data->sgfn], + cg->fgfs[varl->data->sgfn], + cg->fgfs[varlrhs->data->sgfn], + iter_count); + + varl0 = varl0->next; + varl = varl->next; + varlrhs = varlrhs->next; + } + } + f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); + } + if (BP == sPp->data->ble) + break; + BP = BP->next; + } + sPp = sPp->next; + } +#if 0 +// check rhs + { + SH->Dump_Data(RHSList,0,PhysTime,dT_lev); + if(myrank == 0) + { + cout<<"check rhs"<PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre); - -#ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_pre, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << " Shell stuff synchronization used " - << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds! " << endl; - } + +#ifdef WithShell + if (lev == 0) + { + clock_t prev_clock, curr_clock; + if (myrank == 0) + curr_clock = clock(); + SH->Synch(SynchList_pre, Symmetry); + if (myrank == 0) + { + prev_clock = curr_clock; + curr_clock = clock(); + cout << " Shell stuff synchronization used " + << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) + << " seconds! 
" << endl; + } } #endif Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry); - -#ifdef WithShell - // Complete non-blocking error reduction and check - MPI_Wait(&err_req, MPI_STATUS_IGNORE); - if (ERROR) - { - Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev); - SH->Dump_Data(StateList, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime << ", lev = " << lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } -#endif - -#if (MAPBH == 0) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg[ithBH][0], Porg_rhs[ithBH][0], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg[ithBH][1], Porg_rhs[ithBH][1], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg[ithBH][2], Porg_rhs[ithBH][2], iter_count); - if (Symmetry > 0) - Porg[ithBH][2] = fabs(Porg[ithBH][2]); - if (Symmetry == 2) - { - Porg[ithBH][0] = fabs(Porg[ithBH][0]); - Porg[ithBH][1] = fabs(Porg[ithBH][1]); - } - if (!finite(Porg[ithBH][0]) || !finite(Porg[ithBH][1]) || !finite(Porg[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "predictor step finds NaN for BH's position from (" - << Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } - // data analysis part - // Warning NOTE: the variables1 are used as temp storege room - if (lev == a_lev) - { - AnalysisStuff(lev, dT_lev); - } -#endif - - // corrector - for (iter_count = 1; iter_count < 4; iter_count++) - { - // for RK4: t0, t0+dt/2, t0+dt/2, 
t0+dt; - if (iter_count == 1 || iter_count == 3) - TRK4 += dT_lev / 2; - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { + +#ifdef WithShell + // Complete non-blocking error reduction and check + MPI_Wait(&err_req, MPI_STATUS_IGNORE); + if (ERROR) + { + Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev); + SH->Dump_Data(StateList, 0, PhysTime, dT_lev); + if (myrank == 0) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime << ", lev = " << lev << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + } +#endif + +#if (MAPBH == 0) + // for black hole position + if (BH_num > 0 && lev == GH->levels - 1) + { + compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev); + for (int ithBH = 0; ithBH < BH_num; ithBH++) + { + f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg[ithBH][0], Porg_rhs[ithBH][0], iter_count); + f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg[ithBH][1], Porg_rhs[ithBH][1], iter_count); + f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg[ithBH][2], Porg_rhs[ithBH][2], iter_count); + if (Symmetry > 0) + Porg[ithBH][2] = fabs(Porg[ithBH][2]); + if (Symmetry == 2) + { + Porg[ithBH][0] = fabs(Porg[ithBH][0]); + Porg[ithBH][1] = fabs(Porg[ithBH][1]); + } + if (!finite(Porg[ithBH][0]) || !finite(Porg[ithBH][1]) || !finite(Porg[ithBH][2])) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "predictor step finds NaN for BH's position from (" + << Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] << ")" << endl; + + MyList *DG_List = new MyList(Sfx0); + DG_List->insert(Sfx0); + DG_List->insert(Sfy0); + DG_List->insert(Sfz0); + Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); + DG_List->clearList(); + } + } + } + // data analysis part + // Warning NOTE: the variables1 are used as temp storege room + if (lev == a_lev) + { + AnalysisStuff(lev, dT_lev); + } +#endif + 
+ // corrector + for (iter_count = 1; iter_count < 4; iter_count++) + { + // for RK4: t0, t0+dt/2, t0+dt/2, t0+dt; + if (iter_count == 1 || iter_count == 3) + TRK4 += dT_lev / 2; + Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { #if (AGM == 0) if (!use_cuda_resident_sync) f_enforce_ga(cg->shape, @@ -3757,286 +3757,286 @@ void bssn_class::Step(int lev, int YN) if (!used_gpu_resident_state) f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - - // NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls - -#ifdef WithShell - // evolve Shell Patches - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#elif (AGM == 1) - if (iter_count == 3) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#endif - - if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[fngfs + ShellPatch::gx], - cg->fgfs[fngfs + ShellPatch::gy], - cg->fgfs[fngfs + ShellPatch::gz], - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs 
+ ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz], - cg->fgfs[fngfs + ShellPatch::drhodxx], - cg->fgfs[fngfs + ShellPatch::drhodxy], - cg->fgfs[fngfs + ShellPatch::drhodxz], - cg->fgfs[fngfs + ShellPatch::drhodyy], - cg->fgfs[fngfs + ShellPatch::drhodyz], - cg->fgfs[fngfs + ShellPatch::drhodzz], - cg->fgfs[fngfs + ShellPatch::dsigmadxx], - cg->fgfs[fngfs + ShellPatch::dsigmadxy], - cg->fgfs[fngfs + ShellPatch::dsigmadxz], - cg->fgfs[fngfs + ShellPatch::dsigmadyy], - cg->fgfs[fngfs + ShellPatch::dsigmadyz], - cg->fgfs[fngfs + ShellPatch::dsigmadzz], - cg->fgfs[fngfs + ShellPatch::dRdxx], - cg->fgfs[fngfs + ShellPatch::dRdxy], - cg->fgfs[fngfs + ShellPatch::dRdxz], - cg->fgfs[fngfs + ShellPatch::dRdyy], - cg->fgfs[fngfs + ShellPatch::dRdyz], - cg->fgfs[fngfs + ShellPatch::dRdzz], - cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], - cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], - cg->fgfs[Lap->sgfn], - cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn], - cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn], - cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn], - cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn], - cg->fgfs[gyy1->sgfn], cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn], - cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn], - cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn], - cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn], - cg->fgfs[Lap1->sgfn], - cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn], - cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn], - cg->fgfs[rho->sgfn], - 
cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, numepsh, sPp->data->sst, cor)) - { - cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; - // we do not check the correspondence here - - while (varl0) - { - // sommerfeld indeed for outter boudary while fix BD for inner boundary - f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], - sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varl->data->sgfn], - varl0->data->propspeed, varl0->data->SoA, - Symmetry); - - f_rungekutta4_rout(cg->shape, dT_lev, - cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); - - varl0 = varl0->next; - varl = 
varl->next; - varl1 = varl1->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } - // Non-blocking error reduction overlapped with Sync to hide Allreduce latency - MPI_Request err_req_cor; - { - int erh = ERROR; - MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor); - } -#endif + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } + + // NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls + +#ifdef WithShell + // evolve Shell Patches + if (lev == 0) + { + sPp = SH->PatL; + while (sPp) + { + MyList *BP = sPp->data->blb; + int fngfs = sPp->data->fngfs; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { +#if (AGM == 0) + f_enforce_ga(cg->shape, + cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], + cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], + cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], + cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); +#elif (AGM == 1) + if (iter_count == 3) + f_enforce_ga(cg->shape, + cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], + cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], + cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], + cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); +#endif + + if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[fngfs + ShellPatch::gx], + cg->fgfs[fngfs + ShellPatch::gy], + cg->fgfs[fngfs + ShellPatch::gz], + cg->fgfs[fngfs + ShellPatch::drhodx], + cg->fgfs[fngfs + ShellPatch::drhody], + cg->fgfs[fngfs + ShellPatch::drhodz], + cg->fgfs[fngfs + ShellPatch::dsigmadx], + cg->fgfs[fngfs + ShellPatch::dsigmady], + cg->fgfs[fngfs + ShellPatch::dsigmadz], + cg->fgfs[fngfs + ShellPatch::dRdx], + cg->fgfs[fngfs + 
ShellPatch::dRdy], + cg->fgfs[fngfs + ShellPatch::dRdz], + cg->fgfs[fngfs + ShellPatch::drhodxx], + cg->fgfs[fngfs + ShellPatch::drhodxy], + cg->fgfs[fngfs + ShellPatch::drhodxz], + cg->fgfs[fngfs + ShellPatch::drhodyy], + cg->fgfs[fngfs + ShellPatch::drhodyz], + cg->fgfs[fngfs + ShellPatch::drhodzz], + cg->fgfs[fngfs + ShellPatch::dsigmadxx], + cg->fgfs[fngfs + ShellPatch::dsigmadxy], + cg->fgfs[fngfs + ShellPatch::dsigmadxz], + cg->fgfs[fngfs + ShellPatch::dsigmadyy], + cg->fgfs[fngfs + ShellPatch::dsigmadyz], + cg->fgfs[fngfs + ShellPatch::dsigmadzz], + cg->fgfs[fngfs + ShellPatch::dRdxx], + cg->fgfs[fngfs + ShellPatch::dRdxy], + cg->fgfs[fngfs + ShellPatch::dRdxz], + cg->fgfs[fngfs + ShellPatch::dRdyy], + cg->fgfs[fngfs + ShellPatch::dRdyz], + cg->fgfs[fngfs + ShellPatch::dRdzz], + cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], + cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], + cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], + cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], + cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], + cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], + cg->fgfs[Lap->sgfn], + cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn], + cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn], + cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn], + cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn], + cg->fgfs[gyy1->sgfn], cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn], + cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn], + cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn], + cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn], + cg->fgfs[Lap1->sgfn], + cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn], + cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn], + cg->fgfs[rho->sgfn], + cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], + cg->fgfs[Sxx->sgfn], 
cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], + cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], + cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], + cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], + cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], + cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], + cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], + cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], + cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], + cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], + cg->fgfs[Cons_Ham->sgfn], + cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], + cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], + Symmetry, lev, numepsh, sPp->data->sst, cor)) + { + cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" + << cg->bbox[0] << ":" << cg->bbox[3] << "," + << cg->bbox[1] << ":" << cg->bbox[4] << "," + << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; + ERROR = 1; + } + // rk4 substep and boundary + { + MyList *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; + // we do not check the correspondence here + + while (varl0) + { + // sommerfeld indeed for outter boudary while fix BD for inner boundary + f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], + sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], + sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], + cg->fgfs[varl1->data->sgfn], + cg->fgfs[varl->data->sgfn], + varl0->data->propspeed, varl0->data->SoA, + Symmetry); + + f_rungekutta4_rout(cg->shape, dT_lev, + cg->fgfs[varl0->data->sgfn], + cg->fgfs[varl1->data->sgfn], + cg->fgfs[varlrhs->data->sgfn], + iter_count); + + varl0 = varl0->next; + varl = varl->next; + varl1 = varl1->next; + varlrhs = varlrhs->next; + } + } + 
f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); + } + if (BP == sPp->data->ble) + break; + BP = BP->next; + } + sPp = sPp->next; + } + } + // Non-blocking error reduction overlapped with Sync to hide Allreduce latency + MPI_Request err_req_cor; + { + int erh = ERROR; + MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor); + } +#endif Parallel::AsyncSyncState async_cor; Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor); - -#ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_cor, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << " Shell stuff synchronization used " - << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds! " << endl; - } - } + +#ifdef WithShell + if (lev == 0) + { + clock_t prev_clock, curr_clock; + if (myrank == 0) + curr_clock = clock(); + SH->Synch(SynchList_cor, Symmetry); + if (myrank == 0) + { + prev_clock = curr_clock; + curr_clock = clock(); + cout << " Shell stuff synchronization used " + << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) + << " seconds! 
" << endl; + } + } #endif Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry); - -#ifdef WithShell - // Complete non-blocking error reduction and check - MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE); - if (ERROR) - { - Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev); - SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count - << " variables at t = " << PhysTime - << ", lev = " << lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } -#endif - -#if (MAPBH == 0) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg, Porg1, Sfx, Sfy, Sfz, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg1[ithBH][0], Porg_rhs[ithBH][0], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg1[ithBH][1], Porg_rhs[ithBH][1], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg1[ithBH][2], Porg_rhs[ithBH][2], iter_count); - if (Symmetry > 0) - Porg1[ithBH][2] = fabs(Porg1[ithBH][2]); - if (Symmetry == 2) - { - Porg1[ithBH][0] = fabs(Porg1[ithBH][0]); - Porg1[ithBH][1] = fabs(Porg1[ithBH][1]); - } - if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << iter_count << " corrector step finds NaN for BH's position from (" - << Porg[ithBH][0] << "," << Porg[ithBH][1] << "," << Porg[ithBH][2] - << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } -#endif - - // swap time level - if (iter_count < 3) - { - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(SynchList_pre, 
SynchList_cor, myrank); - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } -#ifdef WithShell - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(SynchList_pre, SynchList_cor, myrank); - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } -#endif - -#if (MAPBH == 0) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - Porg[ithBH][0] = Porg1[ithBH][0]; - Porg[ithBH][1] = Porg1[ithBH][1]; - Porg[ithBH][2] = Porg1[ithBH][2]; - } - } -#endif - } + +#ifdef WithShell + // Complete non-blocking error reduction and check + MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE); + if (ERROR) + { + Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev); + SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev); + if (myrank == 0) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count + << " variables at t = " << PhysTime + << ", lev = " << lev << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + } +#endif + +#if (MAPBH == 0) + // for black hole position + if (BH_num > 0 && lev == GH->levels - 1) + { + compute_Porg_rhs(Porg, Porg1, Sfx, Sfy, Sfz, lev); + for (int ithBH = 0; ithBH < BH_num; ithBH++) + { + f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg1[ithBH][0], Porg_rhs[ithBH][0], iter_count); + f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg1[ithBH][1], Porg_rhs[ithBH][1], iter_count); + f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg1[ithBH][2], Porg_rhs[ithBH][2], iter_count); + if (Symmetry > 0) + Porg1[ithBH][2] = fabs(Porg1[ithBH][2]); + if (Symmetry == 2) + { + Porg1[ithBH][0] = fabs(Porg1[ithBH][0]); + Porg1[ithBH][1] = fabs(Porg1[ithBH][1]); + } + if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2])) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << 
iter_count << " corrector step finds NaN for BH's position from (" + << Porg[ithBH][0] << "," << Porg[ithBH][1] << "," << Porg[ithBH][2] + << ")" << endl; + + MyList *DG_List = new MyList(Sfx0); + DG_List->insert(Sfx0); + DG_List->insert(Sfy0); + DG_List->insert(Sfz0); + Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); + DG_List->clearList(); + } + } + } +#endif + + // swap time level + if (iter_count < 3) + { + Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + cg->swapList(SynchList_pre, SynchList_cor, myrank); + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } +#ifdef WithShell + if (lev == 0) + { + sPp = SH->PatL; + while (sPp) + { + MyList *BP = sPp->data->blb; + while (BP) + { + Block *cg = BP->data; + cg->swapList(SynchList_pre, SynchList_cor, myrank); + if (BP == sPp->data->ble) + break; + BP = BP->next; + } + sPp = sPp->next; + } + } +#endif + +#if (MAPBH == 0) + // for black hole position + if (BH_num > 0 && lev == GH->levels - 1) + { + for (int ithBH = 0; ithBH < BH_num; ithBH++) + { + Porg[ithBH][0] = Porg1[ithBH][0]; + Porg[ithBH][1] = Porg1[ithBH][1]; + Porg[ithBH][2] = Porg1[ithBH][2]; + } + } +#endif + } } #if USE_CUDA_BSSN if (use_cuda_resident_sync) @@ -4045,1911 +4045,1911 @@ void bssn_class::Step(int lev, int YN) #if (RPS == 0) // mesh refinement boundary part RestrictProlong(lev, YN, BB); - -#ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->CS_Inter(SynchList_cor, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << " CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds! 
" << endl; - } - } -#endif - -#endif - // note the data structure before update - // SynchList_cor 1 ----------- - // - // StateList 0 ----------- - // - // OldStateList old ----------- - // update - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(StateList, SynchList_cor, myrank); - cg->swapList(OldStateList, SynchList_cor, myrank); - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } -#ifdef WithShell - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(StateList, SynchList_cor, myrank); - cg->swapList(OldStateList, SynchList_cor, myrank); - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } -#if 0 -// check StateList - { - SH->Dump_Data(StateList,0,PhysTime,dT_lev); - if(myrank == 0) - { - cout<<"check StateList"< 0 && lev == GH->levels - 1) - { - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - Porg0[ithBH][0] = Porg1[ithBH][0]; - Porg0[ithBH][1] = Porg1[ithBH][1]; - Porg0[ithBH][2] = Porg1[ithBH][2]; - } - } -} - -//================================================================================================ - - - - -//================================================================================================ - -// This member function implements single-step time evolution for each AMR level (alternate) - -//================================================================================================ - -// ICN for bam comparison - -#else -void bssn_class::Step(int lev, int YN) -{ - double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); -#ifdef With_AHF - AH_Step_Find(lev, dT_lev); -#endif - bool BB = fgt(PhysTime, StartTime, dT_lev / 2); - double ndeps = numepss; - if (lev < GH->movls) - ndeps = numepsb; - double TRK4 = PhysTime; - int iter_count = 0; // count RK4 substeps - int pre = 0, cor = 1; - int ERROR = 0; - - MyList *sPp; - // 
Predictor - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); -#endif - - if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], - cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], - cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], - cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], - cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], - cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], - cg->fgfs[Lap_rhs->sgfn], - cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], - cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], - cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], 
cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, ndeps, pre)) - { - cout << "find NaN in domain: (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; - // we do not check the correspondence here - while (varl0) - { -#ifndef WithShell - if (lev == 0) // sommerfeld indeed - f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - cg->fgfs[varlrhs->data->sgfn], - cg->fgfs[varl0->data->sgfn], varl0->data->propspeed, varl0->data->SoA, - Symmetry); - -#endif - f_icn_rout(cg->shape, dT_lev, - cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); -#ifndef WithShell - if (lev > 0) // fix BD point -#endif - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, - cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn], - varl0->data->SoA, - 
Symmetry, cor); - - varl0 = varl0->next; - varl = varl->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - // NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls - -#ifdef WithShell - // evolve Shell Patches - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); -#endif - - if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[fngfs + ShellPatch::gx], - cg->fgfs[fngfs + ShellPatch::gy], - cg->fgfs[fngfs + ShellPatch::gz], - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz], - cg->fgfs[fngfs + ShellPatch::drhodxx], - cg->fgfs[fngfs + ShellPatch::drhodxy], - cg->fgfs[fngfs + ShellPatch::drhodxz], - cg->fgfs[fngfs + ShellPatch::drhodyy], - cg->fgfs[fngfs + ShellPatch::drhodyz], - cg->fgfs[fngfs + ShellPatch::drhodzz], - cg->fgfs[fngfs + ShellPatch::dsigmadxx], - cg->fgfs[fngfs + ShellPatch::dsigmadxy], - cg->fgfs[fngfs + ShellPatch::dsigmadxz], - cg->fgfs[fngfs + ShellPatch::dsigmadyy], - cg->fgfs[fngfs + ShellPatch::dsigmadyz], - cg->fgfs[fngfs + ShellPatch::dsigmadzz], - cg->fgfs[fngfs + ShellPatch::dRdxx], - cg->fgfs[fngfs + 
ShellPatch::dRdxy], - cg->fgfs[fngfs + ShellPatch::dRdxz], - cg->fgfs[fngfs + ShellPatch::dRdyy], - cg->fgfs[fngfs + ShellPatch::dRdyz], - cg->fgfs[fngfs + ShellPatch::dRdzz], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], - cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], - cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], - cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], - cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], - cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], - cg->fgfs[Lap_rhs->sgfn], - cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], - cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], - cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], 
cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, numepsh, sPp->data->sst, pre)) - { - cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; // we do not check the correspondence here - while (varl0) - { - // sommerfeld indeed for outter boudary while fix BD for inner boundary - f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], - sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], - cg->fgfs[varlrhs->data->sgfn], - cg->fgfs[varl0->data->sgfn], - varl0->data->propspeed, varl0->data->SoA, - Symmetry); - - f_icn_rout(cg->shape, dT_lev, - cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); - - varl0 = varl0->next; - varl = varl->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } -#if 0 -// check rhs - { - SH->Dump_Data(RHSList,0,PhysTime,dT_lev); - if(myrank == 0) - { - cout<<"check rhs"<PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre); - -#ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_pre, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << " Shell stuff synchronization 
used " - << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds! " << endl; - } - } -#endif - Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry); - -#ifdef WithShell - // Complete non-blocking error reduction and check - MPI_Wait(&err_req, MPI_STATUS_IGNORE); - if (ERROR) - { - Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev); - SH->Dump_Data(StateList, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime - << ", lev = " << lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } -#endif - - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - f_icn_scalar(dT_lev, Porg0[ithBH][0], Porg[ithBH][0], Porg_rhs[ithBH][0], iter_count); - f_icn_scalar(dT_lev, Porg0[ithBH][1], Porg[ithBH][1], Porg_rhs[ithBH][1], iter_count); - f_icn_scalar(dT_lev, Porg0[ithBH][2], Porg[ithBH][2], Porg_rhs[ithBH][2], iter_count); - if (Symmetry > 0) - Porg[ithBH][2] = fabs(Porg[ithBH][2]); - if (Symmetry == 2) - { - Porg[ithBH][0] = fabs(Porg[ithBH][0]); - Porg[ithBH][1] = fabs(Porg[ithBH][1]); - } - if (!finite(Porg[ithBH][0]) || !finite(Porg[ithBH][1]) || !finite(Porg[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "predictor step finds NaN for BH's position from (" - << Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] - << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } - // data analysis part - // Warning NOTE: the variables1 are used as temp storege room - if (lev == a_lev) - { - AnalysisStuff(lev, dT_lev); - } - // corrector - for (iter_count = 1; iter_count < 3; 
iter_count++) - { - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#elif (AGM == 1) - if (iter_count == 3) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#endif - - if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], - cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], - cg->fgfs[Lap->sgfn], - cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn], - cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn], - cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn], - cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn], - cg->fgfs[gyy1->sgfn], cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn], - cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn], - cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn], - cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn], - cg->fgfs[Lap1->sgfn], - cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn], - cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn], - cg->fgfs[rho->sgfn], - cg->fgfs[Sx->sgfn], 
cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, ndeps, cor)) - { - cout << "find NaN in domain: (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; - // we do not check the correspondence here - - while (varl0) - { -#ifndef WithShell - if (lev == 0) // sommerfeld indeed - f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varl->data->sgfn], - varl0->data->propspeed, varl0->data->SoA, - Symmetry); -#endif - f_icn_rout(cg->shape, dT_lev, - cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); - -#ifndef WithShell - if (lev > 0) // fix BD point -#endif - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - 
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, - cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn], - varl0->data->SoA, - Symmetry, cor); - - varl0 = varl0->next; - varl = varl->next; - varl1 = varl1->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - - // NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls - -#ifdef WithShell - // evolve Shell Patches - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#elif (AGM == 1) - if (iter_count == 3) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#endif - - if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[fngfs + ShellPatch::gx], - cg->fgfs[fngfs + ShellPatch::gy], - cg->fgfs[fngfs + ShellPatch::gz], - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - 
cg->fgfs[fngfs + ShellPatch::dRdz], - cg->fgfs[fngfs + ShellPatch::drhodxx], - cg->fgfs[fngfs + ShellPatch::drhodxy], - cg->fgfs[fngfs + ShellPatch::drhodxz], - cg->fgfs[fngfs + ShellPatch::drhodyy], - cg->fgfs[fngfs + ShellPatch::drhodyz], - cg->fgfs[fngfs + ShellPatch::drhodzz], - cg->fgfs[fngfs + ShellPatch::dsigmadxx], - cg->fgfs[fngfs + ShellPatch::dsigmadxy], - cg->fgfs[fngfs + ShellPatch::dsigmadxz], - cg->fgfs[fngfs + ShellPatch::dsigmadyy], - cg->fgfs[fngfs + ShellPatch::dsigmadyz], - cg->fgfs[fngfs + ShellPatch::dsigmadzz], - cg->fgfs[fngfs + ShellPatch::dRdxx], - cg->fgfs[fngfs + ShellPatch::dRdxy], - cg->fgfs[fngfs + ShellPatch::dRdxz], - cg->fgfs[fngfs + ShellPatch::dRdyy], - cg->fgfs[fngfs + ShellPatch::dRdyz], - cg->fgfs[fngfs + ShellPatch::dRdzz], - cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], - cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], - cg->fgfs[Lap->sgfn], - cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn], - cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn], - cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn], - cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn], - cg->fgfs[gyy1->sgfn], cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn], - cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn], - cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn], - cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn], - cg->fgfs[Lap1->sgfn], - cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn], - cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn], - cg->fgfs[rho->sgfn], - cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], 
cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, numepsh, sPp->data->sst, cor)) - { - cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; - // we do not check the correspondence here - - while (varl0) - { - // sommerfeld indeed for outter boudary while fix BD for inner boundary - f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], - sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varl->data->sgfn], - varl0->data->propspeed, varl0->data->SoA, - Symmetry); - - f_rungekutta4_rout(cg->shape, dT_lev, - cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); - - varl0 = varl0->next; - varl = varl->next; - varl1 = varl1->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, 
cg->fgfs[phi1->sgfn], chitiny); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } - // Non-blocking error reduction overlapped with Sync to hide Allreduce latency - MPI_Request err_req_cor; - { - int erh = ERROR; - MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor); - } -#endif - - Parallel::AsyncSyncState async_cor; - Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor); - -#ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_cor, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << " Shell stuff synchronization used " - << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds! " << endl; - } - } -#endif - Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry); - -#ifdef WithShell - // Complete non-blocking error reduction and check - MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE); - if (ERROR) - { - Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev); - SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count - << " variables at t = " << PhysTime - << ", lev = " << lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } -#endif - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg, Porg1, Sfx, Sfy, Sfz, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - f_icn_scalar(dT_lev, Porg0[ithBH][0], Porg1[ithBH][0], Porg_rhs[ithBH][0], iter_count); - f_icn_scalar(dT_lev, Porg0[ithBH][1], Porg1[ithBH][1], Porg_rhs[ithBH][1], iter_count); - f_icn_scalar(dT_lev, Porg0[ithBH][2], Porg1[ithBH][2], Porg_rhs[ithBH][2], iter_count); - if (Symmetry > 0) - Porg1[ithBH][2] = fabs(Porg1[ithBH][2]); - if (Symmetry == 2) - { - 
Porg1[ithBH][0] = fabs(Porg1[ithBH][0]); - Porg1[ithBH][1] = fabs(Porg1[ithBH][1]); - } - if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << iter_count << " corrector step finds NaN for BH's position from (" - << Porg[ithBH][0] << "," << Porg[ithBH][1] << "," << Porg[ithBH][2] - << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } - // swap time level - if (iter_count < 3) - { - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(SynchList_pre, SynchList_cor, myrank); - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } -#ifdef WithShell - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(SynchList_pre, SynchList_cor, myrank); - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } -#endif - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - Porg[ithBH][0] = Porg1[ithBH][0]; - Porg[ithBH][1] = Porg1[ithBH][1]; - Porg[ithBH][2] = Porg1[ithBH][2]; - } - } - } - } -#if (RPS == 0) - // mesh refinement boundary part - RestrictProlong(lev, YN, BB); - -#ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->CS_Inter(SynchList_cor, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << " CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds! 
" << endl; - } - } -#endif - -#endif - // note the data structure before update - // SynchList_cor 1 ----------- - // - // StateList 0 ----------- - // - // OldStateList old ----------- - // update - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(StateList, SynchList_cor, myrank); - cg->swapList(OldStateList, SynchList_cor, myrank); - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } -#ifdef WithShell - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(StateList, SynchList_cor, myrank); - cg->swapList(OldStateList, SynchList_cor, myrank); - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } -#if 0 -// check StateList - { - SH->Dump_Data(StateList,0,PhysTime,dT_lev); - if(myrank == 0) - { - cout<<"check StateList"< 0 && lev == GH->levels - 1) - { - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - Porg0[ithBH][0] = Porg1[ithBH][0]; - Porg0[ithBH][1] = Porg1[ithBH][1]; - Porg0[ithBH][2] = Porg1[ithBH][2]; - } - } -} -#endif - -//================================================================================================ - - - -//================================================================================================ - -// This member function implements single-step time evolution for each AMR level -// Variant for the case PSTR == 0 - -//================================================================================================ - -#elif (PSTR == 1 || PSTR == 2 || PSTR == 3) -void bssn_class::Step(int lev, int YN) -{ - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"start Step"); - - setpbh(BH_num, Porg0, Mass, BH_num_input); - - double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); - -// new code 2013-2-15, zjcao -#if (MAPBH == 1) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - 
compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - for (int ith = 0; ith < 3; ith++) - Porg1[ithBH][ith] = Porg0[ithBH][ith] + Porg_rhs[ithBH][ith] * dT_lev; - if (Symmetry > 0) - Porg1[ithBH][2] = fabs(Porg1[ithBH][2]); - if (Symmetry == 2) - { - Porg1[ithBH][0] = fabs(Porg1[ithBH][0]); - Porg1[ithBH][1] = fabs(Porg1[ithBH][1]); - } - if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "predictor step finds NaN for BH's position from (" - << Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] - << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } -#endif - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor"); - -#ifdef With_AHF - AH_Step_Find(lev, dT_lev); -#endif - bool BB = fgt(PhysTime, StartTime, dT_lev / 2); - double ndeps = numepss; - if (lev < GH->movls) - ndeps = numepsb; - double TRK4 = PhysTime; - int iter_count = 0; // count RK4 substeps - int pre = 0, cor = 1; - int ERROR = 0; - - MyList *sPp; - // Predictor - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); -#endif - - if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], 
cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], - cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], - cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], - cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], - cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], - cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], - cg->fgfs[Lap_rhs->sgfn], - cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], - cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], - cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], 
cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, ndeps, pre)) - { - cout << "find NaN in domain: (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; // we do not check the correspondence here - while (varl0) - { -#if (SommerType == 0) -#ifndef WithShell - if (lev == 0) // sommerfeld indeed - f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - cg->fgfs[varlrhs->data->sgfn], - cg->fgfs[varl0->data->sgfn], - varl0->data->propspeed, varl0->data->SoA, - Symmetry); - -#endif -#endif - f_rungekutta4_rout(cg->shape, dT_lev, - cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); -#ifndef WithShell - if (lev > 0) // fix BD point -#endif - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, - cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn], - varl0->data->SoA, - Symmetry, cor); - -#if (SommerType == 1) -#warning "shell part still bam type" - if (lev == 0) // Shibata type sommerfeld - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, - cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn], - varl0->data->SoA, - Symmetry, pre); -#endif - - varl0 = varl0->next; - varl = varl->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); - } - if (BP == Pp->data->ble) - break; - 
BP = BP->next; - } - Pp = Pp->next; - } - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Predictor rhs calculation"); - - // Non-blocking error reduction overlapped with Sync to hide Allreduce latency - MPI_Request err_req; - { - int erh = ERROR; - MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, GH->Commlev[lev], &err_req); - } - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor sync"); - - Parallel::Sync_cached(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev]); - - // Complete non-blocking error reduction and check - MPI_Wait(&err_req, MPI_STATUS_IGNORE); - if (ERROR) - { - Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime << ", lev = " << lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - -#if (MAPBH == 0) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg[ithBH][0], Porg_rhs[ithBH][0], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg[ithBH][1], Porg_rhs[ithBH][1], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg[ithBH][2], Porg_rhs[ithBH][2], iter_count); - if (Symmetry > 0) - Porg[ithBH][2] = fabs(Porg[ithBH][2]); - if (Symmetry == 2) - { - Porg[ithBH][0] = fabs(Porg[ithBH][0]); - Porg[ithBH][1] = fabs(Porg[ithBH][1]); - } - if (!finite(Porg[ithBH][0]) || !finite(Porg[ithBH][1]) || !finite(Porg[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "predictor step finds NaN for BH's position from (" - << Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - 
Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } -#endif - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector"); - - // corrector - for (iter_count = 1; iter_count < 4; iter_count++) - { - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"head of Corrector"); - - // for RK4: t0, t0+dt/2, t0+dt/2, t0+dt; - if (iter_count == 1 || iter_count == 3) - TRK4 += dT_lev / 2; - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#elif (AGM == 1) - if (iter_count == 3) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#endif - - if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], - cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], - cg->fgfs[Lap->sgfn], - cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn], - cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn], - cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn], - cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn], - cg->fgfs[gyy1->sgfn], 
cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn], - cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn], - cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn], - cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn], - cg->fgfs[Lap1->sgfn], - cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn], - cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn], - cg->fgfs[rho->sgfn], - cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, ndeps, cor)) - { - cout << "find NaN in domain: (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; - // we do not check the correspondence here - while (varl0) - { -#if (SommerType == 0) -#ifndef WithShell - if (lev == 0) // sommerfeld indeed - f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], 
Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varl->data->sgfn], - varl0->data->propspeed, varl0->data->SoA, - Symmetry); -#endif -#endif - f_rungekutta4_rout(cg->shape, dT_lev, - cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); - -#ifndef WithShell - if (lev > 0) // fix BD point -#endif - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, - cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn], - varl0->data->SoA, - Symmetry, cor); - -#if (SommerType == 1) - if (lev == 1) // shibata type sommerfeld - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, - cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[varl->data->sgfn], cg->fgfs[varl1->data->sgfn], - varl0->data->SoA, - Symmetry, cor); -#endif - - varl0 = varl0->next; - varl = varl->next; - varl1 = varl1->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector error check"); - - // Non-blocking error reduction overlapped with Sync to hide Allreduce latency - MPI_Request err_req_cor; - { - int erh = ERROR; - MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, GH->Commlev[lev], &err_req_cor); - } - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector sync"); - - Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev]); - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Corrector sync"); 
- - // Complete non-blocking error reduction and check - MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE); - if (ERROR) - { - Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count - << " variables at t = " << PhysTime - << ", lev = " << lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - -#if (MAPBH == 0) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg, Porg1, Sfx, Sfy, Sfz, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg1[ithBH][0], Porg_rhs[ithBH][0], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg1[ithBH][1], Porg_rhs[ithBH][1], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg1[ithBH][2], Porg_rhs[ithBH][2], iter_count); - if (Symmetry > 0) - Porg1[ithBH][2] = fabs(Porg1[ithBH][2]); - if (Symmetry == 2) - { - Porg1[ithBH][0] = fabs(Porg1[ithBH][0]); - Porg1[ithBH][1] = fabs(Porg1[ithBH][1]); - } - if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << iter_count << " corrector step finds NaN for BH's position from (" - << Porg[ithBH][0] << "," << Porg[ithBH][1] << "," << Porg[ithBH][2] - << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } -// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Corrector of black hole position"); -#endif - - // swap time level - if (iter_count < 3) - { - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(SynchList_pre, SynchList_cor, myrank); - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - 
Pp = Pp->next; - } - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after pre cor swap"); - -#if (MAPBH == 0) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - Porg[ithBH][0] = Porg1[ithBH][0]; - Porg[ithBH][1] = Porg1[ithBH][1]; - Porg[ithBH][2] = Porg1[ithBH][2]; - } - } -#endif - } - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"tail of corrector"); - } -#if (RPS == 0) - // mesh refinement boundary part - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before RestrictProlong"); - RestrictProlong(lev, YN, BB); -#endif - // note the data structure before update - // SynchList_cor 1 ----------- - // - // StateList 0 ----------- - // - // OldStateList old ----------- - // update - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(StateList, SynchList_cor, myrank); - cg->swapList(OldStateList, SynchList_cor, myrank); - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - Porg0[ithBH][0] = Porg1[ithBH][0]; - Porg0[ithBH][1] = Porg1[ithBH][1]; - Porg0[ithBH][2] = Porg1[ithBH][2]; - // if(myrank==GH->start_rank[lev]) - // cout<start_rank[lev]) cout<mylev<Commlev[lev],GH->start_rank[lev],"complet GH Step"); -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function configures a single time-step evolution for the spherical-shell grid portion. 
- -//================================================================================================ - -#ifdef WithShell -void bssn_class::SHStep() -{ - int lev = 0; - // #if (PSTR == 1 || PSTR == 2) - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"start Step"); - // #endif - - setpbh(BH_num, Porg0, Mass, BH_num_input); - - double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); - - // #if (PSTR == 1 || PSTR == 2) - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor"); - // #endif - -#ifdef With_AHF - AH_Step_Find(lev, dT_lev); -#endif - bool BB = fgt(PhysTime, StartTime, dT_lev / 2); - double ndeps = numepss; - if (lev < GH->movls) - ndeps = numepsb; - double TRK4 = PhysTime; - int iter_count = 0; // count RK4 substeps - int pre = 0, cor = 1; - int ERROR = 0; - - MyList *sPp; - // Predictor - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); -#endif - - if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[fngfs + ShellPatch::gx], - cg->fgfs[fngfs + ShellPatch::gy], - cg->fgfs[fngfs + ShellPatch::gz], - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz], - cg->fgfs[fngfs + ShellPatch::drhodxx], - cg->fgfs[fngfs + ShellPatch::drhodxy], - cg->fgfs[fngfs + ShellPatch::drhodxz], 
- cg->fgfs[fngfs + ShellPatch::drhodyy], - cg->fgfs[fngfs + ShellPatch::drhodyz], - cg->fgfs[fngfs + ShellPatch::drhodzz], - cg->fgfs[fngfs + ShellPatch::dsigmadxx], - cg->fgfs[fngfs + ShellPatch::dsigmadxy], - cg->fgfs[fngfs + ShellPatch::dsigmadxz], - cg->fgfs[fngfs + ShellPatch::dsigmadyy], - cg->fgfs[fngfs + ShellPatch::dsigmadyz], - cg->fgfs[fngfs + ShellPatch::dsigmadzz], - cg->fgfs[fngfs + ShellPatch::dRdxx], - cg->fgfs[fngfs + ShellPatch::dRdxy], - cg->fgfs[fngfs + ShellPatch::dRdxz], - cg->fgfs[fngfs + ShellPatch::dRdyy], - cg->fgfs[fngfs + ShellPatch::dRdyz], - cg->fgfs[fngfs + ShellPatch::dRdzz], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], - cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], - cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], - cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], - cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], - cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], - cg->fgfs[Lap_rhs->sgfn], - cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], - cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], - cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], 
cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, numepsh, sPp->data->sst, pre)) - { - cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; - // we do not check the correspondence here - - while (varl0) - { - // sommerfeld indeed for outter boudary while fix BD for inner boundary - f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], - sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], - cg->fgfs[varlrhs->data->sgfn], - cg->fgfs[varl0->data->sgfn], - varl0->data->propspeed, varl0->data->SoA, - Symmetry); - - f_rungekutta4_rout(cg->shape, dT_lev, - cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); - - varl0 = varl0->next; - varl = varl->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; 
- } - -#if (PSTR == 1 || PSTR == 2) -// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor's error check"); -#endif - // Non-blocking error reduction overlapped with Synch to hide Allreduce latency - MPI_Request err_req; - { - int erh = ERROR; - MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req); - } - - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_pre, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << " Shell stuff synchronization used " - << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds! " << endl; - } - } - - // Complete non-blocking error reduction and check - MPI_Wait(&err_req, MPI_STATUS_IGNORE); - if (ERROR) - { - SH->Dump_Data(StateList, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in state variables on Shell Patches at t = " << PhysTime << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - // corrector - for (iter_count = 1; iter_count < 4; iter_count++) - { - // for RK4: t0, t0+dt/2, t0+dt/2, t0+dt; - if (iter_count == 1 || iter_count == 3) - TRK4 += dT_lev / 2; - - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#elif (AGM == 1) - if (iter_count == 3) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], 
cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#endif - - if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[fngfs + ShellPatch::gx], - cg->fgfs[fngfs + ShellPatch::gy], - cg->fgfs[fngfs + ShellPatch::gz], - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz], - cg->fgfs[fngfs + ShellPatch::drhodxx], - cg->fgfs[fngfs + ShellPatch::drhodxy], - cg->fgfs[fngfs + ShellPatch::drhodxz], - cg->fgfs[fngfs + ShellPatch::drhodyy], - cg->fgfs[fngfs + ShellPatch::drhodyz], - cg->fgfs[fngfs + ShellPatch::drhodzz], - cg->fgfs[fngfs + ShellPatch::dsigmadxx], - cg->fgfs[fngfs + ShellPatch::dsigmadxy], - cg->fgfs[fngfs + ShellPatch::dsigmadxz], - cg->fgfs[fngfs + ShellPatch::dsigmadyy], - cg->fgfs[fngfs + ShellPatch::dsigmadyz], - cg->fgfs[fngfs + ShellPatch::dsigmadzz], - cg->fgfs[fngfs + ShellPatch::dRdxx], - cg->fgfs[fngfs + ShellPatch::dRdxy], - cg->fgfs[fngfs + ShellPatch::dRdxz], - cg->fgfs[fngfs + ShellPatch::dRdyy], - cg->fgfs[fngfs + ShellPatch::dRdyz], - cg->fgfs[fngfs + ShellPatch::dRdzz], - cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], - cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], - cg->fgfs[Lap->sgfn], - cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn], - cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn], - cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn], - cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn], - cg->fgfs[gyy1->sgfn], 
cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn], - cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn], - cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn], - cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn], - cg->fgfs[Lap1->sgfn], - cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn], - cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn], - cg->fgfs[rho->sgfn], - cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, numepsh, sPp->data->sst, cor)) - { - cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; - // we do not check the correspondence here - - while (varl0) - { - // sommerfeld indeed for outter boudary while fix BD for inner boundary - f_sommerfeld_routbam_ss(cg->shape, cg->X[0], 
cg->X[1], cg->X[2], - sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], - sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varl->data->sgfn], - varl0->data->propspeed, varl0->data->SoA, - Symmetry); - - f_rungekutta4_rout(cg->shape, dT_lev, - cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); - - varl0 = varl0->next; - varl = varl->next; - varl1 = varl1->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } - // Non-blocking error reduction overlapped with Synch to hide Allreduce latency - MPI_Request err_req_cor; - { - int erh = ERROR; - MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor); - } - - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_cor, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << " Shell stuff synchronization used " - << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds! 
" << endl; - } - } - - // Complete non-blocking error reduction and check - MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE); - if (ERROR) - { - SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN on Shell Patches in RK4 substep#" << iter_count - << " variables at t = " << PhysTime << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(SynchList_pre, SynchList_cor, myrank); - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } -#if (RPS == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->CS_Inter(SynchList_cor, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << " CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds! " << endl; - } - } -#endif - // note the data structure before update - // SynchList_cor 1 ----------- - // - // StateList 0 ----------- - // - // OldStateList old ----------- - // update - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(StateList, SynchList_cor, myrank); - cg->swapList(OldStateList, SynchList_cor, myrank); - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } -} -#endif -#endif - -//================================================================================================ - - - -//================================================================================================ - + +#ifdef WithShell + if (lev == 0) + { + clock_t prev_clock, curr_clock; + if (myrank == 0) + curr_clock = clock(); + SH->CS_Inter(SynchList_cor, Symmetry); + if (myrank == 0) + { + prev_clock = curr_clock; + curr_clock = clock(); + cout << " CS_Inter used " << (double)(curr_clock - prev_clock) / 
((double)CLOCKS_PER_SEC) + << " seconds! " << endl; + } + } +#endif + +#endif + // note the data structure before update + // SynchList_cor 1 ----------- + // + // StateList 0 ----------- + // + // OldStateList old ----------- + // update + Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + cg->swapList(StateList, SynchList_cor, myrank); + cg->swapList(OldStateList, SynchList_cor, myrank); + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } +#ifdef WithShell + if (lev == 0) + { + sPp = SH->PatL; + while (sPp) + { + MyList *BP = sPp->data->blb; + while (BP) + { + Block *cg = BP->data; + cg->swapList(StateList, SynchList_cor, myrank); + cg->swapList(OldStateList, SynchList_cor, myrank); + if (BP == sPp->data->ble) + break; + BP = BP->next; + } + sPp = sPp->next; + } +#if 0 +// check StateList + { + SH->Dump_Data(StateList,0,PhysTime,dT_lev); + if(myrank == 0) + { + cout<<"check StateList"< 0 && lev == GH->levels - 1) + { + for (int ithBH = 0; ithBH < BH_num; ithBH++) + { + Porg0[ithBH][0] = Porg1[ithBH][0]; + Porg0[ithBH][1] = Porg1[ithBH][1]; + Porg0[ithBH][2] = Porg1[ithBH][2]; + } + } +} + +//================================================================================================ + + + + +//================================================================================================ + +// This member function implements single-step time evolution for each AMR level (alternate) + +//================================================================================================ + +// ICN for bam comparison + +#else +void bssn_class::Step(int lev, int YN) +{ + double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); +#ifdef With_AHF + AH_Step_Find(lev, dT_lev); +#endif + bool BB = fgt(PhysTime, StartTime, dT_lev / 2); + double ndeps = numepss; + if (lev < GH->movls) + ndeps = numepsb; + double TRK4 = PhysTime; + int iter_count = 0; // count RK4 substeps + int pre = 0, cor = 1; + 
int ERROR = 0; + + MyList *sPp; + // Predictor + MyList *Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { +#if (AGM == 0) + f_enforce_ga(cg->shape, + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); +#endif + + if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], + cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], + cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], + cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], + cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], + cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], + cg->fgfs[Lap_rhs->sgfn], + cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], + cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], + cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], + cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], + cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], + 
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], + cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], + cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], + cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], + cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], + cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], + cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], + cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], + cg->fgfs[Cons_Ham->sgfn], + cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], + cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], + Symmetry, lev, ndeps, pre)) + { + cout << "find NaN in domain: (" + << cg->bbox[0] << ":" << cg->bbox[3] << "," + << cg->bbox[1] << ":" << cg->bbox[4] << "," + << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; + ERROR = 1; + } + + // rk4 substep and boundary + { + MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; + // we do not check the correspondence here + while (varl0) + { +#ifndef WithShell + if (lev == 0) // sommerfeld indeed + f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2], + Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], + Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], + cg->fgfs[varlrhs->data->sgfn], + cg->fgfs[varl0->data->sgfn], varl0->data->propspeed, varl0->data->SoA, + Symmetry); + +#endif + f_icn_rout(cg->shape, dT_lev, + cg->fgfs[varl0->data->sgfn], + cg->fgfs[varl->data->sgfn], + cg->fgfs[varlrhs->data->sgfn], + iter_count); +#ifndef WithShell + if (lev > 0) // fix BD point +#endif + f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], + Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], + Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], + dT_lev, + cg->fgfs[phi0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[varl0->data->sgfn], 
cg->fgfs[varl->data->sgfn], + varl0->data->SoA, + Symmetry, cor); + + varl0 = varl0->next; + varl = varl->next; + varlrhs = varlrhs->next; + } + } + f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); + } + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } + // NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls + +#ifdef WithShell + // evolve Shell Patches + if (lev == 0) + { + sPp = SH->PatL; + while (sPp) + { + MyList *BP = sPp->data->blb; + int fngfs = sPp->data->fngfs; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { +#if (AGM == 0) + f_enforce_ga(cg->shape, + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); +#endif + + if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[fngfs + ShellPatch::gx], + cg->fgfs[fngfs + ShellPatch::gy], + cg->fgfs[fngfs + ShellPatch::gz], + cg->fgfs[fngfs + ShellPatch::drhodx], + cg->fgfs[fngfs + ShellPatch::drhody], + cg->fgfs[fngfs + ShellPatch::drhodz], + cg->fgfs[fngfs + ShellPatch::dsigmadx], + cg->fgfs[fngfs + ShellPatch::dsigmady], + cg->fgfs[fngfs + ShellPatch::dsigmadz], + cg->fgfs[fngfs + ShellPatch::dRdx], + cg->fgfs[fngfs + ShellPatch::dRdy], + cg->fgfs[fngfs + ShellPatch::dRdz], + cg->fgfs[fngfs + ShellPatch::drhodxx], + cg->fgfs[fngfs + ShellPatch::drhodxy], + cg->fgfs[fngfs + ShellPatch::drhodxz], + cg->fgfs[fngfs + ShellPatch::drhodyy], + cg->fgfs[fngfs + ShellPatch::drhodyz], + cg->fgfs[fngfs + ShellPatch::drhodzz], + cg->fgfs[fngfs + ShellPatch::dsigmadxx], + cg->fgfs[fngfs + ShellPatch::dsigmadxy], + cg->fgfs[fngfs + ShellPatch::dsigmadxz], + cg->fgfs[fngfs + ShellPatch::dsigmadyy], + cg->fgfs[fngfs + ShellPatch::dsigmadyz], + cg->fgfs[fngfs + ShellPatch::dsigmadzz], + 
cg->fgfs[fngfs + ShellPatch::dRdxx], + cg->fgfs[fngfs + ShellPatch::dRdxy], + cg->fgfs[fngfs + ShellPatch::dRdxz], + cg->fgfs[fngfs + ShellPatch::dRdyy], + cg->fgfs[fngfs + ShellPatch::dRdyz], + cg->fgfs[fngfs + ShellPatch::dRdzz], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], + cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], + cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], + cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], + cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], + cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], + cg->fgfs[Lap_rhs->sgfn], + cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], + cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], + cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], + cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], + cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], + cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], + cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], + cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], + cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], + cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], + 
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], + cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], + cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], + cg->fgfs[Cons_Ham->sgfn], + cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], + cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], + Symmetry, lev, numepsh, sPp->data->sst, pre)) + { + cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" + << cg->bbox[0] << ":" << cg->bbox[3] << "," + << cg->bbox[1] << ":" << cg->bbox[4] << "," + << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; + ERROR = 1; + } + + // rk4 substep and boundary + { + MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; // we do not check the correspondence here + while (varl0) + { + // sommerfeld indeed for outter boudary while fix BD for inner boundary + f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], + sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], + sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], + cg->fgfs[varlrhs->data->sgfn], + cg->fgfs[varl0->data->sgfn], + varl0->data->propspeed, varl0->data->SoA, + Symmetry); + + f_icn_rout(cg->shape, dT_lev, + cg->fgfs[varl0->data->sgfn], + cg->fgfs[varl->data->sgfn], + cg->fgfs[varlrhs->data->sgfn], + iter_count); + + varl0 = varl0->next; + varl = varl->next; + varlrhs = varlrhs->next; + } + } + f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); + } + if (BP == sPp->data->ble) + break; + BP = BP->next; + } + sPp = sPp->next; + } +#if 0 +// check rhs + { + SH->Dump_Data(RHSList,0,PhysTime,dT_lev); + if(myrank == 0) + { + cout<<"check rhs"<PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre); + +#ifdef WithShell + if (lev == 0) + { + clock_t prev_clock, curr_clock; + if (myrank == 0) + curr_clock = clock(); + SH->Synch(SynchList_pre, Symmetry); + if (myrank == 0) + { + prev_clock = curr_clock; + curr_clock = 
clock(); + cout << " Shell stuff synchronization used " + << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) + << " seconds! " << endl; + } + } +#endif + Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry); + +#ifdef WithShell + // Complete non-blocking error reduction and check + MPI_Wait(&err_req, MPI_STATUS_IGNORE); + if (ERROR) + { + Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev); + SH->Dump_Data(StateList, 0, PhysTime, dT_lev); + if (myrank == 0) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime + << ", lev = " << lev << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + } +#endif + + // for black hole position + if (BH_num > 0 && lev == GH->levels - 1) + { + compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev); + for (int ithBH = 0; ithBH < BH_num; ithBH++) + { + f_icn_scalar(dT_lev, Porg0[ithBH][0], Porg[ithBH][0], Porg_rhs[ithBH][0], iter_count); + f_icn_scalar(dT_lev, Porg0[ithBH][1], Porg[ithBH][1], Porg_rhs[ithBH][1], iter_count); + f_icn_scalar(dT_lev, Porg0[ithBH][2], Porg[ithBH][2], Porg_rhs[ithBH][2], iter_count); + if (Symmetry > 0) + Porg[ithBH][2] = fabs(Porg[ithBH][2]); + if (Symmetry == 2) + { + Porg[ithBH][0] = fabs(Porg[ithBH][0]); + Porg[ithBH][1] = fabs(Porg[ithBH][1]); + } + if (!finite(Porg[ithBH][0]) || !finite(Porg[ithBH][1]) || !finite(Porg[ithBH][2])) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "predictor step finds NaN for BH's position from (" + << Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] + << ")" << endl; + + MyList *DG_List = new MyList(Sfx0); + DG_List->insert(Sfx0); + DG_List->insert(Sfy0); + DG_List->insert(Sfz0); + Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); + DG_List->clearList(); + } + } + } + // data analysis part + // Warning NOTE: the variables1 are used as temp storege room + if (lev == a_lev) + { + AnalysisStuff(lev, dT_lev); + } + // 
corrector + for (iter_count = 1; iter_count < 3; iter_count++) + { + Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { +#if (AGM == 0) + f_enforce_ga(cg->shape, + cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], + cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], + cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], + cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); +#elif (AGM == 1) + if (iter_count == 3) + f_enforce_ga(cg->shape, + cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], + cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], + cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], + cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); +#endif + + if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], + cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], + cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], + cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], + cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], + cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], + cg->fgfs[Lap->sgfn], + cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn], + cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn], + cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn], + cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn], + cg->fgfs[gyy1->sgfn], cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn], + cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn], + cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn], + cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn], + cg->fgfs[Lap1->sgfn], + cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn], + cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn], 
+ cg->fgfs[rho->sgfn], + cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], + cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], + cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], + cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], + cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], + cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], + cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], + cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], + cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], + cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], + cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], + cg->fgfs[Cons_Ham->sgfn], + cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], + cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], + Symmetry, lev, ndeps, cor)) + { + cout << "find NaN in domain: (" + << cg->bbox[0] << ":" << cg->bbox[3] << "," + << cg->bbox[1] << ":" << cg->bbox[4] << "," + << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; + ERROR = 1; + } + // rk4 substep and boundary + { + MyList *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; + // we do not check the correspondence here + + while (varl0) + { +#ifndef WithShell + if (lev == 0) // sommerfeld indeed + f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2], + Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], + Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], + cg->fgfs[varl1->data->sgfn], + cg->fgfs[varl->data->sgfn], + varl0->data->propspeed, varl0->data->SoA, + Symmetry); +#endif + f_icn_rout(cg->shape, dT_lev, + cg->fgfs[varl0->data->sgfn], + cg->fgfs[varl1->data->sgfn], + cg->fgfs[varlrhs->data->sgfn], + iter_count); + +#ifndef WithShell + if (lev > 0) // fix BD point +#endif + f_sommerfeld_rout(cg->shape, 
cg->X[0], cg->X[1], cg->X[2], + Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], + Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], + dT_lev, + cg->fgfs[phi0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn], + varl0->data->SoA, + Symmetry, cor); + + varl0 = varl0->next; + varl = varl->next; + varl1 = varl1->next; + varlrhs = varlrhs->next; + } + } + f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); + } + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } + + // NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls + +#ifdef WithShell + // evolve Shell Patches + if (lev == 0) + { + sPp = SH->PatL; + while (sPp) + { + MyList *BP = sPp->data->blb; + int fngfs = sPp->data->fngfs; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { +#if (AGM == 0) + f_enforce_ga(cg->shape, + cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], + cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], + cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], + cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); +#elif (AGM == 1) + if (iter_count == 3) + f_enforce_ga(cg->shape, + cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], + cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], + cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], + cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); +#endif + + if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[fngfs + ShellPatch::gx], + cg->fgfs[fngfs + ShellPatch::gy], + cg->fgfs[fngfs + ShellPatch::gz], + cg->fgfs[fngfs + ShellPatch::drhodx], + cg->fgfs[fngfs + ShellPatch::drhody], + cg->fgfs[fngfs + ShellPatch::drhodz], + cg->fgfs[fngfs + ShellPatch::dsigmadx], + cg->fgfs[fngfs + ShellPatch::dsigmady], + cg->fgfs[fngfs + ShellPatch::dsigmadz], + cg->fgfs[fngfs + ShellPatch::dRdx], + 
cg->fgfs[fngfs + ShellPatch::dRdy], + cg->fgfs[fngfs + ShellPatch::dRdz], + cg->fgfs[fngfs + ShellPatch::drhodxx], + cg->fgfs[fngfs + ShellPatch::drhodxy], + cg->fgfs[fngfs + ShellPatch::drhodxz], + cg->fgfs[fngfs + ShellPatch::drhodyy], + cg->fgfs[fngfs + ShellPatch::drhodyz], + cg->fgfs[fngfs + ShellPatch::drhodzz], + cg->fgfs[fngfs + ShellPatch::dsigmadxx], + cg->fgfs[fngfs + ShellPatch::dsigmadxy], + cg->fgfs[fngfs + ShellPatch::dsigmadxz], + cg->fgfs[fngfs + ShellPatch::dsigmadyy], + cg->fgfs[fngfs + ShellPatch::dsigmadyz], + cg->fgfs[fngfs + ShellPatch::dsigmadzz], + cg->fgfs[fngfs + ShellPatch::dRdxx], + cg->fgfs[fngfs + ShellPatch::dRdxy], + cg->fgfs[fngfs + ShellPatch::dRdxz], + cg->fgfs[fngfs + ShellPatch::dRdyy], + cg->fgfs[fngfs + ShellPatch::dRdyz], + cg->fgfs[fngfs + ShellPatch::dRdzz], + cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], + cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], + cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], + cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], + cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], + cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], + cg->fgfs[Lap->sgfn], + cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn], + cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn], + cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn], + cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn], + cg->fgfs[gyy1->sgfn], cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn], + cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn], + cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn], + cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn], + cg->fgfs[Lap1->sgfn], + cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn], + cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn], + cg->fgfs[rho->sgfn], + cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], + cg->fgfs[Sxx->sgfn], 
cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], + cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], + cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], + cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], + cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], + cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], + cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], + cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], + cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], + cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], + cg->fgfs[Cons_Ham->sgfn], + cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], + cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], + Symmetry, lev, numepsh, sPp->data->sst, cor)) + { + cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" + << cg->bbox[0] << ":" << cg->bbox[3] << "," + << cg->bbox[1] << ":" << cg->bbox[4] << "," + << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; + ERROR = 1; + } + // rk4 substep and boundary + { + MyList *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; + // we do not check the correspondence here + + while (varl0) + { + // sommerfeld indeed for outter boudary while fix BD for inner boundary + f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], + sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], + sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], + cg->fgfs[varl1->data->sgfn], + cg->fgfs[varl->data->sgfn], + varl0->data->propspeed, varl0->data->SoA, + Symmetry); + + f_rungekutta4_rout(cg->shape, dT_lev, + cg->fgfs[varl0->data->sgfn], + cg->fgfs[varl1->data->sgfn], + cg->fgfs[varlrhs->data->sgfn], + iter_count); + + varl0 = varl0->next; + varl = varl->next; + varl1 = varl1->next; + varlrhs = varlrhs->next; + } + } + 
f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); + } + if (BP == sPp->data->ble) + break; + BP = BP->next; + } + sPp = sPp->next; + } + } + // Non-blocking error reduction overlapped with Sync to hide Allreduce latency + MPI_Request err_req_cor; + { + int erh = ERROR; + MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor); + } +#endif + + Parallel::AsyncSyncState async_cor; + Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor); + +#ifdef WithShell + if (lev == 0) + { + clock_t prev_clock, curr_clock; + if (myrank == 0) + curr_clock = clock(); + SH->Synch(SynchList_cor, Symmetry); + if (myrank == 0) + { + prev_clock = curr_clock; + curr_clock = clock(); + cout << " Shell stuff synchronization used " + << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) + << " seconds! " << endl; + } + } +#endif + Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry); + +#ifdef WithShell + // Complete non-blocking error reduction and check + MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE); + if (ERROR) + { + Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev); + SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev); + if (myrank == 0) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count + << " variables at t = " << PhysTime + << ", lev = " << lev << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + } +#endif + // for black hole position + if (BH_num > 0 && lev == GH->levels - 1) + { + compute_Porg_rhs(Porg, Porg1, Sfx, Sfy, Sfz, lev); + for (int ithBH = 0; ithBH < BH_num; ithBH++) + { + f_icn_scalar(dT_lev, Porg0[ithBH][0], Porg1[ithBH][0], Porg_rhs[ithBH][0], iter_count); + f_icn_scalar(dT_lev, Porg0[ithBH][1], Porg1[ithBH][1], Porg_rhs[ithBH][1], iter_count); + f_icn_scalar(dT_lev, Porg0[ithBH][2], Porg1[ithBH][2], Porg_rhs[ithBH][2], iter_count); + if (Symmetry > 0) + Porg1[ithBH][2] = fabs(Porg1[ithBH][2]); + if 
(Symmetry == 2) + { + Porg1[ithBH][0] = fabs(Porg1[ithBH][0]); + Porg1[ithBH][1] = fabs(Porg1[ithBH][1]); + } + if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2])) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << iter_count << " corrector step finds NaN for BH's position from (" + << Porg[ithBH][0] << "," << Porg[ithBH][1] << "," << Porg[ithBH][2] + << ")" << endl; + + MyList *DG_List = new MyList(Sfx0); + DG_List->insert(Sfx0); + DG_List->insert(Sfy0); + DG_List->insert(Sfz0); + Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); + DG_List->clearList(); + } + } + } + // swap time level + if (iter_count < 3) + { + Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + cg->swapList(SynchList_pre, SynchList_cor, myrank); + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } +#ifdef WithShell + if (lev == 0) + { + sPp = SH->PatL; + while (sPp) + { + MyList *BP = sPp->data->blb; + while (BP) + { + Block *cg = BP->data; + cg->swapList(SynchList_pre, SynchList_cor, myrank); + if (BP == sPp->data->ble) + break; + BP = BP->next; + } + sPp = sPp->next; + } + } +#endif + // for black hole position + if (BH_num > 0 && lev == GH->levels - 1) + { + for (int ithBH = 0; ithBH < BH_num; ithBH++) + { + Porg[ithBH][0] = Porg1[ithBH][0]; + Porg[ithBH][1] = Porg1[ithBH][1]; + Porg[ithBH][2] = Porg1[ithBH][2]; + } + } + } + } +#if (RPS == 0) + // mesh refinement boundary part + RestrictProlong(lev, YN, BB); + +#ifdef WithShell + if (lev == 0) + { + clock_t prev_clock, curr_clock; + if (myrank == 0) + curr_clock = clock(); + SH->CS_Inter(SynchList_cor, Symmetry); + if (myrank == 0) + { + prev_clock = curr_clock; + curr_clock = clock(); + cout << " CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) + << " seconds! 
" << endl; + } + } +#endif + +#endif + // note the data structure before update + // SynchList_cor 1 ----------- + // + // StateList 0 ----------- + // + // OldStateList old ----------- + // update + Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + cg->swapList(StateList, SynchList_cor, myrank); + cg->swapList(OldStateList, SynchList_cor, myrank); + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } +#ifdef WithShell + if (lev == 0) + { + sPp = SH->PatL; + while (sPp) + { + MyList *BP = sPp->data->blb; + while (BP) + { + Block *cg = BP->data; + cg->swapList(StateList, SynchList_cor, myrank); + cg->swapList(OldStateList, SynchList_cor, myrank); + if (BP == sPp->data->ble) + break; + BP = BP->next; + } + sPp = sPp->next; + } +#if 0 +// check StateList + { + SH->Dump_Data(StateList,0,PhysTime,dT_lev); + if(myrank == 0) + { + cout<<"check StateList"< 0 && lev == GH->levels - 1) + { + for (int ithBH = 0; ithBH < BH_num; ithBH++) + { + Porg0[ithBH][0] = Porg1[ithBH][0]; + Porg0[ithBH][1] = Porg1[ithBH][1]; + Porg0[ithBH][2] = Porg1[ithBH][2]; + } + } +} +#endif + +//================================================================================================ + + + +//================================================================================================ + +// This member function implements single-step time evolution for each AMR level +// Variant for the case PSTR == 0 + +//================================================================================================ + +#elif (PSTR == 1 || PSTR == 2 || PSTR == 3) +void bssn_class::Step(int lev, int YN) +{ + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"start Step"); + + setpbh(BH_num, Porg0, Mass, BH_num_input); + + double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); + +// new code 2013-2-15, zjcao +#if (MAPBH == 1) + // for black hole position + if (BH_num > 0 && lev == GH->levels - 1) + { + 
compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev); + for (int ithBH = 0; ithBH < BH_num; ithBH++) + { + for (int ith = 0; ith < 3; ith++) + Porg1[ithBH][ith] = Porg0[ithBH][ith] + Porg_rhs[ithBH][ith] * dT_lev; + if (Symmetry > 0) + Porg1[ithBH][2] = fabs(Porg1[ithBH][2]); + if (Symmetry == 2) + { + Porg1[ithBH][0] = fabs(Porg1[ithBH][0]); + Porg1[ithBH][1] = fabs(Porg1[ithBH][1]); + } + if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2])) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "predictor step finds NaN for BH's position from (" + << Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] + << ")" << endl; + + MyList *DG_List = new MyList(Sfx0); + DG_List->insert(Sfx0); + DG_List->insert(Sfy0); + DG_List->insert(Sfz0); + Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); + DG_List->clearList(); + } + } + } +#endif + + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor"); + +#ifdef With_AHF + AH_Step_Find(lev, dT_lev); +#endif + bool BB = fgt(PhysTime, StartTime, dT_lev / 2); + double ndeps = numepss; + if (lev < GH->movls) + ndeps = numepsb; + double TRK4 = PhysTime; + int iter_count = 0; // count RK4 substeps + int pre = 0, cor = 1; + int ERROR = 0; + + MyList *sPp; + // Predictor + MyList *Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { +#if (AGM == 0) + f_enforce_ga(cg->shape, + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); +#endif + + if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], 
cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], + cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], + cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], + cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], + cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], + cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], + cg->fgfs[Lap_rhs->sgfn], + cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], + cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], + cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], + cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], + cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], + cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], + cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], + cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], + cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], + cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], + cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], + cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], + cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], + cg->fgfs[Cons_Ham->sgfn], + cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], + cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], 
cg->fgfs[Cons_Gz->sgfn], + Symmetry, lev, ndeps, pre)) + { + cout << "find NaN in domain: (" + << cg->bbox[0] << ":" << cg->bbox[3] << "," + << cg->bbox[1] << ":" << cg->bbox[4] << "," + << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; + ERROR = 1; + } + + // rk4 substep and boundary + { + MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; // we do not check the correspondence here + while (varl0) + { +#if (SommerType == 0) +#ifndef WithShell + if (lev == 0) // sommerfeld indeed + f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2], + Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], + Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], + cg->fgfs[varlrhs->data->sgfn], + cg->fgfs[varl0->data->sgfn], + varl0->data->propspeed, varl0->data->SoA, + Symmetry); + +#endif +#endif + f_rungekutta4_rout(cg->shape, dT_lev, + cg->fgfs[varl0->data->sgfn], + cg->fgfs[varl->data->sgfn], + cg->fgfs[varlrhs->data->sgfn], + iter_count); +#ifndef WithShell + if (lev > 0) // fix BD point +#endif + f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], + Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], + Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], + dT_lev, + cg->fgfs[phi0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn], + varl0->data->SoA, + Symmetry, cor); + +#if (SommerType == 1) +#warning "shell part still bam type" + if (lev == 0) // Shibata type sommerfeld + f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], + Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], + Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], + dT_lev, + cg->fgfs[phi0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn], + varl0->data->SoA, + Symmetry, pre); +#endif + + varl0 = varl0->next; + varl = varl->next; + varlrhs = varlrhs->next; + } + } + f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); + } + if (BP == Pp->data->ble) + break; + 
BP = BP->next; + } + Pp = Pp->next; + } + + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Predictor rhs calculation"); + + // Non-blocking error reduction overlapped with Sync to hide Allreduce latency + MPI_Request err_req; + { + int erh = ERROR; + MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, GH->Commlev[lev], &err_req); + } + + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor sync"); + + Parallel::Sync_cached(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev]); + + // Complete non-blocking error reduction and check + MPI_Wait(&err_req, MPI_STATUS_IGNORE); + if (ERROR) + { + Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev); + if (myrank == 0) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime << ", lev = " << lev << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + +#if (MAPBH == 0) + // for black hole position + if (BH_num > 0 && lev == GH->levels - 1) + { + compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev); + for (int ithBH = 0; ithBH < BH_num; ithBH++) + { + f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg[ithBH][0], Porg_rhs[ithBH][0], iter_count); + f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg[ithBH][1], Porg_rhs[ithBH][1], iter_count); + f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg[ithBH][2], Porg_rhs[ithBH][2], iter_count); + if (Symmetry > 0) + Porg[ithBH][2] = fabs(Porg[ithBH][2]); + if (Symmetry == 2) + { + Porg[ithBH][0] = fabs(Porg[ithBH][0]); + Porg[ithBH][1] = fabs(Porg[ithBH][1]); + } + if (!finite(Porg[ithBH][0]) || !finite(Porg[ithBH][1]) || !finite(Porg[ithBH][2])) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "predictor step finds NaN for BH's position from (" + << Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] << ")" << endl; + + MyList *DG_List = new MyList(Sfx0); + DG_List->insert(Sfx0); + DG_List->insert(Sfy0); + DG_List->insert(Sfz0); + 
Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); + DG_List->clearList(); + } + } + } +#endif + + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector"); + + // corrector + for (iter_count = 1; iter_count < 4; iter_count++) + { + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"head of Corrector"); + + // for RK4: t0, t0+dt/2, t0+dt/2, t0+dt; + if (iter_count == 1 || iter_count == 3) + TRK4 += dT_lev / 2; + Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { +#if (AGM == 0) + f_enforce_ga(cg->shape, + cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], + cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], + cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], + cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); +#elif (AGM == 1) + if (iter_count == 3) + f_enforce_ga(cg->shape, + cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], + cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], + cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], + cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); +#endif + + if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], + cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], + cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], + cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], + cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], + cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], + cg->fgfs[Lap->sgfn], + cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn], + cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn], + cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn], + cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn], + cg->fgfs[gyy1->sgfn], 
cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn], + cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn], + cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn], + cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn], + cg->fgfs[Lap1->sgfn], + cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn], + cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn], + cg->fgfs[rho->sgfn], + cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], + cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], + cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], + cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], + cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], + cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], + cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], + cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], + cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], + cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], + cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], + cg->fgfs[Cons_Ham->sgfn], + cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], + cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], + Symmetry, lev, ndeps, cor)) + { + cout << "find NaN in domain: (" + << cg->bbox[0] << ":" << cg->bbox[3] << "," + << cg->bbox[1] << ":" << cg->bbox[4] << "," + << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; + ERROR = 1; + } + // rk4 substep and boundary + { + MyList *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; + // we do not check the correspondence here + while (varl0) + { +#if (SommerType == 0) +#ifndef WithShell + if (lev == 0) // sommerfeld indeed + f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2], + Pp->data->bbox[0], Pp->data->bbox[1], 
Pp->data->bbox[2], + Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], + cg->fgfs[varl1->data->sgfn], + cg->fgfs[varl->data->sgfn], + varl0->data->propspeed, varl0->data->SoA, + Symmetry); +#endif +#endif + f_rungekutta4_rout(cg->shape, dT_lev, + cg->fgfs[varl0->data->sgfn], + cg->fgfs[varl1->data->sgfn], + cg->fgfs[varlrhs->data->sgfn], + iter_count); + +#ifndef WithShell + if (lev > 0) // fix BD point +#endif + f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], + Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], + Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], + dT_lev, + cg->fgfs[phi0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn], + varl0->data->SoA, + Symmetry, cor); + +#if (SommerType == 1) + if (lev == 1) // shibata type sommerfeld + f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], + Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], + Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], + dT_lev, + cg->fgfs[phi0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[varl->data->sgfn], cg->fgfs[varl1->data->sgfn], + varl0->data->SoA, + Symmetry, cor); +#endif + + varl0 = varl0->next; + varl = varl->next; + varl1 = varl1->next; + varlrhs = varlrhs->next; + } + } + f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); + } + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } + + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector error check"); + + // Non-blocking error reduction overlapped with Sync to hide Allreduce latency + MPI_Request err_req_cor; + { + int erh = ERROR; + MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, GH->Commlev[lev], &err_req_cor); + } + + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector sync"); + + Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev]); + + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Corrector sync"); 
+ + // Complete non-blocking error reduction and check + MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE); + if (ERROR) + { + Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev); + if (myrank == 0) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count + << " variables at t = " << PhysTime + << ", lev = " << lev << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + +#if (MAPBH == 0) + // for black hole position + if (BH_num > 0 && lev == GH->levels - 1) + { + compute_Porg_rhs(Porg, Porg1, Sfx, Sfy, Sfz, lev); + for (int ithBH = 0; ithBH < BH_num; ithBH++) + { + f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg1[ithBH][0], Porg_rhs[ithBH][0], iter_count); + f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg1[ithBH][1], Porg_rhs[ithBH][1], iter_count); + f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg1[ithBH][2], Porg_rhs[ithBH][2], iter_count); + if (Symmetry > 0) + Porg1[ithBH][2] = fabs(Porg1[ithBH][2]); + if (Symmetry == 2) + { + Porg1[ithBH][0] = fabs(Porg1[ithBH][0]); + Porg1[ithBH][1] = fabs(Porg1[ithBH][1]); + } + if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2])) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << iter_count << " corrector step finds NaN for BH's position from (" + << Porg[ithBH][0] << "," << Porg[ithBH][1] << "," << Porg[ithBH][2] + << ")" << endl; + + MyList *DG_List = new MyList(Sfx0); + DG_List->insert(Sfx0); + DG_List->insert(Sfy0); + DG_List->insert(Sfz0); + Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); + DG_List->clearList(); + } + } + } +// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Corrector of black hole position"); +#endif + + // swap time level + if (iter_count < 3) + { + Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + cg->swapList(SynchList_pre, SynchList_cor, myrank); + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + 
Pp = Pp->next; + } + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after pre cor swap"); + +#if (MAPBH == 0) + // for black hole position + if (BH_num > 0 && lev == GH->levels - 1) + { + for (int ithBH = 0; ithBH < BH_num; ithBH++) + { + Porg[ithBH][0] = Porg1[ithBH][0]; + Porg[ithBH][1] = Porg1[ithBH][1]; + Porg[ithBH][2] = Porg1[ithBH][2]; + } + } +#endif + } + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"tail of corrector"); + } +#if (RPS == 0) + // mesh refinement boundary part + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before RestrictProlong"); + RestrictProlong(lev, YN, BB); +#endif + // note the data structure before update + // SynchList_cor 1 ----------- + // + // StateList 0 ----------- + // + // OldStateList old ----------- + // update + Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + cg->swapList(StateList, SynchList_cor, myrank); + cg->swapList(OldStateList, SynchList_cor, myrank); + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } + // for black hole position + if (BH_num > 0 && lev == GH->levels - 1) + { + for (int ithBH = 0; ithBH < BH_num; ithBH++) + { + Porg0[ithBH][0] = Porg1[ithBH][0]; + Porg0[ithBH][1] = Porg1[ithBH][1]; + Porg0[ithBH][2] = Porg1[ithBH][2]; + // if(myrank==GH->start_rank[lev]) + // cout<start_rank[lev]) cout<mylev<Commlev[lev],GH->start_rank[lev],"complet GH Step"); +} + +//================================================================================================ + + + +//================================================================================================ + +// This member function configures a single time-step evolution for the spherical-shell grid portion. 
+ +//================================================================================================ + +#ifdef WithShell +void bssn_class::SHStep() +{ + int lev = 0; + // #if (PSTR == 1 || PSTR == 2) + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"start Step"); + // #endif + + setpbh(BH_num, Porg0, Mass, BH_num_input); + + double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); + + // #if (PSTR == 1 || PSTR == 2) + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor"); + // #endif + +#ifdef With_AHF + AH_Step_Find(lev, dT_lev); +#endif + bool BB = fgt(PhysTime, StartTime, dT_lev / 2); + double ndeps = numepss; + if (lev < GH->movls) + ndeps = numepsb; + double TRK4 = PhysTime; + int iter_count = 0; // count RK4 substeps + int pre = 0, cor = 1; + int ERROR = 0; + + MyList *sPp; + // Predictor + sPp = SH->PatL; + while (sPp) + { + MyList *BP = sPp->data->blb; + int fngfs = sPp->data->fngfs; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { +#if (AGM == 0) + f_enforce_ga(cg->shape, + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); +#endif + + if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[fngfs + ShellPatch::gx], + cg->fgfs[fngfs + ShellPatch::gy], + cg->fgfs[fngfs + ShellPatch::gz], + cg->fgfs[fngfs + ShellPatch::drhodx], + cg->fgfs[fngfs + ShellPatch::drhody], + cg->fgfs[fngfs + ShellPatch::drhodz], + cg->fgfs[fngfs + ShellPatch::dsigmadx], + cg->fgfs[fngfs + ShellPatch::dsigmady], + cg->fgfs[fngfs + ShellPatch::dsigmadz], + cg->fgfs[fngfs + ShellPatch::dRdx], + cg->fgfs[fngfs + ShellPatch::dRdy], + cg->fgfs[fngfs + ShellPatch::dRdz], + cg->fgfs[fngfs + ShellPatch::drhodxx], + cg->fgfs[fngfs + ShellPatch::drhodxy], + cg->fgfs[fngfs + ShellPatch::drhodxz], 
+ cg->fgfs[fngfs + ShellPatch::drhodyy], + cg->fgfs[fngfs + ShellPatch::drhodyz], + cg->fgfs[fngfs + ShellPatch::drhodzz], + cg->fgfs[fngfs + ShellPatch::dsigmadxx], + cg->fgfs[fngfs + ShellPatch::dsigmadxy], + cg->fgfs[fngfs + ShellPatch::dsigmadxz], + cg->fgfs[fngfs + ShellPatch::dsigmadyy], + cg->fgfs[fngfs + ShellPatch::dsigmadyz], + cg->fgfs[fngfs + ShellPatch::dsigmadzz], + cg->fgfs[fngfs + ShellPatch::dRdxx], + cg->fgfs[fngfs + ShellPatch::dRdxy], + cg->fgfs[fngfs + ShellPatch::dRdxz], + cg->fgfs[fngfs + ShellPatch::dRdyy], + cg->fgfs[fngfs + ShellPatch::dRdyz], + cg->fgfs[fngfs + ShellPatch::dRdzz], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], + cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], + cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], + cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], + cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], + cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], + cg->fgfs[Lap_rhs->sgfn], + cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], + cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], + cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], + cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], + cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], 
cg->fgfs[Szz->sgfn], + cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], + cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], + cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], + cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], + cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], + cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], + cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], + cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], + cg->fgfs[Cons_Ham->sgfn], + cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], + cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], + Symmetry, lev, numepsh, sPp->data->sst, pre)) + { + cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" + << cg->bbox[0] << ":" << cg->bbox[3] << "," + << cg->bbox[1] << ":" << cg->bbox[4] << "," + << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; + ERROR = 1; + } + + // rk4 substep and boundary + { + MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; + // we do not check the correspondence here + + while (varl0) + { + // sommerfeld indeed for outter boudary while fix BD for inner boundary + f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], + sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], + sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], + cg->fgfs[varlrhs->data->sgfn], + cg->fgfs[varl0->data->sgfn], + varl0->data->propspeed, varl0->data->SoA, + Symmetry); + + f_rungekutta4_rout(cg->shape, dT_lev, + cg->fgfs[varl0->data->sgfn], + cg->fgfs[varl->data->sgfn], + cg->fgfs[varlrhs->data->sgfn], + iter_count); + + varl0 = varl0->next; + varl = varl->next; + varlrhs = varlrhs->next; + } + } + f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); + } + if (BP == sPp->data->ble) + break; + BP = BP->next; + } + sPp = sPp->next; 
+ } + +#if (PSTR == 1 || PSTR == 2) +// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor's error check"); +#endif + // Non-blocking error reduction overlapped with Synch to hide Allreduce latency + MPI_Request err_req; + { + int erh = ERROR; + MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req); + } + + { + clock_t prev_clock, curr_clock; + if (myrank == 0) + curr_clock = clock(); + SH->Synch(SynchList_pre, Symmetry); + if (myrank == 0) + { + prev_clock = curr_clock; + curr_clock = clock(); + cout << " Shell stuff synchronization used " + << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) + << " seconds! " << endl; + } + } + + // Complete non-blocking error reduction and check + MPI_Wait(&err_req, MPI_STATUS_IGNORE); + if (ERROR) + { + SH->Dump_Data(StateList, 0, PhysTime, dT_lev); + if (myrank == 0) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "find NaN in state variables on Shell Patches at t = " << PhysTime << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + + // corrector + for (iter_count = 1; iter_count < 4; iter_count++) + { + // for RK4: t0, t0+dt/2, t0+dt/2, t0+dt; + if (iter_count == 1 || iter_count == 3) + TRK4 += dT_lev / 2; + + { + sPp = SH->PatL; + while (sPp) + { + MyList *BP = sPp->data->blb; + int fngfs = sPp->data->fngfs; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { +#if (AGM == 0) + f_enforce_ga(cg->shape, + cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], + cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], + cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], + cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); +#elif (AGM == 1) + if (iter_count == 3) + f_enforce_ga(cg->shape, + cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], + cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], + cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], + cg->fgfs[Ayy->sgfn], 
cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); +#endif + + if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[fngfs + ShellPatch::gx], + cg->fgfs[fngfs + ShellPatch::gy], + cg->fgfs[fngfs + ShellPatch::gz], + cg->fgfs[fngfs + ShellPatch::drhodx], + cg->fgfs[fngfs + ShellPatch::drhody], + cg->fgfs[fngfs + ShellPatch::drhodz], + cg->fgfs[fngfs + ShellPatch::dsigmadx], + cg->fgfs[fngfs + ShellPatch::dsigmady], + cg->fgfs[fngfs + ShellPatch::dsigmadz], + cg->fgfs[fngfs + ShellPatch::dRdx], + cg->fgfs[fngfs + ShellPatch::dRdy], + cg->fgfs[fngfs + ShellPatch::dRdz], + cg->fgfs[fngfs + ShellPatch::drhodxx], + cg->fgfs[fngfs + ShellPatch::drhodxy], + cg->fgfs[fngfs + ShellPatch::drhodxz], + cg->fgfs[fngfs + ShellPatch::drhodyy], + cg->fgfs[fngfs + ShellPatch::drhodyz], + cg->fgfs[fngfs + ShellPatch::drhodzz], + cg->fgfs[fngfs + ShellPatch::dsigmadxx], + cg->fgfs[fngfs + ShellPatch::dsigmadxy], + cg->fgfs[fngfs + ShellPatch::dsigmadxz], + cg->fgfs[fngfs + ShellPatch::dsigmadyy], + cg->fgfs[fngfs + ShellPatch::dsigmadyz], + cg->fgfs[fngfs + ShellPatch::dsigmadzz], + cg->fgfs[fngfs + ShellPatch::dRdxx], + cg->fgfs[fngfs + ShellPatch::dRdxy], + cg->fgfs[fngfs + ShellPatch::dRdxz], + cg->fgfs[fngfs + ShellPatch::dRdyy], + cg->fgfs[fngfs + ShellPatch::dRdyz], + cg->fgfs[fngfs + ShellPatch::dRdzz], + cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], + cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], + cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], + cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], + cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], + cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], + cg->fgfs[Lap->sgfn], + cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn], + cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn], + cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn], + cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn], + cg->fgfs[gyy1->sgfn], 
cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn], + cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn], + cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn], + cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn], + cg->fgfs[Lap1->sgfn], + cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn], + cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn], + cg->fgfs[rho->sgfn], + cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], + cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], + cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], + cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], + cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], + cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], + cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], + cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], + cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], + cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], + cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], + cg->fgfs[Cons_Ham->sgfn], + cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], + cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], + Symmetry, lev, numepsh, sPp->data->sst, cor)) + { + cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" + << cg->bbox[0] << ":" << cg->bbox[3] << "," + << cg->bbox[1] << ":" << cg->bbox[4] << "," + << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; + ERROR = 1; + } + // rk4 substep and boundary + { + MyList *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; + // we do not check the correspondence here + + while (varl0) + { + // sommerfeld indeed for outter boudary while fix BD for inner boundary + f_sommerfeld_routbam_ss(cg->shape, cg->X[0], 
cg->X[1], cg->X[2], + sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], + sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], + cg->fgfs[varl1->data->sgfn], + cg->fgfs[varl->data->sgfn], + varl0->data->propspeed, varl0->data->SoA, + Symmetry); + + f_rungekutta4_rout(cg->shape, dT_lev, + cg->fgfs[varl0->data->sgfn], + cg->fgfs[varl1->data->sgfn], + cg->fgfs[varlrhs->data->sgfn], + iter_count); + + varl0 = varl0->next; + varl = varl->next; + varl1 = varl1->next; + varlrhs = varlrhs->next; + } + } + f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); + } + if (BP == sPp->data->ble) + break; + BP = BP->next; + } + sPp = sPp->next; + } + } + // Non-blocking error reduction overlapped with Synch to hide Allreduce latency + MPI_Request err_req_cor; + { + int erh = ERROR; + MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor); + } + + { + clock_t prev_clock, curr_clock; + if (myrank == 0) + curr_clock = clock(); + SH->Synch(SynchList_cor, Symmetry); + if (myrank == 0) + { + prev_clock = curr_clock; + curr_clock = clock(); + cout << " Shell stuff synchronization used " + << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) + << " seconds! 
" << endl; + } + } + + // Complete non-blocking error reduction and check + MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE); + if (ERROR) + { + SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev); + if (myrank == 0) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "find NaN on Shell Patches in RK4 substep#" << iter_count + << " variables at t = " << PhysTime << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + + sPp = SH->PatL; + while (sPp) + { + MyList *BP = sPp->data->blb; + while (BP) + { + Block *cg = BP->data; + cg->swapList(SynchList_pre, SynchList_cor, myrank); + if (BP == sPp->data->ble) + break; + BP = BP->next; + } + sPp = sPp->next; + } + } +#if (RPS == 0) + { + clock_t prev_clock, curr_clock; + if (myrank == 0) + curr_clock = clock(); + SH->CS_Inter(SynchList_cor, Symmetry); + if (myrank == 0) + { + prev_clock = curr_clock; + curr_clock = clock(); + cout << " CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) + << " seconds! " << endl; + } + } +#endif + // note the data structure before update + // SynchList_cor 1 ----------- + // + // StateList 0 ----------- + // + // OldStateList old ----------- + // update + sPp = SH->PatL; + while (sPp) + { + MyList *BP = sPp->data->blb; + while (BP) + { + Block *cg = BP->data; + cg->swapList(StateList, SynchList_cor, myrank); + cg->swapList(OldStateList, SynchList_cor, myrank); + if (BP == sPp->data->ble) + break; + BP = BP->next; + } + sPp = sPp->next; + } +} +#endif +#endif + +//================================================================================================ + + + +//================================================================================================ + // 0: do not use mixing two levels data for OutBD; 1: do use #define MIXOUTB 0 @@ -5961,2679 +5961,2679 @@ void bssn_class::SHStep() #endif void bssn_class::RestrictProlong(int lev, int YN, bool BB, MyList *SL, MyList *OL, MyList *corL) -// we assume -// StateList 1 ----------- -// -// OldStateList 0 
----------- -// -// SynchList_cor old ----------- -{ -#if (PSTR == 1 || PSTR == 2) -// stringstream a_stream; -// a_stream.setf(ios::left); -#endif - - if (lev > 0) - { - MyList *Pp, *Ppc; - if (lev > trfls && YN == 0) // time refinement levels and for intermediat time level - { - Pp = GH->PatL[lev - 1]; - while (Pp) - { - if (BB) - Parallel::prepare_inter_time_level(Pp->data, SL, OL, corL, - SynchList_pre, 0); // use SynchList_pre as temporal storage space - else - Parallel::prepare_inter_time_level(Pp->data, SL, OL, - SynchList_pre, 0); // use SynchList_pre as temporal storage space - -#if (PSTR == 1 || PSTR == 2) -// Pp->data->checkPatch(0,GH->start_rank[GH->mylev]); -#endif - Pp = Pp->next; - } - -#if (PSTR == 1 || PSTR == 2) -// Pp=GH->PatL[lev]; -// while(Pp) -// { -// Pp->data->checkPatch(0,GH->start_rank[GH->mylev]); -// Pp=Pp->next; -// } - -// a_stream.clear(); -// a_stream.str(""); -// a_stream<mylev<<": 0 before Restrict"; -// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); -#endif - -#if (RPB == 0) - Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry, sync_cache_restrict[lev]); -#elif (RPB == 1) - // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry); - Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry); -#endif - -#if (PSTR == 1 || PSTR == 2) -// a_stream.clear(); -// a_stream.str(""); -// a_stream<mylev<<": 0 after Restrict"; -// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); -#endif - +// we assume +// StateList 1 ----------- +// +// OldStateList 0 ----------- +// +// SynchList_cor old ----------- +{ +#if (PSTR == 1 || PSTR == 2) +// stringstream a_stream; +// a_stream.setf(ios::left); +#endif + + if (lev > 0) + { + MyList *Pp, *Ppc; + if (lev > trfls && YN == 0) // time refinement levels and for intermediat time level + { + Pp = GH->PatL[lev - 1]; + while 
(Pp) + { + if (BB) + Parallel::prepare_inter_time_level(Pp->data, SL, OL, corL, + SynchList_pre, 0); // use SynchList_pre as temporal storage space + else + Parallel::prepare_inter_time_level(Pp->data, SL, OL, + SynchList_pre, 0); // use SynchList_pre as temporal storage space + +#if (PSTR == 1 || PSTR == 2) +// Pp->data->checkPatch(0,GH->start_rank[GH->mylev]); +#endif + Pp = Pp->next; + } + +#if (PSTR == 1 || PSTR == 2) +// Pp=GH->PatL[lev]; +// while(Pp) +// { +// Pp->data->checkPatch(0,GH->start_rank[GH->mylev]); +// Pp=Pp->next; +// } + +// a_stream.clear(); +// a_stream.str(""); +// a_stream<mylev<<": 0 before Restrict"; +// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); +#endif + +#if (RPB == 0) + Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry, sync_cache_restrict[lev]); +#elif (RPB == 1) + // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry); + Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry); +#endif + +#if (PSTR == 1 || PSTR == 2) +// a_stream.clear(); +// a_stream.str(""); +// a_stream<mylev<<": 0 after Restrict"; +// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); +#endif + #if (RP_SYNC_COARSE_AFTER_RESTRICT == 1) Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]); #endif - -#if (PSTR == 1 || PSTR == 2) -// a_stream.clear(); -// a_stream.str(""); -// a_stream<mylev<<": 0 after Sync"; -// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); -#endif - -#if (RPB == 0) -#if (MIXOUTB == 0) - Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry, sync_cache_outbd[lev]); -#elif (MIXOUTB == 1) - Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry); -#endif -#elif (RPB == 1) - // 
Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry); - Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry); -#endif - -#if (PSTR == 1 || PSTR == 2) -// a_stream.clear(); -// a_stream.str(""); -// a_stream<mylev<<": 0 after OutBdLow2Hi"; -// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); -#endif - } - else // no time refinement levels and for all same time levels - { - -#if (PSTR == 1 || PSTR == 2) -// a_stream.clear(); -// a_stream.str(""); -// a_stream<mylev<<": 1 before Restrict"; -// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); -#endif - -#if (RPB == 0) - Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_restrict[lev]); -#elif (RPB == 1) - // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry); - Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry); -#endif - -#if (PSTR == 1 || PSTR == 2) -// a_stream.clear(); -// a_stream.str(""); -// a_stream<mylev<<": 1 before Sync"; -// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); -#endif - + +#if (PSTR == 1 || PSTR == 2) +// a_stream.clear(); +// a_stream.str(""); +// a_stream<mylev<<": 0 after Sync"; +// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); +#endif + +#if (RPB == 0) +#if (MIXOUTB == 0) + Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry, sync_cache_outbd[lev]); +#elif (MIXOUTB == 1) + Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry); +#endif +#elif (RPB == 1) + // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry); + Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry); +#endif + +#if (PSTR == 1 || PSTR == 2) +// 
a_stream.clear(); +// a_stream.str(""); +// a_stream<mylev<<": 0 after OutBdLow2Hi"; +// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); +#endif + } + else // no time refinement levels and for all same time levels + { + +#if (PSTR == 1 || PSTR == 2) +// a_stream.clear(); +// a_stream.str(""); +// a_stream<mylev<<": 1 before Restrict"; +// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); +#endif + +#if (RPB == 0) + Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_restrict[lev]); +#elif (RPB == 1) + // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry); + Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry); +#endif + +#if (PSTR == 1 || PSTR == 2) +// a_stream.clear(); +// a_stream.str(""); +// a_stream<mylev<<": 1 before Sync"; +// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); +#endif + #if (RP_SYNC_COARSE_AFTER_RESTRICT == 1) Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]); #endif - -#if (PSTR == 1 || PSTR == 2) -// a_stream.clear(); -// a_stream.str(""); -// a_stream<mylev<<": 1 after Sync"; -// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); -#endif - -#if (RPB == 0) -#if (MIXOUTB == 0) - Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_outbd[lev]); -#elif (MIXOUTB == 1) - Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry); -#endif -#elif (RPB == 1) - // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry); - Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry); -#endif - -#if (PSTR == 1 || PSTR == 2) -// a_stream.clear(); -// a_stream.str(""); -// a_stream<mylev<<": 1 after OutBdLow2Hi"; -// 
misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); -#endif - } - - Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]); - -#if (PSTR == 1 || PSTR == 2) -// a_stream.clear(); -// a_stream.str(""); -// a_stream<mylev<<": after Sync"; -// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); -#endif - } -} - -//================================================================================================ - - - -//================================================================================================ - -// auxiliary operation, input lev means original lev-1 - -void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB, - MyList *SL, MyList *OL, MyList *corL) -// we assume -// StateList 1 ----------- -// -// OldStateList 0 ----------- -// -// SynchList_cor old ----------- -{ - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"starting RestrictProlong_aux"); - - if (lev >= GH->levels - 1) - return; - lev = lev + 1; - - if (lev > 0) - { - MyList *Pp, *Ppc; - if (lev > trfls && YN == 0) // time refinement levels and for intermediat time level - { - Pp = GH->PatL[lev - 1]; - while (Pp) - { - if (BB) - Parallel::prepare_inter_time_level(Pp->data, SL, OL, corL, - SynchList_pre, 0); // use SynchList_pre as temporal storage space - else - Parallel::prepare_inter_time_level(Pp->data, SL, OL, - SynchList_pre, 0); // use SynchList_pre as temporal storage space - Pp = Pp->next; - } - -#if (RPB == 0) - Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry, sync_cache_restrict[lev]); -#elif (RPB == 1) - // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry); - Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry); -#endif - + +#if (PSTR == 1 || PSTR == 2) +// a_stream.clear(); +// a_stream.str(""); +// a_stream<mylev<<": 1 after Sync"; +// 
misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); +#endif + +#if (RPB == 0) +#if (MIXOUTB == 0) + Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_outbd[lev]); +#elif (MIXOUTB == 1) + Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry); +#endif +#elif (RPB == 1) + // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry); + Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry); +#endif + +#if (PSTR == 1 || PSTR == 2) +// a_stream.clear(); +// a_stream.str(""); +// a_stream<mylev<<": 1 after OutBdLow2Hi"; +// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); +#endif + } + + Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]); + +#if (PSTR == 1 || PSTR == 2) +// a_stream.clear(); +// a_stream.str(""); +// a_stream<mylev<<": after Sync"; +// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); +#endif + } +} + +//================================================================================================ + + + +//================================================================================================ + +// auxiliary operation, input lev means original lev-1 + +void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB, + MyList *SL, MyList *OL, MyList *corL) +// we assume +// StateList 1 ----------- +// +// OldStateList 0 ----------- +// +// SynchList_cor old ----------- +{ + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"starting RestrictProlong_aux"); + + if (lev >= GH->levels - 1) + return; + lev = lev + 1; + + if (lev > 0) + { + MyList *Pp, *Ppc; + if (lev > trfls && YN == 0) // time refinement levels and for intermediat time level + { + Pp = GH->PatL[lev - 1]; + while (Pp) + { + if (BB) + Parallel::prepare_inter_time_level(Pp->data, SL, OL, corL, + SynchList_pre, 0); // use 
SynchList_pre as temporal storage space + else + Parallel::prepare_inter_time_level(Pp->data, SL, OL, + SynchList_pre, 0); // use SynchList_pre as temporal storage space + Pp = Pp->next; + } + +#if (RPB == 0) + Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry, sync_cache_restrict[lev]); +#elif (RPB == 1) + // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry); + Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry); +#endif + #if (RP_SYNC_COARSE_AFTER_RESTRICT == 1) Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]); #endif - -#if (RPB == 0) -#if (MIXOUTB == 0) - Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry, sync_cache_outbd[lev]); -#elif (MIXOUTB == 1) - Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry); -#endif -#elif (RPB == 1) - // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry); - Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry); -#endif - } - else // no time refinement levels and for all same time levels - { -#if (RPB == 0) - Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_restrict[lev]); -#elif (RPB == 1) - // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry); - Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry); -#endif - + +#if (RPB == 0) +#if (MIXOUTB == 0) + Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry, sync_cache_outbd[lev]); +#elif (MIXOUTB == 1) + Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry); +#endif +#elif (RPB == 1) + // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry); + 
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry); +#endif + } + else // no time refinement levels and for all same time levels + { +#if (RPB == 0) + Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_restrict[lev]); +#elif (RPB == 1) + // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry); + Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry); +#endif + #if (RP_SYNC_COARSE_AFTER_RESTRICT == 1) Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]); #endif - -#if (RPB == 0) -#if (MIXOUTB == 0) - Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_outbd[lev]); -#elif (MIXOUTB == 1) - Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry); -#endif -#elif (RPB == 1) - // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry); - Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry); -#endif - } - - Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]); - } -} - -//================================================================================================ - - - -//================================================================================================ - -void bssn_class::RestrictProlong(int lev, int YN, bool BB) -{ - double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); - // we assume for fine - // SynchList_cor 1 ----------- - // - // StateList 0 ----------- - // - // OldStateList old ----------- - // for coarse - // StateList 1 ----------- - // - // OldStateList 0 ----------- - // - // SynchList_cor old ----------- - if (lev > 0) - { - MyList *Pp, *Ppc; - if (lev > trfls && YN == 0) // time refinement levels and for intermediat time level - { - if (myrank == 0) - cout << "/=: " << GH->Lt[lev - 1] << "," << GH->Lt[lev] + dT_lev << endl; - Pp = 
GH->PatL[lev - 1]; - while (Pp) - { - if (BB) - Parallel::prepare_inter_time_level(Pp->data, StateList, OldStateList, SynchList_cor, - SynchList_pre, 0); // use SynchList_pre as temporal storage space - else - Parallel::prepare_inter_time_level(Pp->data, StateList, OldStateList, - SynchList_pre, 0); // use SynchList_pre as temporal storage space - Pp = Pp->next; - } - -#if (RPB == 0) - Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, Symmetry, sync_cache_restrict[lev]); -#elif (RPB == 1) - // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,SynchList_pre,Symmetry); - Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry); -#endif - + +#if (RPB == 0) +#if (MIXOUTB == 0) + Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_outbd[lev]); +#elif (MIXOUTB == 1) + Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry); +#endif +#elif (RPB == 1) + // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry); + Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry); +#endif + } + + Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]); + } +} + +//================================================================================================ + + + +//================================================================================================ + +void bssn_class::RestrictProlong(int lev, int YN, bool BB) +{ + double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); + // we assume for fine + // SynchList_cor 1 ----------- + // + // StateList 0 ----------- + // + // OldStateList old ----------- + // for coarse + // StateList 1 ----------- + // + // OldStateList 0 ----------- + // + // SynchList_cor old ----------- + if (lev > 0) + { + MyList *Pp, *Ppc; + if (lev > trfls && YN == 0) // time refinement levels and for intermediat 
time level + { + if (myrank == 0) + cout << "/=: " << GH->Lt[lev - 1] << "," << GH->Lt[lev] + dT_lev << endl; + Pp = GH->PatL[lev - 1]; + while (Pp) + { + if (BB) + Parallel::prepare_inter_time_level(Pp->data, StateList, OldStateList, SynchList_cor, + SynchList_pre, 0); // use SynchList_pre as temporal storage space + else + Parallel::prepare_inter_time_level(Pp->data, StateList, OldStateList, + SynchList_pre, 0); // use SynchList_pre as temporal storage space + Pp = Pp->next; + } + +#if (RPB == 0) + Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, Symmetry, sync_cache_restrict[lev]); +#elif (RPB == 1) + // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,SynchList_pre,Symmetry); + Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry); +#endif + #if (RP_SYNC_COARSE_AFTER_RESTRICT == 1) Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]); #endif - -#if (RPB == 0) -#if (MIXOUTB == 0) - Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry, sync_cache_outbd[lev]); -#elif (MIXOUTB == 1) - Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry); -#endif -#elif (RPB == 1) - // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry); - Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry); -#endif - } - else // no time refinement levels and for all same time levels - { - if (myrank == 0) - cout << "===: " << GH->Lt[lev - 1] << "," << GH->Lt[lev] + dT_lev << endl; -#if (RPB == 0) - Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry, sync_cache_restrict[lev]); -#elif (RPB == 1) - // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry); - 
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry); -#endif - + +#if (RPB == 0) +#if (MIXOUTB == 0) + Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry, sync_cache_outbd[lev]); +#elif (MIXOUTB == 1) + Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry); +#endif +#elif (RPB == 1) + // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry); + Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry); +#endif + } + else // no time refinement levels and for all same time levels + { + if (myrank == 0) + cout << "===: " << GH->Lt[lev - 1] << "," << GH->Lt[lev] + dT_lev << endl; +#if (RPB == 0) + Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry, sync_cache_restrict[lev]); +#elif (RPB == 1) + // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry); + Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry); +#endif + #if (RP_SYNC_COARSE_AFTER_RESTRICT == 1) Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]); #endif - -#if (RPB == 0) -#if (MIXOUTB == 0) - Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry, sync_cache_outbd[lev]); -#elif (MIXOUTB == 1) - Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry); -#endif -#elif (RPB == 1) - // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry); - Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry); -#endif - } - - Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]); - } -} - 
-//================================================================================================ - - - -//================================================================================================ - + +#if (RPB == 0) +#if (MIXOUTB == 0) + Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry, sync_cache_outbd[lev]); +#elif (MIXOUTB == 1) + Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry); +#endif +#elif (RPB == 1) + // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry); + Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry); +#endif + } + + Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]); + } +} + +//================================================================================================ + + + +//================================================================================================ + void bssn_class::ProlongRestrict(int lev, int YN, bool BB) -{ - if (lev > 0) - { - MyList *Pp, *Ppc; - if (lev > trfls && YN == 0) // time refinement levels and for intermediat time level - { - Pp = GH->PatL[lev - 1]; - while (Pp) - { - if (BB) - Parallel::prepare_inter_time_level(Pp->data, StateList, OldStateList, SynchList_cor, - SynchList_pre, 0); // use SynchList_pre as temporal storage space - else - Parallel::prepare_inter_time_level(Pp->data, StateList, OldStateList, - SynchList_pre, 0); // use SynchList_pre as temporal storage space - Pp = Pp->next; - } - -#if (RPB == 0) -#if (MIXOUTB == 0) - Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry, sync_cache_outbd[lev]); -#elif (MIXOUTB == 1) - Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry); -#endif -#elif (RPB == 1) - // 
Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry); - Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry); -#endif - } - else // no time refinement levels and for all same time levels - { -#if (RPB == 0) -#if (MIXOUTB == 0) - Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry, sync_cache_outbd[lev]); -#elif (MIXOUTB == 1) - Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry); -#endif -#elif (RPB == 1) - // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry); - Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry); -#endif - -#if 0 -#if (RPB == 0) - Parallel::Restrict(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry); -#elif (RPB == 1) -// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry); - Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,GH->rsul[lev],Symmetry); -#endif -#else - Parallel::Restrict_after(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry); -#endif +{ + if (lev > 0) + { + MyList *Pp, *Ppc; + if (lev > trfls && YN == 0) // time refinement levels and for intermediat time level + { + Pp = GH->PatL[lev - 1]; + while (Pp) + { + if (BB) + Parallel::prepare_inter_time_level(Pp->data, StateList, OldStateList, SynchList_cor, + SynchList_pre, 0); // use SynchList_pre as temporal storage space + else + Parallel::prepare_inter_time_level(Pp->data, StateList, OldStateList, + SynchList_pre, 0); // use SynchList_pre as temporal storage space + Pp = Pp->next; + } + +#if (RPB == 0) +#if (MIXOUTB == 0) + Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry, sync_cache_outbd[lev]); +#elif (MIXOUTB == 1) + 
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry); +#endif +#elif (RPB == 1) + // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry); + Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry); +#endif + } + else // no time refinement levels and for all same time levels + { +#if (RPB == 0) +#if (MIXOUTB == 0) + Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry, sync_cache_outbd[lev]); +#elif (MIXOUTB == 1) + Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry); +#endif +#elif (RPB == 1) + // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry); + Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry); +#endif + +#if 0 +#if (RPB == 0) + Parallel::Restrict(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry); +#elif (RPB == 1) +// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry); + Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,GH->rsul[lev],Symmetry); +#endif +#else + Parallel::Restrict_after(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry); +#endif #if (RP_SYNC_COARSE_AFTER_RESTRICT == 1) Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]); #endif - } - - Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]); - } + } + + Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]); + } } #undef MIXOUTB #undef RP_SYNC_COARSE_AFTER_RESTRICT //================================================================================================ - - - -//================================================================================================ - 
-// This member function computes the gravitational-wave quantity Psi4 - -//================================================================================================ - -void bssn_class::Compute_Psi4(int lev) -{ - MyList *DG_List = new MyList(Rpsi4); - DG_List->insert(Ipsi4); - -#if 0 // test showes this operation does not help -for(int ilev = GH->levels-1;ilev>=lev;ilev--) -{ - MyList *Pp=GH->PatL[ilev]; -#else - MyList *Pp = GH->PatL[lev]; -#endif - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (Psi4type == 0) - if (0) // if Gamma^i_jk and R_ij can be reused from the rhs calculation - f_ricci_gamma(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - Symmetry); - // the input arguments Gamma^i_jk and R_ij do not need synch, because we do not need to derivate them - f_getnp4(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], 
cg->fgfs[Azz0->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Rpsi4->sgfn], cg->fgfs[Ipsi4->sgfn], - Symmetry); -#elif (Psi4type == 1) - f_getnp4old(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[Rpsi4->sgfn], cg->fgfs[Ipsi4->sgfn], - Symmetry); -#else -#error "not recognized Psi4type" -#endif - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - -#if 0 - Parallel::Sync(GH->PatL[ilev],DG_List,Symmetry); -} -// because of double level data change, you can not do this in above loop -// prolong restrict Psi4 -for(int ilev=GH->levels-1;ilev>lev;ilev--) - RestrictProlong(ilev,1,false,DG_List,DG_List,DG_List); -#else - Parallel::Sync(GH->PatL[lev], DG_List, Symmetry); -#endif - -#ifdef WithShell - // ShellPatch part - if (lev == 0) - { - MyList *Pp = SH->PatL; - while (Pp) - { - MyList *BL = Pp->data->blb; - int fngfs = Pp->data->fngfs; - while (BL) - { - Block *cg = BL->data; - if (myrank == cg->rank) - { -#if (Psi4type == 0) - if 
(0) // if Gamma^i_jk and R_ij can be reused from the rhs calculation - f_ricci_gamma_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[fngfs + - ShellPatch::gx], cg->fgfs[fngfs + ShellPatch::gy], - cg->fgfs[fngfs + ShellPatch::gz], - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz], - cg->fgfs[fngfs + ShellPatch::drhodxx], - cg->fgfs[fngfs + ShellPatch::drhodxy], - cg->fgfs[fngfs + ShellPatch::drhodxz], - cg->fgfs[fngfs + ShellPatch::drhodyy], - cg->fgfs[fngfs + ShellPatch::drhodyz], - cg->fgfs[fngfs + ShellPatch::drhodzz], - cg->fgfs[fngfs + ShellPatch::dsigmadxx], - cg->fgfs[fngfs + ShellPatch::dsigmadxy], - cg->fgfs[fngfs + ShellPatch::dsigmadxz], - cg->fgfs[fngfs + ShellPatch::dsigmadyy], - cg->fgfs[fngfs + ShellPatch::dsigmadyz], - cg->fgfs[fngfs + ShellPatch::dsigmadzz], - cg->fgfs[fngfs + ShellPatch::dRdxx], - cg->fgfs[fngfs + ShellPatch::dRdxy], - cg->fgfs[fngfs + ShellPatch::dRdxz], - cg->fgfs[fngfs + ShellPatch::dRdyy], - cg->fgfs[fngfs + ShellPatch::dRdyz], - cg->fgfs[fngfs + ShellPatch::dRdzz], - cg->fgfs[phi0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], 
cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - Symmetry, lev, Pp->data->sst); - - f_getnp4_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[fngfs + ShellPatch::gx], - cg->fgfs[fngfs + ShellPatch::gy], - cg->fgfs[fngfs + ShellPatch::gz], - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz], - cg->fgfs[fngfs + ShellPatch::drhodxx], - cg->fgfs[fngfs + ShellPatch::drhodxy], - cg->fgfs[fngfs + ShellPatch::drhodxz], - cg->fgfs[fngfs + ShellPatch::drhodyy], - cg->fgfs[fngfs + ShellPatch::drhodyz], - cg->fgfs[fngfs + ShellPatch::drhodzz], - cg->fgfs[fngfs + ShellPatch::dsigmadxx], - cg->fgfs[fngfs + ShellPatch::dsigmadxy], - cg->fgfs[fngfs + ShellPatch::dsigmadxz], - cg->fgfs[fngfs + ShellPatch::dsigmadyy], - cg->fgfs[fngfs + ShellPatch::dsigmadyz], - cg->fgfs[fngfs + ShellPatch::dsigmadzz], - cg->fgfs[fngfs + ShellPatch::dRdxx], - cg->fgfs[fngfs + ShellPatch::dRdxy], - cg->fgfs[fngfs + ShellPatch::dRdxz], - cg->fgfs[fngfs + ShellPatch::dRdyy], - cg->fgfs[fngfs + ShellPatch::dRdyz], - cg->fgfs[fngfs + ShellPatch::dRdzz], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - 
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Rpsi4->sgfn], cg->fgfs[Ipsi4->sgfn], - Symmetry, Pp->data->sst); -#elif (Psi4type == 1) - f_getnp4old_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[fngfs + ShellPatch::gx], - cg->fgfs[fngfs + ShellPatch::gy], - cg->fgfs[fngfs + ShellPatch::gz], - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz], - cg->fgfs[fngfs + ShellPatch::drhodxx], - cg->fgfs[fngfs + ShellPatch::drhodxy], - cg->fgfs[fngfs + ShellPatch::drhodxz], - cg->fgfs[fngfs + ShellPatch::drhodyy], - cg->fgfs[fngfs + ShellPatch::drhodyz], - cg->fgfs[fngfs + ShellPatch::drhodzz], - cg->fgfs[fngfs + ShellPatch::dsigmadxx], - cg->fgfs[fngfs + ShellPatch::dsigmadxy], - cg->fgfs[fngfs + ShellPatch::dsigmadxz], - cg->fgfs[fngfs + ShellPatch::dsigmadyy], - cg->fgfs[fngfs + ShellPatch::dsigmadyz], - cg->fgfs[fngfs + ShellPatch::dsigmadzz], - cg->fgfs[fngfs + ShellPatch::dRdxx], - cg->fgfs[fngfs + ShellPatch::dRdxy], - cg->fgfs[fngfs + ShellPatch::dRdxz], - cg->fgfs[fngfs + ShellPatch::dRdyy], - cg->fgfs[fngfs + ShellPatch::dRdyz], - cg->fgfs[fngfs + ShellPatch::dRdzz], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], 
cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[Rpsi4->sgfn], cg->fgfs[Ipsi4->sgfn], - Symmetry, Pp->data->sst); -#else -#error "not recognized Psi4type" -#endif - } - if (BL == Pp->data->ble) - break; - BL = BL->next; - } - Pp = Pp->next; - } - - SH->Synch(DG_List, Symmetry); -#if 0 -// interpolate Psi4 - SH->CS_Inter(DG_List,Symmetry); -#endif - } -#endif - - DG_List->clearList(); - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"end of Compute_Psi4"); -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function sets the black holes' initial puncture positions - -//================================================================================================ - -void bssn_class::Setup_Black_Hole_position() -{ - char filename[50]; - { - map::iterator iter = parameters::str_par.find("inputpar"); - if (iter != parameters::str_par.end()) - { - strcpy(filename, (iter->second).c_str()); - } - else - { - cout << "Error inputpar" << endl; - exit(0); - } - } - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "error reading parameter file " << filename << " in 
line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "BSSN" && skey == "BH_num") - { - BH_num_input = BH_num = atoi(sval.c_str()); - break; - } - } - inf.close(); - } - // set up the data for black holes - // these arrays will be deleted when bssn_class is deleted - Pmom = new double[3 * BH_num]; - Spin = new double[3 * BH_num]; - Mass = new double[BH_num]; - Porg0 = new double *[BH_num]; - Porgbr = new double *[BH_num]; - Porg = new double *[BH_num]; - Porg1 = new double *[BH_num]; - Porg_rhs = new double *[BH_num]; - for (int i = 0; i < BH_num; i++) - { - Porg0[i] = new double[3]; - Porgbr[i] = new double[3]; - Porg[i] = new double[3]; - Porg1[i] = new double[3]; - Porg_rhs[i] = new double[3]; - } - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "BSSN" && sind < BH_num) - { - if (skey == "Mass") - Mass[sind] = atof(sval.c_str()); - else if (skey == "Porgx") - Porg0[sind][0] = atof(sval.c_str()); - else if (skey == "Porgy") - Porg0[sind][1] = atof(sval.c_str()); - else if (skey == "Porgz") - Porg0[sind][2] = atof(sval.c_str()); - else if (skey == "Spinx") - Spin[sind * 3] = atof(sval.c_str()); - else if (skey == "Spiny") - Spin[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Spinz") - 
Spin[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Pmomx") - Pmom[sind * 3] = atof(sval.c_str()); - else if (skey == "Pmomy") - Pmom[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Pmomz") - Pmom[sind * 3 + 2] = atof(sval.c_str()); - } - } - inf.close(); - } - // echo information of Black holes - if (myrank == 0) - { - cout << endl; - cout << " initial information of " << BH_num << " Black Hole(s) " << endl; - cout << setw(12) << "Mass" - << setw(12) << "x" - << setw(12) << "y" - << setw(12) << "z" - << setw(16) << "Px" - << setw(16) << "Py" - << setw(12) << "Pz" - << setw(12) << "Sx" - << setw(12) << "Sy" - << setw(12) << "Sz" << endl; - for (int i = 0; i < BH_num; i++) - { - cout << setw(12) << Mass[i] - << setw(12) << Porg0[i][0] - << setw(12) << Porg0[i][1] - << setw(12) << Porg0[i][2] - << setw(16) << Pmom[i * 3] - << setw(16) << Pmom[i * 3 + 1] - << setw(12) << Pmom[i * 3 + 2] - << setw(12) << Spin[i * 3] - << setw(12) << Spin[i * 3 + 1] - << setw(12) << Spin[i * 3 + 2] << endl; - } - } - - int maxl = 1; - int levels; - int *grids; - double bbox[6]; - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind1, sind2, sind3; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - cout << "bssn_class::Setup_Black_Hole_position: Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind1); - if (status == -1) - { - cout << "error reading parameter file " << filename << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "cgh" && skey == "levels") - { - levels = atoi(sval.c_str()); - break; - } - } - inf.close(); - } - grids = new int[levels]; - // read parameter from file - { - const 
int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind1, sind2, sind3; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - cout << "bssn_class::Setup_Black_Hole_position: Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind1, sind2, sind3); - if (status == -1) - { - cout << "error reading parameter file " << filename << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "cgh" && skey == "grids" && sind1 < levels) - grids[sind1] = atoi(sval.c_str()); - if (sgrp == "cgh" && skey == "bbox" && sind1 == 0 && sind2 == 0) - bbox[sind3] = atof(sval.c_str()); - } - inf.close(); - } - for (int i = 0; i < levels; i++) - if (maxl < grids[i]) - maxl = grids[i]; - - delete[] grids; - - if (BH_num > maxl) - { - int BH_numc = BH_num; - for (int i = 0; i < BH_num; i++) - if (Porg0[i][0] < bbox[0] || Porg0[i][0] > bbox[3] || - Porg0[i][1] < bbox[1] || Porg0[i][1] > bbox[4] || - Porg0[i][2] < bbox[2] || Porg0[i][2] > bbox[5]) - { - delete[] Porg0[i]; - Porg0[i] = 0; - BH_numc--; - } - - if (BH_num > BH_numc) - { - maxl = BH_numc; - int bhi; - double *tmp; - - tmp = Pmom; - Pmom = new double[3 * maxl]; - bhi = 0; - for (int i = 0; i < BH_num; i++) - if (Porg0[i]) - { - for (int j = 0; j < 3; j++) - Pmom[3 * bhi + j] = tmp[3 * i + j]; - bhi++; - } - delete[] tmp; - - tmp = Spin; - Spin = new double[3 * maxl]; - bhi = 0; - for (int i = 0; i < BH_num; i++) - if (Porg0[i]) - { - for (int j = 0; j < 3; j++) - Spin[3 * bhi + j] = tmp[3 * i + j]; - bhi++; - } - delete[] tmp; - - tmp = Mass; - Mass = new double[3 * maxl]; - bhi = 0; - for (int i = 0; i < BH_num; i++) - if (Porg0[i]) - { - Mass[bhi] = tmp[i]; - bhi++; - } - delete[] tmp; - - double **ttmp; - 
ttmp = Porg0; - Porg0 = new double *[maxl]; - bhi = 0; - for (int i = 0; i < BH_num; i++) - if (ttmp[i]) - { - Porg0[bhi] = ttmp[i]; - bhi++; - } - delete[] ttmp; - - for (int i = 0; i < BH_num; i++) - { - delete[] Porgbr[i]; - delete[] Porg[i]; - delete[] Porg1[i]; - delete[] Porg_rhs[i]; - } - delete[] Porgbr; - delete[] Porg; - delete[] Porg1; - delete[] Porg_rhs; - - BH_num = maxl; - - Porgbr = new double *[BH_num]; - Porg = new double *[BH_num]; - Porg1 = new double *[BH_num]; - Porg_rhs = new double *[BH_num]; - - for (int i = 0; i < BH_num; i++) - { - Porgbr[i] = new double[3]; - Porg[i] = new double[3]; - Porg1[i] = new double[3]; - Porg_rhs[i] = new double[3]; - } - } - } - - for (int i = 0; i < BH_num; i++) - { - for (int j = 0; j < dim; j++) - Porgbr[i][j] = Porg0[i][j]; - } - - setpbh(BH_num, Porg0, Mass, BH_num_input); -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function computes black hole positions - -//================================================================================================ - -#if 0 -// old code - -void bssn_class::compute_Porg_rhs(double **BH_PS,double **BH_RHS,var *forx,var *fory,var *forz,int lev) -{ - const int InList = 3; - - MyList * DG_List=new MyList(forx); - DG_List->insert(fory); DG_List->insert(forz); - - int n; - double *x1,*y1,*z1; - double *shellf; - shellf=new double[3*BH_num]; - double *pox[3]; - for(int i=0;i<3;i++) pox[i] = new double[BH_num]; - for( n = 0; n < BH_num; n++) - { - pox[0][n] = BH_PS[n][0]; - pox[1][n] = BH_PS[n][1]; - pox[2][n] = BH_PS[n][2]; - } - - if(!Parallel::PatList_Interp_Points(GH->PatL[lev],DG_List,BH_num,pox,shellf,Symmetry)) - { - ErrorMonitor->outfile<<"fail to find black holes at t = "<outfile<<"(x,y,z) = ("<clearList(); - delete[] shellf; - for(int i=0;i<3;i++) delete[] pox[i]; -} - -#else - -// 
new code considering diferent levels for different black hole - -void bssn_class::compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, var *fory, var *forz, int ilev) -{ - const int InList = 3; - - MyList *DG_List = new MyList(forx); - DG_List->insert(fory); - DG_List->insert(forz); - - double *x1, *y1, *z1; - double *shellf; - shellf = new double[3]; - double *pox[3]; - for (int i = 0; i < 3; i++) - pox[i] = new double[1]; - - for (int n = 0; n < BH_num; n++) - { - pox[0][0] = BH_PS[n][0]; - pox[1][0] = BH_PS[n][1]; - pox[2][0] = BH_PS[n][2]; - - int lev = ilev; - -#if (PSTR == 0) - while (!Parallel::PatList_Interp_Points(GH->PatL[lev], DG_List, 1, pox, shellf, Symmetry)) -#elif (PSTR == 1 || PSTR == 2 || PSTR == 3) - while (!Parallel::PatList_Interp_Points(GH->PatL[lev], DG_List, 1, pox, shellf, Symmetry, GH->Commlev[lev])) -#endif - { - lev--; - if (lev < 0) - { - ErrorMonitor->outfile << "fail to find black holes at t = " << PhysTime << endl; - for (n = 0; n < BH_num; n++) - ErrorMonitor->outfile << "(x,y,z) = (" - << pox[0][n] << "," << pox[1][n] << "," << pox[2][n] - << ")" << endl; - break; - } - } - - if (lev >= 0) - { - BH_RHS[n][0] = -shellf[0]; - BH_RHS[n][1] = -shellf[1]; - BH_RHS[n][2] = -shellf[2]; - } - } - - DG_List->clearList(); - delete[] shellf; - for (int i = 0; i < 3; i++) - delete[] pox[i]; -} -#endif - -//================================================================================================ - - - -//================================================================================================ - -// This member function computes gravitational-wave related quantities and performs analysis - -//================================================================================================ - -void bssn_class::AnalysisStuff(int lev, double dT_lev) -{ - LastAnas += dT_lev; - - if (LastAnas >= AnasTime) - { -#ifdef Point_Psi4 -#error "not support parallel levels yet" - // Gam_ijk and R_ij have been calculated in 
Interp_Constraint() - double SYM = 1, ANT = -1; - for (int levh = lev; levh < GH->levels; levh++) - { - MyList *Pp = GH->PatL[levh]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_fderivs(cg->shape, cg->fgfs[phi0->sgfn], - cg->fgfs[phix->sgfn], cg->fgfs[phiy->sgfn], cg->fgfs[phiz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - SYM, SYM, SYM, Symmetry, levh); - f_fderivs(cg->shape, cg->fgfs[trK0->sgfn], - cg->fgfs[trKx->sgfn], cg->fgfs[trKy->sgfn], cg->fgfs[trKz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - SYM, SYM, SYM, Symmetry, levh); - f_fderivs(cg->shape, cg->fgfs[Axx0->sgfn], - cg->fgfs[Axxx->sgfn], cg->fgfs[Axxy->sgfn], cg->fgfs[Axxz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - SYM, SYM, SYM, Symmetry, levh); - f_fderivs(cg->shape, cg->fgfs[Axy0->sgfn], - cg->fgfs[Axyx->sgfn], cg->fgfs[Axyy->sgfn], cg->fgfs[Axyz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - ANT, ANT, SYM, Symmetry, levh); - f_fderivs(cg->shape, cg->fgfs[Axz0->sgfn], - cg->fgfs[Axzx->sgfn], cg->fgfs[Axzy->sgfn], cg->fgfs[Axzz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - ANT, SYM, ANT, Symmetry, levh); - f_fderivs(cg->shape, cg->fgfs[Ayy0->sgfn], - cg->fgfs[Ayyx->sgfn], cg->fgfs[Ayyy->sgfn], cg->fgfs[Ayyz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - SYM, SYM, SYM, Symmetry, levh); - f_fderivs(cg->shape, cg->fgfs[Ayz0->sgfn], - cg->fgfs[Ayzx->sgfn], cg->fgfs[Ayzy->sgfn], cg->fgfs[Ayzz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - SYM, ANT, ANT, Symmetry, levh); - f_fderivs(cg->shape, cg->fgfs[Azz0->sgfn], - cg->fgfs[Azzx->sgfn], cg->fgfs[Azzy->sgfn], cg->fgfs[Azzz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - SYM, SYM, SYM, Symmetry, levh); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - -#ifdef WithShell - // ShellPatch part - if (lev == 0) - { - MyList *Pp = SH->PatL; - while (Pp) - { - MyList *BL = Pp->data->blb; - int fngfs = Pp->data->fngfs; - while (BL) - { - Block *cg = BL->data; - if (myrank == cg->rank) - { 
- f_fderivs_shc(cg->shape, cg->fgfs[phi0->sgfn], - cg->fgfs[phix->sgfn], cg->fgfs[phiy->sgfn], cg->fgfs[phiz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - phi0->SoA[0], phi0->SoA[1], phi0->SoA[2], - Symmetry, levh, Pp->data->sst, - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz]); - f_fderivs_shc(cg->shape, cg->fgfs[trK0->sgfn], - cg->fgfs[trKx->sgfn], cg->fgfs[trKy->sgfn], cg->fgfs[trKz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - trK0->SoA[0], trK0->SoA[1], trK0->SoA[2], - Symmetry, levh, Pp->data->sst, - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz]); - f_fderivs_shc(cg->shape, cg->fgfs[Axx0->sgfn], - cg->fgfs[Axxx->sgfn], cg->fgfs[Axxy->sgfn], cg->fgfs[Axxz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - Axx0->SoA[0], Axx0->SoA[1], Axx0->SoA[2], - Symmetry, levh, Pp->data->sst, - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz]); - f_fderivs_shc(cg->shape, cg->fgfs[Axy0->sgfn], - cg->fgfs[Axyx->sgfn], cg->fgfs[Axyy->sgfn], cg->fgfs[Axyz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - Axy0->SoA[0], Axy0->SoA[1], Axy0->SoA[2], - Symmetry, levh, Pp->data->sst, - 
cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz]); - f_fderivs_shc(cg->shape, cg->fgfs[Axz0->sgfn], - cg->fgfs[Axzx->sgfn], cg->fgfs[Axzy->sgfn], cg->fgfs[Axzz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - Axz0->SoA[0], Axz0->SoA[1], Axz0->SoA[2], - Symmetry, levh, Pp->data->sst, - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz]); - f_fderivs_shc(cg->shape, cg->fgfs[Ayy0->sgfn], - cg->fgfs[Ayyx->sgfn], cg->fgfs[Ayyy->sgfn], cg->fgfs[Ayyz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - Ayy0->SoA[0], Ayy0->SoA[1], Ayy0->SoA[2], - Symmetry, levh, Pp->data->sst, - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz]); - f_fderivs_shc(cg->shape, cg->fgfs[Ayz0->sgfn], - cg->fgfs[Ayzx->sgfn], cg->fgfs[Ayzy->sgfn], cg->fgfs[Ayzz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - Ayz0->SoA[0], Ayz0->SoA[1], Ayz0->SoA[2], - Symmetry, levh, Pp->data->sst, - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + 
ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz]); - f_fderivs_shc(cg->shape, cg->fgfs[Azz0->sgfn], - cg->fgfs[Azzx->sgfn], cg->fgfs[Azzy->sgfn], cg->fgfs[Azzz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - Azz0->SoA[0], Azz0->SoA[1], Azz0->SoA[2], - Symmetry, levh, Pp->data->sst, - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz]); - } - if (BL == Pp->data->ble) - break; - BL = BL->next; - } - Pp = Pp->next; - } - } -#endif - } -#else - Compute_Psi4(lev); -#endif - double *RP, *IP, *RoutMAP; - int NN = 0; - for (int pl = 2; pl < maxl + 1; pl++) - for (int pm = -pl; pm < pl + 1; pm++) - NN++; - RP = new double[NN]; - IP = new double[NN]; - RoutMAP = new double[7]; - double Rex = maxrex; - for (int i = 0; i < decn; i++) - { -#ifdef Point_Psi4 - Waveshell->surf_Wave(Rex, GH, SH, - phi, trK, - gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, - Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, - phix, phiy, phiz, - trKx, trKy, trKz, - Axxx, Axxy, Axxz, - Axyx, Axyy, Axyz, - Axzx, Axzy, Axzz, - Ayyx, Ayyy, Ayyz, - Ayzx, Ayzy, Ayzz, - Azzx, Azzy, Azzz, - Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz, - Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz, - Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz, - Rxx, Rxy, Rxz, Ryy, Ryz, Rzz, - 2, maxl, NN, RP, IP, ErrorMonitor); -#ifdef WithShell - if (lev > 0 || Rex < GH->bbox[0][0][3]) - { - Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0, - gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, - Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, - Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables - RoutMAP, ErrorMonitor); - } - else - { - 
Waveshell->surf_MassPAng(Rex, lev, SH, phi0, trK0, - gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, - Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, - Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables - RoutMAP, ErrorMonitor); - } -#else - Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0, - gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, - Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, - Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables - RoutMAP, ErrorMonitor); -#endif -#else -// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before surface integral"); -#ifdef WithShell - if (lev > 0 || Rex < GH->bbox[0][0][3]) - { - Waveshell->surf_Wave(Rex, lev, GH, Rpsi4, Ipsi4, 2, maxl, NN, RP, IP, ErrorMonitor); - Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0, - gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, - Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, - Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables - RoutMAP, ErrorMonitor); - } - else - { - Waveshell->surf_Wave(Rex, lev, SH, Rpsi4, Ipsi4, 2, maxl, NN, RP, IP, ErrorMonitor); - Waveshell->surf_MassPAng(Rex, lev, SH, phi0, trK0, - gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, - Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, - Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables - RoutMAP, ErrorMonitor); - } -#else -#if (PSTR == 0) - Waveshell->surf_Wave(Rex, lev, GH, Rpsi4, Ipsi4, 2, maxl, NN, RP, IP, ErrorMonitor); - Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0, - gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, - Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, - Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables - RoutMAP, ErrorMonitor); -#elif (PSTR == 1 || PSTR == 2) - Waveshell->surf_Wave(Rex, lev, GH, Rpsi4, Ipsi4, 2, maxl, NN, RP, IP, ErrorMonitor, GH->Commlev[lev]); - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after surf_Wave"); - Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0, - 
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, - Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, - Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables - RoutMAP, ErrorMonitor, GH->Commlev[lev]); -#endif -#endif -// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"end surface integral"); -#endif - if (i == 0) - { - ADMMass = RoutMAP[0]; - } -#if (PSTR == 1 || PSTR == 2) - if (GH->start_rank[a_lev] > 0) - { - MPI_Status status; - // receive - if (myrank == 0) - { - MPI_Recv(RP, NN, MPI_DOUBLE, GH->start_rank[a_lev], 1, MPI_COMM_WORLD, &status); - MPI_Recv(IP, NN, MPI_DOUBLE, GH->start_rank[a_lev], 2, MPI_COMM_WORLD, &status); - MPI_Recv(RoutMAP, 7, MPI_DOUBLE, GH->start_rank[a_lev], 3, MPI_COMM_WORLD, &status); - } - // send - if (myrank == GH->start_rank[a_lev]) - { - MPI_Send(RP, NN, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD); - MPI_Send(IP, NN, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD); - MPI_Send(RoutMAP, 7, MPI_DOUBLE, 0, 3, MPI_COMM_WORLD); - } - } -#endif - Psi4Monitor->writefile(PhysTime, NN, RP, IP); - MAPMonitor->writefile(PhysTime, 7, RoutMAP); - Rex = Rex - drex; - } - delete[] RP; - delete[] IP; - delete[] RoutMAP; - - // black hole's position - { - double *pox; - pox = new double[dim * BH_num]; - for (int bhi = 0; bhi < BH_num; bhi++) - for (int i = 0; i < dim; i++) - pox[dim * bhi + i] = Porg0[bhi][i]; - BHMonitor->writefile(PhysTime, dim * BH_num, pox); - delete[] pox; - } - - LastAnas = 0; - } -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function computes and outputs constraint violations - -//================================================================================================ - -void bssn_class::Constraint_Out() -{ - LastConsOut += dT * pow(0.5, Mymax(0, trfls)); - - if (LastConsOut >= AnasTime) - // Constraint violation - { - // recompute least 
the constraint data lost for moved new grid - for (int lev = 0; lev < GH->levels; lev++) - { - // make sure the data consistent for higher levels - if (lev > 0) // if the constrait quantities can be reused from the step rhs calculation - { - double TRK4 = PhysTime; - double ndeps = numepsb; - int pre = 0; - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], - cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], - cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], - cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], - cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], - cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], - cg->fgfs[Lap_rhs->sgfn], - cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], - cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], - cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - 
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, ndeps, pre); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - } - Parallel::Sync(GH->PatL[lev], ConstraintList, Symmetry); - } -#ifdef WithShell - if (0) // if the constrait quantities can be reused from the step rhs calculation - { - MyList *sPp; - sPp = SH->PatL; - while (sPp) - { - double TRK4 = PhysTime; - int pre = 0; - int lev = 0; - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[fngfs + ShellPatch::gx], - cg->fgfs[fngfs + ShellPatch::gy], - cg->fgfs[fngfs + ShellPatch::gz], - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz], - cg->fgfs[fngfs + ShellPatch::drhodxx], - cg->fgfs[fngfs + ShellPatch::drhodxy], - cg->fgfs[fngfs + ShellPatch::drhodxz], - cg->fgfs[fngfs + ShellPatch::drhodyy], - cg->fgfs[fngfs + ShellPatch::drhodyz], - cg->fgfs[fngfs + ShellPatch::drhodzz], - 
cg->fgfs[fngfs + ShellPatch::dsigmadxx], - cg->fgfs[fngfs + ShellPatch::dsigmadxy], - cg->fgfs[fngfs + ShellPatch::dsigmadxz], - cg->fgfs[fngfs + ShellPatch::dsigmadyy], - cg->fgfs[fngfs + ShellPatch::dsigmadyz], - cg->fgfs[fngfs + ShellPatch::dsigmadzz], - cg->fgfs[fngfs + ShellPatch::dRdxx], - cg->fgfs[fngfs + ShellPatch::dRdxy], - cg->fgfs[fngfs + ShellPatch::dRdxz], - cg->fgfs[fngfs + ShellPatch::dRdyy], - cg->fgfs[fngfs + ShellPatch::dRdyz], - cg->fgfs[fngfs + ShellPatch::dRdzz], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], - cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], - cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], - cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], - cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], - cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], - cg->fgfs[Lap_rhs->sgfn], - cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], - cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], - cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], 
cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, numepsh, sPp->data->sst, pre); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } - SH->Synch(ConstraintList, Symmetry); -#endif - - double ConV[7]; -#if (PSTR == 1 || PSTR == 2) - double ConV_h[7]; -#endif - -#ifdef WithShell - ConV[0] = SH->L2Norm(Cons_Ham); - ConV[1] = SH->L2Norm(Cons_Px); - ConV[2] = SH->L2Norm(Cons_Py); - ConV[3] = SH->L2Norm(Cons_Pz); - ConV[4] = SH->L2Norm(Cons_Gx); - ConV[5] = SH->L2Norm(Cons_Gy); - ConV[6] = SH->L2Norm(Cons_Gz); - ConVMonitor->writefile(PhysTime, 7, ConV); -#endif - for (int levi = 0; levi < GH->levels; levi++) - { -#if (PSTR == 0) - ConV[0] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Ham); - ConV[1] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Px); - ConV[2] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Py); - ConV[3] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Pz); - ConV[4] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gx); - ConV[5] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gy); - ConV[6] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gz); -#elif (PSTR == 1 || PSTR == 2) - ConV[0] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Ham, GH->Commlev[levi]); - ConV[1] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Px, GH->Commlev[levi]); - ConV[2] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Py, GH->Commlev[levi]); - 
ConV[3] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Pz, GH->Commlev[levi]); - ConV[4] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gx, GH->Commlev[levi]); - ConV[5] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gy, GH->Commlev[levi]); - ConV[6] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gz, GH->Commlev[levi]); - // misc::tillherecheck("before collect data to cpu0"); - // MPI_ALLREDUCE( sendbuf, recvbuf, count, datatype, op, comm), sendbu and recvbuf must be different - if (levi > 0) - { - if (GH->mylev == levi && myrank == GH->start_rank[levi]) - for (int i = 0; i < 7; i++) - ConV_h[i] = ConV[i]; - else - for (int i = 0; i < 7; i++) - ConV_h[i] = 0; - MPI_Allreduce(ConV_h, ConV, 7, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - } -#endif - ConVMonitor->writefile(PhysTime, 7, ConV); - /* - if(fabs(ConV[0])<0.00001) - { - MyList * DG_List=new MyList(Cons_Ham); - DG_List->insert(Cons_Px); DG_List->insert(Cons_Py); DG_List->insert(Cons_Px); - DG_List->insert(Cons_Gx); DG_List->insert(Cons_Gy); DG_List->insert(Cons_Gx); - Parallel::Dump_Data(GH->PatL[levi],DG_List,"jiu",0,1); - DG_List->clearList(); - if(myrank==0) MPI_Abort(MPI_COMM_WORLD,1); - } - */ - } - - Interp_Constraint(false); - - LastConsOut = 0; - } -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function computes derivatives required for apparent-horizon calculations - -//================================================================================================ - -#ifdef With_AHF -void bssn_class::AH_Prepare_derivatives() -{ - double SYM = 1.0, ANT = -1.0; - int ZEO = 0; - - for (int lev = 0; lev < GH->levels; lev++) - { - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_fderivs(cg->shape, cg->fgfs[phi0->sgfn], - 
cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], - cg->X[0], cg->X[1], cg->X[2], - SYM, SYM, SYM, Symmetry, ZEO); - f_fderivs(cg->shape, cg->fgfs[gxx0->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamzxx->sgfn], - cg->X[0], cg->X[1], cg->X[2], - SYM, SYM, SYM, Symmetry, ZEO); - f_fderivs(cg->shape, cg->fgfs[gxy0->sgfn], - cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamzxy->sgfn], - cg->X[0], cg->X[1], cg->X[2], - ANT, ANT, SYM, Symmetry, ZEO); - f_fderivs(cg->shape, cg->fgfs[gxz0->sgfn], - cg->fgfs[Gamxxz->sgfn], cg->fgfs[Gamyxz->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - ANT, SYM, ANT, Symmetry, ZEO); - f_fderivs(cg->shape, cg->fgfs[gyy0->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamzyy->sgfn], - cg->X[0], cg->X[1], cg->X[2], - SYM, SYM, SYM, Symmetry, ZEO); - f_fderivs(cg->shape, cg->fgfs[gyz0->sgfn], - cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamzyz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - SYM, ANT, ANT, Symmetry, ZEO); - f_fderivs(cg->shape, cg->fgfs[gzz0->sgfn], - cg->fgfs[Gamxzz->sgfn], cg->fgfs[Gamyzz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - SYM, SYM, SYM, Symmetry, ZEO); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - Parallel::Sync(GH->PatL[lev], AHDList, Symmetry); - } -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function interpolates apparent-horizon data - -//================================================================================================ - -bool bssn_class::AH_Interp_Points(MyList *VarList, - int NN, double **XX, - double *Shellf, int Symmetryi) -{ - MyList *varl; - int num_var = 0; - varl = VarList; - while (varl) - { - num_var++; - varl = varl->next; - } - - double 
pox[3]; - for (int i = 0; i < NN; i++) - { - for (int j = 0; j < 3; j++) - pox[j] = XX[j][i]; - int lev = GH->levels - 1; - bool notfound = true; - - while (notfound) - { - if (lev < 0) - { -#ifdef WithShell - if (SH->Interp_One_Point(VarList, pox, Shellf + i * num_var, Symmetryi)) - { - return true; - } - if (myrank == 0) - { - cout << " bssn_class::AH_Interp_Points: point (" - << pox[0] << "," << pox[1] << "," << pox[2] - << ") is out of cgh and shell domain!" << endl; - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << " bssn_class::AH_Interp_Points: point (" - << pox[0] << "," << pox[1] << "," << pox[2] - << ") is out of cgh and shell domain!" << endl; - } - MPI_Abort(MPI_COMM_WORLD, 1); -#else - if (myrank == 0) - { - cout << " bssn_class::AH_Interp_Points: point (" - << pox[0] << "," << pox[1] << "," << pox[2] - << ") is out of cgh domain!" << endl; - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << " bssn_class::AH_Interp_Points: point (" - << pox[0] << "," << pox[1] << "," << pox[2] - << ") is out of cgh domain!" 
<< endl; - } - MPI_Abort(MPI_COMM_WORLD, 1); -#endif - return false; - } - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - if (Pp->data->Interp_ONE_Point(VarList, pox, Shellf + i * num_var, Symmetryi)) - { - notfound = false; - break; - } - Pp = Pp->next; - } - lev--; - } - } - return true; -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function computes apparent horizons - -//================================================================================================ - -void bssn_class::AH_Step_Find(int lev, double dT_lev) -{ - if ((lev == GH->levels - 1)) - { - int ncount = int(PhysTime / dT_lev); - bool tf = false; - for (int ihn = 0; ihn < HN_num; ihn++) - { - if (ncount % findeveryl[ihn] == 0) - { - tf = true; - break; - } - } - if (tf) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - prev_clock = clock(); - const int cdumpid = int(PhysTime / AHdumptime) + 1; - for (int ihn = 0; ihn < HN_num; ihn++) - dumpid[ihn] = cdumpid; - - double gam; - for (int ihn = 0; ihn < BH_num; ihn++) - { - xc[ihn] = Porg0[ihn][0]; - yc[ihn] = Porg0[ihn][1]; - zc[ihn] = Porg0[ihn][2]; - gam = fabs(Pmom[ihn * 3]) / (Mass[ihn]); - gam = sqrt(1 - gam * gam); - xr[ihn] = Mass[ihn] * gam; - gam = fabs(Pmom[ihn * 3 + 1]) / (Mass[ihn]); - gam = sqrt(1 - gam * gam); - yr[ihn] = Mass[ihn] * gam; - gam = fabs(Pmom[ihn * 3 + 2]) / (Mass[ihn]); - gam = sqrt(1 - gam * gam); - zr[ihn] = Mass[ihn] * gam; - dTT[ihn] = -1; - - if (ncount % findeveryl[ihn] == 0) - { - trigger[ihn] = true; - dTT[ihn] = findeveryl[ihn] * dT_lev; - } - else - trigger[ihn] = false; - if (trigger[ihn] && (dumpid[ihn] > lastahdumpid[ihn])) - lastahdumpid[ihn] = dumpid[ihn]; - else - dumpid[ihn] = 0; - } - int ihn = BH_num; - for (int ia = 0; ia < BH_num; ia++) - for (int ib = ia + 1; ib < BH_num; ib++) - { - xc[ihn] = 
(Porg0[ia][0] + Porg0[ib][0]) / 2; - yc[ihn] = (Porg0[ia][1] + Porg0[ib][1]) / 2; - zc[ihn] = (Porg0[ia][2] + Porg0[ib][2]) / 2; - - xr[ihn] = yr[ihn] = zr[ihn] = Mass[ia] + Mass[ib]; - - dTT[ihn] = -1; - - if (fabs(Porg0[ia][0] - Porg0[ib][0]) < 2 * xr[ihn] && - fabs(Porg0[ia][1] - Porg0[ib][1]) < 2 * xr[ihn] && - fabs(Porg0[ia][2] - Porg0[ib][2]) < 2 * xr[ihn] && - (ncount % findeveryl[ihn] == 0)) - { - trigger[ihn] = true; - dTT[ihn] = findeveryl[ihn] * dT_lev; - } - else - trigger[ihn] = false; - - if (trigger[ihn] && (dumpid[ihn] > lastahdumpid[ihn])) - lastahdumpid[ihn] = dumpid[ihn]; - else - dumpid[ihn] = 0; - - ihn++; - } -#if (ABEtype == 1) - if (PhysTime > 10) - { - ihn--; - trigger[ihn] = true; - xr[ihn] = yr[ihn] = zr[ihn] = 50; - // if(myrank==0) for(ihn=0;ihn 0) - return; - - // recompute least the constraint data lost for moved new grid - for (int lev = 0; lev < GH->levels; lev++) - { - // make sure the data consistent for higher levels - if (lev > 0) // if the constrait quantities can be reused from the step rhs calculation - { - double TRK4 = PhysTime; - double ndeps = numepsb; - int pre = 0; - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], - cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], 
cg->fgfs[gxz_rhs->sgfn], - cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], - cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], - cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], - cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], - cg->fgfs[Lap_rhs->sgfn], - cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], - cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], - cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, ndeps, pre); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - } - Parallel::Sync(GH->PatL[lev], ConstraintList, Symmetry); - } -#ifdef WithShell - if (0) // if the constrait quantities can be reused from the step rhs calculation - { - MyList *sPp; - sPp = SH->PatL; - while (sPp) - { - double TRK4 = PhysTime; - int pre = 0; - int lev = 0; - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank 
== cg->rank) - { - f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[fngfs + ShellPatch::gx], - cg->fgfs[fngfs + ShellPatch::gy], - cg->fgfs[fngfs + ShellPatch::gz], - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz], - cg->fgfs[fngfs + ShellPatch::drhodxx], - cg->fgfs[fngfs + ShellPatch::drhodxy], - cg->fgfs[fngfs + ShellPatch::drhodxz], - cg->fgfs[fngfs + ShellPatch::drhodyy], - cg->fgfs[fngfs + ShellPatch::drhodyz], - cg->fgfs[fngfs + ShellPatch::drhodzz], - cg->fgfs[fngfs + ShellPatch::dsigmadxx], - cg->fgfs[fngfs + ShellPatch::dsigmadxy], - cg->fgfs[fngfs + ShellPatch::dsigmadxz], - cg->fgfs[fngfs + ShellPatch::dsigmadyy], - cg->fgfs[fngfs + ShellPatch::dsigmadyz], - cg->fgfs[fngfs + ShellPatch::dsigmadzz], - cg->fgfs[fngfs + ShellPatch::dRdxx], - cg->fgfs[fngfs + ShellPatch::dRdxy], - cg->fgfs[fngfs + ShellPatch::dRdxz], - cg->fgfs[fngfs + ShellPatch::dRdyy], - cg->fgfs[fngfs + ShellPatch::dRdyz], - cg->fgfs[fngfs + ShellPatch::dRdzz], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], - cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], - 
cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], - cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], - cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], - cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], - cg->fgfs[Lap_rhs->sgfn], - cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], - cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], - cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, numepsh, sPp->data->sst, pre); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } - SH->Synch(ConstraintList, Symmetry); -#endif - } - // interpolate - double *x1, *y1, *z1; - const int n = 1000; - double lmax, lmin, dd; - lmin = 0; -#ifdef WithShell - lmax = SH->Rrange[1]; -#else - lmax = GH->bbox[0][0][4]; -#endif -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - dd = (lmax - lmin) / (n - 1); -#else -#ifdef Cell - dd = (lmax - lmin) / n; -#else 
-#error Not define Vertex nor Cell -#endif -#endif - x1 = new double[n]; - y1 = new double[n]; - z1 = new double[n]; - for (int i = 0; i < n; i++) - { - x1[i] = 0; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - y1[i] = lmin + i * dd; -#else -#ifdef Cell - y1[i] = lmin + (i + 0.5) * dd; -#else -#error Not define Vertex nor Cell -#endif -#endif - z1[i] = 0; - } - - int InList = 0; - - MyList *varl = ConstraintList; - while (varl) - { - InList++; - varl = varl->next; - } - double *shellf; - shellf = new double[n * InList]; - for (int i = 0; i < n; i++) - { - double XX[3]; - XX[0] = x1[i]; - XX[1] = y1[i]; - XX[2] = z1[i]; - bool fg = GH->Interp_One_Point(ConstraintList, XX, shellf + i * InList, Symmetry); -#ifdef WithShell - if (!fg) - fg = SH->Interp_One_Point(ConstraintList, XX, shellf + i * InList, Symmetry); -#endif - if (!fg && myrank == 0) - { - cout << "bssn_class::Interp_Constraint meets wrong" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - if (myrank == 0) - { - ofstream outfile; - char filename[50]; - sprintf(filename, "%s/interp_constraint_%05d.dat", ErrorMonitor->out_dir.c_str(), int(PhysTime / dT + 0.5)); - // 0.5 for round off - - outfile.open(filename); - outfile << "# corrdinate, H_Res, Px_Res, Py_Res, Pz_Res, Gx_Res, Gy_Res, Gz_Res, ...." 
<< endl; - for (int i = 0; i < n; i++) - { - outfile << setw(10) << setprecision(10) << y1[i]; - for (int j = 0; j < InList; j++) - outfile << " " << setw(16) << setprecision(15) << shellf[InList * i + j]; - outfile << endl; - } - outfile.close(); - } - - delete[] shellf; -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function computes constraint violations - -//================================================================================================ - -void bssn_class::Compute_Constraint() -{ - double TRK4 = PhysTime; - double ndeps = numepsb; - int pre = 0; - int lev; - - for (lev = 0; lev < GH->levels; lev++) - { - { - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], - cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], - cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], - cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], - cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], - cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], 
cg->fgfs[Gmz_rhs->sgfn], - cg->fgfs[Lap_rhs->sgfn], - cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], - cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], - cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, ndeps, pre); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - } - Parallel::Sync(GH->PatL[lev], ConstraintList, Symmetry); - } - // prolong restrict constraint quantities - for (lev = GH->levels - 1; lev > 0; lev--) - RestrictProlong(lev, 1, false, ConstraintList, ConstraintList, ConstraintList); - -#ifdef WithShell - lev = 0; - { - MyList *sPp; - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[fngfs + ShellPatch::gx], - cg->fgfs[fngfs + ShellPatch::gy], - cg->fgfs[fngfs + ShellPatch::gz], - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + 
ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz], - cg->fgfs[fngfs + ShellPatch::drhodxx], - cg->fgfs[fngfs + ShellPatch::drhodxy], - cg->fgfs[fngfs + ShellPatch::drhodxz], - cg->fgfs[fngfs + ShellPatch::drhodyy], - cg->fgfs[fngfs + ShellPatch::drhodyz], - cg->fgfs[fngfs + ShellPatch::drhodzz], - cg->fgfs[fngfs + ShellPatch::dsigmadxx], - cg->fgfs[fngfs + ShellPatch::dsigmadxy], - cg->fgfs[fngfs + ShellPatch::dsigmadxz], - cg->fgfs[fngfs + ShellPatch::dsigmadyy], - cg->fgfs[fngfs + ShellPatch::dsigmadyz], - cg->fgfs[fngfs + ShellPatch::dsigmadzz], - cg->fgfs[fngfs + ShellPatch::dRdxx], - cg->fgfs[fngfs + ShellPatch::dRdxy], - cg->fgfs[fngfs + ShellPatch::dRdxz], - cg->fgfs[fngfs + ShellPatch::dRdyy], - cg->fgfs[fngfs + ShellPatch::dRdyz], - cg->fgfs[fngfs + ShellPatch::dRdzz], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], - cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], - cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], - cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], - cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], - cg->fgfs[Gmx_rhs->sgfn], 
cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], - cg->fgfs[Lap_rhs->sgfn], - cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], - cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], - cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, numepsh, sPp->data->sst, pre); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } - SH->Synch(ConstraintList, Symmetry); - // interpolate constraint quantities - SH->CS_Inter(ConstraintList, Symmetry); -#endif -} - -//================================================================================================ - - - -//================================================================================================ - -void bssn_class::testRestrict() -{ - MyList *DG_List = new MyList(phi0); - int lev = 0; - double ZEO = 0, ONE = 1; - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_set_value(cg->shape, cg->fgfs[phi0->sgfn], 
ZEO); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - - lev = 1; - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_set_value(cg->shape, cg->fgfs[phi0->sgfn], ONE); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - - Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], DG_List, DG_List, Symmetry); - Parallel::Sync(GH->PatL[lev - 1], DG_List, Symmetry); - - Parallel::Dump_Data(GH->PatL[lev - 1], DG_List, 0, PhysTime, dT); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT); - - DG_List->clearList(); - exit(0); -} - -//================================================================================================ - - - -//================================================================================================ - -void bssn_class::testOutBd() -{ - MyList *DG_List = new MyList(phi0); - int lev = 1; - double ZEO = 0, ONE = 1; - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_set_value(cg->shape, cg->fgfs[phi0->sgfn], ZEO); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - - lev = 0; - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_set_value(cg->shape, cg->fgfs[phi0->sgfn], ONE); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - - lev = 1; - MyList *Ppc = GH->PatL[lev - 1]; - while (Ppc) - { - Pp = GH->PatL[lev]; - while (Pp) - { - Parallel::OutBdLow2Hi(Ppc->data, Pp->data, DG_List, DG_List, Symmetry); - Pp = Pp->next; - } - Ppc = Ppc->next; - } - - Parallel::Sync(GH->PatL[lev], DG_List, Symmetry); - - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT); - Parallel::Dump_Data(GH->PatL[lev - 1], DG_List, 0, PhysTime, dT); - 
- DG_List->clearList(); - exit(0); -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function enforces/checks the traceless condition - -//================================================================================================ - -void bssn_class::Enforce_algcon(int lev, int fg) -{ - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - if (fg == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); - else - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - -#ifdef WithShell - if (lev == 0) - { - MyList *sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - if (fg == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); - else - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], 
cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } -#endif -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function monitors stdin for an 'abort' input - -//================================================================================================ - -bool bssn_class::check_Stdin_Abort() -{ - - fd_set readfds; - - struct timeval timeout; - - FD_ZERO(&readfds); - FD_SET(STDIN_FILENO, &readfds); - - // Set timeout to 0 — perform a non-blocking check - timeout.tv_sec = 0; - timeout.tv_usec = 0; - - int activity = select(STDIN_FILENO + 1, &readfds, nullptr, nullptr, &timeout); - - if (activity > 0 && FD_ISSET(STDIN_FILENO, &readfds)) { - string input_abort; - if (cin >> input_abort) { - if (input_abort == "stop") { - return true; - } - } - } - - return false; -} - -//================================================================================================ - + + + +//================================================================================================ + +// This member function computes the gravitational-wave quantity Psi4 + +//================================================================================================ + +void bssn_class::Compute_Psi4(int lev) +{ + MyList *DG_List = new MyList(Rpsi4); + DG_List->insert(Ipsi4); + +#if 0 // test showes this operation does not help +for(int ilev = GH->levels-1;ilev>=lev;ilev--) +{ + MyList *Pp=GH->PatL[ilev]; +#else + MyList *Pp = GH->PatL[lev]; +#endif + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { +#if (Psi4type == 0) + if (0) // if Gamma^i_jk and R_ij can be 
reused from the rhs calculation + f_ricci_gamma(cg->shape, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], + cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], + cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], + cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], + cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], + cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], + cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], + cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], + Symmetry); + // the input arguments Gamma^i_jk and R_ij do not need synch, because we do not need to derivate them + f_getnp4(cg->shape, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], + cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], + cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], + cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], + cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], + cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], + cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], + cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], + cg->fgfs[Rpsi4->sgfn], cg->fgfs[Ipsi4->sgfn], 
+ Symmetry); +#elif (Psi4type == 1) + f_getnp4old(cg->shape, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[Rpsi4->sgfn], cg->fgfs[Ipsi4->sgfn], + Symmetry); +#else +#error "not recognized Psi4type" +#endif + } + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } + +#if 0 + Parallel::Sync(GH->PatL[ilev],DG_List,Symmetry); +} +// because of double level data change, you can not do this in above loop +// prolong restrict Psi4 +for(int ilev=GH->levels-1;ilev>lev;ilev--) + RestrictProlong(ilev,1,false,DG_List,DG_List,DG_List); +#else + Parallel::Sync(GH->PatL[lev], DG_List, Symmetry); +#endif + +#ifdef WithShell + // ShellPatch part + if (lev == 0) + { + MyList *Pp = SH->PatL; + while (Pp) + { + MyList *BL = Pp->data->blb; + int fngfs = Pp->data->fngfs; + while (BL) + { + Block *cg = BL->data; + if (myrank == cg->rank) + { +#if (Psi4type == 0) + if (0) // if Gamma^i_jk and R_ij can be reused from the rhs calculation + f_ricci_gamma_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[fngfs + + ShellPatch::gx], cg->fgfs[fngfs + ShellPatch::gy], + cg->fgfs[fngfs + ShellPatch::gz], + cg->fgfs[fngfs + ShellPatch::drhodx], + cg->fgfs[fngfs + ShellPatch::drhody], + cg->fgfs[fngfs + ShellPatch::drhodz], + cg->fgfs[fngfs + ShellPatch::dsigmadx], + cg->fgfs[fngfs + ShellPatch::dsigmady], + cg->fgfs[fngfs + ShellPatch::dsigmadz], + cg->fgfs[fngfs + ShellPatch::dRdx], + cg->fgfs[fngfs + ShellPatch::dRdy], + cg->fgfs[fngfs + ShellPatch::dRdz], + cg->fgfs[fngfs + ShellPatch::drhodxx], + 
cg->fgfs[fngfs + ShellPatch::drhodxy], + cg->fgfs[fngfs + ShellPatch::drhodxz], + cg->fgfs[fngfs + ShellPatch::drhodyy], + cg->fgfs[fngfs + ShellPatch::drhodyz], + cg->fgfs[fngfs + ShellPatch::drhodzz], + cg->fgfs[fngfs + ShellPatch::dsigmadxx], + cg->fgfs[fngfs + ShellPatch::dsigmadxy], + cg->fgfs[fngfs + ShellPatch::dsigmadxz], + cg->fgfs[fngfs + ShellPatch::dsigmadyy], + cg->fgfs[fngfs + ShellPatch::dsigmadyz], + cg->fgfs[fngfs + ShellPatch::dsigmadzz], + cg->fgfs[fngfs + ShellPatch::dRdxx], + cg->fgfs[fngfs + ShellPatch::dRdxy], + cg->fgfs[fngfs + ShellPatch::dRdxz], + cg->fgfs[fngfs + ShellPatch::dRdyy], + cg->fgfs[fngfs + ShellPatch::dRdyz], + cg->fgfs[fngfs + ShellPatch::dRdzz], + cg->fgfs[phi0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], + cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], + cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], + cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], + cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], + cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], + cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], + cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], + Symmetry, lev, Pp->data->sst); + + f_getnp4_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[fngfs + ShellPatch::gx], + cg->fgfs[fngfs + ShellPatch::gy], + cg->fgfs[fngfs + ShellPatch::gz], + cg->fgfs[fngfs + ShellPatch::drhodx], + cg->fgfs[fngfs + ShellPatch::drhody], + cg->fgfs[fngfs + ShellPatch::drhodz], + cg->fgfs[fngfs + ShellPatch::dsigmadx], + cg->fgfs[fngfs + ShellPatch::dsigmady], + cg->fgfs[fngfs + ShellPatch::dsigmadz], + cg->fgfs[fngfs + ShellPatch::dRdx], + cg->fgfs[fngfs + 
ShellPatch::dRdy], + cg->fgfs[fngfs + ShellPatch::dRdz], + cg->fgfs[fngfs + ShellPatch::drhodxx], + cg->fgfs[fngfs + ShellPatch::drhodxy], + cg->fgfs[fngfs + ShellPatch::drhodxz], + cg->fgfs[fngfs + ShellPatch::drhodyy], + cg->fgfs[fngfs + ShellPatch::drhodyz], + cg->fgfs[fngfs + ShellPatch::drhodzz], + cg->fgfs[fngfs + ShellPatch::dsigmadxx], + cg->fgfs[fngfs + ShellPatch::dsigmadxy], + cg->fgfs[fngfs + ShellPatch::dsigmadxz], + cg->fgfs[fngfs + ShellPatch::dsigmadyy], + cg->fgfs[fngfs + ShellPatch::dsigmadyz], + cg->fgfs[fngfs + ShellPatch::dsigmadzz], + cg->fgfs[fngfs + ShellPatch::dRdxx], + cg->fgfs[fngfs + ShellPatch::dRdxy], + cg->fgfs[fngfs + ShellPatch::dRdxz], + cg->fgfs[fngfs + ShellPatch::dRdyy], + cg->fgfs[fngfs + ShellPatch::dRdyz], + cg->fgfs[fngfs + ShellPatch::dRdzz], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], + cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], + cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], + cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], + cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], + cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], + cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], + cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], + cg->fgfs[Rpsi4->sgfn], cg->fgfs[Ipsi4->sgfn], + Symmetry, Pp->data->sst); +#elif (Psi4type == 1) + f_getnp4old_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[fngfs + ShellPatch::gx], + cg->fgfs[fngfs + ShellPatch::gy], + cg->fgfs[fngfs + ShellPatch::gz], + cg->fgfs[fngfs + ShellPatch::drhodx], + 
cg->fgfs[fngfs + ShellPatch::drhody], + cg->fgfs[fngfs + ShellPatch::drhodz], + cg->fgfs[fngfs + ShellPatch::dsigmadx], + cg->fgfs[fngfs + ShellPatch::dsigmady], + cg->fgfs[fngfs + ShellPatch::dsigmadz], + cg->fgfs[fngfs + ShellPatch::dRdx], + cg->fgfs[fngfs + ShellPatch::dRdy], + cg->fgfs[fngfs + ShellPatch::dRdz], + cg->fgfs[fngfs + ShellPatch::drhodxx], + cg->fgfs[fngfs + ShellPatch::drhodxy], + cg->fgfs[fngfs + ShellPatch::drhodxz], + cg->fgfs[fngfs + ShellPatch::drhodyy], + cg->fgfs[fngfs + ShellPatch::drhodyz], + cg->fgfs[fngfs + ShellPatch::drhodzz], + cg->fgfs[fngfs + ShellPatch::dsigmadxx], + cg->fgfs[fngfs + ShellPatch::dsigmadxy], + cg->fgfs[fngfs + ShellPatch::dsigmadxz], + cg->fgfs[fngfs + ShellPatch::dsigmadyy], + cg->fgfs[fngfs + ShellPatch::dsigmadyz], + cg->fgfs[fngfs + ShellPatch::dsigmadzz], + cg->fgfs[fngfs + ShellPatch::dRdxx], + cg->fgfs[fngfs + ShellPatch::dRdxy], + cg->fgfs[fngfs + ShellPatch::dRdxz], + cg->fgfs[fngfs + ShellPatch::dRdyy], + cg->fgfs[fngfs + ShellPatch::dRdyz], + cg->fgfs[fngfs + ShellPatch::dRdzz], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[Rpsi4->sgfn], cg->fgfs[Ipsi4->sgfn], + Symmetry, Pp->data->sst); +#else +#error "not recognized Psi4type" +#endif + } + if (BL == Pp->data->ble) + break; + BL = BL->next; + } + Pp = Pp->next; + } + + SH->Synch(DG_List, Symmetry); +#if 0 +// interpolate Psi4 + SH->CS_Inter(DG_List,Symmetry); +#endif + } +#endif + + DG_List->clearList(); + + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"end of Compute_Psi4"); +} + 
+//================================================================================================ + + + +//================================================================================================ + +// This member function sets the black holes' initial puncture positions + +//================================================================================================ + +void bssn_class::Setup_Black_Hole_position() +{ + char filename[50]; + { + map::iterator iter = parameters::str_par.find("inputpar"); + if (iter != parameters::str_par.end()) + { + strcpy(filename, (iter->second).c_str()); + } + else + { + cout << "Error inputpar" << endl; + exit(0); + } + } + // read parameter from file + { + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + ifstream inf(filename, ifstream::in); + if (!inf.good() && myrank == 0) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "Can not open parameter file " << filename + << " for inputing information of black holes" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "BSSN" && skey == "BH_num") + { + BH_num_input = BH_num = atoi(sval.c_str()); + break; + } + } + inf.close(); + } + // set up the data for black holes + // these arrays will be deleted when bssn_class is deleted + Pmom = new double[3 * BH_num]; + Spin = new double[3 * BH_num]; + Mass = new double[BH_num]; + Porg0 = new double *[BH_num]; + Porgbr = new double *[BH_num]; + Porg = new double *[BH_num]; + Porg1 = new double *[BH_num]; + Porg_rhs = new double *[BH_num]; + for (int i = 0; i < BH_num; i++) + { + Porg0[i] = new double[3]; + 
Porgbr[i] = new double[3]; + Porg[i] = new double[3]; + Porg1[i] = new double[3]; + Porg_rhs[i] = new double[3]; + } + // read parameter from file + { + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + ifstream inf(filename, ifstream::in); + if (!inf.good() && myrank == 0) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "Can not open parameter file " << filename + << " for inputing information of black holes" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "BSSN" && sind < BH_num) + { + if (skey == "Mass") + Mass[sind] = atof(sval.c_str()); + else if (skey == "Porgx") + Porg0[sind][0] = atof(sval.c_str()); + else if (skey == "Porgy") + Porg0[sind][1] = atof(sval.c_str()); + else if (skey == "Porgz") + Porg0[sind][2] = atof(sval.c_str()); + else if (skey == "Spinx") + Spin[sind * 3] = atof(sval.c_str()); + else if (skey == "Spiny") + Spin[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Spinz") + Spin[sind * 3 + 2] = atof(sval.c_str()); + else if (skey == "Pmomx") + Pmom[sind * 3] = atof(sval.c_str()); + else if (skey == "Pmomy") + Pmom[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Pmomz") + Pmom[sind * 3 + 2] = atof(sval.c_str()); + } + } + inf.close(); + } + // echo information of Black holes + if (myrank == 0) + { + cout << endl; + cout << " initial information of " << BH_num << " Black Hole(s) " << endl; + cout << setw(12) << "Mass" + << setw(12) << "x" + << setw(12) << "y" + << setw(12) << "z" + << setw(16) << "Px" + << setw(16) << "Py" + << setw(12) << "Pz" + << setw(12) << "Sx" + << setw(12) << "Sy" + << setw(12) << 
"Sz" << endl; + for (int i = 0; i < BH_num; i++) + { + cout << setw(12) << Mass[i] + << setw(12) << Porg0[i][0] + << setw(12) << Porg0[i][1] + << setw(12) << Porg0[i][2] + << setw(16) << Pmom[i * 3] + << setw(16) << Pmom[i * 3 + 1] + << setw(12) << Pmom[i * 3 + 2] + << setw(12) << Spin[i * 3] + << setw(12) << Spin[i * 3 + 1] + << setw(12) << Spin[i * 3 + 2] << endl; + } + } + + int maxl = 1; + int levels; + int *grids; + double bbox[6]; + // read parameter from file + { + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind1, sind2, sind3; + ifstream inf(filename, ifstream::in); + if (!inf.good() && myrank == 0) + { + cout << "bssn_class::Setup_Black_Hole_position: Can not open parameter file " << filename + << " for inputing information of black holes" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind1); + if (status == -1) + { + cout << "error reading parameter file " << filename << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "cgh" && skey == "levels") + { + levels = atoi(sval.c_str()); + break; + } + } + inf.close(); + } + grids = new int[levels]; + // read parameter from file + { + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind1, sind2, sind3; + ifstream inf(filename, ifstream::in); + if (!inf.good() && myrank == 0) + { + cout << "bssn_class::Setup_Black_Hole_position: Can not open parameter file " << filename + << " for inputing information of black holes" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind1, sind2, sind3); + if (status == -1) + { + cout << "error reading parameter file " << filename << " in line " << i << endl; + 
MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "cgh" && skey == "grids" && sind1 < levels) + grids[sind1] = atoi(sval.c_str()); + if (sgrp == "cgh" && skey == "bbox" && sind1 == 0 && sind2 == 0) + bbox[sind3] = atof(sval.c_str()); + } + inf.close(); + } + for (int i = 0; i < levels; i++) + if (maxl < grids[i]) + maxl = grids[i]; + + delete[] grids; + + if (BH_num > maxl) + { + int BH_numc = BH_num; + for (int i = 0; i < BH_num; i++) + if (Porg0[i][0] < bbox[0] || Porg0[i][0] > bbox[3] || + Porg0[i][1] < bbox[1] || Porg0[i][1] > bbox[4] || + Porg0[i][2] < bbox[2] || Porg0[i][2] > bbox[5]) + { + delete[] Porg0[i]; + Porg0[i] = 0; + BH_numc--; + } + + if (BH_num > BH_numc) + { + maxl = BH_numc; + int bhi; + double *tmp; + + tmp = Pmom; + Pmom = new double[3 * maxl]; + bhi = 0; + for (int i = 0; i < BH_num; i++) + if (Porg0[i]) + { + for (int j = 0; j < 3; j++) + Pmom[3 * bhi + j] = tmp[3 * i + j]; + bhi++; + } + delete[] tmp; + + tmp = Spin; + Spin = new double[3 * maxl]; + bhi = 0; + for (int i = 0; i < BH_num; i++) + if (Porg0[i]) + { + for (int j = 0; j < 3; j++) + Spin[3 * bhi + j] = tmp[3 * i + j]; + bhi++; + } + delete[] tmp; + + tmp = Mass; + Mass = new double[3 * maxl]; + bhi = 0; + for (int i = 0; i < BH_num; i++) + if (Porg0[i]) + { + Mass[bhi] = tmp[i]; + bhi++; + } + delete[] tmp; + + double **ttmp; + ttmp = Porg0; + Porg0 = new double *[maxl]; + bhi = 0; + for (int i = 0; i < BH_num; i++) + if (ttmp[i]) + { + Porg0[bhi] = ttmp[i]; + bhi++; + } + delete[] ttmp; + + for (int i = 0; i < BH_num; i++) + { + delete[] Porgbr[i]; + delete[] Porg[i]; + delete[] Porg1[i]; + delete[] Porg_rhs[i]; + } + delete[] Porgbr; + delete[] Porg; + delete[] Porg1; + delete[] Porg_rhs; + + BH_num = maxl; + + Porgbr = new double *[BH_num]; + Porg = new double *[BH_num]; + Porg1 = new double *[BH_num]; + Porg_rhs = new double *[BH_num]; + + for (int i = 0; i < BH_num; i++) + { + Porgbr[i] = new double[3]; + Porg[i] = new double[3]; + 
Porg1[i] = new double[3]; + Porg_rhs[i] = new double[3]; + } + } + } + + for (int i = 0; i < BH_num; i++) + { + for (int j = 0; j < dim; j++) + Porgbr[i][j] = Porg0[i][j]; + } + + setpbh(BH_num, Porg0, Mass, BH_num_input); +} + +//================================================================================================ + + + +//================================================================================================ + +// This member function computes black hole positions + +//================================================================================================ + +#if 0 +// old code + +void bssn_class::compute_Porg_rhs(double **BH_PS,double **BH_RHS,var *forx,var *fory,var *forz,int lev) +{ + const int InList = 3; + + MyList * DG_List=new MyList(forx); + DG_List->insert(fory); DG_List->insert(forz); + + int n; + double *x1,*y1,*z1; + double *shellf; + shellf=new double[3*BH_num]; + double *pox[3]; + for(int i=0;i<3;i++) pox[i] = new double[BH_num]; + for( n = 0; n < BH_num; n++) + { + pox[0][n] = BH_PS[n][0]; + pox[1][n] = BH_PS[n][1]; + pox[2][n] = BH_PS[n][2]; + } + + if(!Parallel::PatList_Interp_Points(GH->PatL[lev],DG_List,BH_num,pox,shellf,Symmetry)) + { + ErrorMonitor->outfile<<"fail to find black holes at t = "<outfile<<"(x,y,z) = ("<clearList(); + delete[] shellf; + for(int i=0;i<3;i++) delete[] pox[i]; +} + +#else + +// new code considering diferent levels for different black hole + +void bssn_class::compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, var *fory, var *forz, int ilev) +{ + const int InList = 3; + + MyList *DG_List = new MyList(forx); + DG_List->insert(fory); + DG_List->insert(forz); + + double *x1, *y1, *z1; + double *shellf; + shellf = new double[3]; + double *pox[3]; + for (int i = 0; i < 3; i++) + pox[i] = new double[1]; + + for (int n = 0; n < BH_num; n++) + { + pox[0][0] = BH_PS[n][0]; + pox[1][0] = BH_PS[n][1]; + pox[2][0] = BH_PS[n][2]; + + int lev = ilev; + +#if (PSTR == 0) + while 
(!Parallel::PatList_Interp_Points(GH->PatL[lev], DG_List, 1, pox, shellf, Symmetry)) +#elif (PSTR == 1 || PSTR == 2 || PSTR == 3) + while (!Parallel::PatList_Interp_Points(GH->PatL[lev], DG_List, 1, pox, shellf, Symmetry, GH->Commlev[lev])) +#endif + { + lev--; + if (lev < 0) + { + ErrorMonitor->outfile << "fail to find black holes at t = " << PhysTime << endl; + for (n = 0; n < BH_num; n++) + ErrorMonitor->outfile << "(x,y,z) = (" + << pox[0][n] << "," << pox[1][n] << "," << pox[2][n] + << ")" << endl; + break; + } + } + + if (lev >= 0) + { + BH_RHS[n][0] = -shellf[0]; + BH_RHS[n][1] = -shellf[1]; + BH_RHS[n][2] = -shellf[2]; + } + } + + DG_List->clearList(); + delete[] shellf; + for (int i = 0; i < 3; i++) + delete[] pox[i]; +} +#endif + +//================================================================================================ + + + +//================================================================================================ + +// This member function computes gravitational-wave related quantities and performs analysis + +//================================================================================================ + +void bssn_class::AnalysisStuff(int lev, double dT_lev) +{ + LastAnas += dT_lev; + + if (LastAnas >= AnasTime) + { +#ifdef Point_Psi4 +#error "not support parallel levels yet" + // Gam_ijk and R_ij have been calculated in Interp_Constraint() + double SYM = 1, ANT = -1; + for (int levh = lev; levh < GH->levels; levh++) + { + MyList *Pp = GH->PatL[levh]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { + f_fderivs(cg->shape, cg->fgfs[phi0->sgfn], + cg->fgfs[phix->sgfn], cg->fgfs[phiy->sgfn], cg->fgfs[phiz->sgfn], + cg->X[0], cg->X[1], cg->X[2], + SYM, SYM, SYM, Symmetry, levh); + f_fderivs(cg->shape, cg->fgfs[trK0->sgfn], + cg->fgfs[trKx->sgfn], cg->fgfs[trKy->sgfn], cg->fgfs[trKz->sgfn], + cg->X[0], cg->X[1], cg->X[2], + SYM, SYM, SYM, Symmetry, levh); + 
f_fderivs(cg->shape, cg->fgfs[Axx0->sgfn], + cg->fgfs[Axxx->sgfn], cg->fgfs[Axxy->sgfn], cg->fgfs[Axxz->sgfn], + cg->X[0], cg->X[1], cg->X[2], + SYM, SYM, SYM, Symmetry, levh); + f_fderivs(cg->shape, cg->fgfs[Axy0->sgfn], + cg->fgfs[Axyx->sgfn], cg->fgfs[Axyy->sgfn], cg->fgfs[Axyz->sgfn], + cg->X[0], cg->X[1], cg->X[2], + ANT, ANT, SYM, Symmetry, levh); + f_fderivs(cg->shape, cg->fgfs[Axz0->sgfn], + cg->fgfs[Axzx->sgfn], cg->fgfs[Axzy->sgfn], cg->fgfs[Axzz->sgfn], + cg->X[0], cg->X[1], cg->X[2], + ANT, SYM, ANT, Symmetry, levh); + f_fderivs(cg->shape, cg->fgfs[Ayy0->sgfn], + cg->fgfs[Ayyx->sgfn], cg->fgfs[Ayyy->sgfn], cg->fgfs[Ayyz->sgfn], + cg->X[0], cg->X[1], cg->X[2], + SYM, SYM, SYM, Symmetry, levh); + f_fderivs(cg->shape, cg->fgfs[Ayz0->sgfn], + cg->fgfs[Ayzx->sgfn], cg->fgfs[Ayzy->sgfn], cg->fgfs[Ayzz->sgfn], + cg->X[0], cg->X[1], cg->X[2], + SYM, ANT, ANT, Symmetry, levh); + f_fderivs(cg->shape, cg->fgfs[Azz0->sgfn], + cg->fgfs[Azzx->sgfn], cg->fgfs[Azzy->sgfn], cg->fgfs[Azzz->sgfn], + cg->X[0], cg->X[1], cg->X[2], + SYM, SYM, SYM, Symmetry, levh); + } + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } + +#ifdef WithShell + // ShellPatch part + if (lev == 0) + { + MyList *Pp = SH->PatL; + while (Pp) + { + MyList *BL = Pp->data->blb; + int fngfs = Pp->data->fngfs; + while (BL) + { + Block *cg = BL->data; + if (myrank == cg->rank) + { + f_fderivs_shc(cg->shape, cg->fgfs[phi0->sgfn], + cg->fgfs[phix->sgfn], cg->fgfs[phiy->sgfn], cg->fgfs[phiz->sgfn], + cg->X[0], cg->X[1], cg->X[2], + phi0->SoA[0], phi0->SoA[1], phi0->SoA[2], + Symmetry, levh, Pp->data->sst, + cg->fgfs[fngfs + ShellPatch::drhodx], + cg->fgfs[fngfs + ShellPatch::drhody], + cg->fgfs[fngfs + ShellPatch::drhodz], + cg->fgfs[fngfs + ShellPatch::dsigmadx], + cg->fgfs[fngfs + ShellPatch::dsigmady], + cg->fgfs[fngfs + ShellPatch::dsigmadz], + cg->fgfs[fngfs + ShellPatch::dRdx], + cg->fgfs[fngfs + ShellPatch::dRdy], + cg->fgfs[fngfs + ShellPatch::dRdz]); + 
f_fderivs_shc(cg->shape, cg->fgfs[trK0->sgfn], + cg->fgfs[trKx->sgfn], cg->fgfs[trKy->sgfn], cg->fgfs[trKz->sgfn], + cg->X[0], cg->X[1], cg->X[2], + trK0->SoA[0], trK0->SoA[1], trK0->SoA[2], + Symmetry, levh, Pp->data->sst, + cg->fgfs[fngfs + ShellPatch::drhodx], + cg->fgfs[fngfs + ShellPatch::drhody], + cg->fgfs[fngfs + ShellPatch::drhodz], + cg->fgfs[fngfs + ShellPatch::dsigmadx], + cg->fgfs[fngfs + ShellPatch::dsigmady], + cg->fgfs[fngfs + ShellPatch::dsigmadz], + cg->fgfs[fngfs + ShellPatch::dRdx], + cg->fgfs[fngfs + ShellPatch::dRdy], + cg->fgfs[fngfs + ShellPatch::dRdz]); + f_fderivs_shc(cg->shape, cg->fgfs[Axx0->sgfn], + cg->fgfs[Axxx->sgfn], cg->fgfs[Axxy->sgfn], cg->fgfs[Axxz->sgfn], + cg->X[0], cg->X[1], cg->X[2], + Axx0->SoA[0], Axx0->SoA[1], Axx0->SoA[2], + Symmetry, levh, Pp->data->sst, + cg->fgfs[fngfs + ShellPatch::drhodx], + cg->fgfs[fngfs + ShellPatch::drhody], + cg->fgfs[fngfs + ShellPatch::drhodz], + cg->fgfs[fngfs + ShellPatch::dsigmadx], + cg->fgfs[fngfs + ShellPatch::dsigmady], + cg->fgfs[fngfs + ShellPatch::dsigmadz], + cg->fgfs[fngfs + ShellPatch::dRdx], + cg->fgfs[fngfs + ShellPatch::dRdy], + cg->fgfs[fngfs + ShellPatch::dRdz]); + f_fderivs_shc(cg->shape, cg->fgfs[Axy0->sgfn], + cg->fgfs[Axyx->sgfn], cg->fgfs[Axyy->sgfn], cg->fgfs[Axyz->sgfn], + cg->X[0], cg->X[1], cg->X[2], + Axy0->SoA[0], Axy0->SoA[1], Axy0->SoA[2], + Symmetry, levh, Pp->data->sst, + cg->fgfs[fngfs + ShellPatch::drhodx], + cg->fgfs[fngfs + ShellPatch::drhody], + cg->fgfs[fngfs + ShellPatch::drhodz], + cg->fgfs[fngfs + ShellPatch::dsigmadx], + cg->fgfs[fngfs + ShellPatch::dsigmady], + cg->fgfs[fngfs + ShellPatch::dsigmadz], + cg->fgfs[fngfs + ShellPatch::dRdx], + cg->fgfs[fngfs + ShellPatch::dRdy], + cg->fgfs[fngfs + ShellPatch::dRdz]); + f_fderivs_shc(cg->shape, cg->fgfs[Axz0->sgfn], + cg->fgfs[Axzx->sgfn], cg->fgfs[Axzy->sgfn], cg->fgfs[Axzz->sgfn], + cg->X[0], cg->X[1], cg->X[2], + Axz0->SoA[0], Axz0->SoA[1], Axz0->SoA[2], + Symmetry, levh, Pp->data->sst, + 
cg->fgfs[fngfs + ShellPatch::drhodx], + cg->fgfs[fngfs + ShellPatch::drhody], + cg->fgfs[fngfs + ShellPatch::drhodz], + cg->fgfs[fngfs + ShellPatch::dsigmadx], + cg->fgfs[fngfs + ShellPatch::dsigmady], + cg->fgfs[fngfs + ShellPatch::dsigmadz], + cg->fgfs[fngfs + ShellPatch::dRdx], + cg->fgfs[fngfs + ShellPatch::dRdy], + cg->fgfs[fngfs + ShellPatch::dRdz]); + f_fderivs_shc(cg->shape, cg->fgfs[Ayy0->sgfn], + cg->fgfs[Ayyx->sgfn], cg->fgfs[Ayyy->sgfn], cg->fgfs[Ayyz->sgfn], + cg->X[0], cg->X[1], cg->X[2], + Ayy0->SoA[0], Ayy0->SoA[1], Ayy0->SoA[2], + Symmetry, levh, Pp->data->sst, + cg->fgfs[fngfs + ShellPatch::drhodx], + cg->fgfs[fngfs + ShellPatch::drhody], + cg->fgfs[fngfs + ShellPatch::drhodz], + cg->fgfs[fngfs + ShellPatch::dsigmadx], + cg->fgfs[fngfs + ShellPatch::dsigmady], + cg->fgfs[fngfs + ShellPatch::dsigmadz], + cg->fgfs[fngfs + ShellPatch::dRdx], + cg->fgfs[fngfs + ShellPatch::dRdy], + cg->fgfs[fngfs + ShellPatch::dRdz]); + f_fderivs_shc(cg->shape, cg->fgfs[Ayz0->sgfn], + cg->fgfs[Ayzx->sgfn], cg->fgfs[Ayzy->sgfn], cg->fgfs[Ayzz->sgfn], + cg->X[0], cg->X[1], cg->X[2], + Ayz0->SoA[0], Ayz0->SoA[1], Ayz0->SoA[2], + Symmetry, levh, Pp->data->sst, + cg->fgfs[fngfs + ShellPatch::drhodx], + cg->fgfs[fngfs + ShellPatch::drhody], + cg->fgfs[fngfs + ShellPatch::drhodz], + cg->fgfs[fngfs + ShellPatch::dsigmadx], + cg->fgfs[fngfs + ShellPatch::dsigmady], + cg->fgfs[fngfs + ShellPatch::dsigmadz], + cg->fgfs[fngfs + ShellPatch::dRdx], + cg->fgfs[fngfs + ShellPatch::dRdy], + cg->fgfs[fngfs + ShellPatch::dRdz]); + f_fderivs_shc(cg->shape, cg->fgfs[Azz0->sgfn], + cg->fgfs[Azzx->sgfn], cg->fgfs[Azzy->sgfn], cg->fgfs[Azzz->sgfn], + cg->X[0], cg->X[1], cg->X[2], + Azz0->SoA[0], Azz0->SoA[1], Azz0->SoA[2], + Symmetry, levh, Pp->data->sst, + cg->fgfs[fngfs + ShellPatch::drhodx], + cg->fgfs[fngfs + ShellPatch::drhody], + cg->fgfs[fngfs + ShellPatch::drhodz], + cg->fgfs[fngfs + ShellPatch::dsigmadx], + cg->fgfs[fngfs + ShellPatch::dsigmady], + cg->fgfs[fngfs + 
ShellPatch::dsigmadz], + cg->fgfs[fngfs + ShellPatch::dRdx], + cg->fgfs[fngfs + ShellPatch::dRdy], + cg->fgfs[fngfs + ShellPatch::dRdz]); + } + if (BL == Pp->data->ble) + break; + BL = BL->next; + } + Pp = Pp->next; + } + } +#endif + } +#else + Compute_Psi4(lev); +#endif + double *RP, *IP, *RoutMAP; + int NN = 0; + for (int pl = 2; pl < maxl + 1; pl++) + for (int pm = -pl; pm < pl + 1; pm++) + NN++; + RP = new double[NN]; + IP = new double[NN]; + RoutMAP = new double[7]; + double Rex = maxrex; + for (int i = 0; i < decn; i++) + { +#ifdef Point_Psi4 + Waveshell->surf_Wave(Rex, GH, SH, + phi, trK, + gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, + Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, + phix, phiy, phiz, + trKx, trKy, trKz, + Axxx, Axxy, Axxz, + Axyx, Axyy, Axyz, + Axzx, Axzy, Axzz, + Ayyx, Ayyy, Ayyz, + Ayzx, Ayzy, Ayzz, + Azzx, Azzy, Azzz, + Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz, + Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz, + Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz, + Rxx, Rxy, Rxz, Ryy, Ryz, Rzz, + 2, maxl, NN, RP, IP, ErrorMonitor); +#ifdef WithShell + if (lev > 0 || Rex < GH->bbox[0][0][3]) + { + Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0, + gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, + Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, + Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables + RoutMAP, ErrorMonitor); + } + else + { + Waveshell->surf_MassPAng(Rex, lev, SH, phi0, trK0, + gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, + Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, + Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables + RoutMAP, ErrorMonitor); + } +#else + Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0, + gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, + Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, + Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables + RoutMAP, ErrorMonitor); +#endif +#else +// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before surface integral"); 
+#ifdef WithShell + if (lev > 0 || Rex < GH->bbox[0][0][3]) + { + Waveshell->surf_Wave(Rex, lev, GH, Rpsi4, Ipsi4, 2, maxl, NN, RP, IP, ErrorMonitor); + Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0, + gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, + Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, + Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables + RoutMAP, ErrorMonitor); + } + else + { + Waveshell->surf_Wave(Rex, lev, SH, Rpsi4, Ipsi4, 2, maxl, NN, RP, IP, ErrorMonitor); + Waveshell->surf_MassPAng(Rex, lev, SH, phi0, trK0, + gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, + Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, + Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables + RoutMAP, ErrorMonitor); + } +#else +#if (PSTR == 0) + Waveshell->surf_Wave(Rex, lev, GH, Rpsi4, Ipsi4, 2, maxl, NN, RP, IP, ErrorMonitor); + Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0, + gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, + Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, + Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables + RoutMAP, ErrorMonitor); +#elif (PSTR == 1 || PSTR == 2) + Waveshell->surf_Wave(Rex, lev, GH, Rpsi4, Ipsi4, 2, maxl, NN, RP, IP, ErrorMonitor, GH->Commlev[lev]); + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after surf_Wave"); + Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0, + gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, + Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, + Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables + RoutMAP, ErrorMonitor, GH->Commlev[lev]); +#endif +#endif +// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"end surface integral"); +#endif + if (i == 0) + { + ADMMass = RoutMAP[0]; + } +#if (PSTR == 1 || PSTR == 2) + if (GH->start_rank[a_lev] > 0) + { + MPI_Status status; + // receive + if (myrank == 0) + { + MPI_Recv(RP, NN, MPI_DOUBLE, GH->start_rank[a_lev], 1, MPI_COMM_WORLD, &status); + MPI_Recv(IP, NN, MPI_DOUBLE, 
GH->start_rank[a_lev], 2, MPI_COMM_WORLD, &status); + MPI_Recv(RoutMAP, 7, MPI_DOUBLE, GH->start_rank[a_lev], 3, MPI_COMM_WORLD, &status); + } + // send + if (myrank == GH->start_rank[a_lev]) + { + MPI_Send(RP, NN, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD); + MPI_Send(IP, NN, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD); + MPI_Send(RoutMAP, 7, MPI_DOUBLE, 0, 3, MPI_COMM_WORLD); + } + } +#endif + Psi4Monitor->writefile(PhysTime, NN, RP, IP); + MAPMonitor->writefile(PhysTime, 7, RoutMAP); + Rex = Rex - drex; + } + delete[] RP; + delete[] IP; + delete[] RoutMAP; + + // black hole's position + { + double *pox; + pox = new double[dim * BH_num]; + for (int bhi = 0; bhi < BH_num; bhi++) + for (int i = 0; i < dim; i++) + pox[dim * bhi + i] = Porg0[bhi][i]; + BHMonitor->writefile(PhysTime, dim * BH_num, pox); + delete[] pox; + } + + LastAnas = 0; + } +} + +//================================================================================================ + + + +//================================================================================================ + +// This member function computes and outputs constraint violations + +//================================================================================================ + +void bssn_class::Constraint_Out() +{ + LastConsOut += dT * pow(0.5, Mymax(0, trfls)); + + if (LastConsOut >= AnasTime) + // Constraint violation + { + // recompute least the constraint data lost for moved new grid + for (int lev = 0; lev < GH->levels; lev++) + { + // make sure the data consistent for higher levels + if (lev > 0) // if the constrait quantities can be reused from the step rhs calculation + { + double TRK4 = PhysTime; + double ndeps = numepsb; + int pre = 0; + MyList *Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { + f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], 
cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], + cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], + cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], + cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], + cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], + cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], + cg->fgfs[Lap_rhs->sgfn], + cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], + cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], + cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], + cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], + cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], + cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], + cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], + cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], + cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], + cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], + cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], + cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], + cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], + cg->fgfs[Cons_Ham->sgfn], + cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], + 
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], + Symmetry, lev, ndeps, pre); + } + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } + } + Parallel::Sync(GH->PatL[lev], ConstraintList, Symmetry); + } +#ifdef WithShell + if (0) // if the constrait quantities can be reused from the step rhs calculation + { + MyList *sPp; + sPp = SH->PatL; + while (sPp) + { + double TRK4 = PhysTime; + int pre = 0; + int lev = 0; + MyList *BP = sPp->data->blb; + int fngfs = sPp->data->fngfs; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { + f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[fngfs + ShellPatch::gx], + cg->fgfs[fngfs + ShellPatch::gy], + cg->fgfs[fngfs + ShellPatch::gz], + cg->fgfs[fngfs + ShellPatch::drhodx], + cg->fgfs[fngfs + ShellPatch::drhody], + cg->fgfs[fngfs + ShellPatch::drhodz], + cg->fgfs[fngfs + ShellPatch::dsigmadx], + cg->fgfs[fngfs + ShellPatch::dsigmady], + cg->fgfs[fngfs + ShellPatch::dsigmadz], + cg->fgfs[fngfs + ShellPatch::dRdx], + cg->fgfs[fngfs + ShellPatch::dRdy], + cg->fgfs[fngfs + ShellPatch::dRdz], + cg->fgfs[fngfs + ShellPatch::drhodxx], + cg->fgfs[fngfs + ShellPatch::drhodxy], + cg->fgfs[fngfs + ShellPatch::drhodxz], + cg->fgfs[fngfs + ShellPatch::drhodyy], + cg->fgfs[fngfs + ShellPatch::drhodyz], + cg->fgfs[fngfs + ShellPatch::drhodzz], + cg->fgfs[fngfs + ShellPatch::dsigmadxx], + cg->fgfs[fngfs + ShellPatch::dsigmadxy], + cg->fgfs[fngfs + ShellPatch::dsigmadxz], + cg->fgfs[fngfs + ShellPatch::dsigmadyy], + cg->fgfs[fngfs + ShellPatch::dsigmadyz], + cg->fgfs[fngfs + ShellPatch::dsigmadzz], + cg->fgfs[fngfs + ShellPatch::dRdxx], + cg->fgfs[fngfs + ShellPatch::dRdxy], + cg->fgfs[fngfs + ShellPatch::dRdxz], + cg->fgfs[fngfs + ShellPatch::dRdyy], + cg->fgfs[fngfs + ShellPatch::dRdyz], + cg->fgfs[fngfs + ShellPatch::dRdzz], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + 
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], + cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], + cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], + cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], + cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], + cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], + cg->fgfs[Lap_rhs->sgfn], + cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], + cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], + cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], + cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], + cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], + cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], + cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], + cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], + cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], + cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], + cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], + cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], + cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], + cg->fgfs[Cons_Ham->sgfn], + cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], + cg->fgfs[Cons_Gx->sgfn], 
cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], + Symmetry, lev, numepsh, sPp->data->sst, pre); + } + if (BP == sPp->data->ble) + break; + BP = BP->next; + } + sPp = sPp->next; + } + } + SH->Synch(ConstraintList, Symmetry); +#endif + + double ConV[7]; +#if (PSTR == 1 || PSTR == 2) + double ConV_h[7]; +#endif + +#ifdef WithShell + ConV[0] = SH->L2Norm(Cons_Ham); + ConV[1] = SH->L2Norm(Cons_Px); + ConV[2] = SH->L2Norm(Cons_Py); + ConV[3] = SH->L2Norm(Cons_Pz); + ConV[4] = SH->L2Norm(Cons_Gx); + ConV[5] = SH->L2Norm(Cons_Gy); + ConV[6] = SH->L2Norm(Cons_Gz); + ConVMonitor->writefile(PhysTime, 7, ConV); +#endif + for (int levi = 0; levi < GH->levels; levi++) + { +#if (PSTR == 0) + ConV[0] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Ham); + ConV[1] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Px); + ConV[2] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Py); + ConV[3] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Pz); + ConV[4] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gx); + ConV[5] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gy); + ConV[6] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gz); +#elif (PSTR == 1 || PSTR == 2) + ConV[0] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Ham, GH->Commlev[levi]); + ConV[1] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Px, GH->Commlev[levi]); + ConV[2] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Py, GH->Commlev[levi]); + ConV[3] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Pz, GH->Commlev[levi]); + ConV[4] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gx, GH->Commlev[levi]); + ConV[5] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gy, GH->Commlev[levi]); + ConV[6] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gz, GH->Commlev[levi]); + // misc::tillherecheck("before collect data to cpu0"); + // MPI_ALLREDUCE( sendbuf, recvbuf, count, datatype, op, comm), sendbu and recvbuf must be different + if (levi > 0) + { + if (GH->mylev == levi && myrank == GH->start_rank[levi]) + for (int i = 0; i < 7; i++) + ConV_h[i] = ConV[i]; 
+ else + for (int i = 0; i < 7; i++) + ConV_h[i] = 0; + MPI_Allreduce(ConV_h, ConV, 7, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + } +#endif + ConVMonitor->writefile(PhysTime, 7, ConV); + /* + if(fabs(ConV[0])<0.00001) + { + MyList * DG_List=new MyList(Cons_Ham); + DG_List->insert(Cons_Px); DG_List->insert(Cons_Py); DG_List->insert(Cons_Px); + DG_List->insert(Cons_Gx); DG_List->insert(Cons_Gy); DG_List->insert(Cons_Gx); + Parallel::Dump_Data(GH->PatL[levi],DG_List,"jiu",0,1); + DG_List->clearList(); + if(myrank==0) MPI_Abort(MPI_COMM_WORLD,1); + } + */ + } + + Interp_Constraint(false); + + LastConsOut = 0; + } +} + +//================================================================================================ + + + +//================================================================================================ + +// This member function computes derivatives required for apparent-horizon calculations + +//================================================================================================ + +#ifdef With_AHF +void bssn_class::AH_Prepare_derivatives() +{ + double SYM = 1.0, ANT = -1.0; + int ZEO = 0; + + for (int lev = 0; lev < GH->levels; lev++) + { + MyList *Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { + f_fderivs(cg->shape, cg->fgfs[phi0->sgfn], + cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], + cg->X[0], cg->X[1], cg->X[2], + SYM, SYM, SYM, Symmetry, ZEO); + f_fderivs(cg->shape, cg->fgfs[gxx0->sgfn], + cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamzxx->sgfn], + cg->X[0], cg->X[1], cg->X[2], + SYM, SYM, SYM, Symmetry, ZEO); + f_fderivs(cg->shape, cg->fgfs[gxy0->sgfn], + cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamzxy->sgfn], + cg->X[0], cg->X[1], cg->X[2], + ANT, ANT, SYM, Symmetry, ZEO); + f_fderivs(cg->shape, cg->fgfs[gxz0->sgfn], + cg->fgfs[Gamxxz->sgfn], cg->fgfs[Gamyxz->sgfn], 
cg->fgfs[Gamzxz->sgfn], + cg->X[0], cg->X[1], cg->X[2], + ANT, SYM, ANT, Symmetry, ZEO); + f_fderivs(cg->shape, cg->fgfs[gyy0->sgfn], + cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamzyy->sgfn], + cg->X[0], cg->X[1], cg->X[2], + SYM, SYM, SYM, Symmetry, ZEO); + f_fderivs(cg->shape, cg->fgfs[gyz0->sgfn], + cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamzyz->sgfn], + cg->X[0], cg->X[1], cg->X[2], + SYM, ANT, ANT, Symmetry, ZEO); + f_fderivs(cg->shape, cg->fgfs[gzz0->sgfn], + cg->fgfs[Gamxzz->sgfn], cg->fgfs[Gamyzz->sgfn], cg->fgfs[Gamzzz->sgfn], + cg->X[0], cg->X[1], cg->X[2], + SYM, SYM, SYM, Symmetry, ZEO); + } + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } + Parallel::Sync(GH->PatL[lev], AHDList, Symmetry); + } +} + +//================================================================================================ + + + +//================================================================================================ + +// This member function interpolates apparent-horizon data + +//================================================================================================ + +bool bssn_class::AH_Interp_Points(MyList *VarList, + int NN, double **XX, + double *Shellf, int Symmetryi) +{ + MyList *varl; + int num_var = 0; + varl = VarList; + while (varl) + { + num_var++; + varl = varl->next; + } + + double pox[3]; + for (int i = 0; i < NN; i++) + { + for (int j = 0; j < 3; j++) + pox[j] = XX[j][i]; + int lev = GH->levels - 1; + bool notfound = true; + + while (notfound) + { + if (lev < 0) + { +#ifdef WithShell + if (SH->Interp_One_Point(VarList, pox, Shellf + i * num_var, Symmetryi)) + { + return true; + } + if (myrank == 0) + { + cout << " bssn_class::AH_Interp_Points: point (" + << pox[0] << "," << pox[1] << "," << pox[2] + << ") is out of cgh and shell domain!" 
<< endl; + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << " bssn_class::AH_Interp_Points: point (" + << pox[0] << "," << pox[1] << "," << pox[2] + << ") is out of cgh and shell domain!" << endl; + } + MPI_Abort(MPI_COMM_WORLD, 1); +#else + if (myrank == 0) + { + cout << " bssn_class::AH_Interp_Points: point (" + << pox[0] << "," << pox[1] << "," << pox[2] + << ") is out of cgh domain!" << endl; + if (ErrorMonitor->outfile) + ErrorMonitor->outfile << " bssn_class::AH_Interp_Points: point (" + << pox[0] << "," << pox[1] << "," << pox[2] + << ") is out of cgh domain!" << endl; + } + MPI_Abort(MPI_COMM_WORLD, 1); +#endif + return false; + } + MyList *Pp = GH->PatL[lev]; + while (Pp) + { + if (Pp->data->Interp_ONE_Point(VarList, pox, Shellf + i * num_var, Symmetryi)) + { + notfound = false; + break; + } + Pp = Pp->next; + } + lev--; + } + } + return true; +} + +//================================================================================================ + + + +//================================================================================================ + +// This member function computes apparent horizons + +//================================================================================================ + +void bssn_class::AH_Step_Find(int lev, double dT_lev) +{ + if ((lev == GH->levels - 1)) + { + int ncount = int(PhysTime / dT_lev); + bool tf = false; + for (int ihn = 0; ihn < HN_num; ihn++) + { + if (ncount % findeveryl[ihn] == 0) + { + tf = true; + break; + } + } + if (tf) + { + clock_t prev_clock, curr_clock; + if (myrank == 0) + prev_clock = clock(); + const int cdumpid = int(PhysTime / AHdumptime) + 1; + for (int ihn = 0; ihn < HN_num; ihn++) + dumpid[ihn] = cdumpid; + + double gam; + for (int ihn = 0; ihn < BH_num; ihn++) + { + xc[ihn] = Porg0[ihn][0]; + yc[ihn] = Porg0[ihn][1]; + zc[ihn] = Porg0[ihn][2]; + gam = fabs(Pmom[ihn * 3]) / (Mass[ihn]); + gam = sqrt(1 - gam * gam); + xr[ihn] = Mass[ihn] * gam; + gam = fabs(Pmom[ihn * 3 + 1]) / 
(Mass[ihn]); + gam = sqrt(1 - gam * gam); + yr[ihn] = Mass[ihn] * gam; + gam = fabs(Pmom[ihn * 3 + 2]) / (Mass[ihn]); + gam = sqrt(1 - gam * gam); + zr[ihn] = Mass[ihn] * gam; + dTT[ihn] = -1; + + if (ncount % findeveryl[ihn] == 0) + { + trigger[ihn] = true; + dTT[ihn] = findeveryl[ihn] * dT_lev; + } + else + trigger[ihn] = false; + if (trigger[ihn] && (dumpid[ihn] > lastahdumpid[ihn])) + lastahdumpid[ihn] = dumpid[ihn]; + else + dumpid[ihn] = 0; + } + int ihn = BH_num; + for (int ia = 0; ia < BH_num; ia++) + for (int ib = ia + 1; ib < BH_num; ib++) + { + xc[ihn] = (Porg0[ia][0] + Porg0[ib][0]) / 2; + yc[ihn] = (Porg0[ia][1] + Porg0[ib][1]) / 2; + zc[ihn] = (Porg0[ia][2] + Porg0[ib][2]) / 2; + + xr[ihn] = yr[ihn] = zr[ihn] = Mass[ia] + Mass[ib]; + + dTT[ihn] = -1; + + if (fabs(Porg0[ia][0] - Porg0[ib][0]) < 2 * xr[ihn] && + fabs(Porg0[ia][1] - Porg0[ib][1]) < 2 * xr[ihn] && + fabs(Porg0[ia][2] - Porg0[ib][2]) < 2 * xr[ihn] && + (ncount % findeveryl[ihn] == 0)) + { + trigger[ihn] = true; + dTT[ihn] = findeveryl[ihn] * dT_lev; + } + else + trigger[ihn] = false; + + if (trigger[ihn] && (dumpid[ihn] > lastahdumpid[ihn])) + lastahdumpid[ihn] = dumpid[ihn]; + else + dumpid[ihn] = 0; + + ihn++; + } +#if (ABEtype == 1) + if (PhysTime > 10) + { + ihn--; + trigger[ihn] = true; + xr[ihn] = yr[ihn] = zr[ihn] = 50; + // if(myrank==0) for(ihn=0;ihn 0) + return; + + // recompute least the constraint data lost for moved new grid + for (int lev = 0; lev < GH->levels; lev++) + { + // make sure the data consistent for higher levels + if (lev > 0) // if the constrait quantities can be reused from the step rhs calculation + { + double TRK4 = PhysTime; + double ndeps = numepsb; + int pre = 0; + MyList *Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { + f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], 
cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], + cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], + cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], + cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], + cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], + cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], + cg->fgfs[Lap_rhs->sgfn], + cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], + cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], + cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], + cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], + cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], + cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], + cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], + cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], + cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], + cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], + cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], + cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], + cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], + cg->fgfs[Cons_Ham->sgfn], + cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], + 
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], + Symmetry, lev, ndeps, pre); + } + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } + } + Parallel::Sync(GH->PatL[lev], ConstraintList, Symmetry); + } +#ifdef WithShell + if (0) // if the constrait quantities can be reused from the step rhs calculation + { + MyList *sPp; + sPp = SH->PatL; + while (sPp) + { + double TRK4 = PhysTime; + int pre = 0; + int lev = 0; + MyList *BP = sPp->data->blb; + int fngfs = sPp->data->fngfs; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { + f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[fngfs + ShellPatch::gx], + cg->fgfs[fngfs + ShellPatch::gy], + cg->fgfs[fngfs + ShellPatch::gz], + cg->fgfs[fngfs + ShellPatch::drhodx], + cg->fgfs[fngfs + ShellPatch::drhody], + cg->fgfs[fngfs + ShellPatch::drhodz], + cg->fgfs[fngfs + ShellPatch::dsigmadx], + cg->fgfs[fngfs + ShellPatch::dsigmady], + cg->fgfs[fngfs + ShellPatch::dsigmadz], + cg->fgfs[fngfs + ShellPatch::dRdx], + cg->fgfs[fngfs + ShellPatch::dRdy], + cg->fgfs[fngfs + ShellPatch::dRdz], + cg->fgfs[fngfs + ShellPatch::drhodxx], + cg->fgfs[fngfs + ShellPatch::drhodxy], + cg->fgfs[fngfs + ShellPatch::drhodxz], + cg->fgfs[fngfs + ShellPatch::drhodyy], + cg->fgfs[fngfs + ShellPatch::drhodyz], + cg->fgfs[fngfs + ShellPatch::drhodzz], + cg->fgfs[fngfs + ShellPatch::dsigmadxx], + cg->fgfs[fngfs + ShellPatch::dsigmadxy], + cg->fgfs[fngfs + ShellPatch::dsigmadxz], + cg->fgfs[fngfs + ShellPatch::dsigmadyy], + cg->fgfs[fngfs + ShellPatch::dsigmadyz], + cg->fgfs[fngfs + ShellPatch::dsigmadzz], + cg->fgfs[fngfs + ShellPatch::dRdxx], + cg->fgfs[fngfs + ShellPatch::dRdxy], + cg->fgfs[fngfs + ShellPatch::dRdxz], + cg->fgfs[fngfs + ShellPatch::dRdyy], + cg->fgfs[fngfs + ShellPatch::dRdyz], + cg->fgfs[fngfs + ShellPatch::dRdzz], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + 
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], + cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], + cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], + cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], + cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], + cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], + cg->fgfs[Lap_rhs->sgfn], + cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], + cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], + cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], + cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], + cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], + cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], + cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], + cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], + cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], + cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], + cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], + cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], + cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], + cg->fgfs[Cons_Ham->sgfn], + cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], + cg->fgfs[Cons_Gx->sgfn], 
cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], + Symmetry, lev, numepsh, sPp->data->sst, pre); + } + if (BP == sPp->data->ble) + break; + BP = BP->next; + } + sPp = sPp->next; + } + } + SH->Synch(ConstraintList, Symmetry); +#endif + } + // interpolate + double *x1, *y1, *z1; + const int n = 1000; + double lmax, lmin, dd; + lmin = 0; +#ifdef WithShell + lmax = SH->Rrange[1]; +#else + lmax = GH->bbox[0][0][4]; +#endif +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + dd = (lmax - lmin) / (n - 1); +#else +#ifdef Cell + dd = (lmax - lmin) / n; +#else +#error Not define Vertex nor Cell +#endif +#endif + x1 = new double[n]; + y1 = new double[n]; + z1 = new double[n]; + for (int i = 0; i < n; i++) + { + x1[i] = 0; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + y1[i] = lmin + i * dd; +#else +#ifdef Cell + y1[i] = lmin + (i + 0.5) * dd; +#else +#error Not define Vertex nor Cell +#endif +#endif + z1[i] = 0; + } + + int InList = 0; + + MyList *varl = ConstraintList; + while (varl) + { + InList++; + varl = varl->next; + } + double *shellf; + shellf = new double[n * InList]; + for (int i = 0; i < n; i++) + { + double XX[3]; + XX[0] = x1[i]; + XX[1] = y1[i]; + XX[2] = z1[i]; + bool fg = GH->Interp_One_Point(ConstraintList, XX, shellf + i * InList, Symmetry); +#ifdef WithShell + if (!fg) + fg = SH->Interp_One_Point(ConstraintList, XX, shellf + i * InList, Symmetry); +#endif + if (!fg && myrank == 0) + { + cout << "bssn_class::Interp_Constraint meets wrong" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + + if (myrank == 0) + { + ofstream outfile; + char filename[50]; + sprintf(filename, "%s/interp_constraint_%05d.dat", ErrorMonitor->out_dir.c_str(), int(PhysTime / dT + 0.5)); + // 0.5 for round off + + outfile.open(filename); + outfile << "# corrdinate, H_Res, Px_Res, Py_Res, Pz_Res, Gx_Res, Gy_Res, Gz_Res, ...." 
<< endl; + for (int i = 0; i < n; i++) + { + outfile << setw(10) << setprecision(10) << y1[i]; + for (int j = 0; j < InList; j++) + outfile << " " << setw(16) << setprecision(15) << shellf[InList * i + j]; + outfile << endl; + } + outfile.close(); + } + + delete[] shellf; +} + +//================================================================================================ + + + +//================================================================================================ + +// This member function computes constraint violations + +//================================================================================================ + +void bssn_class::Compute_Constraint() +{ + double TRK4 = PhysTime; + double ndeps = numepsb; + int pre = 0; + int lev; + + for (lev = 0; lev < GH->levels; lev++) + { + { + MyList *Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { + f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], + cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], + cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], + cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], + cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], + cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], 
cg->fgfs[Gmz_rhs->sgfn], + cg->fgfs[Lap_rhs->sgfn], + cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], + cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], + cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], + cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], + cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], + cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], + cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], + cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], + cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], + cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], + cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], + cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], + cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], + cg->fgfs[Cons_Ham->sgfn], + cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], + cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], + Symmetry, lev, ndeps, pre); + } + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } + } + Parallel::Sync(GH->PatL[lev], ConstraintList, Symmetry); + } + // prolong restrict constraint quantities + for (lev = GH->levels - 1; lev > 0; lev--) + RestrictProlong(lev, 1, false, ConstraintList, ConstraintList, ConstraintList); + +#ifdef WithShell + lev = 0; + { + MyList *sPp; + sPp = SH->PatL; + while (sPp) + { + MyList *BP = sPp->data->blb; + int fngfs = sPp->data->fngfs; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { + f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[fngfs + ShellPatch::gx], + cg->fgfs[fngfs + ShellPatch::gy], + cg->fgfs[fngfs + ShellPatch::gz], + cg->fgfs[fngfs + ShellPatch::drhodx], + cg->fgfs[fngfs + 
ShellPatch::drhody], + cg->fgfs[fngfs + ShellPatch::drhodz], + cg->fgfs[fngfs + ShellPatch::dsigmadx], + cg->fgfs[fngfs + ShellPatch::dsigmady], + cg->fgfs[fngfs + ShellPatch::dsigmadz], + cg->fgfs[fngfs + ShellPatch::dRdx], + cg->fgfs[fngfs + ShellPatch::dRdy], + cg->fgfs[fngfs + ShellPatch::dRdz], + cg->fgfs[fngfs + ShellPatch::drhodxx], + cg->fgfs[fngfs + ShellPatch::drhodxy], + cg->fgfs[fngfs + ShellPatch::drhodxz], + cg->fgfs[fngfs + ShellPatch::drhodyy], + cg->fgfs[fngfs + ShellPatch::drhodyz], + cg->fgfs[fngfs + ShellPatch::drhodzz], + cg->fgfs[fngfs + ShellPatch::dsigmadxx], + cg->fgfs[fngfs + ShellPatch::dsigmadxy], + cg->fgfs[fngfs + ShellPatch::dsigmadxz], + cg->fgfs[fngfs + ShellPatch::dsigmadyy], + cg->fgfs[fngfs + ShellPatch::dsigmadyz], + cg->fgfs[fngfs + ShellPatch::dsigmadzz], + cg->fgfs[fngfs + ShellPatch::dRdxx], + cg->fgfs[fngfs + ShellPatch::dRdxy], + cg->fgfs[fngfs + ShellPatch::dRdxz], + cg->fgfs[fngfs + ShellPatch::dRdyy], + cg->fgfs[fngfs + ShellPatch::dRdyz], + cg->fgfs[fngfs + ShellPatch::dRdzz], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], + cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], + cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], + cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], + cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], + cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], + cg->fgfs[Gmx_rhs->sgfn], 
cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], + cg->fgfs[Lap_rhs->sgfn], + cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], + cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], + cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], + cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], + cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], + cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], + cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], + cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], + cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], + cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], + cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], + cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], + cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], + cg->fgfs[Cons_Ham->sgfn], + cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], + cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], + Symmetry, lev, numepsh, sPp->data->sst, pre); + } + if (BP == sPp->data->ble) + break; + BP = BP->next; + } + sPp = sPp->next; + } + } + SH->Synch(ConstraintList, Symmetry); + // interpolate constraint quantities + SH->CS_Inter(ConstraintList, Symmetry); +#endif +} + +//================================================================================================ + + + +//================================================================================================ + +void bssn_class::testRestrict() +{ + MyList *DG_List = new MyList(phi0); + int lev = 0; + double ZEO = 0, ONE = 1; + MyList *Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { + f_set_value(cg->shape, cg->fgfs[phi0->sgfn], 
ZEO); + } + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } + + lev = 1; + Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { + f_set_value(cg->shape, cg->fgfs[phi0->sgfn], ONE); + } + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } + + Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], DG_List, DG_List, Symmetry); + Parallel::Sync(GH->PatL[lev - 1], DG_List, Symmetry); + + Parallel::Dump_Data(GH->PatL[lev - 1], DG_List, 0, PhysTime, dT); + Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT); + + DG_List->clearList(); + exit(0); +} + +//================================================================================================ + + + +//================================================================================================ + +void bssn_class::testOutBd() +{ + MyList *DG_List = new MyList(phi0); + int lev = 1; + double ZEO = 0, ONE = 1; + MyList *Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { + f_set_value(cg->shape, cg->fgfs[phi0->sgfn], ZEO); + } + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } + + lev = 0; + Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { + f_set_value(cg->shape, cg->fgfs[phi0->sgfn], ONE); + } + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } + + lev = 1; + MyList *Ppc = GH->PatL[lev - 1]; + while (Ppc) + { + Pp = GH->PatL[lev]; + while (Pp) + { + Parallel::OutBdLow2Hi(Ppc->data, Pp->data, DG_List, DG_List, Symmetry); + Pp = Pp->next; + } + Ppc = Ppc->next; + } + + Parallel::Sync(GH->PatL[lev], DG_List, Symmetry); + + Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT); + Parallel::Dump_Data(GH->PatL[lev - 1], DG_List, 0, PhysTime, dT); + 
+ DG_List->clearList(); + exit(0); +} + +//================================================================================================ + + + +//================================================================================================ + +// This member function enforces/checks the traceless condition + +//================================================================================================ + +void bssn_class::Enforce_algcon(int lev, int fg) +{ + MyList *Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { + if (fg == 0) + f_enforce_ga(cg->shape, + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); + else + f_enforce_ga(cg->shape, + cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], + cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], + cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], + cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); + } + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } + +#ifdef WithShell + if (lev == 0) + { + MyList *sPp = SH->PatL; + while (sPp) + { + MyList *BP = sPp->data->blb; + int fngfs = sPp->data->fngfs; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { + if (fg == 0) + f_enforce_ga(cg->shape, + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], + cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); + else + f_enforce_ga(cg->shape, + cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], + cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], 
cg->fgfs[gzz->sgfn], + cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], + cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); + } + if (BP == sPp->data->ble) + break; + BP = BP->next; + } + sPp = sPp->next; + } + } +#endif +} + +//================================================================================================ + + + +//================================================================================================ + +// This member function monitors stdin for an 'abort' input + +//================================================================================================ + +bool bssn_class::check_Stdin_Abort() +{ + + fd_set readfds; + + struct timeval timeout; + + FD_ZERO(&readfds); + FD_SET(STDIN_FILENO, &readfds); + + // Set timeout to 0 — perform a non-blocking check + timeout.tv_sec = 0; + timeout.tv_usec = 0; + + int activity = select(STDIN_FILENO + 1, &readfds, nullptr, nullptr, &timeout); + + if (activity > 0 && FD_ISSET(STDIN_FILENO, &readfds)) { + string input_abort; + if (cin >> input_abort) { + if (input_abort == "stop") { + return true; + } + } + } + + return false; +} + +//================================================================================================ + diff --git a/AMSS_NCKU_source/bssn_rhs_cuda.cu b/AMSS_NCKU_source/bssn_rhs_cuda.cu index d0de9ab..e1ec962 100644 --- a/AMSS_NCKU_source/bssn_rhs_cuda.cu +++ b/AMSS_NCKU_source/bssn_rhs_cuda.cu @@ -1,10 +1,10 @@ -/* - * bssn_rhs_cuda.cu — GPU implementation of f_compute_rhs_bssn - * - * Drop-in replacement for bssn_rhs_c.C. - * Compile with nvcc, link bssn_rhs_cuda.o in place of bssn_rhs_c.o. - */ - +/* + * bssn_rhs_cuda.cu — GPU implementation of f_compute_rhs_bssn + * + * Drop-in replacement for bssn_rhs_c.C. + * Compile with nvcc, link bssn_rhs_cuda.o in place of bssn_rhs_c.o. 
+ */ + #include #include #include @@ -15,10 +15,10 @@ #include #include "macrodef.h" #include "bssn_rhs.h" - -/* ------------------------------------------------------------------ */ -/* Multi-GPU dispatch: distribute ranks across available GPUs */ -/* ------------------------------------------------------------------ */ + +/* ------------------------------------------------------------------ */ +/* Multi-GPU dispatch: distribute ranks across available GPUs */ +/* ------------------------------------------------------------------ */ static struct { int num_gpus; int my_rank; @@ -129,87 +129,87 @@ static void cuda_profile_maybe_log() { stats.output_ms / (double)stats.calls); fflush(stderr); } - -/* ------------------------------------------------------------------ */ -/* Error checking */ -/* ------------------------------------------------------------------ */ -#define CUDA_CHECK(call) do { \ - cudaError_t err = (call); \ - if (err != cudaSuccess) { \ - fprintf(stderr, "CUDA error %s:%d: %s\n", \ - __FILE__, __LINE__, cudaGetErrorString(err)); \ - exit(EXIT_FAILURE); \ - } \ -} while(0) - -/* ------------------------------------------------------------------ */ -/* Physical / gauge constants (matching bssn_rhs_c.C) */ -/* ------------------------------------------------------------------ */ -static const double PI_VAL = 3.14159265358979323846; -static const double FF_VAL = 0.75; -static const double ETA_VAL = 2.0; - -/* ------------------------------------------------------------------ */ -/* Constant memory for grid parameters and stencil coefficients */ -/* ------------------------------------------------------------------ */ -struct GridParams { - int ex[3]; /* nx, ny, nz */ - int all; /* nx*ny*nz */ - double dX, dY, dZ; - /* fderivs coefficients */ - double d12dx, d12dy, d12dz; /* 1/(12*dX) etc */ - double d2dx, d2dy, d2dz; /* 1/(2*dX) etc */ - /* fdderivs coefficients */ - double Fdxdx, Fdydy, Fdzdz; /* 1/(12*dX^2) etc */ - double Sdxdx, Sdydy, Sdzdz; /* 
1/(dX^2) etc */ - double Fdxdy, Fdxdz, Fdydz; /* 1/(144*dX*dY) etc */ - double Sdxdy, Sdxdz, Sdydz; /* 1/(4*dX*dY) etc */ - /* symmetry bounds (Fortran 1-based) */ - int iminF, jminF, kminF; - int imaxF, jmaxF, kmaxF; - /* symmetry bounds for ord=3 (lopsided/kodis) */ - int iminF3, jminF3, kminF3; - int Symmetry; - double eps; - int co; - /* padded sizes */ - int fh2_nx, fh2_ny, fh2_nz; /* (nx+2), (ny+2), (nz+2) for ord=2 */ - int fh3_nx, fh3_ny, fh3_nz; /* (nx+3), (ny+3), (nz+3) for ord=3 */ -}; - -__constant__ GridParams d_gp; - -/* ------------------------------------------------------------------ */ -/* Device indexing helpers */ -/* ------------------------------------------------------------------ */ -__device__ __forceinline__ int idx_ex_d(int i0, int j0, int k0) { - return i0 + j0 * d_gp.ex[0] + k0 * d_gp.ex[0] * d_gp.ex[1]; -} - -/* ord=2 ghost-padded: Fortran index iF -> flat index */ -__device__ __forceinline__ int idx_fh2(int iF, int jF, int kF) { - return (iF + 1) + (jF + 1) * d_gp.fh2_nx + (kF + 1) * d_gp.fh2_nx * d_gp.fh2_ny; -} - -/* ord=3 ghost-padded: Fortran index iF -> flat index */ -__device__ __forceinline__ int idx_fh3(int iF, int jF, int kF) { - return (iF + 2) + (jF + 2) * d_gp.fh3_nx + (kF + 2) * d_gp.fh3_nx * d_gp.fh3_ny; -} - -/* ------------------------------------------------------------------ */ -/* GPU buffer management */ -/* ------------------------------------------------------------------ */ -/* - * Array slot indices — all arrays live in one big cudaMalloc block. 
- * INPUT arrays (H2D): 39 slots - * OUTPUT arrays (D2H): 52 slots - * TEMPORARY arrays (GPU-only): ~65 slots - * Plus 2 extended arrays for ghost-padded stencils (fh_ord2, fh_ord3) - */ - -/* Total number of "all"-sized slots */ -#define NUM_SLOTS 160 - + +/* ------------------------------------------------------------------ */ +/* Error checking */ +/* ------------------------------------------------------------------ */ +#define CUDA_CHECK(call) do { \ + cudaError_t err = (call); \ + if (err != cudaSuccess) { \ + fprintf(stderr, "CUDA error %s:%d: %s\n", \ + __FILE__, __LINE__, cudaGetErrorString(err)); \ + exit(EXIT_FAILURE); \ + } \ +} while(0) + +/* ------------------------------------------------------------------ */ +/* Physical / gauge constants (matching bssn_rhs_c.C) */ +/* ------------------------------------------------------------------ */ +static const double PI_VAL = 3.14159265358979323846; +static const double FF_VAL = 0.75; +static const double ETA_VAL = 2.0; + +/* ------------------------------------------------------------------ */ +/* Constant memory for grid parameters and stencil coefficients */ +/* ------------------------------------------------------------------ */ +struct GridParams { + int ex[3]; /* nx, ny, nz */ + int all; /* nx*ny*nz */ + double dX, dY, dZ; + /* fderivs coefficients */ + double d12dx, d12dy, d12dz; /* 1/(12*dX) etc */ + double d2dx, d2dy, d2dz; /* 1/(2*dX) etc */ + /* fdderivs coefficients */ + double Fdxdx, Fdydy, Fdzdz; /* 1/(12*dX^2) etc */ + double Sdxdx, Sdydy, Sdzdz; /* 1/(dX^2) etc */ + double Fdxdy, Fdxdz, Fdydz; /* 1/(144*dX*dY) etc */ + double Sdxdy, Sdxdz, Sdydz; /* 1/(4*dX*dY) etc */ + /* symmetry bounds (Fortran 1-based) */ + int iminF, jminF, kminF; + int imaxF, jmaxF, kmaxF; + /* symmetry bounds for ord=3 (lopsided/kodis) */ + int iminF3, jminF3, kminF3; + int Symmetry; + double eps; + int co; + /* padded sizes */ + int fh2_nx, fh2_ny, fh2_nz; /* (nx+2), (ny+2), (nz+2) for ord=2 */ + int fh3_nx, fh3_ny, 
fh3_nz; /* (nx+3), (ny+3), (nz+3) for ord=3 */ +}; + +__constant__ GridParams d_gp; + +/* ------------------------------------------------------------------ */ +/* Device indexing helpers */ +/* ------------------------------------------------------------------ */ +__device__ __forceinline__ int idx_ex_d(int i0, int j0, int k0) { + return i0 + j0 * d_gp.ex[0] + k0 * d_gp.ex[0] * d_gp.ex[1]; +} + +/* ord=2 ghost-padded: Fortran index iF -> flat index */ +__device__ __forceinline__ int idx_fh2(int iF, int jF, int kF) { + return (iF + 1) + (jF + 1) * d_gp.fh2_nx + (kF + 1) * d_gp.fh2_nx * d_gp.fh2_ny; +} + +/* ord=3 ghost-padded: Fortran index iF -> flat index */ +__device__ __forceinline__ int idx_fh3(int iF, int jF, int kF) { + return (iF + 2) + (jF + 2) * d_gp.fh3_nx + (kF + 2) * d_gp.fh3_nx * d_gp.fh3_ny; +} + +/* ------------------------------------------------------------------ */ +/* GPU buffer management */ +/* ------------------------------------------------------------------ */ +/* + * Array slot indices — all arrays live in one big cudaMalloc block. 
+ * INPUT arrays (H2D): 39 slots + * OUTPUT arrays (D2H): 52 slots + * TEMPORARY arrays (GPU-only): ~65 slots + * Plus 2 extended arrays for ghost-padded stencils (fh_ord2, fh_ord3) + */ + +/* Total number of "all"-sized slots */ +#define NUM_SLOTS 160 + struct GpuBuffers { double *d_mem; /* single big allocation */ double *d_fh2; /* ghost-padded ord=2: (nx+2)*(ny+2)*(nz+2) */ @@ -228,56 +228,56 @@ static GpuBuffers g_buf = { nullptr, nullptr, nullptr, nullptr, false, {}, 0, 0, 0, 0, 0, 0, false }; - -/* Slot assignments — INPUT (H2D) */ -enum { - S_chi=0, S_trK, S_dxx, S_gxy, S_gxz, S_dyy, S_gyz, S_dzz, - S_Axx, S_Axy, S_Axz, S_Ayy, S_Ayz, S_Azz, - S_Gamx, S_Gamy, S_Gamz, - S_Lap, S_betax, S_betay, S_betaz, - S_dtSfx, S_dtSfy, S_dtSfz, - S_rho, S_Sx, S_Sy, S_Sz, - S_Sxx, S_Sxy, S_Sxz, S_Syy, S_Syz, S_Szz, - S_X, S_Y, S_Z, /* coordinate arrays — only nx/ny/nz long */ - /* 37 input slots so far; X/Y/Z are special-sized */ - - /* OUTPUT (D2H) */ - S_chi_rhs, S_trK_rhs, - S_gxx_rhs, S_gxy_rhs, S_gxz_rhs, S_gyy_rhs, S_gyz_rhs, S_gzz_rhs, - S_Axx_rhs, S_Axy_rhs, S_Axz_rhs, S_Ayy_rhs, S_Ayz_rhs, S_Azz_rhs, - S_Gamx_rhs, S_Gamy_rhs, S_Gamz_rhs, - S_Lap_rhs, S_betax_rhs, S_betay_rhs, S_betaz_rhs, - S_dtSfx_rhs, S_dtSfy_rhs, S_dtSfz_rhs, - S_Gamxxx, S_Gamxxy, S_Gamxxz, S_Gamxyy, S_Gamxyz, S_Gamxzz, - S_Gamyxx, S_Gamyxy, S_Gamyxz, S_Gamyyy, S_Gamyyz, S_Gamyzz, - S_Gamzxx, S_Gamzxy, S_Gamzxz, S_Gamzyy, S_Gamzyz, S_Gamzzz, - S_Rxx, S_Rxy, S_Rxz, S_Ryy, S_Ryz, S_Rzz, - S_ham_Res, S_movx_Res, S_movy_Res, S_movz_Res, - S_Gmx_Res, S_Gmy_Res, S_Gmz_Res, - - /* TEMPORARY (GPU-only) */ - S_gxx, S_gyy, S_gzz, /* physical metric = dxx+1 etc */ - S_alpn1, S_chin1, - S_chix, S_chiy, S_chiz, - S_gxxx, S_gxyx, S_gxzx, S_gyyx, S_gyzx, S_gzzx, - S_gxxy, S_gxyy, S_gxzy, S_gyyy, S_gyzy, S_gzzy, - S_gxxz, S_gxyz, S_gxzz, S_gyyz, S_gyzz, S_gzzz, - S_Lapx, S_Lapy, S_Lapz, - S_betaxx, S_betaxy, S_betaxz, - S_betayx, S_betayy, S_betayz, - S_betazx, S_betazy, S_betazz, - S_Gamxx, S_Gamxy, S_Gamxz, - 
S_Gamyx, S_Gamyy_t, S_Gamyz_t, - S_Gamzx, S_Gamzy, S_Gamzz_t, + +/* Slot assignments — INPUT (H2D) */ +enum { + S_chi=0, S_trK, S_dxx, S_gxy, S_gxz, S_dyy, S_gyz, S_dzz, + S_Axx, S_Axy, S_Axz, S_Ayy, S_Ayz, S_Azz, + S_Gamx, S_Gamy, S_Gamz, + S_Lap, S_betax, S_betay, S_betaz, + S_dtSfx, S_dtSfy, S_dtSfz, + S_rho, S_Sx, S_Sy, S_Sz, + S_Sxx, S_Sxy, S_Sxz, S_Syy, S_Syz, S_Szz, + S_X, S_Y, S_Z, /* coordinate arrays — only nx/ny/nz long */ + /* 37 input slots so far; X/Y/Z are special-sized */ + + /* OUTPUT (D2H) */ + S_chi_rhs, S_trK_rhs, + S_gxx_rhs, S_gxy_rhs, S_gxz_rhs, S_gyy_rhs, S_gyz_rhs, S_gzz_rhs, + S_Axx_rhs, S_Axy_rhs, S_Axz_rhs, S_Ayy_rhs, S_Ayz_rhs, S_Azz_rhs, + S_Gamx_rhs, S_Gamy_rhs, S_Gamz_rhs, + S_Lap_rhs, S_betax_rhs, S_betay_rhs, S_betaz_rhs, + S_dtSfx_rhs, S_dtSfy_rhs, S_dtSfz_rhs, + S_Gamxxx, S_Gamxxy, S_Gamxxz, S_Gamxyy, S_Gamxyz, S_Gamxzz, + S_Gamyxx, S_Gamyxy, S_Gamyxz, S_Gamyyy, S_Gamyyz, S_Gamyzz, + S_Gamzxx, S_Gamzxy, S_Gamzxz, S_Gamzyy, S_Gamzyz, S_Gamzzz, + S_Rxx, S_Rxy, S_Rxz, S_Ryy, S_Ryz, S_Rzz, + S_ham_Res, S_movx_Res, S_movy_Res, S_movz_Res, + S_Gmx_Res, S_Gmy_Res, S_Gmz_Res, + + /* TEMPORARY (GPU-only) */ + S_gxx, S_gyy, S_gzz, /* physical metric = dxx+1 etc */ + S_alpn1, S_chin1, + S_chix, S_chiy, S_chiz, + S_gxxx, S_gxyx, S_gxzx, S_gyyx, S_gyzx, S_gzzx, + S_gxxy, S_gxyy, S_gxzy, S_gyyy, S_gyzy, S_gzzy, + S_gxxz, S_gxyz, S_gxzz, S_gyyz, S_gyzz, S_gzzz, + S_Lapx, S_Lapy, S_Lapz, + S_betaxx, S_betaxy, S_betaxz, + S_betayx, S_betayy, S_betayz, + S_betazx, S_betazy, S_betazz, + S_Gamxx, S_Gamxy, S_Gamxz, + S_Gamyx, S_Gamyy_t, S_Gamyz_t, + S_Gamzx, S_Gamzy, S_Gamzz_t, S_Kx, S_Ky, S_Kz, S_S_arr, S_f_arr, - S_fxx, S_fxy, S_fxz, S_fyy, S_fyz, S_fzz, - S_Gamxa, S_Gamya, S_Gamza, - S_gupxx, S_gupxy, S_gupxz, - S_gupyy, S_gupyz, S_gupzz, - NUM_USED_SLOTS -}; - + S_fxx, S_fxy, S_fxz, S_fyy, S_fyz, S_fzz, + S_Gamxa, S_Gamya, S_Gamza, + S_gupxx, S_gupxy, S_gupxz, + S_gupyy, S_gupyz, S_gupzz, + NUM_USED_SLOTS +}; + static_assert(NUM_USED_SLOTS <= 
NUM_SLOTS, "Increase NUM_SLOTS"); static const int H2D_INPUT_SLOT_COUNT = (S_Szz - S_chi + 1); @@ -437,27 +437,27 @@ static void release_step_ctx(void *block_tag) if (it->second.d_matter_mem) cudaFree(it->second.d_matter_mem); g_step_ctx.erase(it); } - -/* ================================================================== */ -/* A. Symmetry boundary kernels (ord=2 and ord=3) */ -/* ================================================================== */ - + +/* ================================================================== */ +/* A. Symmetry boundary kernels (ord=2 and ord=3) */ +/* ================================================================== */ + /* Step 1: Copy interior into ghost-padded array */ __global__ void kern_symbd_copy_interior_ord2(const double * __restrict__ func, double * __restrict__ fh, double SoA0, double SoA1, double SoA2) -{ - const int nx = d_gp.ex[0], ny = d_gp.ex[1], nz = d_gp.ex[2]; - const int fnx = d_gp.fh2_nx, fny = d_gp.fh2_ny; - for (int tid = blockIdx.x * blockDim.x + threadIdx.x; - tid < d_gp.all; - tid += blockDim.x * gridDim.x) - { - int i0 = tid % nx; - int j0 = (tid / nx) % ny; - int k0 = tid / (nx * ny); - int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1; - fh[(iF+1) + (jF+1)*fnx + (kF+1)*fnx*fny] = func[tid]; +{ + const int nx = d_gp.ex[0], ny = d_gp.ex[1], nz = d_gp.ex[2]; + const int fnx = d_gp.fh2_nx, fny = d_gp.fh2_ny; + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < d_gp.all; + tid += blockDim.x * gridDim.x) + { + int i0 = tid % nx; + int j0 = (tid / nx) % ny; + int k0 = tid / (nx * ny); + int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1; + fh[(iF+1) + (jF+1)*fnx + (kF+1)*fnx*fny] = func[tid]; } } @@ -499,93 +499,93 @@ __global__ void kern_symbd_pack_ord2(const double * __restrict__ func, /* Step 2: Fill i-ghosts (x-direction symmetry) */ __global__ void kern_symbd_ighost_ord2(double * __restrict__ fh, double SoA0) { - const int ny = d_gp.ex[1], nz = d_gp.ex[2]; - const int fnx = d_gp.fh2_nx, fny = d_gp.fh2_ny; - /* 
ord=2: fill iF=0 and iF=-1, i.e. ghost layers ii=0 from ii=2, ii=1 from ii=1 */ - /* Fortran: do ii=0,ord-1: funcc(-ii,jF,kF) = funcc(ii+1,jF,kF)*SoA[0] */ - int total = ny * nz; /* jF=1..ny, kF=1..nz */ - for (int tid = blockIdx.x * blockDim.x + threadIdx.x; - tid < total * 2; /* 2 ghost layers */ - tid += blockDim.x * gridDim.x) - { - int ii = tid / total; /* 0 or 1 */ - int rem = tid % total; - int j0 = rem % ny; - int k0 = rem / ny; - int jF = j0 + 1, kF = k0 + 1; - int iF_dst = -ii; /* 0, -1 */ - int iF_src = ii + 1; /* 1, 2 */ - fh[(iF_dst+1) + (jF+1)*fnx + (kF+1)*fnx*fny] = - fh[(iF_src+1) + (jF+1)*fnx + (kF+1)*fnx*fny] * SoA0; - } -} - -/* Step 3: Fill j-ghosts (y-direction symmetry) */ -__global__ void kern_symbd_jghost_ord2(double * __restrict__ fh, double SoA1) -{ - const int nx = d_gp.ex[0], nz = d_gp.ex[2]; - const int fnx = d_gp.fh2_nx, fny = d_gp.fh2_ny; - /* iF ranges from -1 to nx (i.e. -ord+1 to ex1), total = nx+2 */ - int irange = nx + 2; - int total = irange * nz; - for (int tid = blockIdx.x * blockDim.x + threadIdx.x; - tid < total * 2; - tid += blockDim.x * gridDim.x) - { - int jj = tid / total; - int rem = tid % total; - int ii = rem % irange; - int k0 = rem / irange; - int iF = ii - 1; /* -1 .. 
nx */ - int kF = k0 + 1; - int jF_dst = -jj; - int jF_src = jj + 1; - fh[(iF+1) + (jF_dst+1)*fnx + (kF+1)*fnx*fny] = - fh[(iF+1) + (jF_src+1)*fnx + (kF+1)*fnx*fny] * SoA1; - } -} - -/* Step 4: Fill k-ghosts (z-direction symmetry) */ -__global__ void kern_symbd_kghost_ord2(double * __restrict__ fh, double SoA2) -{ - const int nx = d_gp.ex[0], ny = d_gp.ex[1]; - const int fnx = d_gp.fh2_nx, fny = d_gp.fh2_ny; - int irange = nx + 2; - int jrange = ny + 2; - int total = irange * jrange; - for (int tid = blockIdx.x * blockDim.x + threadIdx.x; - tid < total * 2; - tid += blockDim.x * gridDim.x) - { - int kk = tid / total; - int rem = tid % total; - int ii = rem % irange; - int jj = rem / irange; - int iF = ii - 1; - int jF = jj - 1; - int kF_dst = -kk; - int kF_src = kk + 1; - fh[(iF+1) + (jF+1)*fnx + (kF_dst+1)*fnx*fny] = - fh[(iF+1) + (jF+1)*fnx + (kF_src+1)*fnx*fny] * SoA2; - } -} - -/* ---- ord=3 variants (for lopsided / kodis) ---- */ - + const int ny = d_gp.ex[1], nz = d_gp.ex[2]; + const int fnx = d_gp.fh2_nx, fny = d_gp.fh2_ny; + /* ord=2: fill iF=0 and iF=-1, i.e. ghost layers ii=0 from ii=2, ii=1 from ii=1 */ + /* Fortran: do ii=0,ord-1: funcc(-ii,jF,kF) = funcc(ii+1,jF,kF)*SoA[0] */ + int total = ny * nz; /* jF=1..ny, kF=1..nz */ + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < total * 2; /* 2 ghost layers */ + tid += blockDim.x * gridDim.x) + { + int ii = tid / total; /* 0 or 1 */ + int rem = tid % total; + int j0 = rem % ny; + int k0 = rem / ny; + int jF = j0 + 1, kF = k0 + 1; + int iF_dst = -ii; /* 0, -1 */ + int iF_src = ii + 1; /* 1, 2 */ + fh[(iF_dst+1) + (jF+1)*fnx + (kF+1)*fnx*fny] = + fh[(iF_src+1) + (jF+1)*fnx + (kF+1)*fnx*fny] * SoA0; + } +} + +/* Step 3: Fill j-ghosts (y-direction symmetry) */ +__global__ void kern_symbd_jghost_ord2(double * __restrict__ fh, double SoA1) +{ + const int nx = d_gp.ex[0], nz = d_gp.ex[2]; + const int fnx = d_gp.fh2_nx, fny = d_gp.fh2_ny; + /* iF ranges from -1 to nx (i.e. 
-ord+1 to ex1), total = nx+2 */ + int irange = nx + 2; + int total = irange * nz; + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < total * 2; + tid += blockDim.x * gridDim.x) + { + int jj = tid / total; + int rem = tid % total; + int ii = rem % irange; + int k0 = rem / irange; + int iF = ii - 1; /* -1 .. nx */ + int kF = k0 + 1; + int jF_dst = -jj; + int jF_src = jj + 1; + fh[(iF+1) + (jF_dst+1)*fnx + (kF+1)*fnx*fny] = + fh[(iF+1) + (jF_src+1)*fnx + (kF+1)*fnx*fny] * SoA1; + } +} + +/* Step 4: Fill k-ghosts (z-direction symmetry) */ +__global__ void kern_symbd_kghost_ord2(double * __restrict__ fh, double SoA2) +{ + const int nx = d_gp.ex[0], ny = d_gp.ex[1]; + const int fnx = d_gp.fh2_nx, fny = d_gp.fh2_ny; + int irange = nx + 2; + int jrange = ny + 2; + int total = irange * jrange; + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < total * 2; + tid += blockDim.x * gridDim.x) + { + int kk = tid / total; + int rem = tid % total; + int ii = rem % irange; + int jj = rem / irange; + int iF = ii - 1; + int jF = jj - 1; + int kF_dst = -kk; + int kF_src = kk + 1; + fh[(iF+1) + (jF+1)*fnx + (kF_dst+1)*fnx*fny] = + fh[(iF+1) + (jF+1)*fnx + (kF_src+1)*fnx*fny] * SoA2; + } +} + +/* ---- ord=3 variants (for lopsided / kodis) ---- */ + __global__ void kern_symbd_copy_interior_ord3(const double * __restrict__ func, double * __restrict__ fh) { - const int nx = d_gp.ex[0], ny = d_gp.ex[1], nz = d_gp.ex[2]; - const int fnx = d_gp.fh3_nx, fny = d_gp.fh3_ny; - for (int tid = blockIdx.x * blockDim.x + threadIdx.x; - tid < d_gp.all; - tid += blockDim.x * gridDim.x) - { - int i0 = tid % nx; - int j0 = (tid / nx) % ny; - int k0 = tid / (nx * ny); - int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1; - fh[(iF+2) + (jF+2)*fnx + (kF+2)*fnx*fny] = func[tid]; + const int nx = d_gp.ex[0], ny = d_gp.ex[1], nz = d_gp.ex[2]; + const int fnx = d_gp.fh3_nx, fny = d_gp.fh3_ny; + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < d_gp.all; + tid += blockDim.x * 
gridDim.x) + { + int i0 = tid % nx; + int j0 = (tid / nx) % ny; + int k0 = tid / (nx * ny); + int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1; + fh[(iF+2) + (jF+2)*fnx + (kF+2)*fnx*fny] = func[tid]; } } @@ -626,422 +626,422 @@ __global__ void kern_symbd_pack_ord3(const double * __restrict__ func, __global__ void kern_symbd_ighost_ord3(double * __restrict__ fh, double SoA0) { - const int ny = d_gp.ex[1], nz = d_gp.ex[2]; - const int fnx = d_gp.fh3_nx, fny = d_gp.fh3_ny; - int total = ny * nz; - for (int tid = blockIdx.x * blockDim.x + threadIdx.x; - tid < total * 3; - tid += blockDim.x * gridDim.x) - { - int ii = tid / total; - int rem = tid % total; - int j0 = rem % ny; - int k0 = rem / ny; - int jF = j0 + 1, kF = k0 + 1; - int iF_dst = -ii; - int iF_src = ii + 1; - fh[(iF_dst+2) + (jF+2)*fnx + (kF+2)*fnx*fny] = - fh[(iF_src+2) + (jF+2)*fnx + (kF+2)*fnx*fny] * SoA0; - } -} - -__global__ void kern_symbd_jghost_ord3(double * __restrict__ fh, double SoA1) -{ - const int nx = d_gp.ex[0], nz = d_gp.ex[2]; - const int fnx = d_gp.fh3_nx, fny = d_gp.fh3_ny; - int irange = nx + 3; - int total = irange * nz; - for (int tid = blockIdx.x * blockDim.x + threadIdx.x; - tid < total * 3; - tid += blockDim.x * gridDim.x) - { - int jj = tid / total; - int rem = tid % total; - int ii = rem % irange; - int k0 = rem / irange; - int iF = ii - 2; - int kF = k0 + 1; - int jF_dst = -jj; - int jF_src = jj + 1; - fh[(iF+2) + (jF_dst+2)*fnx + (kF+2)*fnx*fny] = - fh[(iF+2) + (jF_src+2)*fnx + (kF+2)*fnx*fny] * SoA1; - } -} - -__global__ void kern_symbd_kghost_ord3(double * __restrict__ fh, double SoA2) -{ - const int nx = d_gp.ex[0], ny = d_gp.ex[1]; - const int fnx = d_gp.fh3_nx, fny = d_gp.fh3_ny; - int irange = nx + 3; - int jrange = ny + 3; - int total = irange * jrange; - for (int tid = blockIdx.x * blockDim.x + threadIdx.x; - tid < total * 3; - tid += blockDim.x * gridDim.x) - { - int kk = tid / total; - int rem = tid % total; - int ii = rem % irange; - int jj = rem / irange; - int iF = ii - 2; 
- int jF = jj - 2; - int kF_dst = -kk; - int kF_src = kk + 1; - fh[(iF+2) + (jF+2)*fnx + (kF_dst+2)*fnx*fny] = - fh[(iF+2) + (jF+2)*fnx + (kF_src+2)*fnx*fny] * SoA2; - } -} - -/* ================================================================== */ -/* B. Stencil kernels */ -/* ================================================================== */ - -/* ---- First derivatives (ord=2, 4th/2nd order) ---- */ -__global__ __launch_bounds__(128, 4) -void kern_fderivs(const double * __restrict__ fh, - double * __restrict__ fx, - double * __restrict__ fy, - double * __restrict__ fz) -{ - const int nx = d_gp.ex[0], ny = d_gp.ex[1], nz = d_gp.ex[2]; - const int imaxF = d_gp.imaxF, jmaxF = d_gp.jmaxF, kmaxF = d_gp.kmaxF; - const int iminF = d_gp.iminF, jminF = d_gp.jminF, kminF = d_gp.kminF; - - for (int tid = blockIdx.x * blockDim.x + threadIdx.x; - tid < d_gp.all; - tid += blockDim.x * gridDim.x) - { - int i0 = tid % nx; - int j0 = (tid / nx) % ny; - int k0 = tid / (nx * ny); - - /* boundary points: leave as zero */ - if (i0 > nx - 2 || j0 > ny - 2 || k0 > nz - 2) { - fx[tid] = 0.0; fy[tid] = 0.0; fz[tid] = 0.0; - continue; - } - - int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1; - - if ((iF+2) <= imaxF && (iF-2) >= iminF && - (jF+2) <= jmaxF && (jF-2) >= jminF && - (kF+2) <= kmaxF && (kF-2) >= kminF) - { - fx[tid] = d_gp.d12dx * ( - fh[idx_fh2(iF-2,jF,kF)] - 8.0*fh[idx_fh2(iF-1,jF,kF)] - + 8.0*fh[idx_fh2(iF+1,jF,kF)] - fh[idx_fh2(iF+2,jF,kF)]); - fy[tid] = d_gp.d12dy * ( - fh[idx_fh2(iF,jF-2,kF)] - 8.0*fh[idx_fh2(iF,jF-1,kF)] - + 8.0*fh[idx_fh2(iF,jF+1,kF)] - fh[idx_fh2(iF,jF+2,kF)]); - fz[tid] = d_gp.d12dz * ( - fh[idx_fh2(iF,jF,kF-2)] - 8.0*fh[idx_fh2(iF,jF,kF-1)] - + 8.0*fh[idx_fh2(iF,jF,kF+1)] - fh[idx_fh2(iF,jF,kF+2)]); - } - else if ((iF+1) <= imaxF && (iF-1) >= iminF && - (jF+1) <= jmaxF && (jF-1) >= jminF && - (kF+1) <= kmaxF && (kF-1) >= kminF) - { - fx[tid] = d_gp.d2dx * ( - -fh[idx_fh2(iF-1,jF,kF)] + fh[idx_fh2(iF+1,jF,kF)]); - fy[tid] = d_gp.d2dy * ( - 
-fh[idx_fh2(iF,jF-1,kF)] + fh[idx_fh2(iF,jF+1,kF)]); - fz[tid] = d_gp.d2dz * ( - -fh[idx_fh2(iF,jF,kF-1)] + fh[idx_fh2(iF,jF,kF+1)]); - } - else { - fx[tid] = 0.0; fy[tid] = 0.0; fz[tid] = 0.0; - } - } -} - -/* ---- Second derivatives (ord=2, 4th/2nd order) ---- */ -__global__ __launch_bounds__(128, 4) -void kern_fdderivs(const double * __restrict__ fh, - double * __restrict__ fxx, double * __restrict__ fxy, - double * __restrict__ fxz, double * __restrict__ fyy, - double * __restrict__ fyz, double * __restrict__ fzz) -{ - const int nx = d_gp.ex[0], ny = d_gp.ex[1], nz = d_gp.ex[2]; - const int imaxF = d_gp.imaxF, jmaxF = d_gp.jmaxF, kmaxF = d_gp.kmaxF; - const int iminF = d_gp.iminF, jminF = d_gp.jminF, kminF = d_gp.kminF; - - for (int tid = blockIdx.x * blockDim.x + threadIdx.x; - tid < d_gp.all; - tid += blockDim.x * gridDim.x) - { - int i0 = tid % nx; - int j0 = (tid / nx) % ny; - int k0 = tid / (nx * ny); - - if (i0 > nx - 2 || j0 > ny - 2 || k0 > nz - 2) { - fxx[tid]=0; fxy[tid]=0; fxz[tid]=0; - fyy[tid]=0; fyz[tid]=0; fzz[tid]=0; - continue; - } - - int iF = i0+1, jF = j0+1, kF = k0+1; - - if ((iF+2)<=imaxF && (iF-2)>=iminF && - (jF+2)<=jmaxF && (jF-2)>=jminF && - (kF+2)<=kmaxF && (kF-2)>=kminF) - { - /* 4th-order diagonal */ - double c = fh[idx_fh2(iF,jF,kF)]; - fxx[tid] = d_gp.Fdxdx*( - -fh[idx_fh2(iF-2,jF,kF)] + 16.0*fh[idx_fh2(iF-1,jF,kF)] - -30.0*c + 16.0*fh[idx_fh2(iF+1,jF,kF)] - fh[idx_fh2(iF+2,jF,kF)]); - fyy[tid] = d_gp.Fdydy*( - -fh[idx_fh2(iF,jF-2,kF)] + 16.0*fh[idx_fh2(iF,jF-1,kF)] - -30.0*c + 16.0*fh[idx_fh2(iF,jF+1,kF)] - fh[idx_fh2(iF,jF+2,kF)]); - fzz[tid] = d_gp.Fdzdz*( - -fh[idx_fh2(iF,jF,kF-2)] + 16.0*fh[idx_fh2(iF,jF,kF-1)] - -30.0*c + 16.0*fh[idx_fh2(iF,jF,kF+1)] - fh[idx_fh2(iF,jF,kF+2)]); - - /* 4th-order cross: fxy */ - { - double t_jm2 = fh[idx_fh2(iF-2,jF-2,kF)] - 8.0*fh[idx_fh2(iF-1,jF-2,kF)] - + 8.0*fh[idx_fh2(iF+1,jF-2,kF)] - fh[idx_fh2(iF+2,jF-2,kF)]; - double t_jm1 = fh[idx_fh2(iF-2,jF-1,kF)] - 8.0*fh[idx_fh2(iF-1,jF-1,kF)] - + 
8.0*fh[idx_fh2(iF+1,jF-1,kF)] - fh[idx_fh2(iF+2,jF-1,kF)]; - double t_jp1 = fh[idx_fh2(iF-2,jF+1,kF)] - 8.0*fh[idx_fh2(iF-1,jF+1,kF)] - + 8.0*fh[idx_fh2(iF+1,jF+1,kF)] - fh[idx_fh2(iF+2,jF+1,kF)]; - double t_jp2 = fh[idx_fh2(iF-2,jF+2,kF)] - 8.0*fh[idx_fh2(iF-1,jF+2,kF)] - + 8.0*fh[idx_fh2(iF+1,jF+2,kF)] - fh[idx_fh2(iF+2,jF+2,kF)]; - fxy[tid] = d_gp.Fdxdy*(t_jm2 - 8.0*t_jm1 + 8.0*t_jp1 - t_jp2); - } - /* 4th-order cross: fxz */ - { - double t_km2 = fh[idx_fh2(iF-2,jF,kF-2)] - 8.0*fh[idx_fh2(iF-1,jF,kF-2)] - + 8.0*fh[idx_fh2(iF+1,jF,kF-2)] - fh[idx_fh2(iF+2,jF,kF-2)]; - double t_km1 = fh[idx_fh2(iF-2,jF,kF-1)] - 8.0*fh[idx_fh2(iF-1,jF,kF-1)] - + 8.0*fh[idx_fh2(iF+1,jF,kF-1)] - fh[idx_fh2(iF+2,jF,kF-1)]; - double t_kp1 = fh[idx_fh2(iF-2,jF,kF+1)] - 8.0*fh[idx_fh2(iF-1,jF,kF+1)] - + 8.0*fh[idx_fh2(iF+1,jF,kF+1)] - fh[idx_fh2(iF+2,jF,kF+1)]; - double t_kp2 = fh[idx_fh2(iF-2,jF,kF+2)] - 8.0*fh[idx_fh2(iF-1,jF,kF+2)] - + 8.0*fh[idx_fh2(iF+1,jF,kF+2)] - fh[idx_fh2(iF+2,jF,kF+2)]; - fxz[tid] = d_gp.Fdxdz*(t_km2 - 8.0*t_km1 + 8.0*t_kp1 - t_kp2); - } - /* 4th-order cross: fyz */ - { - double t_km2 = fh[idx_fh2(iF,jF-2,kF-2)] - 8.0*fh[idx_fh2(iF,jF-1,kF-2)] - + 8.0*fh[idx_fh2(iF,jF+1,kF-2)] - fh[idx_fh2(iF,jF+2,kF-2)]; - double t_km1 = fh[idx_fh2(iF,jF-2,kF-1)] - 8.0*fh[idx_fh2(iF,jF-1,kF-1)] - + 8.0*fh[idx_fh2(iF,jF+1,kF-1)] - fh[idx_fh2(iF,jF+2,kF-1)]; - double t_kp1 = fh[idx_fh2(iF,jF-2,kF+1)] - 8.0*fh[idx_fh2(iF,jF-1,kF+1)] - + 8.0*fh[idx_fh2(iF,jF+1,kF+1)] - fh[idx_fh2(iF,jF+2,kF+1)]; - double t_kp2 = fh[idx_fh2(iF,jF-2,kF+2)] - 8.0*fh[idx_fh2(iF,jF-1,kF+2)] - + 8.0*fh[idx_fh2(iF,jF+1,kF+2)] - fh[idx_fh2(iF,jF+2,kF+2)]; - fyz[tid] = d_gp.Fdydz*(t_km2 - 8.0*t_km1 + 8.0*t_kp1 - t_kp2); - } - } - else if ((iF+1)<=imaxF && (iF-1)>=iminF && - (jF+1)<=jmaxF && (jF-1)>=jminF && - (kF+1)<=kmaxF && (kF-1)>=kminF) - { - double c = fh[idx_fh2(iF,jF,kF)]; - fxx[tid] = d_gp.Sdxdx*(fh[idx_fh2(iF-1,jF,kF)] - 2.0*c + fh[idx_fh2(iF+1,jF,kF)]); - fyy[tid] = 
d_gp.Sdydy*(fh[idx_fh2(iF,jF-1,kF)] - 2.0*c + fh[idx_fh2(iF,jF+1,kF)]); - fzz[tid] = d_gp.Sdzdz*(fh[idx_fh2(iF,jF,kF-1)] - 2.0*c + fh[idx_fh2(iF,jF,kF+1)]); - fxy[tid] = d_gp.Sdxdy*(fh[idx_fh2(iF-1,jF-1,kF)] - fh[idx_fh2(iF+1,jF-1,kF)] - -fh[idx_fh2(iF-1,jF+1,kF)] + fh[idx_fh2(iF+1,jF+1,kF)]); - fxz[tid] = d_gp.Sdxdz*(fh[idx_fh2(iF-1,jF,kF-1)] - fh[idx_fh2(iF+1,jF,kF-1)] - -fh[idx_fh2(iF-1,jF,kF+1)] + fh[idx_fh2(iF+1,jF,kF+1)]); - fyz[tid] = d_gp.Sdydz*(fh[idx_fh2(iF,jF-1,kF-1)] - fh[idx_fh2(iF,jF+1,kF-1)] - -fh[idx_fh2(iF,jF-1,kF+1)] + fh[idx_fh2(iF,jF+1,kF+1)]); - } - else { - fxx[tid]=0; fxy[tid]=0; fxz[tid]=0; - fyy[tid]=0; fyz[tid]=0; fzz[tid]=0; - } - } -} - -/* ---- Lopsided (upwind advection) kernel ---- */ -__global__ __launch_bounds__(128, 4) -void kern_lopsided(const double * __restrict__ fh, - double * __restrict__ f_rhs, - const double * __restrict__ Sfx, - const double * __restrict__ Sfy, - const double * __restrict__ Sfz) -{ - const int nx = d_gp.ex[0], ny = d_gp.ex[1], nz = d_gp.ex[2]; - const int iminF = d_gp.iminF3, jminF = d_gp.jminF3, kminF = d_gp.kminF3; - - for (int tid = blockIdx.x * blockDim.x + threadIdx.x; - tid < d_gp.all; - tid += blockDim.x * gridDim.x) - { - int i0 = tid % nx; - int j0 = (tid / nx) % ny; - int k0 = tid / (nx * ny); - - if (i0 > nx - 2 || j0 > ny - 2 || k0 > nz - 2) continue; - - int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1; - double val = 0.0; - - /* --- x direction --- */ - double sfx = Sfx[tid]; - if (sfx > 0.0) { - if (i0 <= nx - 4) { - val += sfx * d_gp.d12dx * ( - -3.0*fh[idx_fh3(iF-1,jF,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] - +18.0*fh[idx_fh3(iF+1,jF,kF)] - 6.0*fh[idx_fh3(iF+2,jF,kF)] - + fh[idx_fh3(iF+3,jF,kF)]); - } else if (i0 <= nx - 3) { - val += sfx * d_gp.d12dx * ( - fh[idx_fh3(iF-2,jF,kF)] - 8.0*fh[idx_fh3(iF-1,jF,kF)] - +8.0*fh[idx_fh3(iF+1,jF,kF)] - fh[idx_fh3(iF+2,jF,kF)]); - } else if (i0 <= nx - 2) { - val -= sfx * d_gp.d12dx * ( - -3.0*fh[idx_fh3(iF+1,jF,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] - 
+18.0*fh[idx_fh3(iF-1,jF,kF)] - 6.0*fh[idx_fh3(iF-2,jF,kF)] - + fh[idx_fh3(iF-3,jF,kF)]); - } - } else if (sfx < 0.0) { - if ((i0 - 2) >= iminF) { - val -= sfx * d_gp.d12dx * ( - -3.0*fh[idx_fh3(iF+1,jF,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] - +18.0*fh[idx_fh3(iF-1,jF,kF)] - 6.0*fh[idx_fh3(iF-2,jF,kF)] - + fh[idx_fh3(iF-3,jF,kF)]); - } else if ((i0 - 1) >= iminF) { - val += sfx * d_gp.d12dx * ( - fh[idx_fh3(iF-2,jF,kF)] - 8.0*fh[idx_fh3(iF-1,jF,kF)] - +8.0*fh[idx_fh3(iF+1,jF,kF)] - fh[idx_fh3(iF+2,jF,kF)]); - } else if (i0 >= iminF) { - val += sfx * d_gp.d12dx * ( - -3.0*fh[idx_fh3(iF-1,jF,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] - +18.0*fh[idx_fh3(iF+1,jF,kF)] - 6.0*fh[idx_fh3(iF+2,jF,kF)] - + fh[idx_fh3(iF+3,jF,kF)]); - } - } - - /* --- y direction --- */ - double sfy = Sfy[tid]; - if (sfy > 0.0) { - if (j0 <= ny - 4) { - val += sfy * d_gp.d12dy * ( - -3.0*fh[idx_fh3(iF,jF-1,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] - +18.0*fh[idx_fh3(iF,jF+1,kF)] - 6.0*fh[idx_fh3(iF,jF+2,kF)] - + fh[idx_fh3(iF,jF+3,kF)]); - } else if (j0 <= ny - 3) { - val += sfy * d_gp.d12dy * ( - fh[idx_fh3(iF,jF-2,kF)] - 8.0*fh[idx_fh3(iF,jF-1,kF)] - +8.0*fh[idx_fh3(iF,jF+1,kF)] - fh[idx_fh3(iF,jF+2,kF)]); - } else if (j0 <= ny - 2) { - val -= sfy * d_gp.d12dy * ( - -3.0*fh[idx_fh3(iF,jF+1,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] - +18.0*fh[idx_fh3(iF,jF-1,kF)] - 6.0*fh[idx_fh3(iF,jF-2,kF)] - + fh[idx_fh3(iF,jF-3,kF)]); - } - } else if (sfy < 0.0) { - if ((j0 - 2) >= jminF) { - val -= sfy * d_gp.d12dy * ( - -3.0*fh[idx_fh3(iF,jF+1,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] - +18.0*fh[idx_fh3(iF,jF-1,kF)] - 6.0*fh[idx_fh3(iF,jF-2,kF)] - + fh[idx_fh3(iF,jF-3,kF)]); - } else if ((j0 - 1) >= jminF) { - val += sfy * d_gp.d12dy * ( - fh[idx_fh3(iF,jF-2,kF)] - 8.0*fh[idx_fh3(iF,jF-1,kF)] - +8.0*fh[idx_fh3(iF,jF+1,kF)] - fh[idx_fh3(iF,jF+2,kF)]); - } else if (j0 >= jminF) { - val += sfy * d_gp.d12dy * ( - -3.0*fh[idx_fh3(iF,jF-1,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] - +18.0*fh[idx_fh3(iF,jF+1,kF)] - 6.0*fh[idx_fh3(iF,jF+2,kF)] - + 
fh[idx_fh3(iF,jF+3,kF)]); - } - } - - /* --- z direction --- */ - double sfz = Sfz[tid]; - if (sfz > 0.0) { - if (k0 <= nz - 4) { - val += sfz * d_gp.d12dz * ( - -3.0*fh[idx_fh3(iF,jF,kF-1)] - 10.0*fh[idx_fh3(iF,jF,kF)] - +18.0*fh[idx_fh3(iF,jF,kF+1)] - 6.0*fh[idx_fh3(iF,jF,kF+2)] - + fh[idx_fh3(iF,jF,kF+3)]); - } else if (k0 <= nz - 3) { - val += sfz * d_gp.d12dz * ( - fh[idx_fh3(iF,jF,kF-2)] - 8.0*fh[idx_fh3(iF,jF,kF-1)] - +8.0*fh[idx_fh3(iF,jF,kF+1)] - fh[idx_fh3(iF,jF,kF+2)]); - } else if (k0 <= nz - 2) { - val -= sfz * d_gp.d12dz * ( - -3.0*fh[idx_fh3(iF,jF,kF+1)] - 10.0*fh[idx_fh3(iF,jF,kF)] - +18.0*fh[idx_fh3(iF,jF,kF-1)] - 6.0*fh[idx_fh3(iF,jF,kF-2)] - + fh[idx_fh3(iF,jF,kF-3)]); - } - } else if (sfz < 0.0) { - if ((k0 - 2) >= kminF) { - val -= sfz * d_gp.d12dz * ( - -3.0*fh[idx_fh3(iF,jF,kF+1)] - 10.0*fh[idx_fh3(iF,jF,kF)] - +18.0*fh[idx_fh3(iF,jF,kF-1)] - 6.0*fh[idx_fh3(iF,jF,kF-2)] - + fh[idx_fh3(iF,jF,kF-3)]); - } else if ((k0 - 1) >= kminF) { - val += sfz * d_gp.d12dz * ( - fh[idx_fh3(iF,jF,kF-2)] - 8.0*fh[idx_fh3(iF,jF,kF-1)] - +8.0*fh[idx_fh3(iF,jF,kF+1)] - fh[idx_fh3(iF,jF,kF+2)]); - } else if (k0 >= kminF) { - val += sfz * d_gp.d12dz * ( - -3.0*fh[idx_fh3(iF,jF,kF-1)] - 10.0*fh[idx_fh3(iF,jF,kF)] - +18.0*fh[idx_fh3(iF,jF,kF+1)] - 6.0*fh[idx_fh3(iF,jF,kF+2)] - + fh[idx_fh3(iF,jF,kF+3)]); - } - } - - f_rhs[tid] += val; - } -} - -/* ---- KO dissipation kernel (ord=3, 6th-order) ---- */ -__global__ __launch_bounds__(128, 4) -void kern_kodis(const double * __restrict__ fh, - double * __restrict__ f_rhs, - double eps_val) -{ - const int nx = d_gp.ex[0], ny = d_gp.ex[1], nz = d_gp.ex[2]; - const int iminF = d_gp.iminF3, jminF = d_gp.jminF3, kminF = d_gp.kminF3; - const int imaxF = d_gp.imaxF, jmaxF = d_gp.jmaxF, kmaxF = d_gp.kmaxF; - const double cof = 64.0; - - for (int tid = blockIdx.x * blockDim.x + threadIdx.x; - tid < d_gp.all; - tid += blockDim.x * gridDim.x) - { - int i0 = tid % nx; - int j0 = (tid / nx) % ny; - int k0 = tid / (nx * ny); - int iF = 
i0 + 1, jF = j0 + 1, kF = k0 + 1; - - if ((iF-3) >= iminF && (iF+3) <= imaxF && - (jF-3) >= jminF && (jF+3) <= jmaxF && - (kF-3) >= kminF && (kF+3) <= kmaxF) - { - double Dx = (fh[idx_fh3(iF-3,jF,kF)] + fh[idx_fh3(iF+3,jF,kF)]) - - 6.0*(fh[idx_fh3(iF-2,jF,kF)] + fh[idx_fh3(iF+2,jF,kF)]) - +15.0*(fh[idx_fh3(iF-1,jF,kF)] + fh[idx_fh3(iF+1,jF,kF)]) - -20.0* fh[idx_fh3(iF,jF,kF)]; - Dx /= d_gp.dX; - - double Dy = (fh[idx_fh3(iF,jF-3,kF)] + fh[idx_fh3(iF,jF+3,kF)]) - - 6.0*(fh[idx_fh3(iF,jF-2,kF)] + fh[idx_fh3(iF,jF+2,kF)]) - +15.0*(fh[idx_fh3(iF,jF-1,kF)] + fh[idx_fh3(iF,jF+1,kF)]) - -20.0* fh[idx_fh3(iF,jF,kF)]; - Dy /= d_gp.dY; - - double Dz = (fh[idx_fh3(iF,jF,kF-3)] + fh[idx_fh3(iF,jF,kF+3)]) - - 6.0*(fh[idx_fh3(iF,jF,kF-2)] + fh[idx_fh3(iF,jF,kF+2)]) - +15.0*(fh[idx_fh3(iF,jF,kF-1)] + fh[idx_fh3(iF,jF,kF+1)]) - -20.0* fh[idx_fh3(iF,jF,kF)]; - Dz /= d_gp.dZ; - - f_rhs[tid] += (eps_val / cof) * (Dx + Dy + Dz); - } - } -} - -/* ================================================================== */ -/* Host wrapper helpers */ -/* ================================================================== */ + const int ny = d_gp.ex[1], nz = d_gp.ex[2]; + const int fnx = d_gp.fh3_nx, fny = d_gp.fh3_ny; + int total = ny * nz; + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < total * 3; + tid += blockDim.x * gridDim.x) + { + int ii = tid / total; + int rem = tid % total; + int j0 = rem % ny; + int k0 = rem / ny; + int jF = j0 + 1, kF = k0 + 1; + int iF_dst = -ii; + int iF_src = ii + 1; + fh[(iF_dst+2) + (jF+2)*fnx + (kF+2)*fnx*fny] = + fh[(iF_src+2) + (jF+2)*fnx + (kF+2)*fnx*fny] * SoA0; + } +} + +__global__ void kern_symbd_jghost_ord3(double * __restrict__ fh, double SoA1) +{ + const int nx = d_gp.ex[0], nz = d_gp.ex[2]; + const int fnx = d_gp.fh3_nx, fny = d_gp.fh3_ny; + int irange = nx + 3; + int total = irange * nz; + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < total * 3; + tid += blockDim.x * gridDim.x) + { + int jj = tid / total; + int rem = 
tid % total; + int ii = rem % irange; + int k0 = rem / irange; + int iF = ii - 2; + int kF = k0 + 1; + int jF_dst = -jj; + int jF_src = jj + 1; + fh[(iF+2) + (jF_dst+2)*fnx + (kF+2)*fnx*fny] = + fh[(iF+2) + (jF_src+2)*fnx + (kF+2)*fnx*fny] * SoA1; + } +} + +__global__ void kern_symbd_kghost_ord3(double * __restrict__ fh, double SoA2) +{ + const int nx = d_gp.ex[0], ny = d_gp.ex[1]; + const int fnx = d_gp.fh3_nx, fny = d_gp.fh3_ny; + int irange = nx + 3; + int jrange = ny + 3; + int total = irange * jrange; + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < total * 3; + tid += blockDim.x * gridDim.x) + { + int kk = tid / total; + int rem = tid % total; + int ii = rem % irange; + int jj = rem / irange; + int iF = ii - 2; + int jF = jj - 2; + int kF_dst = -kk; + int kF_src = kk + 1; + fh[(iF+2) + (jF+2)*fnx + (kF_dst+2)*fnx*fny] = + fh[(iF+2) + (jF+2)*fnx + (kF_src+2)*fnx*fny] * SoA2; + } +} + +/* ================================================================== */ +/* B. Stencil kernels */ +/* ================================================================== */ + +/* ---- First derivatives (ord=2, 4th/2nd order) ---- */ +__global__ __launch_bounds__(128, 4) +void kern_fderivs(const double * __restrict__ fh, + double * __restrict__ fx, + double * __restrict__ fy, + double * __restrict__ fz) +{ + const int nx = d_gp.ex[0], ny = d_gp.ex[1], nz = d_gp.ex[2]; + const int imaxF = d_gp.imaxF, jmaxF = d_gp.jmaxF, kmaxF = d_gp.kmaxF; + const int iminF = d_gp.iminF, jminF = d_gp.jminF, kminF = d_gp.kminF; + + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < d_gp.all; + tid += blockDim.x * gridDim.x) + { + int i0 = tid % nx; + int j0 = (tid / nx) % ny; + int k0 = tid / (nx * ny); + + /* boundary points: leave as zero */ + if (i0 > nx - 2 || j0 > ny - 2 || k0 > nz - 2) { + fx[tid] = 0.0; fy[tid] = 0.0; fz[tid] = 0.0; + continue; + } + + int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1; + + if ((iF+2) <= imaxF && (iF-2) >= iminF && + (jF+2) <= jmaxF && (jF-2) 
>= jminF && + (kF+2) <= kmaxF && (kF-2) >= kminF) + { + fx[tid] = d_gp.d12dx * ( + fh[idx_fh2(iF-2,jF,kF)] - 8.0*fh[idx_fh2(iF-1,jF,kF)] + + 8.0*fh[idx_fh2(iF+1,jF,kF)] - fh[idx_fh2(iF+2,jF,kF)]); + fy[tid] = d_gp.d12dy * ( + fh[idx_fh2(iF,jF-2,kF)] - 8.0*fh[idx_fh2(iF,jF-1,kF)] + + 8.0*fh[idx_fh2(iF,jF+1,kF)] - fh[idx_fh2(iF,jF+2,kF)]); + fz[tid] = d_gp.d12dz * ( + fh[idx_fh2(iF,jF,kF-2)] - 8.0*fh[idx_fh2(iF,jF,kF-1)] + + 8.0*fh[idx_fh2(iF,jF,kF+1)] - fh[idx_fh2(iF,jF,kF+2)]); + } + else if ((iF+1) <= imaxF && (iF-1) >= iminF && + (jF+1) <= jmaxF && (jF-1) >= jminF && + (kF+1) <= kmaxF && (kF-1) >= kminF) + { + fx[tid] = d_gp.d2dx * ( + -fh[idx_fh2(iF-1,jF,kF)] + fh[idx_fh2(iF+1,jF,kF)]); + fy[tid] = d_gp.d2dy * ( + -fh[idx_fh2(iF,jF-1,kF)] + fh[idx_fh2(iF,jF+1,kF)]); + fz[tid] = d_gp.d2dz * ( + -fh[idx_fh2(iF,jF,kF-1)] + fh[idx_fh2(iF,jF,kF+1)]); + } + else { + fx[tid] = 0.0; fy[tid] = 0.0; fz[tid] = 0.0; + } + } +} + +/* ---- Second derivatives (ord=2, 4th/2nd order) ---- */ +__global__ __launch_bounds__(128, 4) +void kern_fdderivs(const double * __restrict__ fh, + double * __restrict__ fxx, double * __restrict__ fxy, + double * __restrict__ fxz, double * __restrict__ fyy, + double * __restrict__ fyz, double * __restrict__ fzz) +{ + const int nx = d_gp.ex[0], ny = d_gp.ex[1], nz = d_gp.ex[2]; + const int imaxF = d_gp.imaxF, jmaxF = d_gp.jmaxF, kmaxF = d_gp.kmaxF; + const int iminF = d_gp.iminF, jminF = d_gp.jminF, kminF = d_gp.kminF; + + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < d_gp.all; + tid += blockDim.x * gridDim.x) + { + int i0 = tid % nx; + int j0 = (tid / nx) % ny; + int k0 = tid / (nx * ny); + + if (i0 > nx - 2 || j0 > ny - 2 || k0 > nz - 2) { + fxx[tid]=0; fxy[tid]=0; fxz[tid]=0; + fyy[tid]=0; fyz[tid]=0; fzz[tid]=0; + continue; + } + + int iF = i0+1, jF = j0+1, kF = k0+1; + + if ((iF+2)<=imaxF && (iF-2)>=iminF && + (jF+2)<=jmaxF && (jF-2)>=jminF && + (kF+2)<=kmaxF && (kF-2)>=kminF) + { + /* 4th-order diagonal */ + double c = 
fh[idx_fh2(iF,jF,kF)]; + fxx[tid] = d_gp.Fdxdx*( + -fh[idx_fh2(iF-2,jF,kF)] + 16.0*fh[idx_fh2(iF-1,jF,kF)] + -30.0*c + 16.0*fh[idx_fh2(iF+1,jF,kF)] - fh[idx_fh2(iF+2,jF,kF)]); + fyy[tid] = d_gp.Fdydy*( + -fh[idx_fh2(iF,jF-2,kF)] + 16.0*fh[idx_fh2(iF,jF-1,kF)] + -30.0*c + 16.0*fh[idx_fh2(iF,jF+1,kF)] - fh[idx_fh2(iF,jF+2,kF)]); + fzz[tid] = d_gp.Fdzdz*( + -fh[idx_fh2(iF,jF,kF-2)] + 16.0*fh[idx_fh2(iF,jF,kF-1)] + -30.0*c + 16.0*fh[idx_fh2(iF,jF,kF+1)] - fh[idx_fh2(iF,jF,kF+2)]); + + /* 4th-order cross: fxy */ + { + double t_jm2 = fh[idx_fh2(iF-2,jF-2,kF)] - 8.0*fh[idx_fh2(iF-1,jF-2,kF)] + + 8.0*fh[idx_fh2(iF+1,jF-2,kF)] - fh[idx_fh2(iF+2,jF-2,kF)]; + double t_jm1 = fh[idx_fh2(iF-2,jF-1,kF)] - 8.0*fh[idx_fh2(iF-1,jF-1,kF)] + + 8.0*fh[idx_fh2(iF+1,jF-1,kF)] - fh[idx_fh2(iF+2,jF-1,kF)]; + double t_jp1 = fh[idx_fh2(iF-2,jF+1,kF)] - 8.0*fh[idx_fh2(iF-1,jF+1,kF)] + + 8.0*fh[idx_fh2(iF+1,jF+1,kF)] - fh[idx_fh2(iF+2,jF+1,kF)]; + double t_jp2 = fh[idx_fh2(iF-2,jF+2,kF)] - 8.0*fh[idx_fh2(iF-1,jF+2,kF)] + + 8.0*fh[idx_fh2(iF+1,jF+2,kF)] - fh[idx_fh2(iF+2,jF+2,kF)]; + fxy[tid] = d_gp.Fdxdy*(t_jm2 - 8.0*t_jm1 + 8.0*t_jp1 - t_jp2); + } + /* 4th-order cross: fxz */ + { + double t_km2 = fh[idx_fh2(iF-2,jF,kF-2)] - 8.0*fh[idx_fh2(iF-1,jF,kF-2)] + + 8.0*fh[idx_fh2(iF+1,jF,kF-2)] - fh[idx_fh2(iF+2,jF,kF-2)]; + double t_km1 = fh[idx_fh2(iF-2,jF,kF-1)] - 8.0*fh[idx_fh2(iF-1,jF,kF-1)] + + 8.0*fh[idx_fh2(iF+1,jF,kF-1)] - fh[idx_fh2(iF+2,jF,kF-1)]; + double t_kp1 = fh[idx_fh2(iF-2,jF,kF+1)] - 8.0*fh[idx_fh2(iF-1,jF,kF+1)] + + 8.0*fh[idx_fh2(iF+1,jF,kF+1)] - fh[idx_fh2(iF+2,jF,kF+1)]; + double t_kp2 = fh[idx_fh2(iF-2,jF,kF+2)] - 8.0*fh[idx_fh2(iF-1,jF,kF+2)] + + 8.0*fh[idx_fh2(iF+1,jF,kF+2)] - fh[idx_fh2(iF+2,jF,kF+2)]; + fxz[tid] = d_gp.Fdxdz*(t_km2 - 8.0*t_km1 + 8.0*t_kp1 - t_kp2); + } + /* 4th-order cross: fyz */ + { + double t_km2 = fh[idx_fh2(iF,jF-2,kF-2)] - 8.0*fh[idx_fh2(iF,jF-1,kF-2)] + + 8.0*fh[idx_fh2(iF,jF+1,kF-2)] - fh[idx_fh2(iF,jF+2,kF-2)]; + double t_km1 = 
fh[idx_fh2(iF,jF-2,kF-1)] - 8.0*fh[idx_fh2(iF,jF-1,kF-1)] + + 8.0*fh[idx_fh2(iF,jF+1,kF-1)] - fh[idx_fh2(iF,jF+2,kF-1)]; + double t_kp1 = fh[idx_fh2(iF,jF-2,kF+1)] - 8.0*fh[idx_fh2(iF,jF-1,kF+1)] + + 8.0*fh[idx_fh2(iF,jF+1,kF+1)] - fh[idx_fh2(iF,jF+2,kF+1)]; + double t_kp2 = fh[idx_fh2(iF,jF-2,kF+2)] - 8.0*fh[idx_fh2(iF,jF-1,kF+2)] + + 8.0*fh[idx_fh2(iF,jF+1,kF+2)] - fh[idx_fh2(iF,jF+2,kF+2)]; + fyz[tid] = d_gp.Fdydz*(t_km2 - 8.0*t_km1 + 8.0*t_kp1 - t_kp2); + } + } + else if ((iF+1)<=imaxF && (iF-1)>=iminF && + (jF+1)<=jmaxF && (jF-1)>=jminF && + (kF+1)<=kmaxF && (kF-1)>=kminF) + { + double c = fh[idx_fh2(iF,jF,kF)]; + fxx[tid] = d_gp.Sdxdx*(fh[idx_fh2(iF-1,jF,kF)] - 2.0*c + fh[idx_fh2(iF+1,jF,kF)]); + fyy[tid] = d_gp.Sdydy*(fh[idx_fh2(iF,jF-1,kF)] - 2.0*c + fh[idx_fh2(iF,jF+1,kF)]); + fzz[tid] = d_gp.Sdzdz*(fh[idx_fh2(iF,jF,kF-1)] - 2.0*c + fh[idx_fh2(iF,jF,kF+1)]); + fxy[tid] = d_gp.Sdxdy*(fh[idx_fh2(iF-1,jF-1,kF)] - fh[idx_fh2(iF+1,jF-1,kF)] + -fh[idx_fh2(iF-1,jF+1,kF)] + fh[idx_fh2(iF+1,jF+1,kF)]); + fxz[tid] = d_gp.Sdxdz*(fh[idx_fh2(iF-1,jF,kF-1)] - fh[idx_fh2(iF+1,jF,kF-1)] + -fh[idx_fh2(iF-1,jF,kF+1)] + fh[idx_fh2(iF+1,jF,kF+1)]); + fyz[tid] = d_gp.Sdydz*(fh[idx_fh2(iF,jF-1,kF-1)] - fh[idx_fh2(iF,jF+1,kF-1)] + -fh[idx_fh2(iF,jF-1,kF+1)] + fh[idx_fh2(iF,jF+1,kF+1)]); + } + else { + fxx[tid]=0; fxy[tid]=0; fxz[tid]=0; + fyy[tid]=0; fyz[tid]=0; fzz[tid]=0; + } + } +} + +/* ---- Lopsided (upwind advection) kernel ---- */ +__global__ __launch_bounds__(128, 4) +void kern_lopsided(const double * __restrict__ fh, + double * __restrict__ f_rhs, + const double * __restrict__ Sfx, + const double * __restrict__ Sfy, + const double * __restrict__ Sfz) +{ + const int nx = d_gp.ex[0], ny = d_gp.ex[1], nz = d_gp.ex[2]; + const int iminF = d_gp.iminF3, jminF = d_gp.jminF3, kminF = d_gp.kminF3; + + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < d_gp.all; + tid += blockDim.x * gridDim.x) + { + int i0 = tid % nx; + int j0 = (tid / nx) % ny; + int k0 = tid / (nx * 
ny); + + if (i0 > nx - 2 || j0 > ny - 2 || k0 > nz - 2) continue; + + int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1; + double val = 0.0; + + /* --- x direction --- */ + double sfx = Sfx[tid]; + if (sfx > 0.0) { + if (i0 <= nx - 4) { + val += sfx * d_gp.d12dx * ( + -3.0*fh[idx_fh3(iF-1,jF,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF+1,jF,kF)] - 6.0*fh[idx_fh3(iF+2,jF,kF)] + + fh[idx_fh3(iF+3,jF,kF)]); + } else if (i0 <= nx - 3) { + val += sfx * d_gp.d12dx * ( + fh[idx_fh3(iF-2,jF,kF)] - 8.0*fh[idx_fh3(iF-1,jF,kF)] + +8.0*fh[idx_fh3(iF+1,jF,kF)] - fh[idx_fh3(iF+2,jF,kF)]); + } else if (i0 <= nx - 2) { + val -= sfx * d_gp.d12dx * ( + -3.0*fh[idx_fh3(iF+1,jF,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF-1,jF,kF)] - 6.0*fh[idx_fh3(iF-2,jF,kF)] + + fh[idx_fh3(iF-3,jF,kF)]); + } + } else if (sfx < 0.0) { + if ((i0 - 2) >= iminF) { + val -= sfx * d_gp.d12dx * ( + -3.0*fh[idx_fh3(iF+1,jF,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF-1,jF,kF)] - 6.0*fh[idx_fh3(iF-2,jF,kF)] + + fh[idx_fh3(iF-3,jF,kF)]); + } else if ((i0 - 1) >= iminF) { + val += sfx * d_gp.d12dx * ( + fh[idx_fh3(iF-2,jF,kF)] - 8.0*fh[idx_fh3(iF-1,jF,kF)] + +8.0*fh[idx_fh3(iF+1,jF,kF)] - fh[idx_fh3(iF+2,jF,kF)]); + } else if (i0 >= iminF) { + val += sfx * d_gp.d12dx * ( + -3.0*fh[idx_fh3(iF-1,jF,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF+1,jF,kF)] - 6.0*fh[idx_fh3(iF+2,jF,kF)] + + fh[idx_fh3(iF+3,jF,kF)]); + } + } + + /* --- y direction --- */ + double sfy = Sfy[tid]; + if (sfy > 0.0) { + if (j0 <= ny - 4) { + val += sfy * d_gp.d12dy * ( + -3.0*fh[idx_fh3(iF,jF-1,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF,jF+1,kF)] - 6.0*fh[idx_fh3(iF,jF+2,kF)] + + fh[idx_fh3(iF,jF+3,kF)]); + } else if (j0 <= ny - 3) { + val += sfy * d_gp.d12dy * ( + fh[idx_fh3(iF,jF-2,kF)] - 8.0*fh[idx_fh3(iF,jF-1,kF)] + +8.0*fh[idx_fh3(iF,jF+1,kF)] - fh[idx_fh3(iF,jF+2,kF)]); + } else if (j0 <= ny - 2) { + val -= sfy * d_gp.d12dy * ( + -3.0*fh[idx_fh3(iF,jF+1,kF)] - 
10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF,jF-1,kF)] - 6.0*fh[idx_fh3(iF,jF-2,kF)] + + fh[idx_fh3(iF,jF-3,kF)]); + } + } else if (sfy < 0.0) { + if ((j0 - 2) >= jminF) { + val -= sfy * d_gp.d12dy * ( + -3.0*fh[idx_fh3(iF,jF+1,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF,jF-1,kF)] - 6.0*fh[idx_fh3(iF,jF-2,kF)] + + fh[idx_fh3(iF,jF-3,kF)]); + } else if ((j0 - 1) >= jminF) { + val += sfy * d_gp.d12dy * ( + fh[idx_fh3(iF,jF-2,kF)] - 8.0*fh[idx_fh3(iF,jF-1,kF)] + +8.0*fh[idx_fh3(iF,jF+1,kF)] - fh[idx_fh3(iF,jF+2,kF)]); + } else if (j0 >= jminF) { + val += sfy * d_gp.d12dy * ( + -3.0*fh[idx_fh3(iF,jF-1,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF,jF+1,kF)] - 6.0*fh[idx_fh3(iF,jF+2,kF)] + + fh[idx_fh3(iF,jF+3,kF)]); + } + } + + /* --- z direction --- */ + double sfz = Sfz[tid]; + if (sfz > 0.0) { + if (k0 <= nz - 4) { + val += sfz * d_gp.d12dz * ( + -3.0*fh[idx_fh3(iF,jF,kF-1)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF,jF,kF+1)] - 6.0*fh[idx_fh3(iF,jF,kF+2)] + + fh[idx_fh3(iF,jF,kF+3)]); + } else if (k0 <= nz - 3) { + val += sfz * d_gp.d12dz * ( + fh[idx_fh3(iF,jF,kF-2)] - 8.0*fh[idx_fh3(iF,jF,kF-1)] + +8.0*fh[idx_fh3(iF,jF,kF+1)] - fh[idx_fh3(iF,jF,kF+2)]); + } else if (k0 <= nz - 2) { + val -= sfz * d_gp.d12dz * ( + -3.0*fh[idx_fh3(iF,jF,kF+1)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF,jF,kF-1)] - 6.0*fh[idx_fh3(iF,jF,kF-2)] + + fh[idx_fh3(iF,jF,kF-3)]); + } + } else if (sfz < 0.0) { + if ((k0 - 2) >= kminF) { + val -= sfz * d_gp.d12dz * ( + -3.0*fh[idx_fh3(iF,jF,kF+1)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF,jF,kF-1)] - 6.0*fh[idx_fh3(iF,jF,kF-2)] + + fh[idx_fh3(iF,jF,kF-3)]); + } else if ((k0 - 1) >= kminF) { + val += sfz * d_gp.d12dz * ( + fh[idx_fh3(iF,jF,kF-2)] - 8.0*fh[idx_fh3(iF,jF,kF-1)] + +8.0*fh[idx_fh3(iF,jF,kF+1)] - fh[idx_fh3(iF,jF,kF+2)]); + } else if (k0 >= kminF) { + val += sfz * d_gp.d12dz * ( + -3.0*fh[idx_fh3(iF,jF,kF-1)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF,jF,kF+1)] - 
6.0*fh[idx_fh3(iF,jF,kF+2)] + + fh[idx_fh3(iF,jF,kF+3)]); + } + } + + f_rhs[tid] += val; + } +} + +/* ---- KO dissipation kernel (ord=3, 6th-order) ---- */ +__global__ __launch_bounds__(128, 4) +void kern_kodis(const double * __restrict__ fh, + double * __restrict__ f_rhs, + double eps_val) +{ + const int nx = d_gp.ex[0], ny = d_gp.ex[1], nz = d_gp.ex[2]; + const int iminF = d_gp.iminF3, jminF = d_gp.jminF3, kminF = d_gp.kminF3; + const int imaxF = d_gp.imaxF, jmaxF = d_gp.jmaxF, kmaxF = d_gp.kmaxF; + const double cof = 64.0; + + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < d_gp.all; + tid += blockDim.x * gridDim.x) + { + int i0 = tid % nx; + int j0 = (tid / nx) % ny; + int k0 = tid / (nx * ny); + int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1; + + if ((iF-3) >= iminF && (iF+3) <= imaxF && + (jF-3) >= jminF && (jF+3) <= jmaxF && + (kF-3) >= kminF && (kF+3) <= kmaxF) + { + double Dx = (fh[idx_fh3(iF-3,jF,kF)] + fh[idx_fh3(iF+3,jF,kF)]) + - 6.0*(fh[idx_fh3(iF-2,jF,kF)] + fh[idx_fh3(iF+2,jF,kF)]) + +15.0*(fh[idx_fh3(iF-1,jF,kF)] + fh[idx_fh3(iF+1,jF,kF)]) + -20.0* fh[idx_fh3(iF,jF,kF)]; + Dx /= d_gp.dX; + + double Dy = (fh[idx_fh3(iF,jF-3,kF)] + fh[idx_fh3(iF,jF+3,kF)]) + - 6.0*(fh[idx_fh3(iF,jF-2,kF)] + fh[idx_fh3(iF,jF+2,kF)]) + +15.0*(fh[idx_fh3(iF,jF-1,kF)] + fh[idx_fh3(iF,jF+1,kF)]) + -20.0* fh[idx_fh3(iF,jF,kF)]; + Dy /= d_gp.dY; + + double Dz = (fh[idx_fh3(iF,jF,kF-3)] + fh[idx_fh3(iF,jF,kF+3)]) + - 6.0*(fh[idx_fh3(iF,jF,kF-2)] + fh[idx_fh3(iF,jF,kF+2)]) + +15.0*(fh[idx_fh3(iF,jF,kF-1)] + fh[idx_fh3(iF,jF,kF+1)]) + -20.0* fh[idx_fh3(iF,jF,kF)]; + Dz /= d_gp.dZ; + + f_rhs[tid] += (eps_val / cof) * (Dx + Dy + Dz); + } + } +} + +/* ================================================================== */ +/* Host wrapper helpers */ +/* ================================================================== */ static const int BLK = 128; static inline int grid(size_t n) { if (n == 0) return 1; @@ -1049,8 +1049,8 @@ static inline int grid(size_t n) { if (g > 
2147483647u) g = 2147483647u; return (int)g; } - -/* symmetry_bd on GPU for ord=2, then launch fderivs kernel */ + +/* symmetry_bd on GPU for ord=2, then launch fderivs kernel */ static void gpu_fderivs(double *d_f, double *d_fx, double *d_fy, double *d_fz, double SoA0, double SoA1, double SoA2, int all) { @@ -1063,12 +1063,12 @@ static void gpu_fderivs(double *d_f, double *d_fx, double *d_fy, double *d_fz, kern_symbd_pack_ord2<<>>(d_f, fh, SoA0, SoA1, SoA2); kern_fderivs<<>>(fh, d_fx, d_fy, d_fz); } - -/* symmetry_bd on GPU for ord=2, then launch fdderivs kernel */ -static void gpu_fdderivs(double *d_f, - double *d_fxx, double *d_fxy, double *d_fxz, - double *d_fyy, double *d_fyz, double *d_fzz, - double SoA0, double SoA1, double SoA2, int all) + +/* symmetry_bd on GPU for ord=2, then launch fdderivs kernel */ +static void gpu_fdderivs(double *d_f, + double *d_fxx, double *d_fxy, double *d_fxz, + double *d_fyy, double *d_fyz, double *d_fzz, + double SoA0, double SoA1, double SoA2, int all) { double *fh = g_buf.d_fh2; const size_t nx = (size_t)g_buf.prev_nx; @@ -1079,7 +1079,7 @@ static void gpu_fdderivs(double *d_f, kern_symbd_pack_ord2<<>>(d_f, fh, SoA0, SoA1, SoA2); kern_fdderivs<<>>(fh, d_fxx, d_fxy, d_fxz, d_fyy, d_fyz, d_fzz); } - + /* Combined ord=3 advection + KO dissipation. * When advection and KO use the same source field, symmetry packing is shared. * If they differ (e.g. gxx advection + dxx KO), only KO repacks. @@ -1346,364 +1346,364 @@ static void gpu_sommerfeld_routbam(double *d_f0, double *d_f_rhs, if (touch_ymin) kern_sommerfeld_face_bam<<>>(g_buf.d_fh2, d_f_rhs, FACE_YMIN, velocity, X[0], Y[0], Z[0]); if (touch_zmin) kern_sommerfeld_face_bam<<>>(g_buf.d_fh2, d_f_rhs, FACE_ZMIN, velocity, X[0], Y[0], Z[0]); } - -/* ================================================================== */ -/* C. 
Point-wise computation kernels */ -/* ================================================================== */ - -/* Phase 1: alpn1, chin1, gxx=dxx+1, gyy=dyy+1, gzz=dzz+1 */ -__global__ void kern_phase1_prep( - const double* __restrict__ Lap, const double* __restrict__ chi, - const double* __restrict__ dxx, const double* __restrict__ dyy, - const double* __restrict__ dzz, - double* __restrict__ alpn1, double* __restrict__ chin1, - double* __restrict__ gxx, double* __restrict__ gyy, double* __restrict__ gzz) -{ - for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { - alpn1[i] = Lap[i] + 1.0; - chin1[i] = chi[i] + 1.0; - gxx[i] = dxx[i] + 1.0; - gyy[i] = dyy[i] + 1.0; - gzz[i] = dzz[i] + 1.0; - } -} - + +/* ================================================================== */ +/* C. Point-wise computation kernels */ +/* ================================================================== */ + +/* Phase 1: alpn1, chin1, gxx=dxx+1, gyy=dyy+1, gzz=dzz+1 */ +__global__ void kern_phase1_prep( + const double* __restrict__ Lap, const double* __restrict__ chi, + const double* __restrict__ dxx, const double* __restrict__ dyy, + const double* __restrict__ dzz, + double* __restrict__ alpn1, double* __restrict__ chin1, + double* __restrict__ gxx, double* __restrict__ gyy, double* __restrict__ gzz) +{ + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + alpn1[i] = Lap[i] + 1.0; + chin1[i] = chi[i] + 1.0; + gxx[i] = dxx[i] + 1.0; + gyy[i] = dyy[i] + 1.0; + gzz[i] = dzz[i] + 1.0; + } +} + /* Phase 2a: chi_rhs, gij_rhs */ __global__ void kern_phase2_metric_rhs( - const double* __restrict__ alpn1, const double* __restrict__ chin1, - const double* __restrict__ gxx, const double* __restrict__ gxy, - const double* __restrict__ gxz, const double* __restrict__ gyy, - const double* __restrict__ gyz, const double* __restrict__ gzz, - const double* __restrict__ trK, - const double* __restrict__ Axx, const double* 
__restrict__ Axy, - const double* __restrict__ Axz, const double* __restrict__ Ayy, - const double* __restrict__ Ayz, const double* __restrict__ Azz, - const double* __restrict__ betaxx, const double* __restrict__ betaxy, - const double* __restrict__ betaxz, const double* __restrict__ betayx, - const double* __restrict__ betayy, const double* __restrict__ betayz, + const double* __restrict__ alpn1, const double* __restrict__ chin1, + const double* __restrict__ gxx, const double* __restrict__ gxy, + const double* __restrict__ gxz, const double* __restrict__ gyy, + const double* __restrict__ gyz, const double* __restrict__ gzz, + const double* __restrict__ trK, + const double* __restrict__ Axx, const double* __restrict__ Axy, + const double* __restrict__ Axz, const double* __restrict__ Ayy, + const double* __restrict__ Ayz, const double* __restrict__ Azz, + const double* __restrict__ betaxx, const double* __restrict__ betaxy, + const double* __restrict__ betaxz, const double* __restrict__ betayx, + const double* __restrict__ betayy, const double* __restrict__ betayz, const double* __restrict__ betazx, const double* __restrict__ betazy, const double* __restrict__ betazz, double* __restrict__ chi_rhs, double* __restrict__ gxx_rhs, - double* __restrict__ gyy_rhs, double* __restrict__ gzz_rhs, - double* __restrict__ gxy_rhs, double* __restrict__ gyz_rhs, - double* __restrict__ gxz_rhs) -{ + double* __restrict__ gyy_rhs, double* __restrict__ gzz_rhs, + double* __restrict__ gxy_rhs, double* __restrict__ gyz_rhs, + double* __restrict__ gxz_rhs) +{ const double F2o3 = 2.0/3.0, F1o3 = 1.0/3.0, TWO = 2.0; for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { double db = betaxx[i] + betayy[i] + betazz[i]; chi_rhs[i] = F2o3 * chin1[i] * (alpn1[i] * trK[i] - db); - gxx_rhs[i] = -TWO*alpn1[i]*Axx[i] - F2o3*gxx[i]*db - + TWO*(gxx[i]*betaxx[i] + gxy[i]*betayx[i] + gxz[i]*betazx[i]); - gyy_rhs[i] = -TWO*alpn1[i]*Ayy[i] - F2o3*gyy[i]*db - + 
TWO*(gxy[i]*betaxy[i] + gyy[i]*betayy[i] + gyz[i]*betazy[i]); - gzz_rhs[i] = -TWO*alpn1[i]*Azz[i] - F2o3*gzz[i]*db - + TWO*(gxz[i]*betaxz[i] + gyz[i]*betayz[i] + gzz[i]*betazz[i]); - gxy_rhs[i] = -TWO*alpn1[i]*Axy[i] + F1o3*gxy[i]*db - + gxx[i]*betaxy[i] + gxz[i]*betazy[i] + gyy[i]*betayx[i] - + gyz[i]*betazx[i] - gxy[i]*betazz[i]; - gyz_rhs[i] = -TWO*alpn1[i]*Ayz[i] + F1o3*gyz[i]*db - + gxy[i]*betaxz[i] + gyy[i]*betayz[i] + gxz[i]*betaxy[i] - + gzz[i]*betazy[i] - gyz[i]*betaxx[i]; - gxz_rhs[i] = -TWO*alpn1[i]*Axz[i] + F1o3*gxz[i]*db - + gxx[i]*betaxz[i] + gxy[i]*betayz[i] + gyz[i]*betayx[i] - + gzz[i]*betazx[i] - gxz[i]*betayy[i]; - } -} - -/* Phase 2b: metric inverse */ -__global__ void kern_phase2_inverse( - const double* __restrict__ gxx, const double* __restrict__ gxy, - const double* __restrict__ gxz, const double* __restrict__ gyy, - const double* __restrict__ gyz, const double* __restrict__ gzz, - double* __restrict__ gupxx, double* __restrict__ gupxy, - double* __restrict__ gupxz, double* __restrict__ gupyy, - double* __restrict__ gupyz, double* __restrict__ gupzz) -{ - for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { - double det = gxx[i]*gyy[i]*gzz[i] + gxy[i]*gyz[i]*gxz[i] + gxz[i]*gxy[i]*gyz[i] - - gxz[i]*gyy[i]*gxz[i] - gxy[i]*gxy[i]*gzz[i] - gxx[i]*gyz[i]*gyz[i]; - double inv = 1.0 / det; - gupxx[i] = (gyy[i]*gzz[i] - gyz[i]*gyz[i]) * inv; - gupxy[i] = -(gxy[i]*gzz[i] - gyz[i]*gxz[i]) * inv; - gupxz[i] = (gxy[i]*gyz[i] - gyy[i]*gxz[i]) * inv; - gupyy[i] = (gxx[i]*gzz[i] - gxz[i]*gxz[i]) * inv; - gupyz[i] = -(gxx[i]*gyz[i] - gxy[i]*gxz[i]) * inv; - gupzz[i] = (gxx[i]*gyy[i] - gxy[i]*gxy[i]) * inv; - } -} - -/* Phase 3: Gamma constraint residuals (co==0 only) */ -__global__ void kern_phase3_gamma_constraint( - const double* __restrict__ Gamx, const double* __restrict__ Gamy, - const double* __restrict__ Gamz, - const double* __restrict__ gupxx, const double* __restrict__ gupxy, - const double* __restrict__ 
gupxz, const double* __restrict__ gupyy, - const double* __restrict__ gupyz, const double* __restrict__ gupzz, - const double* __restrict__ gxxx, const double* __restrict__ gxyx, - const double* __restrict__ gxzx, const double* __restrict__ gyyx, - const double* __restrict__ gyzx, const double* __restrict__ gzzx, - const double* __restrict__ gxxy, const double* __restrict__ gxyy, - const double* __restrict__ gxzy, const double* __restrict__ gyyy, - const double* __restrict__ gyzy, const double* __restrict__ gzzy, - const double* __restrict__ gxxz, const double* __restrict__ gxyz, - const double* __restrict__ gxzz, const double* __restrict__ gyyz, - const double* __restrict__ gyzz, const double* __restrict__ gzzz, - double* __restrict__ Gmx_Res, double* __restrict__ Gmy_Res, - double* __restrict__ Gmz_Res) -{ - for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { - double uxx=gupxx[i], uxy=gupxy[i], uxz=gupxz[i]; - double uyy=gupyy[i], uyz=gupyz[i], uzz=gupzz[i]; - - Gmx_Res[i] = Gamx[i] - ( - uxx*(uxx*gxxx[i]+uxy*gxyx[i]+uxz*gxzx[i]) + - uxy*(uxx*gxyx[i]+uxy*gyyx[i]+uxz*gyzx[i]) + - uxz*(uxx*gxzx[i]+uxy*gyzx[i]+uxz*gzzx[i]) + - uxx*(uxy*gxxy[i]+uyy*gxyy[i]+uyz*gxzy[i]) + - uxy*(uxy*gxyy[i]+uyy*gyyy[i]+uyz*gyzy[i]) + - uxz*(uxy*gxzy[i]+uyy*gyzy[i]+uyz*gzzy[i]) + - uxx*(uxz*gxxz[i]+uyz*gxyz[i]+uzz*gxzz[i]) + - uxy*(uxz*gxyz[i]+uyz*gyyz[i]+uzz*gyzz[i]) + - uxz*(uxz*gxzz[i]+uyz*gyzz[i]+uzz*gzzz[i])); - - Gmy_Res[i] = Gamy[i] - ( - uxx*(uxy*gxxx[i]+uyy*gxyx[i]+uyz*gxzx[i]) + - uxy*(uxy*gxyx[i]+uyy*gyyx[i]+uyz*gyzx[i]) + - uxz*(uxy*gxzx[i]+uyy*gyzx[i]+uyz*gzzx[i]) + - uxy*(uxy*gxxy[i]+uyy*gxyy[i]+uyz*gxzy[i]) + - uyy*(uxy*gxyy[i]+uyy*gyyy[i]+uyz*gyzy[i]) + - uyz*(uxy*gxzy[i]+uyy*gyzy[i]+uyz*gzzy[i]) + - uxy*(uxz*gxxz[i]+uyz*gxyz[i]+uzz*gxzz[i]) + - uyy*(uxz*gxyz[i]+uyz*gyyz[i]+uzz*gyzz[i]) + - uyz*(uxz*gxzz[i]+uyz*gyzz[i]+uzz*gzzz[i])); - - Gmz_Res[i] = Gamz[i] - ( - uxx*(uxz*gxxx[i]+uyz*gxyx[i]+uzz*gxzx[i]) + - 
uxy*(uxz*gxyx[i]+uyz*gyyx[i]+uzz*gyzx[i]) + - uxz*(uxz*gxzx[i]+uyz*gyzx[i]+uzz*gzzx[i]) + - uxy*(uxz*gxxy[i]+uyz*gxyy[i]+uzz*gxzy[i]) + - uyy*(uxz*gxyy[i]+uyz*gyyy[i]+uzz*gyzy[i]) + - uyz*(uxz*gxzy[i]+uyz*gyzy[i]+uzz*gzzy[i]) + - uxz*(uxz*gxxz[i]+uyz*gxyz[i]+uzz*gxzz[i]) + - uyz*(uxz*gxyz[i]+uyz*gyyz[i]+uzz*gyzz[i]) + - uzz*(uxz*gxzz[i]+uyz*gyzz[i]+uzz*gzzz[i])); - } -} - -/* Phase 4: 18 Christoffel symbols */ -__global__ __launch_bounds__(128, 4) -void kern_phase4_christoffel( - const double* __restrict__ gupxx, const double* __restrict__ gupxy, - const double* __restrict__ gupxz, const double* __restrict__ gupyy, - const double* __restrict__ gupyz, const double* __restrict__ gupzz, - const double* __restrict__ gxxx, const double* __restrict__ gxyx, - const double* __restrict__ gxzx, const double* __restrict__ gyyx, - const double* __restrict__ gyzx, const double* __restrict__ gzzx, - const double* __restrict__ gxxy, const double* __restrict__ gxyy, - const double* __restrict__ gxzy, const double* __restrict__ gyyy, - const double* __restrict__ gyzy, const double* __restrict__ gzzy, - const double* __restrict__ gxxz, const double* __restrict__ gxyz, - const double* __restrict__ gxzz, const double* __restrict__ gyyz, - const double* __restrict__ gyzz, const double* __restrict__ gzzz, - double* __restrict__ Gxxx, double* __restrict__ Gxxy, double* __restrict__ Gxxz, - double* __restrict__ Gxyy, double* __restrict__ Gxyz, double* __restrict__ Gxzz, - double* __restrict__ Gyxx, double* __restrict__ Gyxy, double* __restrict__ Gyxz, - double* __restrict__ Gyyy, double* __restrict__ Gyyz, double* __restrict__ Gyzz, - double* __restrict__ Gzxx, double* __restrict__ Gzxy, double* __restrict__ Gzxz, - double* __restrict__ Gzyy, double* __restrict__ Gzyz, double* __restrict__ Gzzz_o) -{ - const double H = 0.5, TWO = 2.0; - for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { - double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; - double 
uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; - /* Gamma^x_{xx} */ - Gxxx[i]=H*(uxx*gxxx[i]+uxy*(TWO*gxyx[i]-gxxy[i])+uxz*(TWO*gxzx[i]-gxxz[i])); - Gyxx[i]=H*(uxy*gxxx[i]+uyy*(TWO*gxyx[i]-gxxy[i])+uyz*(TWO*gxzx[i]-gxxz[i])); - Gzxx[i]=H*(uxz*gxxx[i]+uyz*(TWO*gxyx[i]-gxxy[i])+uzz*(TWO*gxzx[i]-gxxz[i])); - /* yy */ - Gxyy[i]=H*(uxx*(TWO*gxyy[i]-gyyx[i])+uxy*gyyy[i]+uxz*(TWO*gyzy[i]-gyyz[i])); - Gyyy[i]=H*(uxy*(TWO*gxyy[i]-gyyx[i])+uyy*gyyy[i]+uyz*(TWO*gyzy[i]-gyyz[i])); - Gzyy[i]=H*(uxz*(TWO*gxyy[i]-gyyx[i])+uyz*gyyy[i]+uzz*(TWO*gyzy[i]-gyyz[i])); - /* zz */ - Gxzz[i]=H*(uxx*(TWO*gxzz[i]-gzzx[i])+uxy*(TWO*gyzz[i]-gzzy[i])+uxz*gzzz[i]); - Gyzz[i]=H*(uxy*(TWO*gxzz[i]-gzzx[i])+uyy*(TWO*gyzz[i]-gzzy[i])+uyz*gzzz[i]); - Gzzz_o[i]=H*(uxz*(TWO*gxzz[i]-gzzx[i])+uyz*(TWO*gyzz[i]-gzzy[i])+uzz*gzzz[i]); - /* xy */ - Gxxy[i]=H*(uxx*gxxy[i]+uxy*gyyx[i]+uxz*(gxzy[i]+gyzx[i]-gxyz[i])); - Gyxy[i]=H*(uxy*gxxy[i]+uyy*gyyx[i]+uyz*(gxzy[i]+gyzx[i]-gxyz[i])); - Gzxy[i]=H*(uxz*gxxy[i]+uyz*gyyx[i]+uzz*(gxzy[i]+gyzx[i]-gxyz[i])); - /* xz */ - Gxxz[i]=H*(uxx*gxxz[i]+uxy*(gxyz[i]+gyzx[i]-gxzy[i])+uxz*gzzx[i]); - Gyxz[i]=H*(uxy*gxxz[i]+uyy*(gxyz[i]+gyzx[i]-gxzy[i])+uyz*gzzx[i]); - Gzxz[i]=H*(uxz*gxxz[i]+uyz*(gxyz[i]+gyzx[i]-gxzy[i])+uzz*gzzx[i]); - /* yz */ - Gxyz[i]=H*(uxx*(gxyz[i]+gxzy[i]-gyzx[i])+uxy*gyyz[i]+uxz*gzzy[i]); - Gyyz[i]=H*(uxy*(gxyz[i]+gxzy[i]-gyzx[i])+uyy*gyyz[i]+uyz*gzzy[i]); - Gzyz[i]=H*(uxz*(gxyz[i]+gxzy[i]-gyzx[i])+uyz*gyyz[i]+uzz*gzzy[i]); - } -} - -/* Phase 5: A^ij = gup^ia gup^jb A_ab (stored temporarily in Rxx..Rzz) */ -__global__ void kern_phase5_raise_A( - const double* __restrict__ gupxx, const double* __restrict__ gupxy, - const double* __restrict__ gupxz, const double* __restrict__ gupyy, - const double* __restrict__ gupyz, const double* __restrict__ gupzz, - const double* __restrict__ Axx, const double* __restrict__ Axy, - const double* __restrict__ Axz, const double* __restrict__ Ayy, - const double* __restrict__ Ayz, const double* __restrict__ Azz, - double* 
__restrict__ Rxx, double* __restrict__ Rxy, double* __restrict__ Rxz, - double* __restrict__ Ryy, double* __restrict__ Ryz, double* __restrict__ Rzz) -{ - const double TWO = 2.0; - for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { - double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; - double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; - Rxx[i]=uxx*uxx*Axx[i]+uxy*uxy*Ayy[i]+uxz*uxz*Azz[i] - +TWO*(uxx*uxy*Axy[i]+uxx*uxz*Axz[i]+uxy*uxz*Ayz[i]); - Ryy[i]=uxy*uxy*Axx[i]+uyy*uyy*Ayy[i]+uyz*uyz*Azz[i] - +TWO*(uxy*uyy*Axy[i]+uxy*uyz*Axz[i]+uyy*uyz*Ayz[i]); - Rzz[i]=uxz*uxz*Axx[i]+uyz*uyz*Ayy[i]+uzz*uzz*Azz[i] - +TWO*(uxz*uyz*Axy[i]+uxz*uzz*Axz[i]+uyz*uzz*Ayz[i]); - Rxy[i]=uxx*uxy*Axx[i]+uxy*uyy*Ayy[i]+uxz*uyz*Azz[i] - +(uxx*uyy+uxy*uxy)*Axy[i]+(uxx*uyz+uxz*uxy)*Axz[i]+(uxy*uyz+uxz*uyy)*Ayz[i]; - Rxz[i]=uxx*uxz*Axx[i]+uxy*uyz*Ayy[i]+uxz*uzz*Azz[i] - +(uxx*uyz+uxy*uxz)*Axy[i]+(uxx*uzz+uxz*uxz)*Axz[i]+(uxy*uzz+uxz*uyz)*Ayz[i]; - Ryz[i]=uxy*uxz*Axx[i]+uyy*uyz*Ayy[i]+uyz*uzz*Azz[i] - +(uxy*uyz+uyy*uxz)*Axy[i]+(uxy*uzz+uyz*uxz)*Axz[i]+(uyy*uzz+uyz*uyz)*Ayz[i]; - } -} - -/* Phase 6: Gamma_rhs part 1 (before fdderivs(beta) and fderivs(Gamma)) */ -__global__ __launch_bounds__(128, 4) -void kern_phase6_gamma_rhs_part1( - const double* __restrict__ Lapx, const double* __restrict__ Lapy, - const double* __restrict__ Lapz, - const double* __restrict__ alpn1, const double* __restrict__ chin1, - const double* __restrict__ chix, const double* __restrict__ chiy, - const double* __restrict__ chiz, - const double* __restrict__ gupxx, const double* __restrict__ gupxy, - const double* __restrict__ gupxz, const double* __restrict__ gupyy, - const double* __restrict__ gupyz, const double* __restrict__ gupzz, - const double* __restrict__ Kx, const double* __restrict__ Ky, - const double* __restrict__ Kz, - const double* __restrict__ Sx, const double* __restrict__ Sy, - const double* __restrict__ Sz, - const double* __restrict__ Rxx, const double* __restrict__ Rxy, 
- const double* __restrict__ Rxz, const double* __restrict__ Ryy, - const double* __restrict__ Ryz, const double* __restrict__ Rzz, - const double* __restrict__ Gxxx, const double* __restrict__ Gxxy, - const double* __restrict__ Gxxz, const double* __restrict__ Gxyy, - const double* __restrict__ Gxyz, const double* __restrict__ Gxzz, - const double* __restrict__ Gyxx, const double* __restrict__ Gyxy, - const double* __restrict__ Gyxz, const double* __restrict__ Gyyy, - const double* __restrict__ Gyyz, const double* __restrict__ Gyzz, - const double* __restrict__ Gzxx, const double* __restrict__ Gzxy, - const double* __restrict__ Gzxz, const double* __restrict__ Gzyy, - const double* __restrict__ Gzyz, const double* __restrict__ Gzzz, - double* __restrict__ Gamx_rhs, double* __restrict__ Gamy_rhs, - double* __restrict__ Gamz_rhs) -{ - const double TWO=2.0, F3o2=1.5, F2o3=2.0/3.0, EIGHT=8.0; - const double PI_V = 3.14159265358979323846; - for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { - double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; - double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; - double lx=Lapx[i],ly=Lapy[i],lz=Lapz[i]; - double a=alpn1[i], c1=chin1[i]; - double cx=chix[i],cy=chiy[i],cz=chiz[i]; - - Gamx_rhs[i] = -TWO*(lx*Rxx[i]+ly*Rxy[i]+lz*Rxz[i]) - + TWO*a*( - -F3o2/c1*(cx*Rxx[i]+cy*Rxy[i]+cz*Rxz[i]) - -uxx*(F2o3*Kx[i]+EIGHT*PI_V*Sx[i]) - -uxy*(F2o3*Ky[i]+EIGHT*PI_V*Sy[i]) - -uxz*(F2o3*Kz[i]+EIGHT*PI_V*Sz[i]) - +Gxxx[i]*Rxx[i]+Gxyy[i]*Ryy[i]+Gxzz[i]*Rzz[i] - +TWO*(Gxxy[i]*Rxy[i]+Gxxz[i]*Rxz[i]+Gxyz[i]*Ryz[i])); - - Gamy_rhs[i] = -TWO*(lx*Rxy[i]+ly*Ryy[i]+lz*Ryz[i]) - + TWO*a*( - -F3o2/c1*(cx*Rxy[i]+cy*Ryy[i]+cz*Ryz[i]) - -uxy*(F2o3*Kx[i]+EIGHT*PI_V*Sx[i]) - -uyy*(F2o3*Ky[i]+EIGHT*PI_V*Sy[i]) - -uyz*(F2o3*Kz[i]+EIGHT*PI_V*Sz[i]) - +Gyxx[i]*Rxx[i]+Gyyy[i]*Ryy[i]+Gyzz[i]*Rzz[i] - +TWO*(Gyxy[i]*Rxy[i]+Gyxz[i]*Rxz[i]+Gyyz[i]*Ryz[i])); - - Gamz_rhs[i] = -TWO*(lx*Rxz[i]+ly*Ryz[i]+lz*Rzz[i]) - + TWO*a*( - 
-F3o2/c1*(cx*Rxz[i]+cy*Ryz[i]+cz*Rzz[i]) - -uxz*(F2o3*Kx[i]+EIGHT*PI_V*Sx[i]) - -uyz*(F2o3*Ky[i]+EIGHT*PI_V*Sy[i]) - -uzz*(F2o3*Kz[i]+EIGHT*PI_V*Sz[i]) - +Gzxx[i]*Rxx[i]+Gzyy[i]*Ryy[i]+Gzzz[i]*Rzz[i] - +TWO*(Gzxy[i]*Rxy[i]+Gzxz[i]*Rxz[i]+Gzyz[i]*Ryz[i])); - } -} - -/* Phase 8: Gamma_rhs part 2 — after fdderivs(beta) and fderivs(Gamma) - * Computes: fxx=div(beta_xx), Gamxa, then updates Gamx_rhs etc. - * Input arrays gxxx..gzzz here hold fdderivs(beta) results, - * Gamxx..Gamzz hold fderivs(Gamma) results. - */ -__global__ __launch_bounds__(128, 4) + gxx_rhs[i] = -TWO*alpn1[i]*Axx[i] - F2o3*gxx[i]*db + + TWO*(gxx[i]*betaxx[i] + gxy[i]*betayx[i] + gxz[i]*betazx[i]); + gyy_rhs[i] = -TWO*alpn1[i]*Ayy[i] - F2o3*gyy[i]*db + + TWO*(gxy[i]*betaxy[i] + gyy[i]*betayy[i] + gyz[i]*betazy[i]); + gzz_rhs[i] = -TWO*alpn1[i]*Azz[i] - F2o3*gzz[i]*db + + TWO*(gxz[i]*betaxz[i] + gyz[i]*betayz[i] + gzz[i]*betazz[i]); + gxy_rhs[i] = -TWO*alpn1[i]*Axy[i] + F1o3*gxy[i]*db + + gxx[i]*betaxy[i] + gxz[i]*betazy[i] + gyy[i]*betayx[i] + + gyz[i]*betazx[i] - gxy[i]*betazz[i]; + gyz_rhs[i] = -TWO*alpn1[i]*Ayz[i] + F1o3*gyz[i]*db + + gxy[i]*betaxz[i] + gyy[i]*betayz[i] + gxz[i]*betaxy[i] + + gzz[i]*betazy[i] - gyz[i]*betaxx[i]; + gxz_rhs[i] = -TWO*alpn1[i]*Axz[i] + F1o3*gxz[i]*db + + gxx[i]*betaxz[i] + gxy[i]*betayz[i] + gyz[i]*betayx[i] + + gzz[i]*betazx[i] - gxz[i]*betayy[i]; + } +} + +/* Phase 2b: metric inverse */ +__global__ void kern_phase2_inverse( + const double* __restrict__ gxx, const double* __restrict__ gxy, + const double* __restrict__ gxz, const double* __restrict__ gyy, + const double* __restrict__ gyz, const double* __restrict__ gzz, + double* __restrict__ gupxx, double* __restrict__ gupxy, + double* __restrict__ gupxz, double* __restrict__ gupyy, + double* __restrict__ gupyz, double* __restrict__ gupzz) +{ + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + double det = gxx[i]*gyy[i]*gzz[i] + gxy[i]*gyz[i]*gxz[i] + gxz[i]*gxy[i]*gyz[i] 
+ - gxz[i]*gyy[i]*gxz[i] - gxy[i]*gxy[i]*gzz[i] - gxx[i]*gyz[i]*gyz[i]; + double inv = 1.0 / det; + gupxx[i] = (gyy[i]*gzz[i] - gyz[i]*gyz[i]) * inv; + gupxy[i] = -(gxy[i]*gzz[i] - gyz[i]*gxz[i]) * inv; + gupxz[i] = (gxy[i]*gyz[i] - gyy[i]*gxz[i]) * inv; + gupyy[i] = (gxx[i]*gzz[i] - gxz[i]*gxz[i]) * inv; + gupyz[i] = -(gxx[i]*gyz[i] - gxy[i]*gxz[i]) * inv; + gupzz[i] = (gxx[i]*gyy[i] - gxy[i]*gxy[i]) * inv; + } +} + +/* Phase 3: Gamma constraint residuals (co==0 only) */ +__global__ void kern_phase3_gamma_constraint( + const double* __restrict__ Gamx, const double* __restrict__ Gamy, + const double* __restrict__ Gamz, + const double* __restrict__ gupxx, const double* __restrict__ gupxy, + const double* __restrict__ gupxz, const double* __restrict__ gupyy, + const double* __restrict__ gupyz, const double* __restrict__ gupzz, + const double* __restrict__ gxxx, const double* __restrict__ gxyx, + const double* __restrict__ gxzx, const double* __restrict__ gyyx, + const double* __restrict__ gyzx, const double* __restrict__ gzzx, + const double* __restrict__ gxxy, const double* __restrict__ gxyy, + const double* __restrict__ gxzy, const double* __restrict__ gyyy, + const double* __restrict__ gyzy, const double* __restrict__ gzzy, + const double* __restrict__ gxxz, const double* __restrict__ gxyz, + const double* __restrict__ gxzz, const double* __restrict__ gyyz, + const double* __restrict__ gyzz, const double* __restrict__ gzzz, + double* __restrict__ Gmx_Res, double* __restrict__ Gmy_Res, + double* __restrict__ Gmz_Res) +{ + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + double uxx=gupxx[i], uxy=gupxy[i], uxz=gupxz[i]; + double uyy=gupyy[i], uyz=gupyz[i], uzz=gupzz[i]; + + Gmx_Res[i] = Gamx[i] - ( + uxx*(uxx*gxxx[i]+uxy*gxyx[i]+uxz*gxzx[i]) + + uxy*(uxx*gxyx[i]+uxy*gyyx[i]+uxz*gyzx[i]) + + uxz*(uxx*gxzx[i]+uxy*gyzx[i]+uxz*gzzx[i]) + + uxx*(uxy*gxxy[i]+uyy*gxyy[i]+uyz*gxzy[i]) + + uxy*(uxy*gxyy[i]+uyy*gyyy[i]+uyz*gyzy[i]) + 
+ uxz*(uxy*gxzy[i]+uyy*gyzy[i]+uyz*gzzy[i]) + + uxx*(uxz*gxxz[i]+uyz*gxyz[i]+uzz*gxzz[i]) + + uxy*(uxz*gxyz[i]+uyz*gyyz[i]+uzz*gyzz[i]) + + uxz*(uxz*gxzz[i]+uyz*gyzz[i]+uzz*gzzz[i])); + + Gmy_Res[i] = Gamy[i] - ( + uxx*(uxy*gxxx[i]+uyy*gxyx[i]+uyz*gxzx[i]) + + uxy*(uxy*gxyx[i]+uyy*gyyx[i]+uyz*gyzx[i]) + + uxz*(uxy*gxzx[i]+uyy*gyzx[i]+uyz*gzzx[i]) + + uxy*(uxy*gxxy[i]+uyy*gxyy[i]+uyz*gxzy[i]) + + uyy*(uxy*gxyy[i]+uyy*gyyy[i]+uyz*gyzy[i]) + + uyz*(uxy*gxzy[i]+uyy*gyzy[i]+uyz*gzzy[i]) + + uxy*(uxz*gxxz[i]+uyz*gxyz[i]+uzz*gxzz[i]) + + uyy*(uxz*gxyz[i]+uyz*gyyz[i]+uzz*gyzz[i]) + + uyz*(uxz*gxzz[i]+uyz*gyzz[i]+uzz*gzzz[i])); + + Gmz_Res[i] = Gamz[i] - ( + uxx*(uxz*gxxx[i]+uyz*gxyx[i]+uzz*gxzx[i]) + + uxy*(uxz*gxyx[i]+uyz*gyyx[i]+uzz*gyzx[i]) + + uxz*(uxz*gxzx[i]+uyz*gyzx[i]+uzz*gzzx[i]) + + uxy*(uxz*gxxy[i]+uyz*gxyy[i]+uzz*gxzy[i]) + + uyy*(uxz*gxyy[i]+uyz*gyyy[i]+uzz*gyzy[i]) + + uyz*(uxz*gxzy[i]+uyz*gyzy[i]+uzz*gzzy[i]) + + uxz*(uxz*gxxz[i]+uyz*gxyz[i]+uzz*gxzz[i]) + + uyz*(uxz*gxyz[i]+uyz*gyyz[i]+uzz*gyzz[i]) + + uzz*(uxz*gxzz[i]+uyz*gyzz[i]+uzz*gzzz[i])); + } +} + +/* Phase 4: 18 Christoffel symbols */ +__global__ __launch_bounds__(128, 4) +void kern_phase4_christoffel( + const double* __restrict__ gupxx, const double* __restrict__ gupxy, + const double* __restrict__ gupxz, const double* __restrict__ gupyy, + const double* __restrict__ gupyz, const double* __restrict__ gupzz, + const double* __restrict__ gxxx, const double* __restrict__ gxyx, + const double* __restrict__ gxzx, const double* __restrict__ gyyx, + const double* __restrict__ gyzx, const double* __restrict__ gzzx, + const double* __restrict__ gxxy, const double* __restrict__ gxyy, + const double* __restrict__ gxzy, const double* __restrict__ gyyy, + const double* __restrict__ gyzy, const double* __restrict__ gzzy, + const double* __restrict__ gxxz, const double* __restrict__ gxyz, + const double* __restrict__ gxzz, const double* __restrict__ gyyz, + const double* __restrict__ gyzz, const double* 
__restrict__ gzzz, + double* __restrict__ Gxxx, double* __restrict__ Gxxy, double* __restrict__ Gxxz, + double* __restrict__ Gxyy, double* __restrict__ Gxyz, double* __restrict__ Gxzz, + double* __restrict__ Gyxx, double* __restrict__ Gyxy, double* __restrict__ Gyxz, + double* __restrict__ Gyyy, double* __restrict__ Gyyz, double* __restrict__ Gyzz, + double* __restrict__ Gzxx, double* __restrict__ Gzxy, double* __restrict__ Gzxz, + double* __restrict__ Gzyy, double* __restrict__ Gzyz, double* __restrict__ Gzzz_o) +{ + const double H = 0.5, TWO = 2.0; + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; + double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; + /* Gamma^x_{xx} */ + Gxxx[i]=H*(uxx*gxxx[i]+uxy*(TWO*gxyx[i]-gxxy[i])+uxz*(TWO*gxzx[i]-gxxz[i])); + Gyxx[i]=H*(uxy*gxxx[i]+uyy*(TWO*gxyx[i]-gxxy[i])+uyz*(TWO*gxzx[i]-gxxz[i])); + Gzxx[i]=H*(uxz*gxxx[i]+uyz*(TWO*gxyx[i]-gxxy[i])+uzz*(TWO*gxzx[i]-gxxz[i])); + /* yy */ + Gxyy[i]=H*(uxx*(TWO*gxyy[i]-gyyx[i])+uxy*gyyy[i]+uxz*(TWO*gyzy[i]-gyyz[i])); + Gyyy[i]=H*(uxy*(TWO*gxyy[i]-gyyx[i])+uyy*gyyy[i]+uyz*(TWO*gyzy[i]-gyyz[i])); + Gzyy[i]=H*(uxz*(TWO*gxyy[i]-gyyx[i])+uyz*gyyy[i]+uzz*(TWO*gyzy[i]-gyyz[i])); + /* zz */ + Gxzz[i]=H*(uxx*(TWO*gxzz[i]-gzzx[i])+uxy*(TWO*gyzz[i]-gzzy[i])+uxz*gzzz[i]); + Gyzz[i]=H*(uxy*(TWO*gxzz[i]-gzzx[i])+uyy*(TWO*gyzz[i]-gzzy[i])+uyz*gzzz[i]); + Gzzz_o[i]=H*(uxz*(TWO*gxzz[i]-gzzx[i])+uyz*(TWO*gyzz[i]-gzzy[i])+uzz*gzzz[i]); + /* xy */ + Gxxy[i]=H*(uxx*gxxy[i]+uxy*gyyx[i]+uxz*(gxzy[i]+gyzx[i]-gxyz[i])); + Gyxy[i]=H*(uxy*gxxy[i]+uyy*gyyx[i]+uyz*(gxzy[i]+gyzx[i]-gxyz[i])); + Gzxy[i]=H*(uxz*gxxy[i]+uyz*gyyx[i]+uzz*(gxzy[i]+gyzx[i]-gxyz[i])); + /* xz */ + Gxxz[i]=H*(uxx*gxxz[i]+uxy*(gxyz[i]+gyzx[i]-gxzy[i])+uxz*gzzx[i]); + Gyxz[i]=H*(uxy*gxxz[i]+uyy*(gxyz[i]+gyzx[i]-gxzy[i])+uyz*gzzx[i]); + Gzxz[i]=H*(uxz*gxxz[i]+uyz*(gxyz[i]+gyzx[i]-gxzy[i])+uzz*gzzx[i]); + /* yz */ + 
Gxyz[i]=H*(uxx*(gxyz[i]+gxzy[i]-gyzx[i])+uxy*gyyz[i]+uxz*gzzy[i]); + Gyyz[i]=H*(uxy*(gxyz[i]+gxzy[i]-gyzx[i])+uyy*gyyz[i]+uyz*gzzy[i]); + Gzyz[i]=H*(uxz*(gxyz[i]+gxzy[i]-gyzx[i])+uyz*gyyz[i]+uzz*gzzy[i]); + } +} + +/* Phase 5: A^ij = gup^ia gup^jb A_ab (stored temporarily in Rxx..Rzz) */ +__global__ void kern_phase5_raise_A( + const double* __restrict__ gupxx, const double* __restrict__ gupxy, + const double* __restrict__ gupxz, const double* __restrict__ gupyy, + const double* __restrict__ gupyz, const double* __restrict__ gupzz, + const double* __restrict__ Axx, const double* __restrict__ Axy, + const double* __restrict__ Axz, const double* __restrict__ Ayy, + const double* __restrict__ Ayz, const double* __restrict__ Azz, + double* __restrict__ Rxx, double* __restrict__ Rxy, double* __restrict__ Rxz, + double* __restrict__ Ryy, double* __restrict__ Ryz, double* __restrict__ Rzz) +{ + const double TWO = 2.0; + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; + double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; + Rxx[i]=uxx*uxx*Axx[i]+uxy*uxy*Ayy[i]+uxz*uxz*Azz[i] + +TWO*(uxx*uxy*Axy[i]+uxx*uxz*Axz[i]+uxy*uxz*Ayz[i]); + Ryy[i]=uxy*uxy*Axx[i]+uyy*uyy*Ayy[i]+uyz*uyz*Azz[i] + +TWO*(uxy*uyy*Axy[i]+uxy*uyz*Axz[i]+uyy*uyz*Ayz[i]); + Rzz[i]=uxz*uxz*Axx[i]+uyz*uyz*Ayy[i]+uzz*uzz*Azz[i] + +TWO*(uxz*uyz*Axy[i]+uxz*uzz*Axz[i]+uyz*uzz*Ayz[i]); + Rxy[i]=uxx*uxy*Axx[i]+uxy*uyy*Ayy[i]+uxz*uyz*Azz[i] + +(uxx*uyy+uxy*uxy)*Axy[i]+(uxx*uyz+uxz*uxy)*Axz[i]+(uxy*uyz+uxz*uyy)*Ayz[i]; + Rxz[i]=uxx*uxz*Axx[i]+uxy*uyz*Ayy[i]+uxz*uzz*Azz[i] + +(uxx*uyz+uxy*uxz)*Axy[i]+(uxx*uzz+uxz*uxz)*Axz[i]+(uxy*uzz+uxz*uyz)*Ayz[i]; + Ryz[i]=uxy*uxz*Axx[i]+uyy*uyz*Ayy[i]+uyz*uzz*Azz[i] + +(uxy*uyz+uyy*uxz)*Axy[i]+(uxy*uzz+uyz*uxz)*Axz[i]+(uyy*uzz+uyz*uyz)*Ayz[i]; + } +} + +/* Phase 6: Gamma_rhs part 1 (before fdderivs(beta) and fderivs(Gamma)) */ +__global__ __launch_bounds__(128, 4) +void kern_phase6_gamma_rhs_part1( 
+ const double* __restrict__ Lapx, const double* __restrict__ Lapy, + const double* __restrict__ Lapz, + const double* __restrict__ alpn1, const double* __restrict__ chin1, + const double* __restrict__ chix, const double* __restrict__ chiy, + const double* __restrict__ chiz, + const double* __restrict__ gupxx, const double* __restrict__ gupxy, + const double* __restrict__ gupxz, const double* __restrict__ gupyy, + const double* __restrict__ gupyz, const double* __restrict__ gupzz, + const double* __restrict__ Kx, const double* __restrict__ Ky, + const double* __restrict__ Kz, + const double* __restrict__ Sx, const double* __restrict__ Sy, + const double* __restrict__ Sz, + const double* __restrict__ Rxx, const double* __restrict__ Rxy, + const double* __restrict__ Rxz, const double* __restrict__ Ryy, + const double* __restrict__ Ryz, const double* __restrict__ Rzz, + const double* __restrict__ Gxxx, const double* __restrict__ Gxxy, + const double* __restrict__ Gxxz, const double* __restrict__ Gxyy, + const double* __restrict__ Gxyz, const double* __restrict__ Gxzz, + const double* __restrict__ Gyxx, const double* __restrict__ Gyxy, + const double* __restrict__ Gyxz, const double* __restrict__ Gyyy, + const double* __restrict__ Gyyz, const double* __restrict__ Gyzz, + const double* __restrict__ Gzxx, const double* __restrict__ Gzxy, + const double* __restrict__ Gzxz, const double* __restrict__ Gzyy, + const double* __restrict__ Gzyz, const double* __restrict__ Gzzz, + double* __restrict__ Gamx_rhs, double* __restrict__ Gamy_rhs, + double* __restrict__ Gamz_rhs) +{ + const double TWO=2.0, F3o2=1.5, F2o3=2.0/3.0, EIGHT=8.0; + const double PI_V = 3.14159265358979323846; + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; + double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; + double lx=Lapx[i],ly=Lapy[i],lz=Lapz[i]; + double a=alpn1[i], c1=chin1[i]; + double 
cx=chix[i],cy=chiy[i],cz=chiz[i]; + + Gamx_rhs[i] = -TWO*(lx*Rxx[i]+ly*Rxy[i]+lz*Rxz[i]) + + TWO*a*( + -F3o2/c1*(cx*Rxx[i]+cy*Rxy[i]+cz*Rxz[i]) + -uxx*(F2o3*Kx[i]+EIGHT*PI_V*Sx[i]) + -uxy*(F2o3*Ky[i]+EIGHT*PI_V*Sy[i]) + -uxz*(F2o3*Kz[i]+EIGHT*PI_V*Sz[i]) + +Gxxx[i]*Rxx[i]+Gxyy[i]*Ryy[i]+Gxzz[i]*Rzz[i] + +TWO*(Gxxy[i]*Rxy[i]+Gxxz[i]*Rxz[i]+Gxyz[i]*Ryz[i])); + + Gamy_rhs[i] = -TWO*(lx*Rxy[i]+ly*Ryy[i]+lz*Ryz[i]) + + TWO*a*( + -F3o2/c1*(cx*Rxy[i]+cy*Ryy[i]+cz*Ryz[i]) + -uxy*(F2o3*Kx[i]+EIGHT*PI_V*Sx[i]) + -uyy*(F2o3*Ky[i]+EIGHT*PI_V*Sy[i]) + -uyz*(F2o3*Kz[i]+EIGHT*PI_V*Sz[i]) + +Gyxx[i]*Rxx[i]+Gyyy[i]*Ryy[i]+Gyzz[i]*Rzz[i] + +TWO*(Gyxy[i]*Rxy[i]+Gyxz[i]*Rxz[i]+Gyyz[i]*Ryz[i])); + + Gamz_rhs[i] = -TWO*(lx*Rxz[i]+ly*Ryz[i]+lz*Rzz[i]) + + TWO*a*( + -F3o2/c1*(cx*Rxz[i]+cy*Ryz[i]+cz*Rzz[i]) + -uxz*(F2o3*Kx[i]+EIGHT*PI_V*Sx[i]) + -uyz*(F2o3*Ky[i]+EIGHT*PI_V*Sy[i]) + -uzz*(F2o3*Kz[i]+EIGHT*PI_V*Sz[i]) + +Gzxx[i]*Rxx[i]+Gzyy[i]*Ryy[i]+Gzzz[i]*Rzz[i] + +TWO*(Gzxy[i]*Rxy[i]+Gzxz[i]*Rxz[i]+Gzyz[i]*Ryz[i])); + } +} + +/* Phase 8: Gamma_rhs part 2 — after fdderivs(beta) and fderivs(Gamma) + * Computes: fxx=div(beta_xx), Gamxa, then updates Gamx_rhs etc. + * Input arrays gxxx..gzzz here hold fdderivs(beta) results, + * Gamxx..Gamzz hold fderivs(Gamma) results. 
+ */ +__global__ __launch_bounds__(128, 4) void kern_phase8_gamma_rhs_part2( const double* __restrict__ gupxx, const double* __restrict__ gupxy, const double* __restrict__ gupxz, const double* __restrict__ gupyy, const double* __restrict__ gupyz, const double* __restrict__ gupzz, - /* fdderivs(betax) -> gxxx,gxyx,gxzx,gyyx,gyzx,gzzx */ - const double* __restrict__ bxx_xx, const double* __restrict__ bxx_xy, - const double* __restrict__ bxx_xz, const double* __restrict__ bxx_yy, - const double* __restrict__ bxx_yz, const double* __restrict__ bxx_zz, - /* fdderivs(betay) -> gxxy,gxyy,gxzy,gyyy,gyzy,gzzy */ - const double* __restrict__ bxy_xx, const double* __restrict__ bxy_xy, - const double* __restrict__ bxy_xz, const double* __restrict__ bxy_yy, - const double* __restrict__ bxy_yz, const double* __restrict__ bxy_zz, - /* fdderivs(betaz) -> gxxz,gxyz,gxzz,gyyz,gyzz,gzzz */ - const double* __restrict__ bxz_xx, const double* __restrict__ bxz_xy, - const double* __restrict__ bxz_xz, const double* __restrict__ bxz_yy, - const double* __restrict__ bxz_yz, const double* __restrict__ bxz_zz, - /* fderivs(Gamx) -> Gamxx,Gamxy,Gamxz */ - const double* __restrict__ Gamxx, const double* __restrict__ Gamxy, - const double* __restrict__ Gamxz, - /* fderivs(Gamy) -> Gamyx,Gamyy,Gamyz */ - const double* __restrict__ Gamyx, const double* __restrict__ Gamyy_d, - const double* __restrict__ Gamyz_d, - /* fderivs(Gamz) -> Gamzx,Gamzy,Gamzz */ - const double* __restrict__ Gamzx, const double* __restrict__ Gamzy, - const double* __restrict__ Gamzz_d, - /* Christoffel symbols */ - const double* __restrict__ Gxxx, const double* __restrict__ Gxxy, - const double* __restrict__ Gxxz, const double* __restrict__ Gxyy, - const double* __restrict__ Gxyz, const double* __restrict__ Gxzz, - const double* __restrict__ Gyxx, const double* __restrict__ Gyxy, - const double* __restrict__ Gyxz, const double* __restrict__ Gyyy, - const double* __restrict__ Gyyz, const double* __restrict__ Gyzz, - const 
double* __restrict__ Gzxx, const double* __restrict__ Gzxy, - const double* __restrict__ Gzxz, const double* __restrict__ Gzyy, - const double* __restrict__ Gzyz, const double* __restrict__ Gzzz, - /* betaij first derivs */ - const double* __restrict__ betaxx, const double* __restrict__ betaxy, - const double* __restrict__ betaxz, const double* __restrict__ betayx, - const double* __restrict__ betayy, const double* __restrict__ betayz, - const double* __restrict__ betazx, const double* __restrict__ betazy, - const double* __restrict__ betazz, - double* __restrict__ Gamx_rhs, double* __restrict__ Gamy_rhs, - double* __restrict__ Gamz_rhs, - double* __restrict__ Gamxa_out, double* __restrict__ Gamya_out, - double* __restrict__ Gamza_out) -{ - const double TWO=2.0, F2o3=2.0/3.0, F1o3=1.0/3.0; - for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { - double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; - double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; - /* div(beta_second_derivs) */ - double fxx_v = bxx_xx[i]+bxy_xy[i]+bxz_xz[i]; - double fxy_v = bxx_xy[i]+bxy_yy[i]+bxz_yz[i]; - double fxz_v = bxx_xz[i]+bxy_yz[i]+bxz_zz[i]; - /* Gamma^a contracted */ - double Ga_x = uxx*Gxxx[i]+uyy*Gxyy[i]+uzz*Gxzz[i] - +TWO*(uxy*Gxxy[i]+uxz*Gxxz[i]+uyz*Gxyz[i]); + /* fdderivs(betax) -> gxxx,gxyx,gxzx,gyyx,gyzx,gzzx */ + const double* __restrict__ bxx_xx, const double* __restrict__ bxx_xy, + const double* __restrict__ bxx_xz, const double* __restrict__ bxx_yy, + const double* __restrict__ bxx_yz, const double* __restrict__ bxx_zz, + /* fdderivs(betay) -> gxxy,gxyy,gxzy,gyyy,gyzy,gzzy */ + const double* __restrict__ bxy_xx, const double* __restrict__ bxy_xy, + const double* __restrict__ bxy_xz, const double* __restrict__ bxy_yy, + const double* __restrict__ bxy_yz, const double* __restrict__ bxy_zz, + /* fdderivs(betaz) -> gxxz,gxyz,gxzz,gyyz,gyzz,gzzz */ + const double* __restrict__ bxz_xx, const double* __restrict__ bxz_xy, + const double* __restrict__ 
bxz_xz, const double* __restrict__ bxz_yy, + const double* __restrict__ bxz_yz, const double* __restrict__ bxz_zz, + /* fderivs(Gamx) -> Gamxx,Gamxy,Gamxz */ + const double* __restrict__ Gamxx, const double* __restrict__ Gamxy, + const double* __restrict__ Gamxz, + /* fderivs(Gamy) -> Gamyx,Gamyy,Gamyz */ + const double* __restrict__ Gamyx, const double* __restrict__ Gamyy_d, + const double* __restrict__ Gamyz_d, + /* fderivs(Gamz) -> Gamzx,Gamzy,Gamzz */ + const double* __restrict__ Gamzx, const double* __restrict__ Gamzy, + const double* __restrict__ Gamzz_d, + /* Christoffel symbols */ + const double* __restrict__ Gxxx, const double* __restrict__ Gxxy, + const double* __restrict__ Gxxz, const double* __restrict__ Gxyy, + const double* __restrict__ Gxyz, const double* __restrict__ Gxzz, + const double* __restrict__ Gyxx, const double* __restrict__ Gyxy, + const double* __restrict__ Gyxz, const double* __restrict__ Gyyy, + const double* __restrict__ Gyyz, const double* __restrict__ Gyzz, + const double* __restrict__ Gzxx, const double* __restrict__ Gzxy, + const double* __restrict__ Gzxz, const double* __restrict__ Gzyy, + const double* __restrict__ Gzyz, const double* __restrict__ Gzzz, + /* betaij first derivs */ + const double* __restrict__ betaxx, const double* __restrict__ betaxy, + const double* __restrict__ betaxz, const double* __restrict__ betayx, + const double* __restrict__ betayy, const double* __restrict__ betayz, + const double* __restrict__ betazx, const double* __restrict__ betazy, + const double* __restrict__ betazz, + double* __restrict__ Gamx_rhs, double* __restrict__ Gamy_rhs, + double* __restrict__ Gamz_rhs, + double* __restrict__ Gamxa_out, double* __restrict__ Gamya_out, + double* __restrict__ Gamza_out) +{ + const double TWO=2.0, F2o3=2.0/3.0, F1o3=1.0/3.0; + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; + double 
uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; + /* div(beta_second_derivs) */ + double fxx_v = bxx_xx[i]+bxy_xy[i]+bxz_xz[i]; + double fxy_v = bxx_xy[i]+bxy_yy[i]+bxz_yz[i]; + double fxz_v = bxx_xz[i]+bxy_yz[i]+bxz_zz[i]; + /* Gamma^a contracted */ + double Ga_x = uxx*Gxxx[i]+uyy*Gxyy[i]+uzz*Gxzz[i] + +TWO*(uxy*Gxxy[i]+uxz*Gxxz[i]+uyz*Gxyz[i]); double Ga_y = uxx*Gyxx[i]+uyy*Gyyy[i]+uzz*Gyzz[i] +TWO*(uxy*Gyxy[i]+uxz*Gyxz[i]+uyz*Gyyz[i]); double Ga_z = uxx*Gzxx[i]+uyy*Gzyy[i]+uzz*Gzzz[i] @@ -1711,779 +1711,779 @@ void kern_phase8_gamma_rhs_part2( Gamxa_out[i]=Ga_x; Gamya_out[i]=Ga_y; Gamza_out[i]=Ga_z; double db = betaxx[i] + betayy[i] + betazz[i]; Gamx_rhs[i] += F2o3*Ga_x*db - - Ga_x*betaxx[i] - Ga_y*betaxy[i] - Ga_z*betaxz[i] - + F1o3*(uxx*fxx_v+uxy*fxy_v+uxz*fxz_v) - + uxx*bxx_xx[i]+uyy*bxx_yy[i]+uzz*bxx_zz[i] - + TWO*(uxy*bxx_xy[i]+uxz*bxx_xz[i]+uyz*bxx_yz[i]); - Gamy_rhs[i] += F2o3*Ga_y*db - - Ga_x*betayx[i] - Ga_y*betayy[i] - Ga_z*betayz[i] - + F1o3*(uxy*fxx_v+uyy*fxy_v+uyz*fxz_v) - + uxx*bxy_xx[i]+uyy*bxy_yy[i]+uzz*bxy_zz[i] - + TWO*(uxy*bxy_xy[i]+uxz*bxy_xz[i]+uyz*bxy_yz[i]); - Gamz_rhs[i] += F2o3*Ga_z*db - - Ga_x*betazx[i] - Ga_y*betazy[i] - Ga_z*betazz[i] - + F1o3*(uxz*fxx_v+uyz*fxy_v+uzz*fxz_v) - + uxx*bxz_xx[i]+uyy*bxz_yy[i]+uzz*bxz_zz[i] - + TWO*(uxy*bxz_xy[i]+uxz*bxz_xz[i]+uyz*bxz_yz[i]); - } -} - -/* Phase 9: Christoffel contract — compute g_{ia} Gamma^a_{bc} products - * Overwrites gxxx..gzzz with lowered Christoffel products needed for Ricci. 
- */ -__global__ __launch_bounds__(128, 4) -void kern_phase9_christoffel_contract( - const double* __restrict__ gxx, const double* __restrict__ gxy, - const double* __restrict__ gxz, const double* __restrict__ gyy, - const double* __restrict__ gyz, const double* __restrict__ gzz, - const double* __restrict__ Gxxx, const double* __restrict__ Gxxy, - const double* __restrict__ Gxxz, const double* __restrict__ Gxyy, - const double* __restrict__ Gxyz, const double* __restrict__ Gxzz, - const double* __restrict__ Gyxx, const double* __restrict__ Gyxy, - const double* __restrict__ Gyxz, const double* __restrict__ Gyyy, - const double* __restrict__ Gyyz, const double* __restrict__ Gyzz, - const double* __restrict__ Gzxx, const double* __restrict__ Gzxy, - const double* __restrict__ Gzxz, const double* __restrict__ Gzyy, - const double* __restrict__ Gzyz, const double* __restrict__ Gzzz, - /* output: lowered products g_{ia} Gamma^a_{bc} */ - double* __restrict__ o_gxxx, double* __restrict__ o_gxyx, - double* __restrict__ o_gxzx, double* __restrict__ o_gyyx, - double* __restrict__ o_gyzx, double* __restrict__ o_gzzx, - double* __restrict__ o_gxxy, double* __restrict__ o_gxyy, - double* __restrict__ o_gxzy, double* __restrict__ o_gyyy, - double* __restrict__ o_gyzy, double* __restrict__ o_gzzy, - double* __restrict__ o_gxxz, double* __restrict__ o_gxyz, - double* __restrict__ o_gxzz, double* __restrict__ o_gyyz, - double* __restrict__ o_gyzz, double* __restrict__ o_gzzz) -{ - for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { - double g11=gxx[i],g12=gxy[i],g13=gxz[i]; - double g22=gyy[i],g23=gyz[i],g33=gzz[i]; - /* row x: g_{x,a} Gamma^a_{bc} */ - o_gxxx[i]=g11*Gxxx[i]+g12*Gyxx[i]+g13*Gzxx[i]; - o_gxyx[i]=g11*Gxxy[i]+g12*Gyxy[i]+g13*Gzxy[i]; - o_gxzx[i]=g11*Gxxz[i]+g12*Gyxz[i]+g13*Gzxz[i]; - o_gyyx[i]=g11*Gxyy[i]+g12*Gyyy[i]+g13*Gzyy[i]; - o_gyzx[i]=g11*Gxyz[i]+g12*Gyyz[i]+g13*Gzyz[i]; - o_gzzx[i]=g11*Gxzz[i]+g12*Gyzz[i]+g13*Gzzz[i]; - 
/* row y: g_{y,a} Gamma^a_{bc} */ - o_gxxy[i]=g12*Gxxx[i]+g22*Gyxx[i]+g23*Gzxx[i]; - o_gxyy[i]=g12*Gxxy[i]+g22*Gyxy[i]+g23*Gzxy[i]; - o_gxzy[i]=g12*Gxxz[i]+g22*Gyxz[i]+g23*Gzxz[i]; - o_gyyy[i]=g12*Gxyy[i]+g22*Gyyy[i]+g23*Gzyy[i]; - o_gyzy[i]=g12*Gxyz[i]+g22*Gyyz[i]+g23*Gzyz[i]; - o_gzzy[i]=g12*Gxzz[i]+g22*Gyzz[i]+g23*Gzzz[i]; - /* row z: g_{z,a} Gamma^a_{bc} */ - o_gxxz[i]=g13*Gxxx[i]+g23*Gyxx[i]+g33*Gzxx[i]; - o_gxyz[i]=g13*Gxxy[i]+g23*Gyxy[i]+g33*Gzxy[i]; - o_gxzz[i]=g13*Gxxz[i]+g23*Gyxz[i]+g33*Gzxz[i]; - o_gyyz[i]=g13*Gxyy[i]+g23*Gyyy[i]+g33*Gzyy[i]; - o_gyzz[i]=g13*Gxyz[i]+g23*Gyyz[i]+g33*Gzyz[i]; - o_gzzz[i]=g13*Gxzz[i]+g23*Gyzz[i]+g33*Gzzz[i]; - } -} - -/* Phase 10: After fdderivs of a metric component, contract with gup^{ij} - * R_comp = gup^xx*fxx + gup^yy*fyy + gup^zz*fzz + 2*(gup^xy*fxy + gup^xz*fxz + gup^yz*fyz) - */ -__global__ void kern_phase10_ricci_contract( - const double* __restrict__ gupxx, const double* __restrict__ gupxy, - const double* __restrict__ gupxz, const double* __restrict__ gupyy, - const double* __restrict__ gupyz, const double* __restrict__ gupzz, - const double* __restrict__ fxx, const double* __restrict__ fxy, - const double* __restrict__ fxz, const double* __restrict__ fyy, - const double* __restrict__ fyz, const double* __restrict__ fzz, - double* __restrict__ R_comp) -{ - for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { - R_comp[i] = gupxx[i]*fxx[i] + gupyy[i]*fyy[i] + gupzz[i]*fzz[i] - + 2.0*(gupxy[i]*fxy[i] + gupxz[i]*fxz[i] + gupyz[i]*fyz[i]); - } -} - -/* Phase 11a: Ricci diagonal assembly (Rxx, Ryy, Rzz) */ -__global__ __launch_bounds__(128, 4) -void kern_phase11_ricci_diag( - const double* __restrict__ gxx, const double* __restrict__ gxy, - const double* __restrict__ gxz, const double* __restrict__ gyy, - const double* __restrict__ gyz, const double* __restrict__ gzz, - const double* __restrict__ gupxx, const double* __restrict__ gupxy, - const double* __restrict__ gupxz, const 
double* __restrict__ gupyy, - const double* __restrict__ gupyz, const double* __restrict__ gupzz, - const double* __restrict__ Gamxa, const double* __restrict__ Gamya, - const double* __restrict__ Gamza, - const double* __restrict__ Gamxx, const double* __restrict__ Gamxy, - const double* __restrict__ Gamxz, - const double* __restrict__ Gamyx, const double* __restrict__ Gamyy_d, - const double* __restrict__ Gamyz_d, - const double* __restrict__ Gamzx, const double* __restrict__ Gamzy, - const double* __restrict__ Gamzz_d, - const double* __restrict__ Gxxx, const double* __restrict__ Gxxy, - const double* __restrict__ Gxxz, const double* __restrict__ Gxyy, - const double* __restrict__ Gxyz, const double* __restrict__ Gxzz, - const double* __restrict__ Gyxx, const double* __restrict__ Gyxy, - const double* __restrict__ Gyxz, const double* __restrict__ Gyyy, - const double* __restrict__ Gyyz, const double* __restrict__ Gyzz, - const double* __restrict__ Gzxx, const double* __restrict__ Gzxy, - const double* __restrict__ Gzxz, const double* __restrict__ Gzyy, - const double* __restrict__ Gzyz, const double* __restrict__ Gzzz, - /* lowered Christoffel products */ - const double* __restrict__ lxxx, const double* __restrict__ lxyx, - const double* __restrict__ lxzx, const double* __restrict__ lyyx, - const double* __restrict__ lyzx, const double* __restrict__ lzzx, - const double* __restrict__ lxxy, const double* __restrict__ lxyy, - const double* __restrict__ lxzy, const double* __restrict__ lyyy, - const double* __restrict__ lyzy, const double* __restrict__ lzzy, - const double* __restrict__ lxxz, const double* __restrict__ lxyz, - const double* __restrict__ lxzz, const double* __restrict__ lyyz, - const double* __restrict__ lyzz, const double* __restrict__ lzzz, - double* __restrict__ Rxx, double* __restrict__ Ryy, double* __restrict__ Rzz) -{ - const double H = 0.5, TWO = 2.0; - for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { 
- double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; - double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; - /* Rxx */ - Rxx[i] = -H*Rxx[i] - + gxx[i]*Gamxx[i]+gxy[i]*Gamyx[i]+gxz[i]*Gamzx[i] - + Gamxa[i]*lxxx[i]+Gamya[i]*lxyx[i]+Gamza[i]*lxzx[i] - + uxx*(TWO*(Gxxx[i]*lxxx[i]+Gyxx[i]*lxyx[i]+Gzxx[i]*lxzx[i]) - +(Gxxx[i]*lxxx[i]+Gyxx[i]*lxxy[i]+Gzxx[i]*lxxz[i])) - + uxy*(TWO*(Gxxx[i]*lxyx[i]+Gyxx[i]*lyyx[i]+Gzxx[i]*lyzx[i] - +Gxxy[i]*lxxx[i]+Gyxy[i]*lxyx[i]+Gzxy[i]*lxzx[i]) - +(Gxxy[i]*lxxx[i]+Gyxy[i]*lxxy[i]+Gzxy[i]*lxxz[i]) - +(Gxxx[i]*lxyx[i]+Gyxx[i]*lxyy[i]+Gzxx[i]*lxyz[i])) - + uxz*(TWO*(Gxxx[i]*lxzx[i]+Gyxx[i]*lyzx[i]+Gzxx[i]*lzzx[i] - +Gxxz[i]*lxxx[i]+Gyxz[i]*lxyx[i]+Gzxz[i]*lxzx[i]) - +(Gxxz[i]*lxxx[i]+Gyxz[i]*lxxy[i]+Gzxz[i]*lxxz[i]) - +(Gxxx[i]*lxzx[i]+Gyxx[i]*lxzy[i]+Gzxx[i]*lxzz[i])) - + uyy*(TWO*(Gxxy[i]*lxyx[i]+Gyxy[i]*lyyx[i]+Gzxy[i]*lyzx[i]) - +(Gxxy[i]*lxyx[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxyz[i])) - + uyz*(TWO*(Gxxy[i]*lxzx[i]+Gyxy[i]*lyzx[i]+Gzxy[i]*lzzx[i] - +Gxxz[i]*lxyx[i]+Gyxz[i]*lyyx[i]+Gzxz[i]*lyzx[i]) - +(Gxxz[i]*lxyx[i]+Gyxz[i]*lxyy[i]+Gzxz[i]*lxyz[i]) - +(Gxxy[i]*lxzx[i]+Gyxy[i]*lxzy[i]+Gzxy[i]*lxzz[i])) - + uzz*(TWO*(Gxxz[i]*lxzx[i]+Gyxz[i]*lyzx[i]+Gzxz[i]*lzzx[i]) - +(Gxxz[i]*lxzx[i]+Gyxz[i]*lxzy[i]+Gzxz[i]*lxzz[i])); - - /* Ryy */ - Ryy[i] = -H*Ryy[i] - + gxy[i]*Gamxy[i]+gyy[i]*Gamyy_d[i]+gyz[i]*Gamzy[i] - + Gamxa[i]*lxyy[i]+Gamya[i]*lyyy[i]+Gamza[i]*lyzy[i] - + uxx*(TWO*(Gxxy[i]*lxxy[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxzy[i]) - +(Gxxy[i]*lxyx[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxyz[i])) - + uxy*(TWO*(Gxxy[i]*lxyy[i]+Gyxy[i]*lyyy[i]+Gzxy[i]*lyzy[i] - +Gxyy[i]*lxxy[i]+Gyyy[i]*lxyy[i]+Gzyy[i]*lxzy[i]) - +(Gxyy[i]*lxyx[i]+Gyyy[i]*lxyy[i]+Gzyy[i]*lxyz[i]) - +(Gxxy[i]*lyyx[i]+Gyxy[i]*lyyy[i]+Gzxy[i]*lyyz[i])) - + uxz*(TWO*(Gxxy[i]*lxzy[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lzzy[i] - +Gxyz[i]*lxxy[i]+Gyyz[i]*lxyy[i]+Gzyz[i]*lxzy[i]) - +(Gxyz[i]*lxyx[i]+Gyyz[i]*lxyy[i]+Gzyz[i]*lxyz[i]) - +(Gxxy[i]*lyzx[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lyzz[i])) - + 
uyy*(TWO*(Gxyy[i]*lxyy[i]+Gyyy[i]*lyyy[i]+Gzyy[i]*lyzy[i]) - +(Gxyy[i]*lyyx[i]+Gyyy[i]*lyyy[i]+Gzyy[i]*lyyz[i])) - + uyz*(TWO*(Gxyy[i]*lxzy[i]+Gyyy[i]*lyzy[i]+Gzyy[i]*lzzy[i] - +Gxyz[i]*lxyy[i]+Gyyz[i]*lyyy[i]+Gzyz[i]*lyzy[i]) - +(Gxyz[i]*lyyx[i]+Gyyz[i]*lyyy[i]+Gzyz[i]*lyyz[i]) - +(Gxyy[i]*lyzx[i]+Gyyy[i]*lyzy[i]+Gzyy[i]*lyzz[i])) - + uzz*(TWO*(Gxyz[i]*lxzy[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lzzy[i]) - +(Gxyz[i]*lyzx[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lyzz[i])); - - /* Rzz */ - Rzz[i] = -H*Rzz[i] - + gxz[i]*Gamxz[i]+gyz[i]*Gamyz_d[i]+gzz[i]*Gamzz_d[i] - + Gamxa[i]*lxzz[i]+Gamya[i]*lyzz[i]+Gamza[i]*lzzz[i] - + uxx*(TWO*(Gxxz[i]*lxxz[i]+Gyxz[i]*lxyz[i]+Gzxz[i]*lxzz[i]) - +(Gxxz[i]*lxzx[i]+Gyxz[i]*lxzy[i]+Gzxz[i]*lxzz[i])) - + uxy*(TWO*(Gxxz[i]*lxyz[i]+Gyxz[i]*lyyz[i]+Gzxz[i]*lyzz[i] - +Gxyz[i]*lxxz[i]+Gyyz[i]*lxyz[i]+Gzyz[i]*lxzz[i]) - +(Gxyz[i]*lxzx[i]+Gyyz[i]*lxzy[i]+Gzyz[i]*lxzz[i]) - +(Gxxz[i]*lyzx[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lyzz[i])) - + uxz*(TWO*(Gxxz[i]*lxzz[i]+Gyxz[i]*lyzz[i]+Gzxz[i]*lzzz[i] - +Gxzz[i]*lxxz[i]+Gyzz[i]*lxyz[i]+Gzzz[i]*lxzz[i]) - +(Gxzz[i]*lxzx[i]+Gyzz[i]*lxzy[i]+Gzzz[i]*lxzz[i]) - +(Gxxz[i]*lzzx[i]+Gyxz[i]*lzzy[i]+Gzxz[i]*lzzz[i])) - + uyy*(TWO*(Gxyz[i]*lxyz[i]+Gyyz[i]*lyyz[i]+Gzyz[i]*lyzz[i]) - +(Gxyz[i]*lyzx[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lyzz[i])) - + uyz*(TWO*(Gxyz[i]*lxzz[i]+Gyyz[i]*lyzz[i]+Gzyz[i]*lzzz[i] - +Gxzz[i]*lxyz[i]+Gyzz[i]*lyyz[i]+Gzzz[i]*lyzz[i]) - +(Gxzz[i]*lyzx[i]+Gyzz[i]*lyzy[i]+Gzzz[i]*lyzz[i]) - +(Gxyz[i]*lzzx[i]+Gyyz[i]*lzzy[i]+Gzyz[i]*lzzz[i])) - + uzz*(TWO*(Gxzz[i]*lxzz[i]+Gyzz[i]*lyzz[i]+Gzzz[i]*lzzz[i]) - +(Gxzz[i]*lzzx[i]+Gyzz[i]*lzzy[i]+Gzzz[i]*lzzz[i])); - } -} - -/* Phase 11b: Ricci off-diagonal assembly (Rxy, Rxz, Ryz) */ -__global__ __launch_bounds__(128, 4) -void kern_phase11_ricci_offdiag( - const double* __restrict__ gxx, const double* __restrict__ gxy, - const double* __restrict__ gxz, const double* __restrict__ gyy, - const double* __restrict__ gyz, const double* __restrict__ gzz, - const double* __restrict__ gupxx, const 
double* __restrict__ gupxy, - const double* __restrict__ gupxz, const double* __restrict__ gupyy, - const double* __restrict__ gupyz, const double* __restrict__ gupzz, - const double* __restrict__ Gamxa, const double* __restrict__ Gamya, - const double* __restrict__ Gamza, - const double* __restrict__ Gamxx, const double* __restrict__ Gamxy, - const double* __restrict__ Gamxz, - const double* __restrict__ Gamyx, const double* __restrict__ Gamyy_d, - const double* __restrict__ Gamyz_d, - const double* __restrict__ Gamzx, const double* __restrict__ Gamzy, - const double* __restrict__ Gamzz_d, - const double* __restrict__ Gxxx, const double* __restrict__ Gxxy, - const double* __restrict__ Gxxz, const double* __restrict__ Gxyy, - const double* __restrict__ Gxyz, const double* __restrict__ Gxzz, - const double* __restrict__ Gyxx, const double* __restrict__ Gyxy, - const double* __restrict__ Gyxz, const double* __restrict__ Gyyy, - const double* __restrict__ Gyyz, const double* __restrict__ Gyzz, - const double* __restrict__ Gzxx, const double* __restrict__ Gzxy, - const double* __restrict__ Gzxz, const double* __restrict__ Gzyy, - const double* __restrict__ Gzyz, const double* __restrict__ Gzzz, - const double* __restrict__ lxxx, const double* __restrict__ lxyx, - const double* __restrict__ lxzx, const double* __restrict__ lyyx, - const double* __restrict__ lyzx, const double* __restrict__ lzzx, - const double* __restrict__ lxxy, const double* __restrict__ lxyy, - const double* __restrict__ lxzy, const double* __restrict__ lyyy, - const double* __restrict__ lyzy, const double* __restrict__ lzzy, - const double* __restrict__ lxxz, const double* __restrict__ lxyz, - const double* __restrict__ lxzz, const double* __restrict__ lyyz, - const double* __restrict__ lyzz, const double* __restrict__ lzzz, - double* __restrict__ Rxy, double* __restrict__ Rxz, double* __restrict__ Ryz) -{ - const double H = 0.5, TWO = 2.0; - for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < 
d_gp.all; i += blockDim.x*gridDim.x) { - double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; - double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; - - /* Rxy */ - Rxy[i] = H*( - -Rxy[i] - +gxx[i]*Gamxy[i]+gxy[i]*Gamyy_d[i]+gxz[i]*Gamzy[i] - +gxy[i]*Gamxx[i]+gyy[i]*Gamyx[i]+gyz[i]*Gamzx[i] - +Gamxa[i]*lxyx[i]+Gamya[i]*lyyx[i]+Gamza[i]*lyzx[i] - +Gamxa[i]*lxxy[i]+Gamya[i]*lxyy[i]+Gamza[i]*lxzy[i]) - +uxx*(Gxxx[i]*lxxy[i]+Gyxx[i]*lxyy[i]+Gzxx[i]*lxzy[i] - +Gxxy[i]*lxxx[i]+Gyxy[i]*lxyx[i]+Gzxy[i]*lxzx[i] - +Gxxx[i]*lxyx[i]+Gyxx[i]*lxyy[i]+Gzxx[i]*lxyz[i]) - +uxy*(Gxxx[i]*lxyy[i]+Gyxx[i]*lyyy[i]+Gzxx[i]*lyzy[i] - +Gxxy[i]*lxyx[i]+Gyxy[i]*lyyx[i]+Gzxy[i]*lyzx[i] - +Gxxy[i]*lxyx[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxyz[i] - +Gxxy[i]*lxxy[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxzy[i] - +Gxyy[i]*lxxx[i]+Gyyy[i]*lxyx[i]+Gzyy[i]*lxzx[i] - +Gxxx[i]*lyyx[i]+Gyxx[i]*lyyy[i]+Gzxx[i]*lyyz[i]) - +uxz*(Gxxx[i]*lxzy[i]+Gyxx[i]*lyzy[i]+Gzxx[i]*lzzy[i] - +Gxxy[i]*lxzx[i]+Gyxy[i]*lyzx[i]+Gzxy[i]*lzzx[i] - +Gxxz[i]*lxyx[i]+Gyxz[i]*lxyy[i]+Gzxz[i]*lxyz[i] - +Gxxz[i]*lxxy[i]+Gyxz[i]*lxyy[i]+Gzxz[i]*lxzy[i] - +Gxyz[i]*lxxx[i]+Gyyz[i]*lxyx[i]+Gzyz[i]*lxzx[i] - +Gxxx[i]*lyzx[i]+Gyxx[i]*lyzy[i]+Gzxx[i]*lyzz[i]) - +uyy*(Gxxy[i]*lxyy[i]+Gyxy[i]*lyyy[i]+Gzxy[i]*lyzy[i] - +Gxyy[i]*lxyx[i]+Gyyy[i]*lyyx[i]+Gzyy[i]*lyzx[i] - +Gxxy[i]*lyyx[i]+Gyxy[i]*lyyy[i]+Gzxy[i]*lyyz[i]) - +uyz*(Gxxy[i]*lxzy[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lzzy[i] - +Gxyy[i]*lxzx[i]+Gyyy[i]*lyzx[i]+Gzyy[i]*lzzx[i] - +Gxxz[i]*lyyx[i]+Gyxz[i]*lyyy[i]+Gzxz[i]*lyyz[i] - +Gxxz[i]*lxyy[i]+Gyxz[i]*lyyy[i]+Gzxz[i]*lyzy[i] - +Gxyz[i]*lxyx[i]+Gyyz[i]*lyyx[i]+Gzyz[i]*lyzx[i] - +Gxxy[i]*lyzx[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lyzz[i]) - +uzz*(Gxxz[i]*lxzy[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lzzy[i] - +Gxyz[i]*lxzx[i]+Gyyz[i]*lyzx[i]+Gzyz[i]*lzzx[i] - +Gxxz[i]*lyzx[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lyzz[i]); - - /* Rxz */ - Rxz[i] = H*( - -Rxz[i] - +gxx[i]*Gamxz[i]+gxy[i]*Gamyz_d[i]+gxz[i]*Gamzz_d[i] - +gxz[i]*Gamxx[i]+gyz[i]*Gamyx[i]+gzz[i]*Gamzx[i] - 
+Gamxa[i]*lxzx[i]+Gamya[i]*lyzx[i]+Gamza[i]*lzzx[i] - +Gamxa[i]*lxxz[i]+Gamya[i]*lxyz[i]+Gamza[i]*lxzz[i]) - +uxx*(Gxxx[i]*lxxz[i]+Gyxx[i]*lxyz[i]+Gzxx[i]*lxzz[i] - +Gxxz[i]*lxxx[i]+Gyxz[i]*lxyx[i]+Gzxz[i]*lxzx[i] - +Gxxx[i]*lxzx[i]+Gyxx[i]*lxzy[i]+Gzxx[i]*lxzz[i]) - +uxy*(Gxxx[i]*lxyz[i]+Gyxx[i]*lyyz[i]+Gzxx[i]*lyzz[i] - +Gxxz[i]*lxyx[i]+Gyxz[i]*lyyx[i]+Gzxz[i]*lyzx[i] - +Gxxy[i]*lxzx[i]+Gyxy[i]*lxzy[i]+Gzxy[i]*lxzz[i] - +Gxxy[i]*lxxz[i]+Gyxy[i]*lxyz[i]+Gzxy[i]*lxzz[i] - +Gxyz[i]*lxxx[i]+Gyyz[i]*lxyx[i]+Gzyz[i]*lxzx[i] - +Gxxx[i]*lyzx[i]+Gyxx[i]*lyzy[i]+Gzxx[i]*lyzz[i]) - +uxz*(Gxxx[i]*lxzz[i]+Gyxx[i]*lyzz[i]+Gzxx[i]*lzzz[i] - +Gxxz[i]*lxzx[i]+Gyxz[i]*lyzx[i]+Gzxz[i]*lzzx[i] - +Gxxz[i]*lxzx[i]+Gyxz[i]*lxzy[i]+Gzxz[i]*lxzz[i] - +Gxxz[i]*lxxz[i]+Gyxz[i]*lxyz[i]+Gzxz[i]*lxzz[i] - +Gxzz[i]*lxxx[i]+Gyzz[i]*lxyx[i]+Gzzz[i]*lxzx[i] - +Gxxx[i]*lzzx[i]+Gyxx[i]*lzzy[i]+Gzxx[i]*lzzz[i]) - +uyy*(Gxxy[i]*lxyz[i]+Gyxy[i]*lyyz[i]+Gzxy[i]*lyzz[i] - +Gxyz[i]*lxyx[i]+Gyyz[i]*lyyx[i]+Gzyz[i]*lyzx[i] - +Gxxy[i]*lyzx[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lyzz[i]) - +uyz*(Gxxy[i]*lxzz[i]+Gyxy[i]*lyzz[i]+Gzxy[i]*lzzz[i] - +Gxyz[i]*lxzx[i]+Gyyz[i]*lyzx[i]+Gzyz[i]*lzzx[i] - +Gxxz[i]*lyzx[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lyzz[i] - +Gxxz[i]*lxyz[i]+Gyxz[i]*lyyz[i]+Gzxz[i]*lyzz[i] - +Gxzz[i]*lxyx[i]+Gyzz[i]*lyyx[i]+Gzzz[i]*lyzx[i] - +Gxxy[i]*lzzx[i]+Gyxy[i]*lzzy[i]+Gzxy[i]*lzzz[i]) - +uzz*(Gxxz[i]*lxzz[i]+Gyxz[i]*lyzz[i]+Gzxz[i]*lzzz[i] - +Gxzz[i]*lxzx[i]+Gyzz[i]*lyzx[i]+Gzzz[i]*lzzx[i] - +Gxxz[i]*lzzx[i]+Gyxz[i]*lzzy[i]+Gzxz[i]*lzzz[i]); - - /* Ryz */ - Ryz[i] = H*( - -Ryz[i] - +gxy[i]*Gamxz[i]+gyy[i]*Gamyz_d[i]+gyz[i]*Gamzz_d[i] - +gxz[i]*Gamxy[i]+gyz[i]*Gamyy_d[i]+gzz[i]*Gamzy[i] - +Gamxa[i]*lxzy[i]+Gamya[i]*lyzy[i]+Gamza[i]*lzzy[i] - +Gamxa[i]*lxyz[i]+Gamya[i]*lyyz[i]+Gamza[i]*lyzz[i]) - +uxx*(Gxxy[i]*lxxz[i]+Gyxy[i]*lxyz[i]+Gzxy[i]*lxzz[i] - +Gxxz[i]*lxxy[i]+Gyxz[i]*lxyy[i]+Gzxz[i]*lxzy[i] - +Gxxy[i]*lxzx[i]+Gyxy[i]*lxzy[i]+Gzxy[i]*lxzz[i]) - +uxy*(Gxxy[i]*lxyz[i]+Gyxy[i]*lyyz[i]+Gzxy[i]*lyzz[i] - 
+Gxxz[i]*lxyy[i]+Gyxz[i]*lyyy[i]+Gzxz[i]*lyzy[i] - +Gxyy[i]*lxzx[i]+Gyyy[i]*lxzy[i]+Gzyy[i]*lxzz[i] - +Gxyy[i]*lxxz[i]+Gyyy[i]*lxyz[i]+Gzyy[i]*lxzz[i] - +Gxyz[i]*lxxy[i]+Gyyz[i]*lxyy[i]+Gzyz[i]*lxzy[i] - +Gxxy[i]*lyzx[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lyzz[i]) - +uxz*(Gxxy[i]*lxzz[i]+Gyxy[i]*lyzz[i]+Gzxy[i]*lzzz[i] - +Gxxz[i]*lxzy[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lzzy[i] - +Gxyz[i]*lxzx[i]+Gyyz[i]*lxzy[i]+Gzyz[i]*lxzz[i] - +Gxyz[i]*lxxz[i]+Gyyz[i]*lxyz[i]+Gzyz[i]*lxzz[i] - +Gxzz[i]*lxxy[i]+Gyzz[i]*lxyy[i]+Gzzz[i]*lxzy[i] - +Gxxy[i]*lzzx[i]+Gyxy[i]*lzzy[i]+Gzxy[i]*lzzz[i]) - +uyy*(Gxyy[i]*lxyz[i]+Gyyy[i]*lyyz[i]+Gzyy[i]*lyzz[i] - +Gxyz[i]*lxyy[i]+Gyyz[i]*lyyy[i]+Gzyz[i]*lyzy[i] - +Gxyy[i]*lyzx[i]+Gyyy[i]*lyzy[i]+Gzyy[i]*lyzz[i]) - +uyz*(Gxyy[i]*lxzz[i]+Gyyy[i]*lyzz[i]+Gzyy[i]*lzzz[i] - +Gxyz[i]*lxzy[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lzzy[i] - +Gxyz[i]*lyzx[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lyzz[i] - +Gxyz[i]*lxyz[i]+Gyyz[i]*lyyz[i]+Gzyz[i]*lyzz[i] - +Gxzz[i]*lxyy[i]+Gyzz[i]*lyyy[i]+Gzzz[i]*lyzy[i] - +Gxyy[i]*lzzx[i]+Gyyy[i]*lzzy[i]+Gzyy[i]*lzzz[i]) - +uzz*(Gxyz[i]*lxzz[i]+Gyyz[i]*lyzz[i]+Gzyz[i]*lzzz[i] - +Gxzz[i]*lxzy[i]+Gyzz[i]*lyzy[i]+Gzzz[i]*lzzy[i] - +Gxyz[i]*lzzx[i]+Gyyz[i]*lzzy[i]+Gzyz[i]*lzzz[i]); - } -} - -/* Phase 13: chi correction to Ricci tensor - * After fdderivs(chi), subtract Christoffel*chi_deriv, compute conformal factor f, - * then add chi contribution to Rxx..Rzz. 
- */ -__global__ __launch_bounds__(128, 4) -void kern_phase13_chi_correction( - const double* __restrict__ chin1, - const double* __restrict__ chix, const double* __restrict__ chiy, - const double* __restrict__ chiz, - const double* __restrict__ gxx, const double* __restrict__ gxy, - const double* __restrict__ gxz, const double* __restrict__ gyy, - const double* __restrict__ gyz, const double* __restrict__ gzz, - const double* __restrict__ gupxx, const double* __restrict__ gupxy, - const double* __restrict__ gupxz, const double* __restrict__ gupyy, - const double* __restrict__ gupyz, const double* __restrict__ gupzz, - const double* __restrict__ Gxxx, const double* __restrict__ Gxxy, - const double* __restrict__ Gxxz, const double* __restrict__ Gxyy, - const double* __restrict__ Gxyz, const double* __restrict__ Gxzz, - const double* __restrict__ Gyxx, const double* __restrict__ Gyxy, - const double* __restrict__ Gyxz, const double* __restrict__ Gyyy, - const double* __restrict__ Gyyz, const double* __restrict__ Gyzz, - const double* __restrict__ Gzxx, const double* __restrict__ Gzxy, - const double* __restrict__ Gzxz, const double* __restrict__ Gzyy, - const double* __restrict__ Gzyz, const double* __restrict__ Gzzz, - double* __restrict__ fxx, double* __restrict__ fxy, - double* __restrict__ fxz, double* __restrict__ fyy, - double* __restrict__ fyz, double* __restrict__ fzz, - double* __restrict__ Rxx, double* __restrict__ Rxy, - double* __restrict__ Rxz, double* __restrict__ Ryy, - double* __restrict__ Ryz, double* __restrict__ Rzz) -{ - const double H=0.5, TWO=2.0, F3o2=1.5; - for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { - double cx=chix[i],cy=chiy[i],cz=chiz[i],c1=chin1[i]; - /* subtract Christoffel * chi_deriv */ - fxx[i] -= Gxxx[i]*cx+Gyxx[i]*cy+Gzxx[i]*cz; - fxy[i] -= Gxxy[i]*cx+Gyxy[i]*cy+Gzxy[i]*cz; - fxz[i] -= Gxxz[i]*cx+Gyxz[i]*cy+Gzxz[i]*cz; - fyy[i] -= Gxyy[i]*cx+Gyyy[i]*cy+Gzyy[i]*cz; - fyz[i] -= 
Gxyz[i]*cx+Gyyz[i]*cy+Gzyz[i]*cz; - fzz[i] -= Gxzz[i]*cx+Gyzz[i]*cy+Gzzz[i]*cz; - - double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; - double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; - double f_val = uxx*(fxx[i]-F3o2/c1*cx*cx) - + uyy*(fyy[i]-F3o2/c1*cy*cy) - + uzz*(fzz[i]-F3o2/c1*cz*cz) - + TWO*uxy*(fxy[i]-F3o2/c1*cx*cy) - + TWO*uxz*(fxz[i]-F3o2/c1*cx*cz) - + TWO*uyz*(fyz[i]-F3o2/c1*cy*cz); - - double inv2c = 1.0/(c1*TWO); - Rxx[i] += (fxx[i]-cx*cx*inv2c+gxx[i]*f_val)*inv2c; - Ryy[i] += (fyy[i]-cy*cy*inv2c+gyy[i]*f_val)*inv2c; - Rzz[i] += (fzz[i]-cz*cz*inv2c+gzz[i]*f_val)*inv2c; - Rxy[i] += (fxy[i]-cx*cy*inv2c+gxy[i]*f_val)*inv2c; - Rxz[i] += (fxz[i]-cx*cz*inv2c+gxz[i]*f_val)*inv2c; - Ryz[i] += (fyz[i]-cy*cz*inv2c+gyz[i]*f_val)*inv2c; - } -} - -/* Phase 15: trK_rhs, Aij_rhs, gauge (after fdderivs(Lap) and fderivs(chi)) - * Also updates Christoffel with physical chi correction, computes Lap_rhs, beta_rhs, dtSf_rhs. - */ -__global__ __launch_bounds__(128, 4) -void kern_phase15_trK_Aij_gauge( - const double* __restrict__ alpn1, const double* __restrict__ chin1, - const double* __restrict__ chix, const double* __restrict__ chiy, - const double* __restrict__ chiz, - const double* __restrict__ gxx, const double* __restrict__ gxy, - const double* __restrict__ gxz, const double* __restrict__ gyy, - const double* __restrict__ gyz, const double* __restrict__ gzz, - const double* __restrict__ gupxx, const double* __restrict__ gupxy, - const double* __restrict__ gupxz, const double* __restrict__ gupyy, - const double* __restrict__ gupyz, const double* __restrict__ gupzz, - const double* __restrict__ trK, + - Ga_x*betaxx[i] - Ga_y*betaxy[i] - Ga_z*betaxz[i] + + F1o3*(uxx*fxx_v+uxy*fxy_v+uxz*fxz_v) + + uxx*bxx_xx[i]+uyy*bxx_yy[i]+uzz*bxx_zz[i] + + TWO*(uxy*bxx_xy[i]+uxz*bxx_xz[i]+uyz*bxx_yz[i]); + Gamy_rhs[i] += F2o3*Ga_y*db + - Ga_x*betayx[i] - Ga_y*betayy[i] - Ga_z*betayz[i] + + F1o3*(uxy*fxx_v+uyy*fxy_v+uyz*fxz_v) + + uxx*bxy_xx[i]+uyy*bxy_yy[i]+uzz*bxy_zz[i] + + 
TWO*(uxy*bxy_xy[i]+uxz*bxy_xz[i]+uyz*bxy_yz[i]); + Gamz_rhs[i] += F2o3*Ga_z*db + - Ga_x*betazx[i] - Ga_y*betazy[i] - Ga_z*betazz[i] + + F1o3*(uxz*fxx_v+uyz*fxy_v+uzz*fxz_v) + + uxx*bxz_xx[i]+uyy*bxz_yy[i]+uzz*bxz_zz[i] + + TWO*(uxy*bxz_xy[i]+uxz*bxz_xz[i]+uyz*bxz_yz[i]); + } +} + +/* Phase 9: Christoffel contract — compute g_{ia} Gamma^a_{bc} products + * Overwrites gxxx..gzzz with lowered Christoffel products needed for Ricci. + */ +__global__ __launch_bounds__(128, 4) +void kern_phase9_christoffel_contract( + const double* __restrict__ gxx, const double* __restrict__ gxy, + const double* __restrict__ gxz, const double* __restrict__ gyy, + const double* __restrict__ gyz, const double* __restrict__ gzz, + const double* __restrict__ Gxxx, const double* __restrict__ Gxxy, + const double* __restrict__ Gxxz, const double* __restrict__ Gxyy, + const double* __restrict__ Gxyz, const double* __restrict__ Gxzz, + const double* __restrict__ Gyxx, const double* __restrict__ Gyxy, + const double* __restrict__ Gyxz, const double* __restrict__ Gyyy, + const double* __restrict__ Gyyz, const double* __restrict__ Gyzz, + const double* __restrict__ Gzxx, const double* __restrict__ Gzxy, + const double* __restrict__ Gzxz, const double* __restrict__ Gzyy, + const double* __restrict__ Gzyz, const double* __restrict__ Gzzz, + /* output: lowered products g_{ia} Gamma^a_{bc} */ + double* __restrict__ o_gxxx, double* __restrict__ o_gxyx, + double* __restrict__ o_gxzx, double* __restrict__ o_gyyx, + double* __restrict__ o_gyzx, double* __restrict__ o_gzzx, + double* __restrict__ o_gxxy, double* __restrict__ o_gxyy, + double* __restrict__ o_gxzy, double* __restrict__ o_gyyy, + double* __restrict__ o_gyzy, double* __restrict__ o_gzzy, + double* __restrict__ o_gxxz, double* __restrict__ o_gxyz, + double* __restrict__ o_gxzz, double* __restrict__ o_gyyz, + double* __restrict__ o_gyzz, double* __restrict__ o_gzzz) +{ + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += 
blockDim.x*gridDim.x) { + double g11=gxx[i],g12=gxy[i],g13=gxz[i]; + double g22=gyy[i],g23=gyz[i],g33=gzz[i]; + /* row x: g_{x,a} Gamma^a_{bc} */ + o_gxxx[i]=g11*Gxxx[i]+g12*Gyxx[i]+g13*Gzxx[i]; + o_gxyx[i]=g11*Gxxy[i]+g12*Gyxy[i]+g13*Gzxy[i]; + o_gxzx[i]=g11*Gxxz[i]+g12*Gyxz[i]+g13*Gzxz[i]; + o_gyyx[i]=g11*Gxyy[i]+g12*Gyyy[i]+g13*Gzyy[i]; + o_gyzx[i]=g11*Gxyz[i]+g12*Gyyz[i]+g13*Gzyz[i]; + o_gzzx[i]=g11*Gxzz[i]+g12*Gyzz[i]+g13*Gzzz[i]; + /* row y: g_{y,a} Gamma^a_{bc} */ + o_gxxy[i]=g12*Gxxx[i]+g22*Gyxx[i]+g23*Gzxx[i]; + o_gxyy[i]=g12*Gxxy[i]+g22*Gyxy[i]+g23*Gzxy[i]; + o_gxzy[i]=g12*Gxxz[i]+g22*Gyxz[i]+g23*Gzxz[i]; + o_gyyy[i]=g12*Gxyy[i]+g22*Gyyy[i]+g23*Gzyy[i]; + o_gyzy[i]=g12*Gxyz[i]+g22*Gyyz[i]+g23*Gzyz[i]; + o_gzzy[i]=g12*Gxzz[i]+g22*Gyzz[i]+g23*Gzzz[i]; + /* row z: g_{z,a} Gamma^a_{bc} */ + o_gxxz[i]=g13*Gxxx[i]+g23*Gyxx[i]+g33*Gzxx[i]; + o_gxyz[i]=g13*Gxxy[i]+g23*Gyxy[i]+g33*Gzxy[i]; + o_gxzz[i]=g13*Gxxz[i]+g23*Gyxz[i]+g33*Gzxz[i]; + o_gyyz[i]=g13*Gxyy[i]+g23*Gyyy[i]+g33*Gzyy[i]; + o_gyzz[i]=g13*Gxyz[i]+g23*Gyyz[i]+g33*Gzyz[i]; + o_gzzz[i]=g13*Gxzz[i]+g23*Gyzz[i]+g33*Gzzz[i]; + } +} + +/* Phase 10: After fdderivs of a metric component, contract with gup^{ij} + * R_comp = gup^xx*fxx + gup^yy*fyy + gup^zz*fzz + 2*(gup^xy*fxy + gup^xz*fxz + gup^yz*fyz) + */ +__global__ void kern_phase10_ricci_contract( + const double* __restrict__ gupxx, const double* __restrict__ gupxy, + const double* __restrict__ gupxz, const double* __restrict__ gupyy, + const double* __restrict__ gupyz, const double* __restrict__ gupzz, + const double* __restrict__ fxx, const double* __restrict__ fxy, + const double* __restrict__ fxz, const double* __restrict__ fyy, + const double* __restrict__ fyz, const double* __restrict__ fzz, + double* __restrict__ R_comp) +{ + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + R_comp[i] = gupxx[i]*fxx[i] + gupyy[i]*fyy[i] + gupzz[i]*fzz[i] + + 2.0*(gupxy[i]*fxy[i] + gupxz[i]*fxz[i] + gupyz[i]*fyz[i]); + } +} 
+ +/* Phase 11a: Ricci diagonal assembly (Rxx, Ryy, Rzz) */ +__global__ __launch_bounds__(128, 4) +void kern_phase11_ricci_diag( + const double* __restrict__ gxx, const double* __restrict__ gxy, + const double* __restrict__ gxz, const double* __restrict__ gyy, + const double* __restrict__ gyz, const double* __restrict__ gzz, + const double* __restrict__ gupxx, const double* __restrict__ gupxy, + const double* __restrict__ gupxz, const double* __restrict__ gupyy, + const double* __restrict__ gupyz, const double* __restrict__ gupzz, + const double* __restrict__ Gamxa, const double* __restrict__ Gamya, + const double* __restrict__ Gamza, + const double* __restrict__ Gamxx, const double* __restrict__ Gamxy, + const double* __restrict__ Gamxz, + const double* __restrict__ Gamyx, const double* __restrict__ Gamyy_d, + const double* __restrict__ Gamyz_d, + const double* __restrict__ Gamzx, const double* __restrict__ Gamzy, + const double* __restrict__ Gamzz_d, + const double* __restrict__ Gxxx, const double* __restrict__ Gxxy, + const double* __restrict__ Gxxz, const double* __restrict__ Gxyy, + const double* __restrict__ Gxyz, const double* __restrict__ Gxzz, + const double* __restrict__ Gyxx, const double* __restrict__ Gyxy, + const double* __restrict__ Gyxz, const double* __restrict__ Gyyy, + const double* __restrict__ Gyyz, const double* __restrict__ Gyzz, + const double* __restrict__ Gzxx, const double* __restrict__ Gzxy, + const double* __restrict__ Gzxz, const double* __restrict__ Gzyy, + const double* __restrict__ Gzyz, const double* __restrict__ Gzzz, + /* lowered Christoffel products */ + const double* __restrict__ lxxx, const double* __restrict__ lxyx, + const double* __restrict__ lxzx, const double* __restrict__ lyyx, + const double* __restrict__ lyzx, const double* __restrict__ lzzx, + const double* __restrict__ lxxy, const double* __restrict__ lxyy, + const double* __restrict__ lxzy, const double* __restrict__ lyyy, + const double* __restrict__ lyzy, const 
double* __restrict__ lzzy, + const double* __restrict__ lxxz, const double* __restrict__ lxyz, + const double* __restrict__ lxzz, const double* __restrict__ lyyz, + const double* __restrict__ lyzz, const double* __restrict__ lzzz, + double* __restrict__ Rxx, double* __restrict__ Ryy, double* __restrict__ Rzz) +{ + const double H = 0.5, TWO = 2.0; + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; + double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; + /* Rxx */ + Rxx[i] = -H*Rxx[i] + + gxx[i]*Gamxx[i]+gxy[i]*Gamyx[i]+gxz[i]*Gamzx[i] + + Gamxa[i]*lxxx[i]+Gamya[i]*lxyx[i]+Gamza[i]*lxzx[i] + + uxx*(TWO*(Gxxx[i]*lxxx[i]+Gyxx[i]*lxyx[i]+Gzxx[i]*lxzx[i]) + +(Gxxx[i]*lxxx[i]+Gyxx[i]*lxxy[i]+Gzxx[i]*lxxz[i])) + + uxy*(TWO*(Gxxx[i]*lxyx[i]+Gyxx[i]*lyyx[i]+Gzxx[i]*lyzx[i] + +Gxxy[i]*lxxx[i]+Gyxy[i]*lxyx[i]+Gzxy[i]*lxzx[i]) + +(Gxxy[i]*lxxx[i]+Gyxy[i]*lxxy[i]+Gzxy[i]*lxxz[i]) + +(Gxxx[i]*lxyx[i]+Gyxx[i]*lxyy[i]+Gzxx[i]*lxyz[i])) + + uxz*(TWO*(Gxxx[i]*lxzx[i]+Gyxx[i]*lyzx[i]+Gzxx[i]*lzzx[i] + +Gxxz[i]*lxxx[i]+Gyxz[i]*lxyx[i]+Gzxz[i]*lxzx[i]) + +(Gxxz[i]*lxxx[i]+Gyxz[i]*lxxy[i]+Gzxz[i]*lxxz[i]) + +(Gxxx[i]*lxzx[i]+Gyxx[i]*lxzy[i]+Gzxx[i]*lxzz[i])) + + uyy*(TWO*(Gxxy[i]*lxyx[i]+Gyxy[i]*lyyx[i]+Gzxy[i]*lyzx[i]) + +(Gxxy[i]*lxyx[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxyz[i])) + + uyz*(TWO*(Gxxy[i]*lxzx[i]+Gyxy[i]*lyzx[i]+Gzxy[i]*lzzx[i] + +Gxxz[i]*lxyx[i]+Gyxz[i]*lyyx[i]+Gzxz[i]*lyzx[i]) + +(Gxxz[i]*lxyx[i]+Gyxz[i]*lxyy[i]+Gzxz[i]*lxyz[i]) + +(Gxxy[i]*lxzx[i]+Gyxy[i]*lxzy[i]+Gzxy[i]*lxzz[i])) + + uzz*(TWO*(Gxxz[i]*lxzx[i]+Gyxz[i]*lyzx[i]+Gzxz[i]*lzzx[i]) + +(Gxxz[i]*lxzx[i]+Gyxz[i]*lxzy[i]+Gzxz[i]*lxzz[i])); + + /* Ryy */ + Ryy[i] = -H*Ryy[i] + + gxy[i]*Gamxy[i]+gyy[i]*Gamyy_d[i]+gyz[i]*Gamzy[i] + + Gamxa[i]*lxyy[i]+Gamya[i]*lyyy[i]+Gamza[i]*lyzy[i] + + uxx*(TWO*(Gxxy[i]*lxxy[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxzy[i]) + +(Gxxy[i]*lxyx[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxyz[i])) + + 
uxy*(TWO*(Gxxy[i]*lxyy[i]+Gyxy[i]*lyyy[i]+Gzxy[i]*lyzy[i] + +Gxyy[i]*lxxy[i]+Gyyy[i]*lxyy[i]+Gzyy[i]*lxzy[i]) + +(Gxyy[i]*lxyx[i]+Gyyy[i]*lxyy[i]+Gzyy[i]*lxyz[i]) + +(Gxxy[i]*lyyx[i]+Gyxy[i]*lyyy[i]+Gzxy[i]*lyyz[i])) + + uxz*(TWO*(Gxxy[i]*lxzy[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lzzy[i] + +Gxyz[i]*lxxy[i]+Gyyz[i]*lxyy[i]+Gzyz[i]*lxzy[i]) + +(Gxyz[i]*lxyx[i]+Gyyz[i]*lxyy[i]+Gzyz[i]*lxyz[i]) + +(Gxxy[i]*lyzx[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lyzz[i])) + + uyy*(TWO*(Gxyy[i]*lxyy[i]+Gyyy[i]*lyyy[i]+Gzyy[i]*lyzy[i]) + +(Gxyy[i]*lyyx[i]+Gyyy[i]*lyyy[i]+Gzyy[i]*lyyz[i])) + + uyz*(TWO*(Gxyy[i]*lxzy[i]+Gyyy[i]*lyzy[i]+Gzyy[i]*lzzy[i] + +Gxyz[i]*lxyy[i]+Gyyz[i]*lyyy[i]+Gzyz[i]*lyzy[i]) + +(Gxyz[i]*lyyx[i]+Gyyz[i]*lyyy[i]+Gzyz[i]*lyyz[i]) + +(Gxyy[i]*lyzx[i]+Gyyy[i]*lyzy[i]+Gzyy[i]*lyzz[i])) + + uzz*(TWO*(Gxyz[i]*lxzy[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lzzy[i]) + +(Gxyz[i]*lyzx[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lyzz[i])); + + /* Rzz */ + Rzz[i] = -H*Rzz[i] + + gxz[i]*Gamxz[i]+gyz[i]*Gamyz_d[i]+gzz[i]*Gamzz_d[i] + + Gamxa[i]*lxzz[i]+Gamya[i]*lyzz[i]+Gamza[i]*lzzz[i] + + uxx*(TWO*(Gxxz[i]*lxxz[i]+Gyxz[i]*lxyz[i]+Gzxz[i]*lxzz[i]) + +(Gxxz[i]*lxzx[i]+Gyxz[i]*lxzy[i]+Gzxz[i]*lxzz[i])) + + uxy*(TWO*(Gxxz[i]*lxyz[i]+Gyxz[i]*lyyz[i]+Gzxz[i]*lyzz[i] + +Gxyz[i]*lxxz[i]+Gyyz[i]*lxyz[i]+Gzyz[i]*lxzz[i]) + +(Gxyz[i]*lxzx[i]+Gyyz[i]*lxzy[i]+Gzyz[i]*lxzz[i]) + +(Gxxz[i]*lyzx[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lyzz[i])) + + uxz*(TWO*(Gxxz[i]*lxzz[i]+Gyxz[i]*lyzz[i]+Gzxz[i]*lzzz[i] + +Gxzz[i]*lxxz[i]+Gyzz[i]*lxyz[i]+Gzzz[i]*lxzz[i]) + +(Gxzz[i]*lxzx[i]+Gyzz[i]*lxzy[i]+Gzzz[i]*lxzz[i]) + +(Gxxz[i]*lzzx[i]+Gyxz[i]*lzzy[i]+Gzxz[i]*lzzz[i])) + + uyy*(TWO*(Gxyz[i]*lxyz[i]+Gyyz[i]*lyyz[i]+Gzyz[i]*lyzz[i]) + +(Gxyz[i]*lyzx[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lyzz[i])) + + uyz*(TWO*(Gxyz[i]*lxzz[i]+Gyyz[i]*lyzz[i]+Gzyz[i]*lzzz[i] + +Gxzz[i]*lxyz[i]+Gyzz[i]*lyyz[i]+Gzzz[i]*lyzz[i]) + +(Gxzz[i]*lyzx[i]+Gyzz[i]*lyzy[i]+Gzzz[i]*lyzz[i]) + +(Gxyz[i]*lzzx[i]+Gyyz[i]*lzzy[i]+Gzyz[i]*lzzz[i])) + + uzz*(TWO*(Gxzz[i]*lxzz[i]+Gyzz[i]*lyzz[i]+Gzzz[i]*lzzz[i]) 
+ +(Gxzz[i]*lzzx[i]+Gyzz[i]*lzzy[i]+Gzzz[i]*lzzz[i])); + } +} + +/* Phase 11b: Ricci off-diagonal assembly (Rxy, Rxz, Ryz) */ +__global__ __launch_bounds__(128, 4) +void kern_phase11_ricci_offdiag( + const double* __restrict__ gxx, const double* __restrict__ gxy, + const double* __restrict__ gxz, const double* __restrict__ gyy, + const double* __restrict__ gyz, const double* __restrict__ gzz, + const double* __restrict__ gupxx, const double* __restrict__ gupxy, + const double* __restrict__ gupxz, const double* __restrict__ gupyy, + const double* __restrict__ gupyz, const double* __restrict__ gupzz, + const double* __restrict__ Gamxa, const double* __restrict__ Gamya, + const double* __restrict__ Gamza, + const double* __restrict__ Gamxx, const double* __restrict__ Gamxy, + const double* __restrict__ Gamxz, + const double* __restrict__ Gamyx, const double* __restrict__ Gamyy_d, + const double* __restrict__ Gamyz_d, + const double* __restrict__ Gamzx, const double* __restrict__ Gamzy, + const double* __restrict__ Gamzz_d, + const double* __restrict__ Gxxx, const double* __restrict__ Gxxy, + const double* __restrict__ Gxxz, const double* __restrict__ Gxyy, + const double* __restrict__ Gxyz, const double* __restrict__ Gxzz, + const double* __restrict__ Gyxx, const double* __restrict__ Gyxy, + const double* __restrict__ Gyxz, const double* __restrict__ Gyyy, + const double* __restrict__ Gyyz, const double* __restrict__ Gyzz, + const double* __restrict__ Gzxx, const double* __restrict__ Gzxy, + const double* __restrict__ Gzxz, const double* __restrict__ Gzyy, + const double* __restrict__ Gzyz, const double* __restrict__ Gzzz, + const double* __restrict__ lxxx, const double* __restrict__ lxyx, + const double* __restrict__ lxzx, const double* __restrict__ lyyx, + const double* __restrict__ lyzx, const double* __restrict__ lzzx, + const double* __restrict__ lxxy, const double* __restrict__ lxyy, + const double* __restrict__ lxzy, const double* __restrict__ lyyy, + const 
double* __restrict__ lyzy, const double* __restrict__ lzzy, + const double* __restrict__ lxxz, const double* __restrict__ lxyz, + const double* __restrict__ lxzz, const double* __restrict__ lyyz, + const double* __restrict__ lyzz, const double* __restrict__ lzzz, + double* __restrict__ Rxy, double* __restrict__ Rxz, double* __restrict__ Ryz) +{ + const double H = 0.5, TWO = 2.0; + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; + double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; + + /* Rxy */ + Rxy[i] = H*( + -Rxy[i] + +gxx[i]*Gamxy[i]+gxy[i]*Gamyy_d[i]+gxz[i]*Gamzy[i] + +gxy[i]*Gamxx[i]+gyy[i]*Gamyx[i]+gyz[i]*Gamzx[i] + +Gamxa[i]*lxyx[i]+Gamya[i]*lyyx[i]+Gamza[i]*lyzx[i] + +Gamxa[i]*lxxy[i]+Gamya[i]*lxyy[i]+Gamza[i]*lxzy[i]) + +uxx*(Gxxx[i]*lxxy[i]+Gyxx[i]*lxyy[i]+Gzxx[i]*lxzy[i] + +Gxxy[i]*lxxx[i]+Gyxy[i]*lxyx[i]+Gzxy[i]*lxzx[i] + +Gxxx[i]*lxyx[i]+Gyxx[i]*lxyy[i]+Gzxx[i]*lxyz[i]) + +uxy*(Gxxx[i]*lxyy[i]+Gyxx[i]*lyyy[i]+Gzxx[i]*lyzy[i] + +Gxxy[i]*lxyx[i]+Gyxy[i]*lyyx[i]+Gzxy[i]*lyzx[i] + +Gxxy[i]*lxyx[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxyz[i] + +Gxxy[i]*lxxy[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxzy[i] + +Gxyy[i]*lxxx[i]+Gyyy[i]*lxyx[i]+Gzyy[i]*lxzx[i] + +Gxxx[i]*lyyx[i]+Gyxx[i]*lyyy[i]+Gzxx[i]*lyyz[i]) + +uxz*(Gxxx[i]*lxzy[i]+Gyxx[i]*lyzy[i]+Gzxx[i]*lzzy[i] + +Gxxy[i]*lxzx[i]+Gyxy[i]*lyzx[i]+Gzxy[i]*lzzx[i] + +Gxxz[i]*lxyx[i]+Gyxz[i]*lxyy[i]+Gzxz[i]*lxyz[i] + +Gxxz[i]*lxxy[i]+Gyxz[i]*lxyy[i]+Gzxz[i]*lxzy[i] + +Gxyz[i]*lxxx[i]+Gyyz[i]*lxyx[i]+Gzyz[i]*lxzx[i] + +Gxxx[i]*lyzx[i]+Gyxx[i]*lyzy[i]+Gzxx[i]*lyzz[i]) + +uyy*(Gxxy[i]*lxyy[i]+Gyxy[i]*lyyy[i]+Gzxy[i]*lyzy[i] + +Gxyy[i]*lxyx[i]+Gyyy[i]*lyyx[i]+Gzyy[i]*lyzx[i] + +Gxxy[i]*lyyx[i]+Gyxy[i]*lyyy[i]+Gzxy[i]*lyyz[i]) + +uyz*(Gxxy[i]*lxzy[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lzzy[i] + +Gxyy[i]*lxzx[i]+Gyyy[i]*lyzx[i]+Gzyy[i]*lzzx[i] + +Gxxz[i]*lyyx[i]+Gyxz[i]*lyyy[i]+Gzxz[i]*lyyz[i] + +Gxxz[i]*lxyy[i]+Gyxz[i]*lyyy[i]+Gzxz[i]*lyzy[i] + 
+Gxyz[i]*lxyx[i]+Gyyz[i]*lyyx[i]+Gzyz[i]*lyzx[i] + +Gxxy[i]*lyzx[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lyzz[i]) + +uzz*(Gxxz[i]*lxzy[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lzzy[i] + +Gxyz[i]*lxzx[i]+Gyyz[i]*lyzx[i]+Gzyz[i]*lzzx[i] + +Gxxz[i]*lyzx[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lyzz[i]); + + /* Rxz */ + Rxz[i] = H*( + -Rxz[i] + +gxx[i]*Gamxz[i]+gxy[i]*Gamyz_d[i]+gxz[i]*Gamzz_d[i] + +gxz[i]*Gamxx[i]+gyz[i]*Gamyx[i]+gzz[i]*Gamzx[i] + +Gamxa[i]*lxzx[i]+Gamya[i]*lyzx[i]+Gamza[i]*lzzx[i] + +Gamxa[i]*lxxz[i]+Gamya[i]*lxyz[i]+Gamza[i]*lxzz[i]) + +uxx*(Gxxx[i]*lxxz[i]+Gyxx[i]*lxyz[i]+Gzxx[i]*lxzz[i] + +Gxxz[i]*lxxx[i]+Gyxz[i]*lxyx[i]+Gzxz[i]*lxzx[i] + +Gxxx[i]*lxzx[i]+Gyxx[i]*lxzy[i]+Gzxx[i]*lxzz[i]) + +uxy*(Gxxx[i]*lxyz[i]+Gyxx[i]*lyyz[i]+Gzxx[i]*lyzz[i] + +Gxxz[i]*lxyx[i]+Gyxz[i]*lyyx[i]+Gzxz[i]*lyzx[i] + +Gxxy[i]*lxzx[i]+Gyxy[i]*lxzy[i]+Gzxy[i]*lxzz[i] + +Gxxy[i]*lxxz[i]+Gyxy[i]*lxyz[i]+Gzxy[i]*lxzz[i] + +Gxyz[i]*lxxx[i]+Gyyz[i]*lxyx[i]+Gzyz[i]*lxzx[i] + +Gxxx[i]*lyzx[i]+Gyxx[i]*lyzy[i]+Gzxx[i]*lyzz[i]) + +uxz*(Gxxx[i]*lxzz[i]+Gyxx[i]*lyzz[i]+Gzxx[i]*lzzz[i] + +Gxxz[i]*lxzx[i]+Gyxz[i]*lyzx[i]+Gzxz[i]*lzzx[i] + +Gxxz[i]*lxzx[i]+Gyxz[i]*lxzy[i]+Gzxz[i]*lxzz[i] + +Gxxz[i]*lxxz[i]+Gyxz[i]*lxyz[i]+Gzxz[i]*lxzz[i] + +Gxzz[i]*lxxx[i]+Gyzz[i]*lxyx[i]+Gzzz[i]*lxzx[i] + +Gxxx[i]*lzzx[i]+Gyxx[i]*lzzy[i]+Gzxx[i]*lzzz[i]) + +uyy*(Gxxy[i]*lxyz[i]+Gyxy[i]*lyyz[i]+Gzxy[i]*lyzz[i] + +Gxyz[i]*lxyx[i]+Gyyz[i]*lyyx[i]+Gzyz[i]*lyzx[i] + +Gxxy[i]*lyzx[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lyzz[i]) + +uyz*(Gxxy[i]*lxzz[i]+Gyxy[i]*lyzz[i]+Gzxy[i]*lzzz[i] + +Gxyz[i]*lxzx[i]+Gyyz[i]*lyzx[i]+Gzyz[i]*lzzx[i] + +Gxxz[i]*lyzx[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lyzz[i] + +Gxxz[i]*lxyz[i]+Gyxz[i]*lyyz[i]+Gzxz[i]*lyzz[i] + +Gxzz[i]*lxyx[i]+Gyzz[i]*lyyx[i]+Gzzz[i]*lyzx[i] + +Gxxy[i]*lzzx[i]+Gyxy[i]*lzzy[i]+Gzxy[i]*lzzz[i]) + +uzz*(Gxxz[i]*lxzz[i]+Gyxz[i]*lyzz[i]+Gzxz[i]*lzzz[i] + +Gxzz[i]*lxzx[i]+Gyzz[i]*lyzx[i]+Gzzz[i]*lzzx[i] + +Gxxz[i]*lzzx[i]+Gyxz[i]*lzzy[i]+Gzxz[i]*lzzz[i]); + + /* Ryz */ + Ryz[i] = H*( + -Ryz[i] + 
+gxy[i]*Gamxz[i]+gyy[i]*Gamyz_d[i]+gyz[i]*Gamzz_d[i] + +gxz[i]*Gamxy[i]+gyz[i]*Gamyy_d[i]+gzz[i]*Gamzy[i] + +Gamxa[i]*lxzy[i]+Gamya[i]*lyzy[i]+Gamza[i]*lzzy[i] + +Gamxa[i]*lxyz[i]+Gamya[i]*lyyz[i]+Gamza[i]*lyzz[i]) + +uxx*(Gxxy[i]*lxxz[i]+Gyxy[i]*lxyz[i]+Gzxy[i]*lxzz[i] + +Gxxz[i]*lxxy[i]+Gyxz[i]*lxyy[i]+Gzxz[i]*lxzy[i] + +Gxxy[i]*lxzx[i]+Gyxy[i]*lxzy[i]+Gzxy[i]*lxzz[i]) + +uxy*(Gxxy[i]*lxyz[i]+Gyxy[i]*lyyz[i]+Gzxy[i]*lyzz[i] + +Gxxz[i]*lxyy[i]+Gyxz[i]*lyyy[i]+Gzxz[i]*lyzy[i] + +Gxyy[i]*lxzx[i]+Gyyy[i]*lxzy[i]+Gzyy[i]*lxzz[i] + +Gxyy[i]*lxxz[i]+Gyyy[i]*lxyz[i]+Gzyy[i]*lxzz[i] + +Gxyz[i]*lxxy[i]+Gyyz[i]*lxyy[i]+Gzyz[i]*lxzy[i] + +Gxxy[i]*lyzx[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lyzz[i]) + +uxz*(Gxxy[i]*lxzz[i]+Gyxy[i]*lyzz[i]+Gzxy[i]*lzzz[i] + +Gxxz[i]*lxzy[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lzzy[i] + +Gxyz[i]*lxzx[i]+Gyyz[i]*lxzy[i]+Gzyz[i]*lxzz[i] + +Gxyz[i]*lxxz[i]+Gyyz[i]*lxyz[i]+Gzyz[i]*lxzz[i] + +Gxzz[i]*lxxy[i]+Gyzz[i]*lxyy[i]+Gzzz[i]*lxzy[i] + +Gxxy[i]*lzzx[i]+Gyxy[i]*lzzy[i]+Gzxy[i]*lzzz[i]) + +uyy*(Gxyy[i]*lxyz[i]+Gyyy[i]*lyyz[i]+Gzyy[i]*lyzz[i] + +Gxyz[i]*lxyy[i]+Gyyz[i]*lyyy[i]+Gzyz[i]*lyzy[i] + +Gxyy[i]*lyzx[i]+Gyyy[i]*lyzy[i]+Gzyy[i]*lyzz[i]) + +uyz*(Gxyy[i]*lxzz[i]+Gyyy[i]*lyzz[i]+Gzyy[i]*lzzz[i] + +Gxyz[i]*lxzy[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lzzy[i] + +Gxyz[i]*lyzx[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lyzz[i] + +Gxyz[i]*lxyz[i]+Gyyz[i]*lyyz[i]+Gzyz[i]*lyzz[i] + +Gxzz[i]*lxyy[i]+Gyzz[i]*lyyy[i]+Gzzz[i]*lyzy[i] + +Gxyy[i]*lzzx[i]+Gyyy[i]*lzzy[i]+Gzyy[i]*lzzz[i]) + +uzz*(Gxyz[i]*lxzz[i]+Gyyz[i]*lyzz[i]+Gzyz[i]*lzzz[i] + +Gxzz[i]*lxzy[i]+Gyzz[i]*lyzy[i]+Gzzz[i]*lzzy[i] + +Gxyz[i]*lzzx[i]+Gyyz[i]*lzzy[i]+Gzyz[i]*lzzz[i]); + } +} + +/* Phase 13: chi correction to Ricci tensor + * After fdderivs(chi), subtract Christoffel*chi_deriv, compute conformal factor f, + * then add chi contribution to Rxx..Rzz. 
+ */ +__global__ __launch_bounds__(128, 4) +void kern_phase13_chi_correction( + const double* __restrict__ chin1, + const double* __restrict__ chix, const double* __restrict__ chiy, + const double* __restrict__ chiz, + const double* __restrict__ gxx, const double* __restrict__ gxy, + const double* __restrict__ gxz, const double* __restrict__ gyy, + const double* __restrict__ gyz, const double* __restrict__ gzz, + const double* __restrict__ gupxx, const double* __restrict__ gupxy, + const double* __restrict__ gupxz, const double* __restrict__ gupyy, + const double* __restrict__ gupyz, const double* __restrict__ gupzz, + const double* __restrict__ Gxxx, const double* __restrict__ Gxxy, + const double* __restrict__ Gxxz, const double* __restrict__ Gxyy, + const double* __restrict__ Gxyz, const double* __restrict__ Gxzz, + const double* __restrict__ Gyxx, const double* __restrict__ Gyxy, + const double* __restrict__ Gyxz, const double* __restrict__ Gyyy, + const double* __restrict__ Gyyz, const double* __restrict__ Gyzz, + const double* __restrict__ Gzxx, const double* __restrict__ Gzxy, + const double* __restrict__ Gzxz, const double* __restrict__ Gzyy, + const double* __restrict__ Gzyz, const double* __restrict__ Gzzz, + double* __restrict__ fxx, double* __restrict__ fxy, + double* __restrict__ fxz, double* __restrict__ fyy, + double* __restrict__ fyz, double* __restrict__ fzz, + double* __restrict__ Rxx, double* __restrict__ Rxy, + double* __restrict__ Rxz, double* __restrict__ Ryy, + double* __restrict__ Ryz, double* __restrict__ Rzz) +{ + const double H=0.5, TWO=2.0, F3o2=1.5; + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + double cx=chix[i],cy=chiy[i],cz=chiz[i],c1=chin1[i]; + /* subtract Christoffel * chi_deriv */ + fxx[i] -= Gxxx[i]*cx+Gyxx[i]*cy+Gzxx[i]*cz; + fxy[i] -= Gxxy[i]*cx+Gyxy[i]*cy+Gzxy[i]*cz; + fxz[i] -= Gxxz[i]*cx+Gyxz[i]*cy+Gzxz[i]*cz; + fyy[i] -= Gxyy[i]*cx+Gyyy[i]*cy+Gzyy[i]*cz; + fyz[i] -= 
Gxyz[i]*cx+Gyyz[i]*cy+Gzyz[i]*cz; + fzz[i] -= Gxzz[i]*cx+Gyzz[i]*cy+Gzzz[i]*cz; + + double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; + double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; + double f_val = uxx*(fxx[i]-F3o2/c1*cx*cx) + + uyy*(fyy[i]-F3o2/c1*cy*cy) + + uzz*(fzz[i]-F3o2/c1*cz*cz) + + TWO*uxy*(fxy[i]-F3o2/c1*cx*cy) + + TWO*uxz*(fxz[i]-F3o2/c1*cx*cz) + + TWO*uyz*(fyz[i]-F3o2/c1*cy*cz); + + double inv2c = 1.0/(c1*TWO); + Rxx[i] += (fxx[i]-cx*cx*inv2c+gxx[i]*f_val)*inv2c; + Ryy[i] += (fyy[i]-cy*cy*inv2c+gyy[i]*f_val)*inv2c; + Rzz[i] += (fzz[i]-cz*cz*inv2c+gzz[i]*f_val)*inv2c; + Rxy[i] += (fxy[i]-cx*cy*inv2c+gxy[i]*f_val)*inv2c; + Rxz[i] += (fxz[i]-cx*cz*inv2c+gxz[i]*f_val)*inv2c; + Ryz[i] += (fyz[i]-cy*cz*inv2c+gyz[i]*f_val)*inv2c; + } +} + +/* Phase 15: trK_rhs, Aij_rhs, gauge (after fdderivs(Lap) and fderivs(chi)) + * Also updates Christoffel with physical chi correction, computes Lap_rhs, beta_rhs, dtSf_rhs. + */ +__global__ __launch_bounds__(128, 4) +void kern_phase15_trK_Aij_gauge( + const double* __restrict__ alpn1, const double* __restrict__ chin1, + const double* __restrict__ chix, const double* __restrict__ chiy, + const double* __restrict__ chiz, + const double* __restrict__ gxx, const double* __restrict__ gxy, + const double* __restrict__ gxz, const double* __restrict__ gyy, + const double* __restrict__ gyz, const double* __restrict__ gzz, + const double* __restrict__ gupxx, const double* __restrict__ gupxy, + const double* __restrict__ gupxz, const double* __restrict__ gupyy, + const double* __restrict__ gupyz, const double* __restrict__ gupzz, + const double* __restrict__ trK, const double* __restrict__ Axx, const double* __restrict__ Axy, const double* __restrict__ Axz, const double* __restrict__ Ayy, const double* __restrict__ Ayz, const double* __restrict__ Azz, const double* __restrict__ Lapx, const double* __restrict__ Lapy, const double* __restrict__ Lapz, const double* __restrict__ betaxx, const double* __restrict__ betaxy, - const double* 
__restrict__ betaxz, const double* __restrict__ betayx, - const double* __restrict__ betayy, const double* __restrict__ betayz, - const double* __restrict__ betazx, const double* __restrict__ betazy, - const double* __restrict__ betazz, - const double* __restrict__ rho, - const double* __restrict__ Sx_m, const double* __restrict__ Sy_m, - const double* __restrict__ Sz_m, - const double* __restrict__ Sxx_m, const double* __restrict__ Sxy_m, - const double* __restrict__ Sxz_m, const double* __restrict__ Syy_m, - const double* __restrict__ Syz_m, const double* __restrict__ Szz_m, - const double* __restrict__ dtSfx, const double* __restrict__ dtSfy, - const double* __restrict__ dtSfz, - const double* __restrict__ Rxx, const double* __restrict__ Rxy, - const double* __restrict__ Rxz, const double* __restrict__ Ryy, - const double* __restrict__ Ryz, const double* __restrict__ Rzz, - double* __restrict__ Gxxx, double* __restrict__ Gxxy, - double* __restrict__ Gxxz, double* __restrict__ Gxyy, - double* __restrict__ Gxyz_o, double* __restrict__ Gxzz, - double* __restrict__ Gyxx, double* __restrict__ Gyxy, - double* __restrict__ Gyxz, double* __restrict__ Gyyy, - double* __restrict__ Gyyz, double* __restrict__ Gyzz, - double* __restrict__ Gzxx, double* __restrict__ Gzxy, - double* __restrict__ Gzxz, double* __restrict__ Gzyy, - double* __restrict__ Gzyz, double* __restrict__ Gzzz, - /* fxx..fzz = fdderivs(Lap) output */ - double* __restrict__ fxx, double* __restrict__ fxy, - double* __restrict__ fxz, double* __restrict__ fyy, - double* __restrict__ fyz, double* __restrict__ fzz, - /* dtSfx_rhs..dtSfz_rhs = fderivs(chi) output, then overwritten */ - double* __restrict__ dtSfx_rhs, double* __restrict__ dtSfy_rhs, - double* __restrict__ dtSfz_rhs, - double* __restrict__ trK_rhs, - double* __restrict__ Axx_rhs, double* __restrict__ Axy_rhs, - double* __restrict__ Axz_rhs, double* __restrict__ Ayy_rhs, - double* __restrict__ Ayz_rhs, double* __restrict__ Azz_rhs, - double* 
__restrict__ Lap_rhs, - double* __restrict__ betax_rhs, double* __restrict__ betay_rhs, - double* __restrict__ betaz_rhs, - double* __restrict__ Gamx_rhs, double* __restrict__ Gamy_rhs, - double* __restrict__ Gamz_rhs, - double* __restrict__ f_arr, double* __restrict__ S_arr) -{ - const double TWO=2.0, FOUR=4.0, EIGHT=8.0, H=0.5; - const double F1o3=1.0/3.0, F2o3=2.0/3.0, F3o2=1.5; - const double PI_V=3.14159265358979323846; - const double F16=16.0, F8=8.0; - for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { - double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; - double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; - double a=alpn1[i], c1=chin1[i]; - double cx=chix[i],cy=chiy[i],cz=chiz[i]; - double lx=Lapx[i],ly=Lapy[i],lz=Lapz[i]; - - /* raised chi/chi */ - double gx=(uxx*cx+uxy*cy+uxz*cz)/c1; - double gy=(uxy*cx+uyy*cy+uyz*cz)/c1; - double gz=(uxz*cx+uyz*cy+uzz*cz)/c1; - - /* Christoffel physical correction */ - Gxxx[i]-=((cx+cx)/c1-gxx[i]*gx)*H; - Gyxx[i]-=(0.0-gxx[i]*gy)*H; - Gzxx[i]-=(0.0-gxx[i]*gz)*H; - Gxyy[i]-=(0.0-gyy[i]*gx)*H; - Gyyy[i]-=((cy+cy)/c1-gyy[i]*gy)*H; - Gzyy[i]-=(0.0-gyy[i]*gz)*H; - Gxzz[i]-=(0.0-gzz[i]*gx)*H; - Gyzz[i]-=(0.0-gzz[i]*gy)*H; - Gzzz[i]-=((cz+cz)/c1-gzz[i]*gz)*H; - Gxxy[i]-=(cy/c1-gxy[i]*gx)*H; - Gyxy[i]-=(cx/c1-gxy[i]*gy)*H; - Gzxy[i]-=(0.0-gxy[i]*gz)*H; - Gxxz[i]-=(cz/c1-gxz[i]*gx)*H; - Gyxz[i]-=(0.0-gxz[i]*gy)*H; - Gzxz[i]-=(cx/c1-gxz[i]*gz)*H; - Gxyz_o[i]-=(0.0-gyz[i]*gx)*H; - Gyyz[i]-=(cz/c1-gyz[i]*gy)*H; - Gzyz[i]-=(cy/c1-gyz[i]*gz)*H; - - /* fxx..fzz correction: subtract Gamma*Lap_deriv */ - fxx[i]-=Gxxx[i]*lx+Gyxx[i]*ly+Gzxx[i]*lz; - fyy[i]-=Gxyy[i]*lx+Gyyy[i]*ly+Gzyy[i]*lz; - fzz[i]-=Gxzz[i]*lx+Gyzz[i]*ly+Gzzz[i]*lz; - fxy[i]-=Gxxy[i]*lx+Gyxy[i]*ly+Gzxy[i]*lz; - fxz[i]-=Gxxz[i]*lx+Gyxz[i]*ly+Gzxz[i]*lz; - fyz[i]-=Gxyz_o[i]*lx+Gyyz[i]*ly+Gzyz[i]*lz; - - /* D^i D_i alpha */ - double DDA = uxx*fxx[i]+uyy*fyy[i]+uzz*fzz[i] - +TWO*(uxy*fxy[i]+uxz*fxz[i]+uyz*fyz[i]); - - /* trace of S_ij 
(physical) */ - double S_v = c1*(uxx*Sxx_m[i]+uyy*Syy_m[i]+uzz*Szz_m[i] - +TWO*(uxy*Sxy_m[i]+uxz*Sxz_m[i]+uyz*Syz_m[i])); - - /* A^ij A_ij */ - double AijAij = - uxx*(uxx*Axx[i]*Axx[i]+uyy*Axy[i]*Axy[i]+uzz*Axz[i]*Axz[i] - +TWO*(uxy*Axx[i]*Axy[i]+uxz*Axx[i]*Axz[i]+uyz*Axy[i]*Axz[i])) - +uyy*(uxx*Axy[i]*Axy[i]+uyy*Ayy[i]*Ayy[i]+uzz*Ayz[i]*Ayz[i] - +TWO*(uxy*Axy[i]*Ayy[i]+uxz*Axy[i]*Ayz[i]+uyz*Ayy[i]*Ayz[i])) - +uzz*(uxx*Axz[i]*Axz[i]+uyy*Ayz[i]*Ayz[i]+uzz*Azz[i]*Azz[i] - +TWO*(uxy*Axz[i]*Ayz[i]+uxz*Axz[i]*Azz[i]+uyz*Ayz[i]*Azz[i])) - +TWO*( - uxy*(uxx*Axx[i]*Axy[i]+uyy*Axy[i]*Ayy[i]+uzz*Axz[i]*Ayz[i] - +uxy*(Axx[i]*Ayy[i]+Axy[i]*Axy[i]) - +uxz*(Axx[i]*Ayz[i]+Axz[i]*Axy[i]) - +uyz*(Axy[i]*Ayz[i]+Axz[i]*Ayy[i])) - +uxz*(uxx*Axx[i]*Axz[i]+uyy*Axy[i]*Ayz[i]+uzz*Axz[i]*Azz[i] - +uxy*(Axx[i]*Ayz[i]+Axy[i]*Axz[i]) - +uxz*(Axx[i]*Azz[i]+Axz[i]*Axz[i]) - +uyz*(Axy[i]*Azz[i]+Axz[i]*Ayz[i])) - +uyz*(uxx*Axy[i]*Axz[i]+uyy*Ayy[i]*Ayz[i]+uzz*Ayz[i]*Azz[i] - +uxy*(Axy[i]*Ayz[i]+Ayy[i]*Axz[i]) - +uxz*(Axy[i]*Azz[i]+Ayz[i]*Axz[i]) - +uyz*(Ayy[i]*Azz[i]+Ayz[i]*Ayz[i]))); - + const double* __restrict__ betaxz, const double* __restrict__ betayx, + const double* __restrict__ betayy, const double* __restrict__ betayz, + const double* __restrict__ betazx, const double* __restrict__ betazy, + const double* __restrict__ betazz, + const double* __restrict__ rho, + const double* __restrict__ Sx_m, const double* __restrict__ Sy_m, + const double* __restrict__ Sz_m, + const double* __restrict__ Sxx_m, const double* __restrict__ Sxy_m, + const double* __restrict__ Sxz_m, const double* __restrict__ Syy_m, + const double* __restrict__ Syz_m, const double* __restrict__ Szz_m, + const double* __restrict__ dtSfx, const double* __restrict__ dtSfy, + const double* __restrict__ dtSfz, + const double* __restrict__ Rxx, const double* __restrict__ Rxy, + const double* __restrict__ Rxz, const double* __restrict__ Ryy, + const double* __restrict__ Ryz, const double* __restrict__ Rzz, + double* __restrict__ 
Gxxx, double* __restrict__ Gxxy, + double* __restrict__ Gxxz, double* __restrict__ Gxyy, + double* __restrict__ Gxyz_o, double* __restrict__ Gxzz, + double* __restrict__ Gyxx, double* __restrict__ Gyxy, + double* __restrict__ Gyxz, double* __restrict__ Gyyy, + double* __restrict__ Gyyz, double* __restrict__ Gyzz, + double* __restrict__ Gzxx, double* __restrict__ Gzxy, + double* __restrict__ Gzxz, double* __restrict__ Gzyy, + double* __restrict__ Gzyz, double* __restrict__ Gzzz, + /* fxx..fzz = fdderivs(Lap) output */ + double* __restrict__ fxx, double* __restrict__ fxy, + double* __restrict__ fxz, double* __restrict__ fyy, + double* __restrict__ fyz, double* __restrict__ fzz, + /* dtSfx_rhs..dtSfz_rhs = fderivs(chi) output, then overwritten */ + double* __restrict__ dtSfx_rhs, double* __restrict__ dtSfy_rhs, + double* __restrict__ dtSfz_rhs, + double* __restrict__ trK_rhs, + double* __restrict__ Axx_rhs, double* __restrict__ Axy_rhs, + double* __restrict__ Axz_rhs, double* __restrict__ Ayy_rhs, + double* __restrict__ Ayz_rhs, double* __restrict__ Azz_rhs, + double* __restrict__ Lap_rhs, + double* __restrict__ betax_rhs, double* __restrict__ betay_rhs, + double* __restrict__ betaz_rhs, + double* __restrict__ Gamx_rhs, double* __restrict__ Gamy_rhs, + double* __restrict__ Gamz_rhs, + double* __restrict__ f_arr, double* __restrict__ S_arr) +{ + const double TWO=2.0, FOUR=4.0, EIGHT=8.0, H=0.5; + const double F1o3=1.0/3.0, F2o3=2.0/3.0, F3o2=1.5; + const double PI_V=3.14159265358979323846; + const double F16=16.0, F8=8.0; + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; + double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; + double a=alpn1[i], c1=chin1[i]; + double cx=chix[i],cy=chiy[i],cz=chiz[i]; + double lx=Lapx[i],ly=Lapy[i],lz=Lapz[i]; + + /* raised chi/chi */ + double gx=(uxx*cx+uxy*cy+uxz*cz)/c1; + double gy=(uxy*cx+uyy*cy+uyz*cz)/c1; + double gz=(uxz*cx+uyz*cy+uzz*cz)/c1; + 
+ /* Christoffel physical correction */ + Gxxx[i]-=((cx+cx)/c1-gxx[i]*gx)*H; + Gyxx[i]-=(0.0-gxx[i]*gy)*H; + Gzxx[i]-=(0.0-gxx[i]*gz)*H; + Gxyy[i]-=(0.0-gyy[i]*gx)*H; + Gyyy[i]-=((cy+cy)/c1-gyy[i]*gy)*H; + Gzyy[i]-=(0.0-gyy[i]*gz)*H; + Gxzz[i]-=(0.0-gzz[i]*gx)*H; + Gyzz[i]-=(0.0-gzz[i]*gy)*H; + Gzzz[i]-=((cz+cz)/c1-gzz[i]*gz)*H; + Gxxy[i]-=(cy/c1-gxy[i]*gx)*H; + Gyxy[i]-=(cx/c1-gxy[i]*gy)*H; + Gzxy[i]-=(0.0-gxy[i]*gz)*H; + Gxxz[i]-=(cz/c1-gxz[i]*gx)*H; + Gyxz[i]-=(0.0-gxz[i]*gy)*H; + Gzxz[i]-=(cx/c1-gxz[i]*gz)*H; + Gxyz_o[i]-=(0.0-gyz[i]*gx)*H; + Gyyz[i]-=(cz/c1-gyz[i]*gy)*H; + Gzyz[i]-=(cy/c1-gyz[i]*gz)*H; + + /* fxx..fzz correction: subtract Gamma*Lap_deriv */ + fxx[i]-=Gxxx[i]*lx+Gyxx[i]*ly+Gzxx[i]*lz; + fyy[i]-=Gxyy[i]*lx+Gyyy[i]*ly+Gzyy[i]*lz; + fzz[i]-=Gxzz[i]*lx+Gyzz[i]*ly+Gzzz[i]*lz; + fxy[i]-=Gxxy[i]*lx+Gyxy[i]*ly+Gzxy[i]*lz; + fxz[i]-=Gxxz[i]*lx+Gyxz[i]*ly+Gzxz[i]*lz; + fyz[i]-=Gxyz_o[i]*lx+Gyyz[i]*ly+Gzyz[i]*lz; + + /* D^i D_i alpha */ + double DDA = uxx*fxx[i]+uyy*fyy[i]+uzz*fzz[i] + +TWO*(uxy*fxy[i]+uxz*fxz[i]+uyz*fyz[i]); + + /* trace of S_ij (physical) */ + double S_v = c1*(uxx*Sxx_m[i]+uyy*Syy_m[i]+uzz*Szz_m[i] + +TWO*(uxy*Sxy_m[i]+uxz*Sxz_m[i]+uyz*Syz_m[i])); + + /* A^ij A_ij */ + double AijAij = + uxx*(uxx*Axx[i]*Axx[i]+uyy*Axy[i]*Axy[i]+uzz*Axz[i]*Axz[i] + +TWO*(uxy*Axx[i]*Axy[i]+uxz*Axx[i]*Axz[i]+uyz*Axy[i]*Axz[i])) + +uyy*(uxx*Axy[i]*Axy[i]+uyy*Ayy[i]*Ayy[i]+uzz*Ayz[i]*Ayz[i] + +TWO*(uxy*Axy[i]*Ayy[i]+uxz*Axy[i]*Ayz[i]+uyz*Ayy[i]*Ayz[i])) + +uzz*(uxx*Axz[i]*Axz[i]+uyy*Ayz[i]*Ayz[i]+uzz*Azz[i]*Azz[i] + +TWO*(uxy*Axz[i]*Ayz[i]+uxz*Axz[i]*Azz[i]+uyz*Ayz[i]*Azz[i])) + +TWO*( + uxy*(uxx*Axx[i]*Axy[i]+uyy*Axy[i]*Ayy[i]+uzz*Axz[i]*Ayz[i] + +uxy*(Axx[i]*Ayy[i]+Axy[i]*Axy[i]) + +uxz*(Axx[i]*Ayz[i]+Axz[i]*Axy[i]) + +uyz*(Axy[i]*Ayz[i]+Axz[i]*Ayy[i])) + +uxz*(uxx*Axx[i]*Axz[i]+uyy*Axy[i]*Ayz[i]+uzz*Axz[i]*Azz[i] + +uxy*(Axx[i]*Ayz[i]+Axy[i]*Axz[i]) + +uxz*(Axx[i]*Azz[i]+Axz[i]*Axz[i]) + +uyz*(Axy[i]*Azz[i]+Axz[i]*Ayz[i])) + 
+uyz*(uxx*Axy[i]*Axz[i]+uyy*Ayy[i]*Ayz[i]+uzz*Ayz[i]*Azz[i] + +uxy*(Axy[i]*Ayz[i]+Ayy[i]*Axz[i]) + +uxz*(Axy[i]*Azz[i]+Ayz[i]*Axz[i]) + +uyz*(Ayy[i]*Azz[i]+Ayz[i]*Ayz[i]))); + double trK_v = trK[i]; double db = betaxx[i] + betayy[i] + betazz[i]; - - /* trK_rhs step 1: store D^iD_i alpha * chin1 */ - trK_rhs[i] = c1 * DDA; - - /* f_arr = -(1/3) * (DDA + alpha/chi * (2/3*K^2 - AijAij - 16pi*rho + 8pi*S)) */ - double f_v = F2o3*trK_v*trK_v - AijAij - F16*PI_V*rho[i] + EIGHT*PI_V*S_v; - f_arr[i] = -F1o3*(uxx*fxx[i]+uyy*fyy[i]+uzz*fzz[i] - +TWO*(uxy*fxy[i]+uxz*fxz[i]+uyz*fyz[i]) - +(a/c1)*f_v); - - /* fij = alpha*(Rij - 8pi*Sij) - D_iD_j alpha */ - double fxx_v=a*(Rxx[i]-EIGHT*PI_V*Sxx_m[i])-fxx[i]; - double fxy_v=a*(Rxy[i]-EIGHT*PI_V*Sxy_m[i])-fxy[i]; - double fxz_v=a*(Rxz[i]-EIGHT*PI_V*Sxz_m[i])-fxz[i]; - double fyy_v=a*(Ryy[i]-EIGHT*PI_V*Syy_m[i])-fyy[i]; - double fyz_v=a*(Ryz[i]-EIGHT*PI_V*Syz_m[i])-fyz[i]; - double fzz_v=a*(Rzz[i]-EIGHT*PI_V*Szz_m[i])-fzz[i]; - - /* Aij_rhs = chi*(fij - gij*f) */ - Axx_rhs[i]=fxx_v-gxx[i]*f_arr[i]; - Ayy_rhs[i]=fyy_v-gyy[i]*f_arr[i]; - Azz_rhs[i]=fzz_v-gzz[i]*f_arr[i]; - Axy_rhs[i]=fxy_v-gxy[i]*f_arr[i]; - Axz_rhs[i]=fxz_v-gxz[i]*f_arr[i]; - Ayz_rhs[i]=fyz_v-gyz[i]*f_arr[i]; - - /* A_il A^l_j */ - double AA_xx=uxx*Axx[i]*Axx[i]+uyy*Axy[i]*Axy[i]+uzz*Axz[i]*Axz[i] - +TWO*(uxy*Axx[i]*Axy[i]+uxz*Axx[i]*Axz[i]+uyz*Axy[i]*Axz[i]); - double AA_yy=uxx*Axy[i]*Axy[i]+uyy*Ayy[i]*Ayy[i]+uzz*Ayz[i]*Ayz[i] - +TWO*(uxy*Axy[i]*Ayy[i]+uxz*Axy[i]*Ayz[i]+uyz*Ayy[i]*Ayz[i]); - double AA_zz=uxx*Axz[i]*Axz[i]+uyy*Ayz[i]*Ayz[i]+uzz*Azz[i]*Azz[i] - +TWO*(uxy*Axz[i]*Ayz[i]+uxz*Axz[i]*Azz[i]+uyz*Ayz[i]*Azz[i]); - double AA_xy=uxx*Axx[i]*Axy[i]+uyy*Axy[i]*Ayy[i]+uzz*Axz[i]*Ayz[i] - +uxy*(Axx[i]*Ayy[i]+Axy[i]*Axy[i]) - +uxz*(Axx[i]*Ayz[i]+Axz[i]*Axy[i]) - +uyz*(Axy[i]*Ayz[i]+Axz[i]*Ayy[i]); - double AA_xz=uxx*Axx[i]*Axz[i]+uyy*Axy[i]*Ayz[i]+uzz*Axz[i]*Azz[i] - +uxy*(Axx[i]*Ayz[i]+Axy[i]*Axz[i]) - +uxz*(Axx[i]*Azz[i]+Axz[i]*Axz[i]) - 
+uyz*(Axy[i]*Azz[i]+Axz[i]*Ayz[i]); - double AA_yz=uxx*Axy[i]*Axz[i]+uyy*Ayy[i]*Ayz[i]+uzz*Ayz[i]*Azz[i] - +uxy*(Axy[i]*Ayz[i]+Ayy[i]*Axz[i]) - +uxz*(Axy[i]*Azz[i]+Ayz[i]*Axz[i]) - +uyz*(Ayy[i]*Azz[i]+Ayz[i]*Ayz[i]); - - /* trK_rhs final */ - trK_rhs[i] = -trK_rhs[i] - + a*(F1o3*trK_v*trK_v - +uxx*AA_xx+uyy*AA_yy+uzz*AA_zz - +TWO*(uxy*AA_xy+uxz*AA_xz+uyz*AA_yz) - +FOUR*PI_V*(rho[i]+S_v)); - - /* Aij_rhs final */ - Axx_rhs[i]=c1*Axx_rhs[i]+a*(trK_v*Axx[i]-TWO*AA_xx) - +TWO*(Axx[i]*betaxx[i]+Axy[i]*betayx[i]+Axz[i]*betazx[i])-F2o3*Axx[i]*db; - Ayy_rhs[i]=c1*Ayy_rhs[i]+a*(trK_v*Ayy[i]-TWO*AA_yy) - +TWO*(Axy[i]*betaxy[i]+Ayy[i]*betayy[i]+Ayz[i]*betazy[i])-F2o3*Ayy[i]*db; - Azz_rhs[i]=c1*Azz_rhs[i]+a*(trK_v*Azz[i]-TWO*AA_zz) - +TWO*(Axz[i]*betaxz[i]+Ayz[i]*betayz[i]+Azz[i]*betazz[i])-F2o3*Azz[i]*db; - Axy_rhs[i]=c1*Axy_rhs[i]+a*(trK_v*Axy[i]-TWO*AA_xy) - +Axx[i]*betaxy[i]+Axz[i]*betazy[i]+Ayy[i]*betayx[i] - +Ayz[i]*betazx[i]+F1o3*Axy[i]*db-Axy[i]*betazz[i]; - Ayz_rhs[i]=c1*Ayz_rhs[i]+a*(trK_v*Ayz[i]-TWO*AA_yz) - +Axy[i]*betaxz[i]+Ayy[i]*betayz[i]+Axz[i]*betaxy[i] - +Azz[i]*betazy[i]+F1o3*Ayz[i]*db-Ayz[i]*betaxx[i]; - Axz_rhs[i]=c1*Axz_rhs[i]+a*(trK_v*Axz[i]-TWO*AA_xz) - +Axx[i]*betaxz[i]+Axy[i]*betayz[i]+Ayz[i]*betayx[i] - +Azz[i]*betazx[i]+F1o3*Axz[i]*db-Axz[i]*betayy[i]; - - /* gauge */ - Lap_rhs[i] = -TWO*a*trK_v; - betax_rhs[i] = 0.75*dtSfx[i]; - betay_rhs[i] = 0.75*dtSfy[i]; - betaz_rhs[i] = 0.75*dtSfz[i]; -#if (GAUGE == 0) - dtSfx_rhs[i] = Gamx_rhs[i] - 2.0*dtSfx[i]; - dtSfy_rhs[i] = Gamy_rhs[i] - 2.0*dtSfy[i]; - dtSfz_rhs[i] = Gamz_rhs[i] - 2.0*dtSfz[i]; -#endif - } -} - -/* Phase 18: Hamilton & momentum constraints (co==0 only) */ -__global__ __launch_bounds__(128, 4) -void kern_phase18_constraints( - const double* __restrict__ chin1, - const double* __restrict__ chix, const double* __restrict__ chiy, - const double* __restrict__ chiz, - const double* __restrict__ gupxx, const double* __restrict__ gupxy, - const double* __restrict__ gupxz, const double* 
__restrict__ gupyy, - const double* __restrict__ gupyz, const double* __restrict__ gupzz, - const double* __restrict__ trK, - const double* __restrict__ Axx, const double* __restrict__ Axy, - const double* __restrict__ Axz, const double* __restrict__ Ayy, - const double* __restrict__ Ayz, const double* __restrict__ Azz, - const double* __restrict__ Rxx, const double* __restrict__ Rxy, - const double* __restrict__ Rxz, const double* __restrict__ Ryy, - const double* __restrict__ Ryz, const double* __restrict__ Rzz, - const double* __restrict__ rho, - const double* __restrict__ Sx_m, const double* __restrict__ Sy_m, - const double* __restrict__ Sz_m, - const double* __restrict__ Kx, const double* __restrict__ Ky, - const double* __restrict__ Kz, - const double* __restrict__ Gxxx, const double* __restrict__ Gxxy, - const double* __restrict__ Gxxz, const double* __restrict__ Gxyy, - const double* __restrict__ Gxyz, const double* __restrict__ Gxzz, - const double* __restrict__ Gyxx, const double* __restrict__ Gyxy, - const double* __restrict__ Gyxz, const double* __restrict__ Gyyy, - const double* __restrict__ Gyyz, const double* __restrict__ Gyzz, - const double* __restrict__ Gzxx, const double* __restrict__ Gzxy, - const double* __restrict__ Gzxz, const double* __restrict__ Gzyy, - const double* __restrict__ Gzyz, const double* __restrict__ Gzzz, - /* dA/dx arrays (fderivs of Aij) */ - const double* __restrict__ dAxx_x, const double* __restrict__ dAxx_y, - const double* __restrict__ dAxx_z, - const double* __restrict__ dAxy_x, const double* __restrict__ dAxy_y, - const double* __restrict__ dAxy_z, - const double* __restrict__ dAxz_x, const double* __restrict__ dAxz_y, - const double* __restrict__ dAxz_z, - const double* __restrict__ dAyy_x, const double* __restrict__ dAyy_y, - const double* __restrict__ dAyy_z, - const double* __restrict__ dAyz_x, const double* __restrict__ dAyz_y, - const double* __restrict__ dAyz_z, - const double* __restrict__ dAzz_x, const double* 
__restrict__ dAzz_y, - const double* __restrict__ dAzz_z, - double* __restrict__ ham_Res, - double* __restrict__ movx_Res, double* __restrict__ movy_Res, - double* __restrict__ movz_Res) -{ - const double TWO=2.0, F2o3=2.0/3.0, F8=8.0, F16=16.0; - const double PI_V=3.14159265358979323846; - for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; - i += blockDim.x*gridDim.x) - { - double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; - double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; - double c1=chin1[i]; - - /* Hamiltonian constraint */ - double R_sc = uxx*Rxx[i]+uyy*Ryy[i]+uzz*Rzz[i] - +TWO*(uxy*Rxy[i]+uxz*Rxz[i]+uyz*Ryz[i]); - /* AijAij (same as in phase15) */ - double AijAij = - uxx*(uxx*Axx[i]*Axx[i]+uyy*Axy[i]*Axy[i]+uzz*Axz[i]*Axz[i] - +TWO*(uxy*Axx[i]*Axy[i]+uxz*Axx[i]*Axz[i]+uyz*Axy[i]*Axz[i])) - +uyy*(uxx*Axy[i]*Axy[i]+uyy*Ayy[i]*Ayy[i]+uzz*Ayz[i]*Ayz[i] - +TWO*(uxy*Axy[i]*Ayy[i]+uxz*Axy[i]*Ayz[i]+uyz*Ayy[i]*Ayz[i])) - +uzz*(uxx*Axz[i]*Axz[i]+uyy*Ayz[i]*Ayz[i]+uzz*Azz[i]*Azz[i] - +TWO*(uxy*Axz[i]*Ayz[i]+uxz*Axz[i]*Azz[i]+uyz*Ayz[i]*Azz[i])) - +TWO*(uxy*(uxx*Axx[i]*Axy[i]+uyy*Axy[i]*Ayy[i]+uzz*Axz[i]*Ayz[i] - +uxy*(Axx[i]*Ayy[i]+Axy[i]*Axy[i]) - +uxz*(Axx[i]*Ayz[i]+Axz[i]*Axy[i]) - +uyz*(Axy[i]*Ayz[i]+Axz[i]*Ayy[i])) - +uxz*(uxx*Axx[i]*Axz[i]+uyy*Axy[i]*Ayz[i]+uzz*Axz[i]*Azz[i] - +uxy*(Axx[i]*Ayz[i]+Axy[i]*Axz[i]) - +uxz*(Axx[i]*Azz[i]+Axz[i]*Axz[i]) - +uyz*(Axy[i]*Azz[i]+Axz[i]*Ayz[i])) - +uyz*(uxx*Axy[i]*Axz[i]+uyy*Ayy[i]*Ayz[i]+uzz*Ayz[i]*Azz[i] - +uxy*(Axy[i]*Ayz[i]+Ayy[i]*Axz[i]) - +uxz*(Axy[i]*Azz[i]+Ayz[i]*Axz[i]) - +uyz*(Ayy[i]*Azz[i]+Ayz[i]*Ayz[i]))); - - ham_Res[i] = c1*R_sc + F2o3*trK[i]*trK[i] - AijAij - F16*PI_V*rho[i]; - - /* Momentum constraints: need covariant derivative of A */ - double cx=chix[i],cy=chiy[i],cz=chiz[i]; - /* D_j A^j_x etc — subtract Christoffel and chi terms */ - /* gxxx = dAxx_x - 2*Gxxx*Axx - ... 
- chix*Axx/chin1 etc */ - double mx_xx = dAxx_x[i]-(Gxxx[i]*Axx[i]+Gyxx[i]*Axy[i]+Gzxx[i]*Axz[i] - +Gxxx[i]*Axx[i]+Gyxx[i]*Axy[i]+Gzxx[i]*Axz[i])-cx*Axx[i]/c1; - double mx_xy = dAxy_x[i]-(Gxxy[i]*Axx[i]+Gyxy[i]*Axy[i]+Gzxy[i]*Axz[i] - +Gxxx[i]*Axy[i]+Gyxx[i]*Ayy[i]+Gzxx[i]*Ayz[i])-cx*Axy[i]/c1; - double mx_xz = dAxz_x[i]-(Gxxz[i]*Axx[i]+Gyxz[i]*Axy[i]+Gzxz[i]*Axz[i] - +Gxxx[i]*Axz[i]+Gyxx[i]*Ayz[i]+Gzxx[i]*Azz[i])-cx*Axz[i]/c1; - double mx_yy = dAyy_x[i]-(Gxxy[i]*Axy[i]+Gyxy[i]*Ayy[i]+Gzxy[i]*Ayz[i] - +Gxxy[i]*Axy[i]+Gyxy[i]*Ayy[i]+Gzxy[i]*Ayz[i])-cx*Ayy[i]/c1; - double mx_yz = dAyz_x[i]-(Gxxz[i]*Axy[i]+Gyxz[i]*Ayy[i]+Gzxz[i]*Ayz[i] - +Gxxy[i]*Axz[i]+Gyxy[i]*Ayz[i]+Gzxy[i]*Azz[i])-cx*Ayz[i]/c1; - double mx_zz = dAzz_x[i]-(Gxxz[i]*Axz[i]+Gyxz[i]*Ayz[i]+Gzxz[i]*Azz[i] - +Gxxz[i]*Axz[i]+Gyxz[i]*Ayz[i]+Gzxz[i]*Azz[i])-cx*Azz[i]/c1; - - double my_xx = dAxx_y[i]-(Gxxy[i]*Axx[i]+Gyxy[i]*Axy[i]+Gzxy[i]*Axz[i] - +Gxxy[i]*Axx[i]+Gyxy[i]*Axy[i]+Gzxy[i]*Axz[i])-cy*Axx[i]/c1; - double my_xy = dAxy_y[i]-(Gxyy[i]*Axx[i]+Gyyy[i]*Axy[i]+Gzyy[i]*Axz[i] - +Gxxy[i]*Axy[i]+Gyxy[i]*Ayy[i]+Gzxy[i]*Ayz[i])-cy*Axy[i]/c1; - double my_xz = dAxz_y[i]-(Gxyz[i]*Axx[i]+Gyyz[i]*Axy[i]+Gzyz[i]*Axz[i] - +Gxxy[i]*Axz[i]+Gyxy[i]*Ayz[i]+Gzxy[i]*Azz[i])-cy*Axz[i]/c1; - double my_yy = dAyy_y[i]-(Gxyy[i]*Axy[i]+Gyyy[i]*Ayy[i]+Gzyy[i]*Ayz[i] - +Gxyy[i]*Axy[i]+Gyyy[i]*Ayy[i]+Gzyy[i]*Ayz[i])-cy*Ayy[i]/c1; - double my_yz = dAyz_y[i]-(Gxyz[i]*Axy[i]+Gyyz[i]*Ayy[i]+Gzyz[i]*Ayz[i] - +Gxyy[i]*Axz[i]+Gyyy[i]*Ayz[i]+Gzyy[i]*Azz[i])-cy*Ayz[i]/c1; - double my_zz = dAzz_y[i]-(Gxyz[i]*Axz[i]+Gyyz[i]*Ayz[i]+Gzyz[i]*Azz[i] - +Gxyz[i]*Axz[i]+Gyyz[i]*Ayz[i]+Gzyz[i]*Azz[i])-cy*Azz[i]/c1; - - double mz_xx = dAxx_z[i]-(Gxxz[i]*Axx[i]+Gyxz[i]*Axy[i]+Gzxz[i]*Axz[i] - +Gxxz[i]*Axx[i]+Gyxz[i]*Axy[i]+Gzxz[i]*Axz[i])-cz*Axx[i]/c1; - double mz_xy = dAxy_z[i]-(Gxyz[i]*Axx[i]+Gyyz[i]*Axy[i]+Gzyz[i]*Axz[i] - +Gxxz[i]*Axy[i]+Gyxz[i]*Ayy[i]+Gzxz[i]*Ayz[i])-cz*Axy[i]/c1; - double mz_xz = 
dAxz_z[i]-(Gxzz[i]*Axx[i]+Gyzz[i]*Axy[i]+Gzzz[i]*Axz[i] - +Gxxz[i]*Axz[i]+Gyxz[i]*Ayz[i]+Gzxz[i]*Azz[i])-cz*Axz[i]/c1; - double mz_yy = dAyy_z[i]-(Gxyz[i]*Axy[i]+Gyyz[i]*Ayy[i]+Gzyz[i]*Ayz[i] - +Gxyz[i]*Axy[i]+Gyyz[i]*Ayy[i]+Gzyz[i]*Ayz[i])-cz*Ayy[i]/c1; - double mz_yz = dAyz_z[i]-(Gxzz[i]*Axy[i]+Gyzz[i]*Ayy[i]+Gzzz[i]*Ayz[i] - +Gxyz[i]*Axz[i]+Gyyz[i]*Ayz[i]+Gzyz[i]*Azz[i])-cz*Ayz[i]/c1; - double mz_zz = dAzz_z[i]-(Gxzz[i]*Axz[i]+Gyzz[i]*Ayz[i]+Gzzz[i]*Azz[i] - +Gxzz[i]*Axz[i]+Gyzz[i]*Ayz[i]+Gzzz[i]*Azz[i])-cz*Azz[i]/c1; - - movx_Res[i] = uxx*mx_xx+uyy*my_xy+uzz*mz_xz - +uxy*mx_xy+uxz*mx_xz+uyz*my_xz - +uxy*my_xx+uxz*mz_xx+uyz*mz_xy - - F2o3*Kx[i] - F8*PI_V*Sx_m[i]; - movy_Res[i] = uxx*mx_xy+uyy*my_yy+uzz*mz_yz - +uxy*mx_yy+uxz*mx_yz+uyz*my_yz - +uxy*my_xy+uxz*mz_xy+uyz*mz_yy - - F2o3*Ky[i] - F8*PI_V*Sy_m[i]; - movz_Res[i] = uxx*mx_xz+uyy*my_yz+uzz*mz_zz - +uxy*mx_yz+uxz*mx_zz+uyz*my_zz - +uxy*my_xz+uxz*mz_xz+uyz*mz_yz - - F2o3*Kz[i] - F8*PI_V*Sz_m[i]; - } + + /* trK_rhs step 1: store D^iD_i alpha * chin1 */ + trK_rhs[i] = c1 * DDA; + + /* f_arr = -(1/3) * (DDA + alpha/chi * (2/3*K^2 - AijAij - 16pi*rho + 8pi*S)) */ + double f_v = F2o3*trK_v*trK_v - AijAij - F16*PI_V*rho[i] + EIGHT*PI_V*S_v; + f_arr[i] = -F1o3*(uxx*fxx[i]+uyy*fyy[i]+uzz*fzz[i] + +TWO*(uxy*fxy[i]+uxz*fxz[i]+uyz*fyz[i]) + +(a/c1)*f_v); + + /* fij = alpha*(Rij - 8pi*Sij) - D_iD_j alpha */ + double fxx_v=a*(Rxx[i]-EIGHT*PI_V*Sxx_m[i])-fxx[i]; + double fxy_v=a*(Rxy[i]-EIGHT*PI_V*Sxy_m[i])-fxy[i]; + double fxz_v=a*(Rxz[i]-EIGHT*PI_V*Sxz_m[i])-fxz[i]; + double fyy_v=a*(Ryy[i]-EIGHT*PI_V*Syy_m[i])-fyy[i]; + double fyz_v=a*(Ryz[i]-EIGHT*PI_V*Syz_m[i])-fyz[i]; + double fzz_v=a*(Rzz[i]-EIGHT*PI_V*Szz_m[i])-fzz[i]; + + /* Aij_rhs = chi*(fij - gij*f) */ + Axx_rhs[i]=fxx_v-gxx[i]*f_arr[i]; + Ayy_rhs[i]=fyy_v-gyy[i]*f_arr[i]; + Azz_rhs[i]=fzz_v-gzz[i]*f_arr[i]; + Axy_rhs[i]=fxy_v-gxy[i]*f_arr[i]; + Axz_rhs[i]=fxz_v-gxz[i]*f_arr[i]; + Ayz_rhs[i]=fyz_v-gyz[i]*f_arr[i]; + + /* A_il A^l_j */ + double 
AA_xx=uxx*Axx[i]*Axx[i]+uyy*Axy[i]*Axy[i]+uzz*Axz[i]*Axz[i] + +TWO*(uxy*Axx[i]*Axy[i]+uxz*Axx[i]*Axz[i]+uyz*Axy[i]*Axz[i]); + double AA_yy=uxx*Axy[i]*Axy[i]+uyy*Ayy[i]*Ayy[i]+uzz*Ayz[i]*Ayz[i] + +TWO*(uxy*Axy[i]*Ayy[i]+uxz*Axy[i]*Ayz[i]+uyz*Ayy[i]*Ayz[i]); + double AA_zz=uxx*Axz[i]*Axz[i]+uyy*Ayz[i]*Ayz[i]+uzz*Azz[i]*Azz[i] + +TWO*(uxy*Axz[i]*Ayz[i]+uxz*Axz[i]*Azz[i]+uyz*Ayz[i]*Azz[i]); + double AA_xy=uxx*Axx[i]*Axy[i]+uyy*Axy[i]*Ayy[i]+uzz*Axz[i]*Ayz[i] + +uxy*(Axx[i]*Ayy[i]+Axy[i]*Axy[i]) + +uxz*(Axx[i]*Ayz[i]+Axz[i]*Axy[i]) + +uyz*(Axy[i]*Ayz[i]+Axz[i]*Ayy[i]); + double AA_xz=uxx*Axx[i]*Axz[i]+uyy*Axy[i]*Ayz[i]+uzz*Axz[i]*Azz[i] + +uxy*(Axx[i]*Ayz[i]+Axy[i]*Axz[i]) + +uxz*(Axx[i]*Azz[i]+Axz[i]*Axz[i]) + +uyz*(Axy[i]*Azz[i]+Axz[i]*Ayz[i]); + double AA_yz=uxx*Axy[i]*Axz[i]+uyy*Ayy[i]*Ayz[i]+uzz*Ayz[i]*Azz[i] + +uxy*(Axy[i]*Ayz[i]+Ayy[i]*Axz[i]) + +uxz*(Axy[i]*Azz[i]+Ayz[i]*Axz[i]) + +uyz*(Ayy[i]*Azz[i]+Ayz[i]*Ayz[i]); + + /* trK_rhs final */ + trK_rhs[i] = -trK_rhs[i] + + a*(F1o3*trK_v*trK_v + +uxx*AA_xx+uyy*AA_yy+uzz*AA_zz + +TWO*(uxy*AA_xy+uxz*AA_xz+uyz*AA_yz) + +FOUR*PI_V*(rho[i]+S_v)); + + /* Aij_rhs final */ + Axx_rhs[i]=c1*Axx_rhs[i]+a*(trK_v*Axx[i]-TWO*AA_xx) + +TWO*(Axx[i]*betaxx[i]+Axy[i]*betayx[i]+Axz[i]*betazx[i])-F2o3*Axx[i]*db; + Ayy_rhs[i]=c1*Ayy_rhs[i]+a*(trK_v*Ayy[i]-TWO*AA_yy) + +TWO*(Axy[i]*betaxy[i]+Ayy[i]*betayy[i]+Ayz[i]*betazy[i])-F2o3*Ayy[i]*db; + Azz_rhs[i]=c1*Azz_rhs[i]+a*(trK_v*Azz[i]-TWO*AA_zz) + +TWO*(Axz[i]*betaxz[i]+Ayz[i]*betayz[i]+Azz[i]*betazz[i])-F2o3*Azz[i]*db; + Axy_rhs[i]=c1*Axy_rhs[i]+a*(trK_v*Axy[i]-TWO*AA_xy) + +Axx[i]*betaxy[i]+Axz[i]*betazy[i]+Ayy[i]*betayx[i] + +Ayz[i]*betazx[i]+F1o3*Axy[i]*db-Axy[i]*betazz[i]; + Ayz_rhs[i]=c1*Ayz_rhs[i]+a*(trK_v*Ayz[i]-TWO*AA_yz) + +Axy[i]*betaxz[i]+Ayy[i]*betayz[i]+Axz[i]*betaxy[i] + +Azz[i]*betazy[i]+F1o3*Ayz[i]*db-Ayz[i]*betaxx[i]; + Axz_rhs[i]=c1*Axz_rhs[i]+a*(trK_v*Axz[i]-TWO*AA_xz) + +Axx[i]*betaxz[i]+Axy[i]*betayz[i]+Ayz[i]*betayx[i] + 
+Azz[i]*betazx[i]+F1o3*Axz[i]*db-Axz[i]*betayy[i]; + + /* gauge */ + Lap_rhs[i] = -TWO*a*trK_v; + betax_rhs[i] = 0.75*dtSfx[i]; + betay_rhs[i] = 0.75*dtSfy[i]; + betaz_rhs[i] = 0.75*dtSfz[i]; +#if (GAUGE == 0) + dtSfx_rhs[i] = Gamx_rhs[i] - 2.0*dtSfx[i]; + dtSfy_rhs[i] = Gamy_rhs[i] - 2.0*dtSfy[i]; + dtSfz_rhs[i] = Gamz_rhs[i] - 2.0*dtSfz[i]; +#endif + } +} + +/* Phase 18: Hamiltonian & momentum constraints (co==0 only). For every grid point i this kernel writes ham_Res[i], movx_Res[i], movy_Res[i] and movz_Res[i]; all other arguments are read-only inputs. Points are visited with a grid-stride loop bounded by d_gp.all, so any 1-D launch configuration covers the whole array. NOTE(review): the G<l><ab> arrays are consumed as-is; presumably they must already hold the Christoffel symbols prepared by the earlier phases - confirm the launch order at the call site. */ +__global__ __launch_bounds__(128, 4) +void kern_phase18_constraints( + const double* __restrict__ chin1, + const double* __restrict__ chix, const double* __restrict__ chiy, + const double* __restrict__ chiz, + const double* __restrict__ gupxx, const double* __restrict__ gupxy, + const double* __restrict__ gupxz, const double* __restrict__ gupyy, + const double* __restrict__ gupyz, const double* __restrict__ gupzz, + const double* __restrict__ trK, + const double* __restrict__ Axx, const double* __restrict__ Axy, + const double* __restrict__ Axz, const double* __restrict__ Ayy, + const double* __restrict__ Ayz, const double* __restrict__ Azz, + const double* __restrict__ Rxx, const double* __restrict__ Rxy, + const double* __restrict__ Rxz, const double* __restrict__ Ryy, + const double* __restrict__ Ryz, const double* __restrict__ Rzz, + const double* __restrict__ rho, + const double* __restrict__ Sx_m, const double* __restrict__ Sy_m, + const double* __restrict__ Sz_m, + const double* __restrict__ Kx, const double* __restrict__ Ky, + const double* __restrict__ Kz, /* Kx..Kz: presumably the first derivatives of trK - TODO confirm at the call site */ + const double* __restrict__ Gxxx, const double* __restrict__ Gxxy, + const double* __restrict__ Gxxz, const double* __restrict__ Gxyy, + const double* __restrict__ Gxyz, const double* __restrict__ Gxzz, + const double* __restrict__ Gyxx, const double* __restrict__ Gyxy, + const double* __restrict__ Gyxz, const double* __restrict__ Gyyy, + const double* __restrict__ Gyyz, const double* __restrict__ Gyzz, + const double* __restrict__ Gzxx, const double* __restrict__ Gzxy, + const double*
__restrict__ Gzxz, const double* __restrict__ Gzyy, + const double* __restrict__ Gzyz, const double* __restrict__ Gzzz, + /* dA<ab>_<c> arrays: per the original note, fderivs output for Aij (derivative of A<ab> along c) */ + const double* __restrict__ dAxx_x, const double* __restrict__ dAxx_y, + const double* __restrict__ dAxx_z, + const double* __restrict__ dAxy_x, const double* __restrict__ dAxy_y, + const double* __restrict__ dAxy_z, + const double* __restrict__ dAxz_x, const double* __restrict__ dAxz_y, + const double* __restrict__ dAxz_z, + const double* __restrict__ dAyy_x, const double* __restrict__ dAyy_y, + const double* __restrict__ dAyy_z, + const double* __restrict__ dAyz_x, const double* __restrict__ dAyz_y, + const double* __restrict__ dAyz_z, + const double* __restrict__ dAzz_x, const double* __restrict__ dAzz_y, + const double* __restrict__ dAzz_z, + double* __restrict__ ham_Res, + double* __restrict__ movx_Res, double* __restrict__ movy_Res, + double* __restrict__ movz_Res) /* the four *_Res arrays are the only outputs */ +{ + const double TWO=2.0, F2o3=2.0/3.0, F8=8.0, F16=16.0; + const double PI_V=3.14159265358979323846; + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; + i += blockDim.x*gridDim.x) /* grid-stride loop: each thread handles i, i+stride, ... up to d_gp.all */ + { + double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; + double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; + double c1=chin1[i]; /* local copies of the six gup components and of chin1 at point i */ + + /* Hamiltonian constraint: R_sc is the contraction of Rij with the gup components (off-diagonal terms counted twice) */ + double R_sc = uxx*Rxx[i]+uyy*Ryy[i]+uzz*Rzz[i] + +TWO*(uxy*Rxy[i]+uxz*Rxz[i]+uyz*Ryz[i]); + /* AijAij: Aij contracted twice with gup (same expression as in the phase-15 kernel) */ + double AijAij = + uxx*(uxx*Axx[i]*Axx[i]+uyy*Axy[i]*Axy[i]+uzz*Axz[i]*Axz[i] + +TWO*(uxy*Axx[i]*Axy[i]+uxz*Axx[i]*Axz[i]+uyz*Axy[i]*Axz[i])) + +uyy*(uxx*Axy[i]*Axy[i]+uyy*Ayy[i]*Ayy[i]+uzz*Ayz[i]*Ayz[i] + +TWO*(uxy*Axy[i]*Ayy[i]+uxz*Axy[i]*Ayz[i]+uyz*Ayy[i]*Ayz[i])) + +uzz*(uxx*Axz[i]*Axz[i]+uyy*Ayz[i]*Ayz[i]+uzz*Azz[i]*Azz[i] + +TWO*(uxy*Axz[i]*Ayz[i]+uxz*Axz[i]*Azz[i]+uyz*Ayz[i]*Azz[i])) + +TWO*(uxy*(uxx*Axx[i]*Axy[i]+uyy*Axy[i]*Ayy[i]+uzz*Axz[i]*Ayz[i] + +uxy*(Axx[i]*Ayy[i]+Axy[i]*Axy[i]) + +uxz*(Axx[i]*Ayz[i]+Axz[i]*Axy[i]) + +uyz*(Axy[i]*Ayz[i]+Axz[i]*Ayy[i])) +
+uxz*(uxx*Axx[i]*Axz[i]+uyy*Axy[i]*Ayz[i]+uzz*Axz[i]*Azz[i] + +uxy*(Axx[i]*Ayz[i]+Axy[i]*Axz[i]) + +uxz*(Axx[i]*Azz[i]+Axz[i]*Axz[i]) + +uyz*(Axy[i]*Azz[i]+Axz[i]*Ayz[i])) + +uyz*(uxx*Axy[i]*Axz[i]+uyy*Ayy[i]*Ayz[i]+uzz*Ayz[i]*Azz[i] + +uxy*(Axy[i]*Ayz[i]+Ayy[i]*Axz[i]) + +uxz*(Axy[i]*Azz[i]+Ayz[i]*Axz[i]) + +uyz*(Ayy[i]*Azz[i]+Ayz[i]*Ayz[i]))); + + ham_Res[i] = c1*R_sc + F2o3*trK[i]*trK[i] - AijAij - F16*PI_V*rho[i]; /* residual = chin1*R_sc + (2/3)*trK^2 - AijAij - 16*pi*rho */ + + /* Momentum constraints: need covariant derivative of A */ + double cx=chix[i],cy=chiy[i],cz=chiz[i]; + /* D_j A^j_x etc - subtract Christoffel and chi terms */ + /* Pattern for the 18 temporaries: m<c>_<ab> = dA<ab>_<c> - sum over l=x,y,z of (G<l><cb>*A<l><a> + G<l><ca>*A<l><b>) - chi<c>*A<ab>/chin1, with index pairs written in x<y<z order; e.g. mx_xx = dAxx_x - 2*(Gxxx*Axx+Gyxx*Axy+Gzxx*Axz) - chix*Axx/chin1 */ + double mx_xx = dAxx_x[i]-(Gxxx[i]*Axx[i]+Gyxx[i]*Axy[i]+Gzxx[i]*Axz[i] + +Gxxx[i]*Axx[i]+Gyxx[i]*Axy[i]+Gzxx[i]*Axz[i])-cx*Axx[i]/c1; + double mx_xy = dAxy_x[i]-(Gxxy[i]*Axx[i]+Gyxy[i]*Axy[i]+Gzxy[i]*Axz[i] + +Gxxx[i]*Axy[i]+Gyxx[i]*Ayy[i]+Gzxx[i]*Ayz[i])-cx*Axy[i]/c1; + double mx_xz = dAxz_x[i]-(Gxxz[i]*Axx[i]+Gyxz[i]*Axy[i]+Gzxz[i]*Axz[i] + +Gxxx[i]*Axz[i]+Gyxx[i]*Ayz[i]+Gzxx[i]*Azz[i])-cx*Axz[i]/c1; + double mx_yy = dAyy_x[i]-(Gxxy[i]*Axy[i]+Gyxy[i]*Ayy[i]+Gzxy[i]*Ayz[i] + +Gxxy[i]*Axy[i]+Gyxy[i]*Ayy[i]+Gzxy[i]*Ayz[i])-cx*Ayy[i]/c1; + double mx_yz = dAyz_x[i]-(Gxxz[i]*Axy[i]+Gyxz[i]*Ayy[i]+Gzxz[i]*Ayz[i] + +Gxxy[i]*Axz[i]+Gyxy[i]*Ayz[i]+Gzxy[i]*Azz[i])-cx*Ayz[i]/c1; + double mx_zz = dAzz_x[i]-(Gxxz[i]*Axz[i]+Gyxz[i]*Ayz[i]+Gzxz[i]*Azz[i] + +Gxxz[i]*Axz[i]+Gyxz[i]*Ayz[i]+Gzxz[i]*Azz[i])-cx*Azz[i]/c1; + + double my_xx = dAxx_y[i]-(Gxxy[i]*Axx[i]+Gyxy[i]*Axy[i]+Gzxy[i]*Axz[i] + +Gxxy[i]*Axx[i]+Gyxy[i]*Axy[i]+Gzxy[i]*Axz[i])-cy*Axx[i]/c1; + double my_xy = dAxy_y[i]-(Gxyy[i]*Axx[i]+Gyyy[i]*Axy[i]+Gzyy[i]*Axz[i] + +Gxxy[i]*Axy[i]+Gyxy[i]*Ayy[i]+Gzxy[i]*Ayz[i])-cy*Axy[i]/c1; + double my_xz = dAxz_y[i]-(Gxyz[i]*Axx[i]+Gyyz[i]*Axy[i]+Gzyz[i]*Axz[i] + +Gxxy[i]*Axz[i]+Gyxy[i]*Ayz[i]+Gzxy[i]*Azz[i])-cy*Axz[i]/c1; + double my_yy = dAyy_y[i]-(Gxyy[i]*Axy[i]+Gyyy[i]*Ayy[i]+Gzyy[i]*Ayz[i] +
+Gxyy[i]*Axy[i]+Gyyy[i]*Ayy[i]+Gzyy[i]*Ayz[i])-cy*Ayy[i]/c1; + double my_yz = dAyz_y[i]-(Gxyz[i]*Axy[i]+Gyyz[i]*Ayy[i]+Gzyz[i]*Ayz[i] + +Gxyy[i]*Axz[i]+Gyyy[i]*Ayz[i]+Gzyy[i]*Azz[i])-cy*Ayz[i]/c1; + double my_zz = dAzz_y[i]-(Gxyz[i]*Axz[i]+Gyyz[i]*Ayz[i]+Gzyz[i]*Azz[i] + +Gxyz[i]*Axz[i]+Gyyz[i]*Ayz[i]+Gzyz[i]*Azz[i])-cy*Azz[i]/c1; + + double mz_xx = dAxx_z[i]-(Gxxz[i]*Axx[i]+Gyxz[i]*Axy[i]+Gzxz[i]*Axz[i] + +Gxxz[i]*Axx[i]+Gyxz[i]*Axy[i]+Gzxz[i]*Axz[i])-cz*Axx[i]/c1; + double mz_xy = dAxy_z[i]-(Gxyz[i]*Axx[i]+Gyyz[i]*Axy[i]+Gzyz[i]*Axz[i] + +Gxxz[i]*Axy[i]+Gyxz[i]*Ayy[i]+Gzxz[i]*Ayz[i])-cz*Axy[i]/c1; + double mz_xz = dAxz_z[i]-(Gxzz[i]*Axx[i]+Gyzz[i]*Axy[i]+Gzzz[i]*Axz[i] + +Gxxz[i]*Axz[i]+Gyxz[i]*Ayz[i]+Gzxz[i]*Azz[i])-cz*Axz[i]/c1; + double mz_yy = dAyy_z[i]-(Gxyz[i]*Axy[i]+Gyyz[i]*Ayy[i]+Gzyz[i]*Ayz[i] + +Gxyz[i]*Axy[i]+Gyyz[i]*Ayy[i]+Gzyz[i]*Ayz[i])-cz*Ayy[i]/c1; + double mz_yz = dAyz_z[i]-(Gxzz[i]*Axy[i]+Gyzz[i]*Ayy[i]+Gzzz[i]*Ayz[i] + +Gxyz[i]*Axz[i]+Gyyz[i]*Ayz[i]+Gzyz[i]*Azz[i])-cz*Ayz[i]/c1; + double mz_zz = dAzz_z[i]-(Gxzz[i]*Axz[i]+Gyzz[i]*Ayz[i]+Gzzz[i]*Azz[i] + +Gxzz[i]*Axz[i]+Gyzz[i]*Ayz[i]+Gzzz[i]*Azz[i])-cz*Azz[i]/c1; /* residual for component a (below): sum over b,c of gup<bc>*m<b>_<ca>, minus (2/3)*K<a>, minus 8*pi*S<a>_m */ + + movx_Res[i] = uxx*mx_xx+uyy*my_xy+uzz*mz_xz + +uxy*mx_xy+uxz*mx_xz+uyz*my_xz + +uxy*my_xx+uxz*mz_xx+uyz*mz_xy + - F2o3*Kx[i] - F8*PI_V*Sx_m[i]; + movy_Res[i] = uxx*mx_xy+uyy*my_yy+uzz*mz_yz + +uxy*mx_yy+uxz*mx_yz+uyz*my_yz + +uxy*my_xy+uxz*mz_xy+uyz*mz_yy + - F2o3*Ky[i] - F8*PI_V*Sy_m[i]; + movz_Res[i] = uxx*mx_xz+uyy*my_yz+uzz*mz_zz + +uxy*mx_yz+uxz*mx_zz+uyz*my_zz + +uxy*my_xz+uxz*mz_xz+uyz*mz_yz + - F2o3*Kz[i] - F8*PI_V*Sz_m[i]; + } } static void setup_grid_params(int *ex, @@ -3018,75 +3018,75 @@ static bool has_resident_state(void *block_tag) /* ================================================================== */ /* Main host function — drop-in replacement for bssn_rhs_c.C */ /* ================================================================== */ - + extern "C" int f_compute_rhs_bssn(int *ex, double &T, - double *X, double *Y,
double *Z, - double *chi, double *trK, - double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz, - double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz, - double *Gamx, double *Gamy, double *Gamz, - double *Lap, double *betax, double *betay, double *betaz, - double *dtSfx, double *dtSfy, double *dtSfz, - double *chi_rhs, double *trK_rhs, - double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, - double *gyy_rhs, double *gyz_rhs, double *gzz_rhs, - double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, - double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs, - double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs, - double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs, - double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs, - double *rho, double *Sx, double *Sy, double *Sz, - double *Sxx, double *Sxy_m, double *Sxz, double *Syy, double *Syz_m, double *Szz, - double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, - double *Gamxyz, double *Gamxzz, - double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, - double *Gamyyz, double *Gamyzz, - double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, - double *Gamzyz, double *Gamzzz, - double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz, - double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res, - double *Gmx_Res, double *Gmy_Res, double *Gmz_Res, - int &Symmetry, int &Lev, double &eps, int &co) + double *X, double *Y, double *Z, + double *chi, double *trK, + double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz, + double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz, + double *Gamx, double *Gamy, double *Gamz, + double *Lap, double *betax, double *betay, double *betaz, + double *dtSfx, double *dtSfy, double *dtSfz, + double *chi_rhs, double *trK_rhs, + double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, + double *gyy_rhs, double *gyz_rhs, double *gzz_rhs, + 
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, + double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs, + double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs, + double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs, + double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs, + double *rho, double *Sx, double *Sy, double *Sz, + double *Sxx, double *Sxy_m, double *Sxz, double *Syy, double *Syz_m, double *Szz, + double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, + double *Gamxyz, double *Gamxzz, + double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, + double *Gamyyz, double *Gamyzz, + double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, + double *Gamzyz, double *Gamzzz, + double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz, + double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res, + double *Gmx_Res, double *Gmy_Res, double *Gmz_Res, + int &Symmetry, int &Lev, double &eps, int &co) { /* --- Multi-GPU: select device --- */ init_gpu_dispatch(); CUDA_CHECK(cudaSetDevice(g_dispatch.my_device)); - + const int nx = ex[0], ny = ex[1], nz = ex[2]; - const int all = nx * ny * nz; - const double dX = X[1]-X[0], dY = Y[1]-Y[0], dZ = Z[1]-Z[0]; - const int NO_SYMM = 0, EQ_SYMM = 1; - const double SYM = 1.0, ANTI = -1.0; - - /* --- Allocate GPU buffers --- */ - ensure_gpu_buffers(nx, ny, nz); - - /* --- Setup GridParams --- */ - GridParams gp; - gp.ex[0]=nx; gp.ex[1]=ny; gp.ex[2]=nz; - gp.all=all; gp.dX=dX; gp.dY=dY; gp.dZ=dZ; - gp.d12dx=1.0/(12.0*dX); gp.d12dy=1.0/(12.0*dY); gp.d12dz=1.0/(12.0*dZ); - gp.d2dx=1.0/(2.0*dX); gp.d2dy=1.0/(2.0*dY); gp.d2dz=1.0/(2.0*dZ); - gp.Fdxdx=1.0/(12.0*dX*dX); gp.Fdydy=1.0/(12.0*dY*dY); gp.Fdzdz=1.0/(12.0*dZ*dZ); - gp.Sdxdx=1.0/(dX*dX); gp.Sdydy=1.0/(dY*dY); gp.Sdzdz=1.0/(dZ*dZ); - gp.Fdxdy=1.0/(144.0*dX*dY); gp.Fdxdz=1.0/(144.0*dX*dZ); gp.Fdydz=1.0/(144.0*dY*dZ); - gp.Sdxdy=0.25/(dX*dY); gp.Sdxdz=0.25/(dX*dZ); gp.Sdydz=0.25/(dY*dZ); - gp.imaxF=nx; 
gp.jmaxF=ny; gp.kmaxF=nz; - gp.iminF=1; gp.jminF=1; gp.kminF=1; - if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) gp.kminF = -1; - if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) gp.iminF = -1; - if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) gp.jminF = -1; - gp.iminF3=1; gp.jminF3=1; gp.kminF3=1; - if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) gp.kminF3 = -2; - if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) gp.iminF3 = -2; - if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) gp.jminF3 = -2; - gp.Symmetry=Symmetry; gp.eps=eps; gp.co=co; - gp.fh2_nx=nx+2; gp.fh2_ny=ny+2; gp.fh2_nz=nz+2; - gp.fh3_nx=nx+3; gp.fh3_ny=ny+3; gp.fh3_nz=nz+3; - CUDA_CHECK(cudaMemcpyToSymbol(d_gp, &gp, sizeof(GridParams))); - - /* --- Shorthand for device slot pointers --- */ + const int all = nx * ny * nz; + const double dX = X[1]-X[0], dY = Y[1]-Y[0], dZ = Z[1]-Z[0]; + const int NO_SYMM = 0, EQ_SYMM = 1; + const double SYM = 1.0, ANTI = -1.0; + + /* --- Allocate GPU buffers --- */ + ensure_gpu_buffers(nx, ny, nz); + + /* --- Setup GridParams --- */ + GridParams gp; + gp.ex[0]=nx; gp.ex[1]=ny; gp.ex[2]=nz; + gp.all=all; gp.dX=dX; gp.dY=dY; gp.dZ=dZ; + gp.d12dx=1.0/(12.0*dX); gp.d12dy=1.0/(12.0*dY); gp.d12dz=1.0/(12.0*dZ); + gp.d2dx=1.0/(2.0*dX); gp.d2dy=1.0/(2.0*dY); gp.d2dz=1.0/(2.0*dZ); + gp.Fdxdx=1.0/(12.0*dX*dX); gp.Fdydy=1.0/(12.0*dY*dY); gp.Fdzdz=1.0/(12.0*dZ*dZ); + gp.Sdxdx=1.0/(dX*dX); gp.Sdydy=1.0/(dY*dY); gp.Sdzdz=1.0/(dZ*dZ); + gp.Fdxdy=1.0/(144.0*dX*dY); gp.Fdxdz=1.0/(144.0*dX*dZ); gp.Fdydz=1.0/(144.0*dY*dZ); + gp.Sdxdy=0.25/(dX*dY); gp.Sdxdz=0.25/(dX*dZ); gp.Sdydz=0.25/(dY*dZ); + gp.imaxF=nx; gp.jmaxF=ny; gp.kmaxF=nz; + gp.iminF=1; gp.jminF=1; gp.kminF=1; + if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) gp.kminF = -1; + if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) gp.iminF = -1; + if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) gp.jminF = -1; + gp.iminF3=1; gp.jminF3=1; gp.kminF3=1; + if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) gp.kminF3 = -2; + if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) gp.iminF3 = -2; + if 
(Symmetry > EQ_SYMM && fabs(Y[0]) < dY) gp.jminF3 = -2; + gp.Symmetry=Symmetry; gp.eps=eps; gp.co=co; + gp.fh2_nx=nx+2; gp.fh2_ny=ny+2; gp.fh2_nz=nz+2; + gp.fh3_nx=nx+3; gp.fh3_ny=ny+3; gp.fh3_nz=nz+3; + CUDA_CHECK(cudaMemcpyToSymbol(d_gp, &gp, sizeof(GridParams))); + + /* --- Shorthand for device slot pointers --- */ #define D(s) g_buf.slot[s] const size_t bytes = (size_t)all * sizeof(double); @@ -3108,31 +3108,31 @@ int f_compute_rhs_bssn(int *ex, double &T, CUDA_CHECK(cudaMemcpy(D(S_chi), g_buf.h_stage, (size_t)H2D_INPUT_SLOT_COUNT * bytes, cudaMemcpyHostToDevice)); - + /* ============================================================ */ /* Phase 1: prep — alpn1, chin1, gxx, gyy, gzz */ - /* ============================================================ */ - kern_phase1_prep<<>>( - D(S_Lap), D(S_chi), D(S_dxx), D(S_dyy), D(S_dzz), - D(S_alpn1), D(S_chin1), D(S_gxx), D(S_gyy), D(S_gzz)); - - /* 12x fderivs */ - gpu_fderivs(D(S_betax), D(S_betaxx),D(S_betaxy),D(S_betaxz), ANTI,SYM,SYM, all); - gpu_fderivs(D(S_betay), D(S_betayx),D(S_betayy),D(S_betayz), SYM,ANTI,SYM, all); - gpu_fderivs(D(S_betaz), D(S_betazx),D(S_betazy),D(S_betazz), SYM,SYM,ANTI, all); - gpu_fderivs(D(S_chi), D(S_chix),D(S_chiy),D(S_chiz), SYM,SYM,SYM, all); - gpu_fderivs(D(S_dxx), D(S_gxxx),D(S_gxxy),D(S_gxxz), SYM,SYM,SYM, all); - gpu_fderivs(D(S_gxy), D(S_gxyx),D(S_gxyy),D(S_gxyz), ANTI,ANTI,SYM, all); - gpu_fderivs(D(S_gxz), D(S_gxzx),D(S_gxzy),D(S_gxzz), ANTI,SYM,ANTI, all); - gpu_fderivs(D(S_dyy), D(S_gyyx),D(S_gyyy),D(S_gyyz), SYM,SYM,SYM, all); - gpu_fderivs(D(S_gyz), D(S_gyzx),D(S_gyzy),D(S_gyzz), SYM,ANTI,ANTI, all); - gpu_fderivs(D(S_dzz), D(S_gzzx),D(S_gzzy),D(S_gzzz), SYM,SYM,SYM, all); - gpu_fderivs(D(S_Lap), D(S_Lapx),D(S_Lapy),D(S_Lapz), SYM,SYM,SYM, all); - gpu_fderivs(D(S_trK), D(S_Kx),D(S_Ky),D(S_Kz), SYM,SYM,SYM, all); - - /* ============================================================ */ - /* Phase 2: metric RHS + inverse */ - /* 
============================================================ */ + /* ============================================================ */ + kern_phase1_prep<<>>( + D(S_Lap), D(S_chi), D(S_dxx), D(S_dyy), D(S_dzz), + D(S_alpn1), D(S_chin1), D(S_gxx), D(S_gyy), D(S_gzz)); + + /* 12x fderivs */ + gpu_fderivs(D(S_betax), D(S_betaxx),D(S_betaxy),D(S_betaxz), ANTI,SYM,SYM, all); + gpu_fderivs(D(S_betay), D(S_betayx),D(S_betayy),D(S_betayz), SYM,ANTI,SYM, all); + gpu_fderivs(D(S_betaz), D(S_betazx),D(S_betazy),D(S_betazz), SYM,SYM,ANTI, all); + gpu_fderivs(D(S_chi), D(S_chix),D(S_chiy),D(S_chiz), SYM,SYM,SYM, all); + gpu_fderivs(D(S_dxx), D(S_gxxx),D(S_gxxy),D(S_gxxz), SYM,SYM,SYM, all); + gpu_fderivs(D(S_gxy), D(S_gxyx),D(S_gxyy),D(S_gxyz), ANTI,ANTI,SYM, all); + gpu_fderivs(D(S_gxz), D(S_gxzx),D(S_gxzy),D(S_gxzz), ANTI,SYM,ANTI, all); + gpu_fderivs(D(S_dyy), D(S_gyyx),D(S_gyyy),D(S_gyyz), SYM,SYM,SYM, all); + gpu_fderivs(D(S_gyz), D(S_gyzx),D(S_gyzy),D(S_gyzz), SYM,ANTI,ANTI, all); + gpu_fderivs(D(S_dzz), D(S_gzzx),D(S_gzzy),D(S_gzzz), SYM,SYM,SYM, all); + gpu_fderivs(D(S_Lap), D(S_Lapx),D(S_Lapy),D(S_Lapz), SYM,SYM,SYM, all); + gpu_fderivs(D(S_trK), D(S_Kx),D(S_Ky),D(S_Kz), SYM,SYM,SYM, all); + + /* ============================================================ */ + /* Phase 2: metric RHS + inverse */ + /* ============================================================ */ kern_phase2_metric_rhs<<>>( D(S_alpn1), D(S_chin1), D(S_gxx), D(S_gxy), D(S_gxz), D(S_gyy), D(S_gyz), D(S_gzz), @@ -3143,221 +3143,221 @@ int f_compute_rhs_bssn(int *ex, double &T, D(S_betazx), D(S_betazy), D(S_betazz), D(S_chi_rhs), D(S_gxx_rhs), D(S_gyy_rhs), D(S_gzz_rhs), D(S_gxy_rhs), D(S_gyz_rhs), D(S_gxz_rhs)); - - kern_phase2_inverse<<>>( - D(S_gxx), D(S_gxy), D(S_gxz), D(S_gyy), D(S_gyz), D(S_gzz), - D(S_gupxx), D(S_gupxy), D(S_gupxz), - D(S_gupyy), D(S_gupyz), D(S_gupzz)); - - /* Phase 3: Gamma constraint (co==0) */ - if (co == 0) { - kern_phase3_gamma_constraint<<>>( - D(S_Gamx), D(S_Gamy), D(S_Gamz), 
- D(S_gupxx), D(S_gupxy), D(S_gupxz), - D(S_gupyy), D(S_gupyz), D(S_gupzz), - D(S_gxxx), D(S_gxyx), D(S_gxzx), D(S_gyyx), D(S_gyzx), D(S_gzzx), - D(S_gxxy), D(S_gxyy), D(S_gxzy), D(S_gyyy), D(S_gyzy), D(S_gzzy), - D(S_gxxz), D(S_gxyz), D(S_gxzz), D(S_gyyz), D(S_gyzz), D(S_gzzz), - D(S_Gmx_Res), D(S_Gmy_Res), D(S_Gmz_Res)); - } - - /* Phase 4: Christoffel symbols */ - kern_phase4_christoffel<<>>( - D(S_gupxx), D(S_gupxy), D(S_gupxz), - D(S_gupyy), D(S_gupyz), D(S_gupzz), - D(S_gxxx), D(S_gxyx), D(S_gxzx), D(S_gyyx), D(S_gyzx), D(S_gzzx), - D(S_gxxy), D(S_gxyy), D(S_gxzy), D(S_gyyy), D(S_gyzy), D(S_gzzy), - D(S_gxxz), D(S_gxyz), D(S_gxzz), D(S_gyyz), D(S_gyzz), D(S_gzzz), - D(S_Gamxxx), D(S_Gamxxy), D(S_Gamxxz), - D(S_Gamxyy), D(S_Gamxyz), D(S_Gamxzz), - D(S_Gamyxx), D(S_Gamyxy), D(S_Gamyxz), - D(S_Gamyyy), D(S_Gamyyz), D(S_Gamyzz), - D(S_Gamzxx), D(S_Gamzxy), D(S_Gamzxz), - D(S_Gamzyy), D(S_Gamzyz), D(S_Gamzzz)); - - /* Phase 5: Raise A index (stored in Rxx..Rzz temporarily) */ - kern_phase5_raise_A<<>>( - D(S_gupxx), D(S_gupxy), D(S_gupxz), - D(S_gupyy), D(S_gupyz), D(S_gupzz), - D(S_Axx), D(S_Axy), D(S_Axz), D(S_Ayy), D(S_Ayz), D(S_Azz), - D(S_Rxx), D(S_Rxy), D(S_Rxz), D(S_Ryy), D(S_Ryz), D(S_Rzz)); - - /* Phase 6: Gamma_rhs part 1 */ - kern_phase6_gamma_rhs_part1<<>>( - D(S_Lapx), D(S_Lapy), D(S_Lapz), - D(S_alpn1), D(S_chin1), - D(S_chix), D(S_chiy), D(S_chiz), - D(S_gupxx), D(S_gupxy), D(S_gupxz), - D(S_gupyy), D(S_gupyz), D(S_gupzz), - D(S_Kx), D(S_Ky), D(S_Kz), - D(S_Sx), D(S_Sy), D(S_Sz), - D(S_Rxx), D(S_Rxy), D(S_Rxz), - D(S_Ryy), D(S_Ryz), D(S_Rzz), - D(S_Gamxxx), D(S_Gamxxy), D(S_Gamxxz), - D(S_Gamxyy), D(S_Gamxyz), D(S_Gamxzz), - D(S_Gamyxx), D(S_Gamyxy), D(S_Gamyxz), - D(S_Gamyyy), D(S_Gamyyz), D(S_Gamyzz), - D(S_Gamzxx), D(S_Gamzxy), D(S_Gamzxz), - D(S_Gamzyy), D(S_Gamzyz), D(S_Gamzzz), - D(S_Gamx_rhs), D(S_Gamy_rhs), D(S_Gamz_rhs)); - - /* Phase 7: fdderivs(beta) + fderivs(Gamma) */ - gpu_fdderivs(D(S_betax), D(S_gxxx),D(S_gxyx),D(S_gxzx), - 
D(S_gyyx),D(S_gyzx),D(S_gzzx), ANTI,SYM,SYM, all); - gpu_fdderivs(D(S_betay), D(S_gxxy),D(S_gxyy),D(S_gxzy), - D(S_gyyy),D(S_gyzy),D(S_gzzy), SYM,ANTI,SYM, all); - gpu_fdderivs(D(S_betaz), D(S_gxxz),D(S_gxyz),D(S_gxzz), - D(S_gyyz),D(S_gyzz),D(S_gzzz), SYM,SYM,ANTI, all); - gpu_fderivs(D(S_Gamx), D(S_Gamxx),D(S_Gamxy),D(S_Gamxz), ANTI,SYM,SYM, all); - gpu_fderivs(D(S_Gamy), D(S_Gamyx),D(S_Gamyy_t),D(S_Gamyz_t), SYM,ANTI,SYM, all); - gpu_fderivs(D(S_Gamz), D(S_Gamzx),D(S_Gamzy),D(S_Gamzz_t), SYM,SYM,ANTI, all); - - /* Phase 8: Gamma_rhs part 2 */ + + kern_phase2_inverse<<>>( + D(S_gxx), D(S_gxy), D(S_gxz), D(S_gyy), D(S_gyz), D(S_gzz), + D(S_gupxx), D(S_gupxy), D(S_gupxz), + D(S_gupyy), D(S_gupyz), D(S_gupzz)); + + /* Phase 3: Gamma constraint (co==0) */ + if (co == 0) { + kern_phase3_gamma_constraint<<>>( + D(S_Gamx), D(S_Gamy), D(S_Gamz), + D(S_gupxx), D(S_gupxy), D(S_gupxz), + D(S_gupyy), D(S_gupyz), D(S_gupzz), + D(S_gxxx), D(S_gxyx), D(S_gxzx), D(S_gyyx), D(S_gyzx), D(S_gzzx), + D(S_gxxy), D(S_gxyy), D(S_gxzy), D(S_gyyy), D(S_gyzy), D(S_gzzy), + D(S_gxxz), D(S_gxyz), D(S_gxzz), D(S_gyyz), D(S_gyzz), D(S_gzzz), + D(S_Gmx_Res), D(S_Gmy_Res), D(S_Gmz_Res)); + } + + /* Phase 4: Christoffel symbols */ + kern_phase4_christoffel<<>>( + D(S_gupxx), D(S_gupxy), D(S_gupxz), + D(S_gupyy), D(S_gupyz), D(S_gupzz), + D(S_gxxx), D(S_gxyx), D(S_gxzx), D(S_gyyx), D(S_gyzx), D(S_gzzx), + D(S_gxxy), D(S_gxyy), D(S_gxzy), D(S_gyyy), D(S_gyzy), D(S_gzzy), + D(S_gxxz), D(S_gxyz), D(S_gxzz), D(S_gyyz), D(S_gyzz), D(S_gzzz), + D(S_Gamxxx), D(S_Gamxxy), D(S_Gamxxz), + D(S_Gamxyy), D(S_Gamxyz), D(S_Gamxzz), + D(S_Gamyxx), D(S_Gamyxy), D(S_Gamyxz), + D(S_Gamyyy), D(S_Gamyyz), D(S_Gamyzz), + D(S_Gamzxx), D(S_Gamzxy), D(S_Gamzxz), + D(S_Gamzyy), D(S_Gamzyz), D(S_Gamzzz)); + + /* Phase 5: Raise A index (stored in Rxx..Rzz temporarily) */ + kern_phase5_raise_A<<>>( + D(S_gupxx), D(S_gupxy), D(S_gupxz), + D(S_gupyy), D(S_gupyz), D(S_gupzz), + D(S_Axx), D(S_Axy), D(S_Axz), D(S_Ayy), D(S_Ayz), 
D(S_Azz), + D(S_Rxx), D(S_Rxy), D(S_Rxz), D(S_Ryy), D(S_Ryz), D(S_Rzz)); + + /* Phase 6: Gamma_rhs part 1 */ + kern_phase6_gamma_rhs_part1<<>>( + D(S_Lapx), D(S_Lapy), D(S_Lapz), + D(S_alpn1), D(S_chin1), + D(S_chix), D(S_chiy), D(S_chiz), + D(S_gupxx), D(S_gupxy), D(S_gupxz), + D(S_gupyy), D(S_gupyz), D(S_gupzz), + D(S_Kx), D(S_Ky), D(S_Kz), + D(S_Sx), D(S_Sy), D(S_Sz), + D(S_Rxx), D(S_Rxy), D(S_Rxz), + D(S_Ryy), D(S_Ryz), D(S_Rzz), + D(S_Gamxxx), D(S_Gamxxy), D(S_Gamxxz), + D(S_Gamxyy), D(S_Gamxyz), D(S_Gamxzz), + D(S_Gamyxx), D(S_Gamyxy), D(S_Gamyxz), + D(S_Gamyyy), D(S_Gamyyz), D(S_Gamyzz), + D(S_Gamzxx), D(S_Gamzxy), D(S_Gamzxz), + D(S_Gamzyy), D(S_Gamzyz), D(S_Gamzzz), + D(S_Gamx_rhs), D(S_Gamy_rhs), D(S_Gamz_rhs)); + + /* Phase 7: fdderivs(beta) + fderivs(Gamma) */ + gpu_fdderivs(D(S_betax), D(S_gxxx),D(S_gxyx),D(S_gxzx), + D(S_gyyx),D(S_gyzx),D(S_gzzx), ANTI,SYM,SYM, all); + gpu_fdderivs(D(S_betay), D(S_gxxy),D(S_gxyy),D(S_gxzy), + D(S_gyyy),D(S_gyzy),D(S_gzzy), SYM,ANTI,SYM, all); + gpu_fdderivs(D(S_betaz), D(S_gxxz),D(S_gxyz),D(S_gxzz), + D(S_gyyz),D(S_gyzz),D(S_gzzz), SYM,SYM,ANTI, all); + gpu_fderivs(D(S_Gamx), D(S_Gamxx),D(S_Gamxy),D(S_Gamxz), ANTI,SYM,SYM, all); + gpu_fderivs(D(S_Gamy), D(S_Gamyx),D(S_Gamyy_t),D(S_Gamyz_t), SYM,ANTI,SYM, all); + gpu_fderivs(D(S_Gamz), D(S_Gamzx),D(S_Gamzy),D(S_Gamzz_t), SYM,SYM,ANTI, all); + + /* Phase 8: Gamma_rhs part 2 */ kern_phase8_gamma_rhs_part2<<>>( D(S_gupxx), D(S_gupxy), D(S_gupxz), D(S_gupyy), D(S_gupyz), D(S_gupzz), D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx), D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy), D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz), - D(S_Gamxx),D(S_Gamxy),D(S_Gamxz), - D(S_Gamyx),D(S_Gamyy_t),D(S_Gamyz_t), - D(S_Gamzx),D(S_Gamzy),D(S_Gamzz_t), - D(S_Gamxxx),D(S_Gamxxy),D(S_Gamxxz), - D(S_Gamxyy),D(S_Gamxyz),D(S_Gamxzz), - D(S_Gamyxx),D(S_Gamyxy),D(S_Gamyxz), - D(S_Gamyyy),D(S_Gamyyz),D(S_Gamyzz), - D(S_Gamzxx),D(S_Gamzxy),D(S_Gamzxz), - 
D(S_Gamzyy),D(S_Gamzyz),D(S_Gamzzz), - D(S_betaxx),D(S_betaxy),D(S_betaxz), - D(S_betayx),D(S_betayy),D(S_betayz), - D(S_betazx),D(S_betazy),D(S_betazz), - D(S_Gamx_rhs),D(S_Gamy_rhs),D(S_Gamz_rhs), - D(S_Gamxa),D(S_Gamya),D(S_Gamza)); - - /* Phase 9: Christoffel contract (lowered products for Ricci) */ - kern_phase9_christoffel_contract<<>>( - D(S_gxx),D(S_gxy),D(S_gxz),D(S_gyy),D(S_gyz),D(S_gzz), - D(S_Gamxxx),D(S_Gamxxy),D(S_Gamxxz), - D(S_Gamxyy),D(S_Gamxyz),D(S_Gamxzz), - D(S_Gamyxx),D(S_Gamyxy),D(S_Gamyxz), - D(S_Gamyyy),D(S_Gamyyz),D(S_Gamyzz), - D(S_Gamzxx),D(S_Gamzxy),D(S_Gamzxz), - D(S_Gamzyy),D(S_Gamzyz),D(S_Gamzzz), - D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx), - D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy), - D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz)); - - /* Phase 10: 6x fdderivs(metric) + Ricci contract */ - gpu_fdderivs(D(S_dxx), D(S_fxx),D(S_fxy),D(S_fxz), - D(S_fyy),D(S_fyz),D(S_fzz), SYM,SYM,SYM, all); - kern_phase10_ricci_contract<<>>( - D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), - D(S_fxx),D(S_fxy),D(S_fxz),D(S_fyy),D(S_fyz),D(S_fzz), D(S_Rxx)); - - gpu_fdderivs(D(S_dyy), D(S_fxx),D(S_fxy),D(S_fxz), - D(S_fyy),D(S_fyz),D(S_fzz), SYM,SYM,SYM, all); - kern_phase10_ricci_contract<<>>( - D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), - D(S_fxx),D(S_fxy),D(S_fxz),D(S_fyy),D(S_fyz),D(S_fzz), D(S_Ryy)); - - gpu_fdderivs(D(S_dzz), D(S_fxx),D(S_fxy),D(S_fxz), - D(S_fyy),D(S_fyz),D(S_fzz), SYM,SYM,SYM, all); - kern_phase10_ricci_contract<<>>( - D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), - D(S_fxx),D(S_fxy),D(S_fxz),D(S_fyy),D(S_fyz),D(S_fzz), D(S_Rzz)); - - gpu_fdderivs(D(S_gxy), D(S_fxx),D(S_fxy),D(S_fxz), - D(S_fyy),D(S_fyz),D(S_fzz), ANTI,ANTI,SYM, all); - kern_phase10_ricci_contract<<>>( - D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), - D(S_fxx),D(S_fxy),D(S_fxz),D(S_fyy),D(S_fyz),D(S_fzz), D(S_Rxy)); - - 
gpu_fdderivs(D(S_gxz), D(S_fxx),D(S_fxy),D(S_fxz), - D(S_fyy),D(S_fyz),D(S_fzz), ANTI,SYM,ANTI, all); - kern_phase10_ricci_contract<<>>( - D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), - D(S_fxx),D(S_fxy),D(S_fxz),D(S_fyy),D(S_fyz),D(S_fzz), D(S_Rxz)); - - gpu_fdderivs(D(S_gyz), D(S_fxx),D(S_fxy),D(S_fxz), - D(S_fyy),D(S_fyz),D(S_fzz), SYM,ANTI,ANTI, all); - kern_phase10_ricci_contract<<>>( - D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), - D(S_fxx),D(S_fxy),D(S_fxz),D(S_fyy),D(S_fyz),D(S_fzz), D(S_Ryz)); - - /* Phase 11: Ricci assembly (diagonal + off-diagonal) */ - kern_phase11_ricci_diag<<>>( - D(S_gxx),D(S_gxy),D(S_gxz),D(S_gyy),D(S_gyz),D(S_gzz), - D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), - D(S_Gamxa),D(S_Gamya),D(S_Gamza), - D(S_Gamxx),D(S_Gamxy),D(S_Gamxz), - D(S_Gamyx),D(S_Gamyy_t),D(S_Gamyz_t), - D(S_Gamzx),D(S_Gamzy),D(S_Gamzz_t), - D(S_Gamxxx),D(S_Gamxxy),D(S_Gamxxz), - D(S_Gamxyy),D(S_Gamxyz),D(S_Gamxzz), - D(S_Gamyxx),D(S_Gamyxy),D(S_Gamyxz), - D(S_Gamyyy),D(S_Gamyyz),D(S_Gamyzz), - D(S_Gamzxx),D(S_Gamzxy),D(S_Gamzxz), - D(S_Gamzyy),D(S_Gamzyz),D(S_Gamzzz), - D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx), - D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy), - D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz), - D(S_Rxx),D(S_Ryy),D(S_Rzz)); - - kern_phase11_ricci_offdiag<<>>( - D(S_gxx),D(S_gxy),D(S_gxz),D(S_gyy),D(S_gyz),D(S_gzz), - D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), - D(S_Gamxa),D(S_Gamya),D(S_Gamza), - D(S_Gamxx),D(S_Gamxy),D(S_Gamxz), - D(S_Gamyx),D(S_Gamyy_t),D(S_Gamyz_t), - D(S_Gamzx),D(S_Gamzy),D(S_Gamzz_t), - D(S_Gamxxx),D(S_Gamxxy),D(S_Gamxxz), - D(S_Gamxyy),D(S_Gamxyz),D(S_Gamxzz), - D(S_Gamyxx),D(S_Gamyxy),D(S_Gamyxz), - D(S_Gamyyy),D(S_Gamyyz),D(S_Gamyzz), - D(S_Gamzxx),D(S_Gamzxy),D(S_Gamzxz), - D(S_Gamzyy),D(S_Gamzyz),D(S_Gamzzz), - D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx), - 
D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy), - D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz), - D(S_Rxy),D(S_Rxz),D(S_Ryz)); - - /* ============================================================ */ - /* Phase 12: fdderivs(chi) */ - /* ============================================================ */ - gpu_fdderivs(D(S_chi), D(S_fxx),D(S_fxy),D(S_fxz), - D(S_fyy),D(S_fyz),D(S_fzz), SYM,SYM,SYM, all); - - /* ============================================================ */ - /* Phase 13: chi correction to Ricci */ - /* ============================================================ */ - kern_phase13_chi_correction<<>>( - D(S_chin1), - D(S_chix), D(S_chiy), D(S_chiz), - D(S_gxx), D(S_gxy), D(S_gxz), D(S_gyy), D(S_gyz), D(S_gzz), - D(S_gupxx), D(S_gupxy), D(S_gupxz), - D(S_gupyy), D(S_gupyz), D(S_gupzz), - D(S_Gamxxx), D(S_Gamxxy), D(S_Gamxxz), - D(S_Gamxyy), D(S_Gamxyz), D(S_Gamxzz), - D(S_Gamyxx), D(S_Gamyxy), D(S_Gamyxz), - D(S_Gamyyy), D(S_Gamyyz), D(S_Gamyzz), - D(S_Gamzxx), D(S_Gamzxy), D(S_Gamzxz), - D(S_Gamzyy), D(S_Gamzyz), D(S_Gamzzz), - D(S_fxx), D(S_fxy), D(S_fxz), - D(S_fyy), D(S_fyz), D(S_fzz), - D(S_Rxx), D(S_Rxy), D(S_Rxz), - D(S_Ryy), D(S_Ryz), D(S_Rzz)); - - /* ============================================================ */ - /* Phase 14: fdderivs(Lap) + fderivs(chi) */ - /* ============================================================ */ - gpu_fdderivs(D(S_Lap), D(S_fxx),D(S_fxy),D(S_fxz), - D(S_fyy),D(S_fyz),D(S_fzz), SYM,SYM,SYM, all); - gpu_fderivs(D(S_chi), D(S_dtSfx_rhs),D(S_dtSfy_rhs),D(S_dtSfz_rhs), - SYM,SYM,SYM, all); - - /* ============================================================ */ - /* Phase 15: trK_rhs, Aij_rhs, gauge */ - /* ============================================================ */ + D(S_Gamxx),D(S_Gamxy),D(S_Gamxz), + D(S_Gamyx),D(S_Gamyy_t),D(S_Gamyz_t), + D(S_Gamzx),D(S_Gamzy),D(S_Gamzz_t), + D(S_Gamxxx),D(S_Gamxxy),D(S_Gamxxz), + D(S_Gamxyy),D(S_Gamxyz),D(S_Gamxzz), + 
D(S_Gamyxx),D(S_Gamyxy),D(S_Gamyxz), + D(S_Gamyyy),D(S_Gamyyz),D(S_Gamyzz), + D(S_Gamzxx),D(S_Gamzxy),D(S_Gamzxz), + D(S_Gamzyy),D(S_Gamzyz),D(S_Gamzzz), + D(S_betaxx),D(S_betaxy),D(S_betaxz), + D(S_betayx),D(S_betayy),D(S_betayz), + D(S_betazx),D(S_betazy),D(S_betazz), + D(S_Gamx_rhs),D(S_Gamy_rhs),D(S_Gamz_rhs), + D(S_Gamxa),D(S_Gamya),D(S_Gamza)); + + /* Phase 9: Christoffel contract (lowered products for Ricci) */ + kern_phase9_christoffel_contract<<>>( + D(S_gxx),D(S_gxy),D(S_gxz),D(S_gyy),D(S_gyz),D(S_gzz), + D(S_Gamxxx),D(S_Gamxxy),D(S_Gamxxz), + D(S_Gamxyy),D(S_Gamxyz),D(S_Gamxzz), + D(S_Gamyxx),D(S_Gamyxy),D(S_Gamyxz), + D(S_Gamyyy),D(S_Gamyyz),D(S_Gamyzz), + D(S_Gamzxx),D(S_Gamzxy),D(S_Gamzxz), + D(S_Gamzyy),D(S_Gamzyz),D(S_Gamzzz), + D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx), + D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy), + D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz)); + + /* Phase 10: 6x fdderivs(metric) + Ricci contract */ + gpu_fdderivs(D(S_dxx), D(S_fxx),D(S_fxy),D(S_fxz), + D(S_fyy),D(S_fyz),D(S_fzz), SYM,SYM,SYM, all); + kern_phase10_ricci_contract<<>>( + D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), + D(S_fxx),D(S_fxy),D(S_fxz),D(S_fyy),D(S_fyz),D(S_fzz), D(S_Rxx)); + + gpu_fdderivs(D(S_dyy), D(S_fxx),D(S_fxy),D(S_fxz), + D(S_fyy),D(S_fyz),D(S_fzz), SYM,SYM,SYM, all); + kern_phase10_ricci_contract<<>>( + D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), + D(S_fxx),D(S_fxy),D(S_fxz),D(S_fyy),D(S_fyz),D(S_fzz), D(S_Ryy)); + + gpu_fdderivs(D(S_dzz), D(S_fxx),D(S_fxy),D(S_fxz), + D(S_fyy),D(S_fyz),D(S_fzz), SYM,SYM,SYM, all); + kern_phase10_ricci_contract<<>>( + D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), + D(S_fxx),D(S_fxy),D(S_fxz),D(S_fyy),D(S_fyz),D(S_fzz), D(S_Rzz)); + + gpu_fdderivs(D(S_gxy), D(S_fxx),D(S_fxy),D(S_fxz), + D(S_fyy),D(S_fyz),D(S_fzz), ANTI,ANTI,SYM, all); + kern_phase10_ricci_contract<<>>( + 
D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), + D(S_fxx),D(S_fxy),D(S_fxz),D(S_fyy),D(S_fyz),D(S_fzz), D(S_Rxy)); + + gpu_fdderivs(D(S_gxz), D(S_fxx),D(S_fxy),D(S_fxz), + D(S_fyy),D(S_fyz),D(S_fzz), ANTI,SYM,ANTI, all); + kern_phase10_ricci_contract<<>>( + D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), + D(S_fxx),D(S_fxy),D(S_fxz),D(S_fyy),D(S_fyz),D(S_fzz), D(S_Rxz)); + + gpu_fdderivs(D(S_gyz), D(S_fxx),D(S_fxy),D(S_fxz), + D(S_fyy),D(S_fyz),D(S_fzz), SYM,ANTI,ANTI, all); + kern_phase10_ricci_contract<<>>( + D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), + D(S_fxx),D(S_fxy),D(S_fxz),D(S_fyy),D(S_fyz),D(S_fzz), D(S_Ryz)); + + /* Phase 11: Ricci assembly (diagonal + off-diagonal) */ + kern_phase11_ricci_diag<<>>( + D(S_gxx),D(S_gxy),D(S_gxz),D(S_gyy),D(S_gyz),D(S_gzz), + D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), + D(S_Gamxa),D(S_Gamya),D(S_Gamza), + D(S_Gamxx),D(S_Gamxy),D(S_Gamxz), + D(S_Gamyx),D(S_Gamyy_t),D(S_Gamyz_t), + D(S_Gamzx),D(S_Gamzy),D(S_Gamzz_t), + D(S_Gamxxx),D(S_Gamxxy),D(S_Gamxxz), + D(S_Gamxyy),D(S_Gamxyz),D(S_Gamxzz), + D(S_Gamyxx),D(S_Gamyxy),D(S_Gamyxz), + D(S_Gamyyy),D(S_Gamyyz),D(S_Gamyzz), + D(S_Gamzxx),D(S_Gamzxy),D(S_Gamzxz), + D(S_Gamzyy),D(S_Gamzyz),D(S_Gamzzz), + D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx), + D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy), + D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz), + D(S_Rxx),D(S_Ryy),D(S_Rzz)); + + kern_phase11_ricci_offdiag<<>>( + D(S_gxx),D(S_gxy),D(S_gxz),D(S_gyy),D(S_gyz),D(S_gzz), + D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), + D(S_Gamxa),D(S_Gamya),D(S_Gamza), + D(S_Gamxx),D(S_Gamxy),D(S_Gamxz), + D(S_Gamyx),D(S_Gamyy_t),D(S_Gamyz_t), + D(S_Gamzx),D(S_Gamzy),D(S_Gamzz_t), + D(S_Gamxxx),D(S_Gamxxy),D(S_Gamxxz), + D(S_Gamxyy),D(S_Gamxyz),D(S_Gamxzz), + D(S_Gamyxx),D(S_Gamyxy),D(S_Gamyxz), + D(S_Gamyyy),D(S_Gamyyz),D(S_Gamyzz), + 
D(S_Gamzxx),D(S_Gamzxy),D(S_Gamzxz), + D(S_Gamzyy),D(S_Gamzyz),D(S_Gamzzz), + D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx), + D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy), + D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz), + D(S_Rxy),D(S_Rxz),D(S_Ryz)); + + /* ============================================================ */ + /* Phase 12: fdderivs(chi) */ + /* ============================================================ */ + gpu_fdderivs(D(S_chi), D(S_fxx),D(S_fxy),D(S_fxz), + D(S_fyy),D(S_fyz),D(S_fzz), SYM,SYM,SYM, all); + + /* ============================================================ */ + /* Phase 13: chi correction to Ricci */ + /* ============================================================ */ + kern_phase13_chi_correction<<>>( + D(S_chin1), + D(S_chix), D(S_chiy), D(S_chiz), + D(S_gxx), D(S_gxy), D(S_gxz), D(S_gyy), D(S_gyz), D(S_gzz), + D(S_gupxx), D(S_gupxy), D(S_gupxz), + D(S_gupyy), D(S_gupyz), D(S_gupzz), + D(S_Gamxxx), D(S_Gamxxy), D(S_Gamxxz), + D(S_Gamxyy), D(S_Gamxyz), D(S_Gamxzz), + D(S_Gamyxx), D(S_Gamyxy), D(S_Gamyxz), + D(S_Gamyyy), D(S_Gamyyz), D(S_Gamyzz), + D(S_Gamzxx), D(S_Gamzxy), D(S_Gamzxz), + D(S_Gamzyy), D(S_Gamzyz), D(S_Gamzzz), + D(S_fxx), D(S_fxy), D(S_fxz), + D(S_fyy), D(S_fyz), D(S_fzz), + D(S_Rxx), D(S_Rxy), D(S_Rxz), + D(S_Ryy), D(S_Ryz), D(S_Rzz)); + + /* ============================================================ */ + /* Phase 14: fdderivs(Lap) + fderivs(chi) */ + /* ============================================================ */ + gpu_fdderivs(D(S_Lap), D(S_fxx),D(S_fxy),D(S_fxz), + D(S_fyy),D(S_fyz),D(S_fzz), SYM,SYM,SYM, all); + gpu_fderivs(D(S_chi), D(S_dtSfx_rhs),D(S_dtSfy_rhs),D(S_dtSfz_rhs), + SYM,SYM,SYM, all); + + /* ============================================================ */ + /* Phase 15: trK_rhs, Aij_rhs, gauge */ + /* ============================================================ */ kern_phase15_trK_Aij_gauge<<>>( D(S_alpn1), D(S_chin1), D(S_chix), D(S_chiy), D(S_chiz), 
@@ -3368,30 +3368,30 @@ int f_compute_rhs_bssn(int *ex, double &T, D(S_Axx), D(S_Axy), D(S_Axz), D(S_Ayy), D(S_Ayz), D(S_Azz), D(S_Lapx), D(S_Lapy), D(S_Lapz), D(S_betaxx), D(S_betaxy), D(S_betaxz), - D(S_betayx), D(S_betayy), D(S_betayz), - D(S_betazx), D(S_betazy), D(S_betazz), - D(S_rho), - D(S_Sx), D(S_Sy), D(S_Sz), - D(S_Sxx), D(S_Sxy), D(S_Sxz), D(S_Syy), D(S_Syz), D(S_Szz), - D(S_dtSfx), D(S_dtSfy), D(S_dtSfz), - D(S_Rxx), D(S_Rxy), D(S_Rxz), D(S_Ryy), D(S_Ryz), D(S_Rzz), - D(S_Gamxxx), D(S_Gamxxy), D(S_Gamxxz), - D(S_Gamxyy), D(S_Gamxyz), D(S_Gamxzz), - D(S_Gamyxx), D(S_Gamyxy), D(S_Gamyxz), - D(S_Gamyyy), D(S_Gamyyz), D(S_Gamyzz), - D(S_Gamzxx), D(S_Gamzxy), D(S_Gamzxz), - D(S_Gamzyy), D(S_Gamzyz), D(S_Gamzzz), - D(S_fxx), D(S_fxy), D(S_fxz), - D(S_fyy), D(S_fyz), D(S_fzz), - D(S_dtSfx_rhs), D(S_dtSfy_rhs), D(S_dtSfz_rhs), - D(S_trK_rhs), - D(S_Axx_rhs), D(S_Axy_rhs), D(S_Axz_rhs), - D(S_Ayy_rhs), D(S_Ayz_rhs), D(S_Azz_rhs), - D(S_Lap_rhs), - D(S_betax_rhs), D(S_betay_rhs), D(S_betaz_rhs), - D(S_Gamx_rhs), D(S_Gamy_rhs), D(S_Gamz_rhs), - D(S_f_arr), D(S_S_arr)); - + D(S_betayx), D(S_betayy), D(S_betayz), + D(S_betazx), D(S_betazy), D(S_betazz), + D(S_rho), + D(S_Sx), D(S_Sy), D(S_Sz), + D(S_Sxx), D(S_Sxy), D(S_Sxz), D(S_Syy), D(S_Syz), D(S_Szz), + D(S_dtSfx), D(S_dtSfy), D(S_dtSfz), + D(S_Rxx), D(S_Rxy), D(S_Rxz), D(S_Ryy), D(S_Ryz), D(S_Rzz), + D(S_Gamxxx), D(S_Gamxxy), D(S_Gamxxz), + D(S_Gamxyy), D(S_Gamxyz), D(S_Gamxzz), + D(S_Gamyxx), D(S_Gamyxy), D(S_Gamyxz), + D(S_Gamyyy), D(S_Gamyyz), D(S_Gamyzz), + D(S_Gamzxx), D(S_Gamzxy), D(S_Gamzxz), + D(S_Gamzyy), D(S_Gamzyz), D(S_Gamzzz), + D(S_fxx), D(S_fxy), D(S_fxz), + D(S_fyy), D(S_fyz), D(S_fzz), + D(S_dtSfx_rhs), D(S_dtSfy_rhs), D(S_dtSfz_rhs), + D(S_trK_rhs), + D(S_Axx_rhs), D(S_Axy_rhs), D(S_Axz_rhs), + D(S_Ayy_rhs), D(S_Ayz_rhs), D(S_Azz_rhs), + D(S_Lap_rhs), + D(S_betax_rhs), D(S_betay_rhs), D(S_betaz_rhs), + D(S_Gamx_rhs), D(S_Gamy_rhs), D(S_Gamz_rhs), + D(S_f_arr), D(S_S_arr)); + /* 
============================================================ */ /* Phase 16/17: advection + KO dissipation (shared ord=3 pack) */ /* ============================================================ */ @@ -3419,45 +3419,45 @@ int f_compute_rhs_bssn(int *ex, double &T, gpu_lopsided_kodis(D(S_trK), D(S_trK), D(S_trK_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, eps, all); gpu_lopsided_kodis(D(S_Gamx), D(S_Gamx), D(S_Gamx_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,SYM,SYM, eps, all); gpu_lopsided_kodis(D(S_Gamy), D(S_Gamy), D(S_Gamy_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,ANTI,SYM, eps, all); - - /* ============================================================ */ - /* Phase 18: Hamilton & momentum constraints (co==0) */ - /* ============================================================ */ - if (co == 0) { - /* 6x fderivs on Aij — reuse gxxx..gzzz slots for dA/dx output */ - gpu_fderivs(D(S_Axx), D(S_gxxx),D(S_gxxy),D(S_gxxz), SYM,SYM,SYM, all); - gpu_fderivs(D(S_Axy), D(S_gxyx),D(S_gxyy),D(S_gxyz), ANTI,ANTI,SYM, all); - gpu_fderivs(D(S_Axz), D(S_gxzx),D(S_gxzy),D(S_gxzz), ANTI,SYM,ANTI, all); - gpu_fderivs(D(S_Ayy), D(S_gyyx),D(S_gyyy),D(S_gyyz), SYM,SYM,SYM, all); - gpu_fderivs(D(S_Ayz), D(S_gyzx),D(S_gyzy),D(S_gyzz), SYM,ANTI,ANTI, all); - gpu_fderivs(D(S_Azz), D(S_gzzx),D(S_gzzy),D(S_gzzz), SYM,SYM,SYM, all); - - kern_phase18_constraints<<>>( - D(S_chin1), - D(S_chix), D(S_chiy), D(S_chiz), - D(S_gupxx), D(S_gupxy), D(S_gupxz), - D(S_gupyy), D(S_gupyz), D(S_gupzz), - D(S_trK), - D(S_Axx), D(S_Axy), D(S_Axz), D(S_Ayy), D(S_Ayz), D(S_Azz), - D(S_Rxx), D(S_Rxy), D(S_Rxz), D(S_Ryy), D(S_Ryz), D(S_Rzz), - D(S_rho), D(S_Sx), D(S_Sy), D(S_Sz), - D(S_Kx), D(S_Ky), D(S_Kz), - D(S_Gamxxx), D(S_Gamxxy), D(S_Gamxxz), - D(S_Gamxyy), D(S_Gamxyz), D(S_Gamxzz), - D(S_Gamyxx), D(S_Gamyxy), D(S_Gamyxz), - D(S_Gamyyy), D(S_Gamyyz), D(S_Gamyzz), - D(S_Gamzxx), D(S_Gamzxy), D(S_Gamzxz), - D(S_Gamzyy), D(S_Gamzyz), D(S_Gamzzz), - /* dA/dx arrays */ - D(S_gxxx), D(S_gxxy), 
D(S_gxxz), - D(S_gxyx), D(S_gxyy), D(S_gxyz), - D(S_gxzx), D(S_gxzy), D(S_gxzz), - D(S_gyyx), D(S_gyyy), D(S_gyyz), - D(S_gyzx), D(S_gyzy), D(S_gyzz), - D(S_gzzx), D(S_gzzy), D(S_gzzz), - D(S_ham_Res), D(S_movx_Res), D(S_movy_Res), D(S_movz_Res)); - } - + + /* ============================================================ */ + /* Phase 18: Hamilton & momentum constraints (co==0) */ + /* ============================================================ */ + if (co == 0) { + /* 6x fderivs on Aij — reuse gxxx..gzzz slots for dA/dx output */ + gpu_fderivs(D(S_Axx), D(S_gxxx),D(S_gxxy),D(S_gxxz), SYM,SYM,SYM, all); + gpu_fderivs(D(S_Axy), D(S_gxyx),D(S_gxyy),D(S_gxyz), ANTI,ANTI,SYM, all); + gpu_fderivs(D(S_Axz), D(S_gxzx),D(S_gxzy),D(S_gxzz), ANTI,SYM,ANTI, all); + gpu_fderivs(D(S_Ayy), D(S_gyyx),D(S_gyyy),D(S_gyyz), SYM,SYM,SYM, all); + gpu_fderivs(D(S_Ayz), D(S_gyzx),D(S_gyzy),D(S_gyzz), SYM,ANTI,ANTI, all); + gpu_fderivs(D(S_Azz), D(S_gzzx),D(S_gzzy),D(S_gzzz), SYM,SYM,SYM, all); + + kern_phase18_constraints<<>>( + D(S_chin1), + D(S_chix), D(S_chiy), D(S_chiz), + D(S_gupxx), D(S_gupxy), D(S_gupxz), + D(S_gupyy), D(S_gupyz), D(S_gupzz), + D(S_trK), + D(S_Axx), D(S_Axy), D(S_Axz), D(S_Ayy), D(S_Ayz), D(S_Azz), + D(S_Rxx), D(S_Rxy), D(S_Rxz), D(S_Ryy), D(S_Ryz), D(S_Rzz), + D(S_rho), D(S_Sx), D(S_Sy), D(S_Sz), + D(S_Kx), D(S_Ky), D(S_Kz), + D(S_Gamxxx), D(S_Gamxxy), D(S_Gamxxz), + D(S_Gamxyy), D(S_Gamxyz), D(S_Gamxzz), + D(S_Gamyxx), D(S_Gamyxy), D(S_Gamyxz), + D(S_Gamyyy), D(S_Gamyyz), D(S_Gamyzz), + D(S_Gamzxx), D(S_Gamzxy), D(S_Gamzxz), + D(S_Gamzyy), D(S_Gamzyz), D(S_Gamzzz), + /* dA/dx arrays */ + D(S_gxxx), D(S_gxxy), D(S_gxxz), + D(S_gxyx), D(S_gxyy), D(S_gxyz), + D(S_gxzx), D(S_gxzy), D(S_gxzz), + D(S_gyyx), D(S_gyyy), D(S_gyyz), + D(S_gyzx), D(S_gyzy), D(S_gyzz), + D(S_gzzx), D(S_gzzy), D(S_gzzz), + D(S_ham_Res), D(S_movx_Res), D(S_movy_Res), D(S_movz_Res)); + } + /* ============================================================ */ /* D2H: copy all output arrays back 
to host */ /* ============================================================ */ @@ -3497,7 +3497,7 @@ int f_compute_rhs_bssn(int *ex, double &T, bytes); } } - + #undef D return 0; } @@ -3626,8 +3626,8 @@ int bssn_cuda_rk4_substep(void *block_tag, } else { download_state_outputs(state_host_out, all); } - if (RK4 == 3 && !use_resident_state) { - release_step_ctx(block_tag); + if (RK4 == 3) { + ctx.matter_ready = false; /* invalidate matter cache for next timestep */ } if (profile) { cuda_profile_sync();