From 0f1d0de1e7be43c8046f4cee630fd738ee12ea1d Mon Sep 17 00:00:00 2001 From: CGH0S7 <776459475@qq.com> Date: Sat, 25 Apr 2026 00:08:35 +0800 Subject: [PATCH] Stabilize and wire BSSN-EScalar C path --- AMSS_NCKU_source/Parallel.C | 341 ++++++++++++++------------ AMSS_NCKU_source/bssnEScalar_class.C | 289 ++++++++++++---------- AMSS_NCKU_source/bssn_class.C | 228 ++++++++++++----- AMSS_NCKU_source/bssn_escalar_rhs_c.C | 169 +++++++++++++ AMSS_NCKU_source/bssn_rhs.h | 59 +++-- AMSS_NCKU_source/makefile | 14 +- 6 files changed, 731 insertions(+), 369 deletions(-) create mode 100644 AMSS_NCKU_source/bssn_escalar_rhs_c.C diff --git a/AMSS_NCKU_source/Parallel.C b/AMSS_NCKU_source/Parallel.C index 38278ab..416a260 100644 --- a/AMSS_NCKU_source/Parallel.C +++ b/AMSS_NCKU_source/Parallel.C @@ -1,14 +1,50 @@ -#include "Parallel.h" -#include "fmisc.h" -#include "prolongrestrict.h" -#include "misc.h" -#include "parameters.h" - -int Parallel::partition1(int &nx, int split_size, int min_width, int cpusize, int shape) // special for 1 diemnsion -{ - nx = Mymax(1, shape / min_width); - nx = Mymin(cpusize, nx); +#include "Parallel.h" +#include "fmisc.h" +#include "prolongrestrict.h" +#include "misc.h" +#include "parameters.h" + +namespace +{ +enum { MAX_DATA_PACKER_VARS = 64 }; + +int expand_var_list_pack_info(MyList *src_list, MyList *dst_list, + int *src_sgfn, int *dst_sgfn, double **src_soa) +{ + int count = 0; + MyList *src_it = src_list; + MyList *dst_it = dst_list; + + while (src_it && dst_it) + { + if (count >= MAX_DATA_PACKER_VARS) + { + cout << "Parallel::data_packer: too many variables in communication list." << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + src_sgfn[count] = src_it->data->sgfn; + dst_sgfn[count] = dst_it->data->sgfn; + src_soa[count] = src_it->data->SoA; + count++; + src_it = src_it->next; + dst_it = dst_it->next; + } + + if (src_it || dst_it) + { + cout << "error in short data packer, var lists does not match." << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + return count; +} +} + +int Parallel::partition1(int &nx, int split_size, int min_width, int cpusize, int shape) // special for 1 diemnsion +{ + nx = Mymax(1, shape / min_width); + nx = Mymin(cpusize, nx); return nx; } @@ -3711,11 +3747,11 @@ void Parallel::build_gstl(MyList *srci, MyList *src, MyList *dst, int rank_in, int dir, - MyList *VarLists /* source */, MyList *VarListd /* target */, int Symmetry) -{ - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); +int Parallel::data_packer(double *data, MyList *src, MyList *dst, int rank_in, int dir, + MyList *VarLists /* source */, MyList *VarListd /* target */, int Symmetry) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); int DIM = dim; @@ -3725,86 +3761,89 @@ int Parallel::data_packer(double *data, MyList *src, MyList

*varls, *varld; - - varls = VarLists; - varld = VarListd; - while (varls && varld) - { - varls = varls->next; - varld = varld->next; - } - - if (varls || varld) - { - cout << "error in short data packer, var lists does not match." << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - int type; /* 1 copy, 2 restrict, 3 prolong */ - if (src->data->Bg->lev == dst->data->Bg->lev) - type = 1; - else if (src->data->Bg->lev > dst->data->Bg->lev) - type = 2; - else - type = 3; - - while (src && dst) - { - if ((dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) || - (dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank)) - { - varls = VarLists; - varld = VarListd; - while (varls && varld) - { - if (data) - { - if (dir == PACK) - switch (type) - { - // attention must be paied to the difference between src's llb,uub and dst's llb,uub - case 1: - f_copy(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + size_out, - src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn], - dst->data->llb, dst->data->uub); - break; - case 2: - f_restrict3(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + size_out, - src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn], - dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry); - break; - case 3: - f_prolong3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn], - dst->data->llb, dst->data->uub, dst->data->shape, data + size_out, - dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry); - } - if (dir == UNPACK) // from target data to corresponding grid - f_copy(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, dst->data->Bg->fgfs[varld->data->sgfn], - dst->data->llb, dst->data->uub, dst->data->shape, data + size_out, - dst->data->llb, dst->data->uub); - } - size_out += dst->data->shape[0] * dst->data->shape[1] * dst->data->shape[2]; - varls = varls->next; - varld = varld->next; - } - } - dst = dst->next; - src = src->next; - } - + int size_out = 0; + + if (!src || !dst) + return size_out; + + int src_sgfn[MAX_DATA_PACKER_VARS]; + int dst_sgfn[MAX_DATA_PACKER_VARS]; + double *src_soa[MAX_DATA_PACKER_VARS]; + const int var_count = expand_var_list_pack_info(VarLists, VarListd, src_sgfn, dst_sgfn, src_soa); + + int type; /* 1 copy, 2 restrict, 3 prolong */ + if (src->data->Bg->lev == dst->data->Bg->lev) + type = 1; + else if (src->data->Bg->lev > dst->data->Bg->lev) + type = 2; + else + type = 3; + + while (src && dst) + { + const bool rank_match = + (dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) || + (dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank); + + if (rank_match) + { + const int segment_size = dst->data->shape[0] * dst->data->shape[1] * dst->data->shape[2]; + int offset = size_out; + + if (data) + { + if (dir == PACK) + { + switch (type) + { + case 1: + for (int iv = 0; iv < var_count; iv++, offset += segment_size) + f_copy(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + offset, + src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, + src->data->Bg->fgfs[src_sgfn[iv]], dst->data->llb, dst->data->uub); + break; + case 2: + for (int iv = 0; iv < var_count; iv++, offset += segment_size) + f_restrict3(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + offset, + src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, + src->data->Bg->fgfs[src_sgfn[iv]], dst->data->llb, dst->data->uub, + src_soa[iv], Symmetry); + break; + case 3: + for (int iv = 0; iv < var_count; iv++, offset += segment_size) + f_prolong3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, + src->data->Bg->fgfs[src_sgfn[iv]], dst->data->llb, dst->data->uub, + dst->data->shape, data + offset, dst->data->llb, dst->data->uub, + src_soa[iv], Symmetry); + break; + default: + break; + } + } + else + { + for (int iv = 0; iv < var_count; iv++, offset += segment_size) + f_copy(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, + dst->data->Bg->fgfs[dst_sgfn[iv]], dst->data->llb, dst->data->uub, + dst->data->shape, data + offset, dst->data->llb, dst->data->uub); + } + } + + size_out = offset + ((!data) ? segment_size * var_count : 0); + if (data) + size_out = offset; + } + dst = dst->next; + src = src->next; + } + return size_out; } -int Parallel::data_packermix(double *data, MyList *src, MyList *dst, int rank_in, int dir, - MyList *VarLists /* source */, MyList *VarListd /* target */, int Symmetry) -{ - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); +int Parallel::data_packermix(double *data, MyList *src, MyList *dst, int rank_in, int dir, + MyList *VarLists /* source */, MyList *VarListd /* target */, int Symmetry) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); int DIM = dim; @@ -3814,33 +3853,22 @@ int Parallel::data_packermix(double *data, MyList *src, MyLis MPI_Abort(MPI_COMM_WORLD, 1); } - int size_out = 0; - - if (!src || !dst) - return size_out; - - MyList *varls, *varld; - - varls = VarLists; - varld = VarListd; - while (varls && varld) - { - varls = varls->next; - varld = varld->next; - } - - if (varls || varld) - { - cout << "error in short data packer, var lists does not match." << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - int type; /* 1 copy, 2 restrict, 3 prolong */ - if (src->data->Bg->lev == dst->data->Bg->lev) - type = 1; - else if (src->data->Bg->lev > dst->data->Bg->lev) - type = 2; - else + int size_out = 0; + + if (!src || !dst) + return size_out; + + int src_sgfn[MAX_DATA_PACKER_VARS]; + int dst_sgfn[MAX_DATA_PACKER_VARS]; + double *src_soa[MAX_DATA_PACKER_VARS]; + const int var_count = expand_var_list_pack_info(VarLists, VarListd, src_sgfn, dst_sgfn, src_soa); + + int type; /* 1 copy, 2 restrict, 3 prolong */ + if (src->data->Bg->lev == dst->data->Bg->lev) + type = 1; + else if (src->data->Bg->lev > dst->data->Bg->lev) + type = 2; + else type = 3; if (type != 3) @@ -3848,37 +3876,48 @@ int Parallel::data_packermix(double *data, MyList *src, MyLis cout << "Parallel::data_packermix: error type " << type << " for data_packermix." << endl; MPI_Abort(MPI_COMM_WORLD, 1); } - - while (src && dst) - { - if ((dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) || - (dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank)) - { - varls = VarLists; - varld = VarListd; - while (varls && varld) - { - if (data) - { - if (dir == PACK) - f_prolongcopy3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn], - dst->data->llb, dst->data->uub, src->data->shape, data + size_out, - src->data->llb, src->data->uub, varls->data->SoA, Symmetry); - if (dir == UNPACK) // from target data to corresponding grid - f_prolongmix3(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, dst->data->Bg->fgfs[varld->data->sgfn], - src->data->llb, src->data->uub, src->data->shape, data + size_out, - dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry, dst->data->illb, dst->data->iuub); - } - // the symmetry problem should be dealt in prolongcopy3, - // so we always have ghost_width for both sides - size_out += (src->data->shape[0] + 2 * ghost_width) * (src->data->shape[1] + 2 * ghost_width) * (src->data->shape[2] + 2 * ghost_width); - varls = varls->next; - varld = varld->next; - } - } - dst = dst->next; - src = src->next; - } + + while (src && dst) + { + const bool rank_match = + (dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) || + (dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank); + + if (rank_match) + { + const int segment_size = + (src->data->shape[0] + 2 * ghost_width) * + (src->data->shape[1] + 2 * ghost_width) * + (src->data->shape[2] + 2 * ghost_width); + int offset = size_out; + + if (data) + { + if (dir == PACK) + { + for (int iv = 0; iv < var_count; iv++, offset += segment_size) + f_prolongcopy3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, + src->data->Bg->fgfs[src_sgfn[iv]], dst->data->llb, dst->data->uub, + src->data->shape, data + offset, src->data->llb, src->data->uub, + src_soa[iv], Symmetry); + } + else + { + for (int iv = 0; iv < var_count; iv++, offset += segment_size) + f_prolongmix3(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, + dst->data->Bg->fgfs[dst_sgfn[iv]], src->data->llb, src->data->uub, + src->data->shape, data + offset, dst->data->llb, dst->data->uub, + src_soa[iv], Symmetry, dst->data->illb, dst->data->iuub); + } + } + + size_out = offset + ((!data) ? segment_size * var_count : 0); + if (data) + size_out = offset; + } + dst = dst->next; + src = src->next; + } return size_out; } diff --git a/AMSS_NCKU_source/bssnEScalar_class.C b/AMSS_NCKU_source/bssnEScalar_class.C index c1e71cd..ad07ae3 100644 --- a/AMSS_NCKU_source/bssnEScalar_class.C +++ b/AMSS_NCKU_source/bssnEScalar_class.C @@ -74,8 +74,8 @@ bssnEScalar_class::bssnEScalar_class(double Couranti, double StartTimei, double //================================================================================================ -void bssnEScalar_class::Initialize() -{ +void bssnEScalar_class::Initialize() +{ Sphio = new var("Sphio", ngfs++, 1, 1, 1); Spio = new var("Spio", ngfs++, 1, 1, 1); Sphi0 = new var("Sphi0", ngfs++, 1, 1, 1); @@ -132,11 +132,14 @@ void bssnEScalar_class::Initialize() } } - GH = new cgh(0, ngfs, Symmetry, pname, checkrun, ErrorMonitor); - if (checkrun) - CheckPoint->readcheck_cgh(PhysTime, GH, myrank, nprocs, Symmetry); - else - GH->compose_cgh(nprocs); + GH = new cgh(0, ngfs, Symmetry, pname, checkrun, ErrorMonitor); + ConstraintRefreshLevels = new int[GH->levels]; + for (int il = 0; il < GH->levels; il++) + ConstraintRefreshLevels[il] = 0; + if (checkrun) + CheckPoint->readcheck_cgh(PhysTime, GH, myrank, nprocs, Symmetry); + else + GH->compose_cgh(nprocs); #ifdef WithShell SH = new ShellPatch(0, ngfs, pname, Symmetry, myrank, ErrorMonitor); @@ -160,12 +163,20 @@ void bssnEScalar_class::Initialize() { CheckPoint->read_Black_Hole_position(BH_num_input, BH_num, Porg0, Pmom, Spin, Mass, Porgbr, Porg, Porg1, Porg_rhs); } - else - { - PhysTime = StartTime; - Setup_Black_Hole_position(); - } -} + else + { + PhysTime = StartTime; + Setup_Black_Hole_position(); + } + + // BSSN-EScalar currently uses the uncached communication fallback paths. + sync_cache_pre = 0; + sync_cache_cor = 0; + sync_cache_rp_coarse = 0; + sync_cache_rp_fine = 0; + sync_cache_restrict = 0; + sync_cache_outbd = 0; +} //================================================================================================ @@ -207,10 +218,10 @@ bssnEScalar_class::~bssnEScalar_class() // Read initial data solved by Ansorg, PRD 70, 064011 (2004) -void bssnEScalar_class::Read_Ansorg() -{ - if (!checkrun) - { +void bssnEScalar_class::Read_Ansorg() +{ + if (!checkrun) + { if (myrank == 0) cout << "Read initial data from Ansorg's solver," << " please be sure the input parameters for black holes are puncture parameters!!" @@ -227,9 +238,12 @@ void bssnEScalar_class::Read_Ansorg() cout << "Error inputpar" << endl; exit(0); } - } - int BH_NM; - double *Porg_here; + } + int BH_NM; + double *Porg_here; + double *pmom_local; + double *spin_local; + double *mass_local; // read parameter from file { const int LEN = 256; @@ -269,11 +283,11 @@ void bssnEScalar_class::Read_Ansorg() } inf.close(); } - - Porg_here = new double[3 * BH_NM]; - Pmom = new double[3 * BH_NM]; - Spin = new double[3 * BH_NM]; - Mass = new double[BH_NM]; + + Porg_here = new double[3 * BH_NM]; + pmom_local = new double[3 * BH_NM]; + spin_local = new double[3 * BH_NM]; + mass_local = new double[BH_NM]; // read parameter from file { const int LEN = 256; @@ -305,31 +319,31 @@ void bssnEScalar_class::Read_Ansorg() else if (status == 0) continue; - if (sgrp == "BSSN" && sind < BH_NM) - { - if (skey == "Mass") - Mass[sind] = atof(sval.c_str()); - else if (skey == "Porgx") - Porg_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Porgy") - Porg_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Porgz") - Porg_here[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Spinx") - Spin[sind * 3] = atof(sval.c_str()); - else if (skey == "Spiny") - Spin[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Spinz") - Spin[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Pmomx") - Pmom[sind * 3] = atof(sval.c_str()); - else if (skey == "Pmomy") - Pmom[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Pmomz") - Pmom[sind * 3 + 2] = atof(sval.c_str()); - } - } - inf.close(); + if (sgrp == "BSSN" && sind < BH_NM) + { + if (skey == "Mass") + mass_local[sind] = atof(sval.c_str()); + else if (skey == "Porgx") + Porg_here[sind * 3] = atof(sval.c_str()); + else if (skey == "Porgy") + Porg_here[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Porgz") + Porg_here[sind * 3 + 2] = atof(sval.c_str()); + else if (skey == "Spinx") + spin_local[sind * 3] = atof(sval.c_str()); + else if (skey == "Spiny") + spin_local[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Spinz") + spin_local[sind * 3 + 2] = atof(sval.c_str()); + else if (skey == "Pmomx") + pmom_local[sind * 3] = atof(sval.c_str()); + else if (skey == "Pmomy") + pmom_local[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Pmomz") + pmom_local[sind * 3 + 2] = atof(sval.c_str()); + } + } + inf.close(); } int order = 6; Ansorg read_ansorg("Ansorg.psid", order); @@ -358,11 +372,11 @@ void bssnEScalar_class::Read_Ansorg() cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn], - Mass, Porg_here, Pmom, Spin, BH_NM); + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn], + mass_local, Porg_here, pmom_local, spin_local, BH_NM); } if (BL == Pp->data->ble) break; @@ -400,11 +414,11 @@ void bssnEScalar_class::Read_Ansorg() cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn], - Mass, Porg_here, Pmom, Spin, BH_NM); + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn], + mass_local, Porg_here, pmom_local, spin_local, BH_NM); } if (BL == Pp->data->ble) break; @@ -413,12 +427,15 @@ void bssnEScalar_class::Read_Ansorg() Pp = Pp->next; } #endif - - delete[] Porg_here; - // dump read_in initial data - // for(int lev=0;levlevels;lev++) Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT); - } -} + + delete[] Porg_here; + delete[] pmom_local; + delete[] spin_local; + delete[] mass_local; + // dump read_in initial data + // for(int lev=0;levlevels;lev++) Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT); + } +} //================================================================================================ @@ -432,10 +449,10 @@ void bssnEScalar_class::Read_Ansorg() // Read initial data solved by Pablo's Olliptic Phys.Rev.D 82 024005 (2010) -void bssnEScalar_class::Read_Pablo() -{ - if (!checkrun) - { +void bssnEScalar_class::Read_Pablo() +{ + if (!checkrun) + { if (myrank == 0) cout << "Read initial data from Pablo's solver," << " please be sure the input parameters for black holes are puncture parameters!!" @@ -452,9 +469,12 @@ void bssnEScalar_class::Read_Pablo() cout << "Error inputpar" << endl; exit(0); } - } - int BH_NM; - double *Porg_here; + } + int BH_NM; + double *Porg_here; + double *pmom_local; + double *spin_local; + double *mass_local; // read parameter from file { const int LEN = 256; @@ -494,11 +514,11 @@ void bssnEScalar_class::Read_Pablo() } inf.close(); } - - Porg_here = new double[3 * BH_NM]; - Pmom = new double[3 * BH_NM]; - Spin = new double[3 * BH_NM]; - Mass = new double[BH_NM]; + + Porg_here = new double[3 * BH_NM]; + pmom_local = new double[3 * BH_NM]; + spin_local = new double[3 * BH_NM]; + mass_local = new double[BH_NM]; // read parameter from file { const int LEN = 256; @@ -530,31 +550,31 @@ void bssnEScalar_class::Read_Pablo() else if (status == 0) continue; - if (sgrp == "BSSN" && sind < BH_NM) - { - if (skey == "Mass") - Mass[sind] = atof(sval.c_str()); - else if (skey == "Porgx") - Porg_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Porgy") - Porg_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Porgz") - Porg_here[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Spinx") - Spin[sind * 3] = atof(sval.c_str()); - else if (skey == "Spiny") - Spin[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Spinz") - Spin[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Pmomx") - Pmom[sind * 3] = atof(sval.c_str()); - else if (skey == "Pmomy") - Pmom[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Pmomz") - Pmom[sind * 3 + 2] = atof(sval.c_str()); - } - } - inf.close(); + if (sgrp == "BSSN" && sind < BH_NM) + { + if (skey == "Mass") + mass_local[sind] = atof(sval.c_str()); + else if (skey == "Porgx") + Porg_here[sind * 3] = atof(sval.c_str()); + else if (skey == "Porgy") + Porg_here[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Porgz") + Porg_here[sind * 3 + 2] = atof(sval.c_str()); + else if (skey == "Spinx") + spin_local[sind * 3] = atof(sval.c_str()); + else if (skey == "Spiny") + spin_local[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Spinz") + spin_local[sind * 3 + 2] = atof(sval.c_str()); + else if (skey == "Pmomx") + pmom_local[sind * 3] = atof(sval.c_str()); + else if (skey == "Pmomy") + pmom_local[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Pmomz") + pmom_local[sind * 3 + 2] = atof(sval.c_str()); + } + } + inf.close(); } bool flag = false; int DIM = dim; @@ -594,11 +614,11 @@ void bssnEScalar_class::Read_Pablo() cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn], - Mass, Porg_here, Pmom, Spin, BH_NM); + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn], + mass_local, Porg_here, pmom_local, spin_local, BH_NM); } if (BL == Pp->data->ble) break; @@ -658,11 +678,11 @@ void bssnEScalar_class::Read_Pablo() cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn], - Mass, Porg_here, Pmom, Spin, BH_NM); + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn], + mass_local, Porg_here, pmom_local, spin_local, BH_NM); } if (BL == Pp->data->ble) break; @@ -684,10 +704,13 @@ void bssnEScalar_class::Read_Pablo() Pp = Pp->next; } #endif - - delete[] Porg_here; - if (flag && myrank == 0) - MPI_Abort(MPI_COMM_WORLD, 1); + + delete[] Porg_here; + delete[] pmom_local; + delete[] spin_local; + delete[] mass_local; + if (flag && myrank == 0) + MPI_Abort(MPI_COMM_WORLD, 1); // dump read_in initial data for (int lev = 0; lev < GH->levels; lev++) Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT); @@ -739,10 +762,10 @@ void bssnEScalar_class::Step(int lev, int YN) cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); #endif - if (f_compute_rhs_bssn_escalar(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + if (f_compute_rhs_bssn_escalar_c(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], @@ -1081,10 +1104,10 @@ void bssnEScalar_class::Step(int lev, int YN) cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); #endif - if (f_compute_rhs_bssn_escalar(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], + if (f_compute_rhs_bssn_escalar_c(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], + cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], + cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], @@ -1858,10 +1881,10 @@ void bssnEScalar_class::Interp_Constraint() if (myrank == cg->rank) { if (lev > 0) - f_compute_rhs_bssn_escalar(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + f_compute_rhs_bssn_escalar_c(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], @@ -2078,10 +2101,10 @@ void bssnEScalar_class::Constraint_Out() if (myrank == cg->rank) { if (lev > 0) - f_compute_rhs_bssn_escalar(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + f_compute_rhs_bssn_escalar_c(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], diff --git a/AMSS_NCKU_source/bssn_class.C b/AMSS_NCKU_source/bssn_class.C index 376df07..5b0645f 100644 --- a/AMSS_NCKU_source/bssn_class.C +++ b/AMSS_NCKU_source/bssn_class.C @@ -280,9 +280,9 @@ namespace rhs_kernel_timing_report //================================================================================================ -bssn_class::bssn_class(double Couranti, double StartTimei, double TotalTimei, - double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei, - int Symmetryi, int checkruni, char *checkfilenamei, +bssn_class::bssn_class(double Couranti, double StartTimei, double TotalTimei, + double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei, + int Symmetryi, int checkruni, char *checkfilenamei, double numepssi, double numepsbi, double numepshi, int a_levi, int maxli, int decni, double maxrexi, double drexi) : Courant(Couranti), StartTime(StartTimei), TotalTime(TotalTimei), @@ -295,12 +295,34 @@ bssn_class::bssn_class(double Couranti, double StartTimei, double TotalTimei, ConstraintRefreshLevels(0), CheckPoint(0) // CheckPoint(0) -{ - MPI_Comm_size(MPI_COMM_WORLD, &nprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - - // setup Monitors - { +{ + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + // Derived classes override Initialize(), so ownership-sensitive members must + // be in a known state before any specialized setup path runs. + GH = 0; + SH = 0; + PhysTime = 0.0; + BH_num = 0; + BH_num_input = 0; + Porg0 = 0; + Porgbr = 0; + Porg = 0; + Porg1 = 0; + Porg_rhs = 0; + Mass = 0; + Pmom = 0; + Spin = 0; + sync_cache_pre = 0; + sync_cache_cor = 0; + sync_cache_rp_coarse = 0; + sync_cache_rp_fine = 0; + sync_cache_restrict = 0; + sync_cache_outbd = 0; + + // setup Monitors + { stringstream a_stream; a_stream.setf(ios::left); a_stream << "# Error log information"; @@ -986,13 +1008,21 @@ void bssn_class::Initialize() Setup_Black_Hole_position(); } - // Initialize sync caches (per-level, for predictor and corrector) - sync_cache_pre = new Parallel::SyncCache[GH->levels]; - sync_cache_cor = new Parallel::SyncCache[GH->levels]; - sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels]; - sync_cache_rp_fine = new Parallel::SyncCache[GH->levels]; - sync_cache_restrict = new Parallel::SyncCache[GH->levels]; - sync_cache_outbd = new Parallel::SyncCache[GH->levels]; + // BSSN-EScalar uses the uncached communication fallback paths. + sync_cache_pre = 0; + sync_cache_cor = 0; + sync_cache_rp_coarse = 0; + sync_cache_rp_fine = 0; + sync_cache_restrict = 0; + sync_cache_outbd = 0; +#if (ABEtype != 1) + sync_cache_pre = new Parallel::SyncCache[GH->levels]; + sync_cache_cor = new Parallel::SyncCache[GH->levels]; + sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels]; + sync_cache_rp_fine = new Parallel::SyncCache[GH->levels]; + sync_cache_restrict = new Parallel::SyncCache[GH->levels]; + sync_cache_outbd = new Parallel::SyncCache[GH->levels]; +#endif } //================================================================================================ @@ -1005,9 +1035,16 @@ void bssn_class::Initialize() //================================================================================================ -bssn_class::~bssn_class() -{ -#ifdef With_AHF +bssn_class::~bssn_class() +{ +#if (ABEtype == 1) + if (myrank == 0) + { + cout << "[dtor] begin" << endl; + cout.flush(); + } +#endif +#ifdef With_AHF AHList->clearList(); AHDList->clearList(); GaugeList->clearList(); @@ -1041,6 +1078,13 @@ bssn_class::~bssn_class() ConstraintList->clearList(); delete[] ConstraintRefreshLevels; +#if (ABEtype == 1) + if (myrank == 0) + { + cout << "[dtor] lists cleared" << endl; + cout.flush(); + } +#endif delete phio; delete trKo; @@ -1216,8 +1260,15 @@ bssn_class::~bssn_class() delete Cons_Py; delete Cons_Pz; delete Cons_Gx; - delete Cons_Gy; - delete Cons_Gz; + delete Cons_Gy; + delete Cons_Gz; +#if (ABEtype == 1) + if (myrank == 0) + { + cout << "[dtor] core vars freed" << endl; + cout.flush(); + } +#endif #ifdef Point_Psi4 delete phix; @@ -1247,35 +1298,73 @@ bssn_class::~bssn_class() #endif // Destroy sync caches before GH - if (sync_cache_pre) - { - for (int i = 0; i < GH->levels; i++) - sync_cache_pre[i].destroy(); - delete[] sync_cache_pre; - } - if (sync_cache_cor) - { - for (int i = 0; i < GH->levels; i++) - sync_cache_cor[i].destroy(); - delete[] sync_cache_cor; - } - if (sync_cache_rp_coarse) - { - for (int i = 0; i < GH->levels; i++) - sync_cache_rp_coarse[i].destroy(); - delete[] sync_cache_rp_coarse; - } - if (sync_cache_rp_fine) - { - for (int i = 0; i < GH->levels; i++) - sync_cache_rp_fine[i].destroy(); - delete[] sync_cache_rp_fine; - } - - delete GH; -#ifdef WithShell - delete SH; -#endif + if (sync_cache_pre) + { +#if (ABEtype != 1) + for (int i = 0; i < GH->levels; i++) + sync_cache_pre[i].destroy(); +#endif + delete[] sync_cache_pre; + } + if (sync_cache_cor) + { +#if (ABEtype != 1) + for (int i = 0; i < GH->levels; i++) + sync_cache_cor[i].destroy(); +#endif + delete[] sync_cache_cor; + } + if (sync_cache_rp_coarse) + { +#if (ABEtype != 1) + for (int i = 0; i < GH->levels; i++) + sync_cache_rp_coarse[i].destroy(); +#endif + delete[] sync_cache_rp_coarse; + } + if (sync_cache_rp_fine) + { +#if (ABEtype != 1) + for (int i = 0; i < GH->levels; i++) + sync_cache_rp_fine[i].destroy(); +#endif + delete[] sync_cache_rp_fine; + } + if (sync_cache_restrict) + { +#if (ABEtype != 1) + for (int i = 0; i < GH->levels; i++) + sync_cache_restrict[i].destroy(); +#endif + delete[] sync_cache_restrict; + } + if (sync_cache_outbd) + { +#if (ABEtype != 1) + for (int i = 0; i < GH->levels; i++) + sync_cache_outbd[i].destroy(); +#endif + delete[] sync_cache_outbd; + } +#if (ABEtype == 1) + if (myrank == 0) + { + cout << "[dtor] caches freed" << endl; + cout.flush(); + } +#endif + + delete GH; +#ifdef WithShell + delete SH; +#endif +#if (ABEtype == 1) + if (myrank == 0) + { + cout << "[dtor] grids freed" << endl; + cout.flush(); + } +#endif for (int i = 0; i < BH_num; i++) { @@ -1292,20 +1381,41 @@ bssn_class::~bssn_class() delete[] Porg1; delete[] Porg_rhs; - delete[] Mass; - delete[] Spin; - delete[] Pmom; - - delete ErrorMonitor; - delete Psi4Monitor; + delete[] Mass; + delete[] Spin; + delete[] Pmom; +#if (ABEtype == 1) + if (myrank == 0) + { + cout << "[dtor] puncture arrays freed" << endl; + cout.flush(); + } +#endif + + delete ErrorMonitor; + delete Psi4Monitor; delete BHMonitor; delete MAPMonitor; delete ConVMonitor; delete TimingMonitor; delete Waveshell; - - delete CheckPoint; -} +#if (ABEtype == 1) + if (myrank == 0) + { + cout << "[dtor] monitors freed" << endl; + cout.flush(); + } +#endif + + delete CheckPoint; +#if (ABEtype == 1) + if (myrank == 0) + { + cout << "[dtor] checkpoint freed" << endl; + cout.flush(); + } +#endif +} //================================================================================================ diff --git a/AMSS_NCKU_source/bssn_escalar_rhs_c.C b/AMSS_NCKU_source/bssn_escalar_rhs_c.C new file mode 100644 index 0000000..6785af1 --- /dev/null +++ b/AMSS_NCKU_source/bssn_escalar_rhs_c.C @@ -0,0 +1,169 @@ +#include "macrodef.h" +#include "bssn_rhs.h" +#include "share_func.h" +#include "tool.h" +#include + +namespace +{ + // Reuse the temporary workspace across block calls to avoid repeated heap churn + // in the EScalar wrapper. MPI ranks execute this path sequentially, so a single + // process-local buffer is sufficient here. + std::vector g_escalar_tmp_store; +} + +#ifdef fortran1 +#define f_frpotential frpotential +#endif +#ifdef fortran2 +#define f_frpotential FRPOTENTIAL +#endif +#ifdef fortran3 +#define f_frpotential frpotential_ +#endif + +extern "C" +{ + void f_frpotential(int *, double *, double *, double *); +} + +int f_compute_rhs_bssn_escalar_c(int *ex, double &T, + double *X, double *Y, double *Z, + double *chi, double *trK, + double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz, + double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz, + double *Gamx, double *Gamy, double *Gamz, + double *Lap, double *betax, double *betay, double *betaz, + double *dtSfx, double *dtSfy, double *dtSfz, + double *Sphi, double *Spi, + double *chi_rhs, double *trK_rhs, + double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs, + double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs, + double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs, + double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs, + double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs, + double *Sphi_rhs, double *Spi_rhs, + double *rho, double *Sx, double *Sy, double *Sz, + double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz, + double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz, + double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz, + double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz, + double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz, + double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res, + double *Gmx_Res, double *Gmy_Res, double *Gmz_Res, + int &Symmetry, int &Lev, double &eps, int &co) +{ + const int nx = ex[0], ny = ex[1], nz = ex[2]; + const int all = nx * ny * nz; + + const size_t workspace_size = size_t(all) * 17; + if (g_escalar_tmp_store.size() < workspace_size) + g_escalar_tmp_store.resize(workspace_size); + + double *tmp_ptr = g_escalar_tmp_store.data(); + auto alloc_tmp = [&](int n = 1) -> double * + { + double *ptr = tmp_ptr; + tmp_ptr += size_t(all) * n; + return ptr; + }; + + double *chix = alloc_tmp(), *chiy = alloc_tmp(), *chiz = alloc_tmp(); + double *Kx = alloc_tmp(), *Ky = alloc_tmp(), *Kz = alloc_tmp(); + double *fxx = alloc_tmp(), *fxy = alloc_tmp(), *fxz = alloc_tmp(); + double *fyy = alloc_tmp(), *fyz = alloc_tmp(), *fzz = alloc_tmp(); + double *Lapx = alloc_tmp(), *Lapy = alloc_tmp(), *Lapz = alloc_tmp(); + double *V = alloc_tmp(), *dVdSphi = alloc_tmp(); + + const double ZEO = 0.0, ONE = 1.0, TWO = 2.0, HALF = 0.5; + const double SSS[3] = {1.0, 1.0, 1.0}; + + fderivs(ex, chi, chix, chiy, chiz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev); + fderivs(ex, Lap, Lapx, Lapy, Lapz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev); + fderivs(ex, Sphi, Kx, Ky, Kz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev); + fdderivs(ex, Sphi, fxx, fxy, fxz, fyy, fyz, fzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev); + + f_frpotential(ex, Sphi, V, dVdSphi); + + for (int i = 0; i < all; ++i) + { + const double alpn1 = Lap[i] + ONE; + const double chin1 = chi[i] + ONE; + const double gxx = dxx[i] + ONE; + const double gyy = dyy[i] + ONE; + const double gzz = dzz[i] + ONE; + const double det = gxx * gyy * gzz + gxy[i] * gyz[i] * gxz[i] + gxz[i] * gxy[i] * gyz[i] + - gxz[i] * gyy * gxz[i] - gxy[i] * gxy[i] * gzz - gxx * gyz[i] * gyz[i]; + const double gupxx = (gyy * gzz - gyz[i] * gyz[i]) / det; + const double gupxy = -(gxy[i] * gzz - gyz[i] * gxz[i]) / det; + const double gupxz = (gxy[i] * gyz[i] - gyy * gxz[i]) / det; + const double gupyy = (gxx * gzz - gxz[i] * gxz[i]) / det; + const double gupyz = -(gxx * gyz[i] - gxy[i] * gxz[i]) / det; + const double gupzz = (gxx * gyy - gxy[i] * gxy[i]) / det; + + Sphi_rhs[i] = alpn1 * Spi[i]; + + Spi_rhs[i] = gupxx * fxx[i] + gupyy * fyy[i] + gupzz * fzz[i] + + TWO * (gupxy * fxy[i] + gupxz * fxz[i] + gupyz * fyz[i]) + - ((Gamx[i] + (gupxx * chix[i] + gupxy * chiy[i] + gupxz * chiz[i]) / TWO / chin1) * Kx[i] + + (Gamy[i] + (gupxy * chix[i] + gupyy * chiy[i] + gupyz * chiz[i]) / TWO / chin1) * Ky[i] + + (Gamz[i] + (gupxz * chix[i] + gupyz * chiy[i] + gupzz * chiz[i]) / TWO / chin1) * Kz[i]); + + Spi_rhs[i] = Spi_rhs[i] * alpn1 + + gupxx * Lapx[i] * Kx[i] + gupxy * Lapx[i] * Ky[i] + gupxz * Lapx[i] * Kz[i] + + gupxy * Lapy[i] * Kx[i] + gupyy * Lapy[i] * Ky[i] + gupyz * Lapy[i] * Kz[i] + + gupxz * Lapz[i] * Kx[i] + gupyz * Lapz[i] * Ky[i] + gupzz * Lapz[i] * Kz[i]; + + Spi_rhs[i] = Spi_rhs[i] * chin1 + alpn1 * (trK[i] * Spi[i] - dVdSphi[i]); + + rho[i] = chin1 * ((gupxx * Kx[i] * Kx[i] + gupyy * Ky[i] * Ky[i] + gupzz * Kz[i] * Kz[i]) * HALF + + gupxy * Kx[i] * Ky[i] + gupxz * Kx[i] * Kz[i] + gupyz * Ky[i] * Kz[i]) + + Spi[i] * Spi[i] * HALF + V[i]; + Sx[i] = -Spi[i] * Kx[i]; + Sy[i] = -Spi[i] * Ky[i]; + Sz[i] = -Spi[i] * Kz[i]; + + const double pressure = (rho[i] - Spi[i] * Spi[i]) / chin1; + Sxx[i] = Kx[i] * Kx[i] - pressure * gxx; + Sxy[i] = Kx[i] * Ky[i] - pressure * gxy[i]; + Sxz[i] = Kx[i] * Kz[i] - pressure * gxz[i]; + Syy[i] = Ky[i] * Ky[i] - pressure * gyy; + Syz[i] = Ky[i] * Kz[i] - pressure * gyz[i]; + Szz[i] = Kz[i] * Kz[i] - pressure * gzz; + } + + if (f_compute_rhs_bssn(ex, T, X, Y, Z, + chi, trK, + dxx, gxy, gxz, dyy, gyz, dzz, + Axx, Axy, Axz, Ayy, Ayz, Azz, + Gamx, Gamy, Gamz, + Lap, betax, betay, betaz, + dtSfx, dtSfy, dtSfz, + chi_rhs, trK_rhs, + gxx_rhs, gxy_rhs, gxz_rhs, gyy_rhs, gyz_rhs, gzz_rhs, + Axx_rhs, Axy_rhs, Axz_rhs, Ayy_rhs, Ayz_rhs, Azz_rhs, + Gamx_rhs, Gamy_rhs, Gamz_rhs, + Lap_rhs, betax_rhs, betay_rhs, betaz_rhs, + dtSfx_rhs, dtSfy_rhs, dtSfz_rhs, + rho, Sx, Sy, Sz, + Sxx, Sxy, Sxz, Syy, Syz, Szz, + Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz, + Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz, + Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz, + Rxx, Rxy, Rxz, Ryy, Ryz, Rzz, + ham_Res, movx_Res, movy_Res, movz_Res, + Gmx_Res, Gmy_Res, Gmz_Res, + Symmetry, Lev, eps, co)) + return 1; + + lopsided_kodis(ex, X, Y, Z, Sphi, Sphi_rhs, betax, betay, betaz, Symmetry, SSS, eps); + lopsided_kodis(ex, X, Y, Z, Spi, Spi_rhs, betax, betay, betaz, Symmetry, SSS, eps); + + for (int i = 0; i < all; ++i) + { + if (Sphi_rhs[i] != Sphi_rhs[i] || Spi_rhs[i] != Spi_rhs[i] || rho[i] != rho[i]) + return 1; + } + + return 0; +} diff --git a/AMSS_NCKU_source/bssn_rhs.h b/AMSS_NCKU_source/bssn_rhs.h index 96a545a..4384118 100644 --- a/AMSS_NCKU_source/bssn_rhs.h +++ b/AMSS_NCKU_source/bssn_rhs.h @@ -63,13 +63,34 @@ extern "C" double *, double *, double *, double *, double *, double *, // Christoffel double *, double *, double *, double *, double *, double *, // Christoffel double *, double *, double *, double *, double *, double *, // Ricci - double *, double *, double *, double *, double *, double *, double *, // constraint violation - int &, int &, double &, int &); -} - -extern "C" -{ - int f_compute_rhs_bssn_ss(int *, double &, double *, double *, double *, // ex,T,rho,sigma,R + double *, double *, double *, double *, double *, double *, double *, // constraint violation + int &, int &, double &, int &); +} + +int f_compute_rhs_bssn_escalar_c(int *, double &, double *, double *, double *, // ex,T,X,Y,Z + double *, double *, // chi, trK + double *, double *, double *, double *, double *, double *, // gij + double *, double *, double *, double *, double *, double *, // Aij + double *, double *, double *, // Gam + double *, double *, double *, double *, double *, double *, double *, // Gauge + double *, double *, // Sphi, Spi + double *, double *, // chi, trK + double *, double *, double *, double *, double *, double *, // gij + double *, double *, double *, double *, double *, double *, // Aij + double *, double *, double *, // Gam + double *, double *, double *, double *, double *, double *, double *, // Gauge + double *, double *, // Sphi, Spi + double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, // stress-energy + double *, double *, double *, double *, double *, double *, // Christoffel + double *, double *, double *, double *, double *, double *, // Christoffel + double *, double *, double *, double *, double *, double *, // Christoffel + double *, double *, double *, double *, double *, double *, // Ricci + double *, double *, double *, double *, double *, double *, double *, // constraint violation + int &, int &, double &, int &); + +extern "C" +{ + int f_compute_rhs_bssn_ss(int *, double &, double *, double *, double *, // ex,T,rho,sigma,R double *, double *, double *, // X,Y,Z double *, double *, double *, // drhodx,drhody,drhodz double *, double *, double *, // dsigmadx,dsigmady,dsigmadz @@ -96,10 +117,10 @@ extern "C" int &, int &, double &, int &, int &); } -extern "C" -{ - int f_compute_rhs_bssn_escalar(int *, double &, double *, double *, double *, // ex,T,X,Y,Z - double *, double *, // chi, trK +extern "C" +{ + int f_compute_rhs_bssn_escalar(int *, double &, double *, double *, double *, // ex,T,X,Y,Z + double *, double *, // chi, trK double *, double *, double *, double *, double *, double *, // gij double *, double *, double *, double *, double *, double *, // Aij double *, double *, double *, // Gam @@ -116,14 +137,14 @@ extern "C" double *, double *, double *, double *, double *, double *, // Christoffel double *, double *, double *, double *, double *, double *, // Christoffel double *, double *, double *, double *, double *, double *, // Ricci - double *, double *, double *, double *, double *, double *, double *, // constraint violation - int &, int &, double &, int &); -} - -extern "C" -{ - int f_compute_rhs_bssn_escalar_ss(int *, double &, double *, double *, double *, // ex,T,rho,sigma,R - double *, double *, double *, // X,Y,Z + double *, double *, double *, double *, double *, double *, double *, // constraint violation + int &, int &, double &, int &); +} + +extern "C" +{ + int f_compute_rhs_bssn_escalar_ss(int *, double &, double *, double *, double *, // ex,T,rho,sigma,R + double *, double *, double *, // X,Y,Z double *, double *, double *, // drhodx,drhody,drhodz double *, double *, double *, // dsigmadx,dsigmady,dsigmadz double *, double *, double *, // dRdx,dRdy,dRdz diff --git a/AMSS_NCKU_source/makefile b/AMSS_NCKU_source/makefile index 72b9cbd..8953ecd 100644 --- a/AMSS_NCKU_source/makefile +++ b/AMSS_NCKU_source/makefile @@ -46,11 +46,11 @@ endif $(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH) # C rewrite of BSSN RHS kernel and helpers -bssn_rhs_c.o: bssn_rhs_c.C - ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ - -fderivs_c.o: fderivs_c.C - ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ +bssn_rhs_c.o: bssn_rhs_c.C + ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ + +fderivs_c.o: fderivs_c.C + ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ fdderivs_c.o: fdderivs_c.C ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ @@ -86,8 +86,8 @@ ifeq ($(USE_CXX_KERNELS),0) # Fortran mode: no C rewrite files; bssn_rhs.o is included via F90FILES below CFILES = else -# C++ mode (default): C rewrite of bssn_rhs and helper kernels -CFILES = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_kodis_c.o +# C++ mode (default): C rewrite of bssn/bssn-escalar rhs and helper kernels +CFILES = bssn_rhs_c.o bssn_escalar_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_kodis_c.o endif ## RK4 kernel switch (independent from USE_CXX_KERNELS)