diff --git a/AMSS_NCKU_Program.py b/AMSS_NCKU_Program.py index 2d777cd..8aa5d01 100755 --- a/AMSS_NCKU_Program.py +++ b/AMSS_NCKU_Program.py @@ -174,11 +174,14 @@ import generate_macrodef generate_macrodef.generate_macrodef_h() print( " AMSS-NCKU macro file macrodef.h has been generated. " ) -generate_macrodef.generate_macrodef_fh() -print( " AMSS-NCKU macro file macrodef.fh has been generated. " ) - - -################################################################## +generate_macrodef.generate_macrodef_fh() +print( " AMSS-NCKU macro file macrodef.fh has been generated. " ) + +generate_macrodef.generate_build_config() +print( " AMSS-NCKU build config AMSS_NCKU_build.mk has been generated. " ) + + +################################################################## # Compile the AMSS-NCKU program according to user requirements @@ -217,11 +220,13 @@ shutil.copytree(AMSS_NCKU_source_path, AMSS_NCKU_source_copy) # Copy the generated macro files into the AMSS_NCKU source folder -macrodef_h_path = os.path.join(File_directory, "macrodef.h") -macrodef_fh_path = os.path.join(File_directory, "macrodef.fh") - -shutil.copy2(macrodef_h_path, AMSS_NCKU_source_copy) -shutil.copy2(macrodef_fh_path, AMSS_NCKU_source_copy) +macrodef_h_path = os.path.join(File_directory, "macrodef.h") +macrodef_fh_path = os.path.join(File_directory, "macrodef.fh") +build_config_path = os.path.join(File_directory, "AMSS_NCKU_build.mk") + +shutil.copy2(macrodef_h_path, AMSS_NCKU_source_copy) +shutil.copy2(macrodef_fh_path, AMSS_NCKU_source_copy) +shutil.copy2(build_config_path, AMSS_NCKU_source_copy) # Notes on copying files: # shutil.copy2 preserves file metadata such as modification time. diff --git a/AMSS_NCKU_source/Parallel.C b/AMSS_NCKU_source/Parallel.C index 38278ab..416a260 100644 --- a/AMSS_NCKU_source/Parallel.C +++ b/AMSS_NCKU_source/Parallel.C @@ -1,14 +1,50 @@ -#include "Parallel.h" -#include "fmisc.h" -#include "prolongrestrict.h" -#include "misc.h" -#include "parameters.h" - -int Parallel::partition1(int &nx, int split_size, int min_width, int cpusize, int shape) // special for 1 diemnsion -{ - nx = Mymax(1, shape / min_width); - nx = Mymin(cpusize, nx); +#include "Parallel.h" +#include "fmisc.h" +#include "prolongrestrict.h" +#include "misc.h" +#include "parameters.h" + +namespace +{ +enum { MAX_DATA_PACKER_VARS = 64 }; + +int expand_var_list_pack_info(MyList *src_list, MyList *dst_list, + int *src_sgfn, int *dst_sgfn, double **src_soa) +{ + int count = 0; + MyList *src_it = src_list; + MyList *dst_it = dst_list; + + while (src_it && dst_it) + { + if (count >= MAX_DATA_PACKER_VARS) + { + cout << "Parallel::data_packer: too many variables in communication list." << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + src_sgfn[count] = src_it->data->sgfn; + dst_sgfn[count] = dst_it->data->sgfn; + src_soa[count] = src_it->data->SoA; + count++; + src_it = src_it->next; + dst_it = dst_it->next; + } + + if (src_it || dst_it) + { + cout << "error in short data packer, var lists does not match." << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + return count; +} +} + +int Parallel::partition1(int &nx, int split_size, int min_width, int cpusize, int shape) // special for 1 diemnsion +{ + nx = Mymax(1, shape / min_width); + nx = Mymin(cpusize, nx); return nx; } @@ -3711,11 +3747,11 @@ void Parallel::build_gstl(MyList *srci, MyList *src, MyList *dst, int rank_in, int dir, - MyList *VarLists /* source */, MyList *VarListd /* target */, int Symmetry) -{ - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); +int Parallel::data_packer(double *data, MyList *src, MyList *dst, int rank_in, int dir, + MyList *VarLists /* source */, MyList *VarListd /* target */, int Symmetry) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); int DIM = dim; @@ -3725,86 +3761,89 @@ int Parallel::data_packer(double *data, MyList *src, MyList

*varls, *varld; - - varls = VarLists; - varld = VarListd; - while (varls && varld) - { - varls = varls->next; - varld = varld->next; - } - - if (varls || varld) - { - cout << "error in short data packer, var lists does not match." << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - int type; /* 1 copy, 2 restrict, 3 prolong */ - if (src->data->Bg->lev == dst->data->Bg->lev) - type = 1; - else if (src->data->Bg->lev > dst->data->Bg->lev) - type = 2; - else - type = 3; - - while (src && dst) - { - if ((dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) || - (dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank)) - { - varls = VarLists; - varld = VarListd; - while (varls && varld) - { - if (data) - { - if (dir == PACK) - switch (type) - { - // attention must be paied to the difference between src's llb,uub and dst's llb,uub - case 1: - f_copy(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + size_out, - src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn], - dst->data->llb, dst->data->uub); - break; - case 2: - f_restrict3(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + size_out, - src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn], - dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry); - break; - case 3: - f_prolong3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn], - dst->data->llb, dst->data->uub, dst->data->shape, data + size_out, - dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry); - } - if (dir == UNPACK) // from target data to corresponding grid - f_copy(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, dst->data->Bg->fgfs[varld->data->sgfn], - dst->data->llb, dst->data->uub, dst->data->shape, data + size_out, - dst->data->llb, dst->data->uub); - } - size_out += dst->data->shape[0] * dst->data->shape[1] * dst->data->shape[2]; - varls = varls->next; - varld = varld->next; - } - } - dst = dst->next; - src = src->next; - } - + int size_out = 0; + + if (!src || !dst) + return size_out; + + int src_sgfn[MAX_DATA_PACKER_VARS]; + int dst_sgfn[MAX_DATA_PACKER_VARS]; + double *src_soa[MAX_DATA_PACKER_VARS]; + const int var_count = expand_var_list_pack_info(VarLists, VarListd, src_sgfn, dst_sgfn, src_soa); + + int type; /* 1 copy, 2 restrict, 3 prolong */ + if (src->data->Bg->lev == dst->data->Bg->lev) + type = 1; + else if (src->data->Bg->lev > dst->data->Bg->lev) + type = 2; + else + type = 3; + + while (src && dst) + { + const bool rank_match = + (dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) || + (dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank); + + if (rank_match) + { + const int segment_size = dst->data->shape[0] * dst->data->shape[1] * dst->data->shape[2]; + int offset = size_out; + + if (data) + { + if (dir == PACK) + { + switch (type) + { + case 1: + for (int iv = 0; iv < var_count; iv++, offset += segment_size) + f_copy(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + offset, + src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, + src->data->Bg->fgfs[src_sgfn[iv]], dst->data->llb, dst->data->uub); + break; + case 2: + for (int iv = 0; iv < var_count; iv++, offset += segment_size) + f_restrict3(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + offset, + src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, + src->data->Bg->fgfs[src_sgfn[iv]], dst->data->llb, dst->data->uub, + src_soa[iv], Symmetry); + break; + case 3: + for (int iv = 0; iv < var_count; iv++, offset += segment_size) + f_prolong3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, + src->data->Bg->fgfs[src_sgfn[iv]], dst->data->llb, dst->data->uub, + dst->data->shape, data + offset, dst->data->llb, dst->data->uub, + src_soa[iv], Symmetry); + break; + default: + break; + } + } + else + { + for (int iv = 0; iv < var_count; iv++, offset += segment_size) + f_copy(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, + dst->data->Bg->fgfs[dst_sgfn[iv]], dst->data->llb, dst->data->uub, + dst->data->shape, data + offset, dst->data->llb, dst->data->uub); + } + } + + size_out = offset + ((!data) ? segment_size * var_count : 0); + if (data) + size_out = offset; + } + dst = dst->next; + src = src->next; + } + return size_out; } -int Parallel::data_packermix(double *data, MyList *src, MyList *dst, int rank_in, int dir, - MyList *VarLists /* source */, MyList *VarListd /* target */, int Symmetry) -{ - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); +int Parallel::data_packermix(double *data, MyList *src, MyList *dst, int rank_in, int dir, + MyList *VarLists /* source */, MyList *VarListd /* target */, int Symmetry) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); int DIM = dim; @@ -3814,33 +3853,22 @@ int Parallel::data_packermix(double *data, MyList *src, MyLis MPI_Abort(MPI_COMM_WORLD, 1); } - int size_out = 0; - - if (!src || !dst) - return size_out; - - MyList *varls, *varld; - - varls = VarLists; - varld = VarListd; - while (varls && varld) - { - varls = varls->next; - varld = varld->next; - } - - if (varls || varld) - { - cout << "error in short data packer, var lists does not match." << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - int type; /* 1 copy, 2 restrict, 3 prolong */ - if (src->data->Bg->lev == dst->data->Bg->lev) - type = 1; - else if (src->data->Bg->lev > dst->data->Bg->lev) - type = 2; - else + int size_out = 0; + + if (!src || !dst) + return size_out; + + int src_sgfn[MAX_DATA_PACKER_VARS]; + int dst_sgfn[MAX_DATA_PACKER_VARS]; + double *src_soa[MAX_DATA_PACKER_VARS]; + const int var_count = expand_var_list_pack_info(VarLists, VarListd, src_sgfn, dst_sgfn, src_soa); + + int type; /* 1 copy, 2 restrict, 3 prolong */ + if (src->data->Bg->lev == dst->data->Bg->lev) + type = 1; + else if (src->data->Bg->lev > dst->data->Bg->lev) + type = 2; + else type = 3; if (type != 3) @@ -3848,37 +3876,48 @@ int Parallel::data_packermix(double *data, MyList *src, MyLis cout << "Parallel::data_packermix: error type " << type << " for data_packermix." << endl; MPI_Abort(MPI_COMM_WORLD, 1); } - - while (src && dst) - { - if ((dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) || - (dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank)) - { - varls = VarLists; - varld = VarListd; - while (varls && varld) - { - if (data) - { - if (dir == PACK) - f_prolongcopy3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn], - dst->data->llb, dst->data->uub, src->data->shape, data + size_out, - src->data->llb, src->data->uub, varls->data->SoA, Symmetry); - if (dir == UNPACK) // from target data to corresponding grid - f_prolongmix3(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, dst->data->Bg->fgfs[varld->data->sgfn], - src->data->llb, src->data->uub, src->data->shape, data + size_out, - dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry, dst->data->illb, dst->data->iuub); - } - // the symmetry problem should be dealt in prolongcopy3, - // so we always have ghost_width for both sides - size_out += (src->data->shape[0] + 2 * ghost_width) * (src->data->shape[1] + 2 * ghost_width) * (src->data->shape[2] + 2 * ghost_width); - varls = varls->next; - varld = varld->next; - } - } - dst = dst->next; - src = src->next; - } + + while (src && dst) + { + const bool rank_match = + (dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) || + (dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank); + + if (rank_match) + { + const int segment_size = + (src->data->shape[0] + 2 * ghost_width) * + (src->data->shape[1] + 2 * ghost_width) * + (src->data->shape[2] + 2 * ghost_width); + int offset = size_out; + + if (data) + { + if (dir == PACK) + { + for (int iv = 0; iv < var_count; iv++, offset += segment_size) + f_prolongcopy3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, + src->data->Bg->fgfs[src_sgfn[iv]], dst->data->llb, dst->data->uub, + src->data->shape, data + offset, src->data->llb, src->data->uub, + src_soa[iv], Symmetry); + } + else + { + for (int iv = 0; iv < var_count; iv++, offset += segment_size) + f_prolongmix3(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, + dst->data->Bg->fgfs[dst_sgfn[iv]], src->data->llb, src->data->uub, + src->data->shape, data + offset, dst->data->llb, dst->data->uub, + src_soa[iv], Symmetry, dst->data->illb, dst->data->iuub); + } + } + + size_out = offset + ((!data) ? segment_size * var_count : 0); + if (data) + size_out = offset; + } + dst = dst->next; + src = src->next; + } return size_out; } diff --git a/AMSS_NCKU_source/bssnEM_class.C b/AMSS_NCKU_source/bssnEM_class.C index e06b701..c65ef9d 100644 --- a/AMSS_NCKU_source/bssnEM_class.C +++ b/AMSS_NCKU_source/bssnEM_class.C @@ -258,6 +258,8 @@ void bssnEM_class::Initialize() PhysTime = StartTime; Setup_Black_Hole_position(); } + + setup_transfer_caches(); } //================================================================================================ diff --git a/AMSS_NCKU_source/bssnEScalar_class.C b/AMSS_NCKU_source/bssnEScalar_class.C index c1e71cd..0d346d2 100644 --- a/AMSS_NCKU_source/bssnEScalar_class.C +++ b/AMSS_NCKU_source/bssnEScalar_class.C @@ -23,8 +23,14 @@ using namespace std; #include "rungekutta4_rout.h" #include "sommerfeld_rout.h" #include "getnp4.h" -#include "shellfunctions.h" -#include "parameters.h" +#include "shellfunctions.h" +#include "parameters.h" + +#if BSSN_USE_ESCALAR_C_KERNEL +#define BSSN_ESCALAR_RHS f_compute_rhs_bssn_escalar_c +#else +#define BSSN_ESCALAR_RHS f_compute_rhs_bssn_escalar +#endif #ifdef With_AHF #include "derivatives.h" @@ -74,8 +80,8 @@ bssnEScalar_class::bssnEScalar_class(double Couranti, double StartTimei, double //================================================================================================ -void bssnEScalar_class::Initialize() -{ +void bssnEScalar_class::Initialize() +{ Sphio = new var("Sphio", ngfs++, 1, 1, 1); Spio = new var("Spio", ngfs++, 1, 1, 1); Sphi0 = new var("Sphi0", ngfs++, 1, 1, 1); @@ -132,11 +138,14 @@ void bssnEScalar_class::Initialize() } } - GH = new cgh(0, ngfs, Symmetry, pname, checkrun, ErrorMonitor); - if (checkrun) - CheckPoint->readcheck_cgh(PhysTime, GH, myrank, nprocs, Symmetry); - else - GH->compose_cgh(nprocs); + GH = new cgh(0, ngfs, Symmetry, pname, checkrun, ErrorMonitor); + ConstraintRefreshLevels = new int[GH->levels]; + for (int il = 0; il < GH->levels; il++) + ConstraintRefreshLevels[il] = 0; + if (checkrun) + CheckPoint->readcheck_cgh(PhysTime, GH, myrank, nprocs, Symmetry); + else + GH->compose_cgh(nprocs); #ifdef WithShell SH = new ShellPatch(0, ngfs, pname, Symmetry, myrank, ErrorMonitor); @@ -160,12 +169,14 @@ void bssnEScalar_class::Initialize() { CheckPoint->read_Black_Hole_position(BH_num_input, BH_num, Porg0, Pmom, Spin, Mass, Porgbr, Porg, Porg1, Porg_rhs); } - else - { - PhysTime = StartTime; - Setup_Black_Hole_position(); - } -} + else + { + PhysTime = StartTime; + Setup_Black_Hole_position(); + } + + setup_transfer_caches(); +} //================================================================================================ @@ -207,10 +218,10 @@ bssnEScalar_class::~bssnEScalar_class() // Read initial data solved by Ansorg, PRD 70, 064011 (2004) -void bssnEScalar_class::Read_Ansorg() -{ - if (!checkrun) - { +void bssnEScalar_class::Read_Ansorg() +{ + if (!checkrun) + { if (myrank == 0) cout << "Read initial data from Ansorg's solver," << " please be sure the input parameters for black holes are puncture parameters!!" @@ -227,9 +238,12 @@ void bssnEScalar_class::Read_Ansorg() cout << "Error inputpar" << endl; exit(0); } - } - int BH_NM; - double *Porg_here; + } + int BH_NM; + double *Porg_here; + double *pmom_local; + double *spin_local; + double *mass_local; // read parameter from file { const int LEN = 256; @@ -269,11 +283,11 @@ void bssnEScalar_class::Read_Ansorg() } inf.close(); } - - Porg_here = new double[3 * BH_NM]; - Pmom = new double[3 * BH_NM]; - Spin = new double[3 * BH_NM]; - Mass = new double[BH_NM]; + + Porg_here = new double[3 * BH_NM]; + pmom_local = new double[3 * BH_NM]; + spin_local = new double[3 * BH_NM]; + mass_local = new double[BH_NM]; // read parameter from file { const int LEN = 256; @@ -305,37 +319,37 @@ void bssnEScalar_class::Read_Ansorg() else if (status == 0) continue; - if (sgrp == "BSSN" && sind < BH_NM) - { - if (skey == "Mass") - Mass[sind] = atof(sval.c_str()); - else if (skey == "Porgx") - Porg_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Porgy") - Porg_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Porgz") - Porg_here[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Spinx") - Spin[sind * 3] = atof(sval.c_str()); - else if (skey == "Spiny") - Spin[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Spinz") - Spin[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Pmomx") - Pmom[sind * 3] = atof(sval.c_str()); - else if (skey == "Pmomy") - Pmom[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Pmomz") - Pmom[sind * 3 + 2] = atof(sval.c_str()); - } - } - inf.close(); + if (sgrp == "BSSN" && sind < BH_NM) + { + if (skey == "Mass") + mass_local[sind] = atof(sval.c_str()); + else if (skey == "Porgx") + Porg_here[sind * 3] = atof(sval.c_str()); + else if (skey == "Porgy") + Porg_here[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Porgz") + Porg_here[sind * 3 + 2] = atof(sval.c_str()); + else if (skey == "Spinx") + spin_local[sind * 3] = atof(sval.c_str()); + else if (skey == "Spiny") + spin_local[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Spinz") + spin_local[sind * 3 + 2] = atof(sval.c_str()); + else if (skey == "Pmomx") + pmom_local[sind * 3] = atof(sval.c_str()); + else if (skey == "Pmomy") + pmom_local[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Pmomz") + pmom_local[sind * 3 + 2] = atof(sval.c_str()); + } + } + inf.close(); } - int order = 6; - Ansorg read_ansorg("Ansorg.psid", order); - // set initial data - for (int lev = 0; lev < GH->levels; lev++) - { + int order = 6; + Ansorg read_ansorg("Ansorg.psid", order); + // set initial data + for (int lev = 0; lev < GH->levels; lev++) + { MyList *Pp = GH->PatL[lev]; while (Pp) { @@ -358,21 +372,21 @@ void bssnEScalar_class::Read_Ansorg() cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn], - Mass, Porg_here, Pmom, Spin, BH_NM); + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn], + mass_local, Porg_here, pmom_local, spin_local, BH_NM); } if (BL == Pp->data->ble) break; BL = BL->next; - } - Pp = Pp->next; - } - } -#ifdef WithShell - // ShellPatch part + } + Pp = Pp->next; + } + } +#ifdef WithShell + // ShellPatch part MyList *Pp = SH->PatL; while (Pp) { @@ -400,25 +414,28 @@ void bssnEScalar_class::Read_Ansorg() cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn], - Mass, Porg_here, Pmom, Spin, BH_NM); + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn], + mass_local, Porg_here, pmom_local, spin_local, BH_NM); } if (BL == Pp->data->ble) break; BL = BL->next; - } - Pp = Pp->next; - } -#endif - - delete[] Porg_here; - // dump read_in initial data - // for(int lev=0;levlevels;lev++) Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT); - } -} + } + Pp = Pp->next; + } +#endif + + delete[] Porg_here; + delete[] pmom_local; + delete[] spin_local; + delete[] mass_local; + // dump read_in initial data + // for(int lev=0;levlevels;lev++) Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT); + } +} //================================================================================================ @@ -432,10 +449,10 @@ void bssnEScalar_class::Read_Ansorg() // Read initial data solved by Pablo's Olliptic Phys.Rev.D 82 024005 (2010) -void bssnEScalar_class::Read_Pablo() -{ - if (!checkrun) - { +void bssnEScalar_class::Read_Pablo() +{ + if (!checkrun) + { if (myrank == 0) cout << "Read initial data from Pablo's solver," << " please be sure the input parameters for black holes are puncture parameters!!" @@ -452,9 +469,12 @@ void bssnEScalar_class::Read_Pablo() cout << "Error inputpar" << endl; exit(0); } - } - int BH_NM; - double *Porg_here; + } + int BH_NM; + double *Porg_here; + double *pmom_local; + double *spin_local; + double *mass_local; // read parameter from file { const int LEN = 256; @@ -494,11 +514,11 @@ void bssnEScalar_class::Read_Pablo() } inf.close(); } - - Porg_here = new double[3 * BH_NM]; - Pmom = new double[3 * BH_NM]; - Spin = new double[3 * BH_NM]; - Mass = new double[BH_NM]; + + Porg_here = new double[3 * BH_NM]; + pmom_local = new double[3 * BH_NM]; + spin_local = new double[3 * BH_NM]; + mass_local = new double[BH_NM]; // read parameter from file { const int LEN = 256; @@ -530,31 +550,31 @@ void bssnEScalar_class::Read_Pablo() else if (status == 0) continue; - if (sgrp == "BSSN" && sind < BH_NM) - { - if (skey == "Mass") - Mass[sind] = atof(sval.c_str()); - else if (skey == "Porgx") - Porg_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Porgy") - Porg_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Porgz") - Porg_here[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Spinx") - Spin[sind * 3] = atof(sval.c_str()); - else if (skey == "Spiny") - Spin[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Spinz") - Spin[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Pmomx") - Pmom[sind * 3] = atof(sval.c_str()); - else if (skey == "Pmomy") - Pmom[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Pmomz") - Pmom[sind * 3 + 2] = atof(sval.c_str()); - } - } - inf.close(); + if (sgrp == "BSSN" && sind < BH_NM) + { + if (skey == "Mass") + mass_local[sind] = atof(sval.c_str()); + else if (skey == "Porgx") + Porg_here[sind * 3] = atof(sval.c_str()); + else if (skey == "Porgy") + Porg_here[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Porgz") + Porg_here[sind * 3 + 2] = atof(sval.c_str()); + else if (skey == "Spinx") + spin_local[sind * 3] = atof(sval.c_str()); + else if (skey == "Spiny") + spin_local[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Spinz") + spin_local[sind * 3 + 2] = atof(sval.c_str()); + else if (skey == "Pmomx") + pmom_local[sind * 3] = atof(sval.c_str()); + else if (skey == "Pmomy") + pmom_local[sind * 3 + 1] = atof(sval.c_str()); + else if (skey == "Pmomz") + pmom_local[sind * 3 + 2] = atof(sval.c_str()); + } + } + inf.close(); } bool flag = false; int DIM = dim; @@ -594,11 +614,11 @@ void bssnEScalar_class::Read_Pablo() cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn], - Mass, Porg_here, Pmom, Spin, BH_NM); + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn], + mass_local, Porg_here, pmom_local, spin_local, BH_NM); } if (BL == Pp->data->ble) break; @@ -658,11 +678,11 @@ void bssnEScalar_class::Read_Pablo() cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn], - Mass, Porg_here, Pmom, Spin, BH_NM); + cg->fgfs[Lap0->sgfn], + cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], + cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], + cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn], + mass_local, Porg_here, pmom_local, spin_local, BH_NM); } if (BL == Pp->data->ble) break; @@ -684,10 +704,13 @@ void bssnEScalar_class::Read_Pablo() Pp = Pp->next; } #endif - - delete[] Porg_here; - if (flag && myrank == 0) - MPI_Abort(MPI_COMM_WORLD, 1); + + delete[] Porg_here; + delete[] pmom_local; + delete[] spin_local; + delete[] mass_local; + if (flag && myrank == 0) + MPI_Abort(MPI_COMM_WORLD, 1); // dump read_in initial data for (int lev = 0; lev < GH->levels; lev++) Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT); @@ -739,10 +762,10 @@ void bssnEScalar_class::Step(int lev, int YN) cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); #endif - if (f_compute_rhs_bssn_escalar(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + if (BSSN_ESCALAR_RHS(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], @@ -993,11 +1016,12 @@ void bssnEScalar_class::Step(int lev, int YN) } #endif - Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); + Parallel::AsyncSyncState async_pre; + sync_predictor_start(lev, SynchList_pre, async_pre); -#ifdef WithShell - if (lev == 0) - { +#ifdef WithShell + if (lev == 0) + { clock_t prev_clock, curr_clock; if (myrank == 0) curr_clock = clock(); @@ -1009,9 +1033,10 @@ void bssnEScalar_class::Step(int lev, int YN) cout << " Shell stuff synchronization used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds! " << endl; - } - } -#endif + } + } +#endif + sync_predictor_finish(lev, async_pre, SynchList_pre); // for black hole position if (BH_num > 0 && lev == GH->levels - 1) @@ -1081,10 +1106,10 @@ void bssnEScalar_class::Step(int lev, int YN) cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); #endif - if (f_compute_rhs_bssn_escalar(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], + if (BSSN_ESCALAR_RHS(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], + cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], + cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], @@ -1349,11 +1374,12 @@ void bssnEScalar_class::Step(int lev, int YN) } #endif - Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); + Parallel::AsyncSyncState async_cor; + sync_corrector_start(lev, SynchList_cor, async_cor); -#ifdef WithShell - if (lev == 0) - { +#ifdef WithShell + if (lev == 0) + { clock_t prev_clock, curr_clock; if (myrank == 0) curr_clock = clock(); @@ -1365,9 +1391,10 @@ void bssnEScalar_class::Step(int lev, int YN) cout << " Shell stuff synchronization used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds! " << endl; - } - } -#endif + } + } +#endif + sync_corrector_finish(lev, async_cor, SynchList_cor); // for black hole position if (BH_num > 0 && lev == GH->levels - 1) { @@ -1835,11 +1862,14 @@ void bssnEScalar_class::AnalysisStuff_EScalar(int lev, double dT_lev) //================================================================================================ -void bssnEScalar_class::Interp_Constraint() -{ - // we do not support a_lev != 0 yet. - if (a_lev > 0) - return; +void bssnEScalar_class::Interp_Constraint(bool infg) +{ + if (!infg) + return; + + // we do not support a_lev != 0 yet. + if (a_lev > 0) + return; for (int lev = 0; lev < GH->levels; lev++) { @@ -1858,10 +1888,10 @@ void bssnEScalar_class::Interp_Constraint() if (myrank == cg->rank) { if (lev > 0) - f_compute_rhs_bssn_escalar(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + BSSN_ESCALAR_RHS(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], @@ -2078,10 +2108,10 @@ void bssnEScalar_class::Constraint_Out() if (myrank == cg->rank) { if (lev > 0) - f_compute_rhs_bssn_escalar(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], + BSSN_ESCALAR_RHS(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], + cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], + cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], diff --git a/AMSS_NCKU_source/bssnEScalar_class.h b/AMSS_NCKU_source/bssnEScalar_class.h index 3e26005..cabfdb1 100644 --- a/AMSS_NCKU_source/bssnEScalar_class.h +++ b/AMSS_NCKU_source/bssnEScalar_class.h @@ -51,7 +51,7 @@ public: void Compute_Psi4(int lev); void Step(int lev, int YN); void AnalysisStuff_EScalar(int lev, double dT_lev); - void Interp_Constraint(); + void Interp_Constraint(bool infg); void Constraint_Out(); protected: diff --git a/AMSS_NCKU_source/bssn_class.C b/AMSS_NCKU_source/bssn_class.C index b9ba856..233a6bf 100644 --- a/AMSS_NCKU_source/bssn_class.C +++ b/AMSS_NCKU_source/bssn_class.C @@ -280,10 +280,10 @@ namespace rhs_kernel_timing_report //================================================================================================ -bssn_class::bssn_class(double Couranti, double StartTimei, double TotalTimei, - double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei, - int Symmetryi, int checkruni, char *checkfilenamei, - double numepssi, double numepsbi, double numepshi, +bssn_class::bssn_class(double Couranti, double StartTimei, double TotalTimei, + double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei, + int Symmetryi, int checkruni, char *checkfilenamei, + double numepssi, double numepsbi, double numepshi, int a_levi, int maxli, int decni, double maxrexi, double drexi) : Courant(Couranti), StartTime(StartTimei), TotalTime(TotalTimei), DumpTime(DumpTimei), d2DumpTime(d2DumpTimei), CheckTime(CheckTimei), AnasTime(AnasTimei), @@ -295,12 +295,34 @@ bssn_class::bssn_class(double Couranti, double StartTimei, double TotalTimei, ConstraintRefreshLevels(0), CheckPoint(0) // CheckPoint(0) -{ - MPI_Comm_size(MPI_COMM_WORLD, &nprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - - // setup Monitors - { +{ + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + // Derived classes override Initialize(), so ownership-sensitive members must + // be in a known state before any specialized setup path runs. + GH = 0; + SH = 0; + PhysTime = 0.0; + BH_num = 0; + BH_num_input = 0; + Porg0 = 0; + Porgbr = 0; + Porg = 0; + Porg1 = 0; + Porg_rhs = 0; + Mass = 0; + Pmom = 0; + Spin = 0; + sync_cache_pre = 0; + sync_cache_cor = 0; + sync_cache_rp_coarse = 0; + sync_cache_rp_fine = 0; + sync_cache_restrict = 0; + sync_cache_outbd = 0; + + // setup Monitors + { stringstream a_stream; a_stream.setf(ios::left); a_stream << "# Error log information"; @@ -986,13 +1008,7 @@ void bssn_class::Initialize() Setup_Black_Hole_position(); } - // Initialize sync caches (per-level, for predictor and corrector) - sync_cache_pre = new Parallel::SyncCache[GH->levels]; - sync_cache_cor = new Parallel::SyncCache[GH->levels]; - sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels]; - sync_cache_rp_fine = new Parallel::SyncCache[GH->levels]; - sync_cache_restrict = new Parallel::SyncCache[GH->levels]; - sync_cache_outbd = new Parallel::SyncCache[GH->levels]; + setup_transfer_caches(); } //================================================================================================ @@ -1005,10 +1021,10 @@ void bssn_class::Initialize() //================================================================================================ -bssn_class::~bssn_class() -{ -#ifdef With_AHF - AHList->clearList(); +bssn_class::~bssn_class() +{ +#ifdef With_AHF + AHList->clearList(); AHDList->clearList(); GaugeList->clearList(); if (lastahdumpid) @@ -1213,13 +1229,13 @@ bssn_class::~bssn_class() delete Cons_Ham; delete Cons_Px; - delete Cons_Py; - delete Cons_Pz; - delete Cons_Gx; - delete Cons_Gy; - delete Cons_Gz; - -#ifdef Point_Psi4 + delete Cons_Py; + delete Cons_Pz; + delete Cons_Gx; + delete Cons_Gy; + delete Cons_Gz; + +#ifdef Point_Psi4 delete phix; delete phiy; delete phiz; @@ -1245,40 +1261,17 @@ bssn_class::~bssn_class() delete Azzy; delete Azzz; #endif - - // Destroy sync caches before GH - if (sync_cache_pre) - { - for (int i = 0; i < GH->levels; i++) - sync_cache_pre[i].destroy(); - delete[] sync_cache_pre; - } - if (sync_cache_cor) - { - for (int i = 0; i < GH->levels; i++) - sync_cache_cor[i].destroy(); - delete[] sync_cache_cor; - } - if (sync_cache_rp_coarse) - { - for (int i = 0; i < GH->levels; i++) - sync_cache_rp_coarse[i].destroy(); - delete[] sync_cache_rp_coarse; - } - if (sync_cache_rp_fine) - { - for (int i = 0; i < GH->levels; i++) - sync_cache_rp_fine[i].destroy(); - delete[] sync_cache_rp_fine; - } - - delete GH; -#ifdef WithShell - delete SH; -#endif - - for (int i = 0; i < BH_num; i++) - { + + // Destroy sync caches before GH + destroy_transfer_caches(); + + delete GH; +#ifdef WithShell + delete SH; +#endif + + for (int i = 0; i < BH_num; i++) + { delete[] Porg0[i]; delete[] Porgbr[i]; delete[] Porg[i]; @@ -1291,21 +1284,21 @@ bssn_class::~bssn_class() delete[] Porg; delete[] Porg1; delete[] Porg_rhs; - - delete[] Mass; - delete[] Spin; - delete[] Pmom; - - delete ErrorMonitor; - delete Psi4Monitor; + + delete[] Mass; + delete[] Spin; + delete[] Pmom; + + delete ErrorMonitor; + delete Psi4Monitor; delete BHMonitor; delete MAPMonitor; delete ConVMonitor; delete TimingMonitor; delete Waveshell; - - delete CheckPoint; -} + + delete CheckPoint; +} //================================================================================================ @@ -2489,9 +2482,7 @@ void bssn_class::Evolve(int Steps) GH->Regrid(Symmetry, BH_num, Porgbr, Porg0, SynchList_cor, OldStateList, StateList, SynchList_pre, fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor); -#if (ABEtype != 1 && ABEtype != 2) - for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } -#endif + invalidate_transfer_caches(); STEP_TIMER_ADD(TB_REGRID, timer_regrid); #endif @@ -2732,9 +2723,7 @@ void bssn_class::RecursiveStep(int lev) { if (ConstraintRefreshLevels) ConstraintRefreshLevels[lev] = 1; -#if (ABEtype != 1 && ABEtype != 2) - for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } -#endif + invalidate_transfer_caches(); } STEP_TIMER_ADD(TB_REGRID, timer_regrid_onelevel); #endif @@ -2912,13 +2901,11 @@ void bssn_class::ParallelStep() delete[] tporg; delete[] tporgo; #if (REGLEV == 0) - if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0, - SynchList_cor, OldStateList, StateList, SynchList_pre, - fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor)) -#if (ABEtype != 1 && ABEtype != 2) - for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } + if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0, + SynchList_cor, OldStateList, StateList, SynchList_pre, + fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor)) + invalidate_transfer_caches(); #endif -#endif } //================================================================================================ @@ -3081,12 +3068,10 @@ void bssn_class::ParallelStep() if (lev + 1 >= GH->movls) { // GH->Regrid_Onelevel_aux(lev,Symmetry,BH_num,Porgbr,Porg0, - if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0, - SynchList_cor, OldStateList, StateList, SynchList_pre, - fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor)) -#if (ABEtype != 1 && ABEtype != 2) - for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } -#endif + if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0, + SynchList_cor, OldStateList, StateList, SynchList_pre, + fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor)) + invalidate_transfer_caches(); // a_stream.clear(); // a_stream.str(""); @@ -3098,12 +3083,10 @@ void bssn_class::ParallelStep() // for this level if (YN == 1) { - if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0, - SynchList_cor, OldStateList, StateList, SynchList_pre, - fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor)) -#if (ABEtype != 1 && ABEtype != 2) - for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } -#endif + if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0, + SynchList_cor, OldStateList, StateList, SynchList_pre, + fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor)) + invalidate_transfer_caches(); // a_stream.clear(); // a_stream.str(""); @@ -3119,12 +3102,10 @@ void bssn_class::ParallelStep() if (YN == 1) { // GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0, - if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0, - SynchList_cor, OldStateList, StateList, SynchList_pre, - fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor)) -#if (ABEtype != 1 && ABEtype != 2) - for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } -#endif + if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0, + SynchList_cor, OldStateList, StateList, SynchList_pre, + fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor)) + invalidate_transfer_caches(); // a_stream.clear(); // a_stream.str(""); @@ -3137,12 +3118,10 @@ void bssn_class::ParallelStep() if (i % 4 == 3) { // GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0, - if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0, - SynchList_cor, OldStateList, StateList, SynchList_pre, - fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor)) -#if (ABEtype != 1 && ABEtype != 2) - for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } -#endif + if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0, + SynchList_cor, OldStateList, StateList, SynchList_pre, + fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor)) + invalidate_transfer_caches(); // a_stream.clear(); // a_stream.str(""); @@ -3673,11 +3652,7 @@ void bssn_class::Step(int lev, int YN) STEP_TIMER_DECL(timer_predictor_sync); Parallel::AsyncSyncState async_pre; -#if (ABEtype == 1) - Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); -#else - Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre); -#endif + sync_predictor_start(lev, SynchList_pre, async_pre); #ifdef WithShell if (lev == 0) @@ -3696,9 +3671,7 @@ void bssn_class::Step(int lev, int YN) } } #endif -#if (ABEtype != 1) - Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry); -#endif + sync_predictor_finish(lev, async_pre, SynchList_pre); #ifdef WithShell // Complete non-blocking error reduction and check @@ -4044,11 +4017,7 @@ void bssn_class::Step(int lev, int YN) STEP_TIMER_DECL(timer_corrector_sync); Parallel::AsyncSyncState async_cor; -#if (ABEtype == 1) - Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); -#else - Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor); -#endif + sync_corrector_start(lev, SynchList_cor, async_cor); #ifdef WithShell if (lev == 0) @@ -4067,9 +4036,7 @@ void bssn_class::Step(int lev, int YN) } } #endif -#if (ABEtype != 1) - Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry); -#endif + sync_corrector_finish(lev, async_cor, SynchList_cor); #ifdef WithShell // Complete non-blocking error reduction and check @@ -4554,15 +4521,11 @@ void bssn_class::Step(int lev, int YN) { int erh = ERROR; MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req); - } -#endif - - Parallel::AsyncSyncState async_pre; -#if (ABEtype == 1) - Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); -#else - Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre); + } #endif + + Parallel::AsyncSyncState async_pre; + sync_predictor_start(lev, SynchList_pre, async_pre); #ifdef WithShell if (lev == 0) @@ -4581,9 +4544,7 @@ void bssn_class::Step(int lev, int YN) } } #endif -#if (ABEtype != 1) - Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry); -#endif + sync_predictor_finish(lev, async_pre, SynchList_pre); #ifdef WithShell // Complete non-blocking error reduction and check @@ -4911,12 +4872,8 @@ void bssn_class::Step(int lev, int YN) } #endif - Parallel::AsyncSyncState async_cor; -#if (ABEtype == 1) - Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); -#else - Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor); -#endif + Parallel::AsyncSyncState async_cor; + sync_corrector_start(lev, SynchList_cor, async_cor); #ifdef WithShell if (lev == 0) @@ -4935,9 +4892,7 @@ void bssn_class::Step(int lev, int YN) } } #endif -#if (ABEtype != 1) - Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry); -#endif + sync_corrector_finish(lev, async_cor, SynchList_cor); #ifdef WithShell // Complete non-blocking error reduction and check @@ -5329,11 +5284,7 @@ void bssn_class::Step(int lev, int YN) // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor sync"); -#if (ABEtype == 1) - Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); -#else - Parallel::Sync_cached(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev]); -#endif + sync_evolution(lev, SynchList_pre, sync_cache_pre); // Complete non-blocking error reduction and check MPI_Wait(&err_req, MPI_STATUS_IGNORE); @@ -5534,11 +5485,7 @@ void bssn_class::Step(int lev, int YN) // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector sync"); -#if (ABEtype == 1) - Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); -#else - Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev]); -#endif + sync_evolution(lev, SynchList_cor, sync_cache_cor); // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Corrector sync"); @@ -6255,15 +6202,11 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB, #endif #if (RPB == 0) -#if (ABEtype == 1 || ABEtype == 2) - Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry); -#else - Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry, sync_cache_restrict[lev]); -#endif + restrict_evolution(lev, SL, SynchList_pre); #elif (RPB == 1) - // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry); - Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry); -#endif + // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry); + Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry); +#endif #if (PSTR == 1 || PSTR == 2) // a_stream.clear(); @@ -6272,11 +6215,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB, // misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); #endif -#if (ABEtype == 1 || ABEtype == 2) - Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry); -#else - Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]); -#endif + sync_evolution(lev - 1, SynchList_pre, sync_cache_rp_coarse); #if (PSTR == 1 || PSTR == 2) // a_stream.clear(); @@ -6287,21 +6226,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB, #if (RPB == 0) #if (MIXOUTB == 0) -#if (ABEtype == 1 || ABEtype == 2) - Ppc = GH->PatL[lev - 1]; - while (Ppc) - { - Pp = GH->PatL[lev]; - while (Pp) - { - Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry); - Pp = Pp->next; - } - Ppc = Ppc->next; - } -#else - Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry, sync_cache_outbd[lev]); -#endif + outbdlow2hi_evolution(lev, SynchList_pre, SL); #elif (MIXOUTB == 1) Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry); #endif @@ -6328,15 +6253,11 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB, #endif #if (RPB == 0) -#if (ABEtype == 1 || ABEtype == 2) - Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry); -#else - Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_restrict[lev]); -#endif + restrict_evolution(lev, SL, SL); #elif (RPB == 1) - // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry); - Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry); -#endif + // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry); + Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry); +#endif #if (PSTR == 1 || PSTR == 2) // a_stream.clear(); @@ -6345,11 +6266,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB, // misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); #endif -#if (ABEtype == 1 || ABEtype == 2) - Parallel::Sync(GH->PatL[lev - 1], SL, Symmetry); -#else - Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]); -#endif + sync_evolution(lev - 1, SL, sync_cache_rp_coarse); #if (PSTR == 1 || PSTR == 2) // a_stream.clear(); @@ -6360,21 +6277,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB, #if (RPB == 0) #if (MIXOUTB == 0) -#if (ABEtype == 1 || ABEtype == 2) - Ppc = GH->PatL[lev - 1]; - while (Ppc) - { - Pp = GH->PatL[lev]; - while (Pp) - { - Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SL, SL, Symmetry); - Pp = Pp->next; - } - Ppc = Ppc->next; - } -#else - Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_outbd[lev]); -#endif + outbdlow2hi_evolution(lev, SL, SL); #elif (MIXOUTB == 1) Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry); #endif @@ -6391,7 +6294,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB, #endif } - Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]); + sync_evolution(lev, SL, sync_cache_rp_fine); #if (PSTR == 1 || PSTR == 2) // a_stream.clear(); @@ -6530,17 +6433,17 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB, } #if (RPB == 0) - Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry, sync_cache_restrict[lev]); + restrict_evolution(lev, SL, SynchList_pre); #elif (RPB == 1) // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry); Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry); #endif - Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]); + sync_evolution(lev - 1, SynchList_pre, sync_cache_rp_coarse); #if (RPB == 0) #if (MIXOUTB == 0) - Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry, sync_cache_outbd[lev]); + outbdlow2hi_evolution(lev, SynchList_pre, SL); #elif (MIXOUTB == 1) Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry); #endif @@ -6552,17 +6455,17 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB, else // no time refinement levels and for all same time levels { #if (RPB == 0) - Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_restrict[lev]); + restrict_evolution(lev, SL, SL); #elif (RPB == 1) // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry); Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry); #endif - Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]); + sync_evolution(lev - 1, SL, sync_cache_rp_coarse); #if (RPB == 0) #if (MIXOUTB == 0) - Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_outbd[lev]); + outbdlow2hi_evolution(lev, SL, SL); #elif (MIXOUTB == 1) Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry); #endif @@ -6575,7 +6478,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB, #if (ABEtype == 1 || ABEtype == 2) Parallel::Sync(GH->PatL[lev], SL, Symmetry); #else - Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]); + sync_evolution(lev, SL, sync_cache_rp_fine); #endif } STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong); @@ -6708,39 +6611,17 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB) } #if (RPB == 0) -#if (ABEtype == 1 || ABEtype == 2) - Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, Symmetry); -#else - Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, Symmetry, sync_cache_restrict[lev]); -#endif + restrict_evolution(lev, SynchList_cor, SynchList_pre); #elif (RPB == 1) - // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,SynchList_pre,Symmetry); - Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry); -#endif - -#if (ABEtype == 1 || ABEtype == 2) - Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry); -#else - Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]); + // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,SynchList_pre,Symmetry); + Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry); #endif + + sync_evolution(lev - 1, SynchList_pre, sync_cache_rp_coarse); #if (RPB == 0) #if (MIXOUTB == 0) -#if (ABEtype == 1 || ABEtype == 2) - Ppc = GH->PatL[lev - 1]; - while (Ppc) - { - Pp = GH->PatL[lev]; - while (Pp) - { - Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry); - Pp = Pp->next; - } - Ppc = Ppc->next; - } -#else - Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry, sync_cache_outbd[lev]); -#endif + outbdlow2hi_evolution(lev, SynchList_pre, SynchList_cor); #elif (MIXOUTB == 1) Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry); #endif @@ -6754,39 +6635,17 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB) if (myrank == 0) cout << "===: " << GH->Lt[lev - 1] << "," << GH->Lt[lev] + dT_lev << endl; #if (RPB == 0) -#if (ABEtype == 1 || ABEtype == 2) - Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry); -#else - Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry, sync_cache_restrict[lev]); -#endif + restrict_evolution(lev, SynchList_cor, StateList); #elif (RPB == 1) - // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry); - Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry); -#endif - -#if (ABEtype == 1 || ABEtype == 2) - Parallel::Sync(GH->PatL[lev - 1], StateList, Symmetry); -#else - Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]); + // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry); + Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry); #endif + + sync_evolution(lev - 1, StateList, sync_cache_rp_coarse); #if (RPB == 0) #if (MIXOUTB == 0) -#if (ABEtype == 1 || ABEtype == 2) - Ppc = GH->PatL[lev - 1]; - while (Ppc) - { - Pp = GH->PatL[lev]; - while (Pp) - { - Parallel::OutBdLow2Hi(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry); - Pp = Pp->next; - } - Ppc = Ppc->next; - } -#else - Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry, sync_cache_outbd[lev]); -#endif + outbdlow2hi_evolution(lev, StateList, SynchList_cor); #elif (MIXOUTB == 1) Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry); #endif @@ -6796,11 +6655,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB) #endif } -#if (ABEtype == 1 || ABEtype == 2) - Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); -#else - Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]); -#endif + sync_evolution(lev, SynchList_cor, sync_cache_rp_fine); } STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong); } @@ -6830,12 +6685,12 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB) Pp = Pp->next; } -#if (RPB == 0) -#if (MIXOUTB == 0) - Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry, sync_cache_outbd[lev]); -#elif (MIXOUTB == 1) - Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry); -#endif +#if (RPB == 0) +#if (MIXOUTB == 0) + outbdlow2hi_evolution(lev, SynchList_pre, SynchList_cor); +#elif (MIXOUTB == 1) + Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry); +#endif #elif (RPB == 1) // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry); Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry); @@ -6843,12 +6698,12 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB) } else // no time refinement levels and for all same time levels { -#if (RPB == 0) -#if (MIXOUTB == 0) - Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry, sync_cache_outbd[lev]); -#elif (MIXOUTB == 1) - Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry); -#endif +#if (RPB == 0) +#if (MIXOUTB == 0) + outbdlow2hi_evolution(lev, StateList, SynchList_cor); +#elif (MIXOUTB == 1) + Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry); +#endif #elif (RPB == 1) // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry); Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry); @@ -6864,12 +6719,12 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB) #else Parallel::Restrict_after(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry); #endif - Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]); - } - - Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]); - } -} + sync_evolution(lev - 1, StateList, sync_cache_rp_coarse); + } + + sync_evolution(lev, SynchList_cor, sync_cache_rp_fine); + } +} #undef MIXOUTB //================================================================================================ @@ -7597,6 +7452,169 @@ void bssn_class::compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, va } } } + +bool bssn_class::use_transfer_cache() const +{ +#if BSSN_USE_TRANSFER_CACHE + return true; +#else + return false; +#endif +} + +void bssn_class::setup_transfer_caches() +{ + sync_cache_pre = 0; + sync_cache_cor = 0; + sync_cache_rp_coarse = 0; + sync_cache_rp_fine = 0; + sync_cache_restrict = 0; + sync_cache_outbd = 0; + + if (!use_transfer_cache() || !GH) + return; + + sync_cache_pre = new Parallel::SyncCache[GH->levels]; + sync_cache_cor = new Parallel::SyncCache[GH->levels]; + sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels]; + sync_cache_rp_fine = new Parallel::SyncCache[GH->levels]; + sync_cache_restrict = new Parallel::SyncCache[GH->levels]; + sync_cache_outbd = new Parallel::SyncCache[GH->levels]; +} + +void bssn_class::invalidate_transfer_caches() +{ + if (!use_transfer_cache() || !GH || !sync_cache_pre || !sync_cache_cor || + !sync_cache_rp_coarse || !sync_cache_rp_fine || !sync_cache_restrict || !sync_cache_outbd) + return; + + for (int il = 0; il < GH->levels; il++) + { + sync_cache_pre[il].invalidate(); + sync_cache_cor[il].invalidate(); + sync_cache_rp_coarse[il].invalidate(); + sync_cache_rp_fine[il].invalidate(); + sync_cache_restrict[il].invalidate(); + sync_cache_outbd[il].invalidate(); + } +} + +void bssn_class::destroy_transfer_caches() +{ + if (sync_cache_pre) + { + if (use_transfer_cache() && GH) + for (int i = 0; i < GH->levels; i++) + sync_cache_pre[i].destroy(); + delete[] sync_cache_pre; + sync_cache_pre = 0; + } + if (sync_cache_cor) + { + if (use_transfer_cache() && GH) + for (int i = 0; i < GH->levels; i++) + sync_cache_cor[i].destroy(); + delete[] sync_cache_cor; + sync_cache_cor = 0; + } + if (sync_cache_rp_coarse) + { + if (use_transfer_cache() && GH) + for (int i = 0; i < GH->levels; i++) + sync_cache_rp_coarse[i].destroy(); + delete[] sync_cache_rp_coarse; + sync_cache_rp_coarse = 0; + } + if (sync_cache_rp_fine) + { + if (use_transfer_cache() && GH) + for (int i = 0; i < GH->levels; i++) + sync_cache_rp_fine[i].destroy(); + delete[] sync_cache_rp_fine; + sync_cache_rp_fine = 0; + } + if (sync_cache_restrict) + { + if (use_transfer_cache() && GH) + for (int i = 0; i < GH->levels; i++) + sync_cache_restrict[i].destroy(); + delete[] sync_cache_restrict; + sync_cache_restrict = 0; + } + if (sync_cache_outbd) + { + if (use_transfer_cache() && GH) + for (int i = 0; i < GH->levels; i++) + sync_cache_outbd[i].destroy(); + delete[] sync_cache_outbd; + sync_cache_outbd = 0; + } +} + +void bssn_class::sync_predictor_start(int lev, MyList *VarList, Parallel::AsyncSyncState &async_state) +{ + if (use_transfer_cache()) + Parallel::Sync_start(GH->PatL[lev], VarList, Symmetry, sync_cache_pre[lev], async_state); + else + Parallel::Sync(GH->PatL[lev], VarList, Symmetry); +} + +void bssn_class::sync_predictor_finish(int lev, Parallel::AsyncSyncState &async_state, MyList *VarList) +{ + if (use_transfer_cache()) + Parallel::Sync_finish(sync_cache_pre[lev], async_state, VarList, Symmetry); +} + +void bssn_class::sync_corrector_start(int lev, MyList *VarList, Parallel::AsyncSyncState &async_state) +{ + if (use_transfer_cache()) + Parallel::Sync_start(GH->PatL[lev], VarList, Symmetry, sync_cache_cor[lev], async_state); + else + Parallel::Sync(GH->PatL[lev], VarList, Symmetry); +} + +void bssn_class::sync_corrector_finish(int lev, Parallel::AsyncSyncState &async_state, MyList *VarList) +{ + if (use_transfer_cache()) + Parallel::Sync_finish(sync_cache_cor[lev], async_state, VarList, Symmetry); +} + +void bssn_class::sync_evolution(int lev, MyList *VarList, Parallel::SyncCache *cache_array) +{ + if (use_transfer_cache() && cache_array) + Parallel::Sync_cached(GH->PatL[lev], VarList, Symmetry, cache_array[lev]); + else + Parallel::Sync(GH->PatL[lev], VarList, Symmetry); +} + +void bssn_class::restrict_evolution(int lev, MyList *src_var_list, MyList *dst_var_list) +{ + if (use_transfer_cache()) + Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], src_var_list, dst_var_list, Symmetry, sync_cache_restrict[lev]); + else + Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], src_var_list, dst_var_list, Symmetry); +} + +void bssn_class::outbdlow2hi_evolution(int lev, MyList *src_var_list, MyList *dst_var_list) +{ + if (use_transfer_cache()) + { + Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], src_var_list, dst_var_list, Symmetry, sync_cache_outbd[lev]); + return; + } + + MyList *Ppc = GH->PatL[lev - 1]; + while (Ppc) + { + MyList *Pp = GH->PatL[lev]; + while (Pp) + { + Parallel::OutBdLow2Hi(Ppc->data, Pp->data, src_var_list, dst_var_list, Symmetry); + Pp = Pp->next; + } + Ppc = Ppc->next; + } +} #endif //================================================================================================ @@ -8265,7 +8283,7 @@ void bssn_class::AH_Prepare_derivatives() bool bssn_class::AH_Interp_Points(MyList *VarList, int NN, double **XX, - double *Shellf, int Symmetryi) + double *Shellf, int Symmetryi) { MyList *varl; int num_var = 0; diff --git a/AMSS_NCKU_source/bssn_class.h b/AMSS_NCKU_source/bssn_class.h index a2536cb..eb78a16 100644 --- a/AMSS_NCKU_source/bssn_class.h +++ b/AMSS_NCKU_source/bssn_class.h @@ -31,11 +31,19 @@ using namespace std; #include "surface_integral.h" #include "checkpoint.h" -extern void setpbh(int iBHN, double **iPBH, double *iMass, int rBHN); - -class bssn_class -{ -public: +extern void setpbh(int iBHN, double **iPBH, double *iMass, int rBHN); + +#ifndef BSSN_USE_TRANSFER_CACHE +#define BSSN_USE_TRANSFER_CACHE 1 +#endif + +#ifndef BSSN_USE_ESCALAR_C_KERNEL +#define BSSN_USE_ESCALAR_C_KERNEL 1 +#endif + +class bssn_class +{ +public: int ngfs; int nprocs, myrank; cgh *GH; @@ -167,14 +175,25 @@ public: void Setup_KerrSchild(); void Enforce_algcon(int lev, int fg); - void testRestrict(); - void testOutBd(); - - bool check_Stdin_Abort(); - - virtual void Setup_Initial_Data_Cao(); - virtual void Setup_Initial_Data_Lousto(); - virtual void Initialize(); + void testRestrict(); + void testOutBd(); + + bool check_Stdin_Abort(); + bool use_transfer_cache() const; + void setup_transfer_caches(); + void invalidate_transfer_caches(); + void destroy_transfer_caches(); + void sync_predictor_start(int lev, MyList *VarList, Parallel::AsyncSyncState &async_state); + void sync_predictor_finish(int lev, Parallel::AsyncSyncState &async_state, MyList *VarList); + void sync_corrector_start(int lev, MyList *VarList, Parallel::AsyncSyncState &async_state); + void sync_corrector_finish(int lev, Parallel::AsyncSyncState &async_state, MyList *VarList); + void sync_evolution(int lev, MyList *VarList, Parallel::SyncCache *cache_array = 0); + void restrict_evolution(int lev, MyList *src_var_list, MyList *dst_var_list); + void outbdlow2hi_evolution(int lev, MyList *src_var_list, MyList *dst_var_list); + + virtual void Setup_Initial_Data_Cao(); + virtual void Setup_Initial_Data_Lousto(); + virtual void Initialize(); virtual void Read_Ansorg(); virtual void Read_Pablo() {}; virtual void Compute_Psi4(int lev); diff --git a/AMSS_NCKU_source/bssn_escalar_rhs_c.C b/AMSS_NCKU_source/bssn_escalar_rhs_c.C new file mode 100644 index 0000000..6785af1 --- /dev/null +++ b/AMSS_NCKU_source/bssn_escalar_rhs_c.C @@ -0,0 +1,169 @@ +#include "macrodef.h" +#include "bssn_rhs.h" +#include "share_func.h" +#include "tool.h" +#include + +namespace +{ + // Reuse the temporary workspace across block calls to avoid repeated heap churn + // in the EScalar wrapper. MPI ranks execute this path sequentially, so a single + // process-local buffer is sufficient here. + std::vector g_escalar_tmp_store; +} + +#ifdef fortran1 +#define f_frpotential frpotential +#endif +#ifdef fortran2 +#define f_frpotential FRPOTENTIAL +#endif +#ifdef fortran3 +#define f_frpotential frpotential_ +#endif + +extern "C" +{ + void f_frpotential(int *, double *, double *, double *); +} + +int f_compute_rhs_bssn_escalar_c(int *ex, double &T, + double *X, double *Y, double *Z, + double *chi, double *trK, + double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz, + double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz, + double *Gamx, double *Gamy, double *Gamz, + double *Lap, double *betax, double *betay, double *betaz, + double *dtSfx, double *dtSfy, double *dtSfz, + double *Sphi, double *Spi, + double *chi_rhs, double *trK_rhs, + double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs, + double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs, + double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs, + double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs, + double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs, + double *Sphi_rhs, double *Spi_rhs, + double *rho, double *Sx, double *Sy, double *Sz, + double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz, + double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz, + double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz, + double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz, + double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz, + double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res, + double *Gmx_Res, double *Gmy_Res, double *Gmz_Res, + int &Symmetry, int &Lev, double &eps, int &co) +{ + const int nx = ex[0], ny = ex[1], nz = ex[2]; + const int all = nx * ny * nz; + + const size_t workspace_size = size_t(all) * 17; + if (g_escalar_tmp_store.size() < workspace_size) + g_escalar_tmp_store.resize(workspace_size); + + double *tmp_ptr = g_escalar_tmp_store.data(); + auto alloc_tmp = [&](int n = 1) -> double * + { + double *ptr = tmp_ptr; + tmp_ptr += size_t(all) * n; + return ptr; + }; + + double *chix = alloc_tmp(), *chiy = alloc_tmp(), *chiz = alloc_tmp(); + double *Kx = alloc_tmp(), *Ky = alloc_tmp(), *Kz = alloc_tmp(); + double *fxx = alloc_tmp(), *fxy = alloc_tmp(), *fxz = alloc_tmp(); + double *fyy = alloc_tmp(), *fyz = alloc_tmp(), *fzz = alloc_tmp(); + double *Lapx = alloc_tmp(), *Lapy = alloc_tmp(), *Lapz = alloc_tmp(); + double *V = alloc_tmp(), *dVdSphi = alloc_tmp(); + + const double ZEO = 0.0, ONE = 1.0, TWO = 2.0, HALF = 0.5; + const double SSS[3] = {1.0, 1.0, 1.0}; + + fderivs(ex, chi, chix, chiy, chiz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev); + fderivs(ex, Lap, Lapx, Lapy, Lapz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev); + fderivs(ex, Sphi, Kx, Ky, Kz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev); + fdderivs(ex, Sphi, fxx, fxy, fxz, fyy, fyz, fzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev); + + f_frpotential(ex, Sphi, V, dVdSphi); + + for (int i = 0; i < all; ++i) + { + const double alpn1 = Lap[i] + ONE; + const double chin1 = chi[i] + ONE; + const double gxx = dxx[i] + ONE; + const double gyy = dyy[i] + ONE; + const double gzz = dzz[i] + ONE; + const double det = gxx * gyy * gzz + gxy[i] * gyz[i] * gxz[i] + gxz[i] * gxy[i] * gyz[i] + - gxz[i] * gyy * gxz[i] - gxy[i] * gxy[i] * gzz - gxx * gyz[i] * gyz[i]; + const double gupxx = (gyy * gzz - gyz[i] * gyz[i]) / det; + const double gupxy = -(gxy[i] * gzz - gyz[i] * gxz[i]) / det; + const double gupxz = (gxy[i] * gyz[i] - gyy * gxz[i]) / det; + const double gupyy = (gxx * gzz - gxz[i] * gxz[i]) / det; + const double gupyz = -(gxx * gyz[i] - gxy[i] * gxz[i]) / det; + const double gupzz = (gxx * gyy - gxy[i] * gxy[i]) / det; + + Sphi_rhs[i] = alpn1 * Spi[i]; + + Spi_rhs[i] = gupxx * fxx[i] + gupyy * fyy[i] + gupzz * fzz[i] + + TWO * (gupxy * fxy[i] + gupxz * fxz[i] + gupyz * fyz[i]) + - ((Gamx[i] + (gupxx * chix[i] + gupxy * chiy[i] + gupxz * chiz[i]) / TWO / chin1) * Kx[i] + + (Gamy[i] + (gupxy * chix[i] + gupyy * chiy[i] + gupyz * chiz[i]) / TWO / chin1) * Ky[i] + + (Gamz[i] + (gupxz * chix[i] + gupyz * chiy[i] + gupzz * chiz[i]) / TWO / chin1) * Kz[i]); + + Spi_rhs[i] = Spi_rhs[i] * alpn1 + + gupxx * Lapx[i] * Kx[i] + gupxy * Lapx[i] * Ky[i] + gupxz * Lapx[i] * Kz[i] + + gupxy * Lapy[i] * Kx[i] + gupyy * Lapy[i] * Ky[i] + gupyz * Lapy[i] * Kz[i] + + gupxz * Lapz[i] * Kx[i] + gupyz * Lapz[i] * Ky[i] + gupzz * Lapz[i] * Kz[i]; + + Spi_rhs[i] = Spi_rhs[i] * chin1 + alpn1 * (trK[i] * Spi[i] - dVdSphi[i]); + + rho[i] = chin1 * ((gupxx * Kx[i] * Kx[i] + gupyy * Ky[i] * Ky[i] + gupzz * Kz[i] * Kz[i]) * HALF + + gupxy * Kx[i] * Ky[i] + gupxz * Kx[i] * Kz[i] + gupyz * Ky[i] * Kz[i]) + + Spi[i] * Spi[i] * HALF + V[i]; + Sx[i] = -Spi[i] * Kx[i]; + Sy[i] = -Spi[i] * Ky[i]; + Sz[i] = -Spi[i] * Kz[i]; + + const double pressure = (rho[i] - Spi[i] * Spi[i]) / chin1; + Sxx[i] = Kx[i] * Kx[i] - pressure * gxx; + Sxy[i] = Kx[i] * Ky[i] - pressure * gxy[i]; + Sxz[i] = Kx[i] * Kz[i] - pressure * gxz[i]; + Syy[i] = Ky[i] * Ky[i] - pressure * gyy; + Syz[i] = Ky[i] * Kz[i] - pressure * gyz[i]; + Szz[i] = Kz[i] * Kz[i] - pressure * gzz; + } + + if (f_compute_rhs_bssn(ex, T, X, Y, Z, + chi, trK, + dxx, gxy, gxz, dyy, gyz, dzz, + Axx, Axy, Axz, Ayy, Ayz, Azz, + Gamx, Gamy, Gamz, + Lap, betax, betay, betaz, + dtSfx, dtSfy, dtSfz, + chi_rhs, trK_rhs, + gxx_rhs, gxy_rhs, gxz_rhs, gyy_rhs, gyz_rhs, gzz_rhs, + Axx_rhs, Axy_rhs, Axz_rhs, Ayy_rhs, Ayz_rhs, Azz_rhs, + Gamx_rhs, Gamy_rhs, Gamz_rhs, + Lap_rhs, betax_rhs, betay_rhs, betaz_rhs, + dtSfx_rhs, dtSfy_rhs, dtSfz_rhs, + rho, Sx, Sy, Sz, + Sxx, Sxy, Sxz, Syy, Syz, Szz, + Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz, + Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz, + Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz, + Rxx, Rxy, Rxz, Ryy, Ryz, Rzz, + ham_Res, movx_Res, movy_Res, movz_Res, + Gmx_Res, Gmy_Res, Gmz_Res, + Symmetry, Lev, eps, co)) + return 1; + + lopsided_kodis(ex, X, Y, Z, Sphi, Sphi_rhs, betax, betay, betaz, Symmetry, SSS, eps); + lopsided_kodis(ex, X, Y, Z, Spi, Spi_rhs, betax, betay, betaz, Symmetry, SSS, eps); + + for (int i = 0; i < all; ++i) + { + if (Sphi_rhs[i] != Sphi_rhs[i] || Spi_rhs[i] != Spi_rhs[i] || rho[i] != rho[i]) + return 1; + } + + return 0; +} diff --git a/AMSS_NCKU_source/bssn_rhs.h b/AMSS_NCKU_source/bssn_rhs.h index 96a545a..4384118 100644 --- a/AMSS_NCKU_source/bssn_rhs.h +++ b/AMSS_NCKU_source/bssn_rhs.h @@ -63,13 +63,34 @@ extern "C" double *, double *, double *, double *, double *, double *, // Christoffel double *, double *, double *, double *, double *, double *, // Christoffel double *, double *, double *, double *, double *, double *, // Ricci - double *, double *, double *, double *, double *, double *, double *, // constraint violation - int &, int &, double &, int &); -} - -extern "C" -{ - int f_compute_rhs_bssn_ss(int *, double &, double *, double *, double *, // ex,T,rho,sigma,R + double *, double *, double *, double *, double *, double *, double *, // constraint violation + int &, int &, double &, int &); +} + +int f_compute_rhs_bssn_escalar_c(int *, double &, double *, double *, double *, // ex,T,X,Y,Z + double *, double *, // chi, trK + double *, double *, double *, double *, double *, double *, // gij + double *, double *, double *, double *, double *, double *, // Aij + double *, double *, double *, // Gam + double *, double *, double *, double *, double *, double *, double *, // Gauge + double *, double *, // Sphi, Spi + double *, double *, // chi, trK + double *, double *, double *, double *, double *, double *, // gij + double *, double *, double *, double *, double *, double *, // Aij + double *, double *, double *, // Gam + double *, double *, double *, double *, double *, double *, double *, // Gauge + double *, double *, // Sphi, Spi + double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, // stress-energy + double *, double *, double *, double *, double *, double *, // Christoffel + double *, double *, double *, double *, double *, double *, // Christoffel + double *, double *, double *, double *, double *, double *, // Christoffel + double *, double *, double *, double *, double *, double *, // Ricci + double *, double *, double *, double *, double *, double *, double *, // constraint violation + int &, int &, double &, int &); + +extern "C" +{ + int f_compute_rhs_bssn_ss(int *, double &, double *, double *, double *, // ex,T,rho,sigma,R double *, double *, double *, // X,Y,Z double *, double *, double *, // drhodx,drhody,drhodz double *, double *, double *, // dsigmadx,dsigmady,dsigmadz @@ -96,10 +117,10 @@ extern "C" int &, int &, double &, int &, int &); } -extern "C" -{ - int f_compute_rhs_bssn_escalar(int *, double &, double *, double *, double *, // ex,T,X,Y,Z - double *, double *, // chi, trK +extern "C" +{ + int f_compute_rhs_bssn_escalar(int *, double &, double *, double *, double *, // ex,T,X,Y,Z + double *, double *, // chi, trK double *, double *, double *, double *, double *, double *, // gij double *, double *, double *, double *, double *, double *, // Aij double *, double *, double *, // Gam @@ -116,14 +137,14 @@ extern "C" double *, double *, double *, double *, double *, double *, // Christoffel double *, double *, double *, double *, double *, double *, // Christoffel double *, double *, double *, double *, double *, double *, // Ricci - double *, double *, double *, double *, double *, double *, double *, // constraint violation - int &, int &, double &, int &); -} - -extern "C" -{ - int f_compute_rhs_bssn_escalar_ss(int *, double &, double *, double *, double *, // ex,T,rho,sigma,R - double *, double *, double *, // X,Y,Z + double *, double *, double *, double *, double *, double *, double *, // constraint violation + int &, int &, double &, int &); +} + +extern "C" +{ + int f_compute_rhs_bssn_escalar_ss(int *, double &, double *, double *, double *, // ex,T,rho,sigma,R + double *, double *, double *, // X,Y,Z double *, double *, double *, // drhodx,drhody,drhodz double *, double *, double *, // dsigmadx,dsigmady,dsigmadz double *, double *, double *, // dRdx,dRdy,dRdz diff --git a/AMSS_NCKU_source/makefile b/AMSS_NCKU_source/makefile index cbafb76..30dba59 100644 --- a/AMSS_NCKU_source/makefile +++ b/AMSS_NCKU_source/makefile @@ -2,11 +2,43 @@ include makefile.inc +-include AMSS_NCKU_build.mk + +ABE_TYPE ?= $(shell awk '/^[[:space:]]*\#define[[:space:]]+ABEtype/ {print $$3; exit}' macrodef.h 2>/dev/null) + +ifeq ($(USE_TRANSFER_CACHE),auto) +ifeq ($(ABE_TYPE),0) +EFFECTIVE_USE_TRANSFER_CACHE = 1 +else +EFFECTIVE_USE_TRANSFER_CACHE = 0 +endif +else +EFFECTIVE_USE_TRANSFER_CACHE = $(USE_TRANSFER_CACHE) +endif + +ifeq ($(USE_CXX_ESCALAR_KERNEL),1) +ifeq ($(ABE_TYPE),1) +EFFECTIVE_USE_CXX_ESCALAR_KERNEL = 1 +else +EFFECTIVE_USE_CXX_ESCALAR_KERNEL = 0 +endif +else +EFFECTIVE_USE_CXX_ESCALAR_KERNEL = 0 +endif + +ifeq ($(EFFECTIVE_USE_CXX_ESCALAR_KERNEL),1) +ifeq ($(USE_CXX_KERNELS),0) +$(error USE_CXX_ESCALAR_KERNEL=1 requires USE_CXX_KERNELS=1 because bssn_escalar_rhs_c.C reuses the C BSSN kernel) +endif +endif + ## polint(ordn=6) kernel selector: ## 1 (default): barycentric fast path ## 0 : fallback to Neville path POLINT6_USE_BARY ?= 1 POLINT6_FLAG = -DPOLINT6_USE_BARYCENTRIC=$(POLINT6_USE_BARY) +TRANSFER_CACHE_FLAG = -DBSSN_USE_TRANSFER_CACHE=$(EFFECTIVE_USE_TRANSFER_CACHE) +ESCALAR_KERNEL_FLAG = -DBSSN_USE_ESCALAR_C_KERNEL=$(EFFECTIVE_USE_CXX_ESCALAR_KERNEL) ## ABE build flags selected by PGO_MODE (set in makefile.inc, default: opt) ## make -> opt (PGO-guided, maximum performance) @@ -16,7 +48,8 @@ PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata ifeq ($(PGO_MODE),instrument) ## Phase 1: instrumentation — omit -ipo/-fp-model fast=2 for faster build and numerical stability CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \ - -Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) + -Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \ + $(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG) f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \ -align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG) else @@ -26,7 +59,8 @@ else CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \ - -Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) + -Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \ + $(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG) f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \ -align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG) endif @@ -46,11 +80,11 @@ endif $(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH) # C rewrite of BSSN RHS kernel and helpers -bssn_rhs_c.o: bssn_rhs_c.C - ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ - -fderivs_c.o: fderivs_c.C - ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ +bssn_rhs_c.o: bssn_rhs_c.C + ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ + +fderivs_c.o: fderivs_c.C + ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ fdderivs_c.o: fdderivs_c.C ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ @@ -89,8 +123,11 @@ ifeq ($(USE_CXX_KERNELS),0) # Fortran mode: no C rewrite files; bssn_rhs.o is included via F90FILES below CFILES = else -# C++ mode (default): C rewrite of bssn_rhs and helper kernels +# C++ mode (default): C rewrite of bssn/bssn-escalar rhs and helper kernels CFILES = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_kodis_c.o +ifeq ($(EFFECTIVE_USE_CXX_ESCALAR_KERNEL),1) +CFILES += bssn_escalar_rhs_c.o +endif endif ifeq ($(USE_CXX_Z4C_KERNELS),1) diff --git a/AMSS_NCKU_source/makefile.inc b/AMSS_NCKU_source/makefile.inc index 40a5c39..224e893 100755 --- a/AMSS_NCKU_source/makefile.inc +++ b/AMSS_NCKU_source/makefile.inc @@ -53,6 +53,18 @@ USE_CXX_KERNELS ?= 1 ## 0 : use original Fortran Z4c_rhs.o USE_CXX_Z4C_KERNELS ?= 1 +## BSSN-EScalar RHS switch +## 1 (default) : use BSSN-EScalar C wrapper on the normal patch path +## 0 : keep the original Fortran BSSN-EScalar RHS for precision-safe runs +## Note: this requires USE_CXX_KERNELS=1 because the wrapper reuses the C BSSN kernel. +USE_CXX_ESCALAR_KERNEL ?= 1 + +## Cached transfer switch +## auto (default): enable for BSSN vacuum, keep other paths on the safe uncached path +## 1 : force cached Sync/Restrict/OutBd transfer on evolution hot paths +## 0 : force the original uncached transfer path +USE_TRANSFER_CACHE ?= auto + ## RK4 kernel implementation switch ## 1 (default) : use C/C++ rewrite of rungekutta4_rout (for optimization experiments) ## 0 : use original Fortran rungekutta4_rout.o diff --git a/BSSN_BUILD_CONFIG_MIGRATION.md b/BSSN_BUILD_CONFIG_MIGRATION.md new file mode 100644 index 0000000..46a7ffb --- /dev/null +++ b/BSSN_BUILD_CONFIG_MIGRATION.md @@ -0,0 +1,211 @@ +# BSSN Build Config Migration + +This note records the build-configuration fix needed when replacing +`AMSS_NCKU_Input.py` or `generate_macrodef.py` with a newer upstream version. + +## Problem + +`AMSS_NCKU_source/macrodef.h` is not the authoritative file used by normal +runs. `AMSS_NCKU_Program.py` first generates macro files under +`input_data.File_directory`, copies `AMSS_NCKU_source` to +`/AMSS_NCKU_source_copy`, then copies the generated macro files +into that copied source tree and compiles there. + +Therefore, makefile logic must not depend only on the stale +`AMSS_NCKU_source/macrodef.h`. The actual equation path must be passed to the +copied build tree from the same generation step that creates `macrodef.h`. + +The performance regression was caused by compiling/linking the +`BSSN-EScalar` C wrapper into BSSN vacuum builds. For BSSN vacuum (`ABEtype=0`), +the build must use: + +```make +BSSN_USE_TRANSFER_CACHE=1 +BSSN_USE_ESCALAR_C_KERNEL=0 +``` + +and must not link `bssn_escalar_rhs_c.o`. + +## Required Migration Steps + +### 1. Add an ABE type helper in `generate_macrodef.py` + +Add a helper that maps `input_data.Equation_Class` to the numeric `ABEtype`. +Use the same mapping as `macrodef.h`: + +```python +def get_abe_type(): + if ( input_data.Equation_Class == "BSSN" ): + return 0 + elif ( input_data.Equation_Class == "BSSN-EScalar" ): + return 1 + elif ( input_data.Equation_Class == "BSSN-EM" ): + return 3 + elif ( input_data.Equation_Class == "Z4C" ): + return 2 + else: + raise ValueError("Equation_Class setting error!!!") +``` + +Update `generate_macrodef_h()` to print `#define ABEtype {get_abe_type()}` +instead of duplicating the if/elif mapping. + +### 2. Generate a makefile fragment + +In `generate_macrodef.py`, add: + +```python +def generate_build_config(): + file1 = open(os.path.join(input_data.File_directory, "AMSS_NCKU_build.mk"), "w") + print("# Generated by generate_macrodef.py; do not edit manually.", file=file1) + print(f"ABE_TYPE := {get_abe_type()}", file=file1) + file1.close() +``` + +This file is the build-time authority for the equation path. + +### 3. Call and copy the generated build config + +In `AMSS_NCKU_Program.py`, after generating `macrodef.h` and `macrodef.fh`, call: + +```python +generate_macrodef.generate_build_config() +print(" AMSS-NCKU build config AMSS_NCKU_build.mk has been generated. ") +``` + +When copying generated files into `AMSS_NCKU_source_copy`, also copy: + +```python +build_config_path = os.path.join(File_directory, "AMSS_NCKU_build.mk") +shutil.copy2(build_config_path, AMSS_NCKU_source_copy) +``` + +### 4. Make the source makefile consume the generated config + +At the top of `AMSS_NCKU_source/makefile`, after `include makefile.inc`, add: + +```make +-include AMSS_NCKU_build.mk + +ABE_TYPE ?= $(shell awk '/^[[:space:]]*\#define[[:space:]]+ABEtype/ {print $$3; exit}' macrodef.h 2>/dev/null) +``` + +The generated `AMSS_NCKU_build.mk` is used during normal Python-driven builds. +The fallback keeps manual source-tree builds usable. + +### 5. Gate path-specific build options by `ABE_TYPE` + +Use effective build switches: + +```make +ifeq ($(USE_TRANSFER_CACHE),auto) +ifeq ($(ABE_TYPE),0) +EFFECTIVE_USE_TRANSFER_CACHE = 1 +else +EFFECTIVE_USE_TRANSFER_CACHE = 0 +endif +else +EFFECTIVE_USE_TRANSFER_CACHE = $(USE_TRANSFER_CACHE) +endif + +ifeq ($(USE_CXX_ESCALAR_KERNEL),1) +ifeq ($(ABE_TYPE),1) +EFFECTIVE_USE_CXX_ESCALAR_KERNEL = 1 +else +EFFECTIVE_USE_CXX_ESCALAR_KERNEL = 0 +endif +else +EFFECTIVE_USE_CXX_ESCALAR_KERNEL = 0 +endif + +TRANSFER_CACHE_FLAG = -DBSSN_USE_TRANSFER_CACHE=$(EFFECTIVE_USE_TRANSFER_CACHE) +ESCALAR_KERNEL_FLAG = -DBSSN_USE_ESCALAR_C_KERNEL=$(EFFECTIVE_USE_CXX_ESCALAR_KERNEL) +``` + +Only add `bssn_escalar_rhs_c.o` when the effective EScalar C kernel switch is +enabled: + +```make +ifeq ($(EFFECTIVE_USE_CXX_ESCALAR_KERNEL),1) +CFILES += bssn_escalar_rhs_c.o +endif +``` + +### 6. Use safe transfer-cache default + +In `AMSS_NCKU_source/makefile.inc`, keep: + +```make +USE_TRANSFER_CACHE ?= auto +``` + +With the effective switch logic above, this enables cached transfer for BSSN +vacuum while keeping non-BSSN paths on the uncached path by default. + +## Verification Checklist + +Run these checks after migrating: + +```bash +python3 -c "import generate_macrodef; generate_macrodef.generate_build_config()" +cat GW150914/AMSS_NCKU_build.mk +``` + +For BSSN, the generated file should contain: + +```make +ABE_TYPE := 0 +``` + +Dry-run the copied or source makefile: + +```bash +make -n -B INTERP_LB_MODE=off ABE | grep -E 'BSSN_USE_TRANSFER_CACHE|BSSN_USE_ESCALAR_C_KERNEL|bssn_escalar_rhs_c' +``` + +Expected BSSN result: + +```text +-DBSSN_USE_TRANSFER_CACHE=1 -DBSSN_USE_ESCALAR_C_KERNEL=0 +``` + +and no `bssn_escalar_rhs_c.o` in the final link command. + +Run the full workflow: + +```bash +python3 AMSS_NCKU_Program.py +``` + +For the 10-step BSSN test, compare coordinate output: + +```bash +python3 - <<'PY' +from pathlib import Path +old = Path('../GW150914-06457/AMSS_NCKU_output/bssn_BH.dat') +new = Path('GW150914/AMSS_NCKU_output/bssn_BH.dat') + +def rows(path): + out = [] + for line in path.read_text().splitlines(): + if not line.strip() or line.lstrip().startswith('#'): + continue + out.append([float(x) for x in line.split()]) + return out + +ro, rn = rows(old), rows(new) +n = min(len(ro), len(rn)) +max_abs = 0.0 +for i in range(n): + for a, b in zip(ro[i], rn[i]): + max_abs = max(max_abs, abs(a - b)) +print(f"old_rows={len(ro)} new_rows={len(rn)} compared_rows={n}") +print(f"max_abs_diff={max_abs:.17g}") +PY +``` + +For the validated migration, the first 10 rows matched exactly: + +```text +max_abs_diff=0 +``` diff --git a/generate_macrodef.py b/generate_macrodef.py index 864bcce..a89ce97 100755 --- a/generate_macrodef.py +++ b/generate_macrodef.py @@ -12,6 +12,37 @@ import os import AMSS_NCKU_Input as input_data ## import program input file +################################################################## + +def get_abe_type(): + if ( input_data.Equation_Class == "BSSN" ): + return 0 + elif ( input_data.Equation_Class == "BSSN-EScalar" ): + return 1 + elif ( input_data.Equation_Class == "BSSN-EM" ): + return 3 + elif ( input_data.Equation_Class == "Z4C" ): + return 2 + else: + raise ValueError("Equation_Class setting error!!!") + + +################################################################## + +## Generate the makefile fragment used by the copied source tree. +## The source-tree macrodef.h is not authoritative because macro files +## are regenerated under File_directory for each run. + +def generate_build_config(): + + file1 = open( os.path.join(input_data.File_directory, "AMSS_NCKU_build.mk"), "w") + + print( "# Generated by generate_macrodef.py; do not edit manually.", file=file1 ) + print( f"ABE_TYPE := {get_abe_type()}", file=file1 ) + + file1.close() + + ################################################################## ## Generate the macro file macrodef.h according to user settings @@ -58,19 +89,10 @@ def generate_macrodef_h(): # 2: Z4c vacuum # 3: coupled to Maxwell field - if ( input_data.Equation_Class == "BSSN" ): - print( "#define ABEtype 0", file=file1 ) - print( file=file1 ) - elif ( input_data.Equation_Class == "BSSN-EScalar" ): - print( "#define ABEtype 1", file=file1 ) - print( file=file1 ) - elif ( input_data.Equation_Class == "BSSN-EM" ): - print( "#define ABEtype 3", file=file1 ) - print( file=file1 ) - elif ( input_data.Equation_Class == "Z4C" ): - print( "#define ABEtype 2", file=file1 ) - print( file=file1 ) - else: + try: + print( f"#define ABEtype {get_abe_type()}", file=file1 ) + print( file=file1 ) + except ValueError: print( "Equation_Class setting error!!!" ) print() print( "# Equation type #define ABEtype setting error!!!", file=file1 )