Merge branch 'cjy-vitality'

# Conflicts:
#	AMSS_NCKU_source/bssn_class.C
#	AMSS_NCKU_source/makefile.inc
This commit is contained in:
2026-04-28 16:55:47 +08:00
13 changed files with 1267 additions and 682 deletions

View File

@@ -177,6 +177,9 @@ print( " AMSS-NCKU macro file macrodef.h has been generated. " )
generate_macrodef.generate_macrodef_fh()
print( " AMSS-NCKU macro file macrodef.fh has been generated. " )
generate_macrodef.generate_build_config()
print( " AMSS-NCKU build config AMSS_NCKU_build.mk has been generated. " )
##################################################################
@@ -219,9 +222,11 @@ shutil.copytree(AMSS_NCKU_source_path, AMSS_NCKU_source_copy)
macrodef_h_path = os.path.join(File_directory, "macrodef.h")
macrodef_fh_path = os.path.join(File_directory, "macrodef.fh")
build_config_path = os.path.join(File_directory, "AMSS_NCKU_build.mk")
shutil.copy2(macrodef_h_path, AMSS_NCKU_source_copy)
shutil.copy2(macrodef_fh_path, AMSS_NCKU_source_copy)
shutil.copy2(build_config_path, AMSS_NCKU_source_copy)
# Notes on copying files:
# shutil.copy2 preserves file metadata such as modification time.

View File

@@ -5,6 +5,42 @@
#include "misc.h"
#include "parameters.h"
namespace
{
enum { MAX_DATA_PACKER_VARS = 64 };
int expand_var_list_pack_info(MyList<var> *src_list, MyList<var> *dst_list,
int *src_sgfn, int *dst_sgfn, double **src_soa)
{
int count = 0;
MyList<var> *src_it = src_list;
MyList<var> *dst_it = dst_list;
while (src_it && dst_it)
{
if (count >= MAX_DATA_PACKER_VARS)
{
cout << "Parallel::data_packer: too many variables in communication list." << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
src_sgfn[count] = src_it->data->sgfn;
dst_sgfn[count] = dst_it->data->sgfn;
src_soa[count] = src_it->data->SoA;
count++;
src_it = src_it->next;
dst_it = dst_it->next;
}
if (src_it || dst_it)
{
cout << "error in short data packer, var lists does not match." << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
return count;
}
}
int Parallel::partition1(int &nx, int split_size, int min_width, int cpusize, int shape) // special for 1 diemnsion
{
nx = Mymax(1, shape / min_width);
@@ -3730,21 +3766,10 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
if (!src || !dst)
return size_out;
MyList<var> *varls, *varld;
varls = VarLists;
varld = VarListd;
while (varls && varld)
{
varls = varls->next;
varld = varld->next;
}
if (varls || varld)
{
cout << "error in short data packer, var lists does not match." << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
int src_sgfn[MAX_DATA_PACKER_VARS];
int dst_sgfn[MAX_DATA_PACKER_VARS];
double *src_soa[MAX_DATA_PACKER_VARS];
const int var_count = expand_var_list_pack_info(VarLists, VarListd, src_sgfn, dst_sgfn, src_soa);
int type; /* 1 copy, 2 restrict, 3 prolong */
if (src->data->Bg->lev == dst->data->Bg->lev)
@@ -3756,44 +3781,58 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
while (src && dst)
{
if ((dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) ||
(dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank))
{
varls = VarLists;
varld = VarListd;
while (varls && varld)
const bool rank_match =
(dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) ||
(dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank);
if (rank_match)
{
const int segment_size = dst->data->shape[0] * dst->data->shape[1] * dst->data->shape[2];
int offset = size_out;
if (data)
{
if (dir == PACK)
{
switch (type)
{
// attention must be paied to the difference between src's llb,uub and dst's llb,uub
case 1:
f_copy(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + size_out,
src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn],
dst->data->llb, dst->data->uub);
for (int iv = 0; iv < var_count; iv++, offset += segment_size)
f_copy(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + offset,
src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape,
src->data->Bg->fgfs[src_sgfn[iv]], dst->data->llb, dst->data->uub);
break;
case 2:
f_restrict3(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + size_out,
src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn],
dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry);
for (int iv = 0; iv < var_count; iv++, offset += segment_size)
f_restrict3(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + offset,
src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape,
src->data->Bg->fgfs[src_sgfn[iv]], dst->data->llb, dst->data->uub,
src_soa[iv], Symmetry);
break;
case 3:
f_prolong3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn],
dst->data->llb, dst->data->uub, dst->data->shape, data + size_out,
dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry);
for (int iv = 0; iv < var_count; iv++, offset += segment_size)
f_prolong3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape,
src->data->Bg->fgfs[src_sgfn[iv]], dst->data->llb, dst->data->uub,
dst->data->shape, data + offset, dst->data->llb, dst->data->uub,
src_soa[iv], Symmetry);
break;
default:
break;
}
if (dir == UNPACK) // from target data to corresponding grid
f_copy(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, dst->data->Bg->fgfs[varld->data->sgfn],
dst->data->llb, dst->data->uub, dst->data->shape, data + size_out,
dst->data->llb, dst->data->uub);
}
size_out += dst->data->shape[0] * dst->data->shape[1] * dst->data->shape[2];
varls = varls->next;
varld = varld->next;
else
{
for (int iv = 0; iv < var_count; iv++, offset += segment_size)
f_copy(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape,
dst->data->Bg->fgfs[dst_sgfn[iv]], dst->data->llb, dst->data->uub,
dst->data->shape, data + offset, dst->data->llb, dst->data->uub);
}
}
size_out = offset + ((!data) ? segment_size * var_count : 0);
if (data)
size_out = offset;
}
dst = dst->next;
src = src->next;
}
@@ -3819,21 +3858,10 @@ int Parallel::data_packermix(double *data, MyList<Parallel::gridseg> *src, MyLis
if (!src || !dst)
return size_out;
MyList<var> *varls, *varld;
varls = VarLists;
varld = VarListd;
while (varls && varld)
{
varls = varls->next;
varld = varld->next;
}
if (varls || varld)
{
cout << "error in short data packer, var lists does not match." << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
int src_sgfn[MAX_DATA_PACKER_VARS];
int dst_sgfn[MAX_DATA_PACKER_VARS];
double *src_soa[MAX_DATA_PACKER_VARS];
const int var_count = expand_var_list_pack_info(VarLists, VarListd, src_sgfn, dst_sgfn, src_soa);
int type; /* 1 copy, 2 restrict, 3 prolong */
if (src->data->Bg->lev == dst->data->Bg->lev)
@@ -3851,31 +3879,42 @@ int Parallel::data_packermix(double *data, MyList<Parallel::gridseg> *src, MyLis
while (src && dst)
{
if ((dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) ||
(dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank))
{
varls = VarLists;
varld = VarListd;
while (varls && varld)
const bool rank_match =
(dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) ||
(dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank);
if (rank_match)
{
const int segment_size =
(src->data->shape[0] + 2 * ghost_width) *
(src->data->shape[1] + 2 * ghost_width) *
(src->data->shape[2] + 2 * ghost_width);
int offset = size_out;
if (data)
{
if (dir == PACK)
f_prolongcopy3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn],
dst->data->llb, dst->data->uub, src->data->shape, data + size_out,
src->data->llb, src->data->uub, varls->data->SoA, Symmetry);
if (dir == UNPACK) // from target data to corresponding grid
f_prolongmix3(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, dst->data->Bg->fgfs[varld->data->sgfn],
src->data->llb, src->data->uub, src->data->shape, data + size_out,
dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry, dst->data->illb, dst->data->iuub);
{
for (int iv = 0; iv < var_count; iv++, offset += segment_size)
f_prolongcopy3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape,
src->data->Bg->fgfs[src_sgfn[iv]], dst->data->llb, dst->data->uub,
src->data->shape, data + offset, src->data->llb, src->data->uub,
src_soa[iv], Symmetry);
}
// the symmetry problem should be dealt in prolongcopy3,
// so we always have ghost_width for both sides
size_out += (src->data->shape[0] + 2 * ghost_width) * (src->data->shape[1] + 2 * ghost_width) * (src->data->shape[2] + 2 * ghost_width);
varls = varls->next;
varld = varld->next;
else
{
for (int iv = 0; iv < var_count; iv++, offset += segment_size)
f_prolongmix3(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape,
dst->data->Bg->fgfs[dst_sgfn[iv]], src->data->llb, src->data->uub,
src->data->shape, data + offset, dst->data->llb, dst->data->uub,
src_soa[iv], Symmetry, dst->data->illb, dst->data->iuub);
}
}
size_out = offset + ((!data) ? segment_size * var_count : 0);
if (data)
size_out = offset;
}
dst = dst->next;
src = src->next;
}

View File

@@ -258,6 +258,8 @@ void bssnEM_class::Initialize()
PhysTime = StartTime;
Setup_Black_Hole_position();
}
setup_transfer_caches();
}
//================================================================================================

View File

@@ -26,6 +26,12 @@ using namespace std;
#include "shellfunctions.h"
#include "parameters.h"
#if BSSN_USE_ESCALAR_C_KERNEL
#define BSSN_ESCALAR_RHS f_compute_rhs_bssn_escalar_c
#else
#define BSSN_ESCALAR_RHS f_compute_rhs_bssn_escalar
#endif
#ifdef With_AHF
#include "derivatives.h"
#include "myglobal.h"
@@ -133,6 +139,9 @@ void bssnEScalar_class::Initialize()
}
GH = new cgh(0, ngfs, Symmetry, pname, checkrun, ErrorMonitor);
ConstraintRefreshLevels = new int[GH->levels];
for (int il = 0; il < GH->levels; il++)
ConstraintRefreshLevels[il] = 0;
if (checkrun)
CheckPoint->readcheck_cgh(PhysTime, GH, myrank, nprocs, Symmetry);
else
@@ -165,6 +174,8 @@ void bssnEScalar_class::Initialize()
PhysTime = StartTime;
Setup_Black_Hole_position();
}
setup_transfer_caches();
}
//================================================================================================
@@ -230,6 +241,9 @@ void bssnEScalar_class::Read_Ansorg()
}
int BH_NM;
double *Porg_here;
double *pmom_local;
double *spin_local;
double *mass_local;
// read parameter from file
{
const int LEN = 256;
@@ -271,9 +285,9 @@ void bssnEScalar_class::Read_Ansorg()
}
Porg_here = new double[3 * BH_NM];
Pmom = new double[3 * BH_NM];
Spin = new double[3 * BH_NM];
Mass = new double[BH_NM];
pmom_local = new double[3 * BH_NM];
spin_local = new double[3 * BH_NM];
mass_local = new double[BH_NM];
// read parameter from file
{
const int LEN = 256;
@@ -308,7 +322,7 @@ void bssnEScalar_class::Read_Ansorg()
if (sgrp == "BSSN" && sind < BH_NM)
{
if (skey == "Mass")
Mass[sind] = atof(sval.c_str());
mass_local[sind] = atof(sval.c_str());
else if (skey == "Porgx")
Porg_here[sind * 3] = atof(sval.c_str());
else if (skey == "Porgy")
@@ -316,17 +330,17 @@ void bssnEScalar_class::Read_Ansorg()
else if (skey == "Porgz")
Porg_here[sind * 3 + 2] = atof(sval.c_str());
else if (skey == "Spinx")
Spin[sind * 3] = atof(sval.c_str());
spin_local[sind * 3] = atof(sval.c_str());
else if (skey == "Spiny")
Spin[sind * 3 + 1] = atof(sval.c_str());
spin_local[sind * 3 + 1] = atof(sval.c_str());
else if (skey == "Spinz")
Spin[sind * 3 + 2] = atof(sval.c_str());
spin_local[sind * 3 + 2] = atof(sval.c_str());
else if (skey == "Pmomx")
Pmom[sind * 3] = atof(sval.c_str());
pmom_local[sind * 3] = atof(sval.c_str());
else if (skey == "Pmomy")
Pmom[sind * 3 + 1] = atof(sval.c_str());
pmom_local[sind * 3 + 1] = atof(sval.c_str());
else if (skey == "Pmomz")
Pmom[sind * 3 + 2] = atof(sval.c_str());
pmom_local[sind * 3 + 2] = atof(sval.c_str());
}
}
inf.close();
@@ -362,7 +376,7 @@ void bssnEScalar_class::Read_Ansorg()
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn],
Mass, Porg_here, Pmom, Spin, BH_NM);
mass_local, Porg_here, pmom_local, spin_local, BH_NM);
}
if (BL == Pp->data->ble)
break;
@@ -404,7 +418,7 @@ void bssnEScalar_class::Read_Ansorg()
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn],
Mass, Porg_here, Pmom, Spin, BH_NM);
mass_local, Porg_here, pmom_local, spin_local, BH_NM);
}
if (BL == Pp->data->ble)
break;
@@ -415,6 +429,9 @@ void bssnEScalar_class::Read_Ansorg()
#endif
delete[] Porg_here;
delete[] pmom_local;
delete[] spin_local;
delete[] mass_local;
// dump read_in initial data
// for(int lev=0;lev<GH->levels;lev++) Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT);
}
@@ -455,6 +472,9 @@ void bssnEScalar_class::Read_Pablo()
}
int BH_NM;
double *Porg_here;
double *pmom_local;
double *spin_local;
double *mass_local;
// read parameter from file
{
const int LEN = 256;
@@ -496,9 +516,9 @@ void bssnEScalar_class::Read_Pablo()
}
Porg_here = new double[3 * BH_NM];
Pmom = new double[3 * BH_NM];
Spin = new double[3 * BH_NM];
Mass = new double[BH_NM];
pmom_local = new double[3 * BH_NM];
spin_local = new double[3 * BH_NM];
mass_local = new double[BH_NM];
// read parameter from file
{
const int LEN = 256;
@@ -533,7 +553,7 @@ void bssnEScalar_class::Read_Pablo()
if (sgrp == "BSSN" && sind < BH_NM)
{
if (skey == "Mass")
Mass[sind] = atof(sval.c_str());
mass_local[sind] = atof(sval.c_str());
else if (skey == "Porgx")
Porg_here[sind * 3] = atof(sval.c_str());
else if (skey == "Porgy")
@@ -541,17 +561,17 @@ void bssnEScalar_class::Read_Pablo()
else if (skey == "Porgz")
Porg_here[sind * 3 + 2] = atof(sval.c_str());
else if (skey == "Spinx")
Spin[sind * 3] = atof(sval.c_str());
spin_local[sind * 3] = atof(sval.c_str());
else if (skey == "Spiny")
Spin[sind * 3 + 1] = atof(sval.c_str());
spin_local[sind * 3 + 1] = atof(sval.c_str());
else if (skey == "Spinz")
Spin[sind * 3 + 2] = atof(sval.c_str());
spin_local[sind * 3 + 2] = atof(sval.c_str());
else if (skey == "Pmomx")
Pmom[sind * 3] = atof(sval.c_str());
pmom_local[sind * 3] = atof(sval.c_str());
else if (skey == "Pmomy")
Pmom[sind * 3 + 1] = atof(sval.c_str());
pmom_local[sind * 3 + 1] = atof(sval.c_str());
else if (skey == "Pmomz")
Pmom[sind * 3 + 2] = atof(sval.c_str());
pmom_local[sind * 3 + 2] = atof(sval.c_str());
}
}
inf.close();
@@ -598,7 +618,7 @@ void bssnEScalar_class::Read_Pablo()
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn],
Mass, Porg_here, Pmom, Spin, BH_NM);
mass_local, Porg_here, pmom_local, spin_local, BH_NM);
}
if (BL == Pp->data->ble)
break;
@@ -662,7 +682,7 @@ void bssnEScalar_class::Read_Pablo()
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn],
Mass, Porg_here, Pmom, Spin, BH_NM);
mass_local, Porg_here, pmom_local, spin_local, BH_NM);
}
if (BL == Pp->data->ble)
break;
@@ -686,6 +706,9 @@ void bssnEScalar_class::Read_Pablo()
#endif
delete[] Porg_here;
delete[] pmom_local;
delete[] spin_local;
delete[] mass_local;
if (flag && myrank == 0)
MPI_Abort(MPI_COMM_WORLD, 1);
// dump read_in initial data
@@ -739,7 +762,7 @@ void bssnEScalar_class::Step(int lev, int YN)
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
#endif
if (f_compute_rhs_bssn_escalar(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
if (BSSN_ESCALAR_RHS(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
@@ -993,7 +1016,8 @@ void bssnEScalar_class::Step(int lev, int YN)
}
#endif
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
Parallel::AsyncSyncState async_pre;
sync_predictor_start(lev, SynchList_pre, async_pre);
#ifdef WithShell
if (lev == 0)
@@ -1012,6 +1036,7 @@ void bssnEScalar_class::Step(int lev, int YN)
}
}
#endif
sync_predictor_finish(lev, async_pre, SynchList_pre);
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
@@ -1081,7 +1106,7 @@ void bssnEScalar_class::Step(int lev, int YN)
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#endif
if (f_compute_rhs_bssn_escalar(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
if (BSSN_ESCALAR_RHS(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
@@ -1349,7 +1374,8 @@ void bssnEScalar_class::Step(int lev, int YN)
}
#endif
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
Parallel::AsyncSyncState async_cor;
sync_corrector_start(lev, SynchList_cor, async_cor);
#ifdef WithShell
if (lev == 0)
@@ -1368,6 +1394,7 @@ void bssnEScalar_class::Step(int lev, int YN)
}
}
#endif
sync_corrector_finish(lev, async_cor, SynchList_cor);
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
@@ -1835,8 +1862,11 @@ void bssnEScalar_class::AnalysisStuff_EScalar(int lev, double dT_lev)
//================================================================================================
void bssnEScalar_class::Interp_Constraint()
void bssnEScalar_class::Interp_Constraint(bool infg)
{
if (!infg)
return;
// we do not support a_lev != 0 yet.
if (a_lev > 0)
return;
@@ -1858,7 +1888,7 @@ void bssnEScalar_class::Interp_Constraint()
if (myrank == cg->rank)
{
if (lev > 0)
f_compute_rhs_bssn_escalar(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
BSSN_ESCALAR_RHS(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
@@ -2078,7 +2108,7 @@ void bssnEScalar_class::Constraint_Out()
if (myrank == cg->rank)
{
if (lev > 0)
f_compute_rhs_bssn_escalar(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
BSSN_ESCALAR_RHS(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],

View File

@@ -51,7 +51,7 @@ public:
void Compute_Psi4(int lev);
void Step(int lev, int YN);
void AnalysisStuff_EScalar(int lev, double dT_lev);
void Interp_Constraint();
void Interp_Constraint(bool infg);
void Constraint_Out();
protected:

View File

@@ -299,6 +299,28 @@ bssn_class::bssn_class(double Couranti, double StartTimei, double TotalTimei,
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
// Derived classes override Initialize(), so ownership-sensitive members must
// be in a known state before any specialized setup path runs.
GH = 0;
SH = 0;
PhysTime = 0.0;
BH_num = 0;
BH_num_input = 0;
Porg0 = 0;
Porgbr = 0;
Porg = 0;
Porg1 = 0;
Porg_rhs = 0;
Mass = 0;
Pmom = 0;
Spin = 0;
sync_cache_pre = 0;
sync_cache_cor = 0;
sync_cache_rp_coarse = 0;
sync_cache_rp_fine = 0;
sync_cache_restrict = 0;
sync_cache_outbd = 0;
// setup Monitors
{
stringstream a_stream;
@@ -986,13 +1008,7 @@ void bssn_class::Initialize()
Setup_Black_Hole_position();
}
// Initialize sync caches (per-level, for predictor and corrector)
sync_cache_pre = new Parallel::SyncCache[GH->levels];
sync_cache_cor = new Parallel::SyncCache[GH->levels];
sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels];
sync_cache_rp_fine = new Parallel::SyncCache[GH->levels];
sync_cache_restrict = new Parallel::SyncCache[GH->levels];
sync_cache_outbd = new Parallel::SyncCache[GH->levels];
setup_transfer_caches();
}
//================================================================================================
@@ -1247,30 +1263,7 @@ bssn_class::~bssn_class()
#endif
// Destroy sync caches before GH
if (sync_cache_pre)
{
for (int i = 0; i < GH->levels; i++)
sync_cache_pre[i].destroy();
delete[] sync_cache_pre;
}
if (sync_cache_cor)
{
for (int i = 0; i < GH->levels; i++)
sync_cache_cor[i].destroy();
delete[] sync_cache_cor;
}
if (sync_cache_rp_coarse)
{
for (int i = 0; i < GH->levels; i++)
sync_cache_rp_coarse[i].destroy();
delete[] sync_cache_rp_coarse;
}
if (sync_cache_rp_fine)
{
for (int i = 0; i < GH->levels; i++)
sync_cache_rp_fine[i].destroy();
delete[] sync_cache_rp_fine;
}
destroy_transfer_caches();
delete GH;
#ifdef WithShell
@@ -2489,9 +2482,7 @@ void bssn_class::Evolve(int Steps)
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
#if (ABEtype != 1 && ABEtype != 2)
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
invalidate_transfer_caches();
STEP_TIMER_ADD(TB_REGRID, timer_regrid);
#endif
@@ -2732,9 +2723,7 @@ void bssn_class::RecursiveStep(int lev)
{
if (ConstraintRefreshLevels)
ConstraintRefreshLevels[lev] = 1;
#if (ABEtype != 1 && ABEtype != 2)
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
invalidate_transfer_caches();
}
STEP_TIMER_ADD(TB_REGRID, timer_regrid_onelevel);
#endif
@@ -2915,9 +2904,7 @@ void bssn_class::ParallelStep()
if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
#if (ABEtype != 1 && ABEtype != 2)
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
invalidate_transfer_caches();
#endif
}
@@ -3084,9 +3071,7 @@ void bssn_class::ParallelStep()
if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor))
#if (ABEtype != 1 && ABEtype != 2)
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
invalidate_transfer_caches();
// a_stream.clear();
// a_stream.str("");
@@ -3101,9 +3086,7 @@ void bssn_class::ParallelStep()
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
#if (ABEtype != 1 && ABEtype != 2)
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
invalidate_transfer_caches();
// a_stream.clear();
// a_stream.str("");
@@ -3122,9 +3105,7 @@ void bssn_class::ParallelStep()
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
#if (ABEtype != 1 && ABEtype != 2)
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
invalidate_transfer_caches();
// a_stream.clear();
// a_stream.str("");
@@ -3140,9 +3121,7 @@ void bssn_class::ParallelStep()
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
#if (ABEtype != 1 && ABEtype != 2)
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
invalidate_transfer_caches();
// a_stream.clear();
// a_stream.str("");
@@ -3673,11 +3652,7 @@ void bssn_class::Step(int lev, int YN)
STEP_TIMER_DECL(timer_predictor_sync);
Parallel::AsyncSyncState async_pre;
#if (ABEtype == 1)
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
#else
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
#endif
sync_predictor_start(lev, SynchList_pre, async_pre);
#ifdef WithShell
if (lev == 0)
@@ -3696,9 +3671,7 @@ void bssn_class::Step(int lev, int YN)
}
}
#endif
#if (ABEtype != 1)
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
#endif
sync_predictor_finish(lev, async_pre, SynchList_pre);
#ifdef WithShell
// Complete non-blocking error reduction and check
@@ -4044,11 +4017,7 @@ void bssn_class::Step(int lev, int YN)
STEP_TIMER_DECL(timer_corrector_sync);
Parallel::AsyncSyncState async_cor;
#if (ABEtype == 1)
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
#else
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
#endif
sync_corrector_start(lev, SynchList_cor, async_cor);
#ifdef WithShell
if (lev == 0)
@@ -4067,9 +4036,7 @@ void bssn_class::Step(int lev, int YN)
}
}
#endif
#if (ABEtype != 1)
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
#endif
sync_corrector_finish(lev, async_cor, SynchList_cor);
#ifdef WithShell
// Complete non-blocking error reduction and check
@@ -4558,11 +4525,7 @@ void bssn_class::Step(int lev, int YN)
#endif
Parallel::AsyncSyncState async_pre;
#if (ABEtype == 1)
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
#else
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
#endif
sync_predictor_start(lev, SynchList_pre, async_pre);
#ifdef WithShell
if (lev == 0)
@@ -4581,9 +4544,7 @@ void bssn_class::Step(int lev, int YN)
}
}
#endif
#if (ABEtype != 1)
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
#endif
sync_predictor_finish(lev, async_pre, SynchList_pre);
#ifdef WithShell
// Complete non-blocking error reduction and check
@@ -4912,11 +4873,7 @@ void bssn_class::Step(int lev, int YN)
#endif
Parallel::AsyncSyncState async_cor;
#if (ABEtype == 1)
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
#else
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
#endif
sync_corrector_start(lev, SynchList_cor, async_cor);
#ifdef WithShell
if (lev == 0)
@@ -4935,9 +4892,7 @@ void bssn_class::Step(int lev, int YN)
}
}
#endif
#if (ABEtype != 1)
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
#endif
sync_corrector_finish(lev, async_cor, SynchList_cor);
#ifdef WithShell
// Complete non-blocking error reduction and check
@@ -5329,11 +5284,7 @@ void bssn_class::Step(int lev, int YN)
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor sync");
#if (ABEtype == 1)
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
#else
Parallel::Sync_cached(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev]);
#endif
sync_evolution(lev, SynchList_pre, sync_cache_pre);
// Complete non-blocking error reduction and check
MPI_Wait(&err_req, MPI_STATUS_IGNORE);
@@ -5534,11 +5485,7 @@ void bssn_class::Step(int lev, int YN)
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector sync");
#if (ABEtype == 1)
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
#else
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev]);
#endif
sync_evolution(lev, SynchList_cor, sync_cache_cor);
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Corrector sync");
@@ -6255,11 +6202,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
#endif
#if (RPB == 0)
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry);
#else
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry, sync_cache_restrict[lev]);
#endif
restrict_evolution(lev, SL, SynchList_pre);
#elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
@@ -6272,11 +6215,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
#endif
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry);
#else
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
#endif
sync_evolution(lev - 1, SynchList_pre, sync_cache_rp_coarse);
#if (PSTR == 1 || PSTR == 2)
// a_stream.clear();
@@ -6287,21 +6226,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
#if (RPB == 0)
#if (MIXOUTB == 0)
#if (ABEtype == 1 || ABEtype == 2)
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry);
Pp = Pp->next;
}
Ppc = Ppc->next;
}
#else
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry, sync_cache_outbd[lev]);
#endif
outbdlow2hi_evolution(lev, SynchList_pre, SL);
#elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
#endif
@@ -6328,11 +6253,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
#endif
#if (RPB == 0)
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
#else
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_restrict[lev]);
#endif
restrict_evolution(lev, SL, SL);
#elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
@@ -6345,11 +6266,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
#endif
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Sync(GH->PatL[lev - 1], SL, Symmetry);
#else
Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]);
#endif
sync_evolution(lev - 1, SL, sync_cache_rp_coarse);
#if (PSTR == 1 || PSTR == 2)
// a_stream.clear();
@@ -6360,21 +6277,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
#if (RPB == 0)
#if (MIXOUTB == 0)
#if (ABEtype == 1 || ABEtype == 2)
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SL, SL, Symmetry);
Pp = Pp->next;
}
Ppc = Ppc->next;
}
#else
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_outbd[lev]);
#endif
outbdlow2hi_evolution(lev, SL, SL);
#elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
#endif
@@ -6391,7 +6294,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
#endif
}
Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]);
sync_evolution(lev, SL, sync_cache_rp_fine);
#if (PSTR == 1 || PSTR == 2)
// a_stream.clear();
@@ -6530,17 +6433,17 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
}
#if (RPB == 0)
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry, sync_cache_restrict[lev]);
restrict_evolution(lev, SL, SynchList_pre);
#elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
#endif
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
sync_evolution(lev - 1, SynchList_pre, sync_cache_rp_coarse);
#if (RPB == 0)
#if (MIXOUTB == 0)
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry, sync_cache_outbd[lev]);
outbdlow2hi_evolution(lev, SynchList_pre, SL);
#elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
#endif
@@ -6552,17 +6455,17 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
else // no time refinement levels and for all same time levels
{
#if (RPB == 0)
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_restrict[lev]);
restrict_evolution(lev, SL, SL);
#elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
#endif
Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]);
sync_evolution(lev - 1, SL, sync_cache_rp_coarse);
#if (RPB == 0)
#if (MIXOUTB == 0)
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_outbd[lev]);
outbdlow2hi_evolution(lev, SL, SL);
#elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
#endif
@@ -6575,7 +6478,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Sync(GH->PatL[lev], SL, Symmetry);
#else
Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]);
sync_evolution(lev, SL, sync_cache_rp_fine);
#endif
}
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
@@ -6708,39 +6611,17 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
}
#if (RPB == 0)
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, Symmetry);
#else
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, Symmetry, sync_cache_restrict[lev]);
#endif
restrict_evolution(lev, SynchList_cor, SynchList_pre);
#elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,SynchList_pre,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry);
#endif
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry);
#else
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
#endif
sync_evolution(lev - 1, SynchList_pre, sync_cache_rp_coarse);
#if (RPB == 0)
#if (MIXOUTB == 0)
#if (ABEtype == 1 || ABEtype == 2)
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry);
Pp = Pp->next;
}
Ppc = Ppc->next;
}
#else
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
#endif
outbdlow2hi_evolution(lev, SynchList_pre, SynchList_cor);
#elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
#endif
@@ -6754,39 +6635,17 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
if (myrank == 0)
cout << "===: " << GH->Lt[lev - 1] << "," << GH->Lt[lev] + dT_lev << endl;
#if (RPB == 0)
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry);
#else
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry, sync_cache_restrict[lev]);
#endif
restrict_evolution(lev, SynchList_cor, StateList);
#elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry);
#endif
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Sync(GH->PatL[lev - 1], StateList, Symmetry);
#else
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]);
#endif
sync_evolution(lev - 1, StateList, sync_cache_rp_coarse);
#if (RPB == 0)
#if (MIXOUTB == 0)
#if (ABEtype == 1 || ABEtype == 2)
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry);
Pp = Pp->next;
}
Ppc = Ppc->next;
}
#else
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
#endif
outbdlow2hi_evolution(lev, StateList, SynchList_cor);
#elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
#endif
@@ -6796,11 +6655,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
#endif
}
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
#else
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]);
#endif
sync_evolution(lev, SynchList_cor, sync_cache_rp_fine);
}
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
}
@@ -6832,7 +6687,7 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
#if (RPB == 0)
#if (MIXOUTB == 0)
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
outbdlow2hi_evolution(lev, SynchList_pre, SynchList_cor);
#elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
#endif
@@ -6845,7 +6700,7 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
{
#if (RPB == 0)
#if (MIXOUTB == 0)
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
outbdlow2hi_evolution(lev, StateList, SynchList_cor);
#elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
#endif
@@ -6864,10 +6719,10 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
#else
Parallel::Restrict_after(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry);
#endif
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]);
sync_evolution(lev - 1, StateList, sync_cache_rp_coarse);
}
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]);
sync_evolution(lev, SynchList_cor, sync_cache_rp_fine);
}
}
#undef MIXOUTB
@@ -7597,6 +7452,169 @@ void bssn_class::compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, va
}
}
}
bool bssn_class::use_transfer_cache() const
{
#if BSSN_USE_TRANSFER_CACHE
return true;
#else
return false;
#endif
}
void bssn_class::setup_transfer_caches()
{
sync_cache_pre = 0;
sync_cache_cor = 0;
sync_cache_rp_coarse = 0;
sync_cache_rp_fine = 0;
sync_cache_restrict = 0;
sync_cache_outbd = 0;
if (!use_transfer_cache() || !GH)
return;
sync_cache_pre = new Parallel::SyncCache[GH->levels];
sync_cache_cor = new Parallel::SyncCache[GH->levels];
sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels];
sync_cache_rp_fine = new Parallel::SyncCache[GH->levels];
sync_cache_restrict = new Parallel::SyncCache[GH->levels];
sync_cache_outbd = new Parallel::SyncCache[GH->levels];
}
void bssn_class::invalidate_transfer_caches()
{
if (!use_transfer_cache() || !GH || !sync_cache_pre || !sync_cache_cor ||
!sync_cache_rp_coarse || !sync_cache_rp_fine || !sync_cache_restrict || !sync_cache_outbd)
return;
for (int il = 0; il < GH->levels; il++)
{
sync_cache_pre[il].invalidate();
sync_cache_cor[il].invalidate();
sync_cache_rp_coarse[il].invalidate();
sync_cache_rp_fine[il].invalidate();
sync_cache_restrict[il].invalidate();
sync_cache_outbd[il].invalidate();
}
}
void bssn_class::destroy_transfer_caches()
{
if (sync_cache_pre)
{
if (use_transfer_cache() && GH)
for (int i = 0; i < GH->levels; i++)
sync_cache_pre[i].destroy();
delete[] sync_cache_pre;
sync_cache_pre = 0;
}
if (sync_cache_cor)
{
if (use_transfer_cache() && GH)
for (int i = 0; i < GH->levels; i++)
sync_cache_cor[i].destroy();
delete[] sync_cache_cor;
sync_cache_cor = 0;
}
if (sync_cache_rp_coarse)
{
if (use_transfer_cache() && GH)
for (int i = 0; i < GH->levels; i++)
sync_cache_rp_coarse[i].destroy();
delete[] sync_cache_rp_coarse;
sync_cache_rp_coarse = 0;
}
if (sync_cache_rp_fine)
{
if (use_transfer_cache() && GH)
for (int i = 0; i < GH->levels; i++)
sync_cache_rp_fine[i].destroy();
delete[] sync_cache_rp_fine;
sync_cache_rp_fine = 0;
}
if (sync_cache_restrict)
{
if (use_transfer_cache() && GH)
for (int i = 0; i < GH->levels; i++)
sync_cache_restrict[i].destroy();
delete[] sync_cache_restrict;
sync_cache_restrict = 0;
}
if (sync_cache_outbd)
{
if (use_transfer_cache() && GH)
for (int i = 0; i < GH->levels; i++)
sync_cache_outbd[i].destroy();
delete[] sync_cache_outbd;
sync_cache_outbd = 0;
}
}
void bssn_class::sync_predictor_start(int lev, MyList<var> *VarList, Parallel::AsyncSyncState &async_state)
{
if (use_transfer_cache())
Parallel::Sync_start(GH->PatL[lev], VarList, Symmetry, sync_cache_pre[lev], async_state);
else
Parallel::Sync(GH->PatL[lev], VarList, Symmetry);
}
void bssn_class::sync_predictor_finish(int lev, Parallel::AsyncSyncState &async_state, MyList<var> *VarList)
{
if (use_transfer_cache())
Parallel::Sync_finish(sync_cache_pre[lev], async_state, VarList, Symmetry);
}
void bssn_class::sync_corrector_start(int lev, MyList<var> *VarList, Parallel::AsyncSyncState &async_state)
{
if (use_transfer_cache())
Parallel::Sync_start(GH->PatL[lev], VarList, Symmetry, sync_cache_cor[lev], async_state);
else
Parallel::Sync(GH->PatL[lev], VarList, Symmetry);
}
void bssn_class::sync_corrector_finish(int lev, Parallel::AsyncSyncState &async_state, MyList<var> *VarList)
{
if (use_transfer_cache())
Parallel::Sync_finish(sync_cache_cor[lev], async_state, VarList, Symmetry);
}
void bssn_class::sync_evolution(int lev, MyList<var> *VarList, Parallel::SyncCache *cache_array)
{
if (use_transfer_cache() && cache_array)
Parallel::Sync_cached(GH->PatL[lev], VarList, Symmetry, cache_array[lev]);
else
Parallel::Sync(GH->PatL[lev], VarList, Symmetry);
}
void bssn_class::restrict_evolution(int lev, MyList<var> *src_var_list, MyList<var> *dst_var_list)
{
if (use_transfer_cache())
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], src_var_list, dst_var_list, Symmetry, sync_cache_restrict[lev]);
else
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], src_var_list, dst_var_list, Symmetry);
}
void bssn_class::outbdlow2hi_evolution(int lev, MyList<var> *src_var_list, MyList<var> *dst_var_list)
{
if (use_transfer_cache())
{
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], src_var_list, dst_var_list, Symmetry, sync_cache_outbd[lev]);
return;
}
MyList<Patch> *Ppc = GH->PatL[lev - 1];
while (Ppc)
{
MyList<Patch> *Pp = GH->PatL[lev];
while (Pp)
{
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, src_var_list, dst_var_list, Symmetry);
Pp = Pp->next;
}
Ppc = Ppc->next;
}
}
#endif
//================================================================================================

View File

@@ -33,6 +33,14 @@ using namespace std;
extern void setpbh(int iBHN, double **iPBH, double *iMass, int rBHN);
#ifndef BSSN_USE_TRANSFER_CACHE
#define BSSN_USE_TRANSFER_CACHE 1
#endif
#ifndef BSSN_USE_ESCALAR_C_KERNEL
#define BSSN_USE_ESCALAR_C_KERNEL 1
#endif
class bssn_class
{
public:
@@ -171,6 +179,17 @@ public:
void testOutBd();
bool check_Stdin_Abort();
bool use_transfer_cache() const;
void setup_transfer_caches();
void invalidate_transfer_caches();
void destroy_transfer_caches();
void sync_predictor_start(int lev, MyList<var> *VarList, Parallel::AsyncSyncState &async_state);
void sync_predictor_finish(int lev, Parallel::AsyncSyncState &async_state, MyList<var> *VarList);
void sync_corrector_start(int lev, MyList<var> *VarList, Parallel::AsyncSyncState &async_state);
void sync_corrector_finish(int lev, Parallel::AsyncSyncState &async_state, MyList<var> *VarList);
void sync_evolution(int lev, MyList<var> *VarList, Parallel::SyncCache *cache_array = 0);
void restrict_evolution(int lev, MyList<var> *src_var_list, MyList<var> *dst_var_list);
void outbdlow2hi_evolution(int lev, MyList<var> *src_var_list, MyList<var> *dst_var_list);
virtual void Setup_Initial_Data_Cao();
virtual void Setup_Initial_Data_Lousto();

View File

@@ -0,0 +1,169 @@
#include "macrodef.h"
#include "bssn_rhs.h"
#include "share_func.h"
#include "tool.h"
#include <vector>
namespace
{
// Reuse the temporary workspace across block calls to avoid repeated heap churn
// in the EScalar wrapper. MPI ranks execute this path sequentially, so a single
// process-local buffer is sufficient here.
std::vector<double> g_escalar_tmp_store;
}
#ifdef fortran1
#define f_frpotential frpotential
#endif
#ifdef fortran2
#define f_frpotential FRPOTENTIAL
#endif
#ifdef fortran3
#define f_frpotential frpotential_
#endif
extern "C"
{
void f_frpotential(int *, double *, double *, double *);
}
int f_compute_rhs_bssn_escalar_c(int *ex, double &T,
double *X, double *Y, double *Z,
double *chi, double *trK,
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
double *Gamx, double *Gamy, double *Gamz,
double *Lap, double *betax, double *betay, double *betaz,
double *dtSfx, double *dtSfy, double *dtSfz,
double *Sphi, double *Spi,
double *chi_rhs, double *trK_rhs,
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
double *Sphi_rhs, double *Spi_rhs,
double *rho, double *Sx, double *Sy, double *Sz,
double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res,
double *Gmx_Res, double *Gmy_Res, double *Gmz_Res,
int &Symmetry, int &Lev, double &eps, int &co)
{
const int nx = ex[0], ny = ex[1], nz = ex[2];
const int all = nx * ny * nz;
const size_t workspace_size = size_t(all) * 17;
if (g_escalar_tmp_store.size() < workspace_size)
g_escalar_tmp_store.resize(workspace_size);
double *tmp_ptr = g_escalar_tmp_store.data();
auto alloc_tmp = [&](int n = 1) -> double *
{
double *ptr = tmp_ptr;
tmp_ptr += size_t(all) * n;
return ptr;
};
double *chix = alloc_tmp(), *chiy = alloc_tmp(), *chiz = alloc_tmp();
double *Kx = alloc_tmp(), *Ky = alloc_tmp(), *Kz = alloc_tmp();
double *fxx = alloc_tmp(), *fxy = alloc_tmp(), *fxz = alloc_tmp();
double *fyy = alloc_tmp(), *fyz = alloc_tmp(), *fzz = alloc_tmp();
double *Lapx = alloc_tmp(), *Lapy = alloc_tmp(), *Lapz = alloc_tmp();
double *V = alloc_tmp(), *dVdSphi = alloc_tmp();
const double ZEO = 0.0, ONE = 1.0, TWO = 2.0, HALF = 0.5;
const double SSS[3] = {1.0, 1.0, 1.0};
fderivs(ex, chi, chix, chiy, chiz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
fderivs(ex, Lap, Lapx, Lapy, Lapz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
fderivs(ex, Sphi, Kx, Ky, Kz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
fdderivs(ex, Sphi, fxx, fxy, fxz, fyy, fyz, fzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
f_frpotential(ex, Sphi, V, dVdSphi);
for (int i = 0; i < all; ++i)
{
const double alpn1 = Lap[i] + ONE;
const double chin1 = chi[i] + ONE;
const double gxx = dxx[i] + ONE;
const double gyy = dyy[i] + ONE;
const double gzz = dzz[i] + ONE;
const double det = gxx * gyy * gzz + gxy[i] * gyz[i] * gxz[i] + gxz[i] * gxy[i] * gyz[i]
- gxz[i] * gyy * gxz[i] - gxy[i] * gxy[i] * gzz - gxx * gyz[i] * gyz[i];
const double gupxx = (gyy * gzz - gyz[i] * gyz[i]) / det;
const double gupxy = -(gxy[i] * gzz - gyz[i] * gxz[i]) / det;
const double gupxz = (gxy[i] * gyz[i] - gyy * gxz[i]) / det;
const double gupyy = (gxx * gzz - gxz[i] * gxz[i]) / det;
const double gupyz = -(gxx * gyz[i] - gxy[i] * gxz[i]) / det;
const double gupzz = (gxx * gyy - gxy[i] * gxy[i]) / det;
Sphi_rhs[i] = alpn1 * Spi[i];
Spi_rhs[i] = gupxx * fxx[i] + gupyy * fyy[i] + gupzz * fzz[i]
+ TWO * (gupxy * fxy[i] + gupxz * fxz[i] + gupyz * fyz[i])
- ((Gamx[i] + (gupxx * chix[i] + gupxy * chiy[i] + gupxz * chiz[i]) / TWO / chin1) * Kx[i]
+ (Gamy[i] + (gupxy * chix[i] + gupyy * chiy[i] + gupyz * chiz[i]) / TWO / chin1) * Ky[i]
+ (Gamz[i] + (gupxz * chix[i] + gupyz * chiy[i] + gupzz * chiz[i]) / TWO / chin1) * Kz[i]);
Spi_rhs[i] = Spi_rhs[i] * alpn1
+ gupxx * Lapx[i] * Kx[i] + gupxy * Lapx[i] * Ky[i] + gupxz * Lapx[i] * Kz[i]
+ gupxy * Lapy[i] * Kx[i] + gupyy * Lapy[i] * Ky[i] + gupyz * Lapy[i] * Kz[i]
+ gupxz * Lapz[i] * Kx[i] + gupyz * Lapz[i] * Ky[i] + gupzz * Lapz[i] * Kz[i];
Spi_rhs[i] = Spi_rhs[i] * chin1 + alpn1 * (trK[i] * Spi[i] - dVdSphi[i]);
rho[i] = chin1 * ((gupxx * Kx[i] * Kx[i] + gupyy * Ky[i] * Ky[i] + gupzz * Kz[i] * Kz[i]) * HALF
+ gupxy * Kx[i] * Ky[i] + gupxz * Kx[i] * Kz[i] + gupyz * Ky[i] * Kz[i])
+ Spi[i] * Spi[i] * HALF + V[i];
Sx[i] = -Spi[i] * Kx[i];
Sy[i] = -Spi[i] * Ky[i];
Sz[i] = -Spi[i] * Kz[i];
const double pressure = (rho[i] - Spi[i] * Spi[i]) / chin1;
Sxx[i] = Kx[i] * Kx[i] - pressure * gxx;
Sxy[i] = Kx[i] * Ky[i] - pressure * gxy[i];
Sxz[i] = Kx[i] * Kz[i] - pressure * gxz[i];
Syy[i] = Ky[i] * Ky[i] - pressure * gyy;
Syz[i] = Ky[i] * Kz[i] - pressure * gyz[i];
Szz[i] = Kz[i] * Kz[i] - pressure * gzz;
}
if (f_compute_rhs_bssn(ex, T, X, Y, Z,
chi, trK,
dxx, gxy, gxz, dyy, gyz, dzz,
Axx, Axy, Axz, Ayy, Ayz, Azz,
Gamx, Gamy, Gamz,
Lap, betax, betay, betaz,
dtSfx, dtSfy, dtSfz,
chi_rhs, trK_rhs,
gxx_rhs, gxy_rhs, gxz_rhs, gyy_rhs, gyz_rhs, gzz_rhs,
Axx_rhs, Axy_rhs, Axz_rhs, Ayy_rhs, Ayz_rhs, Azz_rhs,
Gamx_rhs, Gamy_rhs, Gamz_rhs,
Lap_rhs, betax_rhs, betay_rhs, betaz_rhs,
dtSfx_rhs, dtSfy_rhs, dtSfz_rhs,
rho, Sx, Sy, Sz,
Sxx, Sxy, Sxz, Syy, Syz, Szz,
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
ham_Res, movx_Res, movy_Res, movz_Res,
Gmx_Res, Gmy_Res, Gmz_Res,
Symmetry, Lev, eps, co))
return 1;
lopsided_kodis(ex, X, Y, Z, Sphi, Sphi_rhs, betax, betay, betaz, Symmetry, SSS, eps);
lopsided_kodis(ex, X, Y, Z, Spi, Spi_rhs, betax, betay, betaz, Symmetry, SSS, eps);
for (int i = 0; i < all; ++i)
{
if (Sphi_rhs[i] != Sphi_rhs[i] || Spi_rhs[i] != Spi_rhs[i] || rho[i] != rho[i])
return 1;
}
return 0;
}

View File

@@ -67,6 +67,27 @@ extern "C"
int &, int &, double &, int &);
}
int f_compute_rhs_bssn_escalar_c(int *, double &, double *, double *, double *, // ex,T,X,Y,Z
double *, double *, // chi, trK
double *, double *, double *, double *, double *, double *, // gij
double *, double *, double *, double *, double *, double *, // Aij
double *, double *, double *, // Gam
double *, double *, double *, double *, double *, double *, double *, // Gauge
double *, double *, // Sphi, Spi
double *, double *, // chi, trK
double *, double *, double *, double *, double *, double *, // gij
double *, double *, double *, double *, double *, double *, // Aij
double *, double *, double *, // Gam
double *, double *, double *, double *, double *, double *, double *, // Gauge
double *, double *, // Sphi, Spi
double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, // stress-energy
double *, double *, double *, double *, double *, double *, // Christoffel
double *, double *, double *, double *, double *, double *, // Christoffel
double *, double *, double *, double *, double *, double *, // Christoffel
double *, double *, double *, double *, double *, double *, // Ricci
double *, double *, double *, double *, double *, double *, double *, // constraint violation
int &, int &, double &, int &);
extern "C"
{
int f_compute_rhs_bssn_ss(int *, double &, double *, double *, double *, // ex,T,rho,sigma,R

View File

@@ -2,11 +2,43 @@
include makefile.inc
-include AMSS_NCKU_build.mk
ABE_TYPE ?= $(shell awk '/^[[:space:]]*\#define[[:space:]]+ABEtype/ {print $$3; exit}' macrodef.h 2>/dev/null)
ifeq ($(USE_TRANSFER_CACHE),auto)
ifeq ($(ABE_TYPE),0)
EFFECTIVE_USE_TRANSFER_CACHE = 1
else
EFFECTIVE_USE_TRANSFER_CACHE = 0
endif
else
EFFECTIVE_USE_TRANSFER_CACHE = $(USE_TRANSFER_CACHE)
endif
ifeq ($(USE_CXX_ESCALAR_KERNEL),1)
ifeq ($(ABE_TYPE),1)
EFFECTIVE_USE_CXX_ESCALAR_KERNEL = 1
else
EFFECTIVE_USE_CXX_ESCALAR_KERNEL = 0
endif
else
EFFECTIVE_USE_CXX_ESCALAR_KERNEL = 0
endif
ifeq ($(EFFECTIVE_USE_CXX_ESCALAR_KERNEL),1)
ifeq ($(USE_CXX_KERNELS),0)
$(error USE_CXX_ESCALAR_KERNEL=1 requires USE_CXX_KERNELS=1 because bssn_escalar_rhs_c.C reuses the C BSSN kernel)
endif
endif
## polint(ordn=6) kernel selector:
## 1 (default): barycentric fast path
## 0 : fallback to Neville path
POLINT6_USE_BARY ?= 1
POLINT6_FLAG = -DPOLINT6_USE_BARYCENTRIC=$(POLINT6_USE_BARY)
TRANSFER_CACHE_FLAG = -DBSSN_USE_TRANSFER_CACHE=$(EFFECTIVE_USE_TRANSFER_CACHE)
ESCALAR_KERNEL_FLAG = -DBSSN_USE_ESCALAR_C_KERNEL=$(EFFECTIVE_USE_CXX_ESCALAR_KERNEL)
## ABE build flags selected by PGO_MODE (set in makefile.inc, default: opt)
## make -> opt (PGO-guided, maximum performance)
@@ -16,7 +48,8 @@ PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata
ifeq ($(PGO_MODE),instrument)
## Phase 1: instrumentation — omit -ipo/-fp-model fast=2 for faster build and numerical stability
CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG)
f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
else
@@ -26,7 +59,8 @@ else
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG)
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
endif
@@ -89,8 +123,11 @@ ifeq ($(USE_CXX_KERNELS),0)
# Fortran mode: no C rewrite files; bssn_rhs.o is included via F90FILES below
CFILES =
else
# C++ mode (default): C rewrite of bssn_rhs and helper kernels
# C++ mode (default): C rewrite of bssn/bssn-escalar rhs and helper kernels
CFILES = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_kodis_c.o
ifeq ($(EFFECTIVE_USE_CXX_ESCALAR_KERNEL),1)
CFILES += bssn_escalar_rhs_c.o
endif
endif
ifeq ($(USE_CXX_Z4C_KERNELS),1)

View File

@@ -53,6 +53,18 @@ USE_CXX_KERNELS ?= 1
## 0 : use original Fortran Z4c_rhs.o
USE_CXX_Z4C_KERNELS ?= 1
## BSSN-EScalar RHS switch
## 1 (default) : use BSSN-EScalar C wrapper on the normal patch path
## 0 : keep the original Fortran BSSN-EScalar RHS for precision-safe runs
## Note: this requires USE_CXX_KERNELS=1 because the wrapper reuses the C BSSN kernel.
USE_CXX_ESCALAR_KERNEL ?= 1
## Cached transfer switch
## auto (default): enable for BSSN vacuum, keep other paths on the safe uncached path
## 1 : force cached Sync/Restrict/OutBd transfer on evolution hot paths
## 0 : force the original uncached transfer path
USE_TRANSFER_CACHE ?= auto
## RK4 kernel implementation switch
## 1 (default) : use C/C++ rewrite of rungekutta4_rout (for optimization experiments)
## 0 : use original Fortran rungekutta4_rout.o

View File

@@ -0,0 +1,211 @@
# BSSN Build Config Migration
This note records the build-configuration fix needed when replacing
`AMSS_NCKU_Input.py` or `generate_macrodef.py` with a newer upstream version.
## Problem
`AMSS_NCKU_source/macrodef.h` is not the authoritative file used by normal
runs. `AMSS_NCKU_Program.py` first generates macro files under
`input_data.File_directory`, copies `AMSS_NCKU_source` to
`<File_directory>/AMSS_NCKU_source_copy`, then copies the generated macro files
into that copied source tree and compiles there.
Therefore, makefile logic must not depend only on the stale
`AMSS_NCKU_source/macrodef.h`. The actual equation path must be passed to the
copied build tree from the same generation step that creates `macrodef.h`.
The performance regression was caused by compiling/linking the
`BSSN-EScalar` C wrapper into BSSN vacuum builds. For BSSN vacuum (`ABEtype=0`),
the build must use:
```make
BSSN_USE_TRANSFER_CACHE=1
BSSN_USE_ESCALAR_C_KERNEL=0
```
and must not link `bssn_escalar_rhs_c.o`.
## Required Migration Steps
### 1. Add an ABE type helper in `generate_macrodef.py`
Add a helper that maps `input_data.Equation_Class` to the numeric `ABEtype`.
Use the same mapping as `macrodef.h`:
```python
def get_abe_type():
if ( input_data.Equation_Class == "BSSN" ):
return 0
elif ( input_data.Equation_Class == "BSSN-EScalar" ):
return 1
elif ( input_data.Equation_Class == "BSSN-EM" ):
return 3
elif ( input_data.Equation_Class == "Z4C" ):
return 2
else:
raise ValueError("Equation_Class setting error!!!")
```
Update `generate_macrodef_h()` to print `#define ABEtype {get_abe_type()}`
instead of duplicating the if/elif mapping.
### 2. Generate a makefile fragment
In `generate_macrodef.py`, add:
```python
def generate_build_config():
file1 = open(os.path.join(input_data.File_directory, "AMSS_NCKU_build.mk"), "w")
print("# Generated by generate_macrodef.py; do not edit manually.", file=file1)
print(f"ABE_TYPE := {get_abe_type()}", file=file1)
file1.close()
```
This file is the build-time authority for the equation path.
### 3. Call and copy the generated build config
In `AMSS_NCKU_Program.py`, after generating `macrodef.h` and `macrodef.fh`, call:
```python
generate_macrodef.generate_build_config()
print(" AMSS-NCKU build config AMSS_NCKU_build.mk has been generated. ")
```
When copying generated files into `AMSS_NCKU_source_copy`, also copy:
```python
build_config_path = os.path.join(File_directory, "AMSS_NCKU_build.mk")
shutil.copy2(build_config_path, AMSS_NCKU_source_copy)
```
### 4. Make the source makefile consume the generated config
At the top of `AMSS_NCKU_source/makefile`, after `include makefile.inc`, add:
```make
-include AMSS_NCKU_build.mk
ABE_TYPE ?= $(shell awk '/^[[:space:]]*\#define[[:space:]]+ABEtype/ {print $$3; exit}' macrodef.h 2>/dev/null)
```
The generated `AMSS_NCKU_build.mk` is used during normal Python-driven builds.
The fallback keeps manual source-tree builds usable.
### 5. Gate path-specific build options by `ABE_TYPE`
Use effective build switches:
```make
ifeq ($(USE_TRANSFER_CACHE),auto)
ifeq ($(ABE_TYPE),0)
EFFECTIVE_USE_TRANSFER_CACHE = 1
else
EFFECTIVE_USE_TRANSFER_CACHE = 0
endif
else
EFFECTIVE_USE_TRANSFER_CACHE = $(USE_TRANSFER_CACHE)
endif
ifeq ($(USE_CXX_ESCALAR_KERNEL),1)
ifeq ($(ABE_TYPE),1)
EFFECTIVE_USE_CXX_ESCALAR_KERNEL = 1
else
EFFECTIVE_USE_CXX_ESCALAR_KERNEL = 0
endif
else
EFFECTIVE_USE_CXX_ESCALAR_KERNEL = 0
endif
TRANSFER_CACHE_FLAG = -DBSSN_USE_TRANSFER_CACHE=$(EFFECTIVE_USE_TRANSFER_CACHE)
ESCALAR_KERNEL_FLAG = -DBSSN_USE_ESCALAR_C_KERNEL=$(EFFECTIVE_USE_CXX_ESCALAR_KERNEL)
```
Only add `bssn_escalar_rhs_c.o` when the effective EScalar C kernel switch is
enabled:
```make
ifeq ($(EFFECTIVE_USE_CXX_ESCALAR_KERNEL),1)
CFILES += bssn_escalar_rhs_c.o
endif
```
### 6. Use safe transfer-cache default
In `AMSS_NCKU_source/makefile.inc`, keep:
```make
USE_TRANSFER_CACHE ?= auto
```
With the effective switch logic above, this enables cached transfer for BSSN
vacuum while keeping non-BSSN paths on the uncached path by default.
## Verification Checklist
Run these checks after migrating:
```bash
python3 -c "import generate_macrodef; generate_macrodef.generate_build_config()"
cat GW150914/AMSS_NCKU_build.mk
```
For BSSN, the generated file should contain:
```make
ABE_TYPE := 0
```
Dry-run the copied or source makefile:
```bash
make -n -B INTERP_LB_MODE=off ABE | grep -E 'BSSN_USE_TRANSFER_CACHE|BSSN_USE_ESCALAR_C_KERNEL|bssn_escalar_rhs_c'
```
Expected BSSN result:
```text
-DBSSN_USE_TRANSFER_CACHE=1 -DBSSN_USE_ESCALAR_C_KERNEL=0
```
and no `bssn_escalar_rhs_c.o` in the final link command.
Run the full workflow:
```bash
python3 AMSS_NCKU_Program.py
```
For the 10-step BSSN test, compare coordinate output:
```bash
python3 - <<'PY'
from pathlib import Path
old = Path('../GW150914-06457/AMSS_NCKU_output/bssn_BH.dat')
new = Path('GW150914/AMSS_NCKU_output/bssn_BH.dat')
def rows(path):
out = []
for line in path.read_text().splitlines():
if not line.strip() or line.lstrip().startswith('#'):
continue
out.append([float(x) for x in line.split()])
return out
ro, rn = rows(old), rows(new)
n = min(len(ro), len(rn))
max_abs = 0.0
for i in range(n):
for a, b in zip(ro[i], rn[i]):
max_abs = max(max_abs, abs(a - b))
print(f"old_rows={len(ro)} new_rows={len(rn)} compared_rows={n}")
print(f"max_abs_diff={max_abs:.17g}")
PY
```
For the validated migration, the first 10 rows matched exactly:
```text
max_abs_diff=0
```

View File

@@ -12,6 +12,37 @@ import os
import AMSS_NCKU_Input as input_data ## import program input file
##################################################################
def get_abe_type():
if ( input_data.Equation_Class == "BSSN" ):
return 0
elif ( input_data.Equation_Class == "BSSN-EScalar" ):
return 1
elif ( input_data.Equation_Class == "BSSN-EM" ):
return 3
elif ( input_data.Equation_Class == "Z4C" ):
return 2
else:
raise ValueError("Equation_Class setting error!!!")
##################################################################
## Generate the makefile fragment used by the copied source tree.
## The source-tree macrodef.h is not authoritative because macro files
## are regenerated under File_directory for each run.
def generate_build_config():
file1 = open( os.path.join(input_data.File_directory, "AMSS_NCKU_build.mk"), "w")
print( "# Generated by generate_macrodef.py; do not edit manually.", file=file1 )
print( f"ABE_TYPE := {get_abe_type()}", file=file1 )
file1.close()
##################################################################
## Generate the macro file macrodef.h according to user settings
@@ -58,19 +89,10 @@ def generate_macrodef_h():
# 2: Z4c vacuum
# 3: coupled to Maxwell field
if ( input_data.Equation_Class == "BSSN" ):
print( "#define ABEtype 0", file=file1 )
try:
print( f"#define ABEtype {get_abe_type()}", file=file1 )
print( file=file1 )
elif ( input_data.Equation_Class == "BSSN-EScalar" ):
print( "#define ABEtype 1", file=file1 )
print( file=file1 )
elif ( input_data.Equation_Class == "BSSN-EM" ):
print( "#define ABEtype 3", file=file1 )
print( file=file1 )
elif ( input_data.Equation_Class == "Z4C" ):
print( "#define ABEtype 2", file=file1 )
print( file=file1 )
else:
except ValueError:
print( "Equation_Class setting error!!!" )
print()
print( "# Equation type #define ABEtype setting error!!!", file=file1 )