Add fine-grained step timing and trim BH RHS overhead

This commit is contained in:
2026-04-13 14:50:55 +08:00
parent f3988ac8ca
commit 968522995b
4 changed files with 497 additions and 235 deletions

View File

@@ -47,6 +47,157 @@ using namespace std;
#define BSSN_ENABLE_MEM_USAGE_LOG 0
#endif
#ifndef BSSN_FINE_TIMING
#define BSSN_FINE_TIMING 0
#endif
#ifndef BSSN_FINE_TIMING_EVERY
#define BSSN_FINE_TIMING_EVERY 1
#endif
#ifndef BSSN_FINE_TIMING_TOPN
#define BSSN_FINE_TIMING_TOPN 8
#endif
#if BSSN_FINE_TIMING
namespace step_timing
{
enum Bucket
{
TB_ANALYSIS_PSI4 = 0,
TB_ANALYSIS_SURFACE,
TB_ANALYSIS_IO,
TB_BH_PREDICTOR,
TB_PREDICTOR_RHS,
TB_PREDICTOR_SYNC,
TB_BH_CORRECTOR,
TB_CORRECTOR_RHS,
TB_CORRECTOR_SYNC,
TB_STATE_SWAP,
TB_RESTRICT_PROLONG,
TB_CONSTRAINT_OUT,
TB_DUMP_3D,
TB_DUMP_2D,
TB_CHECKPOINT,
TB_REGRID,
TB_COUNT
};
static double local_bucket_seconds[TB_COUNT];
static const char *bucket_labels[TB_COUNT] =
{
"analysis_psi4",
"analysis_surface",
"analysis_io",
"bh_predictor",
"predictor_rhs",
"predictor_sync",
"bh_corrector",
"corrector_rhs",
"corrector_sync",
"state_swap",
"restrict_prolong",
"constraint_out",
"dump_3d",
"dump_2d",
"checkpoint",
"regrid"
};
void reset()
{
for (int i = 0; i < TB_COUNT; i++)
local_bucket_seconds[i] = 0.0;
}
void add(Bucket bucket, double seconds)
{
local_bucket_seconds[int(bucket)] += seconds;
}
void report(int myrank, int nprocs, monitor *TimingMonitor,
int step_index, double phys_time, double step_wall_seconds)
{
double max_bucket_seconds[TB_COUNT];
double avg_bucket_seconds[TB_COUNT];
MPI_Reduce(local_bucket_seconds, max_bucket_seconds, TB_COUNT, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
MPI_Reduce(local_bucket_seconds, avg_bucket_seconds, TB_COUNT, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
if (myrank != 0)
return;
for (int i = 0; i < TB_COUNT; i++)
avg_bucket_seconds[i] /= Mymax(1, nprocs);
if (TimingMonitor)
{
double row[2 + 2 * TB_COUNT];
row[0] = double(step_index);
row[1] = step_wall_seconds;
for (int i = 0; i < TB_COUNT; i++)
{
row[2 + i] = max_bucket_seconds[i];
row[2 + TB_COUNT + i] = avg_bucket_seconds[i];
}
TimingMonitor->writefile(phys_time, 2 + 2 * TB_COUNT, row);
}
double residual = step_wall_seconds;
for (int i = 0; i < TB_COUNT; i++)
residual -= max_bucket_seconds[i];
if (residual < 0.0)
residual = 0.0;
int order[TB_COUNT];
for (int i = 0; i < TB_COUNT; i++)
order[i] = i;
for (int i = 0; i < TB_COUNT - 1; i++)
for (int j = i + 1; j < TB_COUNT; j++)
if (max_bucket_seconds[order[j]] > max_bucket_seconds[order[i]])
{
int tmp = order[i];
order[i] = order[j];
order[j] = tmp;
}
ios::fmtflags old_flags = cout.flags();
streamsize old_precision = cout.precision();
cout << " Fine timing hot spots (max rank wall estimate):" << endl;
const int topn = Mymin(BSSN_FINE_TIMING_TOPN, TB_COUNT);
for (int i = 0; i < topn; i++)
{
const int ib = order[i];
const double frac = (step_wall_seconds > 0.0) ? (100.0 * max_bucket_seconds[ib] / step_wall_seconds) : 0.0;
cout << " "
<< setw(20) << left << bucket_labels[ib]
<< " = " << setw(10) << right << setprecision(6) << max_bucket_seconds[ib]
<< " s (" << setw(6) << setprecision(4) << frac << "%)" << endl;
}
if (residual > 1.0e-6)
{
const double frac = (step_wall_seconds > 0.0) ? (100.0 * residual / step_wall_seconds) : 0.0;
cout << " "
<< setw(20) << left << "unprofiled_residual"
<< " = " << setw(10) << right << setprecision(6) << residual
<< " s (" << setw(6) << setprecision(4) << frac << "%)" << endl;
}
cout << endl;
cout.flags(old_flags);
cout.precision(old_precision);
}
}
#define STEP_TIMER_DECL(var_name) const double var_name = MPI_Wtime()
#define STEP_TIMER_ADD(bucket_name, var_name) step_timing::add(step_timing::bucket_name, MPI_Wtime() - (var_name))
#else
#define STEP_TIMER_DECL(var_name)
#define STEP_TIMER_ADD(bucket_name, var_name)
#endif
//================================================================================================
// define bssn_class
@@ -104,11 +255,29 @@ bssn_class::bssn_class(double Couranti, double StartTimei, double TotalTimei,
a_stream << setw(15) << "# time ADMmass ADMPx ADMPy ADMPz ADMSx ADMSy ADMSz";
MAPMonitor = new monitor("bssn_ADMQs.dat", myrank, a_stream.str());
a_stream.clear();
a_stream.str("");
a_stream << setw(15) << "# time Ham Px Py Pz Gx Gy Gz";
ConVMonitor = new monitor("bssn_constraint.dat", myrank, a_stream.str());
}
a_stream.clear();
a_stream.str("");
a_stream << setw(15) << "# time Ham Px Py Pz Gx Gy Gz";
ConVMonitor = new monitor("bssn_constraint.dat", myrank, a_stream.str());
#if BSSN_FINE_TIMING
a_stream.clear();
a_stream.str("");
a_stream << setw(8) << "# step";
a_stream << setw(14) << "wall";
for (int ib = 0; ib < step_timing::TB_COUNT; ib++)
a_stream << setw(18) << step_timing::bucket_labels[ib];
for (int ib = 0; ib < step_timing::TB_COUNT; ib++)
{
char str_avg[64];
sprintf(str_avg, "avg_%s", step_timing::bucket_labels[ib]);
a_stream << setw(18) << str_avg;
}
TimingMonitor = new monitor("bssn_step_timing.dat", myrank, a_stream.str());
#else
TimingMonitor = 0;
#endif
}
// setup sphere integration engine
Waveshell = new surface_integral(Symmetry);
@@ -1053,10 +1222,11 @@ bssn_class::~bssn_class()
delete ErrorMonitor;
delete Psi4Monitor;
delete BHMonitor;
delete MAPMonitor;
delete ConVMonitor;
delete Waveshell;
delete BHMonitor;
delete MAPMonitor;
delete ConVMonitor;
delete TimingMonitor;
delete Waveshell;
delete CheckPoint;
}
@@ -2151,9 +2321,13 @@ void bssn_class::Evolve(int Steps)
GH->settrfls(trfls);
for (int ncount = 1; ncount < Steps + 1; ncount++)
{
// special for large mass ratio consideration
for (int ncount = 1; ncount < Steps + 1; ncount++)
{
#if BSSN_FINE_TIMING
step_timing::reset();
STEP_TIMER_DECL(step_wall_start);
#endif
// special for large mass ratio consideration
// if(fabs(Porg0[0][0]-Porg0[1][0])+fabs(Porg0[0][1]-Porg0[1][1])+fabs(Porg0[0][2]-Porg0[1][2])<1e-6)
// { GH->levels=GH->movls; }
@@ -2177,17 +2351,19 @@ void bssn_class::Evolve(int Steps)
LastCheck += dT_mon;
// When LastDump >= DumpTime, output corresponding binary data
if (LastDump >= DumpTime)
{
// misc::tillherecheck("before Dump_Data");
for (int lev = 0; lev < GH->levels; lev++)
Parallel::Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT_mon);
#ifdef WithShell
SH->Dump_Data(DumpList, 0, PhysTime, dT_mon);
#endif
LastDump = 0;
if (LastDump >= DumpTime)
{
STEP_TIMER_DECL(timer_dump3d);
// misc::tillherecheck("before Dump_Data");
for (int lev = 0; lev < GH->levels; lev++)
Parallel::Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT_mon);
#ifdef WithShell
SH->Dump_Data(DumpList, 0, PhysTime, dT_mon);
#endif
STEP_TIMER_ADD(TB_DUMP_3D, timer_dump3d);
LastDump = 0;
if (myrank == 0)
{
@@ -2196,14 +2372,16 @@ void bssn_class::Evolve(int Steps)
}
// When Last2dDump >= d2DumpTime, output corresponding 2D data
if (Last2dDump >= d2DumpTime)
{
// misc::tillherecheck("before 2dDump_Data");
for (int lev = 0; lev < GH->levels; lev++)
Parallel::d2Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT_mon);
Last2dDump = 0;
if (Last2dDump >= d2DumpTime)
{
STEP_TIMER_DECL(timer_dump2d);
// misc::tillherecheck("before 2dDump_Data");
for (int lev = 0; lev < GH->levels; lev++)
Parallel::d2Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT_mon);
STEP_TIMER_ADD(TB_DUMP_2D, timer_dump2d);
Last2dDump = 0;
if (myrank == 0)
{
@@ -2225,12 +2403,14 @@ void bssn_class::Evolve(int Steps)
if (PhysTime >= TotalTime)
break;
#if (REGLEV == 1)
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
#if (REGLEV == 1)
STEP_TIMER_DECL(timer_regrid);
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
STEP_TIMER_ADD(TB_REGRID, timer_regrid);
#endif
#if (REGLEV == 0 && (PSTR == 1 || PSTR == 2))
// GH->Regrid_fake(Symmetry,BH_num,Porgbr,Porg0,
@@ -2302,18 +2482,25 @@ void bssn_class::Evolve(int Steps)
////////////////////////////////////////////////////////////
// When LastCheck >= CheckTime, perform runtime checks and output status data
if (LastCheck >= CheckTime)
{
LastCheck = 0;
CheckPoint->write_Black_Hole_position(BH_num_input, BH_num, Porg0, Porgbr, Mass);
CheckPoint->writecheck_cgh(PhysTime, GH);
#ifdef WithShell
CheckPoint->writecheck_sh(PhysTime, SH);
#endif
CheckPoint->write_bssn(LastDump, Last2dDump, LastAnas);
}
}
if (LastCheck >= CheckTime)
{
STEP_TIMER_DECL(timer_checkpoint);
LastCheck = 0;
CheckPoint->write_Black_Hole_position(BH_num_input, BH_num, Porg0, Porgbr, Mass);
CheckPoint->writecheck_cgh(PhysTime, GH);
#ifdef WithShell
CheckPoint->writecheck_sh(PhysTime, SH);
#endif
CheckPoint->write_bssn(LastDump, Last2dDump, LastAnas);
STEP_TIMER_ADD(TB_CHECKPOINT, timer_checkpoint);
}
#if BSSN_FINE_TIMING
if (ncount % BSSN_FINE_TIMING_EVERY == 0)
step_timing::report(myrank, nprocs, TimingMonitor, ncount, PhysTime, MPI_Wtime() - step_wall_start);
#endif
}
/*
#ifdef With_AHF
// final apparent horizon finding
@@ -2444,6 +2631,7 @@ void bssn_class::RecursiveStep(int lev)
#endif
#if (REGLEV == 0)
STEP_TIMER_DECL(timer_regrid_onelevel);
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
@@ -2452,6 +2640,7 @@ void bssn_class::RecursiveStep(int lev)
ConstraintRefreshLevels[lev] = 1;
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
}
STEP_TIMER_ADD(TB_REGRID, timer_regrid_onelevel);
#endif
}
@@ -3042,11 +3231,12 @@ void bssn_class::Step(int lev, int YN)
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
// new code 2013-2-15, zjcao
#if (MAPBH == 1)
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
// new code 2013-2-15, zjcao
#if (MAPBH == 1)
STEP_TIMER_DECL(timer_bh_predictor);
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev);
for (int ithBH = 0; ithBH < BH_num; ithBH++)
{
@@ -3070,12 +3260,13 @@ void bssn_class::Step(int lev, int YN)
DG_List->insert(Sfy0);
DG_List->insert(Sfz0);
Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev);
DG_List->clearList();
}
}
}
// data analysis part
DG_List->clearList();
}
}
}
STEP_TIMER_ADD(TB_BH_PREDICTOR, timer_bh_predictor);
// data analysis part
// Warning NOTE: the variables1 are used as temp storege room
if (lev == a_lev)
{
@@ -3093,11 +3284,12 @@ void bssn_class::Step(int lev, int YN)
double TRK4 = PhysTime;
int iter_count = 0; // count RK4 substeps
int pre = 0, cor = 1;
int ERROR = 0;
MyList<ss_patch> *sPp;
// Predictor
MyList<Patch> *Pp = GH->PatL[lev];
int ERROR = 0;
MyList<ss_patch> *sPp;
STEP_TIMER_DECL(timer_predictor_rhs);
// Predictor
MyList<Patch> *Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
@@ -3365,14 +3557,17 @@ void bssn_class::Step(int lev, int YN)
// Non-blocking error reduction overlapped with Sync to hide Allreduce latency
MPI_Request err_req;
{
int erh = ERROR;
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req);
}
#endif
Parallel::AsyncSyncState async_pre;
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
{
int erh = ERROR;
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req);
}
#endif
STEP_TIMER_ADD(TB_PREDICTOR_RHS, timer_predictor_rhs);
STEP_TIMER_DECL(timer_predictor_sync);
Parallel::AsyncSyncState async_pre;
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
#ifdef WithShell
if (lev == 0)
@@ -3393,9 +3588,9 @@ void bssn_class::Step(int lev, int YN)
#endif
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
#ifdef WithShell
// Complete non-blocking error reduction and check
MPI_Wait(&err_req, MPI_STATUS_IGNORE);
#ifdef WithShell
// Complete non-blocking error reduction and check
MPI_Wait(&err_req, MPI_STATUS_IGNORE);
if (ERROR)
{
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
@@ -3404,12 +3599,13 @@ void bssn_class::Step(int lev, int YN)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime << ", lev = " << lev << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
#endif
#if (MAPBH == 0)
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
#endif
STEP_TIMER_ADD(TB_PREDICTOR_SYNC, timer_predictor_sync);
#if (MAPBH == 0)
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
@@ -3449,12 +3645,13 @@ void bssn_class::Step(int lev, int YN)
}
#endif
// corrector
for (iter_count = 1; iter_count < 4; iter_count++)
{
// for RK4: t0, t0+dt/2, t0+dt/2, t0+dt;
if (iter_count == 1 || iter_count == 3)
TRK4 += dT_lev / 2;
// corrector
for (iter_count = 1; iter_count < 4; iter_count++)
{
STEP_TIMER_DECL(timer_corrector_rhs);
// for RK4: t0, t0+dt/2, t0+dt/2, t0+dt;
if (iter_count == 1 || iter_count == 3)
TRK4 += dT_lev / 2;
Pp = GH->PatL[lev];
while (Pp)
{
@@ -3725,14 +3922,17 @@ void bssn_class::Step(int lev, int YN)
}
// Non-blocking error reduction overlapped with Sync to hide Allreduce latency
MPI_Request err_req_cor;
{
int erh = ERROR;
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor);
}
#endif
Parallel::AsyncSyncState async_cor;
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
{
int erh = ERROR;
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor);
}
#endif
STEP_TIMER_ADD(TB_CORRECTOR_RHS, timer_corrector_rhs);
STEP_TIMER_DECL(timer_corrector_sync);
Parallel::AsyncSyncState async_cor;
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
#ifdef WithShell
if (lev == 0)
@@ -3767,14 +3967,16 @@ void bssn_class::Step(int lev, int YN)
<< " variables at t = " << PhysTime
<< ", lev = " << lev << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
#endif
#if (MAPBH == 0)
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
}
}
#endif
STEP_TIMER_ADD(TB_CORRECTOR_SYNC, timer_corrector_sync);
#if (MAPBH == 0)
STEP_TIMER_DECL(timer_bh_corrector);
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
compute_Porg_rhs(Porg, Porg1, Sfx, Sfy, Sfz, lev);
for (int ithBH = 0; ithBH < BH_num; ithBH++)
{
@@ -3800,16 +4002,18 @@ void bssn_class::Step(int lev, int YN)
DG_List->insert(Sfy0);
DG_List->insert(Sfz0);
Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev);
DG_List->clearList();
}
}
}
#endif
// swap time level
if (iter_count < 3)
{
Pp = GH->PatL[lev];
DG_List->clearList();
}
}
}
STEP_TIMER_ADD(TB_BH_CORRECTOR, timer_bh_corrector);
#endif
// swap time level
if (iter_count < 3)
{
STEP_TIMER_DECL(timer_state_swap);
Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
@@ -3839,11 +4043,11 @@ void bssn_class::Step(int lev, int YN)
BP = BP->next;
}
sPp = sPp->next;
}
}
#endif
#if (MAPBH == 0)
}
}
#endif
#if (MAPBH == 0)
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
@@ -3852,14 +4056,16 @@ void bssn_class::Step(int lev, int YN)
Porg[ithBH][0] = Porg1[ithBH][0];
Porg[ithBH][1] = Porg1[ithBH][1];
Porg[ithBH][2] = Porg1[ithBH][2];
}
}
#endif
}
}
#if (RPS == 0)
// mesh refinement boundary part
RestrictProlong(lev, YN, BB);
}
}
#endif
STEP_TIMER_ADD(TB_STATE_SWAP, timer_state_swap);
}
}
#if (RPS == 0)
STEP_TIMER_DECL(timer_restrict_prolong);
// mesh refinement boundary part
RestrictProlong(lev, YN, BB);
#ifdef WithShell
if (lev == 0)
@@ -3876,17 +4082,19 @@ void bssn_class::Step(int lev, int YN)
<< " seconds! " << endl;
}
}
#endif
#endif
// note the data structure before update
#endif
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
#endif
// note the data structure before update
// SynchList_cor 1 -----------
//
// StateList 0 -----------
//
// OldStateList old -----------
// update
Pp = GH->PatL[lev];
// OldStateList old -----------
// update
STEP_TIMER_DECL(timer_state_commit);
Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
@@ -3939,10 +4147,11 @@ void bssn_class::Step(int lev, int YN)
{
Porg0[ithBH][0] = Porg1[ithBH][0];
Porg0[ithBH][1] = Porg1[ithBH][1];
Porg0[ithBH][2] = Porg1[ithBH][2];
}
}
}
Porg0[ithBH][2] = Porg1[ithBH][2];
}
}
STEP_TIMER_ADD(TB_STATE_SWAP, timer_state_commit);
}
//================================================================================================
@@ -4266,12 +4475,14 @@ void bssn_class::Step(int lev, int YN)
<< ", lev = " << lev << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
#endif
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
}
#endif
STEP_TIMER_ADD(TB_PREDICTOR_SYNC, timer_predictor_sync);
STEP_TIMER_DECL(timer_bh_predictor);
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev);
for (int ithBH = 0; ithBH < BH_num; ithBH++)
{
@@ -4303,10 +4514,11 @@ void bssn_class::Step(int lev, int YN)
}
// data analysis part
// Warning NOTE: the variables1 are used as temp storege room
if (lev == a_lev)
{
AnalysisStuff(lev, dT_lev);
}
if (lev == a_lev)
{
AnalysisStuff(lev, dT_lev);
}
STEP_TIMER_ADD(TB_BH_PREDICTOR, timer_bh_predictor);
// corrector
for (iter_count = 1; iter_count < 3; iter_count++)
{
@@ -5768,18 +5980,19 @@ void bssn_class::SHStep()
// 0: do not use mixing two levels data for OutBD; 1: do use
#define MIXOUTB 0
void bssn_class::RestrictProlong(int lev, int YN, bool BB,
MyList<var> *SL, MyList<var> *OL, MyList<var> *corL)
// we assume
// StateList 1 -----------
//
// OldStateList 0 -----------
//
// SynchList_cor old -----------
{
#if (PSTR == 1 || PSTR == 2)
// stringstream a_stream;
// a_stream.setf(ios::left);
void bssn_class::RestrictProlong(int lev, int YN, bool BB,
MyList<var> *SL, MyList<var> *OL, MyList<var> *corL)
// we assume
// StateList 1 -----------
//
// OldStateList 0 -----------
//
// SynchList_cor old -----------
{
STEP_TIMER_DECL(timer_restrict_prolong);
#if (PSTR == 1 || PSTR == 2)
// stringstream a_stream;
// a_stream.setf(ios::left);
#endif
if (lev > 0)
@@ -5912,14 +6125,15 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]);
#if (PSTR == 1 || PSTR == 2)
// a_stream.clear();
// a_stream.str("");
// a_stream<<GH->mylev<<": after Sync";
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
#endif
}
}
#if (PSTR == 1 || PSTR == 2)
// a_stream.clear();
// a_stream.str("");
// a_stream<<GH->mylev<<": after Sync";
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
#endif
}
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
}
//================================================================================================
@@ -5929,16 +6143,17 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
// auxiliary operation, input lev means original lev-1
void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
MyList<var> *SL, MyList<var> *OL, MyList<var> *corL)
// we assume
// StateList 1 -----------
//
// OldStateList 0 -----------
//
// SynchList_cor old -----------
{
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"starting RestrictProlong_aux");
void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
MyList<var> *SL, MyList<var> *OL, MyList<var> *corL)
// we assume
// StateList 1 -----------
//
// OldStateList 0 -----------
//
// SynchList_cor old -----------
{
STEP_TIMER_DECL(timer_restrict_prolong);
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"starting RestrictProlong_aux");
if (lev >= GH->levels - 1)
return;
@@ -6003,10 +6218,11 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry);
#endif
}
Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]);
}
}
Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]);
}
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
}
//================================================================================================
@@ -6014,9 +6230,10 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
//================================================================================================
void bssn_class::RestrictProlong(int lev, int YN, bool BB)
{
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
void bssn_class::RestrictProlong(int lev, int YN, bool BB)
{
STEP_TIMER_DECL(timer_restrict_prolong);
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
// we assume for fine
// SynchList_cor 1 -----------
//
@@ -6092,10 +6309,11 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry);
#endif
}
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]);
}
}
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]);
}
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
}
//================================================================================================
@@ -6843,60 +7061,52 @@ void bssn_class::compute_Porg_rhs(double **BH_PS,double **BH_RHS,var *forx,var *
// new code considering diferent levels for different black hole
void bssn_class::compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, var *fory, var *forz, int ilev)
{
const int InList = 3;
MyList<var> *DG_List = new MyList<var>(forx);
DG_List->insert(fory);
DG_List->insert(forz);
double *x1, *y1, *z1;
double *shellf;
shellf = new double[3];
double *pox[3];
for (int i = 0; i < 3; i++)
pox[i] = new double[1];
for (int n = 0; n < BH_num; n++)
{
void bssn_class::compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, var *fory, var *forz, int ilev)
{
MyList<var> DG_List_x(forx);
MyList<var> DG_List_y(fory);
MyList<var> DG_List_z(forz);
DG_List_x.next = &DG_List_y;
DG_List_y.next = &DG_List_z;
double shellf[3];
double pox_buf[3][1];
double *pox[3] = {pox_buf[0], pox_buf[1], pox_buf[2]};
for (int n = 0; n < BH_num; n++)
{
pox[0][0] = BH_PS[n][0];
pox[1][0] = BH_PS[n][1];
pox[2][0] = BH_PS[n][2];
int lev = ilev;
#if (PSTR == 0)
while (!Parallel::PatList_Interp_Points(GH->PatL[lev], DG_List, 1, pox, shellf, Symmetry))
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
while (!Parallel::PatList_Interp_Points(GH->PatL[lev], DG_List, 1, pox, shellf, Symmetry, GH->Commlev[lev]))
#endif
{
lev--;
if (lev < 0)
{
ErrorMonitor->outfile << "fail to find black holes at t = " << PhysTime << endl;
for (n = 0; n < BH_num; n++)
ErrorMonitor->outfile << "(x,y,z) = ("
<< pox[0][n] << "," << pox[1][n] << "," << pox[2][n]
<< ")" << endl;
break;
}
#if (PSTR == 0)
while (!Parallel::PatList_Interp_Points(GH->PatL[lev], &DG_List_x, 1, pox, shellf, Symmetry))
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
while (!Parallel::PatList_Interp_Points(GH->PatL[lev], &DG_List_x, 1, pox, shellf, Symmetry, GH->Commlev[lev]))
#endif
{
lev--;
if (lev < 0)
{
ErrorMonitor->outfile << "fail to find black holes at t = " << PhysTime << endl;
for (n = 0; n < BH_num; n++)
ErrorMonitor->outfile << "(x,y,z) = ("
<< BH_PS[n][0] << "," << BH_PS[n][1] << "," << BH_PS[n][2]
<< ")" << endl;
break;
}
}
if (lev >= 0)
{
BH_RHS[n][0] = -shellf[0];
BH_RHS[n][1] = -shellf[1];
BH_RHS[n][2] = -shellf[2];
}
}
DG_List->clearList();
delete[] shellf;
for (int i = 0; i < 3; i++)
delete[] pox[i];
}
BH_RHS[n][2] = -shellf[2];
}
}
}
#endif
//================================================================================================