Add fine-grained step timing and trim BH RHS overhead
This commit is contained in:
@@ -47,6 +47,157 @@ using namespace std;
|
||||
#define BSSN_ENABLE_MEM_USAGE_LOG 0
|
||||
#endif
|
||||
|
||||
#ifndef BSSN_FINE_TIMING
|
||||
#define BSSN_FINE_TIMING 0
|
||||
#endif
|
||||
|
||||
#ifndef BSSN_FINE_TIMING_EVERY
|
||||
#define BSSN_FINE_TIMING_EVERY 1
|
||||
#endif
|
||||
|
||||
#ifndef BSSN_FINE_TIMING_TOPN
|
||||
#define BSSN_FINE_TIMING_TOPN 8
|
||||
#endif
|
||||
|
||||
#if BSSN_FINE_TIMING
|
||||
namespace step_timing
|
||||
{
|
||||
enum Bucket
|
||||
{
|
||||
TB_ANALYSIS_PSI4 = 0,
|
||||
TB_ANALYSIS_SURFACE,
|
||||
TB_ANALYSIS_IO,
|
||||
TB_BH_PREDICTOR,
|
||||
TB_PREDICTOR_RHS,
|
||||
TB_PREDICTOR_SYNC,
|
||||
TB_BH_CORRECTOR,
|
||||
TB_CORRECTOR_RHS,
|
||||
TB_CORRECTOR_SYNC,
|
||||
TB_STATE_SWAP,
|
||||
TB_RESTRICT_PROLONG,
|
||||
TB_CONSTRAINT_OUT,
|
||||
TB_DUMP_3D,
|
||||
TB_DUMP_2D,
|
||||
TB_CHECKPOINT,
|
||||
TB_REGRID,
|
||||
TB_COUNT
|
||||
};
|
||||
|
||||
static double local_bucket_seconds[TB_COUNT];
|
||||
|
||||
static const char *bucket_labels[TB_COUNT] =
|
||||
{
|
||||
"analysis_psi4",
|
||||
"analysis_surface",
|
||||
"analysis_io",
|
||||
"bh_predictor",
|
||||
"predictor_rhs",
|
||||
"predictor_sync",
|
||||
"bh_corrector",
|
||||
"corrector_rhs",
|
||||
"corrector_sync",
|
||||
"state_swap",
|
||||
"restrict_prolong",
|
||||
"constraint_out",
|
||||
"dump_3d",
|
||||
"dump_2d",
|
||||
"checkpoint",
|
||||
"regrid"
|
||||
};
|
||||
|
||||
void reset()
|
||||
{
|
||||
for (int i = 0; i < TB_COUNT; i++)
|
||||
local_bucket_seconds[i] = 0.0;
|
||||
}
|
||||
|
||||
void add(Bucket bucket, double seconds)
|
||||
{
|
||||
local_bucket_seconds[int(bucket)] += seconds;
|
||||
}
|
||||
|
||||
void report(int myrank, int nprocs, monitor *TimingMonitor,
|
||||
int step_index, double phys_time, double step_wall_seconds)
|
||||
{
|
||||
double max_bucket_seconds[TB_COUNT];
|
||||
double avg_bucket_seconds[TB_COUNT];
|
||||
|
||||
MPI_Reduce(local_bucket_seconds, max_bucket_seconds, TB_COUNT, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
|
||||
MPI_Reduce(local_bucket_seconds, avg_bucket_seconds, TB_COUNT, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
|
||||
|
||||
if (myrank != 0)
|
||||
return;
|
||||
|
||||
for (int i = 0; i < TB_COUNT; i++)
|
||||
avg_bucket_seconds[i] /= Mymax(1, nprocs);
|
||||
|
||||
if (TimingMonitor)
|
||||
{
|
||||
double row[2 + 2 * TB_COUNT];
|
||||
row[0] = double(step_index);
|
||||
row[1] = step_wall_seconds;
|
||||
for (int i = 0; i < TB_COUNT; i++)
|
||||
{
|
||||
row[2 + i] = max_bucket_seconds[i];
|
||||
row[2 + TB_COUNT + i] = avg_bucket_seconds[i];
|
||||
}
|
||||
TimingMonitor->writefile(phys_time, 2 + 2 * TB_COUNT, row);
|
||||
}
|
||||
|
||||
double residual = step_wall_seconds;
|
||||
for (int i = 0; i < TB_COUNT; i++)
|
||||
residual -= max_bucket_seconds[i];
|
||||
if (residual < 0.0)
|
||||
residual = 0.0;
|
||||
|
||||
int order[TB_COUNT];
|
||||
for (int i = 0; i < TB_COUNT; i++)
|
||||
order[i] = i;
|
||||
|
||||
for (int i = 0; i < TB_COUNT - 1; i++)
|
||||
for (int j = i + 1; j < TB_COUNT; j++)
|
||||
if (max_bucket_seconds[order[j]] > max_bucket_seconds[order[i]])
|
||||
{
|
||||
int tmp = order[i];
|
||||
order[i] = order[j];
|
||||
order[j] = tmp;
|
||||
}
|
||||
|
||||
ios::fmtflags old_flags = cout.flags();
|
||||
streamsize old_precision = cout.precision();
|
||||
|
||||
cout << " Fine timing hot spots (max rank wall estimate):" << endl;
|
||||
const int topn = Mymin(BSSN_FINE_TIMING_TOPN, TB_COUNT);
|
||||
for (int i = 0; i < topn; i++)
|
||||
{
|
||||
const int ib = order[i];
|
||||
const double frac = (step_wall_seconds > 0.0) ? (100.0 * max_bucket_seconds[ib] / step_wall_seconds) : 0.0;
|
||||
cout << " "
|
||||
<< setw(20) << left << bucket_labels[ib]
|
||||
<< " = " << setw(10) << right << setprecision(6) << max_bucket_seconds[ib]
|
||||
<< " s (" << setw(6) << setprecision(4) << frac << "%)" << endl;
|
||||
}
|
||||
if (residual > 1.0e-6)
|
||||
{
|
||||
const double frac = (step_wall_seconds > 0.0) ? (100.0 * residual / step_wall_seconds) : 0.0;
|
||||
cout << " "
|
||||
<< setw(20) << left << "unprofiled_residual"
|
||||
<< " = " << setw(10) << right << setprecision(6) << residual
|
||||
<< " s (" << setw(6) << setprecision(4) << frac << "%)" << endl;
|
||||
}
|
||||
cout << endl;
|
||||
cout.flags(old_flags);
|
||||
cout.precision(old_precision);
|
||||
}
|
||||
}
|
||||
|
||||
#define STEP_TIMER_DECL(var_name) const double var_name = MPI_Wtime()
|
||||
#define STEP_TIMER_ADD(bucket_name, var_name) step_timing::add(step_timing::bucket_name, MPI_Wtime() - (var_name))
|
||||
#else
|
||||
#define STEP_TIMER_DECL(var_name)
|
||||
#define STEP_TIMER_ADD(bucket_name, var_name)
|
||||
#endif
|
||||
|
||||
//================================================================================================
|
||||
|
||||
// define bssn_class
|
||||
@@ -104,11 +255,29 @@ bssn_class::bssn_class(double Couranti, double StartTimei, double TotalTimei,
|
||||
a_stream << setw(15) << "# time ADMmass ADMPx ADMPy ADMPz ADMSx ADMSy ADMSz";
|
||||
MAPMonitor = new monitor("bssn_ADMQs.dat", myrank, a_stream.str());
|
||||
|
||||
a_stream.clear();
|
||||
a_stream.str("");
|
||||
a_stream << setw(15) << "# time Ham Px Py Pz Gx Gy Gz";
|
||||
ConVMonitor = new monitor("bssn_constraint.dat", myrank, a_stream.str());
|
||||
}
|
||||
a_stream.clear();
|
||||
a_stream.str("");
|
||||
a_stream << setw(15) << "# time Ham Px Py Pz Gx Gy Gz";
|
||||
ConVMonitor = new monitor("bssn_constraint.dat", myrank, a_stream.str());
|
||||
|
||||
#if BSSN_FINE_TIMING
|
||||
a_stream.clear();
|
||||
a_stream.str("");
|
||||
a_stream << setw(8) << "# step";
|
||||
a_stream << setw(14) << "wall";
|
||||
for (int ib = 0; ib < step_timing::TB_COUNT; ib++)
|
||||
a_stream << setw(18) << step_timing::bucket_labels[ib];
|
||||
for (int ib = 0; ib < step_timing::TB_COUNT; ib++)
|
||||
{
|
||||
char str_avg[64];
|
||||
sprintf(str_avg, "avg_%s", step_timing::bucket_labels[ib]);
|
||||
a_stream << setw(18) << str_avg;
|
||||
}
|
||||
TimingMonitor = new monitor("bssn_step_timing.dat", myrank, a_stream.str());
|
||||
#else
|
||||
TimingMonitor = 0;
|
||||
#endif
|
||||
}
|
||||
// setup sphere integration engine
|
||||
Waveshell = new surface_integral(Symmetry);
|
||||
|
||||
@@ -1053,10 +1222,11 @@ bssn_class::~bssn_class()
|
||||
|
||||
delete ErrorMonitor;
|
||||
delete Psi4Monitor;
|
||||
delete BHMonitor;
|
||||
delete MAPMonitor;
|
||||
delete ConVMonitor;
|
||||
delete Waveshell;
|
||||
delete BHMonitor;
|
||||
delete MAPMonitor;
|
||||
delete ConVMonitor;
|
||||
delete TimingMonitor;
|
||||
delete Waveshell;
|
||||
|
||||
delete CheckPoint;
|
||||
}
|
||||
@@ -2151,9 +2321,13 @@ void bssn_class::Evolve(int Steps)
|
||||
|
||||
GH->settrfls(trfls);
|
||||
|
||||
for (int ncount = 1; ncount < Steps + 1; ncount++)
|
||||
{
|
||||
// special for large mass ratio consideration
|
||||
for (int ncount = 1; ncount < Steps + 1; ncount++)
|
||||
{
|
||||
#if BSSN_FINE_TIMING
|
||||
step_timing::reset();
|
||||
STEP_TIMER_DECL(step_wall_start);
|
||||
#endif
|
||||
// special for large mass ratio consideration
|
||||
// if(fabs(Porg0[0][0]-Porg0[1][0])+fabs(Porg0[0][1]-Porg0[1][1])+fabs(Porg0[0][2]-Porg0[1][2])<1e-6)
|
||||
// { GH->levels=GH->movls; }
|
||||
|
||||
@@ -2177,17 +2351,19 @@ void bssn_class::Evolve(int Steps)
|
||||
LastCheck += dT_mon;
|
||||
|
||||
// When LastDump >= DumpTime, output corresponding binary data
|
||||
if (LastDump >= DumpTime)
|
||||
{
|
||||
// misc::tillherecheck("before Dump_Data");
|
||||
|
||||
for (int lev = 0; lev < GH->levels; lev++)
|
||||
Parallel::Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT_mon);
|
||||
#ifdef WithShell
|
||||
SH->Dump_Data(DumpList, 0, PhysTime, dT_mon);
|
||||
#endif
|
||||
|
||||
LastDump = 0;
|
||||
if (LastDump >= DumpTime)
|
||||
{
|
||||
STEP_TIMER_DECL(timer_dump3d);
|
||||
// misc::tillherecheck("before Dump_Data");
|
||||
|
||||
for (int lev = 0; lev < GH->levels; lev++)
|
||||
Parallel::Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT_mon);
|
||||
#ifdef WithShell
|
||||
SH->Dump_Data(DumpList, 0, PhysTime, dT_mon);
|
||||
#endif
|
||||
STEP_TIMER_ADD(TB_DUMP_3D, timer_dump3d);
|
||||
|
||||
LastDump = 0;
|
||||
|
||||
if (myrank == 0)
|
||||
{
|
||||
@@ -2196,14 +2372,16 @@ void bssn_class::Evolve(int Steps)
|
||||
}
|
||||
|
||||
// When Last2dDump >= d2DumpTime, output corresponding 2D data
|
||||
if (Last2dDump >= d2DumpTime)
|
||||
{
|
||||
// misc::tillherecheck("before 2dDump_Data");
|
||||
|
||||
for (int lev = 0; lev < GH->levels; lev++)
|
||||
Parallel::d2Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT_mon);
|
||||
|
||||
Last2dDump = 0;
|
||||
if (Last2dDump >= d2DumpTime)
|
||||
{
|
||||
STEP_TIMER_DECL(timer_dump2d);
|
||||
// misc::tillherecheck("before 2dDump_Data");
|
||||
|
||||
for (int lev = 0; lev < GH->levels; lev++)
|
||||
Parallel::d2Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT_mon);
|
||||
STEP_TIMER_ADD(TB_DUMP_2D, timer_dump2d);
|
||||
|
||||
Last2dDump = 0;
|
||||
|
||||
if (myrank == 0)
|
||||
{
|
||||
@@ -2225,12 +2403,14 @@ void bssn_class::Evolve(int Steps)
|
||||
if (PhysTime >= TotalTime)
|
||||
break;
|
||||
|
||||
#if (REGLEV == 1)
|
||||
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||
#endif
|
||||
#if (REGLEV == 1)
|
||||
STEP_TIMER_DECL(timer_regrid);
|
||||
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||
STEP_TIMER_ADD(TB_REGRID, timer_regrid);
|
||||
#endif
|
||||
|
||||
#if (REGLEV == 0 && (PSTR == 1 || PSTR == 2))
|
||||
// GH->Regrid_fake(Symmetry,BH_num,Porgbr,Porg0,
|
||||
@@ -2302,18 +2482,25 @@ void bssn_class::Evolve(int Steps)
|
||||
////////////////////////////////////////////////////////////
|
||||
|
||||
// When LastCheck >= CheckTime, perform runtime checks and output status data
|
||||
if (LastCheck >= CheckTime)
|
||||
{
|
||||
LastCheck = 0;
|
||||
|
||||
CheckPoint->write_Black_Hole_position(BH_num_input, BH_num, Porg0, Porgbr, Mass);
|
||||
CheckPoint->writecheck_cgh(PhysTime, GH);
|
||||
#ifdef WithShell
|
||||
CheckPoint->writecheck_sh(PhysTime, SH);
|
||||
#endif
|
||||
CheckPoint->write_bssn(LastDump, Last2dDump, LastAnas);
|
||||
}
|
||||
}
|
||||
if (LastCheck >= CheckTime)
|
||||
{
|
||||
STEP_TIMER_DECL(timer_checkpoint);
|
||||
LastCheck = 0;
|
||||
|
||||
CheckPoint->write_Black_Hole_position(BH_num_input, BH_num, Porg0, Porgbr, Mass);
|
||||
CheckPoint->writecheck_cgh(PhysTime, GH);
|
||||
#ifdef WithShell
|
||||
CheckPoint->writecheck_sh(PhysTime, SH);
|
||||
#endif
|
||||
CheckPoint->write_bssn(LastDump, Last2dDump, LastAnas);
|
||||
STEP_TIMER_ADD(TB_CHECKPOINT, timer_checkpoint);
|
||||
}
|
||||
|
||||
#if BSSN_FINE_TIMING
|
||||
if (ncount % BSSN_FINE_TIMING_EVERY == 0)
|
||||
step_timing::report(myrank, nprocs, TimingMonitor, ncount, PhysTime, MPI_Wtime() - step_wall_start);
|
||||
#endif
|
||||
}
|
||||
/*
|
||||
#ifdef With_AHF
|
||||
// final apparent horizon finding
|
||||
@@ -2444,6 +2631,7 @@ void bssn_class::RecursiveStep(int lev)
|
||||
#endif
|
||||
|
||||
#if (REGLEV == 0)
|
||||
STEP_TIMER_DECL(timer_regrid_onelevel);
|
||||
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
||||
@@ -2452,6 +2640,7 @@ void bssn_class::RecursiveStep(int lev)
|
||||
ConstraintRefreshLevels[lev] = 1;
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||
}
|
||||
STEP_TIMER_ADD(TB_REGRID, timer_regrid_onelevel);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -3042,11 +3231,12 @@ void bssn_class::Step(int lev, int YN)
|
||||
|
||||
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
|
||||
|
||||
// new code 2013-2-15, zjcao
|
||||
#if (MAPBH == 1)
|
||||
// for black hole position
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
// new code 2013-2-15, zjcao
|
||||
#if (MAPBH == 1)
|
||||
STEP_TIMER_DECL(timer_bh_predictor);
|
||||
// for black hole position
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev);
|
||||
for (int ithBH = 0; ithBH < BH_num; ithBH++)
|
||||
{
|
||||
@@ -3070,12 +3260,13 @@ void bssn_class::Step(int lev, int YN)
|
||||
DG_List->insert(Sfy0);
|
||||
DG_List->insert(Sfz0);
|
||||
Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev);
|
||||
DG_List->clearList();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// data analysis part
|
||||
DG_List->clearList();
|
||||
}
|
||||
}
|
||||
}
|
||||
STEP_TIMER_ADD(TB_BH_PREDICTOR, timer_bh_predictor);
|
||||
|
||||
// data analysis part
|
||||
// Warning NOTE: the variables1 are used as temp storege room
|
||||
if (lev == a_lev)
|
||||
{
|
||||
@@ -3093,11 +3284,12 @@ void bssn_class::Step(int lev, int YN)
|
||||
double TRK4 = PhysTime;
|
||||
int iter_count = 0; // count RK4 substeps
|
||||
int pre = 0, cor = 1;
|
||||
int ERROR = 0;
|
||||
|
||||
MyList<ss_patch> *sPp;
|
||||
// Predictor
|
||||
MyList<Patch> *Pp = GH->PatL[lev];
|
||||
int ERROR = 0;
|
||||
|
||||
MyList<ss_patch> *sPp;
|
||||
STEP_TIMER_DECL(timer_predictor_rhs);
|
||||
// Predictor
|
||||
MyList<Patch> *Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
MyList<Block> *BP = Pp->data->blb;
|
||||
@@ -3365,14 +3557,17 @@ void bssn_class::Step(int lev, int YN)
|
||||
|
||||
// Non-blocking error reduction overlapped with Sync to hide Allreduce latency
|
||||
MPI_Request err_req;
|
||||
{
|
||||
int erh = ERROR;
|
||||
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req);
|
||||
}
|
||||
#endif
|
||||
|
||||
Parallel::AsyncSyncState async_pre;
|
||||
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
|
||||
{
|
||||
int erh = ERROR;
|
||||
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req);
|
||||
}
|
||||
#endif
|
||||
|
||||
STEP_TIMER_ADD(TB_PREDICTOR_RHS, timer_predictor_rhs);
|
||||
|
||||
STEP_TIMER_DECL(timer_predictor_sync);
|
||||
Parallel::AsyncSyncState async_pre;
|
||||
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
|
||||
|
||||
#ifdef WithShell
|
||||
if (lev == 0)
|
||||
@@ -3393,9 +3588,9 @@ void bssn_class::Step(int lev, int YN)
|
||||
#endif
|
||||
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
|
||||
|
||||
#ifdef WithShell
|
||||
// Complete non-blocking error reduction and check
|
||||
MPI_Wait(&err_req, MPI_STATUS_IGNORE);
|
||||
#ifdef WithShell
|
||||
// Complete non-blocking error reduction and check
|
||||
MPI_Wait(&err_req, MPI_STATUS_IGNORE);
|
||||
if (ERROR)
|
||||
{
|
||||
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
|
||||
@@ -3404,12 +3599,13 @@ void bssn_class::Step(int lev, int YN)
|
||||
{
|
||||
if (ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime << ", lev = " << lev << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (MAPBH == 0)
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
STEP_TIMER_ADD(TB_PREDICTOR_SYNC, timer_predictor_sync);
|
||||
|
||||
#if (MAPBH == 0)
|
||||
// for black hole position
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
@@ -3449,12 +3645,13 @@ void bssn_class::Step(int lev, int YN)
|
||||
}
|
||||
#endif
|
||||
|
||||
// corrector
|
||||
for (iter_count = 1; iter_count < 4; iter_count++)
|
||||
{
|
||||
// for RK4: t0, t0+dt/2, t0+dt/2, t0+dt;
|
||||
if (iter_count == 1 || iter_count == 3)
|
||||
TRK4 += dT_lev / 2;
|
||||
// corrector
|
||||
for (iter_count = 1; iter_count < 4; iter_count++)
|
||||
{
|
||||
STEP_TIMER_DECL(timer_corrector_rhs);
|
||||
// for RK4: t0, t0+dt/2, t0+dt/2, t0+dt;
|
||||
if (iter_count == 1 || iter_count == 3)
|
||||
TRK4 += dT_lev / 2;
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
@@ -3725,14 +3922,17 @@ void bssn_class::Step(int lev, int YN)
|
||||
}
|
||||
// Non-blocking error reduction overlapped with Sync to hide Allreduce latency
|
||||
MPI_Request err_req_cor;
|
||||
{
|
||||
int erh = ERROR;
|
||||
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor);
|
||||
}
|
||||
#endif
|
||||
|
||||
Parallel::AsyncSyncState async_cor;
|
||||
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
|
||||
{
|
||||
int erh = ERROR;
|
||||
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor);
|
||||
}
|
||||
#endif
|
||||
|
||||
STEP_TIMER_ADD(TB_CORRECTOR_RHS, timer_corrector_rhs);
|
||||
|
||||
STEP_TIMER_DECL(timer_corrector_sync);
|
||||
Parallel::AsyncSyncState async_cor;
|
||||
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
|
||||
|
||||
#ifdef WithShell
|
||||
if (lev == 0)
|
||||
@@ -3767,14 +3967,16 @@ void bssn_class::Step(int lev, int YN)
|
||||
<< " variables at t = " << PhysTime
|
||||
<< ", lev = " << lev << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (MAPBH == 0)
|
||||
// for black hole position
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
}
|
||||
}
|
||||
#endif
|
||||
STEP_TIMER_ADD(TB_CORRECTOR_SYNC, timer_corrector_sync);
|
||||
|
||||
#if (MAPBH == 0)
|
||||
STEP_TIMER_DECL(timer_bh_corrector);
|
||||
// for black hole position
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
compute_Porg_rhs(Porg, Porg1, Sfx, Sfy, Sfz, lev);
|
||||
for (int ithBH = 0; ithBH < BH_num; ithBH++)
|
||||
{
|
||||
@@ -3800,16 +4002,18 @@ void bssn_class::Step(int lev, int YN)
|
||||
DG_List->insert(Sfy0);
|
||||
DG_List->insert(Sfz0);
|
||||
Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev);
|
||||
DG_List->clearList();
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// swap time level
|
||||
if (iter_count < 3)
|
||||
{
|
||||
Pp = GH->PatL[lev];
|
||||
DG_List->clearList();
|
||||
}
|
||||
}
|
||||
}
|
||||
STEP_TIMER_ADD(TB_BH_CORRECTOR, timer_bh_corrector);
|
||||
#endif
|
||||
|
||||
// swap time level
|
||||
if (iter_count < 3)
|
||||
{
|
||||
STEP_TIMER_DECL(timer_state_swap);
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
MyList<Block> *BP = Pp->data->blb;
|
||||
@@ -3839,11 +4043,11 @@ void bssn_class::Step(int lev, int YN)
|
||||
BP = BP->next;
|
||||
}
|
||||
sPp = sPp->next;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (MAPBH == 0)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (MAPBH == 0)
|
||||
// for black hole position
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
@@ -3852,14 +4056,16 @@ void bssn_class::Step(int lev, int YN)
|
||||
Porg[ithBH][0] = Porg1[ithBH][0];
|
||||
Porg[ithBH][1] = Porg1[ithBH][1];
|
||||
Porg[ithBH][2] = Porg1[ithBH][2];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#if (RPS == 0)
|
||||
// mesh refinement boundary part
|
||||
RestrictProlong(lev, YN, BB);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
STEP_TIMER_ADD(TB_STATE_SWAP, timer_state_swap);
|
||||
}
|
||||
}
|
||||
#if (RPS == 0)
|
||||
STEP_TIMER_DECL(timer_restrict_prolong);
|
||||
// mesh refinement boundary part
|
||||
RestrictProlong(lev, YN, BB);
|
||||
|
||||
#ifdef WithShell
|
||||
if (lev == 0)
|
||||
@@ -3876,17 +4082,19 @@ void bssn_class::Step(int lev, int YN)
|
||||
<< " seconds! " << endl;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
// note the data structure before update
|
||||
#endif
|
||||
|
||||
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
|
||||
#endif
|
||||
// note the data structure before update
|
||||
// SynchList_cor 1 -----------
|
||||
//
|
||||
// StateList 0 -----------
|
||||
//
|
||||
// OldStateList old -----------
|
||||
// update
|
||||
Pp = GH->PatL[lev];
|
||||
// OldStateList old -----------
|
||||
// update
|
||||
STEP_TIMER_DECL(timer_state_commit);
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
MyList<Block> *BP = Pp->data->blb;
|
||||
@@ -3939,10 +4147,11 @@ void bssn_class::Step(int lev, int YN)
|
||||
{
|
||||
Porg0[ithBH][0] = Porg1[ithBH][0];
|
||||
Porg0[ithBH][1] = Porg1[ithBH][1];
|
||||
Porg0[ithBH][2] = Porg1[ithBH][2];
|
||||
}
|
||||
}
|
||||
}
|
||||
Porg0[ithBH][2] = Porg1[ithBH][2];
|
||||
}
|
||||
}
|
||||
STEP_TIMER_ADD(TB_STATE_SWAP, timer_state_commit);
|
||||
}
|
||||
|
||||
//================================================================================================
|
||||
|
||||
@@ -4266,12 +4475,14 @@ void bssn_class::Step(int lev, int YN)
|
||||
<< ", lev = " << lev << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// for black hole position
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
STEP_TIMER_ADD(TB_PREDICTOR_SYNC, timer_predictor_sync);
|
||||
|
||||
STEP_TIMER_DECL(timer_bh_predictor);
|
||||
// for black hole position
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev);
|
||||
for (int ithBH = 0; ithBH < BH_num; ithBH++)
|
||||
{
|
||||
@@ -4303,10 +4514,11 @@ void bssn_class::Step(int lev, int YN)
|
||||
}
|
||||
// data analysis part
|
||||
// Warning NOTE: the variables1 are used as temp storege room
|
||||
if (lev == a_lev)
|
||||
{
|
||||
AnalysisStuff(lev, dT_lev);
|
||||
}
|
||||
if (lev == a_lev)
|
||||
{
|
||||
AnalysisStuff(lev, dT_lev);
|
||||
}
|
||||
STEP_TIMER_ADD(TB_BH_PREDICTOR, timer_bh_predictor);
|
||||
// corrector
|
||||
for (iter_count = 1; iter_count < 3; iter_count++)
|
||||
{
|
||||
@@ -5768,18 +5980,19 @@ void bssn_class::SHStep()
|
||||
// 0: do not use mixing two levels data for OutBD; 1: do use
|
||||
|
||||
#define MIXOUTB 0
|
||||
void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
||||
MyList<var> *SL, MyList<var> *OL, MyList<var> *corL)
|
||||
// we assume
|
||||
// StateList 1 -----------
|
||||
//
|
||||
// OldStateList 0 -----------
|
||||
//
|
||||
// SynchList_cor old -----------
|
||||
{
|
||||
#if (PSTR == 1 || PSTR == 2)
|
||||
// stringstream a_stream;
|
||||
// a_stream.setf(ios::left);
|
||||
void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
||||
MyList<var> *SL, MyList<var> *OL, MyList<var> *corL)
|
||||
// we assume
|
||||
// StateList 1 -----------
|
||||
//
|
||||
// OldStateList 0 -----------
|
||||
//
|
||||
// SynchList_cor old -----------
|
||||
{
|
||||
STEP_TIMER_DECL(timer_restrict_prolong);
|
||||
#if (PSTR == 1 || PSTR == 2)
|
||||
// stringstream a_stream;
|
||||
// a_stream.setf(ios::left);
|
||||
#endif
|
||||
|
||||
if (lev > 0)
|
||||
@@ -5912,14 +6125,15 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
||||
|
||||
Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]);
|
||||
|
||||
#if (PSTR == 1 || PSTR == 2)
|
||||
// a_stream.clear();
|
||||
// a_stream.str("");
|
||||
// a_stream<<GH->mylev<<": after Sync";
|
||||
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#if (PSTR == 1 || PSTR == 2)
|
||||
// a_stream.clear();
|
||||
// a_stream.str("");
|
||||
// a_stream<<GH->mylev<<": after Sync";
|
||||
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
|
||||
#endif
|
||||
}
|
||||
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
|
||||
}
|
||||
|
||||
//================================================================================================
|
||||
|
||||
@@ -5929,16 +6143,17 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
||||
|
||||
// auxiliary operation, input lev means original lev-1
|
||||
|
||||
void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
||||
MyList<var> *SL, MyList<var> *OL, MyList<var> *corL)
|
||||
// we assume
|
||||
// StateList 1 -----------
|
||||
//
|
||||
// OldStateList 0 -----------
|
||||
//
|
||||
// SynchList_cor old -----------
|
||||
{
|
||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"starting RestrictProlong_aux");
|
||||
void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
||||
MyList<var> *SL, MyList<var> *OL, MyList<var> *corL)
|
||||
// we assume
|
||||
// StateList 1 -----------
|
||||
//
|
||||
// OldStateList 0 -----------
|
||||
//
|
||||
// SynchList_cor old -----------
|
||||
{
|
||||
STEP_TIMER_DECL(timer_restrict_prolong);
|
||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"starting RestrictProlong_aux");
|
||||
|
||||
if (lev >= GH->levels - 1)
|
||||
return;
|
||||
@@ -6003,10 +6218,11 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry);
|
||||
#endif
|
||||
}
|
||||
|
||||
Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]);
|
||||
}
|
||||
}
|
||||
|
||||
Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]);
|
||||
}
|
||||
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
|
||||
}
|
||||
|
||||
//================================================================================================
|
||||
|
||||
@@ -6014,9 +6230,10 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
||||
|
||||
//================================================================================================
|
||||
|
||||
void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
||||
{
|
||||
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
|
||||
void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
||||
{
|
||||
STEP_TIMER_DECL(timer_restrict_prolong);
|
||||
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
|
||||
// we assume for fine
|
||||
// SynchList_cor 1 -----------
|
||||
//
|
||||
@@ -6092,10 +6309,11 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry);
|
||||
#endif
|
||||
}
|
||||
|
||||
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]);
|
||||
}
|
||||
}
|
||||
|
||||
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]);
|
||||
}
|
||||
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
|
||||
}
|
||||
|
||||
//================================================================================================
|
||||
|
||||
@@ -6843,60 +7061,52 @@ void bssn_class::compute_Porg_rhs(double **BH_PS,double **BH_RHS,var *forx,var *
|
||||
|
||||
// new code considering diferent levels for different black hole
|
||||
|
||||
void bssn_class::compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, var *fory, var *forz, int ilev)
|
||||
{
|
||||
const int InList = 3;
|
||||
|
||||
MyList<var> *DG_List = new MyList<var>(forx);
|
||||
DG_List->insert(fory);
|
||||
DG_List->insert(forz);
|
||||
|
||||
double *x1, *y1, *z1;
|
||||
double *shellf;
|
||||
shellf = new double[3];
|
||||
double *pox[3];
|
||||
for (int i = 0; i < 3; i++)
|
||||
pox[i] = new double[1];
|
||||
|
||||
for (int n = 0; n < BH_num; n++)
|
||||
{
|
||||
void bssn_class::compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, var *fory, var *forz, int ilev)
|
||||
{
|
||||
MyList<var> DG_List_x(forx);
|
||||
MyList<var> DG_List_y(fory);
|
||||
MyList<var> DG_List_z(forz);
|
||||
DG_List_x.next = &DG_List_y;
|
||||
DG_List_y.next = &DG_List_z;
|
||||
|
||||
double shellf[3];
|
||||
double pox_buf[3][1];
|
||||
double *pox[3] = {pox_buf[0], pox_buf[1], pox_buf[2]};
|
||||
|
||||
for (int n = 0; n < BH_num; n++)
|
||||
{
|
||||
pox[0][0] = BH_PS[n][0];
|
||||
pox[1][0] = BH_PS[n][1];
|
||||
pox[2][0] = BH_PS[n][2];
|
||||
|
||||
int lev = ilev;
|
||||
|
||||
#if (PSTR == 0)
|
||||
while (!Parallel::PatList_Interp_Points(GH->PatL[lev], DG_List, 1, pox, shellf, Symmetry))
|
||||
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
||||
while (!Parallel::PatList_Interp_Points(GH->PatL[lev], DG_List, 1, pox, shellf, Symmetry, GH->Commlev[lev]))
|
||||
#endif
|
||||
{
|
||||
lev--;
|
||||
if (lev < 0)
|
||||
{
|
||||
ErrorMonitor->outfile << "fail to find black holes at t = " << PhysTime << endl;
|
||||
for (n = 0; n < BH_num; n++)
|
||||
ErrorMonitor->outfile << "(x,y,z) = ("
|
||||
<< pox[0][n] << "," << pox[1][n] << "," << pox[2][n]
|
||||
<< ")" << endl;
|
||||
break;
|
||||
}
|
||||
|
||||
#if (PSTR == 0)
|
||||
while (!Parallel::PatList_Interp_Points(GH->PatL[lev], &DG_List_x, 1, pox, shellf, Symmetry))
|
||||
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
||||
while (!Parallel::PatList_Interp_Points(GH->PatL[lev], &DG_List_x, 1, pox, shellf, Symmetry, GH->Commlev[lev]))
|
||||
#endif
|
||||
{
|
||||
lev--;
|
||||
if (lev < 0)
|
||||
{
|
||||
ErrorMonitor->outfile << "fail to find black holes at t = " << PhysTime << endl;
|
||||
for (n = 0; n < BH_num; n++)
|
||||
ErrorMonitor->outfile << "(x,y,z) = ("
|
||||
<< BH_PS[n][0] << "," << BH_PS[n][1] << "," << BH_PS[n][2]
|
||||
<< ")" << endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (lev >= 0)
|
||||
{
|
||||
BH_RHS[n][0] = -shellf[0];
|
||||
BH_RHS[n][1] = -shellf[1];
|
||||
BH_RHS[n][2] = -shellf[2];
|
||||
}
|
||||
}
|
||||
|
||||
DG_List->clearList();
|
||||
delete[] shellf;
|
||||
for (int i = 0; i < 3; i++)
|
||||
delete[] pox[i];
|
||||
}
|
||||
BH_RHS[n][2] = -shellf[2];
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
//================================================================================================
|
||||
|
||||
@@ -134,9 +134,9 @@ public:
|
||||
Parallel::SyncCache *sync_cache_restrict; // cached Restrict in RestrictProlong
|
||||
Parallel::SyncCache *sync_cache_outbd; // cached OutBdLow2Hi in RestrictProlong
|
||||
|
||||
monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor;
|
||||
monitor *ConVMonitor;
|
||||
surface_integral *Waveshell;
|
||||
monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor;
|
||||
monitor *ConVMonitor, *TimingMonitor;
|
||||
surface_integral *Waveshell;
|
||||
checkpoint *CheckPoint;
|
||||
|
||||
public:
|
||||
|
||||
@@ -29,6 +29,12 @@
|
||||
|
||||
#define REGLEV 0
|
||||
|
||||
#define BSSN_FINE_TIMING 1
|
||||
|
||||
#define BSSN_FINE_TIMING_EVERY 1
|
||||
|
||||
#define BSSN_FINE_TIMING_TOPN 8
|
||||
|
||||
//#define USE_GPU
|
||||
|
||||
//#define CHECKDETAIL
|
||||
@@ -88,6 +94,15 @@
|
||||
// 0: for every level;
|
||||
// 1: for all
|
||||
//
|
||||
// define BSSN_FINE_TIMING
|
||||
// enable fine-grained per-timestep timing monitor
|
||||
//
|
||||
// define BSSN_FINE_TIMING_EVERY
|
||||
// report timing every N coarse timesteps
|
||||
//
|
||||
// define BSSN_FINE_TIMING_TOPN
|
||||
// number of hottest timing buckets shown in stdout
|
||||
//
|
||||
// define USE_GPU
|
||||
// use gpu or not
|
||||
//
|
||||
|
||||
Reference in New Issue
Block a user