Add diagnostic timing to Shell-Patch initialization

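Print a per-step MPI_Wtime breakdown (on rank 0) of the shell setup steps
in bssn_class::Initialize(), plus MPI_Wtime timestamps immediately before
and after the Compute_Constraint() call in bssn_class::Read_Ansorg() (their
difference gives its duration). The timings reveal that
ShellPatch::setupintintstuff() takes ~511s of the ~590s startup.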

setupintintstuff() builds interpolation tables by searching every shell
grid point against all Cartesian patches. Thread-safe OpenMP
parallelization of that search is currently blocked by shared linked-list
mutations in prolongpointstru(), which would first need to be split into
separate search and append phases.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-09 21:51:07 +08:00
parent fbb2ed112d
commit d0d3f965a6

View File

@@ -1751,13 +1751,25 @@ void bssn_class::Initialize()
else
GH->compose_cgh(nprocs);
#ifdef WithShell
if (myrank == 0) cout << " [Init] ShellPatch new... " << flush;
double _t0 = MPI_Wtime();
SH = new ShellPatch(0, ngfs, pname, Symmetry, myrank, ErrorMonitor);
SH->matchcheck(GH->PatL[0]);
if (myrank == 0) cout << (MPI_Wtime()-_t0) << "s" << endl;
if (myrank == 0) cout << " [Init] compose_sh... " << flush; _t0 = MPI_Wtime();
SH->compose_sh(nprocs);
// SH->compose_shr(nprocs); //sh is faster than shr
if (myrank == 0) cout << (MPI_Wtime()-_t0) << "s" << endl;
if (myrank == 0) cout << " [Init] setupcordtrans... " << flush; _t0 = MPI_Wtime();
SH->setupcordtrans();
if (myrank == 0) cout << (MPI_Wtime()-_t0) << "s" << endl;
SH->Dump_xyz(0, 0, 1);
if (myrank == 0) cout << " [Init] setupintintstuff... " << flush; _t0 = MPI_Wtime();
SH->setupintintstuff(nprocs, GH->PatL[0], Symmetry);
if (myrank == 0) cout << (MPI_Wtime()-_t0) << "s" << endl;
if (checkrun)
CheckPoint->readcheck_sh(SH, myrank);
@@ -3006,7 +3018,7 @@ void bssn_class::Read_Ansorg()
cg->fgfs[Pp->data->fngfs + ShellPatch::gy][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]],
cg->fgfs[Pp->data->fngfs + ShellPatch::gz][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]]);
f_get_ansorg_nbhs_ss(cg->shape,
f_get_ansorg_nbhs_ss(cg->shape,
cg->fgfs[Pp->data->fngfs + ShellPatch::gx],
cg->fgfs[Pp->data->fngfs + ShellPatch::gy],
cg->fgfs[Pp->data->fngfs + ShellPatch::gz],
@@ -3060,7 +3072,9 @@ void bssn_class::Read_Ansorg()
delete[] Pmom_here;
delete[] Spin_here;
if (myrank == 0) cout << " [Read_Ansorg] before Compute_Constraint: " << MPI_Wtime() << " s" << endl;
Compute_Constraint();
if (myrank == 0) cout << " [Read_Ansorg] after Compute_Constraint: " << MPI_Wtime() << " s" << endl;
// dump read_in initial data
for (int lev = 0; lev < GH->levels; lev++)
Parallel::Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT);