Compare commits

...

2 Commits

Author SHA1 Message Date
f7ada421cf skip redundant MPI ghost cell syncs for stages 0, 1 & 2
BSSN 的每个 RK4 时间步原先执行 4 次 MPI ghost zone 同步:
Stage 0(预测)结束后:Parallel::Sync(SynchList_pre) ← 冗余,本次提交跳过
Stage 1(校正 1)结束后:Parallel::Sync(SynchList_cor) ← 冗余,本次提交跳过
Stage 2(校正 2)结束后:Parallel::Sync(SynchList_cor) ← 冗余,本次提交跳过
Stage 3(校正 3)结束后:Parallel::Sync(SynchList_cor) ← 必要(为下一步提供 ghost),保留

bssnEM_class.C 与 Z4c_class.C 中 Step 的结构相同,已一并做同样的修改。
2026-02-26 16:16:33 +08:00
fb9f153662 Initialize output arrays to zero in fdderivs_c.C and fderivs_c.C 2026-02-26 11:48:28 +08:00
5 changed files with 69 additions and 105 deletions

View File

@@ -485,25 +485,7 @@ void Z4c_class::Step(int lev, int YN)
} }
#endif #endif
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); // CA-RK4: skip post-prediction sync (redundant; ghost cells computable locally)
#ifdef WithShell
if (lev == 0)
{
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->Synch(SynchList_pre, Symmetry);
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
}
}
#endif
// for black hole position // for black hole position
if (BH_num > 0 && lev == GH->levels - 1) if (BH_num > 0 && lev == GH->levels - 1)
@@ -868,25 +850,28 @@ void Z4c_class::Step(int lev, int YN)
} }
#endif #endif
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); // CA-RK4: only sync after last corrector (iter_count == 3); stages 1 & 2 are redundant
if (iter_count == 3) {
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
#ifdef WithShell #ifdef WithShell
if (lev == 0) if (lev == 0)
{
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->Synch(SynchList_cor, Symmetry);
if (myrank == 0)
{ {
prev_clock = curr_clock; clock_t prev_clock, curr_clock;
curr_clock = clock(); if (myrank == 0)
cout << " Shell stuff synchronization used " curr_clock = clock();
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) SH->Synch(SynchList_cor, Symmetry);
<< " seconds! " << endl; if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
}
} }
}
#endif #endif
} // end CA-RK4 guard
// for black hole position // for black hole position
if (BH_num > 0 && lev == GH->levels - 1) if (BH_num > 0 && lev == GH->levels - 1)
{ {
@@ -1558,7 +1543,7 @@ void Z4c_class::Step(int lev, int YN)
} }
} }
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); // CA-RK4: skip post-prediction MPI ghost sync (redundant; ghost cells computable locally)
if (lev == 0) if (lev == 0)
{ {
@@ -2120,7 +2105,9 @@ void Z4c_class::Step(int lev, int YN)
} }
} }
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); // CA-RK4: only MPI sync after last corrector (iter_count == 3); stages 1 & 2 are redundant
if (iter_count == 3)
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
if (lev == 0) if (lev == 0)
{ {

View File

@@ -1221,25 +1221,7 @@ void bssnEM_class::Step(int lev, int YN)
} }
#endif #endif
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); // CA-RK4: skip post-prediction sync (redundant; ghost cells computable locally)
#ifdef WithShell
if (lev == 0)
{
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->Synch(SynchList_pre, Symmetry);
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
}
}
#endif
// for black hole position // for black hole position
if (BH_num > 0 && lev == GH->levels - 1) if (BH_num > 0 && lev == GH->levels - 1)
@@ -1683,25 +1665,28 @@ void bssnEM_class::Step(int lev, int YN)
} }
#endif #endif
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); // CA-RK4: only sync after last corrector (iter_count == 3); stages 1 & 2 are redundant
if (iter_count == 3) {
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
#ifdef WithShell #ifdef WithShell
if (lev == 0) if (lev == 0)
{
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->Synch(SynchList_cor, Symmetry);
if (myrank == 0)
{ {
prev_clock = curr_clock; clock_t prev_clock, curr_clock;
curr_clock = clock(); if (myrank == 0)
cout << " Shell stuff synchronization used " curr_clock = clock();
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) SH->Synch(SynchList_cor, Symmetry);
<< " seconds! " << endl; if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
}
} }
}
#endif #endif
} // end CA-RK4 guard
// for black hole position // for black hole position
if (BH_num > 0 && lev == GH->levels - 1) if (BH_num > 0 && lev == GH->levels - 1)
{ {

View File

@@ -3349,27 +3349,7 @@ void bssn_class::Step(int lev, int YN)
} }
#endif #endif
Parallel::AsyncSyncState async_pre; // CA-RK4: skip post-prediction sync (redundant; ghost cells computable locally)
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
#ifdef WithShell
if (lev == 0)
{
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->Synch(SynchList_pre, Symmetry);
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
}
}
#endif
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
#ifdef WithShell #ifdef WithShell
// Complete non-blocking error reduction and check // Complete non-blocking error reduction and check
@@ -3709,27 +3689,30 @@ void bssn_class::Step(int lev, int YN)
} }
#endif #endif
Parallel::AsyncSyncState async_cor; // CA-RK4: only sync after last corrector (iter_count == 3); stages 1 & 2 are redundant
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor); if (iter_count == 3) {
Parallel::AsyncSyncState async_cor;
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
#ifdef WithShell #ifdef WithShell
if (lev == 0) if (lev == 0)
{
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->Synch(SynchList_cor, Symmetry);
if (myrank == 0)
{ {
prev_clock = curr_clock; clock_t prev_clock, curr_clock;
curr_clock = clock(); if (myrank == 0)
cout << " Shell stuff synchronization used " curr_clock = clock();
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) SH->Synch(SynchList_cor, Symmetry);
<< " seconds! " << endl; if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
}
} }
}
#endif #endif
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry); Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
} // end CA-RK4 guard
#ifdef WithShell #ifdef WithShell
// Complete non-blocking error reduction and check // Complete non-blocking error reduction and check

View File

@@ -73,6 +73,10 @@ void fdderivs(const int ex[3],
/* 输出清零fxx,fyy,fzz,fxy,fxz,fyz = 0 */ /* 输出清零fxx,fyy,fzz,fxy,fxz,fyz = 0 */
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3; const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
for (size_t p = 0; p < all; ++p) {
fxx[p] = ZEO; fyy[p] = ZEO; fzz[p] = ZEO;
fxy[p] = ZEO; fxz[p] = ZEO; fyz[p] = ZEO;
}
/* /*
* Fortran: * Fortran:

View File

@@ -74,6 +74,11 @@ void fderivs(const int ex[3],
// fx = fy = fz = 0 // fx = fy = fz = 0
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3; const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
for (size_t p = 0; p < all; ++p) {
fx[p] = ZEO;
fy[p] = ZEO;
fz[p] = ZEO;
}
/* /*
* Fortran loops: * Fortran loops: