Skip zero EM resident downloads

This commit is contained in:
2026-05-07 13:04:46 +08:00
parent cb911dec06
commit 83afaf19ce
2 changed files with 81 additions and 45 deletions

View File

@@ -87,7 +87,18 @@ bool bssn_em_zero_analysis_fastpath_enabled()
return enabled != 0;
}
bool bssn_em_analysis_zero_fastpath_ready(MyList<Patch> *PatL,
// Reports whether the "skip resident download when EM is zero" fast path is
// switched on.
//
// The answer comes from the AMSS_EM_ZERO_RESIDENT_DOWNLOAD_FASTPATH environment
// variable, which is consulted only on the first call; the result is cached in
// a function-local static and reused afterwards.
//   - variable unset                     -> enabled
//   - value that atoi() parses as non-0  -> enabled
//   - value that atoi() parses as 0      -> disabled (this includes
//                                           non-numeric text such as "off")
bool bssn_em_zero_resident_download_fastpath_enabled()
{
  // -1 = not yet resolved, 0 = disabled, 1 = enabled.
  static int cached_state = -1;

  if (cached_state >= 0)
    return cached_state != 0;

  const char *setting = getenv("AMSS_EM_ZERO_RESIDENT_DOWNLOAD_FASTPATH");
  if (!setting)
    cached_state = 1; // default: on when the variable is absent
  else if (atoi(setting) != 0)
    cached_state = 1;
  else
    cached_state = 0;

  return cached_state != 0;
}
bool bssn_em_resident_zero_fastpath_ready(MyList<Patch> *PatL,
#ifdef WithShell
ShellPatch *shell,
#else
@@ -95,8 +106,6 @@ bool bssn_em_analysis_zero_fastpath_ready(MyList<Patch> *PatL,
#endif
int rank)
{
if (!bssn_em_zero_analysis_fastpath_enabled())
return false;
int local_ok = 1;
int local_seen = 0;
MyList<Patch> *Pp = PatL;
@@ -149,6 +158,19 @@ bool bssn_em_analysis_zero_fastpath_ready(MyList<Patch> *PatL,
return global_seen && global_ok;
}
// Analysis-side gate for the zero-EM fast path.
//
// Yields false without inspecting any patch when
// bssn_em_zero_analysis_fastpath_enabled() reports the feature as off.
// Otherwise the decision is delegated unchanged to
// bssn_em_resident_zero_fastpath_ready(), which receives the same patch list,
// shell pointer and rank.
bool bssn_em_analysis_zero_fastpath_ready(MyList<Patch> *PatL,
#ifdef WithShell
                                          ShellPatch *shell,
#else
                                          ShellPatch *shell,
#endif
                                          int rank)
{
  // && short-circuits, so the readiness check only runs when the switch is on.
  return bssn_em_zero_analysis_fastpath_enabled() &&
         bssn_em_resident_zero_fastpath_ready(PatL, shell, rank);
}
void zero_em_analysis_outputs(MyList<Patch> *PatL,
#ifdef WithShell
ShellPatch *shell,
@@ -1660,29 +1682,29 @@ void bssnEM_class::Step(int lev, int YN)
}
#endif
if (em_step_timing)
em_t0 = MPI_Wtime();
Parallel::Sync_cached(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev]);
if (em_step_timing)
em_t_predictor_sync += MPI_Wtime() - em_t0;
#ifdef WithShell
if (lev == 0)
{
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->Synch(SynchList_pre, Symmetry);
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
}
}
#endif
if (em_step_timing)
em_t0 = MPI_Wtime();
Parallel::Sync_cached(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev]);
if (em_step_timing)
em_t_predictor_sync += MPI_Wtime() - em_t0;
#ifdef WithShell
if (lev == 0)
{
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->Synch(SynchList_pre, Symmetry);
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
}
}
#endif
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
@@ -2198,24 +2220,24 @@ void bssnEM_class::Step(int lev, int YN)
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev]);
if (em_step_timing)
em_t_corrector_sync += MPI_Wtime() - em_t0;
#ifdef WithShell
if (lev == 0)
{
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->Synch(SynchList_cor, Symmetry);
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
}
}
#endif
#ifdef WithShell
if (lev == 0)
{
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->Synch(SynchList_cor, Symmetry);
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
}
}
#endif
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
@@ -2310,7 +2332,19 @@ void bssnEM_class::Step(int lev, int YN)
{
if (em_step_timing)
em_t0 = MPI_Wtime();
if (!bssn_em_cuda_keep_resident_after_step(lev, trfls, a_lev))
const bool needs_resident_download =
!bssn_em_cuda_keep_resident_after_step(lev, trfls, a_lev);
const bool skip_zero_resident_download =
needs_resident_download &&
bssn_em_zero_resident_download_fastpath_enabled() &&
bssn_em_resident_zero_fastpath_ready(GH->PatL[lev],
#ifdef WithShell
0,
#else
0,
#endif
myrank);
if (needs_resident_download && !skip_zero_resident_download)
bssn_em_cuda_download_level_state(GH->PatL[lev], SynchList_cor, myrank, true);
if (em_step_timing)
em_t_resident += MPI_Wtime() - em_t0;

View File

@@ -153,6 +153,7 @@ def _gpu_runtime_env():
"AMSS_CUDA_EM_CACHE_SOURCES": "1",
"AMSS_CUDA_EM_ZERO_FASTPATH": "1",
"AMSS_EM_ZERO_ANALYSIS_FASTPATH": "1",
"AMSS_EM_ZERO_RESIDENT_DOWNLOAD_FASTPATH": "1",
"AMSS_CUDA_AMR_HOST_STAGED": "1",
"AMSS_CUDA_AMR_RESTRICT_DEVICE": "0",
"AMSS_CUDA_AMR_RESTRICT_BATCH": "0",
@@ -293,6 +294,7 @@ def run_ABE():
print(f" AMSS_CUDA_EM_CACHE_SOURCES={mpi_env.get('AMSS_CUDA_EM_CACHE_SOURCES', '')}")
print(f" AMSS_CUDA_EM_ZERO_FASTPATH={mpi_env.get('AMSS_CUDA_EM_ZERO_FASTPATH', '')}")
print(f" AMSS_EM_ZERO_ANALYSIS_FASTPATH={mpi_env.get('AMSS_EM_ZERO_ANALYSIS_FASTPATH', '')}")
print(f" AMSS_EM_ZERO_RESIDENT_DOWNLOAD_FASTPATH={mpi_env.get('AMSS_EM_ZERO_RESIDENT_DOWNLOAD_FASTPATH', '')}")
print(f" AMSS_CUDA_AMR_HOST_STAGED={mpi_env.get('AMSS_CUDA_AMR_HOST_STAGED', '')}")
print(f" AMSS_CUDA_AMR_RESTRICT_DEVICE={mpi_env.get('AMSS_CUDA_AMR_RESTRICT_DEVICE', '')}")
print(f" AMSS_CUDA_AMR_RESTRICT_BATCH={mpi_env.get('AMSS_CUDA_AMR_RESTRICT_BATCH', '')}")