Skip zero EM resident downloads
This commit is contained in:
@@ -87,7 +87,18 @@ bool bssn_em_zero_analysis_fastpath_enabled()
|
||||
return enabled != 0;
|
||||
}
|
||||
|
||||
bool bssn_em_analysis_zero_fastpath_ready(MyList<Patch> *PatL,
|
||||
/// Reports whether the "skip zero EM resident download" fast path is enabled.
///
/// The decision is read from the AMSS_EM_ZERO_RESIDENT_DOWNLOAD_FASTPATH
/// environment variable on the first call and cached in a function-local
/// static, so later changes to the environment are not observed.
///
/// The fast path is enabled when the variable is unset or parses to a
/// non-zero integer, and disabled when it parses to 0 (this includes text
/// that does not start with a number).
///
/// @return true when the fast path is enabled, false otherwise.
bool bssn_em_zero_resident_download_fastpath_enabled()
{
  // -1 = environment not read yet, 0 = disabled, 1 = enabled.
  static int enabled = -1;
  if (enabled < 0)
  {
    const char *env = getenv("AMSS_EM_ZERO_RESIDENT_DOWNLOAD_FASTPATH");
    // strtol instead of atoi: atoi has undefined behaviour when the value
    // does not fit in an int, while strtol saturates; in-range input gives
    // the same answer as before.
    enabled = (!env || strtol(env, NULL, 10) != 0) ? 1 : 0;
  }
  return enabled != 0;
}
|
||||
|
||||
bool bssn_em_resident_zero_fastpath_ready(MyList<Patch> *PatL,
|
||||
#ifdef WithShell
|
||||
ShellPatch *shell,
|
||||
#else
|
||||
@@ -95,8 +106,6 @@ bool bssn_em_analysis_zero_fastpath_ready(MyList<Patch> *PatL,
|
||||
#endif
|
||||
int rank)
|
||||
{
|
||||
if (!bssn_em_zero_analysis_fastpath_enabled())
|
||||
return false;
|
||||
int local_ok = 1;
|
||||
int local_seen = 0;
|
||||
MyList<Patch> *Pp = PatL;
|
||||
@@ -149,6 +158,19 @@ bool bssn_em_analysis_zero_fastpath_ready(MyList<Patch> *PatL,
|
||||
return global_seen && global_ok;
|
||||
}
|
||||
|
||||
// Analysis-side gate for the zero-EM fast path.
// Yields false immediately when bssn_em_zero_analysis_fastpath_enabled()
// reports the toggle is off; otherwise forwards PatL, shell and rank to
// bssn_em_resident_zero_fastpath_ready() and returns its result.
bool bssn_em_analysis_zero_fastpath_ready(MyList<Patch> *PatL,
#ifdef WithShell
                                          ShellPatch *shell,
#else
                                          ShellPatch *shell,
#endif
                                          int rank)
{
  const bool analysis_fastpath_on = bssn_em_zero_analysis_fastpath_enabled();
  // Short-circuit keeps the readiness check from running when the toggle is off.
  return analysis_fastpath_on &&
         bssn_em_resident_zero_fastpath_ready(PatL, shell, rank);
}
|
||||
|
||||
void zero_em_analysis_outputs(MyList<Patch> *PatL,
|
||||
#ifdef WithShell
|
||||
ShellPatch *shell,
|
||||
@@ -1660,29 +1682,29 @@ void bssnEM_class::Step(int lev, int YN)
|
||||
}
|
||||
#endif
|
||||
|
||||
if (em_step_timing)
|
||||
em_t0 = MPI_Wtime();
|
||||
Parallel::Sync_cached(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev]);
|
||||
if (em_step_timing)
|
||||
em_t_predictor_sync += MPI_Wtime() - em_t0;
|
||||
|
||||
#ifdef WithShell
|
||||
if (lev == 0)
|
||||
{
|
||||
clock_t prev_clock, curr_clock;
|
||||
if (myrank == 0)
|
||||
curr_clock = clock();
|
||||
SH->Synch(SynchList_pre, Symmetry);
|
||||
if (myrank == 0)
|
||||
{
|
||||
prev_clock = curr_clock;
|
||||
curr_clock = clock();
|
||||
cout << " Shell stuff synchronization used "
|
||||
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
|
||||
<< " seconds! " << endl;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (em_step_timing)
|
||||
em_t0 = MPI_Wtime();
|
||||
Parallel::Sync_cached(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev]);
|
||||
if (em_step_timing)
|
||||
em_t_predictor_sync += MPI_Wtime() - em_t0;
|
||||
|
||||
#ifdef WithShell
|
||||
if (lev == 0)
|
||||
{
|
||||
clock_t prev_clock, curr_clock;
|
||||
if (myrank == 0)
|
||||
curr_clock = clock();
|
||||
SH->Synch(SynchList_pre, Symmetry);
|
||||
if (myrank == 0)
|
||||
{
|
||||
prev_clock = curr_clock;
|
||||
curr_clock = clock();
|
||||
cout << " Shell stuff synchronization used "
|
||||
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
|
||||
<< " seconds! " << endl;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// for black hole position
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
@@ -2198,24 +2220,24 @@ void bssnEM_class::Step(int lev, int YN)
|
||||
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev]);
|
||||
if (em_step_timing)
|
||||
em_t_corrector_sync += MPI_Wtime() - em_t0;
|
||||
|
||||
#ifdef WithShell
|
||||
if (lev == 0)
|
||||
{
|
||||
clock_t prev_clock, curr_clock;
|
||||
if (myrank == 0)
|
||||
curr_clock = clock();
|
||||
SH->Synch(SynchList_cor, Symmetry);
|
||||
if (myrank == 0)
|
||||
{
|
||||
prev_clock = curr_clock;
|
||||
curr_clock = clock();
|
||||
cout << " Shell stuff synchronization used "
|
||||
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
|
||||
<< " seconds! " << endl;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef WithShell
|
||||
if (lev == 0)
|
||||
{
|
||||
clock_t prev_clock, curr_clock;
|
||||
if (myrank == 0)
|
||||
curr_clock = clock();
|
||||
SH->Synch(SynchList_cor, Symmetry);
|
||||
if (myrank == 0)
|
||||
{
|
||||
prev_clock = curr_clock;
|
||||
curr_clock = clock();
|
||||
cout << " Shell stuff synchronization used "
|
||||
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
|
||||
<< " seconds! " << endl;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// for black hole position
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
@@ -2310,7 +2332,19 @@ void bssnEM_class::Step(int lev, int YN)
|
||||
{
|
||||
if (em_step_timing)
|
||||
em_t0 = MPI_Wtime();
|
||||
if (!bssn_em_cuda_keep_resident_after_step(lev, trfls, a_lev))
|
||||
const bool needs_resident_download =
|
||||
!bssn_em_cuda_keep_resident_after_step(lev, trfls, a_lev);
|
||||
const bool skip_zero_resident_download =
|
||||
needs_resident_download &&
|
||||
bssn_em_zero_resident_download_fastpath_enabled() &&
|
||||
bssn_em_resident_zero_fastpath_ready(GH->PatL[lev],
|
||||
#ifdef WithShell
|
||||
0,
|
||||
#else
|
||||
0,
|
||||
#endif
|
||||
myrank);
|
||||
if (needs_resident_download && !skip_zero_resident_download)
|
||||
bssn_em_cuda_download_level_state(GH->PatL[lev], SynchList_cor, myrank, true);
|
||||
if (em_step_timing)
|
||||
em_t_resident += MPI_Wtime() - em_t0;
|
||||
|
||||
@@ -153,6 +153,7 @@ def _gpu_runtime_env():
|
||||
"AMSS_CUDA_EM_CACHE_SOURCES": "1",
|
||||
"AMSS_CUDA_EM_ZERO_FASTPATH": "1",
|
||||
"AMSS_EM_ZERO_ANALYSIS_FASTPATH": "1",
|
||||
"AMSS_EM_ZERO_RESIDENT_DOWNLOAD_FASTPATH": "1",
|
||||
"AMSS_CUDA_AMR_HOST_STAGED": "1",
|
||||
"AMSS_CUDA_AMR_RESTRICT_DEVICE": "0",
|
||||
"AMSS_CUDA_AMR_RESTRICT_BATCH": "0",
|
||||
@@ -293,6 +294,7 @@ def run_ABE():
|
||||
print(f" AMSS_CUDA_EM_CACHE_SOURCES={mpi_env.get('AMSS_CUDA_EM_CACHE_SOURCES', '')}")
|
||||
print(f" AMSS_CUDA_EM_ZERO_FASTPATH={mpi_env.get('AMSS_CUDA_EM_ZERO_FASTPATH', '')}")
|
||||
print(f" AMSS_EM_ZERO_ANALYSIS_FASTPATH={mpi_env.get('AMSS_EM_ZERO_ANALYSIS_FASTPATH', '')}")
|
||||
print(f" AMSS_EM_ZERO_RESIDENT_DOWNLOAD_FASTPATH={mpi_env.get('AMSS_EM_ZERO_RESIDENT_DOWNLOAD_FASTPATH', '')}")
|
||||
print(f" AMSS_CUDA_AMR_HOST_STAGED={mpi_env.get('AMSS_CUDA_AMR_HOST_STAGED', '')}")
|
||||
print(f" AMSS_CUDA_AMR_RESTRICT_DEVICE={mpi_env.get('AMSS_CUDA_AMR_RESTRICT_DEVICE', '')}")
|
||||
print(f" AMSS_CUDA_AMR_RESTRICT_BATCH={mpi_env.get('AMSS_CUDA_AMR_RESTRICT_BATCH', '')}")
|
||||
|
||||
Reference in New Issue
Block a user