Add resident-GA mode switch and simplify sync logic

This commit is contained in:
2026-04-14 20:08:48 +08:00
parent 726d743376
commit f9119e8a2a
2 changed files with 5 additions and 58 deletions

View File

@@ -3,6 +3,7 @@
#include <typeinfo>
#include <sstream>
#include <cstdio>
#include <cstdlib>
#include <map>
#include <string>
#include <cstring>
@@ -54,10 +55,6 @@ using namespace std;
namespace {
static const int k_bssn_cuda_bh_state_indices[3] = {18, 19, 20};
static const int k_bssn_cuda_ga_state_indices[12] = {
2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13
};
bool fill_bssn_cuda_views(Block *cg, MyList<var> *vars,
double **host_views,
@@ -107,20 +104,6 @@ bool bssn_cuda_sync_subset(Block *cg,
return bssn_cuda_download_state_subset(cg, cg->shape, subset_count, state_indices, host_views) == 0;
}
bool bssn_cuda_sync_ga_fields(Block *cg, MyList<var> *vars, bool upload)
{
double *ga_fields[12];
int idx = 0;
while (vars && idx < 12)
{
ga_fields[idx++] = cg->fgfs[vars->data->sgfn];
vars = vars->next;
}
if (idx != 12)
return false;
return bssn_cuda_sync_subset(cg, 12, k_bssn_cuda_ga_state_indices, ga_fields, upload);
}
bool bssn_cuda_sync_bh_fields(Block *cg, var *forx, var *fory, var *forz, bool upload)
{
double *bh_fields[3] = {
@@ -3255,24 +3238,6 @@ void bssn_class::Step(int lev, int YN)
bool used_gpu_substep = false;
bool used_gpu_resident_state = false;
#if USE_CUDA_BSSN
if (use_cuda_resident_sync)
{
if (!bssn_cuda_sync_ga_fields(cg, StateList->next->next, false))
{
cout << "CUDA predictor GA subset download failed" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
f_enforce_ga(cg->shape,
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
if (!bssn_cuda_sync_ga_fields(cg, StateList->next->next, true))
{
cout << "CUDA predictor GA subset upload failed" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
{
double *state_in[BSSN_CUDA_STATE_COUNT];
double *state_out[BSSN_CUDA_STATE_COUNT];
@@ -3292,7 +3257,7 @@ void bssn_class::Step(int lev, int YN)
int keep_resident_state = use_cuda_resident_sync ? 1 : 0;
int apply_enforce_ga = 0;
#if (AGM == 0)
apply_enforce_ga = use_cuda_resident_sync ? 0 : 1;
apply_enforce_ga = 1;
#endif
#if (SommerType == 0)
#ifndef WithShell
@@ -3706,24 +3671,6 @@ void bssn_class::Step(int lev, int YN)
bool used_gpu_substep = false;
bool used_gpu_resident_state = false;
#if USE_CUDA_BSSN
if (use_cuda_resident_sync)
{
if (!bssn_cuda_sync_ga_fields(cg, SynchList_pre->next->next, false))
{
cout << "CUDA corrector GA subset download failed" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
f_enforce_ga(cg->shape,
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
if (!bssn_cuda_sync_ga_fields(cg, SynchList_pre->next->next, true))
{
cout << "CUDA corrector GA subset upload failed" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
{
double *state_in[BSSN_CUDA_STATE_COUNT];
double *state_out[BSSN_CUDA_STATE_COUNT];
@@ -3743,9 +3690,9 @@ void bssn_class::Step(int lev, int YN)
int keep_resident_state = use_cuda_resident_sync ? 1 : 0;
int apply_enforce_ga = 0;
#if (AGM == 0)
apply_enforce_ga = use_cuda_resident_sync ? 0 : 1;
apply_enforce_ga = 1;
#elif (AGM == 1)
apply_enforce_ga = (iter_count == 3 && !use_cuda_resident_sync) ? 1 : 0;
apply_enforce_ga = use_cuda_resident_sync ? 1 : ((iter_count == 3) ? 1 : 0);
#endif
#if (SommerType == 0)
#ifndef WithShell

View File

@@ -2477,7 +2477,7 @@ __global__ void kern_enforce_ga_cuda(double * __restrict__ dxx,
- lgxy * lgxy * lgzz
- lgxx * lgyz * lgyz;
lscale = ONE / pow(lscale, F1O3);
lscale = ONE / cbrt(lscale);
lgxx *= lscale;
lgxy *= lscale;