Add resident-GA mode switch and simplify sync logic
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
#include <typeinfo>
|
||||
#include <sstream>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
@@ -54,10 +55,6 @@ using namespace std;
|
||||
namespace {
|
||||
|
||||
static const int k_bssn_cuda_bh_state_indices[3] = {18, 19, 20};
|
||||
static const int k_bssn_cuda_ga_state_indices[12] = {
|
||||
2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13
|
||||
};
|
||||
|
||||
bool fill_bssn_cuda_views(Block *cg, MyList<var> *vars,
|
||||
double **host_views,
|
||||
@@ -107,20 +104,6 @@ bool bssn_cuda_sync_subset(Block *cg,
|
||||
return bssn_cuda_download_state_subset(cg, cg->shape, subset_count, state_indices, host_views) == 0;
|
||||
}
|
||||
|
||||
bool bssn_cuda_sync_ga_fields(Block *cg, MyList<var> *vars, bool upload)
|
||||
{
|
||||
double *ga_fields[12];
|
||||
int idx = 0;
|
||||
while (vars && idx < 12)
|
||||
{
|
||||
ga_fields[idx++] = cg->fgfs[vars->data->sgfn];
|
||||
vars = vars->next;
|
||||
}
|
||||
if (idx != 12)
|
||||
return false;
|
||||
return bssn_cuda_sync_subset(cg, 12, k_bssn_cuda_ga_state_indices, ga_fields, upload);
|
||||
}
|
||||
|
||||
bool bssn_cuda_sync_bh_fields(Block *cg, var *forx, var *fory, var *forz, bool upload)
|
||||
{
|
||||
double *bh_fields[3] = {
|
||||
@@ -3255,24 +3238,6 @@ void bssn_class::Step(int lev, int YN)
|
||||
bool used_gpu_substep = false;
|
||||
bool used_gpu_resident_state = false;
|
||||
#if USE_CUDA_BSSN
|
||||
if (use_cuda_resident_sync)
|
||||
{
|
||||
if (!bssn_cuda_sync_ga_fields(cg, StateList->next->next, false))
|
||||
{
|
||||
cout << "CUDA predictor GA subset download failed" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
f_enforce_ga(cg->shape,
|
||||
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
||||
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
||||
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
|
||||
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
|
||||
if (!bssn_cuda_sync_ga_fields(cg, StateList->next->next, true))
|
||||
{
|
||||
cout << "CUDA predictor GA subset upload failed" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
{
|
||||
double *state_in[BSSN_CUDA_STATE_COUNT];
|
||||
double *state_out[BSSN_CUDA_STATE_COUNT];
|
||||
@@ -3292,7 +3257,7 @@ void bssn_class::Step(int lev, int YN)
|
||||
int keep_resident_state = use_cuda_resident_sync ? 1 : 0;
|
||||
int apply_enforce_ga = 0;
|
||||
#if (AGM == 0)
|
||||
apply_enforce_ga = use_cuda_resident_sync ? 0 : 1;
|
||||
apply_enforce_ga = 1;
|
||||
#endif
|
||||
#if (SommerType == 0)
|
||||
#ifndef WithShell
|
||||
@@ -3706,24 +3671,6 @@ void bssn_class::Step(int lev, int YN)
|
||||
bool used_gpu_substep = false;
|
||||
bool used_gpu_resident_state = false;
|
||||
#if USE_CUDA_BSSN
|
||||
if (use_cuda_resident_sync)
|
||||
{
|
||||
if (!bssn_cuda_sync_ga_fields(cg, SynchList_pre->next->next, false))
|
||||
{
|
||||
cout << "CUDA corrector GA subset download failed" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
f_enforce_ga(cg->shape,
|
||||
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
|
||||
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
|
||||
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
|
||||
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
|
||||
if (!bssn_cuda_sync_ga_fields(cg, SynchList_pre->next->next, true))
|
||||
{
|
||||
cout << "CUDA corrector GA subset upload failed" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
{
|
||||
double *state_in[BSSN_CUDA_STATE_COUNT];
|
||||
double *state_out[BSSN_CUDA_STATE_COUNT];
|
||||
@@ -3743,9 +3690,9 @@ void bssn_class::Step(int lev, int YN)
|
||||
int keep_resident_state = use_cuda_resident_sync ? 1 : 0;
|
||||
int apply_enforce_ga = 0;
|
||||
#if (AGM == 0)
|
||||
apply_enforce_ga = use_cuda_resident_sync ? 0 : 1;
|
||||
apply_enforce_ga = 1;
|
||||
#elif (AGM == 1)
|
||||
apply_enforce_ga = (iter_count == 3 && !use_cuda_resident_sync) ? 1 : 0;
|
||||
apply_enforce_ga = use_cuda_resident_sync ? 1 : ((iter_count == 3) ? 1 : 0);
|
||||
#endif
|
||||
#if (SommerType == 0)
|
||||
#ifndef WithShell
|
||||
|
||||
@@ -2477,7 +2477,7 @@ __global__ void kern_enforce_ga_cuda(double * __restrict__ dxx,
|
||||
- lgxy * lgxy * lgzz
|
||||
- lgxx * lgyz * lgyz;
|
||||
|
||||
lscale = ONE / pow(lscale, F1O3);
|
||||
lscale = ONE / cbrt(lscale);
|
||||
|
||||
lgxx *= lscale;
|
||||
lgxy *= lscale;
|
||||
|
||||
Reference in New Issue
Block a user