Fix BSSN-EScalar CUDA boundary and scalar KO
This commit is contained in:
@@ -502,6 +502,7 @@ static const int STAGE_SLOT_COUNT =
|
||||
static constexpr int BSSN_STATE_COUNT = 24;
|
||||
static constexpr int BSSN_MATTER_COUNT = 10;
|
||||
static constexpr int BSSN_LK_FIELD_COUNT = 24;
|
||||
static constexpr int BSSN_ESCALAR_LK_FIELD_COUNT = 26;
|
||||
static constexpr int BSSN_RESIDENT_BANK_COUNT = 6;
|
||||
static constexpr int BSSN_ESCALAR_STATE_COUNT = 26;
|
||||
static constexpr int BSSN_RESIDENT_STATE_CAPACITY = BSSN_ESCALAR_STATE_COUNT;
|
||||
@@ -1568,10 +1569,10 @@ void kern_kodis(const double * __restrict__ fh,
|
||||
/* Host wrapper helpers */
|
||||
/* ================================================================== */
|
||||
struct LopsidedKodisTables {
|
||||
const double *adv_fields[BSSN_LK_FIELD_COUNT];
|
||||
const double *ko_fields[BSSN_LK_FIELD_COUNT];
|
||||
double *rhs_fields[BSSN_LK_FIELD_COUNT];
|
||||
int soa_signs[3 * BSSN_LK_FIELD_COUNT];
|
||||
const double *adv_fields[BSSN_ESCALAR_LK_FIELD_COUNT];
|
||||
const double *ko_fields[BSSN_ESCALAR_LK_FIELD_COUNT];
|
||||
double *rhs_fields[BSSN_ESCALAR_LK_FIELD_COUNT];
|
||||
int soa_signs[3 * BSSN_ESCALAR_LK_FIELD_COUNT];
|
||||
};
|
||||
|
||||
struct FDerivTables {
|
||||
@@ -1612,6 +1613,11 @@ struct PatchBoundaryTables {
|
||||
double *dst_fields[BSSN_STATE_COUNT];
|
||||
};
|
||||
|
||||
struct EScalarBoundaryTables {
|
||||
const double *f0_fields[BSSN_ESCALAR_STATE_COUNT];
|
||||
double *out_fields[BSSN_ESCALAR_STATE_COUNT];
|
||||
};
|
||||
|
||||
static const int BLK = 128;
|
||||
static inline int grid(size_t n) {
|
||||
if (n == 0) return 1;
|
||||
@@ -2559,7 +2565,7 @@ void kern_lopsided_kodis_batched(const double * __restrict__ Sfx,
|
||||
}
|
||||
}
|
||||
|
||||
static void gpu_lopsided_kodis_state_batch(double eps_val, int all)
|
||||
static void gpu_lopsided_kodis_state_batch(double eps_val, int all, bool include_escalar = false)
|
||||
{
|
||||
LopsidedKodisTables tables = {};
|
||||
for (int i = 0; i < BSSN_LK_FIELD_COUNT; ++i) {
|
||||
@@ -2569,7 +2575,26 @@ static void gpu_lopsided_kodis_state_batch(double eps_val, int all)
|
||||
}
|
||||
std::memcpy(tables.soa_signs, k_lk_soa_signs, sizeof(k_lk_soa_signs));
|
||||
|
||||
dim3 launch_grid((unsigned int)grid((size_t)all), (unsigned int)BSSN_LK_FIELD_COUNT);
|
||||
int field_count = BSSN_LK_FIELD_COUNT;
|
||||
if (include_escalar) {
|
||||
tables.adv_fields[field_count] = g_buf.slot[S_Sphi];
|
||||
tables.ko_fields[field_count] = g_buf.slot[S_Sphi];
|
||||
tables.rhs_fields[field_count] = g_buf.slot[S_Sphi_rhs];
|
||||
tables.soa_signs[3 * field_count + 0] = 1;
|
||||
tables.soa_signs[3 * field_count + 1] = 1;
|
||||
tables.soa_signs[3 * field_count + 2] = 1;
|
||||
++field_count;
|
||||
|
||||
tables.adv_fields[field_count] = g_buf.slot[S_Spi];
|
||||
tables.ko_fields[field_count] = g_buf.slot[S_Spi];
|
||||
tables.rhs_fields[field_count] = g_buf.slot[S_Spi_rhs];
|
||||
tables.soa_signs[3 * field_count + 0] = 1;
|
||||
tables.soa_signs[3 * field_count + 1] = 1;
|
||||
tables.soa_signs[3 * field_count + 2] = 1;
|
||||
++field_count;
|
||||
}
|
||||
|
||||
dim3 launch_grid((unsigned int)grid((size_t)all), (unsigned int)field_count);
|
||||
kern_lopsided_kodis_batched<<<launch_grid, BLK>>>(
|
||||
g_buf.slot[S_betax], g_buf.slot[S_betay], g_buf.slot[S_betaz], tables, eps_val);
|
||||
}
|
||||
@@ -2862,8 +2887,7 @@ static void gpu_copy_patch_boundary_batch(int all,
|
||||
}
|
||||
|
||||
__global__ __launch_bounds__(128, 4)
|
||||
void kern_copy_patch_boundary_scalar(const double * __restrict__ src,
|
||||
double * __restrict__ dst,
|
||||
void kern_escalar_restore_patch_boundary_batched(EScalarBoundaryTables tables,
|
||||
int touch_xmin, int touch_xmax,
|
||||
int touch_ymin, int touch_ymax,
|
||||
int touch_zmin, int touch_zmax)
|
||||
@@ -2873,34 +2897,42 @@ void kern_copy_patch_boundary_scalar(const double * __restrict__ src,
|
||||
|
||||
const int nx = d_gp.ex[0];
|
||||
const int ny = d_gp.ex[1];
|
||||
const int nz = d_gp.ex[2];
|
||||
const int i0 = tid % nx;
|
||||
const int j0 = (tid / nx) % ny;
|
||||
const int k0 = tid / (nx * ny);
|
||||
|
||||
const bool on_boundary =
|
||||
(touch_xmin && i0 == 0) ||
|
||||
(touch_xmax && i0 == nx - 1) ||
|
||||
(touch_ymin && j0 == 0) ||
|
||||
(touch_ymax && j0 == ny - 1) ||
|
||||
(touch_zmin && k0 == 0) ||
|
||||
(touch_zmax && k0 == d_gp.ex[2] - 1);
|
||||
if (on_boundary) dst[tid] = src[tid];
|
||||
(touch_zmax && k0 == nz - 1);
|
||||
if (!on_boundary) return;
|
||||
|
||||
const int field = blockIdx.y;
|
||||
tables.out_fields[field][tid] = tables.f0_fields[field][tid];
|
||||
}
|
||||
|
||||
static void gpu_escalar_copy_patch_boundary_batch(int all,
|
||||
static void gpu_escalar_restore_patch_boundary_batch(const StepContext &ctx,
|
||||
int all,
|
||||
int touch_xmin, int touch_xmax,
|
||||
int touch_ymin, int touch_ymax,
|
||||
int touch_zmin, int touch_zmax)
|
||||
{
|
||||
if (!(touch_xmin || touch_xmax || touch_ymax || touch_ymin || touch_zmin || touch_zmax))
|
||||
return;
|
||||
gpu_copy_patch_boundary_batch(all, touch_xmin, touch_xmax,
|
||||
touch_ymin, touch_ymax,
|
||||
touch_zmin, touch_zmax);
|
||||
kern_copy_patch_boundary_scalar<<<grid((size_t)all), BLK>>>(
|
||||
g_buf.slot[S_Sphi], g_buf.slot[S_Sphi_rhs],
|
||||
touch_xmin, touch_xmax, touch_ymin, touch_ymax, touch_zmin, touch_zmax);
|
||||
kern_copy_patch_boundary_scalar<<<grid((size_t)all), BLK>>>(
|
||||
g_buf.slot[S_Spi], g_buf.slot[S_Spi_rhs],
|
||||
|
||||
EScalarBoundaryTables tables = {};
|
||||
for (int i = 0; i < BSSN_ESCALAR_STATE_COUNT; ++i) {
|
||||
tables.f0_fields[i] = ctx.d_state0[i];
|
||||
tables.out_fields[i] = g_buf.slot[k_escalar_state_rhs_slots[i]];
|
||||
}
|
||||
|
||||
dim3 launch_grid((unsigned int)grid((size_t)all), (unsigned int)BSSN_ESCALAR_STATE_COUNT);
|
||||
kern_escalar_restore_patch_boundary_batched<<<launch_grid, BLK>>>(
|
||||
tables,
|
||||
touch_xmin, touch_xmax, touch_ymin, touch_ymax, touch_zmin, touch_zmax);
|
||||
}
|
||||
|
||||
@@ -6197,7 +6229,7 @@ static void launch_rhs_pipeline(int all, double eps, int co, bool compute_escala
|
||||
D(S_f_arr), D(S_S_arr));
|
||||
MARK_RHS_STAGE(RHS_STAGE_GAUGE_RHS);
|
||||
|
||||
gpu_lopsided_kodis_state_batch(eps, all);
|
||||
gpu_lopsided_kodis_state_batch(eps, all, compute_escalar);
|
||||
MARK_RHS_STAGE(RHS_STAGE_KODIS);
|
||||
|
||||
if (co == 0) {
|
||||
@@ -6406,7 +6438,7 @@ int bssn_escalar_cuda_rk4_substep(void *block_tag,
|
||||
|
||||
gpu_escalar_rk4_finalize_batch(ctx, all, dT, RK4, chitiny);
|
||||
if (Lev > 0) {
|
||||
gpu_escalar_copy_patch_boundary_batch((int)all,
|
||||
gpu_escalar_restore_patch_boundary_batch(ctx, (int)all,
|
||||
touch_xmin, touch_xmax,
|
||||
touch_ymin, touch_ymax,
|
||||
touch_zmin, touch_zmax);
|
||||
|
||||
Reference in New Issue
Block a user