Fix BSSN-EScalar CUDA boundary and scalar KO

This commit is contained in:
2026-05-06 15:44:35 +08:00
parent ffa0d801ed
commit dd0e20d8c7

View File

@@ -502,6 +502,7 @@ static const int STAGE_SLOT_COUNT =
static constexpr int BSSN_STATE_COUNT = 24;
static constexpr int BSSN_MATTER_COUNT = 10;
static constexpr int BSSN_LK_FIELD_COUNT = 24;
static constexpr int BSSN_ESCALAR_LK_FIELD_COUNT = 26;
static constexpr int BSSN_RESIDENT_BANK_COUNT = 6;
static constexpr int BSSN_ESCALAR_STATE_COUNT = 26;
static constexpr int BSSN_RESIDENT_STATE_CAPACITY = BSSN_ESCALAR_STATE_COUNT;
@@ -1568,10 +1569,10 @@ void kern_kodis(const double * __restrict__ fh,
/* Host wrapper helpers */
/* ================================================================== */
struct LopsidedKodisTables {
const double *adv_fields[BSSN_LK_FIELD_COUNT];
const double *ko_fields[BSSN_LK_FIELD_COUNT];
double *rhs_fields[BSSN_LK_FIELD_COUNT];
int soa_signs[3 * BSSN_LK_FIELD_COUNT];
const double *adv_fields[BSSN_ESCALAR_LK_FIELD_COUNT];
const double *ko_fields[BSSN_ESCALAR_LK_FIELD_COUNT];
double *rhs_fields[BSSN_ESCALAR_LK_FIELD_COUNT];
int soa_signs[3 * BSSN_ESCALAR_LK_FIELD_COUNT];
};
struct FDerivTables {
@@ -1612,6 +1613,11 @@ struct PatchBoundaryTables {
double *dst_fields[BSSN_STATE_COUNT];
};
struct EScalarBoundaryTables {
const double *f0_fields[BSSN_ESCALAR_STATE_COUNT];
double *out_fields[BSSN_ESCALAR_STATE_COUNT];
};
static const int BLK = 128;
static inline int grid(size_t n) {
if (n == 0) return 1;
@@ -2559,7 +2565,7 @@ void kern_lopsided_kodis_batched(const double * __restrict__ Sfx,
}
}
static void gpu_lopsided_kodis_state_batch(double eps_val, int all)
static void gpu_lopsided_kodis_state_batch(double eps_val, int all, bool include_escalar = false)
{
LopsidedKodisTables tables = {};
for (int i = 0; i < BSSN_LK_FIELD_COUNT; ++i) {
@@ -2569,7 +2575,26 @@ static void gpu_lopsided_kodis_state_batch(double eps_val, int all)
}
std::memcpy(tables.soa_signs, k_lk_soa_signs, sizeof(k_lk_soa_signs));
dim3 launch_grid((unsigned int)grid((size_t)all), (unsigned int)BSSN_LK_FIELD_COUNT);
int field_count = BSSN_LK_FIELD_COUNT;
if (include_escalar) {
tables.adv_fields[field_count] = g_buf.slot[S_Sphi];
tables.ko_fields[field_count] = g_buf.slot[S_Sphi];
tables.rhs_fields[field_count] = g_buf.slot[S_Sphi_rhs];
tables.soa_signs[3 * field_count + 0] = 1;
tables.soa_signs[3 * field_count + 1] = 1;
tables.soa_signs[3 * field_count + 2] = 1;
++field_count;
tables.adv_fields[field_count] = g_buf.slot[S_Spi];
tables.ko_fields[field_count] = g_buf.slot[S_Spi];
tables.rhs_fields[field_count] = g_buf.slot[S_Spi_rhs];
tables.soa_signs[3 * field_count + 0] = 1;
tables.soa_signs[3 * field_count + 1] = 1;
tables.soa_signs[3 * field_count + 2] = 1;
++field_count;
}
dim3 launch_grid((unsigned int)grid((size_t)all), (unsigned int)field_count);
kern_lopsided_kodis_batched<<<launch_grid, BLK>>>(
g_buf.slot[S_betax], g_buf.slot[S_betay], g_buf.slot[S_betaz], tables, eps_val);
}
@@ -2862,8 +2887,7 @@ static void gpu_copy_patch_boundary_batch(int all,
}
__global__ __launch_bounds__(128, 4)
void kern_copy_patch_boundary_scalar(const double * __restrict__ src,
double * __restrict__ dst,
void kern_escalar_restore_patch_boundary_batched(EScalarBoundaryTables tables,
int touch_xmin, int touch_xmax,
int touch_ymin, int touch_ymax,
int touch_zmin, int touch_zmax)
@@ -2873,34 +2897,42 @@ void kern_copy_patch_boundary_scalar(const double * __restrict__ src,
const int nx = d_gp.ex[0];
const int ny = d_gp.ex[1];
const int nz = d_gp.ex[2];
const int i0 = tid % nx;
const int j0 = (tid / nx) % ny;
const int k0 = tid / (nx * ny);
const bool on_boundary =
(touch_xmin && i0 == 0) ||
(touch_xmax && i0 == nx - 1) ||
(touch_ymin && j0 == 0) ||
(touch_ymax && j0 == ny - 1) ||
(touch_zmin && k0 == 0) ||
(touch_zmax && k0 == d_gp.ex[2] - 1);
if (on_boundary) dst[tid] = src[tid];
(touch_zmax && k0 == nz - 1);
if (!on_boundary) return;
const int field = blockIdx.y;
tables.out_fields[field][tid] = tables.f0_fields[field][tid];
}
static void gpu_escalar_copy_patch_boundary_batch(int all,
static void gpu_escalar_restore_patch_boundary_batch(const StepContext &ctx,
int all,
int touch_xmin, int touch_xmax,
int touch_ymin, int touch_ymax,
int touch_zmin, int touch_zmax)
{
if (!(touch_xmin || touch_xmax || touch_ymax || touch_ymin || touch_zmin || touch_zmax))
return;
gpu_copy_patch_boundary_batch(all, touch_xmin, touch_xmax,
touch_ymin, touch_ymax,
touch_zmin, touch_zmax);
kern_copy_patch_boundary_scalar<<<grid((size_t)all), BLK>>>(
g_buf.slot[S_Sphi], g_buf.slot[S_Sphi_rhs],
touch_xmin, touch_xmax, touch_ymin, touch_ymax, touch_zmin, touch_zmax);
kern_copy_patch_boundary_scalar<<<grid((size_t)all), BLK>>>(
g_buf.slot[S_Spi], g_buf.slot[S_Spi_rhs],
EScalarBoundaryTables tables = {};
for (int i = 0; i < BSSN_ESCALAR_STATE_COUNT; ++i) {
tables.f0_fields[i] = ctx.d_state0[i];
tables.out_fields[i] = g_buf.slot[k_escalar_state_rhs_slots[i]];
}
dim3 launch_grid((unsigned int)grid((size_t)all), (unsigned int)BSSN_ESCALAR_STATE_COUNT);
kern_escalar_restore_patch_boundary_batched<<<launch_grid, BLK>>>(
tables,
touch_xmin, touch_xmax, touch_ymin, touch_ymax, touch_zmin, touch_zmax);
}
@@ -6197,7 +6229,7 @@ static void launch_rhs_pipeline(int all, double eps, int co, bool compute_escala
D(S_f_arr), D(S_S_arr));
MARK_RHS_STAGE(RHS_STAGE_GAUGE_RHS);
gpu_lopsided_kodis_state_batch(eps, all);
gpu_lopsided_kodis_state_batch(eps, all, compute_escalar);
MARK_RHS_STAGE(RHS_STAGE_KODIS);
if (co == 0) {
@@ -6406,7 +6438,7 @@ int bssn_escalar_cuda_rk4_substep(void *block_tag,
gpu_escalar_rk4_finalize_batch(ctx, all, dT, RK4, chitiny);
if (Lev > 0) {
gpu_escalar_copy_patch_boundary_batch((int)all,
gpu_escalar_restore_patch_boundary_batch(ctx, (int)all,
touch_xmin, touch_xmax,
touch_ymin, touch_ymax,
touch_zmin, touch_zmax);