From f9119e8a2a6ab12753b26f4d5c340fea89b2dd42 Mon Sep 17 00:00:00 2001 From: ianchb Date: Tue, 14 Apr 2026 20:08:48 +0800 Subject: [PATCH] Add resident-GA mode switch and simplify sync logic --- AMSS_NCKU_source/bssn_class.C | 61 ++----------------------------- AMSS_NCKU_source/bssn_rhs_cuda.cu | 2 +- 2 files changed, 5 insertions(+), 58 deletions(-) diff --git a/AMSS_NCKU_source/bssn_class.C b/AMSS_NCKU_source/bssn_class.C index 9cda5f7..326d8f3 100644 --- a/AMSS_NCKU_source/bssn_class.C +++ b/AMSS_NCKU_source/bssn_class.C @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -54,10 +55,6 @@ using namespace std; namespace { static const int k_bssn_cuda_bh_state_indices[3] = {18, 19, 20}; -static const int k_bssn_cuda_ga_state_indices[12] = { - 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13 -}; bool fill_bssn_cuda_views(Block *cg, MyList *vars, double **host_views, @@ -107,20 +104,6 @@ bool bssn_cuda_sync_subset(Block *cg, return bssn_cuda_download_state_subset(cg, cg->shape, subset_count, state_indices, host_views) == 0; } -bool bssn_cuda_sync_ga_fields(Block *cg, MyList *vars, bool upload) -{ - double *ga_fields[12]; - int idx = 0; - while (vars && idx < 12) - { - ga_fields[idx++] = cg->fgfs[vars->data->sgfn]; - vars = vars->next; - } - if (idx != 12) - return false; - return bssn_cuda_sync_subset(cg, 12, k_bssn_cuda_ga_state_indices, ga_fields, upload); -} - bool bssn_cuda_sync_bh_fields(Block *cg, var *forx, var *fory, var *forz, bool upload) { double *bh_fields[3] = { @@ -3255,24 +3238,6 @@ void bssn_class::Step(int lev, int YN) bool used_gpu_substep = false; bool used_gpu_resident_state = false; #if USE_CUDA_BSSN - if (use_cuda_resident_sync) - { - if (!bssn_cuda_sync_ga_fields(cg, StateList->next->next, false)) - { - cout << "CUDA predictor GA subset download failed" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); - if (!bssn_cuda_sync_ga_fields(cg, StateList->next->next, true)) - { - cout << "CUDA predictor GA subset upload failed" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } { double *state_in[BSSN_CUDA_STATE_COUNT]; double *state_out[BSSN_CUDA_STATE_COUNT]; @@ -3292,7 +3257,7 @@ void bssn_class::Step(int lev, int YN) int keep_resident_state = use_cuda_resident_sync ? 1 : 0; int apply_enforce_ga = 0; #if (AGM == 0) - apply_enforce_ga = use_cuda_resident_sync ? 0 : 1; + apply_enforce_ga = 1; #endif #if (SommerType == 0) #ifndef WithShell @@ -3706,24 +3671,6 @@ void bssn_class::Step(int lev, int YN) bool used_gpu_substep = false; bool used_gpu_resident_state = false; #if USE_CUDA_BSSN - if (use_cuda_resident_sync) - { - if (!bssn_cuda_sync_ga_fields(cg, SynchList_pre->next->next, false)) - { - cout << "CUDA corrector GA subset download failed" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); - if (!bssn_cuda_sync_ga_fields(cg, SynchList_pre->next->next, true)) - { - cout << "CUDA corrector GA subset upload failed" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } { double *state_in[BSSN_CUDA_STATE_COUNT]; double *state_out[BSSN_CUDA_STATE_COUNT]; @@ -3743,9 +3690,9 @@ void bssn_class::Step(int lev, int YN) int keep_resident_state = use_cuda_resident_sync ? 1 : 0; int apply_enforce_ga = 0; #if (AGM == 0) - apply_enforce_ga = use_cuda_resident_sync ? 0 : 1; + apply_enforce_ga = 1; #elif (AGM == 1) - apply_enforce_ga = (iter_count == 3 && !use_cuda_resident_sync) ? 1 : 0; + apply_enforce_ga = use_cuda_resident_sync ? 1 : ((iter_count == 3) ? 1 : 0); #endif #if (SommerType == 0) #ifndef WithShell diff --git a/AMSS_NCKU_source/bssn_rhs_cuda.cu b/AMSS_NCKU_source/bssn_rhs_cuda.cu index 046ff91..38732c6 100644 --- a/AMSS_NCKU_source/bssn_rhs_cuda.cu +++ b/AMSS_NCKU_source/bssn_rhs_cuda.cu @@ -2477,7 +2477,7 @@ __global__ void kern_enforce_ga_cuda(double * __restrict__ dxx, - lgxy * lgxy * lgzz - lgxx * lgyz * lgyz; - lscale = ONE / pow(lscale, F1O3); + lscale = ONE / cbrt(lscale); lgxx *= lscale; lgxy *= lscale;