From 6fd7ef2b55906ff631058ac90d7345c7b76bdd4e Mon Sep 17 00:00:00 2001 From: CGH0S7 <776459475@qq.com> Date: Sun, 12 Apr 2026 22:42:58 +0800 Subject: [PATCH] Cache GPU RHS symbols and zero vacuum sources once --- AMSS_NCKU_source/bssn_gpu.cu | 161 ++++++++++++++++++++++++----------- 1 file changed, 109 insertions(+), 52 deletions(-) diff --git a/AMSS_NCKU_source/bssn_gpu.cu b/AMSS_NCKU_source/bssn_gpu.cu index 5fef7ec..0023a2e 100644 --- a/AMSS_NCKU_source/bssn_gpu.cu +++ b/AMSS_NCKU_source/bssn_gpu.cu @@ -158,6 +158,15 @@ struct GpuRhsCache const double *last_y = nullptr; const double *last_z = nullptr; bool meta_uploaded = false; + const double *rhs_symbol_x = nullptr; + const double *rhs_symbol_y = nullptr; + const double *rhs_symbol_z = nullptr; + int rhs_symbol_symmetry = -1; + int rhs_symbol_lev = -1; + int rhs_symbol_co = -1; + double rhs_symbol_eps = 0.0; + bool rhs_symbols_uploaded = false; + bool matter_sources_zeroed = false; static const int max_mapped_buffers = 512; const double *host_buffers[max_mapped_buffers] = {nullptr}; const double *device_buffers[max_mapped_buffers] = {nullptr}; @@ -639,6 +648,15 @@ void cleanup_gpu_rhs_cache() cache.last_x = nullptr; cache.last_y = nullptr; cache.last_z = nullptr; + cache.rhs_symbol_x = nullptr; + cache.rhs_symbol_y = nullptr; + cache.rhs_symbol_z = nullptr; + cache.rhs_symbol_symmetry = -1; + cache.rhs_symbol_lev = -1; + cache.rhs_symbol_co = -1; + cache.rhs_symbol_eps = 0.0; + cache.rhs_symbols_uploaded = false; + cache.matter_sources_zeroed = false; reset_buffer_map(cache); reset_external_buffer_map(external_buffer_registry()); @@ -804,6 +822,15 @@ bool prepare_gpu_rhs_cache(GpuRhsCache &cache, int device, int *ex) cache.last_y = nullptr; cache.last_z = nullptr; cache.meta_uploaded = false; + cache.rhs_symbol_x = nullptr; + cache.rhs_symbol_y = nullptr; + cache.rhs_symbol_z = nullptr; + cache.rhs_symbol_symmetry = -1; + cache.rhs_symbol_lev = -1; + cache.rhs_symbol_co = -1; + cache.rhs_symbol_eps = 0.0; + cache.rhs_symbols_uploaded = false; + cache.matter_sources_zeroed = false; reset_buffer_map(cache); Meta *meta = &cache.meta; @@ -987,6 +1014,15 @@ bool prepare_gpu_rhs_cache(GpuRhsCache &cache, int device, int *ex) cache.last_x = nullptr; cache.last_y = nullptr; cache.last_z = nullptr; + cache.rhs_symbol_x = nullptr; + cache.rhs_symbol_y = nullptr; + cache.rhs_symbol_z = nullptr; + cache.rhs_symbol_symmetry = -1; + cache.rhs_symbol_lev = -1; + cache.rhs_symbol_co = -1; + cache.rhs_symbol_eps = 0.0; + cache.rhs_symbols_uploaded = false; + cache.matter_sources_zeroed = false; reset_buffer_map(cache); return false; } @@ -3903,10 +3939,14 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y, {Mh_ Sy, static_cast(matrix_size)}, {Mh_ Sz, static_cast(matrix_size)}, }; - if (!zero_buffers(zero_specs, sizeof(zero_specs) / sizeof(zero_specs[0]))) + if (!cache.matter_sources_zeroed) { - *meta = saved_meta; - return 1; + if (!zero_buffers(zero_specs, sizeof(zero_specs) / sizeof(zero_specs[0]))) + { + *meta = saved_meta; + return 1; + } + cache.matter_sources_zeroed = true; } map_buffer(cache, chi, Mh_ chi); map_buffer(cache, trK, Mh_ trK); @@ -4012,22 +4052,14 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y, //3.1-----for compute_rhs_bssn--------- //cout<<"Size of Meta:"< 1 && abs[1] < dYh) {ijkmin_h[1] = -2; ijkmin2_h[1] = -3;} if(Symmetry > 0 && abs[2] < dZh) {ijkmin_h[2] = -2; ijkmin2_h[2] = -3;} - if(Symmetry > 2 && abs[0] < dXh) {ijkmin3_h[0] = -3;} - if(Symmetry > 2 && abs[1] < dYh) {ijkmin3_h[1] = -3;} - if(Symmetry > 0 && abs[2] < dZh) {ijkmin3_h[2] = -3;} - - cudaMemcpyToSymbol(ijk_max,ijkmax_h,3*sizeof(int)); - cudaMemcpyToSymbol(ijk_min,ijkmin_h,3*sizeof(int)); - cudaMemcpyToSymbol(ijk_min2,ijkmin2_h,3*sizeof(int)); - cudaMemcpyToSymbol(ijk_min3,ijkmin3_h,3*sizeof(int)); - - double d12dxyz_h[3] = {1.0,1.0,1.0}; - double d2dxyz_h[3] = {1.0,1.0,1.0}; - d12dxyz_h[0] /= 12; d12dxyz_h[1] /= 12; d12dxyz_h[2] /= 12; - d12dxyz_h[0] /= dXh; d12dxyz_h[1] /= dYh; d12dxyz_h[2] /= dZh; - d2dxyz_h[0] /= 2; d2dxyz_h[1] /= 2; d2dxyz_h[2] /= 2; - d2dxyz_h[0] /= dXh; d2dxyz_h[1] /= dYh; d2dxyz_h[2] /= dZh; - - cudaMemcpyToSymbol(d12dxyz,d12dxyz_h,3*sizeof(double)); - cudaMemcpyToSymbol(d2dxyz,d2dxyz_h,3*sizeof(double)); - -//3.3--------for fdderivs------------ - double Sdxdxh = 1.0 /( dXh * dXh ); - double Sdydyh = 1.0 /( dYh * dYh ); + if(Symmetry > 2 && abs[0] < dXh) {ijkmin3_h[0] = -3;} + if(Symmetry > 2 && abs[1] < dYh) {ijkmin3_h[1] = -3;} + if(Symmetry > 0 && abs[2] < dZh) {ijkmin3_h[2] = -3;} + + double d12dxyz_h[3] = {1.0,1.0,1.0}; + double d2dxyz_h[3] = {1.0,1.0,1.0}; + d12dxyz_h[0] /= 12; d12dxyz_h[1] /= 12; d12dxyz_h[2] /= 12; + d12dxyz_h[0] /= dXh; d12dxyz_h[1] /= dYh; d12dxyz_h[2] /= dZh; + d2dxyz_h[0] /= 2; d2dxyz_h[1] /= 2; d2dxyz_h[2] /= 2; + d2dxyz_h[0] /= dXh; d2dxyz_h[1] /= dYh; d2dxyz_h[2] /= dZh; + +//3.3--------for fdderivs------------ + double Sdxdxh = 1.0 /( dXh * dXh ); + double Sdydyh = 1.0 /( dYh * dYh ); double Sdzdzh = 1.0 /( dZh * dZh ); double Fdxdxh = 1.0 / 12.0 /( dXh * dXh ); double Fdydyh = 1.0 / 12.0 /( dYh * dYh ); double Fdzdzh = 1.0 / 12.0 /( dZh * dZh ); double Sdxdyh = 1.0/4.0 /( dXh * dYh ); double Sdxdzh = 1.0/4.0 /( dXh * dZh ); - double Sdydzh = 1.0/4.0 /( dYh * dZh ); - double Fdxdyh = 1.0/144.0 /( dXh * dYh ); - double Fdxdzh = 1.0/144.0 /( dXh * dZh ); - double Fdydzh = 1.0/144.0 /( dYh * dZh ); - cudaMemcpyToSymbol(Sdxdx,&Sdxdxh,sizeof(double)); - cudaMemcpyToSymbol(Sdydy,&Sdydyh,sizeof(double)); - cudaMemcpyToSymbol(Sdzdz,&Sdzdzh,sizeof(double)); - cudaMemcpyToSymbol(Sdxdy,&Sdxdyh,sizeof(double)); - cudaMemcpyToSymbol(Sdxdz,&Sdxdzh,sizeof(double)); - cudaMemcpyToSymbol(Sdydz,&Sdydzh,sizeof(double)); - cudaMemcpyToSymbol(Fdxdx,&Fdxdxh,sizeof(double)); - cudaMemcpyToSymbol(Fdydy,&Fdydyh,sizeof(double)); - cudaMemcpyToSymbol(Fdzdz,&Fdzdzh,sizeof(double)); - cudaMemcpyToSymbol(Fdxdy,&Fdxdyh,sizeof(double)); - cudaMemcpyToSymbol(Fdxdz,&Fdxdzh,sizeof(double)); - cudaMemcpyToSymbol(Fdydz,&Fdydzh,sizeof(double)); + double Sdydzh = 1.0/4.0 /( dYh * dZh ); + double Fdxdyh = 1.0/144.0 /( dXh * dYh ); + double Fdxdzh = 1.0/144.0 /( dXh * dZh ); + double Fdydzh = 1.0/144.0 /( dYh * dZh ); + const bool need_rhs_symbol_upload = + !cache.rhs_symbols_uploaded || + cache.rhs_symbol_x != X || + cache.rhs_symbol_y != Y || + cache.rhs_symbol_z != Z || + cache.rhs_symbol_symmetry != Symmetry || + cache.rhs_symbol_lev != Lev || + cache.rhs_symbol_co != effective_co || + cache.rhs_symbol_eps != eps; + if (need_rhs_symbol_upload) + { + cudaMemcpyToSymbol(Symmetry_c,&Symmetry, sizeof(int)); + cudaMemcpyToSymbol(Lev_c,&Lev, sizeof(int)); + cudaMemcpyToSymbol(co_c,&effective_co, sizeof(int)); + cudaMemcpyToSymbol(eps_c,&eps, sizeof(double)); + cudaMemcpyToSymbol(dX,&dXh, sizeof(double)); + cudaMemcpyToSymbol(dY,&dYh, sizeof(double)); + cudaMemcpyToSymbol(dZ,&dZh, sizeof(double)); + cudaMemcpyToSymbol(ijk_max,ijkmax_h,3*sizeof(int)); + cudaMemcpyToSymbol(ijk_min,ijkmin_h,3*sizeof(int)); + cudaMemcpyToSymbol(ijk_min2,ijkmin2_h,3*sizeof(int)); + cudaMemcpyToSymbol(ijk_min3,ijkmin3_h,3*sizeof(int)); + cudaMemcpyToSymbol(d12dxyz,d12dxyz_h,3*sizeof(double)); + cudaMemcpyToSymbol(d2dxyz,d2dxyz_h,3*sizeof(double)); + cudaMemcpyToSymbol(Sdxdx,&Sdxdxh,sizeof(double)); + cudaMemcpyToSymbol(Sdydy,&Sdydyh,sizeof(double)); + cudaMemcpyToSymbol(Sdzdz,&Sdzdzh,sizeof(double)); + cudaMemcpyToSymbol(Sdxdy,&Sdxdyh,sizeof(double)); + cudaMemcpyToSymbol(Sdxdz,&Sdxdzh,sizeof(double)); + cudaMemcpyToSymbol(Sdydz,&Sdydzh,sizeof(double)); + cudaMemcpyToSymbol(Fdxdx,&Fdxdxh,sizeof(double)); + cudaMemcpyToSymbol(Fdydy,&Fdydyh,sizeof(double)); + cudaMemcpyToSymbol(Fdzdz,&Fdzdzh,sizeof(double)); + cudaMemcpyToSymbol(Fdxdy,&Fdxdyh,sizeof(double)); + cudaMemcpyToSymbol(Fdxdz,&Fdxdzh,sizeof(double)); + cudaMemcpyToSymbol(Fdydz,&Fdydzh,sizeof(double)); + cache.rhs_symbol_x = X; + cache.rhs_symbol_y = Y; + cache.rhs_symbol_z = Z; + cache.rhs_symbol_symmetry = Symmetry; + cache.rhs_symbol_lev = Lev; + cache.rhs_symbol_co = effective_co; + cache.rhs_symbol_eps = eps; + cache.rhs_symbols_uploaded = true; + } //3.4---------for lopsided---------------------------