Cache GPU RHS symbols and zero vacuum sources once

This commit is contained in:
2026-04-12 22:42:58 +08:00
parent 7064ebd5b4
commit 6fd7ef2b55

View File

@@ -158,6 +158,15 @@ struct GpuRhsCache
const double *last_y = nullptr;
const double *last_z = nullptr;
bool meta_uploaded = false;
const double *rhs_symbol_x = nullptr;
const double *rhs_symbol_y = nullptr;
const double *rhs_symbol_z = nullptr;
int rhs_symbol_symmetry = -1;
int rhs_symbol_lev = -1;
int rhs_symbol_co = -1;
double rhs_symbol_eps = 0.0;
bool rhs_symbols_uploaded = false;
bool matter_sources_zeroed = false;
static const int max_mapped_buffers = 512;
const double *host_buffers[max_mapped_buffers] = {nullptr};
const double *device_buffers[max_mapped_buffers] = {nullptr};
@@ -639,6 +648,15 @@ void cleanup_gpu_rhs_cache()
cache.last_x = nullptr;
cache.last_y = nullptr;
cache.last_z = nullptr;
cache.rhs_symbol_x = nullptr;
cache.rhs_symbol_y = nullptr;
cache.rhs_symbol_z = nullptr;
cache.rhs_symbol_symmetry = -1;
cache.rhs_symbol_lev = -1;
cache.rhs_symbol_co = -1;
cache.rhs_symbol_eps = 0.0;
cache.rhs_symbols_uploaded = false;
cache.matter_sources_zeroed = false;
reset_buffer_map(cache);
reset_external_buffer_map(external_buffer_registry());
@@ -804,6 +822,15 @@ bool prepare_gpu_rhs_cache(GpuRhsCache &cache, int device, int *ex)
cache.last_y = nullptr;
cache.last_z = nullptr;
cache.meta_uploaded = false;
cache.rhs_symbol_x = nullptr;
cache.rhs_symbol_y = nullptr;
cache.rhs_symbol_z = nullptr;
cache.rhs_symbol_symmetry = -1;
cache.rhs_symbol_lev = -1;
cache.rhs_symbol_co = -1;
cache.rhs_symbol_eps = 0.0;
cache.rhs_symbols_uploaded = false;
cache.matter_sources_zeroed = false;
reset_buffer_map(cache);
Meta *meta = &cache.meta;
@@ -987,6 +1014,15 @@ bool prepare_gpu_rhs_cache(GpuRhsCache &cache, int device, int *ex)
cache.last_x = nullptr;
cache.last_y = nullptr;
cache.last_z = nullptr;
cache.rhs_symbol_x = nullptr;
cache.rhs_symbol_y = nullptr;
cache.rhs_symbol_z = nullptr;
cache.rhs_symbol_symmetry = -1;
cache.rhs_symbol_lev = -1;
cache.rhs_symbol_co = -1;
cache.rhs_symbol_eps = 0.0;
cache.rhs_symbols_uploaded = false;
cache.matter_sources_zeroed = false;
reset_buffer_map(cache);
return false;
}
@@ -3903,10 +3939,14 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y,
{Mh_ Sy, static_cast<size_t>(matrix_size)},
{Mh_ Sz, static_cast<size_t>(matrix_size)},
};
if (!zero_buffers(zero_specs, sizeof(zero_specs) / sizeof(zero_specs[0])))
if (!cache.matter_sources_zeroed)
{
*meta = saved_meta;
return 1;
if (!zero_buffers(zero_specs, sizeof(zero_specs) / sizeof(zero_specs[0])))
{
*meta = saved_meta;
return 1;
}
cache.matter_sources_zeroed = true;
}
map_buffer(cache, chi, Mh_ chi);
map_buffer(cache, trK, Mh_ trK);
@@ -4012,19 +4052,11 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y,
//3.1-----for compute_rhs_bssn---------
//cout<<"Size of Meta:"<<sizeof(Meta)<<endl;
cudaMemcpyToSymbol(T_c,&T, sizeof(double));
cudaMemcpyToSymbol(Symmetry_c,&Symmetry, sizeof(int));
cudaMemcpyToSymbol(Lev_c,&Lev, sizeof(int));
cudaMemcpyToSymbol(co_c,&effective_co, sizeof(int));
cudaMemcpyToSymbol(eps_c,&eps, sizeof(double));
double dXh = X[1] - X[0];
double dYh = Y[1] - Y[0];
double dZh = Z[1] - Z[0];
cudaMemcpyToSymbol(dX,&dXh, sizeof(double));
cudaMemcpyToSymbol(dY,&dYh, sizeof(double));
cudaMemcpyToSymbol(dZ,&dZh, sizeof(double));
//3.2--------for fderivs------------
int ijkmax_h[3] = {ex[0]-1,ex[1]-1,ex[2]-1};
@@ -4044,11 +4076,6 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y,
if(Symmetry > 2 && abs[1] < dYh) {ijkmin3_h[1] = -3;}
if(Symmetry > 0 && abs[2] < dZh) {ijkmin3_h[2] = -3;}
cudaMemcpyToSymbol(ijk_max,ijkmax_h,3*sizeof(int));
cudaMemcpyToSymbol(ijk_min,ijkmin_h,3*sizeof(int));
cudaMemcpyToSymbol(ijk_min2,ijkmin2_h,3*sizeof(int));
cudaMemcpyToSymbol(ijk_min3,ijkmin3_h,3*sizeof(int));
double d12dxyz_h[3] = {1.0,1.0,1.0};
double d2dxyz_h[3] = {1.0,1.0,1.0};
d12dxyz_h[0] /= 12; d12dxyz_h[1] /= 12; d12dxyz_h[2] /= 12;
@@ -4056,9 +4083,6 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y,
d2dxyz_h[0] /= 2; d2dxyz_h[1] /= 2; d2dxyz_h[2] /= 2;
d2dxyz_h[0] /= dXh; d2dxyz_h[1] /= dYh; d2dxyz_h[2] /= dZh;
cudaMemcpyToSymbol(d12dxyz,d12dxyz_h,3*sizeof(double));
cudaMemcpyToSymbol(d2dxyz,d2dxyz_h,3*sizeof(double));
//3.3--------for fdderivs------------
double Sdxdxh = 1.0 /( dXh * dXh );
double Sdydyh = 1.0 /( dYh * dYh );
@@ -4072,18 +4096,51 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y,
double Fdxdyh = 1.0/144.0 /( dXh * dYh );
double Fdxdzh = 1.0/144.0 /( dXh * dZh );
double Fdydzh = 1.0/144.0 /( dYh * dZh );
cudaMemcpyToSymbol(Sdxdx,&Sdxdxh,sizeof(double));
cudaMemcpyToSymbol(Sdydy,&Sdydyh,sizeof(double));
cudaMemcpyToSymbol(Sdzdz,&Sdzdzh,sizeof(double));
cudaMemcpyToSymbol(Sdxdy,&Sdxdyh,sizeof(double));
cudaMemcpyToSymbol(Sdxdz,&Sdxdzh,sizeof(double));
cudaMemcpyToSymbol(Sdydz,&Sdydzh,sizeof(double));
cudaMemcpyToSymbol(Fdxdx,&Fdxdxh,sizeof(double));
cudaMemcpyToSymbol(Fdydy,&Fdydyh,sizeof(double));
cudaMemcpyToSymbol(Fdzdz,&Fdzdzh,sizeof(double));
cudaMemcpyToSymbol(Fdxdy,&Fdxdyh,sizeof(double));
cudaMemcpyToSymbol(Fdxdz,&Fdxdzh,sizeof(double));
cudaMemcpyToSymbol(Fdydz,&Fdydzh,sizeof(double));
// Upload the RHS constant symbols only when the cached key no longer matches
// this call. The key is (X, Y, Z pointers, Symmetry, Lev, effective_co, eps).
// NOTE(review): the uploaded values are derived from the *contents* of X/Y/Z
// (dXh/dYh/dZh, the ijk_min* ghost offsets) and from ex (ijkmax_h), while the
// key compares only the pointers. Confirm that a given X/Y/Z pointer always
// denotes the same spacing, origin and extents, or that the cache is reset
// whenever those change.
const bool need_rhs_symbol_upload =
    !cache.rhs_symbols_uploaded ||
    cache.rhs_symbol_x != X ||
    cache.rhs_symbol_y != Y ||
    cache.rhs_symbol_z != Z ||
    cache.rhs_symbol_symmetry != Symmetry ||
    cache.rhs_symbol_lev != Lev ||
    cache.rhs_symbol_co != effective_co ||
    cache.rhs_symbol_eps != eps;
if (need_rhs_symbol_upload)
{
    // Remember the first failing copy. Every copy is still attempted, in the
    // same order as before, so the success path is unchanged.
    cudaError_t rhs_symbol_status = cudaSuccess;
    const auto track_rhs_symbol_copy = [&rhs_symbol_status](cudaError_t err)
    {
        if (rhs_symbol_status == cudaSuccess)
        {
            rhs_symbol_status = err;
        }
    };

    // Scalars for compute_rhs_bssn.
    track_rhs_symbol_copy(cudaMemcpyToSymbol(Symmetry_c,&Symmetry, sizeof(int)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(Lev_c,&Lev, sizeof(int)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(co_c,&effective_co, sizeof(int)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(eps_c,&eps, sizeof(double)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(dX,&dXh, sizeof(double)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(dY,&dYh, sizeof(double)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(dZ,&dZh, sizeof(double)));

    // Index bounds and first-derivative factors (fderivs).
    track_rhs_symbol_copy(cudaMemcpyToSymbol(ijk_max,ijkmax_h,3*sizeof(int)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(ijk_min,ijkmin_h,3*sizeof(int)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(ijk_min2,ijkmin2_h,3*sizeof(int)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(ijk_min3,ijkmin3_h,3*sizeof(int)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(d12dxyz,d12dxyz_h,3*sizeof(double)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(d2dxyz,d2dxyz_h,3*sizeof(double)));

    // Second-derivative factors (fdderivs).
    track_rhs_symbol_copy(cudaMemcpyToSymbol(Sdxdx,&Sdxdxh,sizeof(double)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(Sdydy,&Sdydyh,sizeof(double)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(Sdzdz,&Sdzdzh,sizeof(double)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(Sdxdy,&Sdxdyh,sizeof(double)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(Sdxdz,&Sdxdzh,sizeof(double)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(Sdydz,&Sdydzh,sizeof(double)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(Fdxdx,&Fdxdxh,sizeof(double)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(Fdydy,&Fdydyh,sizeof(double)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(Fdzdz,&Fdzdzh,sizeof(double)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(Fdxdy,&Fdxdyh,sizeof(double)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(Fdxdz,&Fdxdzh,sizeof(double)));
    track_rhs_symbol_copy(cudaMemcpyToSymbol(Fdydz,&Fdydzh,sizeof(double)));

    if (rhs_symbol_status == cudaSuccess)
    {
        // Commit the key only after every symbol is known to be on the device.
        cache.rhs_symbol_x = X;
        cache.rhs_symbol_y = Y;
        cache.rhs_symbol_z = Z;
        cache.rhs_symbol_symmetry = Symmetry;
        cache.rhs_symbol_lev = Lev;
        cache.rhs_symbol_co = effective_co;
        cache.rhs_symbol_eps = eps;
        cache.rhs_symbols_uploaded = true;
    }
    else
    {
        // The device symbols may be partially updated: invalidate the cache so
        // the next call retries instead of trusting stale constants.
        // NOTE(review): the failure is not propagated to the caller here;
        // consider returning an error once the required cleanup at this point
        // of gpu_rhs is confirmed.
        cache.rhs_symbols_uploaded = false;
    }
}
//3.4---------for lopsided---------------------------