Cache GPU RHS symbols and zero vacuum sources once
This commit is contained in:
@@ -158,6 +158,15 @@ struct GpuRhsCache
|
||||
const double *last_y = nullptr;
|
||||
const double *last_z = nullptr;
|
||||
bool meta_uploaded = false;
|
||||
const double *rhs_symbol_x = nullptr;
|
||||
const double *rhs_symbol_y = nullptr;
|
||||
const double *rhs_symbol_z = nullptr;
|
||||
int rhs_symbol_symmetry = -1;
|
||||
int rhs_symbol_lev = -1;
|
||||
int rhs_symbol_co = -1;
|
||||
double rhs_symbol_eps = 0.0;
|
||||
bool rhs_symbols_uploaded = false;
|
||||
bool matter_sources_zeroed = false;
|
||||
static const int max_mapped_buffers = 512;
|
||||
const double *host_buffers[max_mapped_buffers] = {nullptr};
|
||||
const double *device_buffers[max_mapped_buffers] = {nullptr};
|
||||
@@ -639,6 +648,15 @@ void cleanup_gpu_rhs_cache()
|
||||
cache.last_x = nullptr;
|
||||
cache.last_y = nullptr;
|
||||
cache.last_z = nullptr;
|
||||
cache.rhs_symbol_x = nullptr;
|
||||
cache.rhs_symbol_y = nullptr;
|
||||
cache.rhs_symbol_z = nullptr;
|
||||
cache.rhs_symbol_symmetry = -1;
|
||||
cache.rhs_symbol_lev = -1;
|
||||
cache.rhs_symbol_co = -1;
|
||||
cache.rhs_symbol_eps = 0.0;
|
||||
cache.rhs_symbols_uploaded = false;
|
||||
cache.matter_sources_zeroed = false;
|
||||
reset_buffer_map(cache);
|
||||
reset_external_buffer_map(external_buffer_registry());
|
||||
|
||||
@@ -804,6 +822,15 @@ bool prepare_gpu_rhs_cache(GpuRhsCache &cache, int device, int *ex)
|
||||
cache.last_y = nullptr;
|
||||
cache.last_z = nullptr;
|
||||
cache.meta_uploaded = false;
|
||||
cache.rhs_symbol_x = nullptr;
|
||||
cache.rhs_symbol_y = nullptr;
|
||||
cache.rhs_symbol_z = nullptr;
|
||||
cache.rhs_symbol_symmetry = -1;
|
||||
cache.rhs_symbol_lev = -1;
|
||||
cache.rhs_symbol_co = -1;
|
||||
cache.rhs_symbol_eps = 0.0;
|
||||
cache.rhs_symbols_uploaded = false;
|
||||
cache.matter_sources_zeroed = false;
|
||||
reset_buffer_map(cache);
|
||||
|
||||
Meta *meta = &cache.meta;
|
||||
@@ -987,6 +1014,15 @@ bool prepare_gpu_rhs_cache(GpuRhsCache &cache, int device, int *ex)
|
||||
cache.last_x = nullptr;
|
||||
cache.last_y = nullptr;
|
||||
cache.last_z = nullptr;
|
||||
cache.rhs_symbol_x = nullptr;
|
||||
cache.rhs_symbol_y = nullptr;
|
||||
cache.rhs_symbol_z = nullptr;
|
||||
cache.rhs_symbol_symmetry = -1;
|
||||
cache.rhs_symbol_lev = -1;
|
||||
cache.rhs_symbol_co = -1;
|
||||
cache.rhs_symbol_eps = 0.0;
|
||||
cache.rhs_symbols_uploaded = false;
|
||||
cache.matter_sources_zeroed = false;
|
||||
reset_buffer_map(cache);
|
||||
return false;
|
||||
}
|
||||
@@ -3903,10 +3939,14 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y,
|
||||
{Mh_ Sy, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ Sz, static_cast<size_t>(matrix_size)},
|
||||
};
|
||||
if (!zero_buffers(zero_specs, sizeof(zero_specs) / sizeof(zero_specs[0])))
|
||||
if (!cache.matter_sources_zeroed)
|
||||
{
|
||||
*meta = saved_meta;
|
||||
return 1;
|
||||
if (!zero_buffers(zero_specs, sizeof(zero_specs) / sizeof(zero_specs[0])))
|
||||
{
|
||||
*meta = saved_meta;
|
||||
return 1;
|
||||
}
|
||||
cache.matter_sources_zeroed = true;
|
||||
}
|
||||
map_buffer(cache, chi, Mh_ chi);
|
||||
map_buffer(cache, trK, Mh_ trK);
|
||||
@@ -4012,19 +4052,11 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y,
|
||||
//3.1-----for compute_rhs_bssn---------
|
||||
//cout<<"Size of Meta:"<<sizeof(Meta)<<endl;
|
||||
cudaMemcpyToSymbol(T_c,&T, sizeof(double));
|
||||
cudaMemcpyToSymbol(Symmetry_c,&Symmetry, sizeof(int));
|
||||
cudaMemcpyToSymbol(Lev_c,&Lev, sizeof(int));
|
||||
cudaMemcpyToSymbol(co_c,&effective_co, sizeof(int));
|
||||
cudaMemcpyToSymbol(eps_c,&eps, sizeof(double));
|
||||
|
||||
double dXh = X[1] - X[0];
|
||||
double dYh = Y[1] - Y[0];
|
||||
double dZh = Z[1] - Z[0];
|
||||
|
||||
cudaMemcpyToSymbol(dX,&dXh, sizeof(double));
|
||||
cudaMemcpyToSymbol(dY,&dYh, sizeof(double));
|
||||
cudaMemcpyToSymbol(dZ,&dZh, sizeof(double));
|
||||
|
||||
|
||||
//3.2--------for fderivs------------
|
||||
int ijkmax_h[3] = {ex[0]-1,ex[1]-1,ex[2]-1};
|
||||
@@ -4044,11 +4076,6 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y,
|
||||
if(Symmetry > 2 && abs[1] < dYh) {ijkmin3_h[1] = -3;}
|
||||
if(Symmetry > 0 && abs[2] < dZh) {ijkmin3_h[2] = -3;}
|
||||
|
||||
cudaMemcpyToSymbol(ijk_max,ijkmax_h,3*sizeof(int));
|
||||
cudaMemcpyToSymbol(ijk_min,ijkmin_h,3*sizeof(int));
|
||||
cudaMemcpyToSymbol(ijk_min2,ijkmin2_h,3*sizeof(int));
|
||||
cudaMemcpyToSymbol(ijk_min3,ijkmin3_h,3*sizeof(int));
|
||||
|
||||
double d12dxyz_h[3] = {1.0,1.0,1.0};
|
||||
double d2dxyz_h[3] = {1.0,1.0,1.0};
|
||||
d12dxyz_h[0] /= 12; d12dxyz_h[1] /= 12; d12dxyz_h[2] /= 12;
|
||||
@@ -4056,9 +4083,6 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y,
|
||||
d2dxyz_h[0] /= 2; d2dxyz_h[1] /= 2; d2dxyz_h[2] /= 2;
|
||||
d2dxyz_h[0] /= dXh; d2dxyz_h[1] /= dYh; d2dxyz_h[2] /= dZh;
|
||||
|
||||
cudaMemcpyToSymbol(d12dxyz,d12dxyz_h,3*sizeof(double));
|
||||
cudaMemcpyToSymbol(d2dxyz,d2dxyz_h,3*sizeof(double));
|
||||
|
||||
//3.3--------for fdderivs------------
|
||||
double Sdxdxh = 1.0 /( dXh * dXh );
|
||||
double Sdydyh = 1.0 /( dYh * dYh );
|
||||
@@ -4072,18 +4096,51 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y,
|
||||
double Fdxdyh = 1.0/144.0 /( dXh * dYh );
|
||||
double Fdxdzh = 1.0/144.0 /( dXh * dZh );
|
||||
double Fdydzh = 1.0/144.0 /( dYh * dZh );
|
||||
cudaMemcpyToSymbol(Sdxdx,&Sdxdxh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Sdydy,&Sdydyh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Sdzdz,&Sdzdzh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Sdxdy,&Sdxdyh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Sdxdz,&Sdxdzh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Sdydz,&Sdydzh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Fdxdx,&Fdxdxh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Fdydy,&Fdydyh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Fdzdz,&Fdzdzh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Fdxdy,&Fdxdyh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Fdxdz,&Fdxdzh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Fdydz,&Fdydzh,sizeof(double));
|
||||
const bool need_rhs_symbol_upload =
|
||||
!cache.rhs_symbols_uploaded ||
|
||||
cache.rhs_symbol_x != X ||
|
||||
cache.rhs_symbol_y != Y ||
|
||||
cache.rhs_symbol_z != Z ||
|
||||
cache.rhs_symbol_symmetry != Symmetry ||
|
||||
cache.rhs_symbol_lev != Lev ||
|
||||
cache.rhs_symbol_co != effective_co ||
|
||||
cache.rhs_symbol_eps != eps;
|
||||
if (need_rhs_symbol_upload)
|
||||
{
|
||||
cudaMemcpyToSymbol(Symmetry_c,&Symmetry, sizeof(int));
|
||||
cudaMemcpyToSymbol(Lev_c,&Lev, sizeof(int));
|
||||
cudaMemcpyToSymbol(co_c,&effective_co, sizeof(int));
|
||||
cudaMemcpyToSymbol(eps_c,&eps, sizeof(double));
|
||||
cudaMemcpyToSymbol(dX,&dXh, sizeof(double));
|
||||
cudaMemcpyToSymbol(dY,&dYh, sizeof(double));
|
||||
cudaMemcpyToSymbol(dZ,&dZh, sizeof(double));
|
||||
cudaMemcpyToSymbol(ijk_max,ijkmax_h,3*sizeof(int));
|
||||
cudaMemcpyToSymbol(ijk_min,ijkmin_h,3*sizeof(int));
|
||||
cudaMemcpyToSymbol(ijk_min2,ijkmin2_h,3*sizeof(int));
|
||||
cudaMemcpyToSymbol(ijk_min3,ijkmin3_h,3*sizeof(int));
|
||||
cudaMemcpyToSymbol(d12dxyz,d12dxyz_h,3*sizeof(double));
|
||||
cudaMemcpyToSymbol(d2dxyz,d2dxyz_h,3*sizeof(double));
|
||||
cudaMemcpyToSymbol(Sdxdx,&Sdxdxh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Sdydy,&Sdydyh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Sdzdz,&Sdzdzh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Sdxdy,&Sdxdyh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Sdxdz,&Sdxdzh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Sdydz,&Sdydzh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Fdxdx,&Fdxdxh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Fdydy,&Fdydyh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Fdzdz,&Fdzdzh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Fdxdy,&Fdxdyh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Fdxdz,&Fdxdzh,sizeof(double));
|
||||
cudaMemcpyToSymbol(Fdydz,&Fdydzh,sizeof(double));
|
||||
cache.rhs_symbol_x = X;
|
||||
cache.rhs_symbol_y = Y;
|
||||
cache.rhs_symbol_z = Z;
|
||||
cache.rhs_symbol_symmetry = Symmetry;
|
||||
cache.rhs_symbol_lev = Lev;
|
||||
cache.rhs_symbol_co = effective_co;
|
||||
cache.rhs_symbol_eps = eps;
|
||||
cache.rhs_symbols_uploaded = true;
|
||||
}
|
||||
|
||||
//3.4---------for lopsided---------------------------
|
||||
|
||||
|
||||
Reference in New Issue
Block a user