Cache GPU RHS symbols and zero vacuum sources once
This commit is contained in:
@@ -158,6 +158,15 @@ struct GpuRhsCache
|
|||||||
const double *last_y = nullptr;
|
const double *last_y = nullptr;
|
||||||
const double *last_z = nullptr;
|
const double *last_z = nullptr;
|
||||||
bool meta_uploaded = false;
|
bool meta_uploaded = false;
|
||||||
|
const double *rhs_symbol_x = nullptr;
|
||||||
|
const double *rhs_symbol_y = nullptr;
|
||||||
|
const double *rhs_symbol_z = nullptr;
|
||||||
|
int rhs_symbol_symmetry = -1;
|
||||||
|
int rhs_symbol_lev = -1;
|
||||||
|
int rhs_symbol_co = -1;
|
||||||
|
double rhs_symbol_eps = 0.0;
|
||||||
|
bool rhs_symbols_uploaded = false;
|
||||||
|
bool matter_sources_zeroed = false;
|
||||||
static const int max_mapped_buffers = 512;
|
static const int max_mapped_buffers = 512;
|
||||||
const double *host_buffers[max_mapped_buffers] = {nullptr};
|
const double *host_buffers[max_mapped_buffers] = {nullptr};
|
||||||
const double *device_buffers[max_mapped_buffers] = {nullptr};
|
const double *device_buffers[max_mapped_buffers] = {nullptr};
|
||||||
@@ -639,6 +648,15 @@ void cleanup_gpu_rhs_cache()
|
|||||||
cache.last_x = nullptr;
|
cache.last_x = nullptr;
|
||||||
cache.last_y = nullptr;
|
cache.last_y = nullptr;
|
||||||
cache.last_z = nullptr;
|
cache.last_z = nullptr;
|
||||||
|
cache.rhs_symbol_x = nullptr;
|
||||||
|
cache.rhs_symbol_y = nullptr;
|
||||||
|
cache.rhs_symbol_z = nullptr;
|
||||||
|
cache.rhs_symbol_symmetry = -1;
|
||||||
|
cache.rhs_symbol_lev = -1;
|
||||||
|
cache.rhs_symbol_co = -1;
|
||||||
|
cache.rhs_symbol_eps = 0.0;
|
||||||
|
cache.rhs_symbols_uploaded = false;
|
||||||
|
cache.matter_sources_zeroed = false;
|
||||||
reset_buffer_map(cache);
|
reset_buffer_map(cache);
|
||||||
reset_external_buffer_map(external_buffer_registry());
|
reset_external_buffer_map(external_buffer_registry());
|
||||||
|
|
||||||
@@ -804,6 +822,15 @@ bool prepare_gpu_rhs_cache(GpuRhsCache &cache, int device, int *ex)
|
|||||||
cache.last_y = nullptr;
|
cache.last_y = nullptr;
|
||||||
cache.last_z = nullptr;
|
cache.last_z = nullptr;
|
||||||
cache.meta_uploaded = false;
|
cache.meta_uploaded = false;
|
||||||
|
cache.rhs_symbol_x = nullptr;
|
||||||
|
cache.rhs_symbol_y = nullptr;
|
||||||
|
cache.rhs_symbol_z = nullptr;
|
||||||
|
cache.rhs_symbol_symmetry = -1;
|
||||||
|
cache.rhs_symbol_lev = -1;
|
||||||
|
cache.rhs_symbol_co = -1;
|
||||||
|
cache.rhs_symbol_eps = 0.0;
|
||||||
|
cache.rhs_symbols_uploaded = false;
|
||||||
|
cache.matter_sources_zeroed = false;
|
||||||
reset_buffer_map(cache);
|
reset_buffer_map(cache);
|
||||||
|
|
||||||
Meta *meta = &cache.meta;
|
Meta *meta = &cache.meta;
|
||||||
@@ -987,6 +1014,15 @@ bool prepare_gpu_rhs_cache(GpuRhsCache &cache, int device, int *ex)
|
|||||||
cache.last_x = nullptr;
|
cache.last_x = nullptr;
|
||||||
cache.last_y = nullptr;
|
cache.last_y = nullptr;
|
||||||
cache.last_z = nullptr;
|
cache.last_z = nullptr;
|
||||||
|
cache.rhs_symbol_x = nullptr;
|
||||||
|
cache.rhs_symbol_y = nullptr;
|
||||||
|
cache.rhs_symbol_z = nullptr;
|
||||||
|
cache.rhs_symbol_symmetry = -1;
|
||||||
|
cache.rhs_symbol_lev = -1;
|
||||||
|
cache.rhs_symbol_co = -1;
|
||||||
|
cache.rhs_symbol_eps = 0.0;
|
||||||
|
cache.rhs_symbols_uploaded = false;
|
||||||
|
cache.matter_sources_zeroed = false;
|
||||||
reset_buffer_map(cache);
|
reset_buffer_map(cache);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -3903,10 +3939,14 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y,
|
|||||||
{Mh_ Sy, static_cast<size_t>(matrix_size)},
|
{Mh_ Sy, static_cast<size_t>(matrix_size)},
|
||||||
{Mh_ Sz, static_cast<size_t>(matrix_size)},
|
{Mh_ Sz, static_cast<size_t>(matrix_size)},
|
||||||
};
|
};
|
||||||
if (!zero_buffers(zero_specs, sizeof(zero_specs) / sizeof(zero_specs[0])))
|
if (!cache.matter_sources_zeroed)
|
||||||
{
|
{
|
||||||
*meta = saved_meta;
|
if (!zero_buffers(zero_specs, sizeof(zero_specs) / sizeof(zero_specs[0])))
|
||||||
return 1;
|
{
|
||||||
|
*meta = saved_meta;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
cache.matter_sources_zeroed = true;
|
||||||
}
|
}
|
||||||
map_buffer(cache, chi, Mh_ chi);
|
map_buffer(cache, chi, Mh_ chi);
|
||||||
map_buffer(cache, trK, Mh_ trK);
|
map_buffer(cache, trK, Mh_ trK);
|
||||||
@@ -4012,22 +4052,14 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y,
|
|||||||
//3.1-----for compute_rhs_bssn---------
|
//3.1-----for compute_rhs_bssn---------
|
||||||
//cout<<"Size of Meta:"<<sizeof(Meta)<<endl;
|
//cout<<"Size of Meta:"<<sizeof(Meta)<<endl;
|
||||||
cudaMemcpyToSymbol(T_c,&T, sizeof(double));
|
cudaMemcpyToSymbol(T_c,&T, sizeof(double));
|
||||||
cudaMemcpyToSymbol(Symmetry_c,&Symmetry, sizeof(int));
|
|
||||||
cudaMemcpyToSymbol(Lev_c,&Lev, sizeof(int));
|
|
||||||
cudaMemcpyToSymbol(co_c,&effective_co, sizeof(int));
|
|
||||||
cudaMemcpyToSymbol(eps_c,&eps, sizeof(double));
|
|
||||||
|
|
||||||
double dXh = X[1] - X[0];
|
double dXh = X[1] - X[0];
|
||||||
double dYh = Y[1] - Y[0];
|
double dYh = Y[1] - Y[0];
|
||||||
double dZh = Z[1] - Z[0];
|
double dZh = Z[1] - Z[0];
|
||||||
|
|
||||||
|
|
||||||
cudaMemcpyToSymbol(dX,&dXh, sizeof(double));
|
//3.2--------for fderivs------------
|
||||||
cudaMemcpyToSymbol(dY,&dYh, sizeof(double));
|
int ijkmax_h[3] = {ex[0]-1,ex[1]-1,ex[2]-1};
|
||||||
cudaMemcpyToSymbol(dZ,&dZh, sizeof(double));
|
|
||||||
|
|
||||||
|
|
||||||
//3.2--------for fderivs------------
|
|
||||||
int ijkmax_h[3] = {ex[0]-1,ex[1]-1,ex[2]-1};
|
|
||||||
int ijkmin_h[3] = {0,0,0};
|
int ijkmin_h[3] = {0,0,0};
|
||||||
int ijkmin2_h[3] = {0,0,0};
|
int ijkmin2_h[3] = {0,0,0};
|
||||||
int ijkmin3_h[3] = {0,0,0};
|
int ijkmin3_h[3] = {0,0,0};
|
||||||
@@ -4040,50 +4072,75 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y,
|
|||||||
if(Symmetry > 1 && abs[1] < dYh) {ijkmin_h[1] = -2; ijkmin2_h[1] = -3;}
|
if(Symmetry > 1 && abs[1] < dYh) {ijkmin_h[1] = -2; ijkmin2_h[1] = -3;}
|
||||||
if(Symmetry > 0 && abs[2] < dZh) {ijkmin_h[2] = -2; ijkmin2_h[2] = -3;}
|
if(Symmetry > 0 && abs[2] < dZh) {ijkmin_h[2] = -2; ijkmin2_h[2] = -3;}
|
||||||
|
|
||||||
if(Symmetry > 2 && abs[0] < dXh) {ijkmin3_h[0] = -3;}
|
if(Symmetry > 2 && abs[0] < dXh) {ijkmin3_h[0] = -3;}
|
||||||
if(Symmetry > 2 && abs[1] < dYh) {ijkmin3_h[1] = -3;}
|
if(Symmetry > 2 && abs[1] < dYh) {ijkmin3_h[1] = -3;}
|
||||||
if(Symmetry > 0 && abs[2] < dZh) {ijkmin3_h[2] = -3;}
|
if(Symmetry > 0 && abs[2] < dZh) {ijkmin3_h[2] = -3;}
|
||||||
|
|
||||||
cudaMemcpyToSymbol(ijk_max,ijkmax_h,3*sizeof(int));
|
double d12dxyz_h[3] = {1.0,1.0,1.0};
|
||||||
cudaMemcpyToSymbol(ijk_min,ijkmin_h,3*sizeof(int));
|
double d2dxyz_h[3] = {1.0,1.0,1.0};
|
||||||
cudaMemcpyToSymbol(ijk_min2,ijkmin2_h,3*sizeof(int));
|
d12dxyz_h[0] /= 12; d12dxyz_h[1] /= 12; d12dxyz_h[2] /= 12;
|
||||||
cudaMemcpyToSymbol(ijk_min3,ijkmin3_h,3*sizeof(int));
|
d12dxyz_h[0] /= dXh; d12dxyz_h[1] /= dYh; d12dxyz_h[2] /= dZh;
|
||||||
|
d2dxyz_h[0] /= 2; d2dxyz_h[1] /= 2; d2dxyz_h[2] /= 2;
|
||||||
double d12dxyz_h[3] = {1.0,1.0,1.0};
|
d2dxyz_h[0] /= dXh; d2dxyz_h[1] /= dYh; d2dxyz_h[2] /= dZh;
|
||||||
double d2dxyz_h[3] = {1.0,1.0,1.0};
|
|
||||||
d12dxyz_h[0] /= 12; d12dxyz_h[1] /= 12; d12dxyz_h[2] /= 12;
|
//3.3--------for fdderivs------------
|
||||||
d12dxyz_h[0] /= dXh; d12dxyz_h[1] /= dYh; d12dxyz_h[2] /= dZh;
|
double Sdxdxh = 1.0 /( dXh * dXh );
|
||||||
d2dxyz_h[0] /= 2; d2dxyz_h[1] /= 2; d2dxyz_h[2] /= 2;
|
double Sdydyh = 1.0 /( dYh * dYh );
|
||||||
d2dxyz_h[0] /= dXh; d2dxyz_h[1] /= dYh; d2dxyz_h[2] /= dZh;
|
|
||||||
|
|
||||||
cudaMemcpyToSymbol(d12dxyz,d12dxyz_h,3*sizeof(double));
|
|
||||||
cudaMemcpyToSymbol(d2dxyz,d2dxyz_h,3*sizeof(double));
|
|
||||||
|
|
||||||
//3.3--------for fdderivs------------
|
|
||||||
double Sdxdxh = 1.0 /( dXh * dXh );
|
|
||||||
double Sdydyh = 1.0 /( dYh * dYh );
|
|
||||||
double Sdzdzh = 1.0 /( dZh * dZh );
|
double Sdzdzh = 1.0 /( dZh * dZh );
|
||||||
double Fdxdxh = 1.0 / 12.0 /( dXh * dXh );
|
double Fdxdxh = 1.0 / 12.0 /( dXh * dXh );
|
||||||
double Fdydyh = 1.0 / 12.0 /( dYh * dYh );
|
double Fdydyh = 1.0 / 12.0 /( dYh * dYh );
|
||||||
double Fdzdzh = 1.0 / 12.0 /( dZh * dZh );
|
double Fdzdzh = 1.0 / 12.0 /( dZh * dZh );
|
||||||
double Sdxdyh = 1.0/4.0 /( dXh * dYh );
|
double Sdxdyh = 1.0/4.0 /( dXh * dYh );
|
||||||
double Sdxdzh = 1.0/4.0 /( dXh * dZh );
|
double Sdxdzh = 1.0/4.0 /( dXh * dZh );
|
||||||
double Sdydzh = 1.0/4.0 /( dYh * dZh );
|
double Sdydzh = 1.0/4.0 /( dYh * dZh );
|
||||||
double Fdxdyh = 1.0/144.0 /( dXh * dYh );
|
double Fdxdyh = 1.0/144.0 /( dXh * dYh );
|
||||||
double Fdxdzh = 1.0/144.0 /( dXh * dZh );
|
double Fdxdzh = 1.0/144.0 /( dXh * dZh );
|
||||||
double Fdydzh = 1.0/144.0 /( dYh * dZh );
|
double Fdydzh = 1.0/144.0 /( dYh * dZh );
|
||||||
cudaMemcpyToSymbol(Sdxdx,&Sdxdxh,sizeof(double));
|
const bool need_rhs_symbol_upload =
|
||||||
cudaMemcpyToSymbol(Sdydy,&Sdydyh,sizeof(double));
|
!cache.rhs_symbols_uploaded ||
|
||||||
cudaMemcpyToSymbol(Sdzdz,&Sdzdzh,sizeof(double));
|
cache.rhs_symbol_x != X ||
|
||||||
cudaMemcpyToSymbol(Sdxdy,&Sdxdyh,sizeof(double));
|
cache.rhs_symbol_y != Y ||
|
||||||
cudaMemcpyToSymbol(Sdxdz,&Sdxdzh,sizeof(double));
|
cache.rhs_symbol_z != Z ||
|
||||||
cudaMemcpyToSymbol(Sdydz,&Sdydzh,sizeof(double));
|
cache.rhs_symbol_symmetry != Symmetry ||
|
||||||
cudaMemcpyToSymbol(Fdxdx,&Fdxdxh,sizeof(double));
|
cache.rhs_symbol_lev != Lev ||
|
||||||
cudaMemcpyToSymbol(Fdydy,&Fdydyh,sizeof(double));
|
cache.rhs_symbol_co != effective_co ||
|
||||||
cudaMemcpyToSymbol(Fdzdz,&Fdzdzh,sizeof(double));
|
cache.rhs_symbol_eps != eps;
|
||||||
cudaMemcpyToSymbol(Fdxdy,&Fdxdyh,sizeof(double));
|
if (need_rhs_symbol_upload)
|
||||||
cudaMemcpyToSymbol(Fdxdz,&Fdxdzh,sizeof(double));
|
{
|
||||||
cudaMemcpyToSymbol(Fdydz,&Fdydzh,sizeof(double));
|
cudaMemcpyToSymbol(Symmetry_c,&Symmetry, sizeof(int));
|
||||||
|
cudaMemcpyToSymbol(Lev_c,&Lev, sizeof(int));
|
||||||
|
cudaMemcpyToSymbol(co_c,&effective_co, sizeof(int));
|
||||||
|
cudaMemcpyToSymbol(eps_c,&eps, sizeof(double));
|
||||||
|
cudaMemcpyToSymbol(dX,&dXh, sizeof(double));
|
||||||
|
cudaMemcpyToSymbol(dY,&dYh, sizeof(double));
|
||||||
|
cudaMemcpyToSymbol(dZ,&dZh, sizeof(double));
|
||||||
|
cudaMemcpyToSymbol(ijk_max,ijkmax_h,3*sizeof(int));
|
||||||
|
cudaMemcpyToSymbol(ijk_min,ijkmin_h,3*sizeof(int));
|
||||||
|
cudaMemcpyToSymbol(ijk_min2,ijkmin2_h,3*sizeof(int));
|
||||||
|
cudaMemcpyToSymbol(ijk_min3,ijkmin3_h,3*sizeof(int));
|
||||||
|
cudaMemcpyToSymbol(d12dxyz,d12dxyz_h,3*sizeof(double));
|
||||||
|
cudaMemcpyToSymbol(d2dxyz,d2dxyz_h,3*sizeof(double));
|
||||||
|
cudaMemcpyToSymbol(Sdxdx,&Sdxdxh,sizeof(double));
|
||||||
|
cudaMemcpyToSymbol(Sdydy,&Sdydyh,sizeof(double));
|
||||||
|
cudaMemcpyToSymbol(Sdzdz,&Sdzdzh,sizeof(double));
|
||||||
|
cudaMemcpyToSymbol(Sdxdy,&Sdxdyh,sizeof(double));
|
||||||
|
cudaMemcpyToSymbol(Sdxdz,&Sdxdzh,sizeof(double));
|
||||||
|
cudaMemcpyToSymbol(Sdydz,&Sdydzh,sizeof(double));
|
||||||
|
cudaMemcpyToSymbol(Fdxdx,&Fdxdxh,sizeof(double));
|
||||||
|
cudaMemcpyToSymbol(Fdydy,&Fdydyh,sizeof(double));
|
||||||
|
cudaMemcpyToSymbol(Fdzdz,&Fdzdzh,sizeof(double));
|
||||||
|
cudaMemcpyToSymbol(Fdxdy,&Fdxdyh,sizeof(double));
|
||||||
|
cudaMemcpyToSymbol(Fdxdz,&Fdxdzh,sizeof(double));
|
||||||
|
cudaMemcpyToSymbol(Fdydz,&Fdydzh,sizeof(double));
|
||||||
|
cache.rhs_symbol_x = X;
|
||||||
|
cache.rhs_symbol_y = Y;
|
||||||
|
cache.rhs_symbol_z = Z;
|
||||||
|
cache.rhs_symbol_symmetry = Symmetry;
|
||||||
|
cache.rhs_symbol_lev = Lev;
|
||||||
|
cache.rhs_symbol_co = effective_co;
|
||||||
|
cache.rhs_symbol_eps = eps;
|
||||||
|
cache.rhs_symbols_uploaded = true;
|
||||||
|
}
|
||||||
|
|
||||||
//3.4---------for lopsided---------------------------
|
//3.4---------for lopsided---------------------------
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user