Cache repeated interpolation plans

This commit is contained in:
2026-04-09 15:21:01 +08:00
parent 06fa643365
commit 42e851d19a
4 changed files with 428 additions and 370 deletions

View File

@@ -2,13 +2,14 @@
#include <iostream>
#include <iomanip>
#include <fstream>
#include <cstdlib>
#include <cstdio>
#include <string>
#include <cmath>
#include <new>
#include <vector>
using namespace std;
#include <cstdlib>
#include <cstdio>
#include <string>
#include <cmath>
#include <new>
#include <map>
#include <vector>
using namespace std;
#include "misc.h"
#include "MPatch.h"
@@ -40,6 +41,44 @@ struct InterpVarDesc
double soa[dim];
};
struct InterpPlanKey
{
const Patch *patch;
const double *x;
const double *y;
const double *z;
int NN;
int Symmetry;
int myrank;
};
struct InterpPlanKeyLess
{
bool operator()(const InterpPlanKey &lhs, const InterpPlanKey &rhs) const
{
if (lhs.patch != rhs.patch) return lhs.patch < rhs.patch;
if (lhs.x != rhs.x) return lhs.x < rhs.x;
if (lhs.y != rhs.y) return lhs.y < rhs.y;
if (lhs.z != rhs.z) return lhs.z < rhs.z;
if (lhs.NN != rhs.NN) return lhs.NN < rhs.NN;
if (lhs.Symmetry != rhs.Symmetry) return lhs.Symmetry < rhs.Symmetry;
return lhs.myrank < rhs.myrank;
}
};
struct CachedInterpPlan
{
int nblocks;
vector<int> owner_rank;
vector<int> owner_block;
vector<vector<int> > block_points;
vector<vector<double> > block_px;
vector<vector<double> > block_py;
vector<vector<double> > block_pz;
CachedInterpPlan() : nblocks(0) {}
};
struct InterpBlockView
{
Block *bp;
@@ -229,10 +268,124 @@ bool should_try_cuda_interp(int ordn, int num_points, int num_var)
return num_points * num_var >= 256;
}
CachedInterpPlan &get_cached_interp_plan(Patch *patch,
int NN, double **XX,
int Symmetry, int myrank,
const double *DH,
const BlockBinIndex &block_index,
bool report_bounds_here,
bool allow_missing_points)
{
static map<InterpPlanKey, CachedInterpPlan, InterpPlanKeyLess> cache;
InterpPlanKey key;
key.patch = patch;
key.x = XX[0];
key.y = XX[1];
key.z = XX[2];
key.NN = NN;
key.Symmetry = Symmetry;
key.myrank = myrank;
map<InterpPlanKey, CachedInterpPlan, InterpPlanKeyLess>::iterator it = cache.find(key);
if (it != cache.end() && it->second.nblocks == static_cast<int>(block_index.views.size()))
return it->second;
CachedInterpPlan &plan = cache[key];
plan = CachedInterpPlan();
plan.nblocks = static_cast<int>(block_index.views.size());
plan.owner_rank.assign(NN, -1);
plan.owner_block.assign(NN, -1);
plan.block_points.resize(plan.nblocks);
plan.block_px.resize(plan.nblocks);
plan.block_py.resize(plan.nblocks);
plan.block_pz.resize(plan.nblocks);
for (int j = 0; j < NN; ++j)
{
double pox[dim];
for (int i = 0; i < dim; ++i)
{
pox[i] = XX[i][j];
if (report_bounds_here &&
(XX[i][j] < patch->bbox[i] + patch->lli[i] * DH[i] ||
XX[i][j] > patch->bbox[dim + i] - patch->uui[i] * DH[i]))
{
cout << "Patch::Interp_Points: point (";
for (int k = 0; k < dim; ++k)
{
cout << XX[k][j];
if (k < dim - 1)
cout << ",";
else
cout << ") is out of current Patch." << endl;
}
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
const int block_i = find_block_index_for_point(block_index, pox, DH);
if (block_i >= 0)
{
Block *BP = block_index.views[block_i].bp;
plan.owner_rank[j] = BP->rank;
plan.owner_block[j] = block_i;
if (BP->rank == myrank)
{
plan.block_points[block_i].push_back(j);
plan.block_px[block_i].push_back(XX[0][j]);
plan.block_py[block_i].push_back(XX[1][j]);
plan.block_pz[block_i].push_back(XX[2][j]);
}
}
}
if (!allow_missing_points && report_bounds_here)
{
for (int j = 0; j < NN; ++j)
{
if (plan.owner_rank[j] >= 0)
continue;
cout << "ERROR: Patch::Interp_Points fails to find point (";
for (int d = 0; d < dim; ++d)
{
cout << XX[d][j];
if (d < dim - 1)
cout << ",";
else
cout << ")";
}
cout << " on Patch (";
for (int d = 0; d < dim; ++d)
{
cout << patch->bbox[d] << "+" << patch->lli[d] * DH[d];
if (d < dim - 1)
cout << ",";
else
cout << ")--";
}
cout << "(";
for (int d = 0; d < dim; ++d)
{
cout << patch->bbox[dim + d] << "-" << patch->uui[d] * DH[d];
if (d < dim - 1)
cout << ",";
else
cout << ")" << endl;
}
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
return plan;
}
bool run_cuda_interp_for_block(Block *BP,
const vector<InterpVarDesc> &vars,
const vector<int> &point_ids,
double **XX,
const vector<double> &px,
const vector<double> &py,
const vector<double> &pz,
double *Shellf,
int num_var,
int ordn,
@@ -251,15 +404,6 @@ bool run_cuda_interp_for_block(Block *BP,
}
const int npts = static_cast<int>(point_ids.size());
vector<double> px(npts), py(npts), pz(npts);
for (int p = 0; p < npts; ++p)
{
const int j = point_ids[p];
px[p] = XX[0][j];
py[p] = XX[1][j];
pz[p] = XX[2][j];
}
vector<double> out(static_cast<size_t>(npts) * static_cast<size_t>(num_var));
if (bssn_cuda_interp_points_batch(BP->shape,
BP->X[0], BP->X[1], BP->X[2],
@@ -286,7 +430,9 @@ bool run_cuda_interp_for_block(Block *BP,
void run_cpu_interp_for_block(Block *BP,
const vector<InterpVarDesc> &vars,
const vector<int> &point_ids,
double **XX,
const vector<double> &px,
const vector<double> &py,
const vector<double> &pz,
double *Shellf,
int num_var,
int ordn,
@@ -295,9 +441,9 @@ void run_cpu_interp_for_block(Block *BP,
for (size_t p = 0; p < point_ids.size(); ++p)
{
const int j = point_ids[p];
double x = XX[0][j];
double y = XX[1][j];
double z = XX[2][j];
double x = px[p];
double y = py[p];
double z = pz[p];
int ordn_local = ordn;
int symmetry_local = Symmetry;
for (int v = 0; v < num_var; ++v)
@@ -310,33 +456,34 @@ void run_cpu_interp_for_block(Block *BP,
}
void interpolate_owned_points(MyList<var> *VarList,
int NN, double **XX,
double *Shellf, int Symmetry,
int myrank, int ordn,
int ordn,
const BlockBinIndex &block_index,
const int *owner_rank,
const int *owner_block)
const CachedInterpPlan &plan)
{
vector<InterpVarDesc> vars;
collect_interp_vars(VarList, vars);
const int num_var = static_cast<int>(vars.size());
vector<vector<int>> block_points(block_index.views.size());
for (int j = 0; j < NN; ++j)
for (size_t bi = 0; bi < plan.block_points.size(); ++bi)
{
if (owner_rank[j] == myrank && owner_block[j] >= 0)
block_points[owner_block[j]].push_back(j);
}
for (size_t bi = 0; bi < block_points.size(); ++bi)
{
if (block_points[bi].empty())
if (plan.block_points[bi].empty())
continue;
Block *BP = block_index.views[bi].bp;
bool done = run_cuda_interp_for_block(BP, vars, block_points[bi], XX, Shellf, num_var, ordn, Symmetry);
bool done = run_cuda_interp_for_block(BP, vars,
plan.block_points[bi],
plan.block_px[bi],
plan.block_py[bi],
plan.block_pz[bi],
Shellf, num_var, ordn, Symmetry);
if (!done)
run_cpu_interp_for_block(BP, vars, block_points[bi], XX, Shellf, num_var, ordn, Symmetry);
run_cpu_interp_for_block(BP, vars,
plan.block_points[bi],
plan.block_px[bi],
plan.block_py[bi],
plan.block_pz[bi],
Shellf, num_var, ordn, Symmetry);
}
}
} // namespace
@@ -684,55 +831,15 @@ void Patch::Interp_Points(MyList<var> *VarList,
memset(Shellf, 0, sizeof(double) * NN * num_var);
// owner_rank[j] records which MPI rank owns point j
// All ranks traverse the same block list so they all agree on ownership
int *owner_rank;
owner_rank = new int[NN];
int *owner_block;
owner_block = new int[NN];
for (int j = 0; j < NN; j++)
{
owner_rank[j] = -1;
owner_block[j] = -1;
}
double DH[dim];
for (int i = 0; i < dim; i++)
DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
for (int j = 0; j < NN; j++) // run along points
{
double pox[dim];
for (int i = 0; i < dim; i++)
{
pox[i] = XX[i][j];
if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i]))
{
cout << "Patch::Interp_Points: point (";
for (int k = 0; k < dim; k++)
{
cout << XX[k][j];
if (k < dim - 1)
cout << ",";
else
cout << ") is out of current Patch." << endl;
}
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
const int block_i = find_block_index_for_point(block_index, pox, DH);
if (block_i >= 0)
{
Block *BP = block_index.views[block_i].bp;
owner_rank[j] = BP->rank;
owner_block[j] = block_i;
}
}
double DH[dim];
for (int i = 0; i < dim; i++)
DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
CachedInterpPlan &plan = get_cached_interp_plan(this, NN, XX, Symmetry, myrank, DH, block_index, myrank == 0, false);
const int *owner_rank = plan.owner_rank.data();
interpolate_owned_points(VarList, NN, XX, Shellf, Symmetry, myrank, ordn, block_index, owner_rank, owner_block);
interpolate_owned_points(VarList, Shellf, Symmetry, ordn, block_index, plan);
// Replace MPI_Allreduce with per-owner MPI_Bcast:
// Group consecutive points by owner rank and broadcast each group.
@@ -788,8 +895,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
}
}
delete[] owner_rank;
delete[] owner_block;
}
void Patch::Interp_Points(MyList<var> *VarList,
int NN, double **XX,
@@ -818,98 +923,22 @@ void Patch::Interp_Points(MyList<var> *VarList,
memset(Shellf, 0, sizeof(double) * NN * num_var);
// owner_rank[j] records which MPI rank owns point j
int *owner_rank;
owner_rank = new int[NN];
int *owner_block;
owner_block = new int[NN];
for (int j = 0; j < NN; j++)
{
owner_rank[j] = -1;
owner_block[j] = -1;
}
double DH[dim];
for (int i = 0; i < dim; i++)
DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
// --- Interpolation phase (identical to original) ---
for (int j = 0; j < NN; j++)
{
double pox[dim];
for (int i = 0; i < dim; i++)
{
pox[i] = XX[i][j];
if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i]))
{
cout << "Patch::Interp_Points: point (";
for (int k = 0; k < dim; k++)
{
cout << XX[k][j];
if (k < dim - 1)
cout << ",";
else
cout << ") is out of current Patch." << endl;
}
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
const int block_i = find_block_index_for_point(block_index, pox, DH);
if (block_i >= 0)
{
Block *BP = block_index.views[block_i].bp;
owner_rank[j] = BP->rank;
owner_block[j] = block_i;
}
}
double DH[dim];
for (int i = 0; i < dim; i++)
DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
CachedInterpPlan &plan = get_cached_interp_plan(this, NN, XX, Symmetry, myrank, DH, block_index, myrank == 0, false);
const int *owner_rank = plan.owner_rank.data();
interpolate_owned_points(VarList, NN, XX, Shellf, Symmetry, myrank, ordn, block_index, owner_rank, owner_block);
interpolate_owned_points(VarList, Shellf, Symmetry, ordn, block_index, plan);
#ifdef INTERP_LB_PROFILE
double t_interp_end = MPI_Wtime();
double t_interp_local = t_interp_end - t_interp_start;
#endif
// --- Error check for unfound points ---
for (int j = 0; j < NN; j++)
{
if (owner_rank[j] < 0 && myrank == 0)
{
cout << "ERROR: Patch::Interp_Points fails to find point (";
for (int d = 0; d < dim; d++)
{
cout << XX[d][j];
if (d < dim - 1)
cout << ",";
else
cout << ")";
}
cout << " on Patch (";
for (int d = 0; d < dim; d++)
{
cout << bbox[d] << "+" << lli[d] * DH[d];
if (d < dim - 1)
cout << ",";
else
cout << ")--";
}
cout << "(";
for (int d = 0; d < dim; d++)
{
cout << bbox[dim + d] << "-" << uui[d] * DH[d];
if (d < dim - 1)
cout << ",";
else
cout << ")" << endl;
}
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
// --- Targeted point-to-point communication phase ---
// --- Targeted point-to-point communication phase ---
// Compute consumer_rank[j] using the same deterministic formula as surface_integral
int *consumer_rank = new int[NN];
{
@@ -1028,8 +1057,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
delete[] send_count;
delete[] recv_count;
delete[] consumer_rank;
delete[] owner_rank;
delete[] owner_block;
#ifdef INTERP_LB_PROFILE
{
@@ -1077,59 +1104,20 @@ void Patch::Interp_Points(MyList<var> *VarList,
memset(Shellf, 0, sizeof(double) * NN * num_var);
// owner_rank[j] stores the global rank that owns point j
int *owner_rank;
owner_rank = new int[NN];
int *owner_block;
owner_block = new int[NN];
for (int j = 0; j < NN; j++)
{
owner_rank[j] = -1;
owner_block[j] = -1;
}
// Build global-to-local rank translation for Comm_here
MPI_Group world_group, local_group;
MPI_Comm_group(MPI_COMM_WORLD, &world_group);
MPI_Comm_group(Comm_here, &local_group);
// Build global-to-local rank translation for Comm_here
MPI_Group world_group, local_group;
MPI_Comm_group(MPI_COMM_WORLD, &world_group);
MPI_Comm_group(Comm_here, &local_group);
double DH[dim];
for (int i = 0; i < dim; i++)
DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
for (int j = 0; j < NN; j++) // run along points
{
double pox[dim];
for (int i = 0; i < dim; i++)
{
pox[i] = XX[i][j];
if (lmyrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i]))
{
cout << "Patch::Interp_Points: point (";
for (int k = 0; k < dim; k++)
{
cout << XX[k][j];
if (k < dim - 1)
cout << ",";
else
cout << ") is out of current Patch." << endl;
}
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
const int block_i = find_block_index_for_point(block_index, pox, DH);
if (block_i >= 0)
{
Block *BP = block_index.views[block_i].bp;
owner_rank[j] = BP->rank;
owner_block[j] = block_i;
}
}
double DH[dim];
for (int i = 0; i < dim; i++)
DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
CachedInterpPlan &plan = get_cached_interp_plan(this, NN, XX, Symmetry, myrank, DH, block_index, lmyrank == 0, true);
const int *owner_rank = plan.owner_rank.data();
interpolate_owned_points(VarList, NN, XX, Shellf, Symmetry, myrank, ordn, block_index, owner_rank, owner_block);
interpolate_owned_points(VarList, Shellf, Symmetry, ordn, block_index, plan);
// Collect unique global owner ranks and translate to local ranks in Comm_here
// Then broadcast each owner's points via MPI_Bcast on Comm_here
@@ -1159,8 +1147,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
MPI_Group_free(&world_group);
MPI_Group_free(&local_group);
delete[] owner_rank;
delete[] owner_block;
}
void Patch::checkBlock()
{

View File

@@ -953,14 +953,36 @@ int bssn_cuda_interp_points_batch(const int *ex,
struct InterpBatchCache
{
struct StencilCacheEntry
{
const double *X;
const double *Y;
const double *Z;
const double *px;
const double *py;
const double *pz;
int nx;
int ny;
int nz;
int num_points;
int ordn;
int symmetry;
CachedBuffer weights;
CachedIntBuffer indices;
CachedIntBuffer reflect;
StencilCacheEntry()
: X(nullptr), Y(nullptr), Z(nullptr),
px(nullptr), py(nullptr), pz(nullptr),
nx(0), ny(0), nz(0), num_points(0), ordn(0), symmetry(0) {}
};
CachedBuffer out;
CachedBuffer soa;
CachedBuffer field_ptrs;
CachedBuffer weights;
CachedIntBuffer indices;
CachedIntBuffer reflect;
CachedIntBuffer error_flag;
std::vector<CachedBuffer> host_field_copies;
std::vector<StencilCacheEntry> stencil_entries;
};
static thread_local InterpBatchCache cache;
@@ -978,45 +1000,82 @@ int bssn_cuda_interp_points_batch(const int *ex,
const size_t indices_bytes = point_stencil_ints * sizeof(int);
bool ok = true;
std::vector<double> host_weights(point_stencil_doubles);
std::vector<int> host_indices(point_stencil_ints);
std::vector<int> host_reflect(point_stencil_ints);
const double dx = X[1] - X[0];
const double dy = Y[1] - Y[0];
const double dz = Z[1] - Z[0];
const int allow_reflect_x = (symmetry == 2 && std::fabs(X[0]) < dx);
const int allow_reflect_y = (symmetry == 2 && std::fabs(Y[0]) < dy);
const int allow_reflect_z = (symmetry != 0 && std::fabs(Z[0]) < dz);
for (int p = 0; p < num_points; ++p)
InterpBatchCache::StencilCacheEntry *stencil_cache = nullptr;
for (size_t i = 0; i < cache.stencil_entries.size(); ++i)
{
int start_x = 0, start_y = 0, start_z = 0;
double cx = 0.0, cy = 0.0, cz = 0.0;
const bool ok_x = compute_interp_window_host(px[p], X, nx, ordn, allow_reflect_x, &start_x, &cx);
const bool ok_y = compute_interp_window_host(py[p], Y, ny, ordn, allow_reflect_y, &start_y, &cy);
const bool ok_z = compute_interp_window_host(pz[p], Z, nz, ordn, allow_reflect_z, &start_z, &cz);
if (!ok_x || !ok_y || !ok_z)
return 1;
lagrange_weights_ord6_host(cx, host_weights.data() + p * 18);
lagrange_weights_ord6_host(cy, host_weights.data() + p * 18 + 6);
lagrange_weights_ord6_host(cz, host_weights.data() + p * 18 + 12);
for (int i = 0; i < 6; ++i)
InterpBatchCache::StencilCacheEntry &entry = cache.stencil_entries[i];
if (entry.X == X && entry.Y == Y && entry.Z == Z &&
entry.px == px && entry.py == py && entry.pz == pz &&
entry.nx == nx && entry.ny == ny && entry.nz == nz &&
entry.num_points == num_points && entry.ordn == ordn &&
entry.symmetry == symmetry)
{
if (!map_interp_index_host(start_x + i, nx, &host_indices[p * 18 + i], &host_reflect[p * 18 + i]))
return 1;
if (!map_interp_index_host(start_y + i, ny, &host_indices[p * 18 + 6 + i], &host_reflect[p * 18 + 6 + i]))
return 1;
if (!map_interp_index_host(start_z + i, nz, &host_indices[p * 18 + 12 + i], &host_reflect[p * 18 + 12 + i]))
return 1;
stencil_cache = &entry;
break;
}
}
if (!stencil_cache)
{
cache.stencil_entries.push_back(InterpBatchCache::StencilCacheEntry());
stencil_cache = &cache.stencil_entries.back();
stencil_cache->X = X;
stencil_cache->Y = Y;
stencil_cache->Z = Z;
stencil_cache->px = px;
stencil_cache->py = py;
stencil_cache->pz = pz;
stencil_cache->nx = nx;
stencil_cache->ny = ny;
stencil_cache->nz = nz;
stencil_cache->num_points = num_points;
stencil_cache->ordn = ordn;
stencil_cache->symmetry = symmetry;
std::vector<double> host_weights(point_stencil_doubles);
std::vector<int> host_indices(point_stencil_ints);
std::vector<int> host_reflect(point_stencil_ints);
const double dx = X[1] - X[0];
const double dy = Y[1] - Y[0];
const double dz = Z[1] - Z[0];
const int allow_reflect_x = (symmetry == 2 && std::fabs(X[0]) < dx);
const int allow_reflect_y = (symmetry == 2 && std::fabs(Y[0]) < dy);
const int allow_reflect_z = (symmetry != 0 && std::fabs(Z[0]) < dz);
for (int p = 0; p < num_points; ++p)
{
int start_x = 0, start_y = 0, start_z = 0;
double cx = 0.0, cy = 0.0, cz = 0.0;
const bool ok_x = compute_interp_window_host(px[p], X, nx, ordn, allow_reflect_x, &start_x, &cx);
const bool ok_y = compute_interp_window_host(py[p], Y, ny, ordn, allow_reflect_y, &start_y, &cy);
const bool ok_z = compute_interp_window_host(pz[p], Z, nz, ordn, allow_reflect_z, &start_z, &cz);
if (!ok_x || !ok_y || !ok_z)
return 1;
lagrange_weights_ord6_host(cx, host_weights.data() + p * 18);
lagrange_weights_ord6_host(cy, host_weights.data() + p * 18 + 6);
lagrange_weights_ord6_host(cz, host_weights.data() + p * 18 + 12);
for (int i = 0; i < 6; ++i)
{
if (!map_interp_index_host(start_x + i, nx, &host_indices[p * 18 + i], &host_reflect[p * 18 + i]))
return 1;
if (!map_interp_index_host(start_y + i, ny, &host_indices[p * 18 + 6 + i], &host_reflect[p * 18 + 6 + i]))
return 1;
if (!map_interp_index_host(start_z + i, nz, &host_indices[p * 18 + 12 + i], &host_reflect[p * 18 + 12 + i]))
return 1;
}
}
ok = ok &&
copy_to_device(stencil_cache->weights, host_weights.data(), weights_bytes) &&
copy_to_device(stencil_cache->indices, host_indices.data(), indices_bytes) &&
copy_to_device(stencil_cache->reflect, host_reflect.data(), indices_bytes);
if (!ok)
return 1;
}
ok = ok &&
copy_to_device(cache.soa, soa_flat, soa_bytes) &&
copy_to_device(cache.weights, host_weights.data(), weights_bytes) &&
copy_to_device(cache.indices, host_indices.data(), indices_bytes) &&
copy_to_device(cache.reflect, host_reflect.data(), indices_bytes) &&
ensure_capacity(cache.out, out_bytes) &&
ensure_capacity(cache.field_ptrs, ptr_bytes) &&
ensure_capacity(cache.error_flag, sizeof(int));
@@ -1063,9 +1122,9 @@ int bssn_cuda_interp_points_batch(const int *ex,
int ny_local = ny;
const double *dsoa = cache.soa.ptr;
const double *const *dfields = reinterpret_cast<const double *const *>(cache.field_ptrs.ptr);
const double *dweights = cache.weights.ptr;
const int *dindices = cache.indices.ptr;
const int *dreflect = cache.reflect.ptr;
const double *dweights = stencil_cache->weights.ptr;
const int *dindices = stencil_cache->indices.ptr;
const int *dreflect = stencil_cache->reflect.ptr;
double *dout = cache.out.ptr;
int *derror = cache.error_flag.ptr;

View File

@@ -180,19 +180,64 @@ surface_integral::surface_integral(int iSymmetry) : Symmetry(iSymmetry)
//|============================================================================
//| Destructor
//|============================================================================
surface_integral::~surface_integral()
{
delete[] nx_g;
delete[] ny_g;
delete[] nz_g;
delete[] arcostheta;
#ifdef GaussInt
delete[] wtcostheta;
#endif
}
//|----------------------------------------------------------------
// spin weighted spinw component of psi4, general routine
// l takes from spinw to maxl; m takes from -l to l
surface_integral::~surface_integral()
{
release_cached_buffers();
delete[] nx_g;
delete[] ny_g;
delete[] nz_g;
delete[] arcostheta;
#ifdef GaussInt
delete[] wtcostheta;
#endif
}
void surface_integral::get_surface_points(double rex, double **pox)
{
SpherePointCache &cache = sphere_point_cache[rex];
if (!cache.pox[0])
{
for (int i = 0; i < 3; ++i)
cache.pox[i] = new double[n_tot];
for (int n = 0; n < n_tot; ++n)
{
cache.pox[0][n] = rex * nx_g[n];
cache.pox[1][n] = rex * ny_g[n];
cache.pox[2][n] = rex * nz_g[n];
}
}
pox[0] = cache.pox[0];
pox[1] = cache.pox[1];
pox[2] = cache.pox[2];
}
double *surface_integral::get_shellf_buffer(int num_var)
{
double *&buffer = shellf_cache[num_var];
if (!buffer)
buffer = new double[n_tot * num_var];
return buffer;
}
void surface_integral::release_cached_buffers()
{
for (map<double, SpherePointCache>::iterator it = sphere_point_cache.begin(); it != sphere_point_cache.end(); ++it)
{
delete[] it->second.pox[0];
delete[] it->second.pox[1];
delete[] it->second.pox[2];
it->second.pox[0] = it->second.pox[1] = it->second.pox[2] = 0;
}
sphere_point_cache.clear();
for (map<int, double *>::iterator it = shellf_cache.begin(); it != shellf_cache.end(); ++it)
delete[] it->second;
shellf_cache.clear();
}
//|----------------------------------------------------------------
// spin weighted spinw component of psi4, general routine
// l takes from spinw to maxl; m takes from -l to l
//|----------------------------------------------------------------
void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *Ipsi4,
int spinw, int maxl, int NN, double *RP, double *IP,
@@ -209,16 +254,9 @@ void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *
MyList<var> *DG_List = new MyList<var>(Rpsi4);
DG_List->insert(Ipsi4);
int n;
double *pox[3];
for (int i = 0; i < 3; i++)
pox[i] = new double[n_tot];
for (n = 0; n < n_tot; n++)
{
pox[0][n] = rex * nx_g[n];
pox[1][n] = rex * ny_g[n];
pox[2][n] = rex * nz_g[n];
}
int n;
double *pox[3];
get_surface_points(rex, pox);
int mp, Lp, Nmin, Nmax;
mp = n_tot / cpusize;
@@ -234,8 +272,7 @@ void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *
Nmax = Nmin + mp - 1;
}
double *shellf;
shellf = new double[n_tot * InList];
double *shellf = get_shellf_buffer(InList);
GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Nmin, Nmax);
@@ -375,14 +412,10 @@ void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *
//|------= Free memory.
delete[] pox[0];
delete[] pox[1];
delete[] pox[2];
delete[] shellf;
delete[] RP_out;
delete[] IP_out;
DG_List->clearList();
}
delete[] RP_out;
delete[] IP_out;
DG_List->clearList();
}
void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *Ipsi4,
int spinw, int maxl, int NN, double *RP, double *IP,
monitor *Monitor, MPI_Comm Comm_here) // NN is the length of RP and IP
@@ -402,19 +435,11 @@ void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *
MyList<var> *DG_List = new MyList<var>(Rpsi4);
DG_List->insert(Ipsi4);
int n;
double *pox[3];
for (int i = 0; i < 3; i++)
pox[i] = new double[n_tot];
for (n = 0; n < n_tot; n++)
{
pox[0][n] = rex * nx_g[n];
pox[1][n] = rex * ny_g[n];
pox[2][n] = rex * nz_g[n];
}
double *shellf;
shellf = new double[n_tot * InList];
int n;
double *pox[3];
get_surface_points(rex, pox);
double *shellf = get_shellf_buffer(InList);
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Interp_Points");
@@ -577,14 +602,10 @@ void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *
//|------= Free memory.
delete[] pox[0];
delete[] pox[1];
delete[] pox[2];
delete[] shellf;
delete[] RP_out;
delete[] IP_out;
DG_List->clearList();
}
delete[] RP_out;
delete[] IP_out;
DG_List->clearList();
}
//|----------------------------------------------------------------
// for shell patch
//|----------------------------------------------------------------
@@ -597,19 +618,11 @@ void surface_integral::surf_Wave(double rex, int lev, ShellPatch *GH, var *Rpsi4
MyList<var> *DG_List = new MyList<var>(Rpsi4);
DG_List->insert(Ipsi4);
int n;
double *pox[3];
for (int i = 0; i < 3; i++)
pox[i] = new double[n_tot];
for (n = 0; n < n_tot; n++)
{
pox[0][n] = rex * nx_g[n];
pox[1][n] = rex * ny_g[n];
pox[2][n] = rex * nz_g[n];
}
int n;
double *pox[3];
get_surface_points(rex, pox);
double *shellf;
shellf = new double[n_tot * InList];
double *shellf = get_shellf_buffer(InList);
GH->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry);
@@ -2570,12 +2583,8 @@ void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var
Rout[5] = sy;
Rout[6] = sz;
delete[] pox[0];
delete[] pox[1];
delete[] pox[2];
delete[] shellf;
DG_List->clearList();
}
DG_List->clearList();
}
void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var *trK,
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
@@ -2637,19 +2646,11 @@ void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var
DG_List->insert(Ayz);
DG_List->insert(Azz);
int n;
double *pox[3];
for (int i = 0; i < 3; i++)
pox[i] = new double[n_tot];
for (n = 0; n < n_tot; n++)
{
pox[0][n] = rex * nx_g[n];
pox[1][n] = rex * ny_g[n];
pox[2][n] = rex * nz_g[n];
}
double *shellf;
shellf = new double[n_tot * InList];
int n;
double *pox[3];
get_surface_points(rex, pox);
double *shellf = get_shellf_buffer(InList);
// we have assumed there is only one box on this level,
// so we do not need loop boxes
@@ -2839,12 +2840,8 @@ void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var
Rout[5] = sy;
Rout[6] = sz;
delete[] pox[0];
delete[] pox[1];
delete[] pox[2];
delete[] shellf;
DG_List->clearList();
}
DG_List->clearList();
}
//|----------------------------------------------------------------
// for shell patch
//|----------------------------------------------------------------

View File

@@ -20,25 +20,41 @@ using namespace std;
#include "cgh.h"
#include "ShellPatch.h"
#include "NullShellPatch.h"
#include "NullShellPatch2.h"
#include "var.h"
#include "monitor.h"
#include "NullShellPatch2.h"
#include "var.h"
#include "monitor.h"
#include <map>
class surface_integral
{
private:
int Symmetry, factor;
int N_theta, N_phi; // Number of points in Theta & Phi directions
double dphi, dcostheta;
double *arcostheta, *wtcostheta;
int n_tot; // size of arrays
double *nx_g, *ny_g, *nz_g; // global list of unit normals
int myrank, cpusize;
public:
surface_integral(int iSymmetry);
private:
struct SpherePointCache
{
double *pox[3];
SpherePointCache()
{
pox[0] = pox[1] = pox[2] = 0;
}
};
int Symmetry, factor;
int N_theta, N_phi; // Number of points in Theta & Phi directions
double dphi, dcostheta;
double *arcostheta, *wtcostheta;
int n_tot; // size of arrays
double *nx_g, *ny_g, *nz_g; // global list of unit normals
int myrank, cpusize;
map<double, SpherePointCache> sphere_point_cache;
map<int, double *> shellf_cache;
void get_surface_points(double rex, double **pox);
double *get_shellf_buffer(int num_var);
void release_cached_buffers();
public:
surface_integral(int iSymmetry);
~surface_integral();
void surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *Ipsi4,