Files
AMSS-NCKU/AMSS_NCKU_source/bssn_class.C
CGH0S7 c4194214c6 Enable Z4C + Shell-Patch GPU coexistence (Phase 3)
Remove the compile-time #error that blocked USE_CUDA_Z4C + WithShell.
Add GPU-to-CPU state sync at the start of both Z4C Step functions
(non-CPBC and CPBC) so shell CPU consumers read valid field data
after Cartesian GPU RHS with resident state.

Move bssn_cuda_use_resident_sync and
bssn_cuda_download_level_state_if_present from the anonymous namespace
to file scope in bssn_class.C so derived classes (Z4C) can call them.
Declare both in bssn_rhs_cuda.h. Include bssn_rhs_cuda.h in Z4c_class.C.

Z4C shell RHS remains on CPU (Fortran Z4c_rhs_ss.f90) pending
future GPU kernel implementation.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-10 12:08:02 +08:00

10261 lines
390 KiB
C

#ifdef newc
#include <typeinfo>
#include <sstream>
#include <cstdio>
#include <cstdlib>
#include <map>
#include <string>
#include <cstring>
#include <iostream>
#include <vector>
using namespace std;
#else
#include <stdio.h>
#include <map.h>
#include <string.h>
#endif
#include <time.h>
#include "macrodef.h"
#include "misc.h"
#include "Ansorg.h"
#include "fmisc.h"
#include "Parallel.h"
#include "bssn_class.h"
#include "bssn_rhs.h"
#if USE_CUDA_BSSN
#include "bssn_rhs_cuda.h"
#ifdef WithShell
#include "bssn_gpu.h"
#endif
#endif
#if USE_CUDA_BSSN && defined(WithShell)
// GPU-accelerated shell RHS: same parameter signature as f_compute_rhs_bssn_ss.
// Internally calls gpu_rhs_ss with calledby=0, mpi_rank=0 (device 0).
extern "C" {
// Adapter that routes the shell-patch BSSN right-hand-side evaluation to
// gpu_rhs_ss. Every argument is forwarded unchanged and in the same order,
// preceded by two hard-coded leading arguments (both 0).
// Returns: whatever gpu_rhs_ss returns.
// NOTE(review): the two leading arguments are labelled calledby / mpi_rank
// by the inline comment below; both are fixed at 0 here. Confirm that always
// passing 0 is intended when several MPI ranks / devices are in use.
static int cuda_compute_rhs_bssn_ss(
// Grid extents, time, and coordinate arrays.
int *ex, double &T, double *crho, double *sigma, double *R,
double *X, double *Y, double *Z,
// First derivatives of the (rho, sigma, R) coordinates — presumably the
// shell-to-Cartesian Jacobian; verify against the Fortran routine.
double *drhodx, double *drhody, double *drhodz,
double *dsigmadx, double *dsigmady, double *dsigmadz,
double *dRdx, double *dRdy, double *dRdz,
// Second derivatives of the same coordinates.
double *drhodxx, double *drhodxy, double *drhodxz, double *drhodyy, double *drhodyz, double *drhodzz,
double *dsigmadxx, double *dsigmadxy, double *dsigmadxz, double *dsigmadyy, double *dsigmadyz, double *dsigmadzz,
double *dRdxx, double *dRdxy, double *dRdxz, double *dRdyy, double *dRdyz, double *dRdzz,
// Evolved state fields.
double *chi, double *trK,
double *gxx, double *gxy, double *gxz, double *gyy, double *gyz, double *gzz,
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
double *Gamx, double *Gamy, double *Gamz,
double *Lap, double *betax, double *betay, double *betaz,
double *dtSfx, double *dtSfy, double *dtSfz,
// One *_rhs array per evolved state field above.
double *chi_rhs, double *trK_rhs,
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
// Source terms, connection coefficients, Ricci components and constraint
// residual arrays (roles inferred from the names — TODO confirm which are
// inputs and which are outputs of gpu_rhs_ss).
double *rho, double *Sx, double *Sy, double *Sz,
double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res,
double *Gmx_Res, double *Gmy_Res, double *Gmz_Res,
// Scalar controls passed by reference (Fortran-style calling convention).
int &Symmetry, int &Lev, double &eps, int &sst, int &co)
{
// Straight pass-through: the argument order below must stay in lock-step
// with the parameter list above.
return gpu_rhs_ss(0, 0, // calledby=ABE_main, mpi_rank=device_0
ex, T, crho, sigma, R, X, Y, Z,
drhodx, drhody, drhodz,
dsigmadx, dsigmady, dsigmadz,
dRdx, dRdy, dRdz,
drhodxx, drhodxy, drhodxz, drhodyy, drhodyz, drhodzz,
dsigmadxx, dsigmadxy, dsigmadxz, dsigmadyy, dsigmadyz, dsigmadzz,
dRdxx, dRdxy, dRdxz, dRdyy, dRdyz, dRdzz,
chi, trK,
gxx, gxy, gxz, gyy, gyz, gzz,
Axx, Axy, Axz, Ayy, Ayz, Azz,
Gamx, Gamy, Gamz,
Lap, betax, betay, betaz,
dtSfx, dtSfy, dtSfz,
chi_rhs, trK_rhs,
gxx_rhs, gxy_rhs, gxz_rhs, gyy_rhs, gyz_rhs, gzz_rhs,
Axx_rhs, Axy_rhs, Axz_rhs, Ayy_rhs, Ayz_rhs, Azz_rhs,
Gamx_rhs, Gamy_rhs, Gamz_rhs,
Lap_rhs, betax_rhs, betay_rhs, betaz_rhs,
dtSfx_rhs, dtSfy_rhs, dtSfz_rhs,
rho, Sx, Sy, Sz,
Sxx, Sxy, Sxz, Syy, Syz, Szz,
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
ham_Res, movx_Res, movy_Res, movz_Res,
Gmx_Res, Gmy_Res, Gmz_Res,
Symmetry, Lev, eps, sst, co);
}
}
// Use GPU for shell RHS in Step and SHStep.
// Compute_Constraint / Interp_Constraint / Constraint_Out keep
// CPU Fortran (GPU alloc-per-call overhead dominates there).
#define f_compute_rhs_bssn_ss cuda_compute_rhs_bssn_ss
#endif
#include "initial_puncture.h"
#include "enforce_algebra.h"
#include "rungekutta4_rout.h"
#include "sommerfeld_rout.h"
#include "getnp4.h"
#include "shellfunctions.h"
#include "parameters.h"
#ifdef With_AHF
#include "derivatives.h"
#include "myglobal.h"
#endif
#include "perf.h"
#include "derivatives.h"
#include "ricci_gamma.h"
namespace
{
    // Generic environment switch: true when `name` is set and atoi() of its
    // value is non-zero. The environment is consulted on every call.
    bool amss_env_flag_enabled(const char *name)
    {
        const char *raw = getenv(name);
        if (!raw)
            return false;
        return atoi(raw) != 0;
    }

    // AMSS_ANALYSIS_TIMING switch; the environment is read on the first call
    // only and the answer is reused afterwards.
    bool amss_analysis_timing_enabled()
    {
        static const bool cached = amss_env_flag_enabled("AMSS_ANALYSIS_TIMING");
        return cached;
    }

    // Value of AMSS_ANALYSIS_MAP_EVERY when it parses to a positive integer,
    // otherwise 1. Resolved once, on the first call.
    int amss_analysis_map_every()
    {
        static int period = 0; // 0 means "not resolved yet"; resolved values are >= 1
        if (period == 0)
        {
            const char *raw = getenv("AMSS_ANALYSIS_MAP_EVERY");
            const int parsed = raw ? atoi(raw) : 0;
            period = (parsed > 0) ? parsed : 1;
        }
        return period;
    }

    // Decides whether constraint output is due at `step`.
    // With AMSS_CONSTRAINT_OUT_EVERY unset or <= 1 every step qualifies;
    // otherwise only positive steps that are multiples of the period do.
    bool amss_constraint_out_enabled_for_step(int step)
    {
        static int period = 0; // 0 means "not resolved yet"; resolved values are >= 1
        if (period == 0)
        {
            const char *raw = getenv("AMSS_CONSTRAINT_OUT_EVERY");
            const int parsed = raw ? atoi(raw) : 0;
            period = (parsed > 0) ? parsed : 1;
        }
        if (period <= 1)
            return true;
        return step > 0 && step % period == 0;
    }

    // AMSS_RP_TIMING switch (cached after the first call).
    bool amss_rp_timing_enabled()
    {
        static const bool cached = amss_env_flag_enabled("AMSS_RP_TIMING");
        return cached;
    }

    // AMSS_RP_DETAIL_TIMING switch (cached after the first call).
    bool amss_rp_detail_timing_enabled()
    {
        static const bool cached = amss_env_flag_enabled("AMSS_RP_DETAIL_TIMING");
        return cached;
    }

    // Always on when compiled with ABEtype == 1; otherwise on only when
    // AMSS_RP_CACHED_RESTRICT is set to a non-zero value. The environment
    // can enable the feature but never disable the compiled-in default.
    bool amss_cached_rp_restrict_enabled()
    {
#if (ABEtype == 1)
        static const bool cached = true;
#else
        static const bool cached = amss_env_flag_enabled("AMSS_RP_CACHED_RESTRICT");
#endif
        return cached;
    }

    // Same policy as above, controlled by AMSS_RP_CACHED_OUTBD.
    bool amss_cached_rp_outbd_enabled()
    {
#if (ABEtype == 1)
        static const bool cached = true;
#else
        static const bool cached = amss_env_flag_enabled("AMSS_RP_CACHED_OUTBD");
#endif
        return cached;
    }

    // Same policy as above, controlled by AMSS_RP_CACHED_FINE_SYNC.
    bool amss_cached_rp_fine_sync_enabled()
    {
#if (ABEtype == 1)
        static const bool cached = true;
#else
        static const bool cached = amss_env_flag_enabled("AMSS_RP_CACHED_FINE_SYNC");
#endif
        return cached;
    }

    // AMSS_RP_CACHED_COARSE_SYNC switch; off unless requested (cached).
    bool amss_cached_rp_coarse_sync_enabled()
    {
        static const bool cached = amss_env_flag_enabled("AMSS_RP_CACHED_COARSE_SYNC");
        return cached;
    }

    // AMSS_RP_SKIP_COARSE_SYNC switch; off unless requested (cached).
    bool amss_rp_skip_coarse_sync_enabled()
    {
        static const bool cached = amss_env_flag_enabled("AMSS_RP_SKIP_COARSE_SYNC");
        return cached;
    }

    // AMSS_EVOLVE_TIMING switch; off unless requested (cached).
    bool amss_evolve_timing_enabled()
    {
        static const bool cached = amss_env_flag_enabled("AMSS_EVOLVE_TIMING");
        return cached;
    }

    // Accumulated seconds, one counter per evolve phase.
    struct AmssEvolveTimingStats
    {
        double step;
        double rp;
        double regrid;
        double constraint;
    };

    // Access to the single, zero-initialised accumulator instance.
    AmssEvolveTimingStats &amss_evolve_timing_stats()
    {
        static AmssEvolveTimingStats totals = {};
        return totals;
    }

    // Puts every accumulator back to zero.
    void amss_evolve_timing_reset()
    {
        amss_evolve_timing_stats() = AmssEvolveTimingStats();
    }

    // Adds `sec` to the "step" accumulator.
    void amss_evolve_timing_add_step(double sec)
    {
        AmssEvolveTimingStats &totals = amss_evolve_timing_stats();
        totals.step += sec;
    }

    // Adds `sec` to the "rp" accumulator.
    void amss_evolve_timing_add_rp(double sec)
    {
        AmssEvolveTimingStats &totals = amss_evolve_timing_stats();
        totals.rp += sec;
    }

    // Adds `sec` to the "regrid" accumulator.
    void amss_evolve_timing_add_regrid(double sec)
    {
        AmssEvolveTimingStats &totals = amss_evolve_timing_stats();
        totals.regrid += sec;
    }

    // Adds `sec` to the "constraint" accumulator.
    void amss_evolve_timing_add_constraint(double sec)
    {
        AmssEvolveTimingStats &totals = amss_evolve_timing_stats();
        totals.constraint += sec;
    }
}
// Compile-time switch for per-timestep memory usage collection/printing.
// Default is OFF to reduce overhead in production runs.
#ifndef BSSN_ENABLE_MEM_USAGE_LOG
#define BSSN_ENABLE_MEM_USAGE_LOG 0
#endif
#ifndef BSSN_FINE_TIMING
#define BSSN_FINE_TIMING 0
#endif
#ifndef BSSN_FINE_TIMING_EVERY
#define BSSN_FINE_TIMING_EVERY 1
#endif
#ifndef BSSN_FINE_TIMING_TOPN
#define BSSN_FINE_TIMING_TOPN 8
#endif
#ifndef BSSN_KERNEL_FINE_TIMING
#define BSSN_KERNEL_FINE_TIMING 0
#endif
#ifndef BSSN_ENABLE_STDIN_ABORT_POLL
#define BSSN_ENABLE_STDIN_ABORT_POLL 0
#endif
#if BSSN_FINE_TIMING
namespace step_timing
{
    // Identifiers of the per-step wall-clock buckets. TB_COUNT is the number
    // of buckets and must remain the last enumerator.
    enum Bucket
    {
        TB_ANALYSIS_PSI4 = 0,
        TB_ANALYSIS_SURFACE,
        TB_ANALYSIS_IO,
        TB_BH_PREDICTOR,
        TB_PREDICTOR_RHS,
        TB_PREDICTOR_SYNC,
        TB_BH_CORRECTOR,
        TB_CORRECTOR_RHS,
        TB_CORRECTOR_SYNC,
        TB_STATE_SWAP,
        TB_RESTRICT_PROLONG,
        TB_CONSTRAINT_OUT,
        TB_DUMP_3D,
        TB_DUMP_2D,
        TB_CHECKPOINT,
        TB_REGRID,
        TB_COUNT
    };

    // Seconds accumulated by this rank, indexed by Bucket.
    static double local_bucket_seconds[TB_COUNT];

    // Column labels, listed in the same order as the Bucket enumerators.
    static const char *bucket_labels[TB_COUNT] =
        {
            "analysis_psi4",
            "analysis_surface",
            "analysis_io",
            "bh_predictor",
            "predictor_rhs",
            "predictor_sync",
            "bh_corrector",
            "corrector_rhs",
            "corrector_sync",
            "state_swap",
            "restrict_prolong",
            "constraint_out",
            "dump_3d",
            "dump_2d",
            "checkpoint",
            "regrid"};

    // Clears every local accumulator.
    void reset()
    {
        int slot = TB_COUNT;
        while (slot-- > 0)
            local_bucket_seconds[slot] = 0.0;
    }

    // Adds `seconds` to the accumulator of `bucket`.
    void add(Bucket bucket, double seconds)
    {
        const int slot = int(bucket);
        local_bucket_seconds[slot] += seconds;
    }

    // Reduces the local accumulators to rank 0 (per-bucket maximum and sum)
    // and, on rank 0 only, writes one row to TimingMonitor laid out as
    //   [step_index, step_wall_seconds, max per bucket..., mean per bucket...].
    // Both MPI_Reduce calls are issued by every rank before the rank test.
    void report(int myrank, int nprocs, monitor *TimingMonitor,
                int step_index, double phys_time, double step_wall_seconds)
    {
        double slowest_rank_seconds[TB_COUNT];
        double mean_seconds[TB_COUNT];

        MPI_Reduce(local_bucket_seconds, slowest_rank_seconds, TB_COUNT, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
        MPI_Reduce(local_bucket_seconds, mean_seconds, TB_COUNT, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

        if (myrank == 0)
        {
            // The reduction delivered sums; turn them into means.
            for (int ib = 0; ib < TB_COUNT; ++ib)
                mean_seconds[ib] /= Mymax(1, nprocs);

            if (TimingMonitor)
            {
                double row[2 + 2 * TB_COUNT];
                double *max_cols = row + 2;
                double *mean_cols = row + 2 + TB_COUNT;

                row[0] = double(step_index);
                row[1] = step_wall_seconds;
                for (int ib = 0; ib < TB_COUNT; ++ib)
                {
                    max_cols[ib] = slowest_rank_seconds[ib];
                    mean_cols[ib] = mean_seconds[ib];
                }
                TimingMonitor->writefile(phys_time, 2 + 2 * TB_COUNT, row);
            }
        }
    }
}
#define STEP_TIMER_DECL(var_name) const double var_name = MPI_Wtime()
#define STEP_TIMER_ADD(bucket_name, var_name) step_timing::add(step_timing::bucket_name, MPI_Wtime() - (var_name))
#else
#define STEP_TIMER_DECL(var_name)
#define STEP_TIMER_ADD(bucket_name, var_name)
#endif
#if BSSN_KERNEL_FINE_TIMING
namespace rhs_kernel_timing_report
{
void report(int myrank, int nprocs, int step_index, double step_wall_seconds)
{
const int bucket_count = f_bssn_rhs_kernel_timing_bucket_count();
const double *local_bucket_seconds = f_bssn_rhs_kernel_timing_local_seconds();
if (bucket_count <= 0 || !local_bucket_seconds)
return;
double *max_bucket_seconds = new double[bucket_count];
double *avg_bucket_seconds = new double[bucket_count];
int *order = new int[bucket_count];
MPI_Reduce((void *)local_bucket_seconds, max_bucket_seconds, bucket_count, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
MPI_Reduce((void *)local_bucket_seconds, avg_bucket_seconds, bucket_count, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
if (myrank == 0)
{
double kernel_total = 0.0;
for (int i = 0; i < bucket_count; ++i)
{
avg_bucket_seconds[i] /= Mymax(1, nprocs);
order[i] = i;
kernel_total += max_bucket_seconds[i];
}
for (int i = 0; i < bucket_count - 1; ++i)
for (int j = i + 1; j < bucket_count; ++j)
if (max_bucket_seconds[order[j]] > max_bucket_seconds[order[i]])
{
int tmp = order[i];
order[i] = order[j];
order[j] = tmp;
}
ios::fmtflags old_flags = cout.flags();
streamsize old_precision = cout.precision();
const double kernel_frac = (step_wall_seconds > 0.0) ? (100.0 * kernel_total / step_wall_seconds) : 0.0;
cout << " RHS kernel split (max-rank accumulated over step " << step_index << "): total "
<< setprecision(6) << kernel_total << " s (" << setprecision(4)
<< kernel_frac << "% of coarse step)" << endl;
const int topn = Mymin(BSSN_FINE_TIMING_TOPN, bucket_count);
for (int i = 0; i < topn; ++i)
{
const int ib = order[i];
const double frac = (kernel_total > 0.0) ? (100.0 * max_bucket_seconds[ib] / kernel_total) : 0.0;
cout << " "
<< setw(20) << left << f_bssn_rhs_kernel_timing_label(ib)
<< " = " << setw(10) << right << setprecision(6) << max_bucket_seconds[ib]
<< " s (" << setw(6) << setprecision(4) << frac << "% of kernel)" << endl;
}
cout << endl;
cout.flags(old_flags);
cout.precision(old_precision);
}
delete[] max_bucket_seconds;
delete[] avg_bucket_seconds;
delete[] order;
}
}
#endif
#if USE_CUDA_BSSN
namespace {
    // Positions, within the BSSN state list, of the three fields handled by
    // the BH-field sync / interpolation helpers in this file.
    // NOTE(review): presumably the shift components — confirm against the
    // state-list ordering used by the CUDA backend.
    static const int k_bssn_cuda_bh_state_indices[3] = {18, 19, 20};

    // Collects, for one block, the host array of every variable in `vars`.
    //   cg          block whose fgfs[] arrays are referenced
    //   vars        variable list; must hold exactly BSSN_CUDA_STATE_COUNT entries
    //   host_views  out: host_views[i] = cg->fgfs[sgfn of the i-th variable]
    //   propspeeds  optional out: propspeed of each variable
    //   soa_flat    optional out: the three SoA values of each variable, flattened
    // Returns true only when the list length is exactly BSSN_CUDA_STATE_COUNT.
    // A null `cg` or `host_views` yields false instead of being dereferenced,
    // matching the validation done by fill_bssn_cuda_views_count.
    bool fill_bssn_cuda_views(Block *cg, MyList<var> *vars,
                              double **host_views,
                              double *propspeeds = nullptr,
                              double *soa_flat = nullptr)
    {
        if (!cg || !host_views)
            return false;

        int idx = 0;
        for (; vars && idx < BSSN_CUDA_STATE_COUNT; vars = vars->next, ++idx)
        {
            host_views[idx] = cg->fgfs[vars->data->sgfn];
            if (propspeeds)
                propspeeds[idx] = vars->data->propspeed;
            if (soa_flat)
            {
                for (int c = 0; c < 3; ++c)
                    soa_flat[3 * idx + c] = vars->data->SoA[c];
            }
        }
        // Success requires the list to end exactly when the table is full.
        return idx == BSSN_CUDA_STATE_COUNT && vars == 0;
    }

    // Length of the variable list, or -1 as soon as it is found to be longer
    // than BSSN_EM_CUDA_STATE_COUNT (the capacity of the caller-side tables).
    int count_bssn_cuda_state_list(MyList<var> *vars)
    {
        int count = 0;
        for (; vars; vars = vars->next)
        {
            ++count;
            if (count > BSSN_EM_CUDA_STATE_COUNT)
                return -1;
        }
        return count;
    }

    // Variant of fill_bssn_cuda_views for a caller-supplied list length.
    // Returns false for null arguments, for a state_count outside
    // [1, BSSN_EM_CUDA_STATE_COUNT], or when the list length differs from it.
    bool fill_bssn_cuda_views_count(Block *cg, MyList<var> *vars,
                                    int state_count,
                                    double **host_views)
    {
        if (!cg || !host_views)
            return false;
        if (state_count <= 0 || state_count > BSSN_EM_CUDA_STATE_COUNT)
            return false;

        int idx = 0;
        for (; vars && idx < state_count; vars = vars->next, ++idx)
            host_views[idx] = cg->fgfs[vars->data->sgfn];

        return idx == state_count && vars == 0;
    }
} // namespace
// Tells whether resident-state synchronisation is used on level `lev`.
// Currently unconditional: the level argument is accepted but ignored.
bool bssn_cuda_use_resident_sync(int lev)
{
    static_cast<void>(lev); // intentionally unused
    const bool always_on = true;
    return always_on;
}
// Decides whether a level's state may stay resident on the device after a step.
//   lev           level being stepped
//   trfls_in      first level that uses time refinement
//   analysis_lev  level consumed by analysis; never kept resident
// Controlled by two environment variables, each read once on the first call:
//   AMSS_CUDA_KEEP_RESIDENT_AFTER_STEP  master switch (off by default)
//   AMSS_CUDA_KEEP_ALL_LEVELS           lifts the "lev < trfls_in" restriction
bool bssn_cuda_keep_resident_after_step(int lev, int trfls_in, int analysis_lev)
{
    static int all_levels_flag = -1; // -1 = environment not read yet
    static int master_flag = -1;     // -1 = environment not read yet

    if (all_levels_flag < 0)
    {
        const char *raw = getenv("AMSS_CUDA_KEEP_ALL_LEVELS");
        all_levels_flag = (raw != 0 && atoi(raw) != 0) ? 1 : 0;
    }
    if (master_flag < 0)
    {
        const char *raw = getenv("AMSS_CUDA_KEEP_RESIDENT_AFTER_STEP");
        master_flag = (raw != 0 && atoi(raw) != 0) ? 1 : 0;
    }

    if (master_flag == 0 || lev == analysis_lev)
        return false;

    // Conservative default: high time-refinement levels still have scattered
    // CPU consumers outside the RK4/AMR exchange path.
    return all_levels_flag != 0 ? true : (lev < trfls_in);
}
// Tells whether constraints on level `lev` must be recomputed from the state.
// Levels above 0 always recompute. On level 0 a CUDA build recomputes only
// when the cached constraint data is not valid; a non-CUDA build never does.
bool bssn_constraint_recompute_from_state(int lev, bool level0_cache_valid)
{
    if (lev > 0)
        return true;
#if USE_CUDA_BSSN
    return !level0_cache_valid;
#else
    (void)level0_cache_valid;
    return false;
#endif
}
// Moves a subset of state fields between host arrays and the block's
// device-resident state.
//   cg             block to synchronise (null is treated as "nothing to do")
//   subset_count   number of fields in the subset
//   state_indices  state-list index of each field
//   host_views     host array of each field
//   upload         true: host -> device, false: device -> host
// Returns true when nothing had to be done or the transfer reported 0.
bool bssn_cuda_sync_subset(Block *cg,
                           int subset_count,
                           const int *state_indices,
                           double **host_views,
                           bool upload)
{
    // No block, empty subset, or no resident state: trivially successful.
    const bool nothing_to_do =
        (cg == 0) || (subset_count <= 0) || !bssn_cuda_has_resident_state(cg);
    if (nothing_to_do)
        return true;

    if (!upload)
        return bssn_cuda_download_state_subset(cg, cg->shape, subset_count, state_indices, host_views) == 0;
    return bssn_cuda_upload_state_subset(cg, cg->shape, subset_count, state_indices, host_views) == 0;
}
// Synchronises the three BH fields (forx, fory, forz) of one block with its
// device-resident state, using the state indices in
// k_bssn_cuda_bh_state_indices.
//   upload  true: host -> device, false: device -> host
// Returns the result of bssn_cuda_sync_subset. A null block is reported as
// success, which is how bssn_cuda_sync_subset treats it; the check is made
// here because cg->fgfs is read before that function is reached.
bool bssn_cuda_sync_bh_fields(Block *cg, var *forx, var *fory, var *forz, bool upload)
{
    if (!cg)
        return true;

    double *bh_fields[3] = {
        cg->fgfs[forx->sgfn], cg->fgfs[fory->sgfn], cg->fgfs[forz->sgfn]};
    return bssn_cuda_sync_subset(cg, 3, k_bssn_cuda_bh_state_indices, bh_fields, upload);
}
// Tests whether `point` lies inside `patch` after trimming lli[d] grid
// spacings from the lower face and uui[d] from the upper face in every
// direction. Bounds are inclusive. A null patch contains nothing.
bool bssn_cuda_patch_contains_point(Patch *patch, const double *point)
{
    if (patch == 0)
        return false;

    int d = 0;
    while (d < dim)
    {
        const double spacing = patch->getdX(d);
        const double lower = patch->bbox[d] + patch->lli[d] * spacing;
        const double upper = patch->bbox[dim + d] - patch->uui[d] * spacing;
        const bool below = point[d] < lower;
        const bool above = point[d] > upper;
        if (below || above)
            return false;
        ++d;
    }
    return true;
}
// Tests whether `point` falls in the part of `block` that the block owns.
// On a face shared with the patch boundary (within DH/2) the patch's lli/uui
// trim applies; on an interior face a ghost-zone margin is removed instead
// (ghost_width - 0.5 spacings for Vertex grids, ghost_width for Cell grids).
// The point may overshoot the resulting bounds by up to DH/2.
//   DH  grid spacing per direction
bool bssn_cuda_point_in_block(Patch *patch, Block *block,
                              const double *point, const double *DH)
{
#ifdef Vertex
#ifdef Cell
#error Both Cell and Vertex are defined
#endif
    const double interior_margin = ghost_width - 0.5;
#else
#ifdef Cell
    const double interior_margin = ghost_width;
#else
#error Not define Vertex nor Cell
#endif
#endif
    if (!patch || !block)
        return false;

    for (int d = 0; d < dim; ++d)
    {
        const double h = DH[d];
        const double half_cell = h / 2;
        const double block_lo = block->bbox[d];
        const double block_hi = block->bbox[dim + d];

        double lower;
        if (feq(block_lo, patch->bbox[d], half_cell))
            lower = block_lo + patch->lli[d] * h;
        else
            lower = block_lo + interior_margin * h;

        double upper;
        if (feq(block_hi, patch->bbox[dim + d], half_cell))
            upper = block_hi - patch->uui[d] * h;
        else
            upper = block_hi - interior_margin * h;

        const bool too_low = point[d] - lower < -half_cell;
        const bool too_high = point[d] - upper > half_cell;
        if (too_low || too_high)
            return false;
    }
    return true;
}
// First index of an `ordn`-wide interpolation window around `x` along one
// axis, clamped so the window stays inside [0, n).
//   coords  coordinate array of the axis (only coords[0] is read)
//   n       number of points along the axis
//   dx      grid spacing
// Returns 0 when `coords` is null or the axis has no more than `ordn` points.
int bssn_cuda_interp_tile_start(const double *coords, int n, double x, double dx, int ordn)
{
    if (coords == 0 || n <= ordn)
        return 0;

    // Index estimate: offset in grid spacings plus 0.4, truncated, plus one.
    const int nearest = int((x - coords[0]) / dx + 0.4) + 1;
    const int wanted = nearest - ordn / 2;
    const int last_valid = n - ordn;

    if (wanted < 0)
        return 0;
    return (wanted > last_valid) ? last_valid : wanted;
}
// Interpolates the three fields (forx, fory, forz) at `point` and delivers
// the result to every rank.
//
// PatL      patch list to search
// myrank    rank of the calling process
// point     coordinates of the interpolation point (point[0..2] are read)
// Symmetry  symmetry flag handed through to the interpolation routines
// shellf    out: the three interpolated values
//
// Returns false when no block containing the point was found; otherwise the
// owning rank's values are broadcast into shellf and true is returned.
//
// The owning rank uses one of three paths:
//   1. bssn_cuda_interp_state_point3 on the resident state;
//   2. if that call returns non-zero, an ordn^3 tile per field is packed
//      from the device and interpolated on the host with f_global_interp;
//   3. without resident state (or for blocks narrower than ordn) the host
//      arrays in block->fgfs are interpolated directly.
// NOTE(review): path 3 is also taken when resident state exists but the
// block is narrower than ordn; confirm the host arrays are current then.
// NOTE(review): MPI_Bcast is collective, so every rank has to arrive at the
// same owner_rank — this presumably relies on all ranks holding identical
// patch/block lists; verify.
bool bssn_cuda_interp_bh_point_resident(MyList<Patch> *PatL,
int myrank,
const double *point,
var *forx, var *fory, var *forz,
int Symmetry,
double *shellf)
{
// Interpolation stencil width.
const int ordn = 2 * ghost_width;
// Rank owning the block that contains the point; -1 until found.
int owner_rank = -1;
shellf[0] = shellf[1] = shellf[2] = 0.0;
MyList<Patch> *PL = PatL;
while (PL)
{
Patch *patch = PL->data;
// Skip patches that do not contain the point.
if (!bssn_cuda_patch_contains_point(patch, point))
{
PL = PL->next;
continue;
}
double DH[dim];
for (int d = 0; d < dim; d++)
DH[d] = patch->getdX(d);
// Walk the blocks of this patch (from blb up to and including ble).
MyList<Block> *BP = patch->blb;
while (BP)
{
Block *block = BP->data;
if (bssn_cuda_point_in_block(patch, block, point, DH))
{
// Every rank records the owner; only the owner interpolates.
owner_rank = block->rank;
if (myrank == owner_rank)
{
int interp_ordn = ordn;
int interp_sym = Symmetry;
double x = point[0];
double y = point[1];
double z = point[2];
// Device path requires resident state and at least ordn points per direction.
if (bssn_cuda_has_resident_state(block) &&
block->shape[0] >= ordn && block->shape[1] >= ordn && block->shape[2] >= ordn)
{
var *vars[3] = {forx, fory, forz};
double *bh_host_key[3] = {
block->fgfs[forx->sgfn],
block->fgfs[fory->sgfn],
block->fgfs[forz->sgfn]
};
// Flatten the SoA triple of each field: soa3[3*f + c].
double soa3[9];
for (int f = 0; f < 3; f++)
{
soa3[3 * f + 0] = vars[f]->SoA[0];
soa3[3 * f + 1] = vars[f]->SoA[1];
soa3[3 * f + 2] = vars[f]->SoA[2];
}
// Path 1: interpolate on the resident state; non-zero return selects the fallback.
if (bssn_cuda_interp_state_point3(block, block->shape,
k_bssn_cuda_bh_state_indices[0],
k_bssn_cuda_bh_state_indices[1],
k_bssn_cuda_bh_state_indices[2],
block->X[0][0], block->X[1][0], block->X[2][0],
DH[0], DH[1], DH[2],
x, y, z,
interp_ordn, interp_sym,
bh_host_key,
soa3, shellf) != 0)
{
// Path 2: fetch only the ordn^3 tile around the point and interpolate on the host.
const int sx = ordn;
const int sy = ordn;
const int sz = ordn;
const int region_all = sx * sy * sz;
// Lower corner of the tile, clamped to the block extents.
const int i0 = bssn_cuda_interp_tile_start(block->X[0], block->shape[0], x, DH[0], ordn);
const int j0 = bssn_cuda_interp_tile_start(block->X[1], block->shape[1], y, DH[1], ordn);
const int k0 = bssn_cuda_interp_tile_start(block->X[2], block->shape[2], z, DH[2], ordn);
// One contiguous tile per field.
double packed_fields[3 * region_all];
for (int f = 0; f < 3; f++)
{
if (bssn_cuda_pack_state_region_to_host_buffer(block,
k_bssn_cuda_bh_state_indices[f],
packed_fields + f * region_all,
block->shape,
i0, j0, k0,
sx, sy, sz) != 0)
{
cout << "CUDA BH tile download failed" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
// Interpolate within the tile, using coordinate arrays offset to its corner.
int tile_shape[3] = {sx, sy, sz};
f_global_interp(tile_shape,
block->X[0] + i0,
block->X[1] + j0,
block->X[2] + k0,
packed_fields + f * region_all,
shellf[f],
x, y, z,
interp_ordn,
vars[f]->SoA,
interp_sym);
}
}
}
else
{
// Path 3: interpolate directly on the host arrays of the block.
f_global_interp(block->shape, block->X[0], block->X[1], block->X[2],
block->fgfs[forx->sgfn], shellf[0],
x, y, z, interp_ordn, forx->SoA, interp_sym);
f_global_interp(block->shape, block->X[0], block->X[1], block->X[2],
block->fgfs[fory->sgfn], shellf[1],
x, y, z, interp_ordn, fory->SoA, interp_sym);
f_global_interp(block->shape, block->X[0], block->X[1], block->X[2],
block->fgfs[forz->sgfn], shellf[2],
x, y, z, interp_ordn, forz->SoA, interp_sym);
}
}
// The first matching block wins.
break;
}
if (BP == patch->ble)
break;
BP = BP->next;
}
// Stop searching further patches once an owner is known.
if (owner_rank >= 0)
break;
PL = PL->next;
}
if (owner_rank < 0)
return false;
// Distribute the owner's values to all ranks.
MPI_Bcast(shellf, 3, MPI_DOUBLE, owner_rank, MPI_COMM_WORLD);
return true;
}
// Copies the device-resident state of every locally owned block on a level
// back into the host arrays named by `vars`.
//   PatL         patches of the level
//   vars         variable list selecting the host arrays
//   myrank       rank of the calling process; foreign blocks are skipped
//   release_ctx  when true, the block's step context is released afterwards
// The download routine is chosen by the list length. A list that does not
// match, or a failed download, aborts the whole MPI job.
void bssn_cuda_download_level_state(MyList<Patch> *PatL, MyList<var> *vars, int myrank, bool release_ctx)
{
    const int n_state = count_bssn_cuda_state_list(vars);

    for (MyList<Patch> *patch_node = PatL; patch_node; patch_node = patch_node->next)
    {
        for (MyList<Block> *block_node = patch_node->data->blb; block_node; block_node = block_node->next)
        {
            Block *cg = block_node->data;
            const bool mine_and_resident =
                (myrank == cg->rank) && bssn_cuda_has_resident_state(cg);
            if (mine_and_resident)
            {
                double *state_out[BSSN_EM_CUDA_STATE_COUNT];
                if (!fill_bssn_cuda_views_count(cg, vars, n_state, state_out))
                {
                    cout << "CUDA BSSN state list mismatch on resident state download" << endl;
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }

                int rc;
                if (n_state == BSSN_ESCALAR_CUDA_STATE_COUNT)
                    rc = bssn_escalar_cuda_download_resident_state(cg, cg->shape, state_out);
                else if (n_state == BSSN_CUDA_STATE_COUNT)
                    rc = bssn_cuda_download_resident_state(cg, cg->shape, state_out);
                else
                    rc = bssn_cuda_download_resident_state_count_if_present(cg, cg->shape, state_out, n_state);

                if (rc != 0)
                {
                    cout << "CUDA resident state download failed" << endl;
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
                if (release_ctx)
                    bssn_cuda_release_step_ctx(cg);
            }
            // `ble` marks the last block that belongs to this patch.
            if (block_node == patch_node->data->ble)
                break;
        }
    }
}
// For every locally owned block with resident state, calls
// bssn_cuda_download_resident_state_count_if_present on the host arrays
// named by `vars`. Nothing is released afterwards.
// A list mismatch or a non-zero download result aborts the whole MPI job.
void bssn_cuda_download_level_state_if_present(MyList<Patch> *PatL, MyList<var> *vars, int myrank)
{
    const int n_state = count_bssn_cuda_state_list(vars);

    for (MyList<Patch> *patch_node = PatL; patch_node; patch_node = patch_node->next)
    {
        for (MyList<Block> *block_node = patch_node->data->blb; block_node; block_node = block_node->next)
        {
            Block *cg = block_node->data;
            const bool mine_and_resident =
                (myrank == cg->rank) && bssn_cuda_has_resident_state(cg);
            if (mine_and_resident)
            {
                double *state_out[BSSN_EM_CUDA_STATE_COUNT];
                const bool views_ok = fill_bssn_cuda_views_count(cg, vars, n_state, state_out);
                if (!views_ok)
                {
                    cout << "CUDA BSSN state list mismatch on resident state conditional download" << endl;
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
                if (bssn_cuda_download_resident_state_count_if_present(cg, cg->shape, state_out, n_state))
                {
                    cout << "CUDA resident state conditional download failed" << endl;
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
            }
            // `ble` marks the last block that belongs to this patch.
            if (block_node == patch_node->data->ble)
                break;
        }
    }
}
// Releases the step context of every locally owned block on the level that
// currently has resident state. Nothing is downloaded first.
void bssn_cuda_release_level_state(MyList<Patch> *PatL, int myrank)
{
    for (MyList<Patch> *patch_node = PatL; patch_node; patch_node = patch_node->next)
    {
        for (MyList<Block> *block_node = patch_node->data->blb; block_node; block_node = block_node->next)
        {
            Block *cg = block_node->data;
            if (myrank == cg->rank)
            {
                if (bssn_cuda_has_resident_state(cg))
                    bssn_cuda_release_step_ctx(cg);
            }
            // `ble` marks the last block that belongs to this patch.
            if (block_node == patch_node->data->ble)
                break;
        }
    }
}
// Brings a level's host arrays up to date before a regrid: the four variable
// lists are downloaded (where present) in the order corL, oldL, stateL, preL,
// and then the level's resident state is released.
void bssn_cuda_flush_level_before_regrid(MyList<Patch> *PatL,
                                         MyList<var> *corL,
                                         MyList<var> *oldL,
                                         MyList<var> *stateL,
                                         MyList<var> *preL,
                                         int myrank)
{
    MyList<var> *const flush_order[4] = {corL, oldL, stateL, preL};
    for (int which = 0; which < 4; ++which)
        bssn_cuda_download_level_state_if_present(PatL, flush_order[which], myrank);

    bssn_cuda_release_level_state(PatL, myrank);
}
// True when AMSS_CUDA_AMR_RESTRICT_DEVICE is set to a non-zero value.
// The environment is read on the first call only.
// NOTE(review): the variable name refers to device-side AMR restriction,
// not to regrid flushing — confirm this coupling is intended.
bool bssn_cuda_regrid_flush_enabled()
{
    static bool resolved = false;
    static bool flag = false;
    if (!resolved)
    {
        const char *raw = getenv("AMSS_CUDA_AMR_RESTRICT_DEVICE");
        flag = (raw != 0) && (atoi(raw) != 0);
        resolved = true;
    }
    return flag;
}
// True when AMSS_CUDA_REGRID_FLUSH_ALWAYS is set to a non-zero value.
// The environment is read on the first call only.
bool bssn_cuda_regrid_flush_always_enabled()
{
    static bool resolved = false;
    static bool flag = false;
    if (!resolved)
    {
        const char *raw = getenv("AMSS_CUDA_REGRID_FLUSH_ALWAYS");
        flag = (raw != 0) && (atoi(raw) != 0);
        resolved = true;
    }
    return flag;
}
// Predicts whether level `lev` would be moved by a regrid, given the current
// black-hole positions.
//   GH        grid hierarchy
//   lev       level to examine; levels outside [GH->movls, GH->levels) never move
//   Symmetry  symmetry flag; 2 clamps x and y at zero, any value > 0 clamps z
//   BH_num    number of black holes
//   Porg0     current black-hole positions, Porg0[bh][axis]
// Returns true when at least one grid of the level would shift by more than
// half a grid spacing along some axis, or when that cannot be decided
// (no block data, or a grid that cannot be matched to a black hole).
//
// Fix: the z-axis symmetry clamp now uses the grid's z lower bound
// (bbox[..][2]); it previously read the y lower bound (bbox[..][1]).
// NOTE(review): if the regrid routine this predicts contains the same index,
// the two must be corrected together.
bool bssn_cuda_will_regrid_onelevel(cgh *GH, int lev, int Symmetry, int BH_num, double **Porg0)
{
    if (!GH || lev < GH->movls || lev >= GH->levels || !GH->PatL[lev])
        return false;
    if (!GH->PatL[lev]->data || !GH->PatL[lev]->data->blb || !GH->PatL[lev]->data->blb->data)
        return true;

    const int n_axes = 3;
    // Shifts are quantised to multiples of `do_every` grid spacings.
    const int do_every = 2;

    // Grid spacing per axis, taken from the first block of the level.
    double h[n_axes];
    for (int d = 0; d < n_axes; ++d)
        h[d] = GH->PatL[lev]->data->blb->data->getdX(d);

    for (int grd = 0; grd < GH->grids[lev]; grd++)
    {
        // Find the black hole whose recorded origin matches this grid's handle.
        int bhi = 0;
        for (bhi = 0; bhi < BH_num; bhi++)
        {
            if (feq(GH->Porgls[lev][bhi][0], GH->handle[lev][grd][0], 2 * do_every * h[0]) &&
                feq(GH->Porgls[lev][bhi][1], GH->handle[lev][grd][1], 2 * do_every * h[1]) &&
                feq(GH->Porgls[lev][bhi][2], GH->handle[lev][grd][2], 2 * do_every * h[2]))
                break;
        }
        if (bhi == BH_num)
        {
            // A grid whose lower corner sits at the origin is skipped.
            if (feq(0, GH->bbox[lev][grd][0], h[0] / 2) &&
                feq(0, GH->bbox[lev][grd][1], h[1] / 2) &&
                feq(0, GH->bbox[lev][grd][2], h[2] / 2))
                continue;
            if (BH_num == 1)
                bhi = 0;
            else
                return true; // cannot attribute the grid to a black hole
        }

        for (int d = 0; d < n_axes; ++d)
        {
            // Offset of the black hole from the grid handle, in grid spacings.
            double rr = (Porg0[bhi][d] - GH->handle[lev][grd][d]) / h[d];
            // Round to the nearest integer, then truncate to a multiple of do_every.
            const int flag = (rr > 0) ? int(rr + 0.5) / do_every : int(rr - 0.5) / do_every;
            rr = flag * do_every * h[d];

            // Do not let the lower bound cross a symmetry plane.
            const bool clamp_at_plane = (d == 2) ? (Symmetry > 0) : (Symmetry == 2);
            if (clamp_at_plane && GH->bbox[lev][grd][d] + rr < 0)
                rr = -GH->bbox[lev][grd][d];

            if (fabs(rr) > h[d] / 2)
                return true;
        }
    }
    return false;
}
// Decides whether the device state of level `lev` has to be flushed to the
// host before regridding.
//   - flushing disabled: never;
//   - "flush always" enabled: whenever `lev` is a movable level with patches;
//   - otherwise: only when a regrid of the level is predicted.
bool bssn_cuda_should_flush_before_regrid(cgh *GH, int lev, int Symmetry, int BH_num, double **Porg0)
{
    if (bssn_cuda_regrid_flush_enabled())
    {
        if (!bssn_cuda_regrid_flush_always_enabled())
            return bssn_cuda_will_regrid_onelevel(GH, lev, Symmetry, BH_num, Porg0);
        return GH && lev >= GH->movls && lev < GH->levels && GH->PatL[lev];
    }
    return false;
}
// Downloads the three BH fields (forx, fory, forz) from the device for every
// locally owned block of the level. Any failure aborts the whole MPI job.
void bssn_cuda_sync_level_bh_fields(MyList<Patch> *PatL,
                                    int myrank,
                                    var *forx, var *fory, var *forz)
{
    for (MyList<Patch> *patch_node = PatL; patch_node; patch_node = patch_node->next)
    {
        for (MyList<Block> *block_node = patch_node->data->blb; block_node; block_node = block_node->next)
        {
            Block *cg = block_node->data;
            if (myrank == cg->rank)
            {
                // upload == false selects the device -> host direction.
                const bool ok = bssn_cuda_sync_bh_fields(cg, forx, fory, forz, false);
                if (!ok)
                {
                    cout << "CUDA BH state subset download failed" << endl;
                    MPI_Abort(MPI_COMM_WORLD, 1);
                }
            }
            // `ble` marks the last block that belongs to this patch.
            if (block_node == patch_node->data->ble)
                break;
        }
    }
}
#endif
#if !USE_CUDA_BSSN
namespace {
    // Non-CUDA build: constraints are recomputed from the state on every
    // level above 0; the cache flag is accepted but not consulted.
    bool bssn_constraint_recompute_from_state(int lev, bool level0_cache_valid)
    {
        static_cast<void>(level0_cache_valid);
        const bool above_base_level = (lev > 0);
        return above_base_level;
    }
} // namespace
#endif
#if USE_CUDA_BSSN
// Tells whether BH-point interpolation on the resident device state is
// enabled. On by default; AMSS_CUDA_BH_INTERP_RESIDENT overrides the default
// (non-zero = on, zero = off). The environment is read on the first call only.
// The default is the same for every ABEtype, so no build-type switch is needed.
bool bssn_cuda_bh_interp_resident_enabled()
{
    static int cached = -1; // -1 = environment not read yet
    if (cached < 0)
    {
        const char *raw = getenv("AMSS_CUDA_BH_INTERP_RESIDENT");
        const bool on = (raw == 0) || (atoi(raw) != 0);
        cached = on ? 1 : 0;
    }
    return cached != 0;
}
#endif
//================================================================================================
// define bssn_class
//================================================================================================
bssn_class::bssn_class(double Couranti, double StartTimei, double TotalTimei,
double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei,
int Symmetryi, int checkruni, char *checkfilenamei,
double numepssi, double numepsbi, double numepshi,
int a_levi, int maxli, int decni, double maxrexi, double drexi)
: Courant(Couranti), StartTime(StartTimei), TotalTime(TotalTimei),
DumpTime(DumpTimei), d2DumpTime(d2DumpTimei), CheckTime(CheckTimei), AnasTime(AnasTimei),
cuda_level0_constraint_cache_valid(false),
ConstraintRefreshLevels(0),
Symmetry(Symmetryi), checkrun(checkruni), numepss(numepssi), numepsb(numepsbi), numepsh(numepshi),
#ifdef With_AHF
xc(0), yc(0), zc(0), xr(0), yr(0), zr(0), trigger(0), dTT(0), dumpid(0),
#endif
a_lev(a_levi), maxl(maxli), decn(decni), maxrex(maxrexi), drex(drexi),
CheckPoint(0)
// CheckPoint(0)
{
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
// setup Monitors
{
stringstream a_stream;
a_stream.setf(ios::left);
a_stream << "# Error log information";
ErrorMonitor = new monitor("Error.log", myrank, a_stream.str());
ErrorMonitor->print_message("Warning: we always assume intput parameter in cell center style.");
a_stream.clear();
a_stream.str("");
a_stream << setw(15) << "# time";
char str[50];
for (int pl = 2; pl < maxl + 1; pl++)
for (int pm = -pl; pm < pl + 1; pm++)
{
sprintf(str, "R%02dm%03d", pl, pm);
a_stream << setw(16) << str;
sprintf(str, "I%02dm%03d", pl, pm);
a_stream << setw(16) << str;
}
Psi4Monitor = new monitor("bssn_psi4.dat", myrank, a_stream.str());
a_stream.clear();
a_stream.str("");
a_stream << setw(15) << "# time";
BHMonitor = new monitor("bssn_BH.dat", myrank, a_stream.str());
a_stream.clear();
a_stream.str("");
a_stream << setw(15) << "# time ADMmass ADMPx ADMPy ADMPz ADMSx ADMSy ADMSz";
MAPMonitor = new monitor("bssn_ADMQs.dat", myrank, a_stream.str());
a_stream.clear();
a_stream.str("");
a_stream << setw(15) << "# time Ham Px Py Pz Gx Gy Gz";
ConVMonitor = new monitor("bssn_constraint.dat", myrank, a_stream.str());
#if BSSN_FINE_TIMING
a_stream.clear();
a_stream.str("");
a_stream << setw(8) << "# step";
a_stream << setw(14) << "wall";
for (int ib = 0; ib < step_timing::TB_COUNT; ib++)
a_stream << setw(18) << step_timing::bucket_labels[ib];
for (int ib = 0; ib < step_timing::TB_COUNT; ib++)
{
char str_avg[64];
sprintf(str_avg, "avg_%s", step_timing::bucket_labels[ib]);
a_stream << setw(18) << str_avg;
}
TimingMonitor = new monitor("bssn_step_timing.dat", myrank, a_stream.str());
#else
TimingMonitor = 0;
#endif
}
// setup sphere integration engine
Waveshell = new surface_integral(Symmetry);
trfls = 0;
chitiny = 0;
// read parameter from file
{
char filename[50];
{
map<string, string>::iterator iter = parameters::str_par.find("inputpar");
if (iter != parameters::str_par.end())
{
strcpy(filename, (iter->second).c_str());
}
else
{
cout << "Error inputpar" << endl;
exit(0);
}
}
const int LEN = 256;
char pline[LEN];
string str, sgrp, skey, sval;
int sind;
ifstream inf(filename, ifstream::in);
if (!inf.good() && myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "Can not open parameter file " << filename
<< " for inputing information of black holes" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
for (int i = 1; inf.good(); i++)
{
inf.getline(pline, LEN);
str = pline;
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
if (status == -1)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "error reading parameter file " << filename
<< " in line " << i << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
else if (status == 0)
continue;
if (sgrp == "BSSN" && skey == "chitiny")
chitiny = atof(sval.c_str());
else if (sgrp == "BSSN" && skey == "time refinement start from level")
trfls = atoi(sval.c_str());
#ifdef With_AHF
else if (sgrp == "AHF" && skey == "AHfindevery")
AHfindevery = atoi(sval.c_str());
else if (sgrp == "AHF" && skey == "AHdumptime")
AHdumptime = atof(sval.c_str());
#endif
}
inf.close();
}
if (myrank == 0)
{
// echo information of lower bound of chi
cout << " chitiny = " << chitiny << endl;
cout << " time refinement start from level #" << trfls << endl;
#ifdef With_AHF
cout << " parameters for AHF:" << endl;
cout << " AHfindevery = " << AHfindevery << endl;
cout << " AHdumptime = " << AHdumptime << endl;
#endif
}
chitiny = chitiny - 1; // because we have subtracted one from chi
strcpy(checkfilename, checkfilenamei);
ngfs = 0;
phio = new var("phio", ngfs++, 1, 1, 1);
trKo = new var("trKo", ngfs++, 1, 1, 1);
gxxo = new var("gxxo", ngfs++, 1, 1, 1);
gxyo = new var("gxyo", ngfs++, -1, -1, 1);
gxzo = new var("gxzo", ngfs++, -1, 1, -1);
gyyo = new var("gyyo", ngfs++, 1, 1, 1);
gyzo = new var("gyzo", ngfs++, 1, -1, -1);
gzzo = new var("gzzo", ngfs++, 1, 1, 1);
Axxo = new var("Axxo", ngfs++, 1, 1, 1);
Axyo = new var("Axyo", ngfs++, -1, -1, 1);
Axzo = new var("Axzo", ngfs++, -1, 1, -1);
Ayyo = new var("Ayyo", ngfs++, 1, 1, 1);
Ayzo = new var("Ayzo", ngfs++, 1, -1, -1);
Azzo = new var("Azzo", ngfs++, 1, 1, 1);
Gmxo = new var("Gmxo", ngfs++, -1, 1, 1);
Gmyo = new var("Gmyo", ngfs++, 1, -1, 1);
Gmzo = new var("Gmzo", ngfs++, 1, 1, -1);
Lapo = new var("Lapo", ngfs++, 1, 1, 1);
Sfxo = new var("Sfxo", ngfs++, -1, 1, 1);
Sfyo = new var("Sfyo", ngfs++, 1, -1, 1);
Sfzo = new var("Sfzo", ngfs++, 1, 1, -1);
dtSfxo = new var("dtSfxo", ngfs++, -1, 1, 1);
dtSfyo = new var("dtSfyo", ngfs++, 1, -1, 1);
dtSfzo = new var("dtSfzo", ngfs++, 1, 1, -1);
phi0 = new var("phi0", ngfs++, 1, 1, 1);
trK0 = new var("trK0", ngfs++, 1, 1, 1);
gxx0 = new var("gxx0", ngfs++, 1, 1, 1);
gxy0 = new var("gxy0", ngfs++, -1, -1, 1);
gxz0 = new var("gxz0", ngfs++, -1, 1, -1);
gyy0 = new var("gyy0", ngfs++, 1, 1, 1);
gyz0 = new var("gyz0", ngfs++, 1, -1, -1);
gzz0 = new var("gzz0", ngfs++, 1, 1, 1);
Axx0 = new var("Axx0", ngfs++, 1, 1, 1);
Axy0 = new var("Axy0", ngfs++, -1, -1, 1);
Axz0 = new var("Axz0", ngfs++, -1, 1, -1);
Ayy0 = new var("Ayy0", ngfs++, 1, 1, 1);
Ayz0 = new var("Ayz0", ngfs++, 1, -1, -1);
Azz0 = new var("Azz0", ngfs++, 1, 1, 1);
Gmx0 = new var("Gmx0", ngfs++, -1, 1, 1);
Gmy0 = new var("Gmy0", ngfs++, 1, -1, 1);
Gmz0 = new var("Gmz0", ngfs++, 1, 1, -1);
Lap0 = new var("Lap0", ngfs++, 1, 1, 1);
Sfx0 = new var("Sfx0", ngfs++, -1, 1, 1);
Sfy0 = new var("Sfy0", ngfs++, 1, -1, 1);
Sfz0 = new var("Sfz0", ngfs++, 1, 1, -1);
dtSfx0 = new var("dtSfx0", ngfs++, -1, 1, 1);
dtSfy0 = new var("dtSfy0", ngfs++, 1, -1, 1);
dtSfz0 = new var("dtSfz0", ngfs++, 1, 1, -1);
phi = new var("phi", ngfs++, 1, 1, 1);
trK = new var("trK", ngfs++, 1, 1, 1);
gxx = new var("gxx", ngfs++, 1, 1, 1);
gxy = new var("gxy", ngfs++, -1, -1, 1);
gxz = new var("gxz", ngfs++, -1, 1, -1);
gyy = new var("gyy", ngfs++, 1, 1, 1);
gyz = new var("gyz", ngfs++, 1, -1, -1);
gzz = new var("gzz", ngfs++, 1, 1, 1);
Axx = new var("Axx", ngfs++, 1, 1, 1);
Axy = new var("Axy", ngfs++, -1, -1, 1);
Axz = new var("Axz", ngfs++, -1, 1, -1);
Ayy = new var("Ayy", ngfs++, 1, 1, 1);
Ayz = new var("Ayz", ngfs++, 1, -1, -1);
Azz = new var("Azz", ngfs++, 1, 1, 1);
Gmx = new var("Gmx", ngfs++, -1, 1, 1);
Gmy = new var("Gmy", ngfs++, 1, -1, 1);
Gmz = new var("Gmz", ngfs++, 1, 1, -1);
Lap = new var("Lap", ngfs++, 1, 1, 1);
Sfx = new var("Sfx", ngfs++, -1, 1, 1);
Sfy = new var("Sfy", ngfs++, 1, -1, 1);
Sfz = new var("Sfz", ngfs++, 1, 1, -1);
dtSfx = new var("dtSfx", ngfs++, -1, 1, 1);
dtSfy = new var("dtSfy", ngfs++, 1, -1, 1);
dtSfz = new var("dtSfz", ngfs++, 1, 1, -1);
phi1 = new var("phi1", ngfs++, 1, 1, 1);
trK1 = new var("trK1", ngfs++, 1, 1, 1);
gxx1 = new var("gxx1", ngfs++, 1, 1, 1);
gxy1 = new var("gxy1", ngfs++, -1, -1, 1);
gxz1 = new var("gxz1", ngfs++, -1, 1, -1);
gyy1 = new var("gyy1", ngfs++, 1, 1, 1);
gyz1 = new var("gyz1", ngfs++, 1, -1, -1);
gzz1 = new var("gzz1", ngfs++, 1, 1, 1);
Axx1 = new var("Axx1", ngfs++, 1, 1, 1);
Axy1 = new var("Axy1", ngfs++, -1, -1, 1);
Axz1 = new var("Axz1", ngfs++, -1, 1, -1);
Ayy1 = new var("Ayy1", ngfs++, 1, 1, 1);
Ayz1 = new var("Ayz1", ngfs++, 1, -1, -1);
Azz1 = new var("Azz1", ngfs++, 1, 1, 1);
Gmx1 = new var("Gmx1", ngfs++, -1, 1, 1);
Gmy1 = new var("Gmy1", ngfs++, 1, -1, 1);
Gmz1 = new var("Gmz1", ngfs++, 1, 1, -1);
Lap1 = new var("Lap1", ngfs++, 1, 1, 1);
Sfx1 = new var("Sfx1", ngfs++, -1, 1, 1);
Sfy1 = new var("Sfy1", ngfs++, 1, -1, 1);
Sfz1 = new var("Sfz1", ngfs++, 1, 1, -1);
dtSfx1 = new var("dtSfx1", ngfs++, -1, 1, 1);
dtSfy1 = new var("dtSfy1", ngfs++, 1, -1, 1);
dtSfz1 = new var("dtSfz1", ngfs++, 1, 1, -1);
phi_rhs = new var("phi_rhs", ngfs++, 1, 1, 1);
trK_rhs = new var("trK_rhs", ngfs++, 1, 1, 1);
gxx_rhs = new var("gxx_rhs", ngfs++, 1, 1, 1);
gxy_rhs = new var("gxy_rhs", ngfs++, -1, -1, 1);
gxz_rhs = new var("gxz_rhs", ngfs++, -1, 1, -1);
gyy_rhs = new var("gyy_rhs", ngfs++, 1, 1, 1);
gyz_rhs = new var("gyz_rhs", ngfs++, 1, -1, -1);
gzz_rhs = new var("gzz_rhs", ngfs++, 1, 1, 1);
Axx_rhs = new var("Axx_rhs", ngfs++, 1, 1, 1);
Axy_rhs = new var("Axy_rhs", ngfs++, -1, -1, 1);
Axz_rhs = new var("Axz_rhs", ngfs++, -1, 1, -1);
Ayy_rhs = new var("Ayy_rhs", ngfs++, 1, 1, 1);
Ayz_rhs = new var("Ayz_rhs", ngfs++, 1, -1, -1);
Azz_rhs = new var("Azz_rhs", ngfs++, 1, 1, 1);
Gmx_rhs = new var("Gmx_rhs", ngfs++, -1, 1, 1);
Gmy_rhs = new var("Gmy_rhs", ngfs++, 1, -1, 1);
Gmz_rhs = new var("Gmz_rhs", ngfs++, 1, 1, -1);
Lap_rhs = new var("Lap_rhs", ngfs++, 1, 1, 1);
Sfx_rhs = new var("Sfx_rhs", ngfs++, -1, 1, 1);
Sfy_rhs = new var("Sfy_rhs", ngfs++, 1, -1, 1);
Sfz_rhs = new var("Sfz_rhs", ngfs++, 1, 1, -1);
dtSfx_rhs = new var("dtSfx_rhs", ngfs++, -1, 1, 1);
dtSfy_rhs = new var("dtSfy_rhs", ngfs++, 1, -1, 1);
dtSfz_rhs = new var("dtSfz_rhs", ngfs++, 1, 1, -1);
rho = new var("rho", ngfs++, 1, 1, 1);
Sx = new var("Sx", ngfs++, -1, 1, 1);
Sy = new var("Sy", ngfs++, 1, -1, 1);
Sz = new var("Sz", ngfs++, 1, 1, -1);
Sxx = new var("Sxx", ngfs++, 1, 1, 1);
Sxy = new var("Sxy", ngfs++, -1, -1, 1);
Sxz = new var("Sxz", ngfs++, -1, 1, -1);
Syy = new var("Syy", ngfs++, 1, 1, 1);
Syz = new var("Syz", ngfs++, 1, -1, -1);
Szz = new var("Szz", ngfs++, 1, 1, 1);
Gamxxx = new var("Gamxxx", ngfs++, -1, 1, 1);
Gamxxy = new var("Gamxxy", ngfs++, 1, -1, 1);
Gamxxz = new var("Gamxxz", ngfs++, 1, 1, -1);
Gamxyy = new var("Gamxyy", ngfs++, -1, 1, 1);
Gamxyz = new var("Gamxyz", ngfs++, -1, -1, -1);
Gamxzz = new var("Gamxzz", ngfs++, -1, 1, 1);
Gamyxx = new var("Gamyxx", ngfs++, 1, -1, 1);
Gamyxy = new var("Gamyxy", ngfs++, -1, 1, 1);
Gamyxz = new var("Gamyxz", ngfs++, -1, -1, -1);
Gamyyy = new var("Gamyyy", ngfs++, 1, -1, 1);
Gamyyz = new var("Gamyyz", ngfs++, 1, 1, -1);
Gamyzz = new var("Gamyzz", ngfs++, 1, -1, 1);
Gamzxx = new var("Gamzxx", ngfs++, 1, 1, -1);
Gamzxy = new var("Gamzxy", ngfs++, -1, -1, -1);
Gamzxz = new var("Gamzxz", ngfs++, -1, 1, 1);
Gamzyy = new var("Gamzyy", ngfs++, 1, 1, -1);
Gamzyz = new var("Gamzyz", ngfs++, 1, -1, 1);
Gamzzz = new var("Gamzzz", ngfs++, 1, 1, -1);
Rxx = new var("Rxx", ngfs++, 1, 1, 1);
Rxy = new var("Rxy", ngfs++, -1, -1, 1);
Rxz = new var("Rxz", ngfs++, -1, 1, -1);
Ryy = new var("Ryy", ngfs++, 1, 1, 1);
Ryz = new var("Ryz", ngfs++, 1, -1, -1);
Rzz = new var("Rzz", ngfs++, 1, 1, 1);
// refer to PRD, 77, 024027 (2008)
Rpsi4 = new var("Rpsi4", ngfs++, 1, 1, 1);
Ipsi4 = new var("Ipsi4", ngfs++, -1, -1, -1);
t1Rpsi4 = new var("t1Rpsi4", ngfs++, 1, 1, 1);
t1Ipsi4 = new var("t1Ipsi4", ngfs++, -1, -1, -1);
t2Rpsi4 = new var("t2Rpsi4", ngfs++, 1, 1, 1);
t2Ipsi4 = new var("t2Ipsi4", ngfs++, -1, -1, -1);
// constraint violation monitor variables
Cons_Ham = new var("Cons_Ham", ngfs++, 1, 1, 1);
Cons_Px = new var("Cons_Px", ngfs++, -1, 1, 1);
Cons_Py = new var("Cons_Py", ngfs++, 1, -1, 1);
Cons_Pz = new var("Cons_Pz", ngfs++, 1, 1, -1);
Cons_Gx = new var("Cons_Gx", ngfs++, -1, 1, 1);
Cons_Gy = new var("Cons_Gy", ngfs++, 1, -1, 1);
Cons_Gz = new var("Cons_Gz", ngfs++, 1, 1, -1);
#ifdef Point_Psi4
phix = new var("phix", ngfs++, -1, 1, 1);
phiy = new var("phiy", ngfs++, 1, -1, 1);
phiz = new var("phiz", ngfs++, 1, 1, -1);
trKx = new var("trKx", ngfs++, -1, 1, 1);
trKy = new var("trKy", ngfs++, 1, -1, 1);
trKz = new var("trKz", ngfs++, 1, 1, -1);
Axxx = new var("Axxx", ngfs++, -1, 1, 1);
Axxy = new var("Axxy", ngfs++, 1, -1, 1);
Axxz = new var("Axxz", ngfs++, 1, 1, -1);
Axyx = new var("Axyx", ngfs++, 1, -1, 1);
Axyy = new var("Axyy", ngfs++, -1, 1, 1);
Axyz = new var("Axyz", ngfs++, -1, -1, -1);
Axzx = new var("Axzx", ngfs++, 1, 1, -1);
Axzy = new var("Axzy", ngfs++, -1, -1, -1);
Axzz = new var("Axzz", ngfs++, -1, 1, 1);
Ayyx = new var("Ayyx", ngfs++, -1, 1, 1);
Ayyy = new var("Ayyy", ngfs++, 1, -1, 1);
Ayyz = new var("Ayyz", ngfs++, 1, 1, -1);
Ayzx = new var("Ayzx", ngfs++, -1, -1, -1);
Ayzy = new var("Ayzy", ngfs++, 1, 1, -1);
Ayzz = new var("Ayzz", ngfs++, 1, -1, 1);
Azzx = new var("Azzx", ngfs++, -1, 1, 1);
Azzy = new var("Azzy", ngfs++, 1, -1, 1);
Azzz = new var("Azzz", ngfs++, 1, 1, -1);
#endif
// specific properspeed for 1+log slice
{
const double vl = sqrt(2);
trKo->setpropspeed(vl);
trK0->setpropspeed(vl);
trK->setpropspeed(vl);
trK1->setpropspeed(vl);
trK_rhs->setpropspeed(vl);
phio->setpropspeed(vl);
phi0->setpropspeed(vl);
phi->setpropspeed(vl);
phi1->setpropspeed(vl);
phi_rhs->setpropspeed(vl);
Lapo->setpropspeed(vl);
Lap0->setpropspeed(vl);
Lap->setpropspeed(vl);
Lap1->setpropspeed(vl);
Lap_rhs->setpropspeed(vl);
}
OldStateList = new MyList<var>(phio);
OldStateList->insert(trKo);
OldStateList->insert(gxxo);
OldStateList->insert(gxyo);
OldStateList->insert(gxzo);
OldStateList->insert(gyyo);
OldStateList->insert(gyzo);
OldStateList->insert(gzzo);
OldStateList->insert(Axxo);
OldStateList->insert(Axyo);
OldStateList->insert(Axzo);
OldStateList->insert(Ayyo);
OldStateList->insert(Ayzo);
OldStateList->insert(Azzo);
OldStateList->insert(Gmxo);
OldStateList->insert(Gmyo);
OldStateList->insert(Gmzo);
OldStateList->insert(Lapo);
OldStateList->insert(Sfxo);
OldStateList->insert(Sfyo);
OldStateList->insert(Sfzo);
OldStateList->insert(dtSfxo);
OldStateList->insert(dtSfyo);
OldStateList->insert(dtSfzo);
StateList = new MyList<var>(phi0);
StateList->insert(trK0);
StateList->insert(gxx0);
StateList->insert(gxy0);
StateList->insert(gxz0);
StateList->insert(gyy0);
StateList->insert(gyz0);
StateList->insert(gzz0);
StateList->insert(Axx0);
StateList->insert(Axy0);
StateList->insert(Axz0);
StateList->insert(Ayy0);
StateList->insert(Ayz0);
StateList->insert(Azz0);
StateList->insert(Gmx0);
StateList->insert(Gmy0);
StateList->insert(Gmz0);
StateList->insert(Lap0);
StateList->insert(Sfx0);
StateList->insert(Sfy0);
StateList->insert(Sfz0);
StateList->insert(dtSfx0);
StateList->insert(dtSfy0);
StateList->insert(dtSfz0);
RHSList = new MyList<var>(phi_rhs);
RHSList->insert(trK_rhs);
RHSList->insert(gxx_rhs);
RHSList->insert(gxy_rhs);
RHSList->insert(gxz_rhs);
RHSList->insert(gyy_rhs);
RHSList->insert(gyz_rhs);
RHSList->insert(gzz_rhs);
RHSList->insert(Axx_rhs);
RHSList->insert(Axy_rhs);
RHSList->insert(Axz_rhs);
RHSList->insert(Ayy_rhs);
RHSList->insert(Ayz_rhs);
RHSList->insert(Azz_rhs);
RHSList->insert(Gmx_rhs);
RHSList->insert(Gmy_rhs);
RHSList->insert(Gmz_rhs);
RHSList->insert(Lap_rhs);
RHSList->insert(Sfx_rhs);
RHSList->insert(Sfy_rhs);
RHSList->insert(Sfz_rhs);
RHSList->insert(dtSfx_rhs);
RHSList->insert(dtSfy_rhs);
RHSList->insert(dtSfz_rhs);
SynchList_pre = new MyList<var>(phi);
SynchList_pre->insert(trK);
SynchList_pre->insert(gxx);
SynchList_pre->insert(gxy);
SynchList_pre->insert(gxz);
SynchList_pre->insert(gyy);
SynchList_pre->insert(gyz);
SynchList_pre->insert(gzz);
SynchList_pre->insert(Axx);
SynchList_pre->insert(Axy);
SynchList_pre->insert(Axz);
SynchList_pre->insert(Ayy);
SynchList_pre->insert(Ayz);
SynchList_pre->insert(Azz);
SynchList_pre->insert(Gmx);
SynchList_pre->insert(Gmy);
SynchList_pre->insert(Gmz);
SynchList_pre->insert(Lap);
SynchList_pre->insert(Sfx);
SynchList_pre->insert(Sfy);
SynchList_pre->insert(Sfz);
SynchList_pre->insert(dtSfx);
SynchList_pre->insert(dtSfy);
SynchList_pre->insert(dtSfz);
SynchList_cor = new MyList<var>(phi1);
SynchList_cor->insert(trK1);
SynchList_cor->insert(gxx1);
SynchList_cor->insert(gxy1);
SynchList_cor->insert(gxz1);
SynchList_cor->insert(gyy1);
SynchList_cor->insert(gyz1);
SynchList_cor->insert(gzz1);
SynchList_cor->insert(Axx1);
SynchList_cor->insert(Axy1);
SynchList_cor->insert(Axz1);
SynchList_cor->insert(Ayy1);
SynchList_cor->insert(Ayz1);
SynchList_cor->insert(Azz1);
SynchList_cor->insert(Gmx1);
SynchList_cor->insert(Gmy1);
SynchList_cor->insert(Gmz1);
SynchList_cor->insert(Lap1);
SynchList_cor->insert(Sfx1);
SynchList_cor->insert(Sfy1);
SynchList_cor->insert(Sfz1);
SynchList_cor->insert(dtSfx1);
SynchList_cor->insert(dtSfy1);
SynchList_cor->insert(dtSfz1);
DumpList = new MyList<var>(phi0);
DumpList->insert(trK0);
DumpList->insert(gxx0);
DumpList->insert(gxy0);
DumpList->insert(gxz0);
DumpList->insert(gyy0);
DumpList->insert(gyz0);
DumpList->insert(gzz0);
// DumpList->insert(Axx0);
// DumpList->insert(Axy0);
// DumpList->insert(Axz0);
// DumpList->insert(Ayy0);
// DumpList->insert(Ayz0);
// DumpList->insert(Azz0);
// DumpList->insert(Gmx0);
// DumpList->insert(Gmy0);
// DumpList->insert(Gmz0);
DumpList->insert(Lap0);
// DumpList->insert(Sfx0);
// DumpList->insert(Sfy0);
// DumpList->insert(Sfz0);
// DumpList->insert(dtSfx0);
// DumpList->insert(dtSfy0);
// DumpList->insert(dtSfz0);
// DumpList->insert(Rpsi4);
// DumpList->insert(Ipsi4);
DumpList->insert(Cons_Ham);
DumpList->insert(Cons_Px);
DumpList->insert(Cons_Py);
DumpList->insert(Cons_Pz);
// DumpList->insert(Cons_Gx);
// DumpList->insert(Cons_Gy);
// DumpList->insert(Cons_Gz);
ConstraintList = new MyList<var>(Cons_Ham);
ConstraintList->insert(Cons_Px);
ConstraintList->insert(Cons_Py);
ConstraintList->insert(Cons_Pz);
ConstraintList->insert(Cons_Gx);
ConstraintList->insert(Cons_Gy);
ConstraintList->insert(Cons_Gz);
#ifdef With_AHF
// setup kinds of var list
// List for the apparent horizon finder (AHFinderDirect)
// special attention is paid to the symmetry type
// gij gij,x gij,y gij,z
AHList = new MyList<var>(gxx0);
AHList->insert(Gamxxx);
AHList->insert(Gamyxx);
AHList->insert(Gamzxx);
AHList->insert(gxy0);
AHList->insert(Gamxxy);
AHList->insert(Gamyxy);
AHList->insert(Gamzxy);
AHList->insert(gxz0);
AHList->insert(Gamxxz);
AHList->insert(Gamyxz);
AHList->insert(Gamzxz);
AHList->insert(gyy0);
AHList->insert(Gamxyy);
AHList->insert(Gamyyy);
AHList->insert(Gamzyy);
AHList->insert(gyz0);
AHList->insert(Gamxyz);
AHList->insert(Gamyyz);
AHList->insert(Gamzyz);
AHList->insert(gzz0);
AHList->insert(Gamxzz);
AHList->insert(Gamyzz);
AHList->insert(Gamzzz);
// phi phi,x phi,y phi,z
AHList->insert(phi0);
AHList->insert(dtSfx_rhs);
AHList->insert(dtSfy_rhs);
AHList->insert(dtSfz_rhs);
// Aij
AHList->insert(Axx0);
AHList->insert(Axy0);
AHList->insert(Axz0);
AHList->insert(Ayy0);
AHList->insert(Ayz0);
AHList->insert(Azz0);
// trK
AHList->insert(trK0);
// gij,x gij,y gij,z
AHDList = new MyList<var>(Gamxxx);
AHDList->insert(Gamyxx);
AHDList->insert(Gamzxx);
AHDList->insert(Gamxxy);
AHDList->insert(Gamyxy);
AHDList->insert(Gamzxy);
AHDList->insert(Gamxxz);
AHDList->insert(Gamyxz);
AHDList->insert(Gamzxz);
AHDList->insert(Gamxyy);
AHDList->insert(Gamyyy);
AHDList->insert(Gamzyy);
AHDList->insert(Gamxyz);
AHDList->insert(Gamyyz);
AHDList->insert(Gamzyz);
AHDList->insert(Gamxzz);
AHDList->insert(Gamyzz);
AHDList->insert(Gamzzz);
// phi,x phi,y phi,z
AHDList->insert(dtSfx_rhs);
AHDList->insert(dtSfy_rhs);
AHDList->insert(dtSfz_rhs);
GaugeList = new MyList<var>(Lap0);
GaugeList->insert(Sfx0);
GaugeList->insert(Sfy0);
GaugeList->insert(Sfz0);
#endif
// Note: the first checkpoint-class variable is `bool` while the local variable is `int`;
// an explicit conversion may be required in some contexts.
// bool checkrun00 = checkrun;
// Note: the second checkpoint-class variable is `const char*` while the local variable is `char*`;
// an explicit conversion may be required.
// const char* checkfilename00 = checkfilename;
CheckPoint = new checkpoint(checkrun, checkfilename, myrank);
if (myrank==0) {
cout << " BSSN class successfully created " << endl;
}
}
//================================================================================================
//================================================================================================
// This member function initializes the class
//================================================================================================
void bssn_class::Initialize()
{
if (myrank == 0)
cout << " you have setted " << ngfs << " grid functions." << endl;
CheckPoint->addvariablelist(StateList);
CheckPoint->addvariablelist(OldStateList);
char pname[50];
{
map<string, string>::iterator iter = parameters::str_par.find("inputpar");
if (iter != parameters::str_par.end())
{
strcpy(pname, (iter->second).c_str());
}
else
{
cout << "Error inputpar" << endl;
exit(0);
}
}
GH = new cgh(0, ngfs, Symmetry, pname, checkrun, ErrorMonitor);
ConstraintRefreshLevels = new int[GH->levels];
for (int il = 0; il < GH->levels; il++)
ConstraintRefreshLevels[il] = 0;
if (checkrun)
CheckPoint->readcheck_cgh(PhysTime, GH, myrank, nprocs, Symmetry);
else
GH->compose_cgh(nprocs);
#ifdef WithShell
if (myrank == 0) cout << " [Init] ShellPatch new... " << flush;
double _t0 = MPI_Wtime();
SH = new ShellPatch(0, ngfs, pname, Symmetry, myrank, ErrorMonitor);
SH->matchcheck(GH->PatL[0]);
if (myrank == 0) cout << (MPI_Wtime()-_t0) << "s" << endl;
if (myrank == 0) cout << " [Init] compose_sh... " << flush; _t0 = MPI_Wtime();
SH->compose_sh(nprocs);
if (myrank == 0) cout << (MPI_Wtime()-_t0) << "s" << endl;
if (myrank == 0) cout << " [Init] setupcordtrans... " << flush; _t0 = MPI_Wtime();
SH->setupcordtrans();
if (myrank == 0) cout << (MPI_Wtime()-_t0) << "s" << endl;
SH->Dump_xyz(0, 0, 1);
if (myrank == 0) cout << " [Init] setupintintstuff... " << flush; _t0 = MPI_Wtime();
SH->setupintintstuff(nprocs, GH->PatL[0], Symmetry);
if (myrank == 0) cout << (MPI_Wtime()-_t0) << "s" << endl;
if (checkrun)
CheckPoint->readcheck_sh(SH, myrank);
#else
SH = 0;
#endif
double h = GH->PatL[0]->data->blb->data->getdX(0);
for (int i = 1; i < dim; i++)
h = Mymin(h, GH->PatL[0]->data->blb->data->getdX(i));
dT = Courant * h;
if (checkrun)
{
CheckPoint->read_Black_Hole_position(BH_num_input, BH_num, Porg0, Pmom, Spin, Mass, Porgbr, Porg, Porg1, Porg_rhs);
setpbh(BH_num, Porg0, Mass, BH_num_input);
}
else
{
PhysTime = StartTime;
Setup_Black_Hole_position();
}
// Initialize sync caches (per-level, for predictor and corrector)
sync_cache_pre = new Parallel::SyncCache[GH->levels];
sync_cache_cor = new Parallel::SyncCache[GH->levels];
sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels];
sync_cache_rp_fine = new Parallel::SyncCache[GH->levels];
sync_cache_restrict = new Parallel::SyncCache[GH->levels];
sync_cache_outbd = new Parallel::SyncCache[GH->levels];
}
//================================================================================================
//================================================================================================
// This member function is the destructor; it releases allocated variables
//================================================================================================
// Destructor: empties the variable lists, deletes every grid-function
// descriptor created by the constructor, releases all per-level SyncCache
// arrays allocated in Initialize(), and finally frees the grid hierarchy,
// shell patches, black-hole bookkeeping arrays, monitors and checkpoint
// helper.
bssn_class::~bssn_class()
{
#ifdef With_AHF
  AHList->clearList();
  AHDList->clearList();
  GaugeList->clearList();
  if (lastahdumpid)
    delete[] lastahdumpid;
  if (findeveryl)
    delete[] findeveryl;
  if (xc)
  {
    // These arrays are released together, keyed on xc being set.
    delete[] xc;
    delete[] yc;
    delete[] zc;
    delete[] xr;
    delete[] yr;
    delete[] zr;
    delete[] trigger;
    delete[] dumpid;
    delete[] dTT;
  }
  AHFinderDirect::AHFinderDirect_cleanup();
#endif

  // Empty the lists before the var objects they refer to are deleted below.
  StateList->clearList();
  RHSList->clearList();
  OldStateList->clearList();
  SynchList_pre->clearList();
  SynchList_cor->clearList();
  DumpList->clearList();
  ConstraintList->clearList();

  delete[] ConstraintRefreshLevels;

  // "o" variables (members of OldStateList)
  delete phio;
  delete trKo;
  delete gxxo;
  delete gxyo;
  delete gxzo;
  delete gyyo;
  delete gyzo;
  delete gzzo;
  delete Axxo;
  delete Axyo;
  delete Axzo;
  delete Ayyo;
  delete Ayzo;
  delete Azzo;
  delete Gmxo;
  delete Gmyo;
  delete Gmzo;
  delete Lapo;
  delete Sfxo;
  delete Sfyo;
  delete Sfzo;
  delete dtSfxo;
  delete dtSfyo;
  delete dtSfzo;

  // "0" variables (members of StateList)
  delete phi0;
  delete trK0;
  delete gxx0;
  delete gxy0;
  delete gxz0;
  delete gyy0;
  delete gyz0;
  delete gzz0;
  delete Axx0;
  delete Axy0;
  delete Axz0;
  delete Ayy0;
  delete Ayz0;
  delete Azz0;
  delete Gmx0;
  delete Gmy0;
  delete Gmz0;
  delete Lap0;
  delete Sfx0;
  delete Sfy0;
  delete Sfz0;
  delete dtSfx0;
  delete dtSfy0;
  delete dtSfz0;

  // unsuffixed variables (members of SynchList_pre)
  delete phi;
  delete trK;
  delete gxx;
  delete gxy;
  delete gxz;
  delete gyy;
  delete gyz;
  delete gzz;
  delete Axx;
  delete Axy;
  delete Axz;
  delete Ayy;
  delete Ayz;
  delete Azz;
  delete Gmx;
  delete Gmy;
  delete Gmz;
  delete Lap;
  delete Sfx;
  delete Sfy;
  delete Sfz;
  delete dtSfx;
  delete dtSfy;
  delete dtSfz;

  // "1" variables (members of SynchList_cor)
  delete phi1;
  delete trK1;
  delete gxx1;
  delete gxy1;
  delete gxz1;
  delete gyy1;
  delete gyz1;
  delete gzz1;
  delete Axx1;
  delete Axy1;
  delete Axz1;
  delete Ayy1;
  delete Ayz1;
  delete Azz1;
  delete Gmx1;
  delete Gmy1;
  delete Gmz1;
  delete Lap1;
  delete Sfx1;
  delete Sfy1;
  delete Sfz1;
  delete dtSfx1;
  delete dtSfy1;
  delete dtSfz1;

  // "_rhs" variables (members of RHSList)
  delete phi_rhs;
  delete trK_rhs;
  delete gxx_rhs;
  delete gxy_rhs;
  delete gxz_rhs;
  delete gyy_rhs;
  delete gyz_rhs;
  delete gzz_rhs;
  delete Axx_rhs;
  delete Axy_rhs;
  delete Axz_rhs;
  delete Ayy_rhs;
  delete Ayz_rhs;
  delete Azz_rhs;
  delete Gmx_rhs;
  delete Gmy_rhs;
  delete Gmz_rhs;
  delete Lap_rhs;
  delete Sfx_rhs;
  delete Sfy_rhs;
  delete Sfz_rhs;
  delete dtSfx_rhs;
  delete dtSfy_rhs;
  delete dtSfz_rhs;

  // rho, S* variables
  delete rho;
  delete Sx;
  delete Sy;
  delete Sz;
  delete Sxx;
  delete Sxy;
  delete Sxz;
  delete Syy;
  delete Syz;
  delete Szz;

  // Gam* variables
  delete Gamxxx;
  delete Gamxxy;
  delete Gamxxz;
  delete Gamxyy;
  delete Gamxyz;
  delete Gamxzz;
  delete Gamyxx;
  delete Gamyxy;
  delete Gamyxz;
  delete Gamyyy;
  delete Gamyyz;
  delete Gamyzz;
  delete Gamzxx;
  delete Gamzxy;
  delete Gamzxz;
  delete Gamzyy;
  delete Gamzyz;
  delete Gamzzz;

  // R* variables
  delete Rxx;
  delete Rxy;
  delete Rxz;
  delete Ryy;
  delete Ryz;
  delete Rzz;

  // psi4 variables
  delete Rpsi4;
  delete Ipsi4;
  delete t1Rpsi4;
  delete t1Ipsi4;
  delete t2Rpsi4;
  delete t2Ipsi4;

  // constraint monitor variables
  delete Cons_Ham;
  delete Cons_Px;
  delete Cons_Py;
  delete Cons_Pz;
  delete Cons_Gx;
  delete Cons_Gy;
  delete Cons_Gz;

#ifdef Point_Psi4
  delete phix;
  delete phiy;
  delete phiz;
  delete trKx;
  delete trKy;
  delete trKz;
  delete Axxx;
  delete Axxy;
  delete Axxz;
  delete Axyx;
  delete Axyy;
  delete Axyz;
  delete Axzx;
  delete Axzy;
  delete Axzz;
  delete Ayyx;
  delete Ayyy;
  delete Ayyz;
  delete Ayzx;
  delete Ayzy;
  delete Ayzz;
  delete Azzx;
  delete Azzy;
  delete Azzz;
#endif

  // Destroy sync caches before GH (GH->levels gives the array length).
  // All six arrays allocated in Initialize() are released here; previously
  // sync_cache_restrict and sync_cache_outbd were never freed.
  // NOTE(review): this relies on the constructor null-initializing all six
  // pointers, as the null checks on the original four already assumed --
  // confirm for sync_cache_restrict and sync_cache_outbd.
  {
    Parallel::SyncCache *cache_arrays[] = {sync_cache_pre, sync_cache_cor,
                                           sync_cache_rp_coarse, sync_cache_rp_fine,
                                           sync_cache_restrict, sync_cache_outbd};
    const int n_arrays = (int)(sizeof(cache_arrays) / sizeof(cache_arrays[0]));
    for (int k = 0; k < n_arrays; k++)
    {
      if (!cache_arrays[k])
        continue;
      for (int i = 0; i < GH->levels; i++)
        cache_arrays[k][i].destroy();
      delete[] cache_arrays[k];
    }
  }

  delete GH;
#ifdef WithShell
  delete SH;
#endif

  // Per-black-hole rows first, then the outer arrays.
  for (int i = 0; i < BH_num; i++)
  {
    delete[] Porg0[i];
    delete[] Porgbr[i];
    delete[] Porg[i];
    delete[] Porg1[i];
    delete[] Porg_rhs[i];
  }
  delete[] Porg0;
  delete[] Porgbr;
  delete[] Porg;
  delete[] Porg1;
  delete[] Porg_rhs;
  delete[] Mass;
  delete[] Spin;
  delete[] Pmom;

  delete ErrorMonitor;
  delete Psi4Monitor;
  delete BHMonitor;
  delete MAPMonitor;
  delete ConVMonitor;
  delete TimingMonitor;
  delete Waveshell;
  delete CheckPoint;
}
//================================================================================================
//================================================================================================
// This member function computes initial data using Lousto's analytic method
//================================================================================================
void bssn_class::Setup_Initial_Data_Lousto()
{
if (!checkrun)
{
if (myrank == 0)
{
cout << endl;
cout << " Setup initial data with Lousto's analytical formula. " << endl;
cout << endl;
}
char filename[50];
{
map<string, string>::iterator iter = parameters::str_par.find("inputpar");
if (iter != parameters::str_par.end())
{
strcpy(filename, (iter->second).c_str());
}
else
{
cout << "Error inputpar" << endl;
exit(0);
}
}
int BH_NM;
double *Porg_here, *Pmom_here, *Spin_here, *Mass_here;
// read parameter from file
{
const int LEN = 256;
char pline[LEN];
string str, sgrp, skey, sval;
int sind;
ifstream inf(filename, ifstream::in);
if (!inf.good() && myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "Can not open parameter file " << filename
<< " for inputing information of black holes" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
for (int i = 1; inf.good(); i++)
{
inf.getline(pline, LEN);
str = pline;
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
if (status == -1)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
else if (status == 0)
continue;
if (sgrp == "BSSN" && skey == "BH_num")
{
BH_NM = atoi(sval.c_str());
break;
}
}
inf.close();
}
Porg_here = new double[3 * BH_NM];
Pmom_here = new double[3 * BH_NM];
Spin_here = new double[3 * BH_NM];
Mass_here = new double[BH_NM];
// read parameter from file
{
const int LEN = 256;
char pline[LEN];
string str, sgrp, skey, sval;
int sind;
ifstream inf(filename, ifstream::in);
if (!inf.good() && myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "Can not open parameter file " << filename
<< " for inputing information of black holes" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
for (int i = 1; inf.good(); i++)
{
inf.getline(pline, LEN);
str = pline;
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
if (status == -1)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
else if (status == 0)
continue;
if (sgrp == "BSSN" && sind < BH_NM)
{
if (skey == "Mass")
Mass_here[sind] = atof(sval.c_str());
else if (skey == "Porgx")
Porg_here[sind * 3] = atof(sval.c_str());
else if (skey == "Porgy")
Porg_here[sind * 3 + 1] = atof(sval.c_str());
else if (skey == "Porgz")
Porg_here[sind * 3 + 2] = atof(sval.c_str());
else if (skey == "Spinx")
Spin_here[sind * 3] = atof(sval.c_str());
else if (skey == "Spiny")
Spin_here[sind * 3 + 1] = atof(sval.c_str());
else if (skey == "Spinz")
Spin_here[sind * 3 + 2] = atof(sval.c_str());
else if (skey == "Pmomx")
Pmom_here[sind * 3] = atof(sval.c_str());
else if (skey == "Pmomy")
Pmom_here[sind * 3 + 1] = atof(sval.c_str());
else if (skey == "Pmomz")
Pmom_here[sind * 3 + 2] = atof(sval.c_str());
}
}
inf.close();
}
// set initial data
for (int lev = 0; lev < GH->levels; lev++)
{
MyList<Patch> *Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BL = Pp->data->blb;
while (BL)
{
Block *cg = BL->data;
if (myrank == cg->rank)
{
// Use Lousto's analytic formulas to compute initial data
f_get_lousto_nbhs(cg->shape, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM);
}
if (BL == Pp->data->ble)
break;
BL = BL->next;
}
Pp = Pp->next;
}
}
// dump read_in initial data
for (int lev = 0; lev < GH->levels; lev++)
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT);
#ifdef WithShell
// ShellPatch part
MyList<ss_patch> *Pp = SH->PatL;
while (Pp)
{
MyList<Block> *BL = Pp->data->blb;
while (BL)
{
Block *cg = BL->data;
if (myrank == cg->rank)
{
f_get_initial_nbhs_sh(cg->shape,
cg->fgfs[Pp->data->fngfs + ShellPatch::gx],
cg->fgfs[Pp->data->fngfs + ShellPatch::gy],
cg->fgfs[Pp->data->fngfs + ShellPatch::gz],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM);
}
if (BL == Pp->data->ble)
break;
BL = BL->next;
}
Pp = Pp->next;
}
// dump read_in initial data
SH->Dump_Data(StateList, 0, PhysTime, dT);
#endif
delete[] Porg_here;
delete[] Mass_here;
delete[] Pmom_here;
delete[] Spin_here;
// SH->Synch(GH->PatL[0],StateList,Symmetry);
// exit(0);
}
}
//================================================================================================
//================================================================================================
// This member function computes initial data using Cao's analytic formulas
//================================================================================================
void bssn_class::Setup_Initial_Data_Cao()
{
if (!checkrun)
{
if (myrank == 0)
{
cout << endl;
cout << " Setup initial data with Cao's analytical formula. " << endl;
cout << endl;
}
char filename[50];
{
map<string, string>::iterator iter = parameters::str_par.find("inputpar");
if (iter != parameters::str_par.end())
{
strcpy(filename, (iter->second).c_str());
}
else
{
cout << "Error inputpar" << endl;
exit(0);
}
}
int BH_NM;
double *Porg_here, *Pmom_here, *Spin_here, *Mass_here;
// read parameter from file
{
const int LEN = 256;
char pline[LEN];
string str, sgrp, skey, sval;
int sind;
ifstream inf(filename, ifstream::in);
if (!inf.good() && myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "Can not open parameter file " << filename
<< " for inputing information of black holes" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
for (int i = 1; inf.good(); i++)
{
inf.getline(pline, LEN);
str = pline;
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
if (status == -1)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
else if (status == 0)
continue;
if (sgrp == "BSSN" && skey == "BH_num")
{
BH_NM = atoi(sval.c_str());
break;
}
}
inf.close();
}
Porg_here = new double[3 * BH_NM];
Pmom_here = new double[3 * BH_NM];
Spin_here = new double[3 * BH_NM];
Mass_here = new double[BH_NM];
// read parameter from file
{
const int LEN = 256;
char pline[LEN];
string str, sgrp, skey, sval;
int sind;
ifstream inf(filename, ifstream::in);
if (!inf.good() && myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "Can not open parameter file " << filename
<< " for inputing information of black holes" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
for (int i = 1; inf.good(); i++)
{
inf.getline(pline, LEN);
str = pline;
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
if (status == -1)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
else if (status == 0)
continue;
if (sgrp == "BSSN" && sind < BH_NM)
{
if (skey == "Mass")
Mass_here[sind] = atof(sval.c_str());
else if (skey == "Porgx")
Porg_here[sind * 3] = atof(sval.c_str());
else if (skey == "Porgy")
Porg_here[sind * 3 + 1] = atof(sval.c_str());
else if (skey == "Porgz")
Porg_here[sind * 3 + 2] = atof(sval.c_str());
else if (skey == "Spinx")
Spin_here[sind * 3] = atof(sval.c_str());
else if (skey == "Spiny")
Spin_here[sind * 3 + 1] = atof(sval.c_str());
else if (skey == "Spinz")
Spin_here[sind * 3 + 2] = atof(sval.c_str());
else if (skey == "Pmomx")
Pmom_here[sind * 3] = atof(sval.c_str());
else if (skey == "Pmomy")
Pmom_here[sind * 3 + 1] = atof(sval.c_str());
else if (skey == "Pmomz")
Pmom_here[sind * 3 + 2] = atof(sval.c_str());
}
}
inf.close();
}
// set initial data
for (int lev = 0; lev < GH->levels; lev++)
{
MyList<Patch> *Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BL = Pp->data->blb;
while (BL)
{
Block *cg = BL->data;
if (myrank == cg->rank)
{
// Use Cao's analytic formulas to compute initial data
f_get_initial_nbhs(cg->shape, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM);
}
if (BL == Pp->data->ble)
break;
BL = BL->next;
}
Pp = Pp->next;
}
}
// dump read_in initial data
for (int lev = 0; lev < GH->levels; lev++)
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT);
#ifdef WithShell
// ShellPatch part
MyList<ss_patch> *Pp = SH->PatL;
while (Pp)
{
MyList<Block> *BL = Pp->data->blb;
while (BL)
{
Block *cg = BL->data;
if (myrank == cg->rank)
{
f_get_initial_nbhs_sh(cg->shape,
cg->fgfs[Pp->data->fngfs + ShellPatch::gx],
cg->fgfs[Pp->data->fngfs + ShellPatch::gy],
cg->fgfs[Pp->data->fngfs + ShellPatch::gz],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM);
}
if (BL == Pp->data->ble)
break;
BL = BL->next;
}
Pp = Pp->next;
}
// dump read_in initial data
SH->Dump_Data(StateList, 0, PhysTime, dT);
#endif
delete[] Porg_here;
delete[] Mass_here;
delete[] Pmom_here;
delete[] Spin_here;
// SH->Synch(GH->PatL[0],StateList,Symmetry);
// exit(0);
}
}
//================================================================================================
//================================================================================================
// This member function computes Kerr-Schild initial data via an analytic method
//================================================================================================
void bssn_class::Setup_KerrSchild()
{
if (!checkrun)
{
if (myrank == 0)
{
cout << endl;
cout << " Setup initial data with Kerr-Schild formula. " << endl;
cout << endl;
}
// set initial data
for (int lev = 0; lev < GH->levels; lev++)
{
MyList<Patch> *Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BL = Pp->data->blb;
while (BL)
{
Block *cg = BL->data;
if (myrank == cg->rank)
{
f_get_initial_kerrschild(cg->shape, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn]);
}
if (BL == Pp->data->ble)
break;
BL = BL->next;
}
Pp = Pp->next;
}
}
#ifdef WithShell
// ShellPatch part
MyList<ss_patch> *Pp = SH->PatL;
while (Pp)
{
int lev = 0, fngfs = Pp->data->fngfs;
MyList<Block> *BL = Pp->data->blb;
while (BL)
{
Block *cg = BL->data;
if (myrank == cg->rank)
{
f_get_initial_kerrschild_ss(cg->shape,
cg->fgfs[Pp->data->fngfs + ShellPatch::gx],
cg->fgfs[Pp->data->fngfs + ShellPatch::gy],
cg->fgfs[Pp->data->fngfs + ShellPatch::gz],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn]);
/*
f_fderivs_shc(cg->shape,cg->fgfs[phi0->sgfn],
cg->fgfs[Sfx_rhs->sgfn],
cg->fgfs[Sfy_rhs->sgfn],
cg->fgfs[Sfz_rhs->sgfn],
cg->X[0],cg->X[1],cg->X[2],
phi0->SoA[0],phi0->SoA[1],phi0->SoA[2],
Symmetry,lev,Pp->data->sst,
cg->fgfs[fngfs+ShellPatch::drhodx],
cg->fgfs[fngfs+ShellPatch::drhody],
cg->fgfs[fngfs+ShellPatch::drhodz],
cg->fgfs[fngfs+ShellPatch::dsigmadx],
cg->fgfs[fngfs+ShellPatch::dsigmady],
cg->fgfs[fngfs+ShellPatch::dsigmadz],
cg->fgfs[fngfs+ShellPatch::dRdx],
cg->fgfs[fngfs+ShellPatch::dRdy],
cg->fgfs[fngfs+ShellPatch::dRdz]);
f_fdderivs_shc(cg->shape,cg->fgfs[phi0->sgfn],
cg->fgfs[Axx_rhs->sgfn],cg->fgfs[Axy_rhs->sgfn],cg->fgfs[Axz_rhs->sgfn],
cg->fgfs[Ayy_rhs->sgfn],cg->fgfs[Ayz_rhs->sgfn],cg->fgfs[Azz_rhs->sgfn],
cg->X[0],cg->X[1],cg->X[2],
phi0->SoA[0],phi0->SoA[1],phi0->SoA[2],
Symmetry,lev,Pp->data->sst,
cg->fgfs[fngfs+ShellPatch::drhodx],
cg->fgfs[fngfs+ShellPatch::drhody],
cg->fgfs[fngfs+ShellPatch::drhodz],
cg->fgfs[fngfs+ShellPatch::dsigmadx],
cg->fgfs[fngfs+ShellPatch::dsigmady],
cg->fgfs[fngfs+ShellPatch::dsigmadz],
cg->fgfs[fngfs+ShellPatch::dRdx],
cg->fgfs[fngfs+ShellPatch::dRdy],
cg->fgfs[fngfs+ShellPatch::dRdz],
cg->fgfs[fngfs+ShellPatch::drhodxx],
cg->fgfs[fngfs+ShellPatch::drhodxy],
cg->fgfs[fngfs+ShellPatch::drhodxz],
cg->fgfs[fngfs+ShellPatch::drhodyy],
cg->fgfs[fngfs+ShellPatch::drhodyz],
cg->fgfs[fngfs+ShellPatch::drhodzz],
cg->fgfs[fngfs+ShellPatch::dsigmadxx],
cg->fgfs[fngfs+ShellPatch::dsigmadxy],
cg->fgfs[fngfs+ShellPatch::dsigmadxz],
cg->fgfs[fngfs+ShellPatch::dsigmadyy],
cg->fgfs[fngfs+ShellPatch::dsigmadyz],
cg->fgfs[fngfs+ShellPatch::dsigmadzz],
cg->fgfs[fngfs+ShellPatch::dRdxx],
cg->fgfs[fngfs+ShellPatch::dRdxy],
cg->fgfs[fngfs+ShellPatch::dRdxz],
cg->fgfs[fngfs+ShellPatch::dRdyy],
cg->fgfs[fngfs+ShellPatch::dRdyz],
cg->fgfs[fngfs+ShellPatch::dRdzz]);
*/
}
if (BL == Pp->data->ble)
break;
BL = BL->next;
}
Pp = Pp->next;
}
#endif
// dump read_in initial data
// SH->Synch(GH->PatL[0],StateList,Symmetry);
// for(int lev=0;lev<GH->levels;lev++) Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT);
// SH->Dump_Data(StateList,0,PhysTime,dT);
// exit(0);
/*
{
MyList<var> * DG_List=new MyList<var>(Sfx_rhs);
DG_List->insert(Sfy_rhs);
DG_List->insert(Sfz_rhs);
DG_List->insert(Axx_rhs);
DG_List->insert(Axy_rhs);
DG_List->insert(Axz_rhs);
DG_List->insert(Ayy_rhs);
DG_List->insert(Ayz_rhs);
DG_List->insert(Azz_rhs);
SH->Synch(DG_List,Symmetry);
SH->Dump_Data(DG_List,0,PhysTime,dT);
DG_List->clearList();
exit(0);
}
*/
}
}
//================================================================================================
//================================================================================================
// This member function reads initial data produced by Pablo Galaviz's Olliptic program
//================================================================================================
// Read initial data solved by Pablo's Olliptic Phys.Rev.D 82 024005 (2010)
//|----------------------------------------------------------------------------
// read ASCII file with the style of Pablo
//|----------------------------------------------------------------------------
// Reads an ASCII file of "x y z value" records into datain.
//
//   ext      - grid extents {nx, ny, nz}; nx*ny*nz records are expected
//   datain   - output array of at least nx*ny*nz doubles; record number
//              i + j*nx + k*nx*ny (i fastest, k slowest) is stored at that index
//   filename - path of the file to read
//
// The x, y, z columns are parsed and discarded. Returns true when every record
// was read, false when the file cannot be opened or when it ends early or
// contains a malformed entry (datain is then only partially filled).
bool bssn_class::read_Pablo_file(int *ext, double *datain, char *filename)
{
  if (myrank == 0)
  {
    cout << endl;
    cout << " Setup initial data with Pablo_file. " << endl;
    cout << endl;
  }

  const int nx = ext[0], ny = ext[1], nz = ext[2];

  //|--->open input file
  ifstream infile;
  infile.open(filename);
  if (!infile)
  {
    cout << "bssn_class: read_Pablo_file can't open " << filename << " for input." << endl;
    return false;
  }

  double x, y, z;
  // The records arrive in storage order, so a running size_t index replaces
  // i + j*nx + k*nx*ny and cannot overflow int on large grids.
  size_t idx = 0;
  for (int k = 0; k < nz; k++)
    for (int j = 0; j < ny; j++)
      for (int i = 0; i < nx; i++)
      {
        // a failed extraction means the file is short or malformed; report it
        // instead of silently returning stale or partial data as success
        if (!(infile >> x >> y >> z >> datain[idx]))
        {
          cout << "bssn_class: read_Pablo_file failed to read record " << idx
               << " from " << filename << "." << endl;
          infile.close();
          return false;
        }
        idx++;
      }

  infile.close();
  return true;
}
//================================================================================================
//================================================================================================
// This member function writes initial data file in the style of Pablo Galaviz's Olliptic program
//================================================================================================
//|----------------------------------------------------------------------------
// write ASCII file with the style of Pablo
//|----------------------------------------------------------------------------
// Writes an ASCII grid file of "x y z 0" records, one per grid point,
// with x varying fastest and z slowest.
//
//   ext                - grid extents {nx, ny, nz}
//   xmin..zmax         - physical bounds of the box along each axis
//   filename           - path of the file to create
//
// With Vertex defined the points include both bounds (spacing
// (max-min)/(n-1)); with Cell defined they sit at cell centres (spacing
// (max-min)/n, offset by half a cell). The value column is always 0.
// Aborts the MPI job if the file cannot be opened or written.
void bssn_class::write_Pablo_file(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax,
                                  char *filename)
{
  const int nx = ext[0], ny = ext[1], nz = ext[2];
  int i, j, k;
  double *X = new double[nx];
  double *Y = new double[ny];
  double *Z = new double[nz];
  double dX, dY, dZ;
#ifdef Vertex
#ifdef Cell
#error Both Cell and Vertex are defined
#endif
  // A one-point axis has no spacing: use 0 so the point lands on the lower
  // bound instead of dividing by n - 1 == 0 and producing NaN coordinates.
  dX = (nx > 1) ? (xmax - xmin) / (nx - 1) : 0.0;
  for (i = 0; i < nx; i++)
    X[i] = xmin + i * dX;
  dY = (ny > 1) ? (ymax - ymin) / (ny - 1) : 0.0;
  for (j = 0; j < ny; j++)
    Y[j] = ymin + j * dY;
  dZ = (nz > 1) ? (zmax - zmin) / (nz - 1) : 0.0;
  for (k = 0; k < nz; k++)
    Z[k] = zmin + k * dZ;
#else
#ifdef Cell
  dX = (xmax - xmin) / nx;
  for (i = 0; i < nx; i++)
    X[i] = xmin + (i + 0.5) * dX;
  dY = (ymax - ymin) / ny;
  for (j = 0; j < ny; j++)
    Y[j] = ymin + (j + 0.5) * dY;
  dZ = (zmax - zmin) / nz;
  for (k = 0; k < nz; k++)
    Z[k] = zmin + (k + 0.5) * dZ;
#else
#error Not define Vertex nor Cell
#endif
#endif
  //|--->open output file
  ofstream outfile;
  outfile.open(filename);
  if (!outfile)
  {
    cout << "bssn_class: write_Pablo_file can't open " << filename << " for output." << endl;
    MPI_Abort(MPI_COMM_WORLD, 1);
  }
  outfile.setf(ios::scientific, ios::floatfield);
  outfile.precision(16);
  for (k = 0; k < nz; k++)
    for (j = 0; j < ny; j++)
      for (i = 0; i < nx; i++)
      {
        // "\n" instead of endl: same bytes, but no flush per grid point
        outfile << X[i] << " " << Y[j] << " " << Z[k] << " "
                << 0 << "\n";
      }
  // close() flushes the buffer; a failure here or earlier leaves failbit set
  outfile.close();
  if (!outfile)
  {
    cout << "bssn_class: write_Pablo_file failed while writing " << filename << "." << endl;
    MPI_Abort(MPI_COMM_WORLD, 1);
  }

  delete[] X;
  delete[] Y;
  delete[] Z;
}
//================================================================================================
//================================================================================================
// Read initial data solved by Ansorg, PRD 70, 064011 (2004)
void bssn_class::Read_Ansorg()
{
if (!checkrun)
{
if (myrank == 0)
{
cout << endl;
cout << " Read initial data from Ansorg's solver,"
<< " please be sure the input parameters for black holes are puncture parameters!! " << endl;
cout << endl;
}
char filename[50];
{
map<string, string>::iterator iter = parameters::str_par.find("inputpar");
if (iter != parameters::str_par.end())
{
strcpy(filename, (iter->second).c_str());
}
else
{
cout << "Error inputpar" << endl;
exit(0);
}
}
int BH_NM;
double *Porg_here, *Pmom_here, *Spin_here, *Mass_here;
// read parameter from file
{
const int LEN = 256;
char pline[LEN];
string str, sgrp, skey, sval;
int sind;
ifstream inf(filename, ifstream::in);
if (!inf.good() && myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "Can not open parameter file " << filename
<< " for inputing information of black holes" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
for (int i = 1; inf.good(); i++)
{
inf.getline(pline, LEN);
str = pline;
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
if (status == -1)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
else if (status == 0)
continue;
if (sgrp == "BSSN" && skey == "BH_num")
{
BH_NM = atoi(sval.c_str());
break;
}
}
inf.close();
}
Porg_here = new double[3 * BH_NM];
Pmom_here = new double[3 * BH_NM];
Spin_here = new double[3 * BH_NM];
Mass_here = new double[BH_NM];
// read parameter from file
{
const int LEN = 256;
char pline[LEN];
string str, sgrp, skey, sval;
int sind;
ifstream inf(filename, ifstream::in);
if (!inf.good() && myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "Can not open parameter file " << filename
<< " for inputing information of black holes" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
for (int i = 1; inf.good(); i++)
{
inf.getline(pline, LEN);
str = pline;
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
if (status == -1)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
else if (status == 0)
continue;
if (sgrp == "BSSN" && sind < BH_NM)
{
if (skey == "Mass")
Mass_here[sind] = atof(sval.c_str());
else if (skey == "Porgx")
Porg_here[sind * 3] = atof(sval.c_str());
else if (skey == "Porgy")
Porg_here[sind * 3 + 1] = atof(sval.c_str());
else if (skey == "Porgz")
Porg_here[sind * 3 + 2] = atof(sval.c_str());
else if (skey == "Spinx")
Spin_here[sind * 3] = atof(sval.c_str());
else if (skey == "Spiny")
Spin_here[sind * 3 + 1] = atof(sval.c_str());
else if (skey == "Spinz")
Spin_here[sind * 3 + 2] = atof(sval.c_str());
else if (skey == "Pmomx")
Pmom_here[sind * 3] = atof(sval.c_str());
else if (skey == "Pmomy")
Pmom_here[sind * 3 + 1] = atof(sval.c_str());
else if (skey == "Pmomz")
Pmom_here[sind * 3 + 2] = atof(sval.c_str());
}
}
inf.close();
}
int order = 6;
Ansorg read_ansorg("Ansorg.psid", order);
// set initial data
for (int lev = 0; lev < GH->levels; lev++)
{
MyList<Patch> *Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BL = Pp->data->blb;
while (BL)
{
Block *cg = BL->data;
if (myrank == cg->rank)
{
for (int k = 0; k < cg->shape[2]; k++)
for (int j = 0; j < cg->shape[1]; j++)
for (int i = 0; i < cg->shape[0]; i++)
cg->fgfs[phi0->sgfn][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]] =
read_ansorg.ps_u_at_xyz(cg->X[0][i], cg->X[1][j], cg->X[2][k]);
f_get_ansorg_nbhs(cg->shape, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM);
}
if (BL == Pp->data->ble)
break;
BL = BL->next;
}
Pp = Pp->next;
}
}
#ifdef WithShell
// ShellPatch part
MyList<ss_patch> *Pp = SH->PatL;
while (Pp)
{
MyList<Block> *BL = Pp->data->blb;
while (BL)
{
Block *cg = BL->data;
if (myrank == cg->rank)
{
for (int k = 0; k < cg->shape[2]; k++)
for (int j = 0; j < cg->shape[1]; j++)
for (int i = 0; i < cg->shape[0]; i++)
cg->fgfs[phi0->sgfn][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]] =
read_ansorg.ps_u_at_xyz(cg->fgfs[Pp->data->fngfs + ShellPatch::gx][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]],
cg->fgfs[Pp->data->fngfs + ShellPatch::gy][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]],
cg->fgfs[Pp->data->fngfs + ShellPatch::gz][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]]);
f_get_ansorg_nbhs_ss(cg->shape,
cg->fgfs[Pp->data->fngfs + ShellPatch::gx],
cg->fgfs[Pp->data->fngfs + ShellPatch::gy],
cg->fgfs[Pp->data->fngfs + ShellPatch::gz],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM);
#if 0
// for check fderivs_sh
f_fderivs_sh(cg->shape,cg->fgfs[Ayz0->sgfn],
cg->fgfs[Sfx0->sgfn],cg->fgfs[Sfy0->sgfn],cg->fgfs[Sfz0->sgfn],
cg->X[0],cg->X[1],cg->X[2],
Ayz0->SoA[0],Ayz0->SoA[1],Ayz0->SoA[2],
Symmetry,Pp->data->sst,Pp->data->sst);
#endif
#if 0
// for check fderivs_shc
int fngfs = Pp->data->fngfs;
f_fderivs_shc(cg->shape,cg->fgfs[Ayz0->sgfn],
cg->fgfs[Sfx0->sgfn],cg->fgfs[Sfy0->sgfn],cg->fgfs[Sfz0->sgfn],
cg->X[0],cg->X[1],cg->X[2],
Ayz0->SoA[0],Ayz0->SoA[1],Ayz0->SoA[2],
Symmetry,Pp->data->sst,Pp->data->sst,
cg->fgfs[fngfs+ShellPatch::drhodx],
cg->fgfs[fngfs+ShellPatch::drhody],
cg->fgfs[fngfs+ShellPatch::drhodz],
cg->fgfs[fngfs+ShellPatch::dsigmadx],
cg->fgfs[fngfs+ShellPatch::dsigmady],
cg->fgfs[fngfs+ShellPatch::dsigmadz],
cg->fgfs[fngfs+ShellPatch::dRdx],
cg->fgfs[fngfs+ShellPatch::dRdy],
cg->fgfs[fngfs+ShellPatch::dRdz]);
#endif
}
if (BL == Pp->data->ble)
break;
BL = BL->next;
}
Pp = Pp->next;
}
#endif
delete[] Porg_here;
delete[] Mass_here;
delete[] Pmom_here;
delete[] Spin_here;
if (myrank == 0) cout << " [Read_Ansorg] before Compute_Constraint: " << MPI_Wtime() << " s" << endl;
Compute_Constraint();
if (myrank == 0) cout << " [Read_Ansorg] after Compute_Constraint: " << MPI_Wtime() << " s" << endl;
// dump read_in initial data
for (int lev = 0; lev < GH->levels; lev++)
Parallel::Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT);
#ifdef WithShell
SH->Dump_Data(DumpList, 0, PhysTime, dT);
#endif
// if(myrank==0) MPI_Abort(MPI_COMM_WORLD,1);
}
}
//================================================================================================
//================================================================================================
// This member function sets up the time evolution for the entire process
//================================================================================================
// Main evolution driver: performs up to `Steps` iterations of the outer loop.
// Each iteration advances the solution (RecursiveStep(0) when PSTR == 0,
// AnalysisStuff + ParallelStep when PSTR is 1, 2 or 3) and then, in this order:
// optionally writes constraints, dumps 3D and 2D data when their timers expire,
// prints the step summary, regrids (REGLEV == 1), optionally logs memory usage,
// prints puncture positions, optionally polls stdin for an abort request,
// writes a checkpoint when its timer expires, and emits timing reports.
// The loop ends early once PhysTime >= TotalTime.
//
//   Steps - maximum number of outer iterations to perform
void bssn_class::Evolve(int Steps)
{
clock_t prev_clock, curr_clock;
// time accumulated since the last 3D dump / checkpoint / 2D dump
double LastDump = 0.0, LastCheck = 0.0, Last2dDump = 0.0;
LastAnas = 0;
#if 0
//initial checkpoint for special usage
{
CheckPoint->write_Black_Hole_position(BH_num_input,BH_num,Porg0,Porgbr,Mass);
CheckPoint->writecheck_cgh(PhysTime,GH);
#ifdef WithShell
CheckPoint->writecheck_sh(PhysTime,SH);
#endif
CheckPoint->write_bssn(LastDump,Last2dDump,LastAnas);
misc::tillherecheck("complete initialization preparation"); // we need synchronization here
if(myrank==0) MPI_Abort(MPI_COMM_WORLD,1);
}
#endif
// for step 0 constraint interpolation
Interp_Constraint(true);
#ifdef With_AHF
// setup apparent horizon finder direct of thornburg
{
// one horizon slot per black hole plus one per unordered pair (ia < ib)
HN_num = BH_num;
for (int ia = 0; ia < BH_num; ia++)
for (int ib = ia + 1; ib < BH_num; ib++)
HN_num++;
AHFinderDirect::AHFinderDirect_setup(AHList, GaugeList,
this,
Symmetry, HN_num, &PhysTime);
// per-horizon work arrays; they are not released in this function
// (the only delete[] calls are inside the disabled blocks below)
lastahdumpid = new int[HN_num];
findeveryl = new int[HN_num];
xc = new double[HN_num];
yc = new double[HN_num];
zc = new double[HN_num];
xr = new double[HN_num];
yr = new double[HN_num];
zr = new double[HN_num];
dTT = new double[HN_num];
trigger = new bool[HN_num];
dumpid = new int[HN_num];
for (int ihn = 0; ihn < HN_num; ihn++)
{
lastahdumpid[ihn] = 0;
findeveryl[ihn] = AHfindevery;
}
}
#endif
// on a checkpoint run the dump/analysis counters are read back from the checkpoint
if (checkrun)
CheckPoint->read_bssn(LastDump, Last2dDump, LastAnas);
// time increment used to advance the dump/checkpoint timers each iteration
double dT_mon = dT * pow(0.5, Mymax(0, trfls));
/*
#ifdef With_AHF
//initial apparent horizon finding
{
double gam;
double massmin=Mass[0];
for(int ihn=1;ihn<BH_num;ihn++) massmin=Mymin(massmin,Mass[ihn]);
for(int ihn=0;ihn<BH_num;ihn++)
{
xc[ihn] = Porg0[ihn][0];
yc[ihn] = Porg0[ihn][1];
zc[ihn] = Porg0[ihn][2];
gam = fabs(Pmom[ihn*3])/(Mass[ihn]);
gam = sqrt(1-gam*gam);
xr[ihn] = Mass[ihn]*gam;
gam = fabs(Pmom[ihn*3+1])/(Mass[ihn]);
gam = sqrt(1-gam*gam);
yr[ihn] = Mass[ihn]*gam;
gam = fabs(Pmom[ihn*3+2])/(Mass[ihn]);
gam = sqrt(1-gam*gam);
zr[ihn] = Mass[ihn]*gam;
findeveryl[ihn] = findeveryl[ihn]*(Mymax(int(Mass[ihn]/massmin),1));
}
int ihn = BH_num;
for(int ia=0;ia<BH_num;ia++)
for(int ib=ia+1;ib<BH_num;ib++) {
xc[ihn] = (Porg0[ia][0] + Porg0[ib][0])/2;
yc[ihn] = (Porg0[ia][1] + Porg0[ib][1])/2;
zc[ihn] = (Porg0[ia][2] + Porg0[ib][2])/2;
xr[ihn] = yr[ihn] = zr[ihn] = (Mass[ia])+(Mass[ib]);
findeveryl[ihn] = findeveryl[ihn]*(int(xr[ihn]/Mass[0]));
ihn++;
}
AHFinderDirect::AHFinderDirect_enforcefind(HN_num,xc,yc,zc,xr,yr,zr);
// note rhs has been used as temp storage space
delete[] xc; delete[] yc; delete[] zc; delete[] xr; delete[] yr; delete[] zr;
}
#endif
*/
#if BSSN_ENABLE_MEM_USAGE_LOG
perf bssn_perf;
size_t current_min, current_avg, current_max, peak_min, peak_avg, peak_max;
#endif
// every level starts at the current physical time
for (int lev = 0; lev < GH->levels; lev++)
GH->Lt[lev] = PhysTime;
GH->settrfls(trfls);
for (int ncount = 1; ncount < Steps + 1; ncount++)
{
// per-iteration wall-clock bookkeeping, only active when evolve timing is enabled
const bool evolve_timing = amss_evolve_timing_enabled();
const double evolve_t0 = evolve_timing ? MPI_Wtime() : 0.0;
if (evolve_timing)
amss_evolve_timing_reset();
// the cached level-0 constraint data is marked stale at the start of every iteration
cuda_level0_constraint_cache_valid = false;
#if BSSN_FINE_TIMING
step_timing::reset();
#endif
#if BSSN_KERNEL_FINE_TIMING
f_bssn_rhs_kernel_timing_reset();
#endif
#if (BSSN_FINE_TIMING || BSSN_KERNEL_FINE_TIMING)
const double step_wall_start = MPI_Wtime();
#endif
// special for large mass ratio consideration
// if(fabs(Porg0[0][0]-Porg0[1][0])+fabs(Porg0[0][1]-Porg0[1][1])+fabs(Porg0[0][2]-Porg0[1][2])<1e-6)
// { GH->levels=GH->movls; }
// curr_clock/prev_clock are only set and read on rank 0
if (myrank == 0)
curr_clock = clock();
#if (PSTR == 0)
RecursiveStep(0);
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
// data analysis part
// Warning NOTE: the variables1 are used as temp storage room
AnalysisStuff(a_lev, dT_mon);
ParallelStep();
#endif
// misc::tillherecheck("before Constraint_Out");
if (amss_constraint_out_enabled_for_step(ncount))
{
const double constraint_t0 = evolve_timing ? MPI_Wtime() : 0.0;
STEP_TIMER_DECL(timer_constraint_out);
Constraint_Out(); // this will affect the Dump_List
STEP_TIMER_ADD(TB_CONSTRAINT_OUT, timer_constraint_out);
if (evolve_timing)
amss_evolve_timing_add_constraint(MPI_Wtime() - constraint_t0);
}
// advance the output timers by one monitoring step
LastDump += dT_mon;
Last2dDump += dT_mon;
LastCheck += dT_mon;
// When LastDump >= DumpTime, output corresponding binary data
if (LastDump >= DumpTime)
{
STEP_TIMER_DECL(timer_dump3d);
// misc::tillherecheck("before Dump_Data");
for (int lev = 0; lev < GH->levels; lev++)
Parallel::Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT_mon);
#ifdef WithShell
SH->Dump_Data(DumpList, 0, PhysTime, dT_mon);
#endif
STEP_TIMER_ADD(TB_DUMP_3D, timer_dump3d);
LastDump = 0;
if (myrank == 0)
{
cout << " Dump done. " << endl;
}
}
// When Last2dDump >= d2DumpTime, output corresponding 2D data
// (only the Cartesian levels are written here; there is no shell call)
if (Last2dDump >= d2DumpTime)
{
STEP_TIMER_DECL(timer_dump2d);
// misc::tillherecheck("before 2dDump_Data");
for (int lev = 0; lev < GH->levels; lev++)
Parallel::d2Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT_mon);
STEP_TIMER_ADD(TB_DUMP_2D, timer_dump2d);
Last2dDump = 0;
if (myrank == 0)
{
cout << " 2d Dump done. " << endl;
}
}
// rank 0 reports the CPU time (clock()) spent since the start of this iteration
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << endl;
cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime << " "
<< " Computer used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
// cout << endl;
}
// leaving here skips the regrid, checkpoint and timing reports of the final iteration
if (PhysTime >= TotalTime)
break;
#if (REGLEV == 1)
STEP_TIMER_DECL(timer_regrid);
#if USE_CUDA_BSSN
// flush the CUDA-side level data for each level the helper selects, before GH->Regrid runs
for (int il = 0; il < GH->levels; il++)
if (bssn_cuda_should_flush_before_regrid(GH, il, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[il],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
#endif
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
#if (ABEtype != 1)
// all per-level sync caches are invalidated after the regrid call
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
STEP_TIMER_ADD(TB_REGRID, timer_regrid);
#endif
#if (REGLEV == 0 && (PSTR == 1 || PSTR == 2))
// GH->Regrid_fake(Symmetry,BH_num,Porgbr,Porg0,
// SynchList_cor,OldStateList,StateList,SynchList_pre,
// fgt(PhysTime-dT_mon,StartTime,dT_mon/2),ErrorMonitor);
#endif
#if BSSN_ENABLE_MEM_USAGE_LOG
// Retrieve memory usage information used during computation; master process prints it
bssn_perf.MemoryUsage(&current_min, &current_avg, &current_max,
&peak_min, &peak_avg, &peak_max, nprocs);
if (myrank == 0)
{
printf(" Memory usage: current %0.4lg/%0.4lg/%0.4lgMB, "
"peak %0.4lg/%0.4lg/%0.4lgMB\n",
(double)current_min / (1024.0 * 1024.0),
(double)current_avg / (1024.0 * 1024.0),
(double)current_max / (1024.0 * 1024.0),
(double)peak_min / (1024.0 * 1024.0),
(double)peak_avg / (1024.0 * 1024.0),
(double)peak_max / (1024.0 * 1024.0));
cout << endl;
}
#endif
// Output puncture positions at each step
if (myrank == 0)
{
for (int i_count=0; i_count<BH_num; i_count++)
{
cout << " puncture position: no."
<< setw(2) << setfill(' ') << i_count
<< " = (" << Porg0[i_count][0] << " "
<< Porg0[i_count][1] << " "
<< Porg0[i_count][2] << ")"
<< endl;
}
cout << endl;
#if BSSN_ENABLE_STDIN_ABORT_POLL
cout << " If you think the physical evolution time is enough for this simulation, please input 'stop' in the terminal to stop the MPI processes in the next evolution step ! " << endl;
#endif
// cout << endl;
}
#if BSSN_ENABLE_STDIN_ABORT_POLL
////////////////////////////////////////////////////////////
// If an "abort" command is detected on stdin, terminate MPI processes
// NOTE(review): the prompt above asks for 'stop' while the message below says
// 'abort'; which word check_Stdin_Abort() accepts is not visible here - confirm.
////////////////////////////////////////////////////////////
bool shouldAbort = false;
// Only rank 0 checks stdin
if (myrank == 0) {
if (check_Stdin_Abort()) {
shouldAbort = true;
}
}
// Broadcast abort flag to all ranks
int abortFlag = shouldAbort ? 1 : 0;
MPI_Bcast(&abortFlag, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (abortFlag && myrank == 0) {
cout << endl;
cout << " Aborting the MPI Process due to 'abort' command !! " << endl;
cout << endl;
MPI_Abort(MPI_COMM_WORLD, 1); // terminate MPI processes
// MPI_Finalize();
}
////////////////////////////////////////////////////////////
#endif
// When LastCheck >= CheckTime, perform runtime checks and output status data
if (LastCheck >= CheckTime)
{
STEP_TIMER_DECL(timer_checkpoint);
LastCheck = 0;
CheckPoint->write_Black_Hole_position(BH_num_input, BH_num, Porg0, Porgbr, Mass);
CheckPoint->writecheck_cgh(PhysTime, GH);
#ifdef WithShell
CheckPoint->writecheck_sh(PhysTime, SH);
#endif
CheckPoint->write_bssn(LastDump, Last2dDump, LastAnas);
STEP_TIMER_ADD(TB_CHECKPOINT, timer_checkpoint);
}
#if BSSN_FINE_TIMING
if (ncount % BSSN_FINE_TIMING_EVERY == 0)
step_timing::report(myrank, nprocs, TimingMonitor, ncount, PhysTime, MPI_Wtime() - step_wall_start);
#endif
#if BSSN_KERNEL_FINE_TIMING
if (ncount % BSSN_FINE_TIMING_EVERY == 0)
rhs_kernel_timing_report::report(myrank, nprocs, ncount, MPI_Wtime() - step_wall_start);
#endif
if (evolve_timing)
{
// reduce each timing bucket to its maximum over all ranks; rank 0 prints the
// result, with "other" being the wall time not covered by the four buckets
const AmssEvolveTimingStats &stats = amss_evolve_timing_stats();
const double local[4] = {stats.step, stats.rp, stats.regrid, stats.constraint};
double maxv[4] = {};
MPI_Reduce((void *)local, maxv, 4, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
if (myrank == 0)
{
const double wall = MPI_Wtime() - evolve_t0;
const double known = maxv[0] + maxv[1] + maxv[2] + maxv[3];
fprintf(stderr,
"[AMSS-EVOLVE-TIMING] step=%d wall=%.6f step_fn=%.6f rp=%.6f "
"regrid=%.6f constraint=%.6f other=%.6f\n",
ncount, wall, maxv[0], maxv[1], maxv[2], maxv[3], wall - known);
}
}
}
/*
#ifdef With_AHF
// final apparent horizon finding
{
double gam;
for(int ihn=0;ihn<BH_num;ihn++)
{
xc[ihn] = Porg0[ihn][0];
yc[ihn] = Porg0[ihn][1];
zc[ihn] = Porg0[ihn][2];
gam = fabs(Pmom[ihn*3] )/(Mass[ihn]);
gam = sqrt(1-gam*gam);
xr[ihn] = Mass[ihn]*gam;
gam = fabs(Pmom[ihn*3+1])/(Mass[ihn]);
gam = sqrt(1-gam*gam);
yr[ihn] = Mass[ihn]*gam;
gam = fabs(Pmom[ihn*3+2])/(Mass[ihn]);
gam = sqrt(1-gam*gam);
zr[ihn] = Mass[ihn]*gam;
}
int ihn = BH_num;
for(int ia=0;ia<BH_num;ia++)
for(int ib=ia+1;ib<BH_num;ib++)
{
xc[ihn] = (Porg0[ia][0] + Porg0[ib][0])/2;
yc[ihn] = (Porg0[ia][1] + Porg0[ib][1])/2;
zc[ihn] = (Porg0[ia][2] + Porg0[ib][2])/2;
xr[ihn] = yr[ihn] = zr[ihn] = (Mass[ia])+(Mass[ib]);
ihn++;
}
AHFinderDirect::AHFinderDirect_enforcefind(HN_num,xc,yc,zc,xr,yr,zr);
// note rhs has been used as temp storage space
delete[] xc; delete[] yc; delete[] zc; delete[] xr; delete[] yr; delete[] zr;
}
#endif
*/
}
//================================================================================================
//================================================================================================
// This member function implements recursive time-stepping across AMR levels
// This variant is for the case PSTR == 0
//================================================================================================
#if (PSTR == 0)
void bssn_class::RecursiveStep(int lev)
{
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
int NoIterations = 1, YN;
if (lev <= trfls)
NoIterations = 1;
else
NoIterations = 2;
for (int i = 0; i < NoIterations; i++)
{
// if(myrank==0) cout<<"level now = "<<lev<<" NoIteration = "<<i<<endl;
YN = (i == NoIterations - 1) ? 1 : 0; // 1: same time level for coarse level and fine level
const bool evolve_timing = amss_evolve_timing_enabled();
const double step_t0 = evolve_timing ? MPI_Wtime() : 0.0;
Step(lev, YN);
if (evolve_timing)
amss_evolve_timing_add_step(MPI_Wtime() - step_t0);
#if (AGM == 2)
if (GH->levels == 1)
{
Enforce_algcon(lev, 0);
}
#endif
GH->Lt[lev] += dT_lev;
if (lev < GH->levels - 1)
{
int lf = lev + 1;
RecursiveStep(lf);
}
else
PhysTime += dT * pow(0.5, lev);
#if (AGM == 2)
if (lev > 0)
{
Enforce_algcon(lev, 0);
if (YN == 1)
Enforce_algcon(lev - 1, 0);
}
#endif
#if (RPS == 1)
// mesh refinement boundary part
//
// till here the PhysTime has updated dT_lev
// if(myrank==0) cout<<"level now = "<<lev<<", "<<fgt(PhysTime-dT_lev,StartTime,dT_lev/2)<<endl;
const double rp_t0 = evolve_timing ? MPI_Wtime() : 0.0;
RestrictProlong(lev, YN, fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), StateList, OldStateList, SynchList_cor);
if (evolve_timing)
amss_evolve_timing_add_rp(MPI_Wtime() - rp_t0);
// RestrictProlong(lev,YN,false,StateList,OldStateList,SynchList_cor);
#ifdef WithShell
#if USE_CUDA_BSSN
if (bssn_cuda_use_resident_sync(lev))
{
for (int dl = 0; dl < GH->levels; dl++)
bssn_cuda_download_level_state_if_present(GH->PatL[dl], StateList, myrank);
}
#endif
if (lev == 0)
{
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->CS_Inter(StateList, Symmetry);
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds! " << endl;
}
}
#endif
#endif
// Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT_lev);
}
#if 0
if(lev>0) Parallel::Restrict_after(GH->PatL[lev-1],GH->PatL[lev],StateList,StateList,Symmetry);
#endif
#if (REGLEV == 0)
const bool evolve_timing = amss_evolve_timing_enabled();
const double regrid_t0 = evolve_timing ? MPI_Wtime() : 0.0;
STEP_TIMER_DECL(timer_regrid_onelevel);
#if USE_CUDA_BSSN
if (bssn_cuda_should_flush_before_regrid(GH, lev, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[lev],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
#endif
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
{
if (ConstraintRefreshLevels)
ConstraintRefreshLevels[lev] = 1;
#if (ABEtype != 1)
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
}
if (evolve_timing)
amss_evolve_timing_add_regrid(MPI_Wtime() - regrid_t0);
STEP_TIMER_ADD(TB_REGRID, timer_regrid_onelevel);
#endif
}
//================================================================================================
//================================================================================================
// This member function implements recursive time-stepping across AMR levels
// This variant handles the cases PSTR == 1 and PSTR == 2
//================================================================================================
#elif (PSTR == 1 || PSTR == 2)
// Advance level `lev`, then recurse into the next coarser level (lev - 1)
// until level 0 has been handled.  Variant for PSTR == 1 and PSTR == 2.
//
// A level above trfls is advanced with two sub-steps per call, any other
// level with one.  Only processes whose GH->mylev equals `lev` call Step();
// processes owning `lev` or `lev - 1` take part in RestrictProlong() when
// RPS == 1.  GH->Lt[lev] and PhysTime are incremented after every sub-step
// on every process that executes this function.
void bssn_class::RecursiveStep(int lev)
{
  const double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
  const int substeps = (lev <= trfls) ? 1 : 2;

  for (int istep = 0; istep < substeps; ++istep)
  {
    // 1 on the last sub-step: coarse and fine level sit at the same time level
    const int same_time = (istep + 1 == substeps) ? 1 : 0;

    if (GH->mylev == lev)
    {
      Step(lev, same_time);
#if (AGM == 2)
      const bool single_level = (GH->levels == 1);
      if (single_level || lev > 0)
        Enforce_algcon(lev, 0);
      if (!single_level && lev > 0 && same_time == 1)
        Enforce_algcon(lev - 1, 0);
#endif
    }

#if (RPS == 1)
    // mesh refinement boundary part, executed by the owners of `lev` and `lev - 1`
    {
      const int owned = GH->mylev;
      if (owned == lev || owned == lev - 1)
        RestrictProlong(lev, same_time, fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), StateList, OldStateList, SynchList_cor);
    }
#endif

    GH->Lt[lev] += dT_lev;
    PhysTime += dT * pow(0.5, lev);
  }

  if (lev > 0)
    RecursiveStep(lev - 1);
}
//================================================================================================
//================================================================================================
// ParallelStep performs time evolution across AMR levels (parallelized)
//================================================================================================
#if 1
// Advance the AMR level owned by this process (GH->mylev), then pass Porg0
// between neighbouring levels and, when REGLEV == 0, regrid the owned level.
//
// Outline of what the code below does:
//   1. Levels with mylev <= trfls take one sub-step; finer levels take
//      2^(mylev - trfls) sub-steps.  PhysTime grows by dT_lev per sub-step.
//   2. Each sub-step calls Step(); with RPS == 1 it then calls
//      RestrictProlong() for level mylev + 1 (if such a level exists) and for
//      level mylev (if mylev > 0); with WithShell it then calls SHStep() and,
//      with RPS == 1, SH->CS_Inter().
//   3. After the loop, the start rank of level lev receives 3 * BH_num doubles
//      from the start rank of level lev + 1 (MPI tag 1).  All other ranks of
//      the level contribute zeros to an MPI_SUM Allreduce over
//      GH->Commlev[lev], so every rank ends up with the received values in
//      Porg0.  The start rank of every level above 0 then sends its Porg0 to
//      the start rank of level lev - 1.
//      (Porg0 presumably holds the black-hole positions -- confirm.)
//   4. With REGLEV == 0, GH->Regrid_Onelevel() is called for mylev; when it
//      returns true and ABEtype != 1, all per-level sync caches are invalidated.
void bssn_class::ParallelStep()
{
int YN, NoIterations = 1;
if (GH->mylev <= trfls)
NoIterations = 1;
else
NoIterations = int(pow(2.0, GH->mylev - trfls));
double dT_lev = dT * pow(0.5, Mymax(GH->mylev, trfls));
for (int i = 0; i < NoIterations; i++)
{
// NOTE(review): with a single sub-step (NoIterations == 1) this yields YN == 0;
// the alternate implementation in the #else branch of this file uses YN = 1 in
// that case -- confirm which is intended.
YN = (i % 2) ? 1 : 0; // 1: same time level for coarse level and fine level
Step(GH->mylev, YN);
#if (RPS == 1)
// mesh refinement boundary part
//
// till here the PhysTime has updated dT_lev
// NOTE(review): in this function PhysTime is incremented only at the end of the
// loop body, after these calls; the remark above holds only if Step() itself
// advances PhysTime -- confirm against Step().
if (GH->mylev < GH->levels - 1)
{
// Two calls for the finer neighbour level: same time-flag expression, YN = 0 then YN = 1.
RestrictProlong(GH->mylev + 1, 0, fgt(PhysTime - dT_lev / 2, StartTime, dT_lev / 4), StateList, OldStateList, SynchList_cor);
RestrictProlong(GH->mylev + 1, 1, fgt(PhysTime - dT_lev / 2, StartTime, dT_lev / 4), StateList, OldStateList, SynchList_cor);
}
if (GH->mylev > 0)
RestrictProlong(GH->mylev, YN, fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), StateList, OldStateList, SynchList_cor);
#endif
#ifdef WithShell
#if USE_CUDA_BSSN
{
// The resident-sync query is made for level 0 here (no per-level `lev` exists in
// this loop).  If it reports true, bssn_cuda_download_level_state_if_present() is
// called for every level's patch list before the shell step.
// Presumably this copies GPU-resident state back to host memory so the CPU shell
// code reads current data -- confirm in bssn_rhs_cuda.h.
const int lev0 = 0;
if (bssn_cuda_use_resident_sync(lev0))
{
for (int dl = 0; dl < GH->levels; dl++)
bssn_cuda_download_level_state_if_present(GH->PatL[dl], StateList, myrank);
}
}
#endif
SHStep();
#if (RPS == 1)
{
// Timing of SH->CS_Inter(): only rank 0 reads the clock and prints, so the two
// clock_t variables stay unset (and unread) on all other ranks.  clock() reports
// processor time, not wall-clock time.
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->CS_Inter(StateList, Symmetry);
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds! " << endl;
}
}
#endif
#endif
PhysTime += dT_lev;
}
// stringstream a_stream;
// a_stream.setf(ios::left);
// Scratch buffers for the Porg0 exchange: tporgo is the receive / reduction
// input buffer, tporg the reduction output and send buffer.
double *tporg, *tporgo;
tporg = new double[3 * BH_num];
tporgo = new double[3 * BH_num];
{
int lev = GH->mylev;
MPI_Status status;
// receive
if (lev < GH->levels - 1)
{
// Blocking receive from the start rank of the next finer level.
if (myrank == GH->start_rank[lev])
{
MPI_Recv(tporgo, 3 * BH_num, MPI_DOUBLE, GH->start_rank[lev + 1], 1, MPI_COMM_WORLD, &status);
// cout<<tporgo[0]<<","<<tporgo[1]<<","<<tporgo[2]<<endl;
}
else
for (int i = 0; i < 3 * BH_num; i++)
tporgo[i] = 0;
// Sum over the level communicator: only the start rank holds non-zero data, so
// the sum distributes the received values to every rank of this level.
MPI_Allreduce(tporgo, tporg, 3 * BH_num, MPI_DOUBLE, MPI_SUM, GH->Commlev[lev]);
for (int i = 0; i < BH_num; i++)
for (int j = 0; j < 3; j++)
Porg0[i][j] = tporg[3 * i + j];
// if(myrank==GH->start_rank[lev]) cout<<Porg0[0][0]<<","<<Porg0[0][1]<<","<<Porg0[0][2]<<endl;
}
// send
// Runs after the receive above, so a middle level forwards what it just received.
if (lev > 0 && myrank == GH->start_rank[lev])
{
for (int i = 0; i < BH_num; i++)
for (int j = 0; j < 3; j++)
tporg[3 * i + j] = Porg0[i][j];
MPI_Send(tporg, 3 * BH_num, MPI_DOUBLE, GH->start_rank[lev - 1], 1, MPI_COMM_WORLD);
}
// a_stream.clear();
// a_stream.str("");
// a_stream<<lev<<": after PBH Sync";
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str());
}
delete[] tporg;
delete[] tporgo;
#if (REGLEV == 0)
#if USE_CUDA_BSSN
// Flush the level's CUDA-side data before regridding when the helper asks for it.
if (bssn_cuda_should_flush_before_regrid(GH, GH->mylev, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[GH->mylev],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
#endif
// A true return value is treated as "the grid changed": cached sync data of
// every level is invalidated (only when ABEtype != 1).
if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
{
#if (ABEtype != 1)
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
}
#endif
}
//================================================================================================
//================================================================================================
// ParallelStep performs time evolution across AMR levels (parallelized)
// This is an alternate implementation
//================================================================================================
#else
// Alternate ParallelStep implementation.  It sits in the #else branch of the
// `#if 1` that selects the implementation above it in this file, so it is not
// compiled as the file stands.
//
// What the code below does, per sub-step of the owned level lev = GH->mylev:
//   - Step(lev, YN), then GH->Lt[lev] and PhysTime are advanced by dT_lev;
//   - with AGM == 2, Enforce_algcon() on lev (and on lev - 1 when YN == 1);
//   - with RPS == 1, RestrictProlong() for level lev + 1 (once or twice
//     depending on lev + 1 <= trfls) and for level lev;
//   - Porg0 is received from the start rank of level lev + 1 and distributed
//     with an MPI_SUM Allreduce; it is sent to level lev - 1 only when YN == 1;
//   - with REGLEV == 0, GH->Regrid_Onelevel() is called for lev + 1, lev and
//     lev - 1 under the conditions written out below.
// After the loop, with WithShell, SHStep() and (RPS == 1) SH->CS_Inter() run once.
void bssn_class::ParallelStep()
{
// stringstream a_stream;
// a_stream.setf(ios::left);
// Scratch buffers for the Porg0 exchange (tporgo: receive / reduction input,
// tporg: reduction output / send buffer); released at the end of the function.
double *tporg, *tporgo;
tporg = new double[3 * BH_num];
tporgo = new double[3 * BH_num];
int lev = GH->mylev;
// Step sizes of this level and of its finer (p1) and coarser (m1) neighbours;
// levels at or below trfls share the step size of level trfls.
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
double dT_levp1 = dT * pow(0.5, Mymax(lev + 1, trfls));
double dT_levm1 = dT * pow(0.5, Mymax(lev - 1, trfls));
int NoIterations = 1, YN;
if (lev <= trfls)
NoIterations = 1;
else
NoIterations = int(pow(2.0, lev - trfls));
for (int i = 0; i < NoIterations; i++)
{
// if(myrank==GH->start_rank[lev]) cout<<"level now = "<<lev<<" NoIteration = "<<i<<endl;
// YN is 1 for a single sub-step and for every odd-numbered sub-step, 0 otherwise.
if (NoIterations == 1)
YN = 1;
else if (i % 2 == 0)
YN = 0;
else
YN = 1; // 1: same time level for coarse level and fine level
// a_stream<<lev<<": before calling Step";
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str());
Step(lev, YN);
// a_stream.clear();
// a_stream.str("");
// a_stream<<lev<<": after calling Step";
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str());
#if (AGM == 2)
if (GH->levels == 1)
{
Enforce_algcon(lev, 0);
}
#endif
GH->Lt[lev] += dT_lev;
PhysTime += dT_lev;
#if (AGM == 2)
if (lev > 0)
{
Enforce_algcon(lev, 0);
if (YN == 1)
Enforce_algcon(lev - 1, 0);
}
#endif
#if (RPS == 1)
// mesh refinement boundary part
//
// till here the PhysTime has updated dT_lev
// a_stream.clear();
// a_stream.str("");
// a_stream<<lev<<": before RestrictProlong";
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str());
if (lev < GH->levels - 1)
{
// Finer neighbour: one call when level lev + 1 is at or below trfls, otherwise
// two calls (YN = 0, then YN = 1) with different time-flag expressions.
if (lev + 1 <= trfls)
{
// RestrictProlong_aux(lev,1,fgt(PhysTime-dT_lev,StartTime,dT_levp1/2),StateList,OldStateList,SynchList_cor);
RestrictProlong(lev + 1, 1, fgt(PhysTime - dT_lev, StartTime, dT_levp1 / 2), StateList, OldStateList, SynchList_cor);
}
else
{
// if(myrank==GH->start_rank[lev]) cout<<GH->mylev<<", "<<YN<<endl;
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"between RestrictProlong");
// RestrictProlong_aux(lev,0,fgt(PhysTime-dT_lev,StartTime,dT_levp1/2),StateList,OldStateList,SynchList_cor);
// RestrictProlong_aux(lev,1,fgt(PhysTime-dT_levp1,StartTime,dT_levp1/2),StateList,OldStateList,SynchList_cor);
RestrictProlong(lev + 1, 0, fgt(PhysTime - dT_lev, StartTime, dT_levp1 / 2), StateList, OldStateList, SynchList_cor);
RestrictProlong(lev + 1, 1, fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), StateList, OldStateList, SynchList_cor);
}
}
// if(myrank==GH->start_rank[lev]) cout<<GH->mylev<<", "<<YN<<endl;
// a_stream.clear();
// a_stream.str("");
// a_stream<<lev<<": between RestrictProlong";
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str());
// NOTE(review): unlike the variant in the #if 1 branch, this call is not guarded
// by lev > 0 -- confirm that RestrictProlong() accepts lev == 0.
RestrictProlong(lev, YN, fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), StateList, OldStateList, SynchList_cor);
// RestrictProlong(lev,YN,false,StateList,OldStateList,SynchList_cor);
// if(myrank==GH->start_rank[lev]) cout<<GH->mylev<<endl;
// a_stream.clear();
// a_stream.str("");
// a_stream<<lev<<": after RestrictProlong";
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str());
#endif
// Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT_lev);
{
MPI_Status status;
// receive
if (lev < GH->levels - 1)
{
// Blocking receive from the start rank of the next finer level (tag 1).
if (myrank == GH->start_rank[lev])
{
MPI_Recv(tporgo, 3 * BH_num, MPI_DOUBLE, GH->start_rank[lev + 1], 1, MPI_COMM_WORLD, &status);
// cout<<tporgo[0]<<","<<tporgo[1]<<","<<tporgo[2]<<endl;
}
else
for (int i = 0; i < 3 * BH_num; i++)
tporgo[i] = 0;
// Only the start rank contributes non-zero data, so the sum hands the received
// values to every rank of this level.
MPI_Allreduce(tporgo, tporg, 3 * BH_num, MPI_DOUBLE, MPI_SUM, GH->Commlev[lev]);
for (int i = 0; i < BH_num; i++)
for (int j = 0; j < 3; j++)
Porg0[i][j] = tporg[3 * i + j];
// if(myrank==GH->start_rank[lev]) cout<<Porg0[0][0]<<","<<Porg0[0][1]<<","<<Porg0[0][2]<<endl;
}
// send
// Sent only on sub-steps with YN == 1; the coarser level posts one receive per
// sub-step of its own.
if (lev > 0 && YN == 1 && myrank == GH->start_rank[lev])
{
for (int i = 0; i < BH_num; i++)
for (int j = 0; j < 3; j++)
tporg[3 * i + j] = Porg0[i][j];
MPI_Send(tporg, 3 * BH_num, MPI_DOUBLE, GH->start_rank[lev - 1], 1, MPI_COMM_WORLD);
}
// a_stream.clear();
// a_stream.str("");
// a_stream<<lev<<": after PBH Sync";
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str());
}
#if (REGLEV == 0)
// Every Regrid_Onelevel() call below follows the same pattern: optional CUDA
// flush of the target level first, then the regrid; a true return value
// invalidates the sync caches of all levels (ABEtype != 1).
// for higher level
if (lev < GH->levels - 1)
{
if (lev + 1 >= GH->movls)
{
// GH->Regrid_Onelevel_aux(lev,Symmetry,BH_num,Porgbr,Porg0,
#if USE_CUDA_BSSN
if (bssn_cuda_should_flush_before_regrid(GH, lev + 1, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[lev + 1],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
#endif
if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor))
{
#if (ABEtype != 1)
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
}
// a_stream.clear();
// a_stream.str("");
// a_stream<<lev<<": after calling GH->Regrid_Onelevel_aux for higher level";
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str());
}
}
// for this level
if (YN == 1)
{
#if USE_CUDA_BSSN
if (bssn_cuda_should_flush_before_regrid(GH, lev, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[lev],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
#endif
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
{
#if (ABEtype != 1)
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
}
// a_stream.clear();
// a_stream.str("");
// a_stream<<lev<<": after calling GH->Regrid_Onelevel";
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str());
}
// for lower level
// NOTE(review): on level 0, lev - 1 is -1; this branch is skipped there only if
// GH->movls >= 0 -- confirm.
if (lev - 1 >= GH->movls)
{
// The coarser level is regridded on sub-steps with YN == 1 when it is at or below
// trfls, otherwise on every fourth sub-step (i % 4 == 3).
if (lev - 1 <= trfls)
{
if (YN == 1)
{
// GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0,
#if USE_CUDA_BSSN
if (bssn_cuda_should_flush_before_regrid(GH, lev - 1, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[lev - 1],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
#endif
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
{
#if (ABEtype != 1)
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
}
// a_stream.clear();
// a_stream.str("");
// a_stream<<lev<<": after calling GH->Regrid_Onelevel_aux for lower level";
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str());
}
}
else
{
if (i % 4 == 3)
{
// GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0,
#if USE_CUDA_BSSN
if (bssn_cuda_should_flush_before_regrid(GH, lev - 1, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[lev - 1],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
#endif
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
{
#if (ABEtype != 1)
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
}
// a_stream.clear();
// a_stream.str("");
// a_stream<<lev<<": after calling GH->Regrid_Onelevel_aux for lower level";
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str());
}
}
}
#endif
}
#ifdef WithShell
#if USE_CUDA_BSSN
// If resident sync is reported for the owned level, call
// bssn_cuda_download_level_state_if_present() for every level before the shell
// step.  Presumably this makes GPU-resident state visible to the CPU shell code
// -- confirm in bssn_rhs_cuda.h.
if (bssn_cuda_use_resident_sync(lev))
{
for (int dl = 0; dl < GH->levels; dl++)
bssn_cuda_download_level_state_if_present(GH->PatL[dl], StateList, myrank);
}
#endif
SHStep();
// a_stream.clear();
// a_stream.str("");
// a_stream<<lev<<": after calling SHStep";
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str());
#if (RPS == 1)
{
// Timing of SH->CS_Inter(): only rank 0 reads the clock and prints; clock()
// reports processor time, not wall-clock time.
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->CS_Inter(StateList, Symmetry);
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
}
// a_stream.clear();
// a_stream.str("");
// a_stream<<lev<<": after calling shell cartisian sync";
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str());
}
#endif
#endif
#if 0
if(lev>0) Parallel::Restrict_after(GH->PatL[lev-1],GH->PatL[lev],StateList,StateList,Symmetry);
#endif
delete[] tporg;
delete[] tporgo;
}
#endif
//================================================================================================
//================================================================================================
// ParallelStep performs time evolution across AMR levels (parallelized)
// This is another implementation, for the case PSTR == 3
//================================================================================================
#elif (PSTR == 3)
#warning "remember do not use Shell"
// PSTR == 3 variant of ParallelStep.
//
// Processes with GH->mylev == 1 evolve only the finest level
// (GH->levels - 1) with Step(); every other process drives the coarser
// levels through RecursiveStep(lev, num).  Both groups advance PhysTime in
// units of the finest-level time step and call RestrictProlong() on the
// finest level.
//
// Afterwards Porg0 travels from the start rank of the finest level to the
// start rank of level 0 (MPI tag 1) and is handed to every rank of
// GH->Commlev[0] through an MPI_SUM Allreduce in which all other ranks
// contribute zeros.
void bssn_class::ParallelStep()
{
  // stringstream a_stream;
  // a_stream.setf(ios::left);
  double *porg_out = new double[3 * BH_num]; // reduction result / send buffer
  double *porg_in = new double[3 * BH_num];  // receive buffer / reduction input

  const int finest = GH->levels - 1;
  const double dT_lev = dT * pow(0.5, Mymax(finest, trfls));

  if (GH->mylev == 1)
  {
    // finest level: plain sub-stepping, flag alternates 0, 1, 0, 1, ...
    for (int istep = 0; istep < misc::MYpow2(finest); ++istep)
    {
      const int yn = istep % 2;
      Step(finest, yn);
      PhysTime += dT_lev;
      // if(myrank==nprocs-1) cout<<"OOO level now = "<<lev<<", "<<i%2<<", "<<fgt(PhysTime-dT_lev,StartTime,dT_lev/2)<<endl;
      RestrictProlong(finest, yn, fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), StateList, OldStateList, SynchList_cor);
    }
  }
  else
  {
    // coarser levels: the step counter starts at 1
    const int coarse = finest - 1;
    for (int num = 1; num < misc::MYpow2(coarse + 1); ++num)
    {
      RecursiveStep(coarse, num);
      PhysTime += dT_lev;
      if (num % 2 != 0)
        continue;
      // if(myrank==0) cout<<"level now = "<<lev+1<<", "<<fgt(PhysTime-dT_lev,StartTime,dT_lev/2)<<endl;
      RestrictProlong(finest, 1, fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), StateList, OldStateList, SynchList_cor);
    }
    PhysTime += dT_lev;
    // if(myrank==0) cout<<"level now = "<<lev+1<<", "<<fgt(PhysTime-dT_lev,StartTime,dT_lev/2)<<endl;
    RestrictProlong(finest, 1, fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), StateList, OldStateList, SynchList_cor);
  }

  // Level used for the Porg0 exchange: 0 when mylev == -1, else the finest level.
  const int comm_lev = (GH->mylev == -1) ? 0 : GH->levels - 1;
  const bool on_start_rank = (myrank == GH->start_rank[comm_lev]);

  if (comm_lev == 0)
  {
    // receive
    if (on_start_rank)
    {
      MPI_Status status;
      MPI_Recv(porg_in, 3 * BH_num, MPI_DOUBLE, GH->start_rank[GH->levels - 1], 1, MPI_COMM_WORLD, &status);
      // cout<<tporgo[0]<<","<<tporgo[1]<<","<<tporgo[2]<<endl;
    }
    else
    {
      for (int k = 0; k < 3 * BH_num; ++k)
        porg_in[k] = 0;
    }
    MPI_Allreduce(porg_in, porg_out, 3 * BH_num, MPI_DOUBLE, MPI_SUM, GH->Commlev[comm_lev]);
    for (int ibh = 0; ibh < BH_num; ++ibh)
    {
      for (int idir = 0; idir < 3; ++idir)
        Porg0[ibh][idir] = porg_out[3 * ibh + idir];
    }
    // if(myrank==GH->start_rank[lev]) cout<<Porg0[0][0]<<","<<Porg0[0][1]<<","<<Porg0[0][2]<<endl;
  }
  else if (on_start_rank)
  {
    // send
    for (int ibh = 0; ibh < BH_num; ++ibh)
    {
      for (int idir = 0; idir < 3; ++idir)
        porg_out[3 * ibh + idir] = Porg0[ibh][idir];
    }
    MPI_Send(porg_out, 3 * BH_num, MPI_DOUBLE, GH->start_rank[0], 1, MPI_COMM_WORLD);
  }

  delete[] porg_out;
  delete[] porg_in;
}
//================================================================================================
//================================================================================================
// This member function implements recursive time-stepping across AMR levels
//================================================================================================
void bssn_class::RecursiveStep(int lev, int num) // in all 2^(lev+1)-1 steps
{
if (trfls > 0)
cout << "error: bssn_class::RecursiveStep does not support trfls > 0 yet" << endl;
if (num / 2 * 2 == num)
RecursiveStep(lev - 1, num / 2);
else
{
Step(lev, 0);
double dT_lev = dT * pow(0.5, Mymax(lev + 1, trfls));
if (myrank == 0)
cout << "level now = " << lev + 1 << ", " << (num - 1) % 2 << ", "
<< fgt(PhysTime - dT_lev, StartTime, dT_lev / 2) << endl;
RestrictProlong(lev + 1, (num - 1) % 2, fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), StateList, OldStateList, SynchList_cor);
}
}
#endif
//================================================================================================
//================================================================================================
// This member function configures a single time-step evolution for each grid level.
// Applicable for the case PSTR == 0
//================================================================================================
#if (PSTR == 0)
#if 1
void bssn_class::Step(int lev, int YN)
{
setpbh(BH_num, Porg0, Mass, BH_num_input);
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
#if USE_CUDA_BSSN
const bool use_cuda_resident_sync = bssn_cuda_use_resident_sync(lev);
#else
const bool use_cuda_resident_sync = false;
#endif
const bool need_cuda_level0_constraint_cache =
(lev == 0) && (LastConsOut + dT * pow(0.5, Mymax(0, trfls)) >= AnasTime);
// new code 2013-2-15, zjcao
#if (MAPBH == 1)
STEP_TIMER_DECL(timer_bh_predictor);
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev);
for (int ithBH = 0; ithBH < BH_num; ithBH++)
{
for (int ith = 0; ith < 3; ith++)
Porg1[ithBH][ith] = Porg0[ithBH][ith] + Porg_rhs[ithBH][ith] * dT_lev;
if (Symmetry > 0)
Porg1[ithBH][2] = fabs(Porg1[ithBH][2]);
if (Symmetry == 2)
{
Porg1[ithBH][0] = fabs(Porg1[ithBH][0]);
Porg1[ithBH][1] = fabs(Porg1[ithBH][1]);
}
if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2]))
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "predictor step finds NaN for BH's position from ("
<< Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] << ")" << endl;
MyList<var> *DG_List = new MyList<var>(Sfx0);
DG_List->insert(Sfx0);
DG_List->insert(Sfy0);
DG_List->insert(Sfz0);
Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev);
DG_List->clearList();
}
}
}
STEP_TIMER_ADD(TB_BH_PREDICTOR, timer_bh_predictor);
// data analysis part
// Warning NOTE: the variables1 are used as temp storege room
if (lev == a_lev)
{
STEP_TIMER_DECL(timer_analysis_surface);
AnalysisStuff(lev, dT_lev);
STEP_TIMER_ADD(TB_ANALYSIS_SURFACE, timer_analysis_surface);
}
#endif
#ifdef With_AHF
AH_Step_Find(lev, dT_lev);
#endif
bool BB = fgt(PhysTime, StartTime, dT_lev / 2);
double ndeps = numepss;
if (lev < GH->movls)
ndeps = numepsb;
double TRK4 = PhysTime;
int iter_count = 0; // count RK4 substeps
int pre = 0, cor = 1;
int ERROR = 0;
MyList<ss_patch> *sPp;
// Predictor
STEP_TIMER_DECL(timer_predictor_rhs);
MyList<Patch> *Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
{
#if (AGM == 0)
if (!use_cuda_resident_sync)
f_enforce_ga(cg->shape,
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
#endif
bool used_gpu_substep = false;
bool used_gpu_resident_state = false;
#if USE_CUDA_BSSN
{
double *state_in[BSSN_CUDA_STATE_COUNT];
double *state_out[BSSN_CUDA_STATE_COUNT];
double *matter[BSSN_CUDA_MATTER_COUNT] = {
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn]};
double propspeed[BSSN_CUDA_STATE_COUNT];
double soa_flat[3 * BSSN_CUDA_STATE_COUNT];
if (!fill_bssn_cuda_views(cg, StateList, state_in, propspeed, soa_flat) ||
!fill_bssn_cuda_views(cg, SynchList_pre, state_out))
{
cout << "CUDA BSSN state list mismatch on predictor step" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
int apply_bam_bc = 0;
// bssn_class does not evolve matter source fields; they remain zero-initialized.
int use_zero_matter = 1;
int keep_resident_state = use_cuda_resident_sync ? 1 : 0;
int apply_enforce_ga = 0;
#if (AGM == 0)
apply_enforce_ga = 1;
#endif
#if (SommerType == 0)
#ifndef WithShell
apply_bam_bc = (lev == 0) ? 1 : 0;
#endif
#endif
if (bssn_cuda_rk4_substep(cg,
cg->shape, cg->X[0], cg->X[1], cg->X[2],
state_in, state_out, matter,
propspeed, soa_flat, Pp->data->bbox,
dT_lev, TRK4, iter_count, apply_bam_bc,
Symmetry, lev, ndeps, pre,
use_zero_matter,
keep_resident_state, apply_enforce_ga, chitiny))
{
cout << "CUDA predictor substep failed in domain: ("
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
}
if (need_cuda_level0_constraint_cache)
{
double *constraint_out[7] = {
cg->fgfs[Cons_Ham->sgfn], cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn]};
if (bssn_cuda_download_constraint_outputs(cg->shape, constraint_out))
{
cout << "CUDA predictor constraint download failed in domain: ("
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
}
else
cuda_level0_constraint_cache_valid = true;
}
used_gpu_substep = true;
used_gpu_resident_state = (keep_resident_state != 0);
}
#endif
if (!used_gpu_substep)
{
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn],
cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn],
cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn],
cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn],
cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn],
cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn],
cg->fgfs[Lap_rhs->sgfn],
cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn],
cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn],
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, ndeps, pre))
{
cout << "find NaN in domain: ("
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
}
}
// rk4 substep boundary fix
{
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; // we do not check the correspondence here
while (varl0)
{
#if !USE_CUDA_BSSN
#if (SommerType == 0)
#ifndef WithShell
if (lev == 0) // sommerfeld indeed
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
cg->fgfs[varlrhs->data->sgfn],
cg->fgfs[varl0->data->sgfn],
varl0->data->propspeed, varl0->data->SoA,
Symmetry);
#endif
#endif
f_rungekutta4_rout(cg->shape, dT_lev,
cg->fgfs[varl0->data->sgfn],
cg->fgfs[varl->data->sgfn],
cg->fgfs[varlrhs->data->sgfn],
iter_count);
#endif
if (!used_gpu_substep)
{
#ifndef WithShell
if (lev > 0) // fix BD point
#endif
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn],
varl0->data->SoA,
Symmetry, cor);
}
#if (SommerType == 1)
#warning "shell part still bam type"
if (lev == 0) // Shibata type sommerfeld
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn],
varl0->data->SoA,
Symmetry, pre);
#endif
varl0 = varl0->next;
varl = varl->next;
varlrhs = varlrhs->next;
}
}
if (!used_gpu_substep)
f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny);
}
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
// NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls
#ifdef WithShell
#if USE_CUDA_BSSN
if (bssn_cuda_use_resident_sync(lev))
{
for (int dl = 0; dl < GH->levels; dl++)
bssn_cuda_download_level_state_if_present(GH->PatL[dl], StateList, myrank);
}
#endif
// evolve Shell Patches
if (lev == 0)
{
sPp = SH->PatL;
while (sPp)
{
MyList<Block> *BP = sPp->data->blb;
int fngfs = sPp->data->fngfs;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
{
#if (AGM == 0)
f_enforce_ga(cg->shape,
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
#endif
if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[fngfs + ShellPatch::gx],
cg->fgfs[fngfs + ShellPatch::gy],
cg->fgfs[fngfs + ShellPatch::gz],
cg->fgfs[fngfs + ShellPatch::drhodx],
cg->fgfs[fngfs + ShellPatch::drhody],
cg->fgfs[fngfs + ShellPatch::drhodz],
cg->fgfs[fngfs + ShellPatch::dsigmadx],
cg->fgfs[fngfs + ShellPatch::dsigmady],
cg->fgfs[fngfs + ShellPatch::dsigmadz],
cg->fgfs[fngfs + ShellPatch::dRdx],
cg->fgfs[fngfs + ShellPatch::dRdy],
cg->fgfs[fngfs + ShellPatch::dRdz],
cg->fgfs[fngfs + ShellPatch::drhodxx],
cg->fgfs[fngfs + ShellPatch::drhodxy],
cg->fgfs[fngfs + ShellPatch::drhodxz],
cg->fgfs[fngfs + ShellPatch::drhodyy],
cg->fgfs[fngfs + ShellPatch::drhodyz],
cg->fgfs[fngfs + ShellPatch::drhodzz],
cg->fgfs[fngfs + ShellPatch::dsigmadxx],
cg->fgfs[fngfs + ShellPatch::dsigmadxy],
cg->fgfs[fngfs + ShellPatch::dsigmadxz],
cg->fgfs[fngfs + ShellPatch::dsigmadyy],
cg->fgfs[fngfs + ShellPatch::dsigmadyz],
cg->fgfs[fngfs + ShellPatch::dsigmadzz],
cg->fgfs[fngfs + ShellPatch::dRdxx],
cg->fgfs[fngfs + ShellPatch::dRdxy],
cg->fgfs[fngfs + ShellPatch::dRdxz],
cg->fgfs[fngfs + ShellPatch::dRdyy],
cg->fgfs[fngfs + ShellPatch::dRdyz],
cg->fgfs[fngfs + ShellPatch::dRdzz],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn],
cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn],
cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn],
cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn],
cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn],
cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn],
cg->fgfs[Lap_rhs->sgfn],
cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn],
cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn],
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, numepsh, sPp->data->sst, pre))
{
cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", ("
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
}
// rk4 substep and boundary
{
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList;
// we do not check the correspondence here
while (varl0)
{
// sommerfeld indeed for outer boundary while fix BD for inner boundary
f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2],
sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2],
sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5],
cg->fgfs[varlrhs->data->sgfn],
cg->fgfs[varl0->data->sgfn],
varl0->data->propspeed, varl0->data->SoA,
Symmetry);
f_rungekutta4_rout(cg->shape, dT_lev,
cg->fgfs[varl0->data->sgfn],
cg->fgfs[varl->data->sgfn],
cg->fgfs[varlrhs->data->sgfn],
iter_count);
varl0 = varl0->next;
varl = varl->next;
varlrhs = varlrhs->next;
}
}
f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny);
}
if (BP == sPp->data->ble)
break;
BP = BP->next;
}
sPp = sPp->next;
}
#if 0
// check rhs
{
SH->Dump_Data(RHSList,0,PhysTime,dT_lev);
if(myrank == 0)
{
cout<<"check rhs"<<endl;
MPI_Abort(MPI_COMM_WORLD,1);
}
}
#endif
}
STEP_TIMER_ADD(TB_PREDICTOR_RHS, timer_predictor_rhs);
// Non-blocking error reduction overlapped with Sync to hide Allreduce latency
MPI_Request err_req;
{
int erh = ERROR;
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req);
}
#endif
STEP_TIMER_DECL(timer_predictor_sync);
Parallel::AsyncSyncState async_pre;
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
#else
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
#endif
#ifdef WithShell
if (lev == 0)
{
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->Synch(SynchList_pre, Symmetry);
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
}
}
#endif
#if (ABEtype != 1)
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
#endif
STEP_TIMER_ADD(TB_PREDICTOR_SYNC, timer_predictor_sync);
#if USE_CUDA_BSSN
const bool need_analysis_state_after_predictor =
(lev == a_lev) && (LastAnas + dT_lev >= AnasTime);
if (use_cuda_resident_sync && need_analysis_state_after_predictor)
bssn_cuda_download_level_state(GH->PatL[lev], SynchList_pre, myrank, false);
#endif
#ifdef WithShell
// Complete non-blocking error reduction and check
MPI_Wait(&err_req, MPI_STATUS_IGNORE);
if (ERROR)
{
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
if (myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime << ", lev = " << lev << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
#endif
#if (MAPBH == 0)
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev);
for (int ithBH = 0; ithBH < BH_num; ithBH++)
{
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg[ithBH][0], Porg_rhs[ithBH][0], iter_count);
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg[ithBH][1], Porg_rhs[ithBH][1], iter_count);
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg[ithBH][2], Porg_rhs[ithBH][2], iter_count);
if (Symmetry > 0)
Porg[ithBH][2] = fabs(Porg[ithBH][2]);
if (Symmetry == 2)
{
Porg[ithBH][0] = fabs(Porg[ithBH][0]);
Porg[ithBH][1] = fabs(Porg[ithBH][1]);
}
if (!finite(Porg[ithBH][0]) || !finite(Porg[ithBH][1]) || !finite(Porg[ithBH][2]))
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "predictor step finds NaN for BH's position from ("
<< Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] << ")" << endl;
MyList<var> *DG_List = new MyList<var>(Sfx0);
DG_List->insert(Sfx0);
DG_List->insert(Sfy0);
DG_List->insert(Sfz0);
Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev);
DG_List->clearList();
}
}
}
// data analysis part
// Warning NOTE: the variables1 are used as temp storage room
if (lev == a_lev)
{
#if USE_CUDA_BSSN
const bool need_analysis_state_before_predictor =
(LastAnas + dT_lev >= AnasTime);
if (use_cuda_resident_sync && need_analysis_state_before_predictor)
bssn_cuda_download_level_state(GH->PatL[lev], StateList, myrank, false);
#endif
AnalysisStuff(lev, dT_lev);
}
#endif
// corrector
for (iter_count = 1; iter_count < 4; iter_count++)
{
STEP_TIMER_DECL(timer_corrector_rhs);
// for RK4: t0, t0+dt/2, t0+dt/2, t0+dt;
if (iter_count == 1 || iter_count == 3)
TRK4 += dT_lev / 2;
Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
{
#if (AGM == 0)
if (!use_cuda_resident_sync)
f_enforce_ga(cg->shape,
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#elif (AGM == 1)
if (iter_count == 3 && !use_cuda_resident_sync)
f_enforce_ga(cg->shape,
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#endif
bool used_gpu_substep = false;
bool used_gpu_resident_state = false;
#if USE_CUDA_BSSN
{
double *state_in[BSSN_CUDA_STATE_COUNT];
double *state_out[BSSN_CUDA_STATE_COUNT];
double *matter[BSSN_CUDA_MATTER_COUNT] = {
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn]};
double propspeed[BSSN_CUDA_STATE_COUNT];
double soa_flat[3 * BSSN_CUDA_STATE_COUNT];
if (!fill_bssn_cuda_views(cg, SynchList_pre, state_in, propspeed, soa_flat) ||
!fill_bssn_cuda_views(cg, SynchList_cor, state_out))
{
cout << "CUDA BSSN state list mismatch on corrector step" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
int apply_bam_bc = 0;
// bssn_class does not evolve matter source fields; they remain zero-initialized.
int use_zero_matter = 1;
int keep_resident_state = use_cuda_resident_sync ? 1 : 0;
int apply_enforce_ga = 0;
#if (AGM == 0)
apply_enforce_ga = 1;
#elif (AGM == 1)
apply_enforce_ga = use_cuda_resident_sync ? 1 : ((iter_count == 3) ? 1 : 0);
#endif
#if (SommerType == 0)
#ifndef WithShell
apply_bam_bc = (lev == 0) ? 1 : 0;
#endif
#endif
if (bssn_cuda_rk4_substep(cg,
cg->shape, cg->X[0], cg->X[1], cg->X[2],
state_in, state_out, matter,
propspeed, soa_flat, Pp->data->bbox,
dT_lev, TRK4, iter_count, apply_bam_bc,
Symmetry, lev, ndeps, cor,
use_zero_matter,
keep_resident_state, apply_enforce_ga, chitiny))
{
cout << "CUDA corrector substep failed in domain: ("
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
}
used_gpu_substep = true;
used_gpu_resident_state = (keep_resident_state != 0);
}
#endif
if (!used_gpu_substep)
{
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn],
cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn],
cg->fgfs[Lap->sgfn],
cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn],
cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn],
cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn],
cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn],
cg->fgfs[gyy1->sgfn], cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn],
cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn],
cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn],
cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn],
cg->fgfs[Lap1->sgfn],
cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn],
cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn],
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, ndeps, cor))
{
cout << "find NaN in domain: ("
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
}
}
// rk4 substep boundary fix
{
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList;
// we do not check the correspondence here
while (varl0)
{
#if !USE_CUDA_BSSN
#if (SommerType == 0)
#ifndef WithShell
if (lev == 0) // sommerfeld indeed
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
cg->fgfs[varl1->data->sgfn],
cg->fgfs[varl->data->sgfn],
varl0->data->propspeed, varl0->data->SoA,
Symmetry);
#endif
#endif
f_rungekutta4_rout(cg->shape, dT_lev,
cg->fgfs[varl0->data->sgfn],
cg->fgfs[varl1->data->sgfn],
cg->fgfs[varlrhs->data->sgfn],
iter_count);
#endif
if (!used_gpu_substep)
{
#ifndef WithShell
if (lev > 0) // fix BD point
#endif
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn],
varl0->data->SoA,
Symmetry, cor);
}
#if (SommerType == 1)
if (lev == 1) // shibata type sommerfeld
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl->data->sgfn], cg->fgfs[varl1->data->sgfn],
varl0->data->SoA,
Symmetry, cor);
#endif
varl0 = varl0->next;
varl = varl->next;
varl1 = varl1->next;
varlrhs = varlrhs->next;
}
}
if (!used_gpu_substep)
f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny);
}
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
// NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls
#ifdef WithShell
#if USE_CUDA_BSSN
if (bssn_cuda_use_resident_sync(lev))
{
for (int dl = 0; dl < GH->levels; dl++)
bssn_cuda_download_level_state_if_present(GH->PatL[dl], StateList, myrank);
}
#endif
// evolve Shell Patches
if (lev == 0)
{
sPp = SH->PatL;
while (sPp)
{
MyList<Block> *BP = sPp->data->blb;
int fngfs = sPp->data->fngfs;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
{
#if (AGM == 0)
f_enforce_ga(cg->shape,
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#elif (AGM == 1)
if (iter_count == 3)
f_enforce_ga(cg->shape,
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#endif
if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[fngfs + ShellPatch::gx],
cg->fgfs[fngfs + ShellPatch::gy],
cg->fgfs[fngfs + ShellPatch::gz],
cg->fgfs[fngfs + ShellPatch::drhodx],
cg->fgfs[fngfs + ShellPatch::drhody],
cg->fgfs[fngfs + ShellPatch::drhodz],
cg->fgfs[fngfs + ShellPatch::dsigmadx],
cg->fgfs[fngfs + ShellPatch::dsigmady],
cg->fgfs[fngfs + ShellPatch::dsigmadz],
cg->fgfs[fngfs + ShellPatch::dRdx],
cg->fgfs[fngfs + ShellPatch::dRdy],
cg->fgfs[fngfs + ShellPatch::dRdz],
cg->fgfs[fngfs + ShellPatch::drhodxx],
cg->fgfs[fngfs + ShellPatch::drhodxy],
cg->fgfs[fngfs + ShellPatch::drhodxz],
cg->fgfs[fngfs + ShellPatch::drhodyy],
cg->fgfs[fngfs + ShellPatch::drhodyz],
cg->fgfs[fngfs + ShellPatch::drhodzz],
cg->fgfs[fngfs + ShellPatch::dsigmadxx],
cg->fgfs[fngfs + ShellPatch::dsigmadxy],
cg->fgfs[fngfs + ShellPatch::dsigmadxz],
cg->fgfs[fngfs + ShellPatch::dsigmadyy],
cg->fgfs[fngfs + ShellPatch::dsigmadyz],
cg->fgfs[fngfs + ShellPatch::dsigmadzz],
cg->fgfs[fngfs + ShellPatch::dRdxx],
cg->fgfs[fngfs + ShellPatch::dRdxy],
cg->fgfs[fngfs + ShellPatch::dRdxz],
cg->fgfs[fngfs + ShellPatch::dRdyy],
cg->fgfs[fngfs + ShellPatch::dRdyz],
cg->fgfs[fngfs + ShellPatch::dRdzz],
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn],
cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn],
cg->fgfs[Lap->sgfn],
cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn],
cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn],
cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn],
cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn],
cg->fgfs[gyy1->sgfn], cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn],
cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn],
cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn],
cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn],
cg->fgfs[Lap1->sgfn],
cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn],
cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn],
cg->fgfs[rho->sgfn],
cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, numepsh, sPp->data->sst, cor))
{
cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", ("
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
}
// rk4 substep and boundary
{
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList;
// we do not check the correspondence here
while (varl0)
{
// sommerfeld indeed for outer boundary while fix BD for inner boundary
f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2],
sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2],
sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5],
cg->fgfs[varl1->data->sgfn],
cg->fgfs[varl->data->sgfn],
varl0->data->propspeed, varl0->data->SoA,
Symmetry);
f_rungekutta4_rout(cg->shape, dT_lev,
cg->fgfs[varl0->data->sgfn],
cg->fgfs[varl1->data->sgfn],
cg->fgfs[varlrhs->data->sgfn],
iter_count);
varl0 = varl0->next;
varl = varl->next;
varl1 = varl1->next;
varlrhs = varlrhs->next;
}
}
f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny);
}
if (BP == sPp->data->ble)
break;
BP = BP->next;
}
sPp = sPp->next;
}
}
// Non-blocking error reduction overlapped with Sync to hide Allreduce latency
MPI_Request err_req_cor;
{
int erh = ERROR;
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor);
}
#endif
STEP_TIMER_ADD(TB_CORRECTOR_RHS, timer_corrector_rhs);
STEP_TIMER_DECL(timer_corrector_sync);
Parallel::AsyncSyncState async_cor;
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
#else
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
#endif
#ifdef WithShell
if (lev == 0)
{
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->Synch(SynchList_cor, Symmetry);
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
}
}
#endif
#if (ABEtype != 1)
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
#endif
STEP_TIMER_ADD(TB_CORRECTOR_SYNC, timer_corrector_sync);
#ifdef WithShell
// Complete non-blocking error reduction and check
MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE);
if (ERROR)
{
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
if (myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
<< " variables at t = " << PhysTime
<< ", lev = " << lev << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
#endif
#if (MAPBH == 0)
STEP_TIMER_DECL(timer_bh_corrector);
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
compute_Porg_rhs(Porg, Porg1, Sfx, Sfy, Sfz, lev);
for (int ithBH = 0; ithBH < BH_num; ithBH++)
{
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg1[ithBH][0], Porg_rhs[ithBH][0], iter_count);
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg1[ithBH][1], Porg_rhs[ithBH][1], iter_count);
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg1[ithBH][2], Porg_rhs[ithBH][2], iter_count);
if (Symmetry > 0)
Porg1[ithBH][2] = fabs(Porg1[ithBH][2]);
if (Symmetry == 2)
{
Porg1[ithBH][0] = fabs(Porg1[ithBH][0]);
Porg1[ithBH][1] = fabs(Porg1[ithBH][1]);
}
if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2]))
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << iter_count << " corrector step finds NaN for BH's position from ("
<< Porg[ithBH][0] << "," << Porg[ithBH][1] << "," << Porg[ithBH][2]
<< ")" << endl;
MyList<var> *DG_List = new MyList<var>(Sfx0);
DG_List->insert(Sfx0);
DG_List->insert(Sfy0);
DG_List->insert(Sfz0);
Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev);
DG_List->clearList();
}
}
}
STEP_TIMER_ADD(TB_BH_CORRECTOR, timer_bh_corrector);
#endif
// swap time level
if (iter_count < 3)
{
STEP_TIMER_DECL(timer_state_swap);
Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
cg->swapList(SynchList_pre, SynchList_cor, myrank);
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
#ifdef WithShell
if (lev == 0)
{
sPp = SH->PatL;
while (sPp)
{
MyList<Block> *BP = sPp->data->blb;
while (BP)
{
Block *cg = BP->data;
cg->swapList(SynchList_pre, SynchList_cor, myrank);
if (BP == sPp->data->ble)
break;
BP = BP->next;
}
sPp = sPp->next;
}
}
#endif
#if (MAPBH == 0)
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
for (int ithBH = 0; ithBH < BH_num; ithBH++)
{
Porg[ithBH][0] = Porg1[ithBH][0];
Porg[ithBH][1] = Porg1[ithBH][1];
Porg[ithBH][2] = Porg1[ithBH][2];
}
}
#endif
STEP_TIMER_ADD(TB_STATE_SWAP, timer_state_swap);
}
}
#if USE_CUDA_BSSN
if (use_cuda_resident_sync)
{
if (!bssn_cuda_keep_resident_after_step(lev, trfls, a_lev))
bssn_cuda_download_level_state(GH->PatL[lev], SynchList_cor, myrank, true);
}
#endif
#if (RPS == 0)
// mesh refinement boundary part
RestrictProlong(lev, YN, BB);
#ifdef WithShell
if (lev == 0)
{
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->CS_Inter(SynchList_cor, Symmetry);
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
}
}
#endif
#endif
// note the data structure before update
// SynchList_cor 1 -----------
//
// StateList 0 -----------
//
// OldStateList old -----------
// update
STEP_TIMER_DECL(timer_state_commit);
Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
cg->swapList(StateList, SynchList_cor, myrank);
cg->swapList(OldStateList, SynchList_cor, myrank);
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
#ifdef WithShell
if (lev == 0)
{
sPp = SH->PatL;
while (sPp)
{
MyList<Block> *BP = sPp->data->blb;
while (BP)
{
Block *cg = BP->data;
cg->swapList(StateList, SynchList_cor, myrank);
cg->swapList(OldStateList, SynchList_cor, myrank);
if (BP == sPp->data->ble)
break;
BP = BP->next;
}
sPp = sPp->next;
}
#if 0
// check StateList
{
SH->Dump_Data(StateList,0,PhysTime,dT_lev);
if(myrank == 0)
{
cout<<"check StateList"<<endl;
MPI_Abort(MPI_COMM_WORLD,1);
}
}
#endif
}
#endif
STEP_TIMER_ADD(TB_STATE_SWAP, timer_state_commit);
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
for (int ithBH = 0; ithBH < BH_num; ithBH++)
{
Porg0[ithBH][0] = Porg1[ithBH][0];
Porg0[ithBH][1] = Porg1[ithBH][1];
Porg0[ithBH][2] = Porg1[ithBH][2];
}
}
}
//================================================================================================
//================================================================================================
// This member function implements single-step time evolution for each AMR level (alternate)
//================================================================================================
// ICN (iterative Crank-Nicolson) variant of Step, kept for bam comparison
#else
void bssn_class::Step(int lev, int YN)
{
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
#ifdef With_AHF
AH_Step_Find(lev, dT_lev);
#endif
bool BB = fgt(PhysTime, StartTime, dT_lev / 2);
double ndeps = numepss;
if (lev < GH->movls)
ndeps = numepsb;
double TRK4 = PhysTime;
int iter_count = 0; // count RK4 substeps
int pre = 0, cor = 1;
int ERROR = 0;
MyList<ss_patch> *sPp;
// Predictor
MyList<Patch> *Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
{
#if (AGM == 0)
f_enforce_ga(cg->shape,
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
#endif
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn],
cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn],
cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn],
cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn],
cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn],
cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn],
cg->fgfs[Lap_rhs->sgfn],
cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn],
cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn],
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, ndeps, pre))
{
cout << "find NaN in domain: ("
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
}
// icn substep and boundary
{
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList;
// we do not check the correspondence here
while (varl0)
{
#ifndef WithShell
if (lev == 0) // sommerfeld indeed
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
cg->fgfs[varlrhs->data->sgfn],
cg->fgfs[varl0->data->sgfn], varl0->data->propspeed, varl0->data->SoA,
Symmetry);
#endif
f_icn_rout(cg->shape, dT_lev,
cg->fgfs[varl0->data->sgfn],
cg->fgfs[varl->data->sgfn],
cg->fgfs[varlrhs->data->sgfn],
iter_count);
#ifndef WithShell
if (lev > 0) // fix BD point
#endif
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn],
varl0->data->SoA,
Symmetry, cor);
varl0 = varl0->next;
varl = varl->next;
varlrhs = varlrhs->next;
}
}
f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny);
}
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
// NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls
#ifdef WithShell
#if USE_CUDA_BSSN
if (bssn_cuda_use_resident_sync(lev))
{
for (int dl = 0; dl < GH->levels; dl++)
bssn_cuda_download_level_state_if_present(GH->PatL[dl], StateList, myrank);
}
#endif
// evolve Shell Patches
if (lev == 0)
{
sPp = SH->PatL;
while (sPp)
{
MyList<Block> *BP = sPp->data->blb;
int fngfs = sPp->data->fngfs;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
{
#if (AGM == 0)
f_enforce_ga(cg->shape,
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
#endif
if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[fngfs + ShellPatch::gx],
cg->fgfs[fngfs + ShellPatch::gy],
cg->fgfs[fngfs + ShellPatch::gz],
cg->fgfs[fngfs + ShellPatch::drhodx],
cg->fgfs[fngfs + ShellPatch::drhody],
cg->fgfs[fngfs + ShellPatch::drhodz],
cg->fgfs[fngfs + ShellPatch::dsigmadx],
cg->fgfs[fngfs + ShellPatch::dsigmady],
cg->fgfs[fngfs + ShellPatch::dsigmadz],
cg->fgfs[fngfs + ShellPatch::dRdx],
cg->fgfs[fngfs + ShellPatch::dRdy],
cg->fgfs[fngfs + ShellPatch::dRdz],
cg->fgfs[fngfs + ShellPatch::drhodxx],
cg->fgfs[fngfs + ShellPatch::drhodxy],
cg->fgfs[fngfs + ShellPatch::drhodxz],
cg->fgfs[fngfs + ShellPatch::drhodyy],
cg->fgfs[fngfs + ShellPatch::drhodyz],
cg->fgfs[fngfs + ShellPatch::drhodzz],
cg->fgfs[fngfs + ShellPatch::dsigmadxx],
cg->fgfs[fngfs + ShellPatch::dsigmadxy],
cg->fgfs[fngfs + ShellPatch::dsigmadxz],
cg->fgfs[fngfs + ShellPatch::dsigmadyy],
cg->fgfs[fngfs + ShellPatch::dsigmadyz],
cg->fgfs[fngfs + ShellPatch::dsigmadzz],
cg->fgfs[fngfs + ShellPatch::dRdxx],
cg->fgfs[fngfs + ShellPatch::dRdxy],
cg->fgfs[fngfs + ShellPatch::dRdxz],
cg->fgfs[fngfs + ShellPatch::dRdyy],
cg->fgfs[fngfs + ShellPatch::dRdyz],
cg->fgfs[fngfs + ShellPatch::dRdzz],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn],
cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn],
cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn],
cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn],
cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn],
cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn],
cg->fgfs[Lap_rhs->sgfn],
cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn],
cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn],
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, numepsh, sPp->data->sst, pre))
{
cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", ("
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
}
// icn substep and boundary
{
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; // we do not check the correspondence here
while (varl0)
{
// sommerfeld indeed for outer boundary while fix BD for inner boundary
f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2],
sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2],
sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5],
cg->fgfs[varlrhs->data->sgfn],
cg->fgfs[varl0->data->sgfn],
varl0->data->propspeed, varl0->data->SoA,
Symmetry);
f_icn_rout(cg->shape, dT_lev,
cg->fgfs[varl0->data->sgfn],
cg->fgfs[varl->data->sgfn],
cg->fgfs[varlrhs->data->sgfn],
iter_count);
varl0 = varl0->next;
varl = varl->next;
varlrhs = varlrhs->next;
}
}
f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny);
}
if (BP == sPp->data->ble)
break;
BP = BP->next;
}
sPp = sPp->next;
}
#if 0
// check rhs
{
SH->Dump_Data(RHSList,0,PhysTime,dT_lev);
if(myrank == 0)
{
cout<<"check rhs"<<endl;
MPI_Abort(MPI_COMM_WORLD,1);
}
}
#endif
}
// Non-blocking error reduction overlapped with Sync to hide Allreduce latency
MPI_Request err_req;
{
int erh = ERROR;
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req);
}
#endif
Parallel::AsyncSyncState async_pre;
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
#else
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
#endif
#ifdef WithShell
if (lev == 0)
{
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->Synch(SynchList_pre, Symmetry);
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
}
}
#endif
#if (ABEtype != 1)
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
#endif
#ifdef WithShell
// Complete non-blocking error reduction and check
MPI_Wait(&err_req, MPI_STATUS_IGNORE);
if (ERROR)
{
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
if (myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime
<< ", lev = " << lev << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
#endif
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev);
for (int ithBH = 0; ithBH < BH_num; ithBH++)
{
f_icn_scalar(dT_lev, Porg0[ithBH][0], Porg[ithBH][0], Porg_rhs[ithBH][0], iter_count);
f_icn_scalar(dT_lev, Porg0[ithBH][1], Porg[ithBH][1], Porg_rhs[ithBH][1], iter_count);
f_icn_scalar(dT_lev, Porg0[ithBH][2], Porg[ithBH][2], Porg_rhs[ithBH][2], iter_count);
if (Symmetry > 0)
Porg[ithBH][2] = fabs(Porg[ithBH][2]);
if (Symmetry == 2)
{
Porg[ithBH][0] = fabs(Porg[ithBH][0]);
Porg[ithBH][1] = fabs(Porg[ithBH][1]);
}
if (!finite(Porg[ithBH][0]) || !finite(Porg[ithBH][1]) || !finite(Porg[ithBH][2]))
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "predictor step finds NaN for BH's position from ("
<< Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2]
<< ")" << endl;
MyList<var> *DG_List = new MyList<var>(Sfx0);
DG_List->insert(Sfx0);
DG_List->insert(Sfy0);
DG_List->insert(Sfz0);
Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev);
DG_List->clearList();
}
}
}
// data analysis part
// Warning NOTE: the variables1 are used as temporary storage space
if (lev == a_lev)
{
AnalysisStuff(lev, dT_lev);
}
// corrector
for (iter_count = 1; iter_count < 3; iter_count++)
{
Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
{
#if (AGM == 0)
f_enforce_ga(cg->shape,
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#elif (AGM == 1)
if (iter_count == 3)
f_enforce_ga(cg->shape,
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#endif
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn],
cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn],
cg->fgfs[Lap->sgfn],
cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn],
cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn],
cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn],
cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn],
cg->fgfs[gyy1->sgfn], cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn],
cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn],
cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn],
cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn],
cg->fgfs[Lap1->sgfn],
cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn],
cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn],
cg->fgfs[rho->sgfn],
cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, ndeps, cor))
{
cout << "find NaN in domain: ("
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
}
// rk4 substep and boundary
{
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList;
// we do not check the correspondence here
while (varl0)
{
#ifndef WithShell
if (lev == 0) // sommerfeld indeed
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
cg->fgfs[varl1->data->sgfn],
cg->fgfs[varl->data->sgfn],
varl0->data->propspeed, varl0->data->SoA,
Symmetry);
#endif
f_icn_rout(cg->shape, dT_lev,
cg->fgfs[varl0->data->sgfn],
cg->fgfs[varl1->data->sgfn],
cg->fgfs[varlrhs->data->sgfn],
iter_count);
#ifndef WithShell
if (lev > 0) // fix BD point
#endif
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn],
varl0->data->SoA,
Symmetry, cor);
varl0 = varl0->next;
varl = varl->next;
varl1 = varl1->next;
varlrhs = varlrhs->next;
}
}
f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny);
}
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
// NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls
#ifdef WithShell
#if USE_CUDA_BSSN
if (bssn_cuda_use_resident_sync(lev))
{
for (int dl = 0; dl < GH->levels; dl++)
bssn_cuda_download_level_state_if_present(GH->PatL[dl], StateList, myrank);
}
#endif
// evolve Shell Patches
if (lev == 0)
{
sPp = SH->PatL;
while (sPp)
{
MyList<Block> *BP = sPp->data->blb;
int fngfs = sPp->data->fngfs;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
{
#if (AGM == 0)
f_enforce_ga(cg->shape,
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#elif (AGM == 1)
if (iter_count == 3)
f_enforce_ga(cg->shape,
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#endif
if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[fngfs + ShellPatch::gx],
cg->fgfs[fngfs + ShellPatch::gy],
cg->fgfs[fngfs + ShellPatch::gz],
cg->fgfs[fngfs + ShellPatch::drhodx],
cg->fgfs[fngfs + ShellPatch::drhody],
cg->fgfs[fngfs + ShellPatch::drhodz],
cg->fgfs[fngfs + ShellPatch::dsigmadx],
cg->fgfs[fngfs + ShellPatch::dsigmady],
cg->fgfs[fngfs + ShellPatch::dsigmadz],
cg->fgfs[fngfs + ShellPatch::dRdx],
cg->fgfs[fngfs + ShellPatch::dRdy],
cg->fgfs[fngfs + ShellPatch::dRdz],
cg->fgfs[fngfs + ShellPatch::drhodxx],
cg->fgfs[fngfs + ShellPatch::drhodxy],
cg->fgfs[fngfs + ShellPatch::drhodxz],
cg->fgfs[fngfs + ShellPatch::drhodyy],
cg->fgfs[fngfs + ShellPatch::drhodyz],
cg->fgfs[fngfs + ShellPatch::drhodzz],
cg->fgfs[fngfs + ShellPatch::dsigmadxx],
cg->fgfs[fngfs + ShellPatch::dsigmadxy],
cg->fgfs[fngfs + ShellPatch::dsigmadxz],
cg->fgfs[fngfs + ShellPatch::dsigmadyy],
cg->fgfs[fngfs + ShellPatch::dsigmadyz],
cg->fgfs[fngfs + ShellPatch::dsigmadzz],
cg->fgfs[fngfs + ShellPatch::dRdxx],
cg->fgfs[fngfs + ShellPatch::dRdxy],
cg->fgfs[fngfs + ShellPatch::dRdxz],
cg->fgfs[fngfs + ShellPatch::dRdyy],
cg->fgfs[fngfs + ShellPatch::dRdyz],
cg->fgfs[fngfs + ShellPatch::dRdzz],
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn],
cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn],
cg->fgfs[Lap->sgfn],
cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn],
cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn],
cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn],
cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn],
cg->fgfs[gyy1->sgfn], cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn],
cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn],
cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn],
cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn],
cg->fgfs[Lap1->sgfn],
cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn],
cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn],
cg->fgfs[rho->sgfn],
cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, numepsh, sPp->data->sst, cor))
{
cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", ("
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
}
// rk4 substep and boundary
{
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList;
// we do not check the correspondence here
while (varl0)
{
// Sommerfeld condition on the outer boundary, fixed boundary (BD) on the inner boundary
f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2],
sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2],
sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5],
cg->fgfs[varl1->data->sgfn],
cg->fgfs[varl->data->sgfn],
varl0->data->propspeed, varl0->data->SoA,
Symmetry);
f_rungekutta4_rout(cg->shape, dT_lev,
cg->fgfs[varl0->data->sgfn],
cg->fgfs[varl1->data->sgfn],
cg->fgfs[varlrhs->data->sgfn],
iter_count);
varl0 = varl0->next;
varl = varl->next;
varl1 = varl1->next;
varlrhs = varlrhs->next;
}
}
f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny);
}
if (BP == sPp->data->ble)
break;
BP = BP->next;
}
sPp = sPp->next;
}
}
// Non-blocking error reduction overlapped with Sync to hide Allreduce latency
MPI_Request err_req_cor;
{
int erh = ERROR;
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor);
}
#endif
Parallel::AsyncSyncState async_cor;
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
#else
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
#endif
#ifdef WithShell
if (lev == 0)
{
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->Synch(SynchList_cor, Symmetry);
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
}
}
#endif
#if (ABEtype != 1)
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
#endif
#ifdef WithShell
// Complete non-blocking error reduction and check
MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE);
if (ERROR)
{
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
if (myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
<< " variables at t = " << PhysTime
<< ", lev = " << lev << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
#endif
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
compute_Porg_rhs(Porg, Porg1, Sfx, Sfy, Sfz, lev);
for (int ithBH = 0; ithBH < BH_num; ithBH++)
{
f_icn_scalar(dT_lev, Porg0[ithBH][0], Porg1[ithBH][0], Porg_rhs[ithBH][0], iter_count);
f_icn_scalar(dT_lev, Porg0[ithBH][1], Porg1[ithBH][1], Porg_rhs[ithBH][1], iter_count);
f_icn_scalar(dT_lev, Porg0[ithBH][2], Porg1[ithBH][2], Porg_rhs[ithBH][2], iter_count);
if (Symmetry > 0)
Porg1[ithBH][2] = fabs(Porg1[ithBH][2]);
if (Symmetry == 2)
{
Porg1[ithBH][0] = fabs(Porg1[ithBH][0]);
Porg1[ithBH][1] = fabs(Porg1[ithBH][1]);
}
if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2]))
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << iter_count << " corrector step finds NaN for BH's position from ("
<< Porg[ithBH][0] << "," << Porg[ithBH][1] << "," << Porg[ithBH][2]
<< ")" << endl;
MyList<var> *DG_List = new MyList<var>(Sfx0);
DG_List->insert(Sfx0);
DG_List->insert(Sfy0);
DG_List->insert(Sfz0);
Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev);
DG_List->clearList();
}
}
}
// swap time level
if (iter_count < 3)
{
Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
cg->swapList(SynchList_pre, SynchList_cor, myrank);
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
#ifdef WithShell
if (lev == 0)
{
sPp = SH->PatL;
while (sPp)
{
MyList<Block> *BP = sPp->data->blb;
while (BP)
{
Block *cg = BP->data;
cg->swapList(SynchList_pre, SynchList_cor, myrank);
if (BP == sPp->data->ble)
break;
BP = BP->next;
}
sPp = sPp->next;
}
}
#endif
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
for (int ithBH = 0; ithBH < BH_num; ithBH++)
{
Porg[ithBH][0] = Porg1[ithBH][0];
Porg[ithBH][1] = Porg1[ithBH][1];
Porg[ithBH][2] = Porg1[ithBH][2];
}
}
}
}
#if (RPS == 0)
// mesh refinement boundary part
RestrictProlong(lev, YN, BB);
#ifdef WithShell
if (lev == 0)
{
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->CS_Inter(SynchList_cor, Symmetry);
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
}
}
#endif
#endif
// note the data structure before update
// SynchList_cor 1 -----------
//
// StateList 0 -----------
//
// OldStateList old -----------
// update
Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
cg->swapList(StateList, SynchList_cor, myrank);
cg->swapList(OldStateList, SynchList_cor, myrank);
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
#ifdef WithShell
if (lev == 0)
{
sPp = SH->PatL;
while (sPp)
{
MyList<Block> *BP = sPp->data->blb;
while (BP)
{
Block *cg = BP->data;
cg->swapList(StateList, SynchList_cor, myrank);
cg->swapList(OldStateList, SynchList_cor, myrank);
if (BP == sPp->data->ble)
break;
BP = BP->next;
}
sPp = sPp->next;
}
#if 0
// check StateList
{
SH->Dump_Data(StateList,0,PhysTime,dT_lev);
if(myrank == 0)
{
cout<<"check StateList"<<endl;
MPI_Abort(MPI_COMM_WORLD,1);
}
}
#endif
}
#endif
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
for (int ithBH = 0; ithBH < BH_num; ithBH++)
{
Porg0[ithBH][0] = Porg1[ithBH][0];
Porg0[ithBH][1] = Porg1[ithBH][1];
Porg0[ithBH][2] = Porg1[ithBH][2];
}
}
}
#endif
//================================================================================================
//================================================================================================
// This member function implements single-step time evolution for each AMR level
// Variant for the case PSTR == 1, 2 or 3
//================================================================================================
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
void bssn_class::Step(int lev, int YN)
{
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"start Step");
setpbh(BH_num, Porg0, Mass, BH_num_input);
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
// new code 2013-2-15, zjcao
#if (MAPBH == 1)
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev);
for (int ithBH = 0; ithBH < BH_num; ithBH++)
{
for (int ith = 0; ith < 3; ith++)
Porg1[ithBH][ith] = Porg0[ithBH][ith] + Porg_rhs[ithBH][ith] * dT_lev;
if (Symmetry > 0)
Porg1[ithBH][2] = fabs(Porg1[ithBH][2]);
if (Symmetry == 2)
{
Porg1[ithBH][0] = fabs(Porg1[ithBH][0]);
Porg1[ithBH][1] = fabs(Porg1[ithBH][1]);
}
if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2]))
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "predictor step finds NaN for BH's position from ("
<< Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2]
<< ")" << endl;
MyList<var> *DG_List = new MyList<var>(Sfx0);
DG_List->insert(Sfx0);
DG_List->insert(Sfy0);
DG_List->insert(Sfz0);
Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev);
DG_List->clearList();
}
}
}
#endif
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor");
#ifdef With_AHF
AH_Step_Find(lev, dT_lev);
#endif
bool BB = fgt(PhysTime, StartTime, dT_lev / 2);
double ndeps = numepss;
if (lev < GH->movls)
ndeps = numepsb;
double TRK4 = PhysTime;
int iter_count = 0; // count RK4 substeps
int pre = 0, cor = 1;
int ERROR = 0;
MyList<ss_patch> *sPp;
// Predictor
MyList<Patch> *Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
{
#if (AGM == 0)
f_enforce_ga(cg->shape,
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
#endif
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn],
cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn],
cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn],
cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn],
cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn],
cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn],
cg->fgfs[Lap_rhs->sgfn],
cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn],
cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn],
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, ndeps, pre))
{
cout << "find NaN in domain: ("
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
}
// rk4 substep and boundary
{
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; // we do not check the correspondence here
while (varl0)
{
#if (SommerType == 0)
#ifndef WithShell
if (lev == 0) // sommerfeld indeed
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
cg->fgfs[varlrhs->data->sgfn],
cg->fgfs[varl0->data->sgfn],
varl0->data->propspeed, varl0->data->SoA,
Symmetry);
#endif
#endif
f_rungekutta4_rout(cg->shape, dT_lev,
cg->fgfs[varl0->data->sgfn],
cg->fgfs[varl->data->sgfn],
cg->fgfs[varlrhs->data->sgfn],
iter_count);
#ifndef WithShell
if (lev > 0) // fix BD point
#endif
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn],
varl0->data->SoA,
Symmetry, cor);
#if (SommerType == 1)
#warning "shell part still bam type"
if (lev == 0) // Shibata type sommerfeld
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn],
varl0->data->SoA,
Symmetry, pre);
#endif
varl0 = varl0->next;
varl = varl->next;
varlrhs = varlrhs->next;
}
}
f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny);
}
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Predictor rhs calculation");
// Non-blocking error reduction overlapped with Sync to hide Allreduce latency
MPI_Request err_req;
{
int erh = ERROR;
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, GH->Commlev[lev], &err_req);
}
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor sync");
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
#else
Parallel::Sync_cached(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev]);
#endif
// Complete non-blocking error reduction and check
MPI_Wait(&err_req, MPI_STATUS_IGNORE);
if (ERROR)
{
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
if (myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime << ", lev = " << lev << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
#if (MAPBH == 0)
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev);
for (int ithBH = 0; ithBH < BH_num; ithBH++)
{
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg[ithBH][0], Porg_rhs[ithBH][0], iter_count);
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg[ithBH][1], Porg_rhs[ithBH][1], iter_count);
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg[ithBH][2], Porg_rhs[ithBH][2], iter_count);
if (Symmetry > 0)
Porg[ithBH][2] = fabs(Porg[ithBH][2]);
if (Symmetry == 2)
{
Porg[ithBH][0] = fabs(Porg[ithBH][0]);
Porg[ithBH][1] = fabs(Porg[ithBH][1]);
}
if (!finite(Porg[ithBH][0]) || !finite(Porg[ithBH][1]) || !finite(Porg[ithBH][2]))
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "predictor step finds NaN for BH's position from ("
<< Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] << ")" << endl;
MyList<var> *DG_List = new MyList<var>(Sfx0);
DG_List->insert(Sfx0);
DG_List->insert(Sfy0);
DG_List->insert(Sfz0);
Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev);
DG_List->clearList();
}
}
}
#endif
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector");
// corrector
for (iter_count = 1; iter_count < 4; iter_count++)
{
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"head of Corrector");
// for RK4: t0, t0+dt/2, t0+dt/2, t0+dt;
if (iter_count == 1 || iter_count == 3)
TRK4 += dT_lev / 2;
Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
{
#if (AGM == 0)
f_enforce_ga(cg->shape,
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#elif (AGM == 1)
if (iter_count == 3)
f_enforce_ga(cg->shape,
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#endif
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn],
cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn],
cg->fgfs[Lap->sgfn],
cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn],
cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn],
cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn],
cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn],
cg->fgfs[gyy1->sgfn], cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn],
cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn],
cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn],
cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn],
cg->fgfs[Lap1->sgfn],
cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn],
cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn],
cg->fgfs[rho->sgfn],
cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, ndeps, cor))
{
cout << "find NaN in domain: ("
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
}
// rk4 substep and boundary
{
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList;
// we do not check the correspondence here
while (varl0)
{
#if (SommerType == 0)
#ifndef WithShell
if (lev == 0) // sommerfeld indeed
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
cg->fgfs[varl1->data->sgfn],
cg->fgfs[varl->data->sgfn],
varl0->data->propspeed, varl0->data->SoA,
Symmetry);
#endif
#endif
f_rungekutta4_rout(cg->shape, dT_lev,
cg->fgfs[varl0->data->sgfn],
cg->fgfs[varl1->data->sgfn],
cg->fgfs[varlrhs->data->sgfn],
iter_count);
#ifndef WithShell
if (lev > 0) // fix BD point
#endif
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn],
varl0->data->SoA,
Symmetry, cor);
#if (SommerType == 1)
if (lev == 1) // shibata type sommerfeld
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl->data->sgfn], cg->fgfs[varl1->data->sgfn],
varl0->data->SoA,
Symmetry, cor);
#endif
varl0 = varl0->next;
varl = varl->next;
varl1 = varl1->next;
varlrhs = varlrhs->next;
}
}
f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny);
}
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector error check");
// Non-blocking error reduction overlapped with Sync to hide Allreduce latency
MPI_Request err_req_cor;
{
int erh = ERROR;
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, GH->Commlev[lev], &err_req_cor);
}
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector sync");
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
#else
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev]);
#endif
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Corrector sync");
// Complete non-blocking error reduction and check
MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE);
if (ERROR)
{
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
if (myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
<< " variables at t = " << PhysTime
<< ", lev = " << lev << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
#if (MAPBH == 0)
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
compute_Porg_rhs(Porg, Porg1, Sfx, Sfy, Sfz, lev);
for (int ithBH = 0; ithBH < BH_num; ithBH++)
{
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg1[ithBH][0], Porg_rhs[ithBH][0], iter_count);
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg1[ithBH][1], Porg_rhs[ithBH][1], iter_count);
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg1[ithBH][2], Porg_rhs[ithBH][2], iter_count);
if (Symmetry > 0)
Porg1[ithBH][2] = fabs(Porg1[ithBH][2]);
if (Symmetry == 2)
{
Porg1[ithBH][0] = fabs(Porg1[ithBH][0]);
Porg1[ithBH][1] = fabs(Porg1[ithBH][1]);
}
if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2]))
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << iter_count << " corrector step finds NaN for BH's position from ("
<< Porg[ithBH][0] << "," << Porg[ithBH][1] << "," << Porg[ithBH][2]
<< ")" << endl;
MyList<var> *DG_List = new MyList<var>(Sfx0);
DG_List->insert(Sfx0);
DG_List->insert(Sfy0);
DG_List->insert(Sfz0);
Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev);
DG_List->clearList();
}
}
}
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Corrector of black hole position");
#endif
// swap time level
if (iter_count < 3)
{
Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
cg->swapList(SynchList_pre, SynchList_cor, myrank);
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after pre cor swap");
#if (MAPBH == 0)
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
for (int ithBH = 0; ithBH < BH_num; ithBH++)
{
Porg[ithBH][0] = Porg1[ithBH][0];
Porg[ithBH][1] = Porg1[ithBH][1];
Porg[ithBH][2] = Porg1[ithBH][2];
}
}
#endif
}
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"tail of corrector");
}
#if (RPS == 0)
// mesh refinement boundary part
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before RestrictProlong");
RestrictProlong(lev, YN, BB);
#endif
// note the data structure before update
// SynchList_cor 1 -----------
//
// StateList 0 -----------
//
// OldStateList old -----------
// update
Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
cg->swapList(StateList, SynchList_cor, myrank);
cg->swapList(OldStateList, SynchList_cor, myrank);
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
// for black hole position
if (BH_num > 0 && lev == GH->levels - 1)
{
for (int ithBH = 0; ithBH < BH_num; ithBH++)
{
Porg0[ithBH][0] = Porg1[ithBH][0];
Porg0[ithBH][1] = Porg1[ithBH][1];
Porg0[ithBH][2] = Porg1[ithBH][2];
// if(myrank==GH->start_rank[lev])
// cout<<Porg0[ithBH][0]<<","<<Porg0[ithBH][1]<<","<<Porg0[ithBH][2]<<endl;
}
}
// if(myrank==GH->start_rank[lev]) cout<<GH->mylev<<endl;
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"complet GH Step");
}
//================================================================================================
//================================================================================================
// This member function configures a single time-step evolution for the spherical-shell grid portion.
//================================================================================================
#ifdef WithShell
// Advance every spherical-shell patch in SH->PatL by one RK4 step of size dT_lev.
//
// The step is one predictor substep (iter_count == 0) followed by three corrector
// substeps (iter_count == 1..3).  For each block owned by this rank a substep
//   1. calls f_enforce_ga on the metric/extrinsic-curvature fields (subject to AGM),
//   2. evaluates the shell right-hand side with f_compute_rhs_bssn_ss; a non-zero
//      return is reported as a NaN and raises the local ERROR flag,
//   3. for every variable applies f_sommerfeld_routbam_ss and then
//      f_rungekutta4_rout,
//   4. applies f_lowerboundset with chitiny to the freshly written phi field.
// After each substep the local ERROR flags are summed over MPI_COMM_WORLD with a
// non-blocking all-reduce that overlaps SH->Synch; on error the data is dumped and
// rank 0 aborts the run.
//
// List roles: StateList holds the step's starting data, SynchList_pre the input of
// the current corrector substep, SynchList_cor its output.  On exit the new time
// level is in StateList and the previous one in OldStateList.
void bssn_class::SHStep()
{
  int lev = 0;
  // #if (PSTR == 1 || PSTR == 2)
  // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"start Step");
  // #endif
#if USE_CUDA_BSSN
  // With resident GPU state, bring StateList of every Cartesian level back to the
  // host before the CPU shell code below runs.
  if (bssn_cuda_use_resident_sync(lev))
  {
    for (int dl = 0; dl < GH->levels; dl++)
      bssn_cuda_download_level_state_if_present(GH->PatL[dl], StateList, myrank);
  }
#endif
  setpbh(BH_num, Porg0, Mass, BH_num_input);
  double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
  // #if (PSTR == 1 || PSTR == 2)
  // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor");
  // #endif
#ifdef With_AHF
  AH_Step_Find(lev, dT_lev);
#endif
  // BB and ndeps are computed here but not read again inside this function.
  bool BB = fgt(PhysTime, StartTime, dT_lev / 2);
  double ndeps = numepss;
  if (lev < GH->movls)
    ndeps = numepsb;
  double TRK4 = PhysTime;
  int iter_count = 0; // count RK4 substeps
  int pre = 0, cor = 1;
  int ERROR = 0;
  // Send buffer of the non-blocking error reductions.  It lives at function scope
  // because MPI may read it at any time until the matching MPI_Wait returns.
  int erh = 0;
  MyList<ss_patch> *sPp;
  // Predictor
  sPp = SH->PatL;
  while (sPp)
  {
    MyList<Block> *BP = sPp->data->blb;
    int fngfs = sPp->data->fngfs;
    while (BP)
    {
      Block *cg = BP->data;
      if (myrank == cg->rank)
      {
#if (AGM == 0)
        f_enforce_ga(cg->shape,
                     cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
                     cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
                     cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
                     cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
#endif
        // RHS evaluated from the "0" (StateList) fields into the *_rhs fields.
        if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
                                  cg->fgfs[fngfs + ShellPatch::gx],
                                  cg->fgfs[fngfs + ShellPatch::gy],
                                  cg->fgfs[fngfs + ShellPatch::gz],
                                  cg->fgfs[fngfs + ShellPatch::drhodx],
                                  cg->fgfs[fngfs + ShellPatch::drhody],
                                  cg->fgfs[fngfs + ShellPatch::drhodz],
                                  cg->fgfs[fngfs + ShellPatch::dsigmadx],
                                  cg->fgfs[fngfs + ShellPatch::dsigmady],
                                  cg->fgfs[fngfs + ShellPatch::dsigmadz],
                                  cg->fgfs[fngfs + ShellPatch::dRdx],
                                  cg->fgfs[fngfs + ShellPatch::dRdy],
                                  cg->fgfs[fngfs + ShellPatch::dRdz],
                                  cg->fgfs[fngfs + ShellPatch::drhodxx],
                                  cg->fgfs[fngfs + ShellPatch::drhodxy],
                                  cg->fgfs[fngfs + ShellPatch::drhodxz],
                                  cg->fgfs[fngfs + ShellPatch::drhodyy],
                                  cg->fgfs[fngfs + ShellPatch::drhodyz],
                                  cg->fgfs[fngfs + ShellPatch::drhodzz],
                                  cg->fgfs[fngfs + ShellPatch::dsigmadxx],
                                  cg->fgfs[fngfs + ShellPatch::dsigmadxy],
                                  cg->fgfs[fngfs + ShellPatch::dsigmadxz],
                                  cg->fgfs[fngfs + ShellPatch::dsigmadyy],
                                  cg->fgfs[fngfs + ShellPatch::dsigmadyz],
                                  cg->fgfs[fngfs + ShellPatch::dsigmadzz],
                                  cg->fgfs[fngfs + ShellPatch::dRdxx],
                                  cg->fgfs[fngfs + ShellPatch::dRdxy],
                                  cg->fgfs[fngfs + ShellPatch::dRdxz],
                                  cg->fgfs[fngfs + ShellPatch::dRdyy],
                                  cg->fgfs[fngfs + ShellPatch::dRdyz],
                                  cg->fgfs[fngfs + ShellPatch::dRdzz],
                                  cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
                                  cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
                                  cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
                                  cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
                                  cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
                                  cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
                                  cg->fgfs[Lap0->sgfn],
                                  cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
                                  cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
                                  cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn],
                                  cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn],
                                  cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn],
                                  cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn],
                                  cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn],
                                  cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn],
                                  cg->fgfs[Lap_rhs->sgfn],
                                  cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn],
                                  cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn],
                                  cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
                                  cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
                                  cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
                                  cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
                                  cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
                                  cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
                                  cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
                                  cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
                                  cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
                                  cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
                                  cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
                                  cg->fgfs[Cons_Ham->sgfn],
                                  cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
                                  cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
                                  Symmetry, lev, numepsh, sPp->data->sst, pre))
        {
          cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", ("
               << cg->bbox[0] << ":" << cg->bbox[3] << ","
               << cg->bbox[1] << ":" << cg->bbox[4] << ","
               << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
          ERROR = 1;
        }
        // rk4 substep and boundary
        {
          MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList;
          // The three lists are walked in lockstep; their correspondence is not checked here.
          while (varl0)
          {
            // Sommerfeld condition for the outer boundary, fixed boundary for the inner one.
            f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2],
                                    sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2],
                                    sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5],
                                    cg->fgfs[varlrhs->data->sgfn],
                                    cg->fgfs[varl0->data->sgfn],
                                    varl0->data->propspeed, varl0->data->SoA,
                                    Symmetry);
            f_rungekutta4_rout(cg->shape, dT_lev,
                               cg->fgfs[varl0->data->sgfn],
                               cg->fgfs[varl->data->sgfn],
                               cg->fgfs[varlrhs->data->sgfn],
                               iter_count);
            varl0 = varl0->next;
            varl = varl->next;
            varlrhs = varlrhs->next;
          }
        }
        f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny);
      }
      if (BP == sPp->data->ble)
        break;
      BP = BP->next;
    }
    sPp = sPp->next;
  }
#if (PSTR == 1 || PSTR == 2)
  // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor's error check");
#endif
  // Non-blocking error reduction overlapped with Synch to hide Allreduce latency.
  // erh must stay alive and unmodified until MPI_Wait below.
  MPI_Request err_req;
  erh = ERROR;
  MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req);
  {
    clock_t prev_clock, curr_clock;
    if (myrank == 0)
      curr_clock = clock();
    SH->Synch(SynchList_pre, Symmetry);
    if (myrank == 0)
    {
      prev_clock = curr_clock;
      curr_clock = clock();
      cout << " Shell stuff synchronization used "
           << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
           << " seconds! " << endl;
    }
  }
  // Complete non-blocking error reduction and check
  MPI_Wait(&err_req, MPI_STATUS_IGNORE);
  if (ERROR)
  {
    SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
    if (myrank == 0)
    {
      if (ErrorMonitor->outfile)
        ErrorMonitor->outfile << "find NaN in state variables on Shell Patches at t = " << PhysTime << endl;
      MPI_Abort(MPI_COMM_WORLD, 1);
    }
  }
  // corrector
  for (iter_count = 1; iter_count < 4; iter_count++)
  {
    // for RK4: t0, t0+dt/2, t0+dt/2, t0+dt;
    if (iter_count == 1 || iter_count == 3)
      TRK4 += dT_lev / 2;
    {
      sPp = SH->PatL;
      while (sPp)
      {
        MyList<Block> *BP = sPp->data->blb;
        int fngfs = sPp->data->fngfs;
        while (BP)
        {
          Block *cg = BP->data;
          if (myrank == cg->rank)
          {
#if (AGM == 0)
            f_enforce_ga(cg->shape,
                         cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
                         cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
                         cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
                         cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#elif (AGM == 1)
            if (iter_count == 3)
              f_enforce_ga(cg->shape,
                           cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
                           cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
                           cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
                           cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#endif
            // RHS evaluated from the SynchList_pre fields; the result is written
            // into the "1" (SynchList_cor) fields, which f_rungekutta4_rout then
            // overwrites with the substep result.
            if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
                                      cg->fgfs[fngfs + ShellPatch::gx],
                                      cg->fgfs[fngfs + ShellPatch::gy],
                                      cg->fgfs[fngfs + ShellPatch::gz],
                                      cg->fgfs[fngfs + ShellPatch::drhodx],
                                      cg->fgfs[fngfs + ShellPatch::drhody],
                                      cg->fgfs[fngfs + ShellPatch::drhodz],
                                      cg->fgfs[fngfs + ShellPatch::dsigmadx],
                                      cg->fgfs[fngfs + ShellPatch::dsigmady],
                                      cg->fgfs[fngfs + ShellPatch::dsigmadz],
                                      cg->fgfs[fngfs + ShellPatch::dRdx],
                                      cg->fgfs[fngfs + ShellPatch::dRdy],
                                      cg->fgfs[fngfs + ShellPatch::dRdz],
                                      cg->fgfs[fngfs + ShellPatch::drhodxx],
                                      cg->fgfs[fngfs + ShellPatch::drhodxy],
                                      cg->fgfs[fngfs + ShellPatch::drhodxz],
                                      cg->fgfs[fngfs + ShellPatch::drhodyy],
                                      cg->fgfs[fngfs + ShellPatch::drhodyz],
                                      cg->fgfs[fngfs + ShellPatch::drhodzz],
                                      cg->fgfs[fngfs + ShellPatch::dsigmadxx],
                                      cg->fgfs[fngfs + ShellPatch::dsigmadxy],
                                      cg->fgfs[fngfs + ShellPatch::dsigmadxz],
                                      cg->fgfs[fngfs + ShellPatch::dsigmadyy],
                                      cg->fgfs[fngfs + ShellPatch::dsigmadyz],
                                      cg->fgfs[fngfs + ShellPatch::dsigmadzz],
                                      cg->fgfs[fngfs + ShellPatch::dRdxx],
                                      cg->fgfs[fngfs + ShellPatch::dRdxy],
                                      cg->fgfs[fngfs + ShellPatch::dRdxz],
                                      cg->fgfs[fngfs + ShellPatch::dRdyy],
                                      cg->fgfs[fngfs + ShellPatch::dRdyz],
                                      cg->fgfs[fngfs + ShellPatch::dRdzz],
                                      cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
                                      cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
                                      cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
                                      cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
                                      cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn],
                                      cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn],
                                      cg->fgfs[Lap->sgfn],
                                      cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn],
                                      cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn],
                                      cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn],
                                      cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn],
                                      cg->fgfs[gyy1->sgfn], cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn],
                                      cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn],
                                      cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn],
                                      cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn],
                                      cg->fgfs[Lap1->sgfn],
                                      cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn],
                                      cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn],
                                      cg->fgfs[rho->sgfn],
                                      cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
                                      cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
                                      cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
                                      cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
                                      cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
                                      cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
                                      cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
                                      cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
                                      cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
                                      cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
                                      cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
                                      cg->fgfs[Cons_Ham->sgfn],
                                      cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
                                      cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
                                      Symmetry, lev, numepsh, sPp->data->sst, cor))
            {
              cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", ("
                   << cg->bbox[0] << ":" << cg->bbox[3] << ","
                   << cg->bbox[1] << ":" << cg->bbox[4] << ","
                   << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
              ERROR = 1;
            }
            // rk4 substep and boundary
            {
              MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList;
              // The four lists are walked in lockstep; their correspondence is not checked here.
              while (varl0)
              {
                // Sommerfeld condition for the outer boundary, fixed boundary for the inner one.
                f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2],
                                        sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2],
                                        sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5],
                                        cg->fgfs[varl1->data->sgfn],
                                        cg->fgfs[varl->data->sgfn],
                                        varl0->data->propspeed, varl0->data->SoA,
                                        Symmetry);
                f_rungekutta4_rout(cg->shape, dT_lev,
                                   cg->fgfs[varl0->data->sgfn],
                                   cg->fgfs[varl1->data->sgfn],
                                   cg->fgfs[varlrhs->data->sgfn],
                                   iter_count);
                varl0 = varl0->next;
                varl = varl->next;
                varl1 = varl1->next;
                varlrhs = varlrhs->next;
              }
            }
            f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny);
          }
          if (BP == sPp->data->ble)
            break;
          BP = BP->next;
        }
        sPp = sPp->next;
      }
    }
    // Non-blocking error reduction overlapped with Synch to hide Allreduce latency.
    // erh must stay alive and unmodified until MPI_Wait below.
    MPI_Request err_req_cor;
    erh = ERROR;
    MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor);
    {
      clock_t prev_clock, curr_clock;
      if (myrank == 0)
        curr_clock = clock();
      SH->Synch(SynchList_cor, Symmetry);
      if (myrank == 0)
      {
        prev_clock = curr_clock;
        curr_clock = clock();
        cout << " Shell stuff synchronization used "
             << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
             << " seconds! " << endl;
      }
    }
    // Complete non-blocking error reduction and check
    MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE);
    if (ERROR)
    {
      SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
      if (myrank == 0)
      {
        if (ErrorMonitor->outfile)
          ErrorMonitor->outfile << "find NaN on Shell Patches in RK4 substep#" << iter_count
                                << " variables at t = " << PhysTime << endl;
        MPI_Abort(MPI_COMM_WORLD, 1);
      }
    }
    // swap time level
    // Only between substeps: after the last one (iter_count == 3) the result has to
    // remain in SynchList_cor, because CS_Inter and the state rotation below read
    // SynchList_cor.  Swapping there would hand them the previous stage instead.
    if (iter_count < 3)
    {
      sPp = SH->PatL;
      while (sPp)
      {
        MyList<Block> *BP = sPp->data->blb;
        while (BP)
        {
          Block *cg = BP->data;
          cg->swapList(SynchList_pre, SynchList_cor, myrank);
          if (BP == sPp->data->ble)
            break;
          BP = BP->next;
        }
        sPp = sPp->next;
      }
    }
  }
#if (RPS == 0)
  {
    clock_t prev_clock, curr_clock;
    if (myrank == 0)
      curr_clock = clock();
    SH->CS_Inter(SynchList_cor, Symmetry);
    if (myrank == 0)
    {
      prev_clock = curr_clock;
      curr_clock = clock();
      cout << " CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
           << " seconds! " << endl;
    }
  }
#endif
  // note the data structure before update
  // SynchList_cor 1 -----------
  //
  // StateList 0 -----------
  //
  // OldStateList old -----------
  // update: after the two swaps StateList holds level 1 and OldStateList level 0
  sPp = SH->PatL;
  while (sPp)
  {
    MyList<Block> *BP = sPp->data->blb;
    while (BP)
    {
      Block *cg = BP->data;
      cg->swapList(StateList, SynchList_cor, myrank);
      cg->swapList(OldStateList, SynchList_cor, myrank);
      if (BP == sPp->data->ble)
        break;
      BP = BP->next;
    }
    sPp = sPp->next;
  }
}
#endif
#endif
//================================================================================================
//================================================================================================
// 0: do not use mixing two levels data for OutBD; 1: do use
#define MIXOUTB 0
// In the cached Restrict->OutBdLow2Hi path, coarse Sync is usually redundant:
// OutBdLow2Hi_cached reads coarse owned cells (build_owned_gsl type-4), not coarse ghost/buffer cells.
// Keep a switch to restore the old behavior if needed for debugging.
#ifndef RP_SYNC_COARSE_AFTER_RESTRICT
#define RP_SYNC_COARSE_AFTER_RESTRICT 0
#endif
// Exchange data between level lev and level lev - 1 using caller-supplied
// variable lists, then synchronize level lev.  For lev == 0 only the timing
// bookkeeping at the end of the function runs.
//
// lev      : level index; GH->PatL[lev - 1] and GH->PatL[lev] are handed to the
//            Parallel:: routines (presumably coarse and fine patch lists - the
//            direction of each transfer is defined inside Parallel, not here).
// YN       : together with lev > trfls, YN == 0 selects the branch that first
//            calls Parallel::prepare_inter_time_level on the lev - 1 patches with
//            SynchList_pre as target and then uses SynchList_pre as the
//            lev - 1 side list; otherwise SL is used on both sides.
// BB       : selects the prepare_inter_time_level overload that also takes corL.
// SL       : list passed to Restrict / OutBdLow2Hi and to the final Sync on lev.
// OL, corL : only forwarded to prepare_inter_time_level.
//
// Compile-time switches used below:
//   RPB      0 -> Restrict / OutBdLow2Hi family, 1 -> *_bam family
//   ABEtype  1 or 2 -> uncached Parallel:: calls, otherwise *_cached variants
//            with the per-level sync_cache_* objects
//   MIXOUTB  1 -> OutBdLow2Himix instead of OutBdLow2Hi
//   RP_SYNC_COARSE_AFTER_RESTRICT  1 -> keep the Sync on lev - 1 in the cached path
//
// Timing: if amss_rp_timing_enabled() / amss_rp_detail_timing_enabled() return
// true, elapsed MPI_Wtime values are reduced with MPI_MAX onto rank 0 of
// MPI_COMM_WORLD and printed to stderr.
// NOTE(review): those MPI_Reduce calls are collective over MPI_COMM_WORLD, so
// every rank has to enter this function when timing is on - confirm for builds
// in which levels run on separate communicators.
void bssn_class::RestrictProlong(int lev, int YN, bool BB,
MyList<var> *SL, MyList<var> *OL, MyList<var> *corL)
// we assume
// StateList 1 -----------
//
// OldStateList 0 -----------
//
// SynchList_cor old -----------
{
// Runtime-selectable instrumentation; the rp_t_* accumulators stay 0.0 unless
// detail timing is enabled.
const bool rp_runtime_timing = amss_rp_timing_enabled();
const double rp_runtime_start = rp_runtime_timing ? MPI_Wtime() : 0.0;
const bool rp_detail_timing = amss_rp_detail_timing_enabled();
double rp_t_prepare = 0.0;
double rp_t_restrict = 0.0;
double rp_t_coarse_sync = 0.0;
double rp_t_outbd = 0.0;
double rp_t_fine_sync = 0.0;
// Start stamp shared by all detail-timed sections below.
double rp_t0 = 0.0;
STEP_TIMER_DECL(timer_restrict_prolong);
#if (PSTR == 1 || PSTR == 2)
// stringstream a_stream;
// a_stream.setf(ios::left);
#endif
if (lev > 0)
{
MyList<Patch> *Pp, *Ppc;
if (lev > trfls && YN == 0) // time refinement levels and for intermediate time level
{
// Step 1: prepare level lev - 1 data at the intermediate time in SynchList_pre.
if (rp_detail_timing) rp_t0 = MPI_Wtime();
Pp = GH->PatL[lev - 1];
while (Pp)
{
if (BB)
Parallel::prepare_inter_time_level(Pp->data, SL, OL, corL,
SynchList_pre, 0); // use SynchList_pre as temporary storage space
else
Parallel::prepare_inter_time_level(Pp->data, SL, OL,
SynchList_pre, 0); // use SynchList_pre as temporary storage space
#if (PSTR == 1 || PSTR == 2)
// Pp->data->checkPatch(0,GH->start_rank[GH->mylev]);
#endif
Pp = Pp->next;
}
if (rp_detail_timing) rp_t_prepare += MPI_Wtime() - rp_t0;
#if (PSTR == 1 || PSTR == 2)
// Pp=GH->PatL[lev];
// while(Pp)
// {
// Pp->data->checkPatch(0,GH->start_rank[GH->mylev]);
// Pp=Pp->next;
// }
// a_stream.clear();
// a_stream.str("");
// a_stream<<GH->mylev<<": 0 before Restrict";
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
#endif
// Step 2: Restrict with SL and SynchList_pre as the two lists.
#if (RPB == 0)
if (rp_detail_timing) rp_t0 = MPI_Wtime();
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry);
#else
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry, sync_cache_restrict[lev]);
#endif
if (rp_detail_timing) rp_t_restrict += MPI_Wtime() - rp_t0;
#elif (RPB == 1)
if (rp_detail_timing) rp_t0 = MPI_Wtime();
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
if (rp_detail_timing) rp_t_restrict += MPI_Wtime() - rp_t0;
#endif
#if (PSTR == 1 || PSTR == 2)
// a_stream.clear();
// a_stream.str("");
// a_stream<<GH->mylev<<": 0 after Restrict";
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
#endif
// Step 3: Sync of SynchList_pre on level lev - 1; compiled out in the cached
// path unless RP_SYNC_COARSE_AFTER_RESTRICT == 1.
#if (ABEtype == 1 || ABEtype == 2)
if (rp_detail_timing) rp_t0 = MPI_Wtime();
Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry);
if (rp_detail_timing) rp_t_coarse_sync += MPI_Wtime() - rp_t0;
#else
#if (RP_SYNC_COARSE_AFTER_RESTRICT == 1)
if (rp_detail_timing) rp_t0 = MPI_Wtime();
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
if (rp_detail_timing) rp_t_coarse_sync += MPI_Wtime() - rp_t0;
#endif
#endif
#if (PSTR == 1 || PSTR == 2)
// a_stream.clear();
// a_stream.str("");
// a_stream<<GH->mylev<<": 0 after Sync";
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
#endif
// Step 4: OutBdLow2Hi with SynchList_pre (level lev - 1) and SL (level lev).
#if (RPB == 0)
if (rp_detail_timing) rp_t0 = MPI_Wtime();
#if (MIXOUTB == 0)
#if (ABEtype == 1 || ABEtype == 2)
// Uncached path: every (lev - 1 patch, lev patch) pair is visited.
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry);
Pp = Pp->next;
}
Ppc = Ppc->next;
}
#else
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry, sync_cache_outbd[lev]);
#endif
#elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
#endif
if (rp_detail_timing) rp_t_outbd += MPI_Wtime() - rp_t0;
#elif (RPB == 1)
if (rp_detail_timing) rp_t0 = MPI_Wtime();
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry);
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry);
if (rp_detail_timing) rp_t_outbd += MPI_Wtime() - rp_t0;
#endif
#if (PSTR == 1 || PSTR == 2)
// a_stream.clear();
// a_stream.str("");
// a_stream<<GH->mylev<<": 0 after OutBdLow2Hi";
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
#endif
}
else // no time refinement levels and for all same time levels
{
// Same sequence as above without the prepare step; SL is used on both levels.
#if (PSTR == 1 || PSTR == 2)
// a_stream.clear();
// a_stream.str("");
// a_stream<<GH->mylev<<": 1 before Restrict";
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
#endif
#if (RPB == 0)
if (rp_detail_timing) rp_t0 = MPI_Wtime();
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
#else
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_restrict[lev]);
#endif
if (rp_detail_timing) rp_t_restrict += MPI_Wtime() - rp_t0;
#elif (RPB == 1)
if (rp_detail_timing) rp_t0 = MPI_Wtime();
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
if (rp_detail_timing) rp_t_restrict += MPI_Wtime() - rp_t0;
#endif
#if (PSTR == 1 || PSTR == 2)
// a_stream.clear();
// a_stream.str("");
// a_stream<<GH->mylev<<": 1 before Sync";
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
#endif
#if (ABEtype == 1 || ABEtype == 2)
if (rp_detail_timing) rp_t0 = MPI_Wtime();
Parallel::Sync(GH->PatL[lev - 1], SL, Symmetry);
if (rp_detail_timing) rp_t_coarse_sync += MPI_Wtime() - rp_t0;
#else
#if (RP_SYNC_COARSE_AFTER_RESTRICT == 1)
if (rp_detail_timing) rp_t0 = MPI_Wtime();
Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]);
if (rp_detail_timing) rp_t_coarse_sync += MPI_Wtime() - rp_t0;
#endif
#endif
#if (PSTR == 1 || PSTR == 2)
// a_stream.clear();
// a_stream.str("");
// a_stream<<GH->mylev<<": 1 after Sync";
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
#endif
#if (RPB == 0)
if (rp_detail_timing) rp_t0 = MPI_Wtime();
#if (MIXOUTB == 0)
#if (ABEtype == 1 || ABEtype == 2)
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SL, SL, Symmetry);
Pp = Pp->next;
}
Ppc = Ppc->next;
}
#else
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_outbd[lev]);
#endif
#elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
#endif
if (rp_detail_timing) rp_t_outbd += MPI_Wtime() - rp_t0;
#elif (RPB == 1)
if (rp_detail_timing) rp_t0 = MPI_Wtime();
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry);
if (rp_detail_timing) rp_t_outbd += MPI_Wtime() - rp_t0;
#endif
#if (PSTR == 1 || PSTR == 2)
// a_stream.clear();
// a_stream.str("");
// a_stream<<GH->mylev<<": 1 after OutBdLow2Hi";
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
#endif
}
// Final step, common to both branches: Sync of SL on level lev.
#if (ABEtype == 1 || ABEtype == 2)
if (rp_detail_timing) rp_t0 = MPI_Wtime();
Parallel::Sync(GH->PatL[lev], SL, Symmetry);
if (rp_detail_timing) rp_t_fine_sync += MPI_Wtime() - rp_t0;
#else
if (rp_detail_timing) rp_t0 = MPI_Wtime();
Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]);
if (rp_detail_timing) rp_t_fine_sync += MPI_Wtime() - rp_t0;
#endif
#if (PSTR == 1 || PSTR == 2)
// a_stream.clear();
// a_stream.str("");
// a_stream<<GH->mylev<<": after Sync";
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
#endif
}
// Total wall time of this call: maximum over all ranks, printed by rank 0.
if (rp_runtime_timing)
{
const double local_sec = MPI_Wtime() - rp_runtime_start;
double max_sec = 0.0;
// The cast drops const from the send buffer argument.
MPI_Reduce((void *)&local_sec, &max_sec, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
if (myrank == 0)
fprintf(stderr, "[AMSS-RP-TIMING] lev=%d YN=%d BB=%d sec=%.6f\n",
lev, YN, BB ? 1 : 0, max_sec);
}
// Per-phase breakdown: the five accumulators are reduced in one call, in the
// order prepare, restrict, coarse_sync, outbd, fine_sync.
if (rp_detail_timing)
{
double local_detail[5] = {rp_t_prepare, rp_t_restrict, rp_t_coarse_sync, rp_t_outbd, rp_t_fine_sync};
double max_detail[5] = {};
MPI_Reduce(local_detail, max_detail, 5, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
if (myrank == 0)
fprintf(stderr,
"[AMSS-RP-DETAIL] lev=%d YN=%d BB=%d prepare=%.6f restrict=%.6f "
"coarse_sync=%.6f outbd=%.6f fine_sync=%.6f\n",
lev, YN, BB ? 1 : 0, max_detail[0], max_detail[1],
max_detail[2], max_detail[3], max_detail[4]);
}
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
}
//================================================================================================
//================================================================================================
// Auxiliary variant of RestrictProlong: the input lev is the lower level of the pair, i.e. the work is done on levels (lev, lev + 1).
void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
MyList<var> *SL, MyList<var> *OL, MyList<var> *corL)
// we assume
// StateList 1 -----------
//
// OldStateList 0 -----------
//
// SynchList_cor old -----------
{
STEP_TIMER_DECL(timer_restrict_prolong);
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"starting RestrictProlong_aux");
if (lev >= GH->levels - 1)
return;
lev = lev + 1;
if (lev > 0)
{
MyList<Patch> *Pp, *Ppc;
if (lev > trfls && YN == 0) // time refinement levels and for intermediat time level
{
Pp = GH->PatL[lev - 1];
while (Pp)
{
if (BB)
Parallel::prepare_inter_time_level(Pp->data, SL, OL, corL,
SynchList_pre, 0); // use SynchList_pre as temporal storage space
else
Parallel::prepare_inter_time_level(Pp->data, SL, OL,
SynchList_pre, 0); // use SynchList_pre as temporal storage space
Pp = Pp->next;
}
#if (RPB == 0)
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry);
#else
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry, sync_cache_restrict[lev]);
#endif
#elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
#endif
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry);
#else
#if (RP_SYNC_COARSE_AFTER_RESTRICT == 1)
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
#endif
#endif
#if (RPB == 0)
#if (MIXOUTB == 0)
#if (ABEtype == 1 || ABEtype == 2)
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry);
Pp = Pp->next;
}
Ppc = Ppc->next;
}
#else
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry, sync_cache_outbd[lev]);
#endif
#elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
#endif
#elif (RPB == 1)
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry);
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry);
#endif
}
else // no time refinement levels and for all same time levels
{
#if (RPB == 0)
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
#else
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_restrict[lev]);
#endif
#elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
#endif
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Sync(GH->PatL[lev - 1], SL, Symmetry);
#else
#if (RP_SYNC_COARSE_AFTER_RESTRICT == 1)
Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]);
#endif
#endif
#if (RPB == 0)
#if (MIXOUTB == 0)
#if (ABEtype == 1 || ABEtype == 2)
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SL, SL, Symmetry);
Pp = Pp->next;
}
Ppc = Ppc->next;
}
#else
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_outbd[lev]);
#endif
#elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
#endif
#elif (RPB == 1)
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry);
#endif
}
#if (ABEtype == 1 || ABEtype == 2)
Parallel::Sync(GH->PatL[lev], SL, Symmetry);
#else
Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]);
#endif
}
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
}
//================================================================================================
//================================================================================================
// Exchange data between refinement level `lev` and its parent level `lev - 1`.
//
// For lev > 0 the sequence is:
//   1. Parallel::Restrict* from GH->PatL[lev] (SynchList_cor) onto GH->PatL[lev - 1],
//   2. an optional Parallel::Sync* of the coarse level,
//   3. Parallel::OutBdLow2Hi* from the coarse level into SynchList_cor of level `lev`,
//   4. a Parallel::Sync* of SynchList_cor on level `lev`.
// For lev <= 0 only the step timer is touched.
//
// Parameters:
//   lev - refinement level index; nothing is exchanged unless lev > 0.
//   YN  - when YN == 0 and lev > trfls the "intermediate time level" branch is taken,
//         which first calls Parallel::prepare_inter_time_level on every coarse patch and
//         then uses SynchList_pre as the coarse-side list; otherwise StateList is used.
//   BB  - selects which prepare_inter_time_level overload is called (with or without
//         SynchList_cor as an additional input list).
//
// Which Parallel:: routine is called in each step is chosen at compile time by RPB,
// MIXOUTB, ABEtype and RP_SYNC_COARSE_AFTER_RESTRICT, and for ABEtype 1/2 additionally at
// run time by the amss_*_enabled() switches.
void bssn_class::RestrictProlong(int lev, int YN, bool BB)
{
STEP_TIMER_DECL(timer_restrict_prolong);
// Only used in the rank-0 diagnostic output below.
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
// we assume for fine
// SynchList_cor 1 -----------
//
// StateList 0 -----------
//
// OldStateList old -----------
// for coarse
// StateList 1 -----------
//
// OldStateList 0 -----------
//
// SynchList_cor old -----------
if (lev > 0)
{
MyList<Patch> *Pp, *Ppc;
if (lev > trfls && YN == 0) // time refinement levels and for intermediate time level
{
if (myrank == 0)
cout << "/=: " << GH->Lt[lev - 1] << "," << GH->Lt[lev] + dT_lev << endl;
// Prepare SynchList_pre on every patch of the coarse level before it is used as the
// coarse-side list by the calls below.
Pp = GH->PatL[lev - 1];
while (Pp)
{
if (BB)
Parallel::prepare_inter_time_level(Pp->data, StateList, OldStateList, SynchList_cor,
SynchList_pre, 0); // use SynchList_pre as temporary storage space
else
Parallel::prepare_inter_time_level(Pp->data, StateList, OldStateList,
SynchList_pre, 0); // use SynchList_pre as temporary storage space
Pp = Pp->next;
}
// Step 1: restriction, fine SynchList_cor -> coarse SynchList_pre.
#if (RPB == 0)
#if (ABEtype == 1 || ABEtype == 2)
if (amss_cached_rp_restrict_enabled())
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, Symmetry, sync_cache_restrict[lev]);
else
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, Symmetry);
#else
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, Symmetry, sync_cache_restrict[lev]);
#endif
#elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,SynchList_pre,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry);
#endif
// Step 2: optional synchronization of SynchList_pre on the coarse level.
#if (ABEtype == 1 || ABEtype == 2)
if (amss_rp_skip_coarse_sync_enabled())
{
// coarse synchronization intentionally skipped
}
else if (amss_cached_rp_coarse_sync_enabled())
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
else
Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry);
#else
#if (RP_SYNC_COARSE_AFTER_RESTRICT == 1)
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
#endif
#endif
// Step 3: coarse SynchList_pre -> fine SynchList_cor via OutBdLow2Hi*.
#if (RPB == 0)
#if (MIXOUTB == 0)
#if (ABEtype == 1 || ABEtype == 2)
if (amss_cached_rp_outbd_enabled())
{
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
}
else
{
// Uncached variant: call OutBdLow2Hi for every (coarse patch, fine patch) pair.
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry);
Pp = Pp->next;
}
Ppc = Ppc->next;
}
}
#else
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
#endif
#elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
#endif
#elif (RPB == 1)
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry);
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry);
#endif
}
else // no time refinement levels and for all same time levels
{
if (myrank == 0)
cout << "===: " << GH->Lt[lev - 1] << "," << GH->Lt[lev] + dT_lev << endl;
// Step 1: restriction, fine SynchList_cor -> coarse StateList.
#if (RPB == 0)
#if (ABEtype == 1 || ABEtype == 2)
if (amss_cached_rp_restrict_enabled())
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry, sync_cache_restrict[lev]);
else
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry);
#else
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry, sync_cache_restrict[lev]);
#endif
#elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry);
#endif
// Step 2: optional synchronization of StateList on the coarse level.
#if (ABEtype == 1 || ABEtype == 2)
if (amss_rp_skip_coarse_sync_enabled())
{
// coarse synchronization intentionally skipped
}
else if (amss_cached_rp_coarse_sync_enabled())
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]);
else
Parallel::Sync(GH->PatL[lev - 1], StateList, Symmetry);
#else
#if (RP_SYNC_COARSE_AFTER_RESTRICT == 1)
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]);
#endif
#endif
// Step 3: coarse StateList -> fine SynchList_cor via OutBdLow2Hi*.
#if (RPB == 0)
#if (MIXOUTB == 0)
#if (ABEtype == 1 || ABEtype == 2)
if (amss_cached_rp_outbd_enabled())
{
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
}
else
{
// Uncached variant: call OutBdLow2Hi for every (coarse patch, fine patch) pair.
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry);
Pp = Pp->next;
}
Ppc = Ppc->next;
}
}
#else
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
#endif
#elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
#endif
#elif (RPB == 1)
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry);
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry);
#endif
}
// Step 4 (both branches): synchronize SynchList_cor on level `lev`.
#if (ABEtype == 1 || ABEtype == 2)
if (amss_cached_rp_fine_sync_enabled())
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]);
else
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
#else
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]);
#endif
}
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
}
//================================================================================================
//================================================================================================
void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
{
if (lev > 0)
{
MyList<Patch> *Pp, *Ppc;
if (lev > trfls && YN == 0) // time refinement levels and for intermediat time level
{
Pp = GH->PatL[lev - 1];
while (Pp)
{
if (BB)
Parallel::prepare_inter_time_level(Pp->data, StateList, OldStateList, SynchList_cor,
SynchList_pre, 0); // use SynchList_pre as temporal storage space
else
Parallel::prepare_inter_time_level(Pp->data, StateList, OldStateList,
SynchList_pre, 0); // use SynchList_pre as temporal storage space
Pp = Pp->next;
}
#if (RPB == 0)
#if (MIXOUTB == 0)
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
#elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
#endif
#elif (RPB == 1)
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry);
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry);
#endif
}
else // no time refinement levels and for all same time levels
{
#if (RPB == 0)
#if (MIXOUTB == 0)
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
#elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
#endif
#elif (RPB == 1)
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry);
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry);
#endif
#if 0
#if (RPB == 0)
Parallel::Restrict(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry);
#elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,GH->rsul[lev],Symmetry);
#endif
#else
Parallel::Restrict_after(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry);
#endif
#if (RP_SYNC_COARSE_AFTER_RESTRICT == 1)
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]);
#endif
}
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]);
}
}
#undef MIXOUTB
#undef RP_SYNC_COARSE_AFTER_RESTRICT
//================================================================================================
//================================================================================================
// This member function computes the gravitational-wave quantity Psi4
//================================================================================================
// Fill the grid functions Rpsi4 and Ipsi4 on refinement level `lev`.
//
// For every block of every patch in GH->PatL[lev] that is owned by this MPI rank the
// Fortran kernel selected by Psi4type is called (f_getnp4 for Psi4type == 0,
// f_getnp4old for Psi4type == 1); afterwards Rpsi4/Ipsi4 are synchronized with
// Parallel::Sync. When compiled WithShell and lev == 0, the same is done on the shell
// patches SH->PatL with the *_ss kernels followed by SH->Synch.
//
// Parameters:
//   lev - refinement level whose patches are processed.
void bssn_class::Compute_Psi4(int lev)
{
// Variable list {Rpsi4, Ipsi4} used for the synchronization calls below.
MyList<var> *DG_List = new MyList<var>(Rpsi4);
DG_List->insert(Ipsi4);
// The disabled variant looped over all levels from the finest down to `lev`;
// its opening brace is closed in the matching disabled region further below.
#if 0 // test shows this operation does not help
  for(int ilev = GH->levels-1;ilev>=lev;ilev--)
  {
   MyList<Patch> *Pp=GH->PatL[ilev];
#else
MyList<Patch> *Pp = GH->PatL[lev];
#endif
while (Pp)
{
// Walk the blocks of this patch from blb up to and including ble.
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
// Each rank only computes on the blocks it owns.
if (myrank == cg->rank)
{
#if (Psi4type == 0)
// The `if (0)` guard means f_ricci_gamma is never executed here.
if (0) // if Gamma^i_jk and R_ij can be reused from the rhs calculation
f_ricci_gamma(cg->shape, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
Symmetry);
// the input arguments Gamma^i_jk and R_ij do not need synch, because we do not need to differentiate them
f_getnp4(cg->shape, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Rpsi4->sgfn], cg->fgfs[Ipsi4->sgfn],
Symmetry);
#elif (Psi4type == 1)
// This variant takes Gm*, Lap0 and Sf* instead of the Gam*/R* grid functions.
f_getnp4old(cg->shape, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[Rpsi4->sgfn], cg->fgfs[Ipsi4->sgfn],
Symmetry);
#else
#error "not recognized Psi4type"
#endif
}
// ble marks the last block belonging to this patch.
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
#if 0
   Parallel::Sync(GH->PatL[ilev],DG_List,Symmetry);
  }
// because of double level data change, you can not do this in above loop
// prolong restrict Psi4
  for(int ilev=GH->levels-1;ilev>lev;ilev--)
   RestrictProlong(ilev,1,false,DG_List,DG_List,DG_List);
#else
// Synchronize Rpsi4/Ipsi4 on the patches of level `lev`.
Parallel::Sync(GH->PatL[lev], DG_List, Symmetry);
#endif
#ifdef WithShell
// ShellPatch part
// Shell patches are only processed together with level 0.
if (lev == 0)
{
// Note: this Pp intentionally shadows the Cartesian patch iterator above.
MyList<ss_patch> *Pp = SH->PatL;
while (Pp)
{
MyList<Block> *BL = Pp->data->blb;
// Offset added to the ShellPatch:: indices when addressing cg->fgfs below.
int fngfs = Pp->data->fngfs;
while (BL)
{
Block *cg = BL->data;
if (myrank == cg->rank)
{
#if (Psi4type == 0)
// The `if (0)` guard means f_ricci_gamma_ss is never executed here.
if (0) // if Gamma^i_jk and R_ij can be reused from the rhs calculation
f_ricci_gamma_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[fngfs +
ShellPatch::gx], cg->fgfs[fngfs + ShellPatch::gy],
cg->fgfs[fngfs + ShellPatch::gz],
cg->fgfs[fngfs + ShellPatch::drhodx],
cg->fgfs[fngfs + ShellPatch::drhody],
cg->fgfs[fngfs + ShellPatch::drhodz],
cg->fgfs[fngfs + ShellPatch::dsigmadx],
cg->fgfs[fngfs + ShellPatch::dsigmady],
cg->fgfs[fngfs + ShellPatch::dsigmadz],
cg->fgfs[fngfs + ShellPatch::dRdx],
cg->fgfs[fngfs + ShellPatch::dRdy],
cg->fgfs[fngfs + ShellPatch::dRdz],
cg->fgfs[fngfs + ShellPatch::drhodxx],
cg->fgfs[fngfs + ShellPatch::drhodxy],
cg->fgfs[fngfs + ShellPatch::drhodxz],
cg->fgfs[fngfs + ShellPatch::drhodyy],
cg->fgfs[fngfs + ShellPatch::drhodyz],
cg->fgfs[fngfs + ShellPatch::drhodzz],
cg->fgfs[fngfs + ShellPatch::dsigmadxx],
cg->fgfs[fngfs + ShellPatch::dsigmadxy],
cg->fgfs[fngfs + ShellPatch::dsigmadxz],
cg->fgfs[fngfs + ShellPatch::dsigmadyy],
cg->fgfs[fngfs + ShellPatch::dsigmadyz],
cg->fgfs[fngfs + ShellPatch::dsigmadzz],
cg->fgfs[fngfs + ShellPatch::dRdxx],
cg->fgfs[fngfs + ShellPatch::dRdxy],
cg->fgfs[fngfs + ShellPatch::dRdxz],
cg->fgfs[fngfs + ShellPatch::dRdyy],
cg->fgfs[fngfs + ShellPatch::dRdyz],
cg->fgfs[fngfs + ShellPatch::dRdzz],
cg->fgfs[phi0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
Symmetry, lev, Pp->data->sst);
// Shell counterpart of f_getnp4; additionally receives the ShellPatch:: grid
// functions stored at offset fngfs and the patch's sst value.
f_getnp4_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[fngfs + ShellPatch::gx],
cg->fgfs[fngfs + ShellPatch::gy],
cg->fgfs[fngfs + ShellPatch::gz],
cg->fgfs[fngfs + ShellPatch::drhodx],
cg->fgfs[fngfs + ShellPatch::drhody],
cg->fgfs[fngfs + ShellPatch::drhodz],
cg->fgfs[fngfs + ShellPatch::dsigmadx],
cg->fgfs[fngfs + ShellPatch::dsigmady],
cg->fgfs[fngfs + ShellPatch::dsigmadz],
cg->fgfs[fngfs + ShellPatch::dRdx],
cg->fgfs[fngfs + ShellPatch::dRdy],
cg->fgfs[fngfs + ShellPatch::dRdz],
cg->fgfs[fngfs + ShellPatch::drhodxx],
cg->fgfs[fngfs + ShellPatch::drhodxy],
cg->fgfs[fngfs + ShellPatch::drhodxz],
cg->fgfs[fngfs + ShellPatch::drhodyy],
cg->fgfs[fngfs + ShellPatch::drhodyz],
cg->fgfs[fngfs + ShellPatch::drhodzz],
cg->fgfs[fngfs + ShellPatch::dsigmadxx],
cg->fgfs[fngfs + ShellPatch::dsigmadxy],
cg->fgfs[fngfs + ShellPatch::dsigmadxz],
cg->fgfs[fngfs + ShellPatch::dsigmadyy],
cg->fgfs[fngfs + ShellPatch::dsigmadyz],
cg->fgfs[fngfs + ShellPatch::dsigmadzz],
cg->fgfs[fngfs + ShellPatch::dRdxx],
cg->fgfs[fngfs + ShellPatch::dRdxy],
cg->fgfs[fngfs + ShellPatch::dRdxz],
cg->fgfs[fngfs + ShellPatch::dRdyy],
cg->fgfs[fngfs + ShellPatch::dRdyz],
cg->fgfs[fngfs + ShellPatch::dRdzz],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Rpsi4->sgfn], cg->fgfs[Ipsi4->sgfn],
Symmetry, Pp->data->sst);
#elif (Psi4type == 1)
// Shell counterpart of f_getnp4old.
f_getnp4old_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[fngfs + ShellPatch::gx],
cg->fgfs[fngfs + ShellPatch::gy],
cg->fgfs[fngfs + ShellPatch::gz],
cg->fgfs[fngfs + ShellPatch::drhodx],
cg->fgfs[fngfs + ShellPatch::drhody],
cg->fgfs[fngfs + ShellPatch::drhodz],
cg->fgfs[fngfs + ShellPatch::dsigmadx],
cg->fgfs[fngfs + ShellPatch::dsigmady],
cg->fgfs[fngfs + ShellPatch::dsigmadz],
cg->fgfs[fngfs + ShellPatch::dRdx],
cg->fgfs[fngfs + ShellPatch::dRdy],
cg->fgfs[fngfs + ShellPatch::dRdz],
cg->fgfs[fngfs + ShellPatch::drhodxx],
cg->fgfs[fngfs + ShellPatch::drhodxy],
cg->fgfs[fngfs + ShellPatch::drhodxz],
cg->fgfs[fngfs + ShellPatch::drhodyy],
cg->fgfs[fngfs + ShellPatch::drhodyz],
cg->fgfs[fngfs + ShellPatch::drhodzz],
cg->fgfs[fngfs + ShellPatch::dsigmadxx],
cg->fgfs[fngfs + ShellPatch::dsigmadxy],
cg->fgfs[fngfs + ShellPatch::dsigmadxz],
cg->fgfs[fngfs + ShellPatch::dsigmadyy],
cg->fgfs[fngfs + ShellPatch::dsigmadyz],
cg->fgfs[fngfs + ShellPatch::dsigmadzz],
cg->fgfs[fngfs + ShellPatch::dRdxx],
cg->fgfs[fngfs + ShellPatch::dRdxy],
cg->fgfs[fngfs + ShellPatch::dRdxz],
cg->fgfs[fngfs + ShellPatch::dRdyy],
cg->fgfs[fngfs + ShellPatch::dRdyz],
cg->fgfs[fngfs + ShellPatch::dRdzz],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[Rpsi4->sgfn], cg->fgfs[Ipsi4->sgfn],
Symmetry, Pp->data->sst);
#else
#error "not recognized Psi4type"
#endif
}
if (BL == Pp->data->ble)
break;
BL = BL->next;
}
Pp = Pp->next;
}
// Synchronize Rpsi4/Ipsi4 across the shell patches.
SH->Synch(DG_List, Symmetry);
#if 0
// interpolate Psi4
    SH->CS_Inter(DG_List,Symmetry);
#endif
}
#endif
// NOTE(review): presumably releases the list nodes allocated at the top of this
// function - confirm against MyList<T>::clearList.
DG_List->clearList();
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"end of Compute_Psi4");
}
//================================================================================================
//================================================================================================
// This member function sets the black holes' initial puncture positions
//================================================================================================
void bssn_class::Setup_Black_Hole_position()
{
char filename[50];
{
map<string, string>::iterator iter = parameters::str_par.find("inputpar");
if (iter != parameters::str_par.end())
{
strcpy(filename, (iter->second).c_str());
}
else
{
cout << "Error inputpar" << endl;
exit(0);
}
}
// read parameter from file
{
const int LEN = 256;
char pline[LEN];
string str, sgrp, skey, sval;
int sind;
ifstream inf(filename, ifstream::in);
if (!inf.good() && myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "Can not open parameter file " << filename
<< " for inputing information of black holes" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
for (int i = 1; inf.good(); i++)
{
inf.getline(pline, LEN);
str = pline;
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
if (status == -1)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
else if (status == 0)
continue;
if (sgrp == "BSSN" && skey == "BH_num")
{
BH_num_input = BH_num = atoi(sval.c_str());
break;
}
}
inf.close();
}
// set up the data for black holes
// these arrays will be deleted when bssn_class is deleted
Pmom = new double[3 * BH_num];
Spin = new double[3 * BH_num];
Mass = new double[BH_num];
Porg0 = new double *[BH_num];
Porgbr = new double *[BH_num];
Porg = new double *[BH_num];
Porg1 = new double *[BH_num];
Porg_rhs = new double *[BH_num];
for (int i = 0; i < BH_num; i++)
{
Porg0[i] = new double[3];
Porgbr[i] = new double[3];
Porg[i] = new double[3];
Porg1[i] = new double[3];
Porg_rhs[i] = new double[3];
}
// read parameter from file
{
const int LEN = 256;
char pline[LEN];
string str, sgrp, skey, sval;
int sind;
ifstream inf(filename, ifstream::in);
if (!inf.good() && myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "Can not open parameter file " << filename
<< " for inputing information of black holes" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
for (int i = 1; inf.good(); i++)
{
inf.getline(pline, LEN);
str = pline;
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
if (status == -1)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
else if (status == 0)
continue;
if (sgrp == "BSSN" && sind < BH_num)
{
if (skey == "Mass")
Mass[sind] = atof(sval.c_str());
else if (skey == "Porgx")
Porg0[sind][0] = atof(sval.c_str());
else if (skey == "Porgy")
Porg0[sind][1] = atof(sval.c_str());
else if (skey == "Porgz")
Porg0[sind][2] = atof(sval.c_str());
else if (skey == "Spinx")
Spin[sind * 3] = atof(sval.c_str());
else if (skey == "Spiny")
Spin[sind * 3 + 1] = atof(sval.c_str());
else if (skey == "Spinz")
Spin[sind * 3 + 2] = atof(sval.c_str());
else if (skey == "Pmomx")
Pmom[sind * 3] = atof(sval.c_str());
else if (skey == "Pmomy")
Pmom[sind * 3 + 1] = atof(sval.c_str());
else if (skey == "Pmomz")
Pmom[sind * 3 + 2] = atof(sval.c_str());
}
}
inf.close();
}
// echo information of Black holes
if (myrank == 0)
{
cout << endl;
cout << " initial information of " << BH_num << " Black Hole(s) " << endl;
cout << setw(12) << "Mass"
<< setw(12) << "x"
<< setw(12) << "y"
<< setw(12) << "z"
<< setw(16) << "Px"
<< setw(16) << "Py"
<< setw(12) << "Pz"
<< setw(12) << "Sx"
<< setw(12) << "Sy"
<< setw(12) << "Sz" << endl;
for (int i = 0; i < BH_num; i++)
{
cout << setw(12) << Mass[i]
<< setw(12) << Porg0[i][0]
<< setw(12) << Porg0[i][1]
<< setw(12) << Porg0[i][2]
<< setw(16) << Pmom[i * 3]
<< setw(16) << Pmom[i * 3 + 1]
<< setw(12) << Pmom[i * 3 + 2]
<< setw(12) << Spin[i * 3]
<< setw(12) << Spin[i * 3 + 1]
<< setw(12) << Spin[i * 3 + 2] << endl;
}
}
int maxl = 1;
int levels;
int *grids;
double bbox[6];
// read parameter from file
{
const int LEN = 256;
char pline[LEN];
string str, sgrp, skey, sval;
int sind1, sind2, sind3;
ifstream inf(filename, ifstream::in);
if (!inf.good() && myrank == 0)
{
cout << "bssn_class::Setup_Black_Hole_position: Can not open parameter file " << filename
<< " for inputing information of black holes" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
for (int i = 1; inf.good(); i++)
{
inf.getline(pline, LEN);
str = pline;
int status = misc::parse_parts(str, sgrp, skey, sval, sind1);
if (status == -1)
{
cout << "error reading parameter file " << filename << " in line " << i << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
else if (status == 0)
continue;
if (sgrp == "cgh" && skey == "levels")
{
levels = atoi(sval.c_str());
break;
}
}
inf.close();
}
grids = new int[levels];
// read parameter from file
{
const int LEN = 256;
char pline[LEN];
string str, sgrp, skey, sval;
int sind1, sind2, sind3;
ifstream inf(filename, ifstream::in);
if (!inf.good() && myrank == 0)
{
cout << "bssn_class::Setup_Black_Hole_position: Can not open parameter file " << filename
<< " for inputing information of black holes" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
for (int i = 1; inf.good(); i++)
{
inf.getline(pline, LEN);
str = pline;
int status = misc::parse_parts(str, sgrp, skey, sval, sind1, sind2, sind3);
if (status == -1)
{
cout << "error reading parameter file " << filename << " in line " << i << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
else if (status == 0)
continue;
if (sgrp == "cgh" && skey == "grids" && sind1 < levels)
grids[sind1] = atoi(sval.c_str());
if (sgrp == "cgh" && skey == "bbox" && sind1 == 0 && sind2 == 0)
bbox[sind3] = atof(sval.c_str());
}
inf.close();
}
for (int i = 0; i < levels; i++)
if (maxl < grids[i])
maxl = grids[i];
delete[] grids;
if (BH_num > maxl)
{
int BH_numc = BH_num;
for (int i = 0; i < BH_num; i++)
if (Porg0[i][0] < bbox[0] || Porg0[i][0] > bbox[3] ||
Porg0[i][1] < bbox[1] || Porg0[i][1] > bbox[4] ||
Porg0[i][2] < bbox[2] || Porg0[i][2] > bbox[5])
{
delete[] Porg0[i];
Porg0[i] = 0;
BH_numc--;
}
if (BH_num > BH_numc)
{
maxl = BH_numc;
int bhi;
double *tmp;
tmp = Pmom;
Pmom = new double[3 * maxl];
bhi = 0;
for (int i = 0; i < BH_num; i++)
if (Porg0[i])
{
for (int j = 0; j < 3; j++)
Pmom[3 * bhi + j] = tmp[3 * i + j];
bhi++;
}
delete[] tmp;
tmp = Spin;
Spin = new double[3 * maxl];
bhi = 0;
for (int i = 0; i < BH_num; i++)
if (Porg0[i])
{
for (int j = 0; j < 3; j++)
Spin[3 * bhi + j] = tmp[3 * i + j];
bhi++;
}
delete[] tmp;
tmp = Mass;
Mass = new double[3 * maxl];
bhi = 0;
for (int i = 0; i < BH_num; i++)
if (Porg0[i])
{
Mass[bhi] = tmp[i];
bhi++;
}
delete[] tmp;
double **ttmp;
ttmp = Porg0;
Porg0 = new double *[maxl];
bhi = 0;
for (int i = 0; i < BH_num; i++)
if (ttmp[i])
{
Porg0[bhi] = ttmp[i];
bhi++;
}
delete[] ttmp;
for (int i = 0; i < BH_num; i++)
{
delete[] Porgbr[i];
delete[] Porg[i];
delete[] Porg1[i];
delete[] Porg_rhs[i];
}
delete[] Porgbr;
delete[] Porg;
delete[] Porg1;
delete[] Porg_rhs;
BH_num = maxl;
Porgbr = new double *[BH_num];
Porg = new double *[BH_num];
Porg1 = new double *[BH_num];
Porg_rhs = new double *[BH_num];
for (int i = 0; i < BH_num; i++)
{
Porgbr[i] = new double[3];
Porg[i] = new double[3];
Porg1[i] = new double[3];
Porg_rhs[i] = new double[3];
}
}
}
for (int i = 0; i < BH_num; i++)
{
for (int j = 0; j < dim; j++)
Porgbr[i][j] = Porg0[i][j];
}
setpbh(BH_num, Porg0, Mass, BH_num_input);
}
//================================================================================================
//================================================================================================
// This member function computes black hole positions
//================================================================================================
#if 0
// old code
// Disabled legacy version (compiled out by the enclosing `#if 0`).
// Interpolates (forx, fory, forz) at all BH_num positions BH_PS in one call on
// level `lev` and stores the negated results in BH_RHS. A failed interpolation
// is only reported; BH_RHS is still written from shellf in that case.
void bssn_class::compute_Porg_rhs(double **BH_PS,double **BH_RHS,var *forx,var *fory,var *forz,int lev)
{
const int InList = 3;
MyList<var> * DG_List=new MyList<var>(forx);
DG_List->insert(fory); DG_List->insert(forz);
int n;
double *x1,*y1,*z1;
// shellf receives three interpolated values per black hole.
double *shellf;
shellf=new double[3*BH_num];
// pox[k][n] is coordinate k of black hole n.
double *pox[3];
for(int i=0;i<3;i++) pox[i] = new double[BH_num];
for( n = 0; n < BH_num; n++)
{
pox[0][n] = BH_PS[n][0];
pox[1][n] = BH_PS[n][1];
pox[2][n] = BH_PS[n][2];
}
if(!Parallel::PatList_Interp_Points(GH->PatL[lev],DG_List,BH_num,pox,shellf,Symmetry))
{
ErrorMonitor->outfile<<"fail to find black holes at t = "<<PhysTime<<endl;
for( n = 0; n < BH_num; n++)
ErrorMonitor->outfile<<"(x,y,z) = ("<<pox[0][n]<<","<<pox[1][n]<<","<<pox[2][n]<<")"<<endl;
}
// The right-hand side is minus the interpolated value.
for( n = 0; n < BH_num; n++)
{
BH_RHS[n][0]=-shellf[3*n ];
BH_RHS[n][1]=-shellf[3*n+1];
BH_RHS[n][2]=-shellf[3*n+2];
}
DG_List->clearList();
delete[] shellf;
for(int i=0;i<3;i++) delete[] pox[i];
}
#else
// new code considering diferent levels for different black hole
void bssn_class::compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, var *fory, var *forz, int ilev)
{
const int InList = 3;
MyList<var> *DG_List = new MyList<var>(forx);
DG_List->insert(fory);
DG_List->insert(forz);
double *x1, *y1, *z1;
double *shellf;
shellf = new double[3];
double *pox[3];
for (int i = 0; i < 3; i++)
pox[i] = new double[1];
for (int n = 0; n < BH_num; n++)
{
pox[0][0] = BH_PS[n][0];
pox[1][0] = BH_PS[n][1];
pox[2][0] = BH_PS[n][2];
int lev = ilev;
#if USE_CUDA_BSSN
if (bssn_cuda_bh_interp_resident_enabled() &&
bssn_cuda_use_resident_sync(lev) &&
bssn_cuda_interp_bh_point_resident(GH->PatL[lev], myrank, BH_PS[n], forx, fory, forz, Symmetry, shellf))
{
BH_RHS[n][0] = -shellf[0];
BH_RHS[n][1] = -shellf[1];
BH_RHS[n][2] = -shellf[2];
continue;
}
#endif
#if (PSTR == 0)
while (!Parallel::PatList_Interp_Points(GH->PatL[lev], DG_List, 1, pox, shellf, Symmetry))
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
while (!Parallel::PatList_Interp_Points(GH->PatL[lev], DG_List, 1, pox, shellf, Symmetry, GH->Commlev[lev]))
#endif
{
lev--;
if (lev < 0)
{
ErrorMonitor->outfile << "fail to find black holes at t = " << PhysTime << endl;
for (n = 0; n < BH_num; n++)
ErrorMonitor->outfile << "(x,y,z) = ("
<< pox[0][n] << "," << pox[1][n] << "," << pox[2][n]
<< ")" << endl;
break;
}
}
if (lev >= 0)
{
BH_RHS[n][0] = -shellf[0];
BH_RHS[n][1] = -shellf[1];
BH_RHS[n][2] = -shellf[2];
}
}
DG_List->clearList();
delete[] shellf;
for (int i = 0; i < 3; i++)
delete[] pox[i];
}
#endif
//================================================================================================
//================================================================================================
// This member function computes gravitational-wave related quantities and performs analysis
//================================================================================================
void bssn_class::AnalysisStuff(int lev, double dT_lev)
{
LastAnas += dT_lev;
if (LastAnas >= AnasTime)
{
const bool analysis_timing = amss_analysis_timing_enabled();
const double t_analysis_start = analysis_timing ? MPI_Wtime() : 0.0;
double t_psi4_sec = 0.0;
double t_surface_sec = 0.0;
#ifdef Point_Psi4
#error "not support parallel levels yet"
// Gam_ijk and R_ij have been calculated in Interp_Constraint()
double SYM = 1, ANT = -1;
for (int levh = lev; levh < GH->levels; levh++)
{
MyList<Patch> *Pp = GH->PatL[levh];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
{
f_fderivs(cg->shape, cg->fgfs[phi0->sgfn],
cg->fgfs[phix->sgfn], cg->fgfs[phiy->sgfn], cg->fgfs[phiz->sgfn],
cg->X[0], cg->X[1], cg->X[2],
SYM, SYM, SYM, Symmetry, levh);
f_fderivs(cg->shape, cg->fgfs[trK0->sgfn],
cg->fgfs[trKx->sgfn], cg->fgfs[trKy->sgfn], cg->fgfs[trKz->sgfn],
cg->X[0], cg->X[1], cg->X[2],
SYM, SYM, SYM, Symmetry, levh);
f_fderivs(cg->shape, cg->fgfs[Axx0->sgfn],
cg->fgfs[Axxx->sgfn], cg->fgfs[Axxy->sgfn], cg->fgfs[Axxz->sgfn],
cg->X[0], cg->X[1], cg->X[2],
SYM, SYM, SYM, Symmetry, levh);
f_fderivs(cg->shape, cg->fgfs[Axy0->sgfn],
cg->fgfs[Axyx->sgfn], cg->fgfs[Axyy->sgfn], cg->fgfs[Axyz->sgfn],
cg->X[0], cg->X[1], cg->X[2],
ANT, ANT, SYM, Symmetry, levh);
f_fderivs(cg->shape, cg->fgfs[Axz0->sgfn],
cg->fgfs[Axzx->sgfn], cg->fgfs[Axzy->sgfn], cg->fgfs[Axzz->sgfn],
cg->X[0], cg->X[1], cg->X[2],
ANT, SYM, ANT, Symmetry, levh);
f_fderivs(cg->shape, cg->fgfs[Ayy0->sgfn],
cg->fgfs[Ayyx->sgfn], cg->fgfs[Ayyy->sgfn], cg->fgfs[Ayyz->sgfn],
cg->X[0], cg->X[1], cg->X[2],
SYM, SYM, SYM, Symmetry, levh);
f_fderivs(cg->shape, cg->fgfs[Ayz0->sgfn],
cg->fgfs[Ayzx->sgfn], cg->fgfs[Ayzy->sgfn], cg->fgfs[Ayzz->sgfn],
cg->X[0], cg->X[1], cg->X[2],
SYM, ANT, ANT, Symmetry, levh);
f_fderivs(cg->shape, cg->fgfs[Azz0->sgfn],
cg->fgfs[Azzx->sgfn], cg->fgfs[Azzy->sgfn], cg->fgfs[Azzz->sgfn],
cg->X[0], cg->X[1], cg->X[2],
SYM, SYM, SYM, Symmetry, levh);
}
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
#ifdef WithShell
// ShellPatch part
if (lev == 0)
{
MyList<ss_patch> *Pp = SH->PatL;
while (Pp)
{
MyList<Block> *BL = Pp->data->blb;
int fngfs = Pp->data->fngfs;
while (BL)
{
Block *cg = BL->data;
if (myrank == cg->rank)
{
f_fderivs_shc(cg->shape, cg->fgfs[phi0->sgfn],
cg->fgfs[phix->sgfn], cg->fgfs[phiy->sgfn], cg->fgfs[phiz->sgfn],
cg->X[0], cg->X[1], cg->X[2],
phi0->SoA[0], phi0->SoA[1], phi0->SoA[2],
Symmetry, levh, Pp->data->sst,
cg->fgfs[fngfs + ShellPatch::drhodx],
cg->fgfs[fngfs + ShellPatch::drhody],
cg->fgfs[fngfs + ShellPatch::drhodz],
cg->fgfs[fngfs + ShellPatch::dsigmadx],
cg->fgfs[fngfs + ShellPatch::dsigmady],
cg->fgfs[fngfs + ShellPatch::dsigmadz],
cg->fgfs[fngfs + ShellPatch::dRdx],
cg->fgfs[fngfs + ShellPatch::dRdy],
cg->fgfs[fngfs + ShellPatch::dRdz]);
f_fderivs_shc(cg->shape, cg->fgfs[trK0->sgfn],
cg->fgfs[trKx->sgfn], cg->fgfs[trKy->sgfn], cg->fgfs[trKz->sgfn],
cg->X[0], cg->X[1], cg->X[2],
trK0->SoA[0], trK0->SoA[1], trK0->SoA[2],
Symmetry, levh, Pp->data->sst,
cg->fgfs[fngfs + ShellPatch::drhodx],
cg->fgfs[fngfs + ShellPatch::drhody],
cg->fgfs[fngfs + ShellPatch::drhodz],
cg->fgfs[fngfs + ShellPatch::dsigmadx],
cg->fgfs[fngfs + ShellPatch::dsigmady],
cg->fgfs[fngfs + ShellPatch::dsigmadz],
cg->fgfs[fngfs + ShellPatch::dRdx],
cg->fgfs[fngfs + ShellPatch::dRdy],
cg->fgfs[fngfs + ShellPatch::dRdz]);
f_fderivs_shc(cg->shape, cg->fgfs[Axx0->sgfn],
cg->fgfs[Axxx->sgfn], cg->fgfs[Axxy->sgfn], cg->fgfs[Axxz->sgfn],
cg->X[0], cg->X[1], cg->X[2],
Axx0->SoA[0], Axx0->SoA[1], Axx0->SoA[2],
Symmetry, levh, Pp->data->sst,
cg->fgfs[fngfs + ShellPatch::drhodx],
cg->fgfs[fngfs + ShellPatch::drhody],
cg->fgfs[fngfs + ShellPatch::drhodz],
cg->fgfs[fngfs + ShellPatch::dsigmadx],
cg->fgfs[fngfs + ShellPatch::dsigmady],
cg->fgfs[fngfs + ShellPatch::dsigmadz],
cg->fgfs[fngfs + ShellPatch::dRdx],
cg->fgfs[fngfs + ShellPatch::dRdy],
cg->fgfs[fngfs + ShellPatch::dRdz]);
f_fderivs_shc(cg->shape, cg->fgfs[Axy0->sgfn],
cg->fgfs[Axyx->sgfn], cg->fgfs[Axyy->sgfn], cg->fgfs[Axyz->sgfn],
cg->X[0], cg->X[1], cg->X[2],
Axy0->SoA[0], Axy0->SoA[1], Axy0->SoA[2],
Symmetry, levh, Pp->data->sst,
cg->fgfs[fngfs + ShellPatch::drhodx],
cg->fgfs[fngfs + ShellPatch::drhody],
cg->fgfs[fngfs + ShellPatch::drhodz],
cg->fgfs[fngfs + ShellPatch::dsigmadx],
cg->fgfs[fngfs + ShellPatch::dsigmady],
cg->fgfs[fngfs + ShellPatch::dsigmadz],
cg->fgfs[fngfs + ShellPatch::dRdx],
cg->fgfs[fngfs + ShellPatch::dRdy],
cg->fgfs[fngfs + ShellPatch::dRdz]);
f_fderivs_shc(cg->shape, cg->fgfs[Axz0->sgfn],
cg->fgfs[Axzx->sgfn], cg->fgfs[Axzy->sgfn], cg->fgfs[Axzz->sgfn],
cg->X[0], cg->X[1], cg->X[2],
Axz0->SoA[0], Axz0->SoA[1], Axz0->SoA[2],
Symmetry, levh, Pp->data->sst,
cg->fgfs[fngfs + ShellPatch::drhodx],
cg->fgfs[fngfs + ShellPatch::drhody],
cg->fgfs[fngfs + ShellPatch::drhodz],
cg->fgfs[fngfs + ShellPatch::dsigmadx],
cg->fgfs[fngfs + ShellPatch::dsigmady],
cg->fgfs[fngfs + ShellPatch::dsigmadz],
cg->fgfs[fngfs + ShellPatch::dRdx],
cg->fgfs[fngfs + ShellPatch::dRdy],
cg->fgfs[fngfs + ShellPatch::dRdz]);
f_fderivs_shc(cg->shape, cg->fgfs[Ayy0->sgfn],
cg->fgfs[Ayyx->sgfn], cg->fgfs[Ayyy->sgfn], cg->fgfs[Ayyz->sgfn],
cg->X[0], cg->X[1], cg->X[2],
Ayy0->SoA[0], Ayy0->SoA[1], Ayy0->SoA[2],
Symmetry, levh, Pp->data->sst,
cg->fgfs[fngfs + ShellPatch::drhodx],
cg->fgfs[fngfs + ShellPatch::drhody],
cg->fgfs[fngfs + ShellPatch::drhodz],
cg->fgfs[fngfs + ShellPatch::dsigmadx],
cg->fgfs[fngfs + ShellPatch::dsigmady],
cg->fgfs[fngfs + ShellPatch::dsigmadz],
cg->fgfs[fngfs + ShellPatch::dRdx],
cg->fgfs[fngfs + ShellPatch::dRdy],
cg->fgfs[fngfs + ShellPatch::dRdz]);
f_fderivs_shc(cg->shape, cg->fgfs[Ayz0->sgfn],
cg->fgfs[Ayzx->sgfn], cg->fgfs[Ayzy->sgfn], cg->fgfs[Ayzz->sgfn],
cg->X[0], cg->X[1], cg->X[2],
Ayz0->SoA[0], Ayz0->SoA[1], Ayz0->SoA[2],
Symmetry, levh, Pp->data->sst,
cg->fgfs[fngfs + ShellPatch::drhodx],
cg->fgfs[fngfs + ShellPatch::drhody],
cg->fgfs[fngfs + ShellPatch::drhodz],
cg->fgfs[fngfs + ShellPatch::dsigmadx],
cg->fgfs[fngfs + ShellPatch::dsigmady],
cg->fgfs[fngfs + ShellPatch::dsigmadz],
cg->fgfs[fngfs + ShellPatch::dRdx],
cg->fgfs[fngfs + ShellPatch::dRdy],
cg->fgfs[fngfs + ShellPatch::dRdz]);
f_fderivs_shc(cg->shape, cg->fgfs[Azz0->sgfn],
cg->fgfs[Azzx->sgfn], cg->fgfs[Azzy->sgfn], cg->fgfs[Azzz->sgfn],
cg->X[0], cg->X[1], cg->X[2],
Azz0->SoA[0], Azz0->SoA[1], Azz0->SoA[2],
Symmetry, levh, Pp->data->sst,
cg->fgfs[fngfs + ShellPatch::drhodx],
cg->fgfs[fngfs + ShellPatch::drhody],
cg->fgfs[fngfs + ShellPatch::drhodz],
cg->fgfs[fngfs + ShellPatch::dsigmadx],
cg->fgfs[fngfs + ShellPatch::dsigmady],
cg->fgfs[fngfs + ShellPatch::dsigmadz],
cg->fgfs[fngfs + ShellPatch::dRdx],
cg->fgfs[fngfs + ShellPatch::dRdy],
cg->fgfs[fngfs + ShellPatch::dRdz]);
}
if (BL == Pp->data->ble)
break;
BL = BL->next;
}
Pp = Pp->next;
}
}
#endif
}
#else
{
const double t0 = analysis_timing ? MPI_Wtime() : 0.0;
Compute_Psi4(lev);
if (analysis_timing)
t_psi4_sec += MPI_Wtime() - t0;
}
#endif
double *RP, *IP, *RoutMAP;
int NN = 0;
for (int pl = 2; pl < maxl + 1; pl++)
for (int pm = -pl; pm < pl + 1; pm++)
NN++;
RP = new double[NN];
IP = new double[NN];
RoutMAP = new double[7];
double Rex = maxrex;
bool patch_mass_prepared = false;
#ifdef WithShell
bool shell_mass_prepared = false;
#endif
const int analysis_map_every = amss_analysis_map_every();
static long long analysis_map_counter = 0;
static vector<double> cached_RoutMAP;
const bool map_cache_valid = int(cached_RoutMAP.size()) == decn * 7;
const bool refresh_map_this_analysis =
analysis_map_every <= 1 || !map_cache_valid ||
(analysis_map_counter % analysis_map_every) == 0;
if (!map_cache_valid)
cached_RoutMAP.assign(decn * 7, 0.0);
for (int i = 0; i < decn; i++)
{
const double t_surface0 = analysis_timing ? MPI_Wtime() : 0.0;
#ifdef Point_Psi4
Waveshell->surf_Wave(Rex, GH, SH,
phi, trK,
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
phix, phiy, phiz,
trKx, trKy, trKz,
Axxx, Axxy, Axxz,
Axyx, Axyy, Axyz,
Axzx, Axzy, Axzz,
Ayyx, Ayyy, Ayyz,
Ayzx, Ayzy, Ayzz,
Azzx, Azzy, Azzz,
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
2, maxl, NN, RP, IP, ErrorMonitor);
#ifdef WithShell
if (lev > 0 || Rex < GH->bbox[0][0][3])
{
Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0,
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables
RoutMAP, ErrorMonitor, !patch_mass_prepared);
patch_mass_prepared = true;
}
else
{
Waveshell->surf_MassPAng(Rex, lev, SH, phi0, trK0,
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables
RoutMAP, ErrorMonitor, !shell_mass_prepared);
shell_mass_prepared = true;
}
#else
Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0,
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables
RoutMAP, ErrorMonitor, !patch_mass_prepared);
patch_mass_prepared = true;
#endif
#else
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before surface integral");
#ifdef WithShell
if (lev > 0 || Rex < GH->bbox[0][0][3])
{
Waveshell->surf_WaveMassPAng(Rex, lev, GH,
Rpsi4, Ipsi4, 2, maxl, NN, RP, IP,
phi0, trK0,
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1,
RoutMAP, ErrorMonitor, !patch_mass_prepared);
patch_mass_prepared = true;
}
else
{
Waveshell->surf_WaveMassPAng(Rex, lev, SH,
Rpsi4, Ipsi4, 2, maxl, NN, RP, IP,
phi0, trK0,
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1,
RoutMAP, ErrorMonitor, !shell_mass_prepared);
shell_mass_prepared = true;
}
#else
#if (PSTR == 0)
if (analysis_map_every <= 1)
{
Waveshell->surf_WaveMassPAng(Rex, lev, GH,
Rpsi4, Ipsi4, 2, maxl, NN, RP, IP,
phi0, trK0,
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1,
RoutMAP, ErrorMonitor, !patch_mass_prepared);
patch_mass_prepared = true;
}
else
{
Waveshell->surf_Wave(Rex, lev, GH, Rpsi4, Ipsi4, 2, maxl, NN, RP, IP, ErrorMonitor);
if (refresh_map_this_analysis)
{
Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0,
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1,
RoutMAP, ErrorMonitor, !patch_mass_prepared);
patch_mass_prepared = true;
for (int q = 0; q < 7; q++)
cached_RoutMAP[i * 7 + q] = RoutMAP[q];
}
else
{
for (int q = 0; q < 7; q++)
RoutMAP[q] = cached_RoutMAP[i * 7 + q];
}
}
#elif (PSTR == 1 || PSTR == 2)
Waveshell->surf_Wave(Rex, lev, GH, Rpsi4, Ipsi4, 2, maxl, NN, RP, IP, ErrorMonitor, GH->Commlev[lev]);
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after surf_Wave");
Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0,
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables
RoutMAP, ErrorMonitor, GH->Commlev[lev], !patch_mass_prepared);
patch_mass_prepared = true;
#endif
#endif
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"end surface integral");
if (analysis_timing)
t_surface_sec += MPI_Wtime() - t_surface0;
#endif
if (i == 0)
{
ADMMass = RoutMAP[0];
}
#if (PSTR == 1 || PSTR == 2)
if (GH->start_rank[a_lev] > 0)
{
MPI_Status status;
// receive
if (myrank == 0)
{
MPI_Recv(RP, NN, MPI_DOUBLE, GH->start_rank[a_lev], 1, MPI_COMM_WORLD, &status);
MPI_Recv(IP, NN, MPI_DOUBLE, GH->start_rank[a_lev], 2, MPI_COMM_WORLD, &status);
MPI_Recv(RoutMAP, 7, MPI_DOUBLE, GH->start_rank[a_lev], 3, MPI_COMM_WORLD, &status);
}
// send
if (myrank == GH->start_rank[a_lev])
{
MPI_Send(RP, NN, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD);
MPI_Send(IP, NN, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD);
MPI_Send(RoutMAP, 7, MPI_DOUBLE, 0, 3, MPI_COMM_WORLD);
}
}
#endif
Psi4Monitor->writefile(PhysTime, NN, RP, IP);
MAPMonitor->writefile(PhysTime, 7, RoutMAP);
Rex = Rex - drex;
}
if (analysis_map_every > 1)
analysis_map_counter++;
delete[] RP;
delete[] IP;
delete[] RoutMAP;
if (analysis_timing)
{
fprintf(stderr,
"[AMSS-ANALYSIS][rank %d] lev=%d psi4=%.6f surface=%.6f total_before_bh=%.6f detectors=%d modes=%d\n",
myrank, lev, t_psi4_sec, t_surface_sec,
MPI_Wtime() - t_analysis_start, decn, NN);
}
// black hole's position
{
double *pox;
pox = new double[dim * BH_num];
for (int bhi = 0; bhi < BH_num; bhi++)
for (int i = 0; i < dim; i++)
pox[dim * bhi + i] = Porg0[bhi][i];
BHMonitor->writefile(PhysTime, dim * BH_num, pox);
delete[] pox;
}
LastAnas = 0;
}
}
//================================================================================================
//================================================================================================
#if USE_CUDA_BSSN && defined(WithShell)
#undef f_compute_rhs_bssn_ss
// Restore the original Fortran name mapping from bssn_rhs.h (fortran3 convention)
#define f_compute_rhs_bssn_ss compute_rhs_bssn_ss_
#endif
// This member function computes and outputs constraint violations
//================================================================================================
// Periodic constraint-violation output.
//
// Every call advances LastConsOut by dT * 0.5^max(0, trfls). Once LastConsOut
// reaches AnasTime the routine
//   1. re-evaluates f_compute_rhs_bssn on the levels selected below and
//      synchronises ConstraintList on every level,
//   2. (WithShell) synchronises ConstraintList on the shell patches,
//   3. writes the 7 constraint L2 norms through ConVMonitor (shell first when
//      WithShell is defined, then one record per Cartesian level),
//   4. calls Interp_Constraint(false),
//   5. resets LastConsOut and clears ConstraintRefreshLevels.
// Takes no arguments and returns nothing; all state is read from / written to
// class members.
void bssn_class::Constraint_Out()
{
LastConsOut += dT * pow(0.5, Mymax(0, trfls));
if (LastConsOut >= AnasTime)
// Constraint violation output is due
{
// Recompute the constraint data on levels where it may be stale
// (original note: data lost for a moved/new grid).
for (int lev = 0; lev < GH->levels; lev++)
{
// Level 0 is recomputed when bssn_constraint_recompute_from_state() says so;
// finer levels only when flagged in ConstraintRefreshLevels.
if ((lev == 0 && bssn_constraint_recompute_from_state(lev, cuda_level0_constraint_cache_valid)) ||
(lev > 0 && ConstraintRefreshLevels && ConstraintRefreshLevels[lev]))
{
double TRK4 = PhysTime;
double ndeps = numepsb;
int pre = 0;
MyList<Patch> *Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
// Only the rank owning the block evaluates it.
if (myrank == cg->rank)
{
// Argument order: grid shape/time/coordinates, the "0" state fields,
// the *_rhs fields, matter sources, Gam* and R* work fields, and finally
// the seven Cons_* fields followed by Symmetry, lev, ndeps, pre.
// NOTE(review): presumably the Cons_* fields are filled as a by-product of
// the RHS evaluation and the *_rhs fields are overwritten -- confirm
// against the Fortran routine.
f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn],
cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn],
cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn],
cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn],
cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn],
cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn],
cg->fgfs[Lap_rhs->sgfn],
cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn],
cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn],
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, ndeps, pre);
}
// ble marks the last block belonging to this patch; stop there.
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
}
// The sync runs on every level, whether or not it was recomputed above.
Parallel::Sync(GH->PatL[lev], ConstraintList, Symmetry);
}
#ifdef WithShell
if (0) // disabled: the constraint quantities are reused from the step rhs calculation
{
// Dead code kept for reference: shell-patch counterpart of the recompute
// loop above (never executed because of the constant-false condition).
MyList<ss_patch> *sPp;
sPp = SH->PatL;
while (sPp)
{
double TRK4 = PhysTime;
int pre = 0;
int lev = 0;
MyList<Block> *BP = sPp->data->blb;
int fngfs = sPp->data->fngfs;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
{
f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[fngfs + ShellPatch::gx],
cg->fgfs[fngfs + ShellPatch::gy],
cg->fgfs[fngfs + ShellPatch::gz],
cg->fgfs[fngfs + ShellPatch::drhodx],
cg->fgfs[fngfs + ShellPatch::drhody],
cg->fgfs[fngfs + ShellPatch::drhodz],
cg->fgfs[fngfs + ShellPatch::dsigmadx],
cg->fgfs[fngfs + ShellPatch::dsigmady],
cg->fgfs[fngfs + ShellPatch::dsigmadz],
cg->fgfs[fngfs + ShellPatch::dRdx],
cg->fgfs[fngfs + ShellPatch::dRdy],
cg->fgfs[fngfs + ShellPatch::dRdz],
cg->fgfs[fngfs + ShellPatch::drhodxx],
cg->fgfs[fngfs + ShellPatch::drhodxy],
cg->fgfs[fngfs + ShellPatch::drhodxz],
cg->fgfs[fngfs + ShellPatch::drhodyy],
cg->fgfs[fngfs + ShellPatch::drhodyz],
cg->fgfs[fngfs + ShellPatch::drhodzz],
cg->fgfs[fngfs + ShellPatch::dsigmadxx],
cg->fgfs[fngfs + ShellPatch::dsigmadxy],
cg->fgfs[fngfs + ShellPatch::dsigmadxz],
cg->fgfs[fngfs + ShellPatch::dsigmadyy],
cg->fgfs[fngfs + ShellPatch::dsigmadyz],
cg->fgfs[fngfs + ShellPatch::dsigmadzz],
cg->fgfs[fngfs + ShellPatch::dRdxx],
cg->fgfs[fngfs + ShellPatch::dRdxy],
cg->fgfs[fngfs + ShellPatch::dRdxz],
cg->fgfs[fngfs + ShellPatch::dRdyy],
cg->fgfs[fngfs + ShellPatch::dRdyz],
cg->fgfs[fngfs + ShellPatch::dRdzz],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn],
cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn],
cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn],
cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn],
cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn],
cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn],
cg->fgfs[Lap_rhs->sgfn],
cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn],
cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn],
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, numepsh, sPp->data->sst, pre);
}
if (BP == sPp->data->ble)
break;
BP = BP->next;
}
sPp = sPp->next;
}
}
SH->Synch(ConstraintList, Symmetry);
#endif
// ConV receives the seven norms in the order listed in ConstraintVars.
double ConV[7];
#if (PSTR == 1 || PSTR == 2)
// Separate send buffer for the MPI_Allreduce below.
double ConV_h[7];
#endif
var *ConstraintVars[7] = {Cons_Ham, Cons_Px, Cons_Py, Cons_Pz, Cons_Gx, Cons_Gy, Cons_Gz};
#ifdef WithShell
// With shell patches the first record written is the shell norm.
SH->L2Norm7(ConstraintVars, ConV);
ConVMonitor->writefile(PhysTime, 7, ConV);
#endif
// One record per Cartesian level, coarsest first.
for (int levi = 0; levi < GH->levels; levi++)
{
#if (PSTR == 0)
// NOTE(review): only the head patch of the level's list is passed here --
// confirm whether L2Norm7 covers the remaining patches of the level.
Parallel::L2Norm7(GH->PatL[levi]->data, ConstraintVars, ConV);
#elif (PSTR == 1 || PSTR == 2)
Parallel::L2Norm7(GH->PatL[levi]->data, ConstraintVars, ConV, GH->Commlev[levi]);
// misc::tillherecheck("before collect data to cpu0");
// MPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm): sendbuf and recvbuf must be different
if (levi > 0)
{
// Only the start rank of this level contributes its ConV; every other rank
// contributes zeros, so the global sum hands that value to all ranks.
if (GH->mylev == levi && myrank == GH->start_rank[levi])
for (int i = 0; i < 7; i++)
ConV_h[i] = ConV[i];
else
for (int i = 0; i < 7; i++)
ConV_h[i] = 0;
MPI_Allreduce(ConV_h, ConV, 7, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
}
#endif
ConVMonitor->writefile(PhysTime, 7, ConV);
/*
if(fabs(ConV[0])<0.00001)
{
MyList<var> * DG_List=new MyList<var>(Cons_Ham);
DG_List->insert(Cons_Px); DG_List->insert(Cons_Py); DG_List->insert(Cons_Px);
DG_List->insert(Cons_Gx); DG_List->insert(Cons_Gy); DG_List->insert(Cons_Gx);
Parallel::Dump_Data(GH->PatL[levi],DG_List,"jiu",0,1);
DG_List->clearList();
if(myrank==0) MPI_Abort(MPI_COMM_WORLD,1);
}
*/
}
// false: skip the recompute branch inside Interp_Constraint; the fields
// were handled above.
Interp_Constraint(false);
LastConsOut = 0;
// All refresh requests have been served; clear the per-level flags.
if (ConstraintRefreshLevels)
for (int lev = 0; lev < GH->levels; lev++)
ConstraintRefreshLevels[lev] = 0;
}
}
//================================================================================================
//================================================================================================
// This member function computes derivatives required for apparent-horizon calculations
//================================================================================================
#ifdef With_AHF
void bssn_class::AH_Prepare_derivatives()
{
double SYM = 1.0, ANT = -1.0;
int ZEO = 0;
for (int lev = 0; lev < GH->levels; lev++)
{
MyList<Patch> *Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
{
f_fderivs(cg->shape, cg->fgfs[phi0->sgfn],
cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn],
cg->X[0], cg->X[1], cg->X[2],
SYM, SYM, SYM, Symmetry, ZEO);
f_fderivs(cg->shape, cg->fgfs[gxx0->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamzxx->sgfn],
cg->X[0], cg->X[1], cg->X[2],
SYM, SYM, SYM, Symmetry, ZEO);
f_fderivs(cg->shape, cg->fgfs[gxy0->sgfn],
cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamzxy->sgfn],
cg->X[0], cg->X[1], cg->X[2],
ANT, ANT, SYM, Symmetry, ZEO);
f_fderivs(cg->shape, cg->fgfs[gxz0->sgfn],
cg->fgfs[Gamxxz->sgfn], cg->fgfs[Gamyxz->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->X[0], cg->X[1], cg->X[2],
ANT, SYM, ANT, Symmetry, ZEO);
f_fderivs(cg->shape, cg->fgfs[gyy0->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamzyy->sgfn],
cg->X[0], cg->X[1], cg->X[2],
SYM, SYM, SYM, Symmetry, ZEO);
f_fderivs(cg->shape, cg->fgfs[gyz0->sgfn],
cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamzyz->sgfn],
cg->X[0], cg->X[1], cg->X[2],
SYM, ANT, ANT, Symmetry, ZEO);
f_fderivs(cg->shape, cg->fgfs[gzz0->sgfn],
cg->fgfs[Gamxzz->sgfn], cg->fgfs[Gamyzz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->X[0], cg->X[1], cg->X[2],
SYM, SYM, SYM, Symmetry, ZEO);
}
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
Parallel::Sync(GH->PatL[lev], AHDList, Symmetry);
}
}
//================================================================================================
//================================================================================================
// This member function interpolates apparent-horizon data
//================================================================================================
// Interpolates the variables in VarList at NN points for the horizon finder.
//
// Parameters:
//   VarList   - list of variables to interpolate; its length gives the number
//               of values written per point.
//   NN        - number of points.
//   XX        - coordinates, indexed as XX[direction][point], direction = 0..2.
//   Shellf    - output buffer; the values of point i start at
//               Shellf + i * (number of variables).
//   Symmetryi - symmetry flag forwarded to the interpolation routines.
//
// Each point is looked up on the Cartesian levels from the finest to the
// coarsest and, when WithShell is defined, on the shell patches as a last
// resort. If a point is found nowhere, rank 0 reports it and MPI_Abort is
// called. Returns true when every point has been interpolated.
//
// Fix: a point found on the shell used to end the whole function with
// "return true", so the points after it were never interpolated. The shell hit
// now only completes the current point.
bool bssn_class::AH_Interp_Points(MyList<var> *VarList,
                                  int NN, double **XX,
                                  double *Shellf, int Symmetryi)
{
  // Number of values written per point.
  int num_var = 0;
  for (MyList<var> *varl = VarList; varl; varl = varl->next)
    num_var++;

  double pox[3];
  for (int i = 0; i < NN; i++)
  {
    for (int j = 0; j < 3; j++)
      pox[j] = XX[j][i];
    double *result = Shellf + i * num_var;

    // Search the Cartesian hierarchy, finest level first.
    bool found = false;
    for (int lev = GH->levels - 1; lev >= 0 && !found; lev--)
    {
      for (MyList<Patch> *Pp = GH->PatL[lev]; Pp; Pp = Pp->next)
      {
        if (Pp->data->Interp_ONE_Point(VarList, pox, result, Symmetryi))
        {
          found = true;
          break;
        }
      }
    }

#ifdef WithShell
    // Not covered by any Cartesian level: fall back to the shell patches.
    if (!found)
      found = SH->Interp_One_Point(VarList, pox, result, Symmetryi);
    const char *where = ") is out of cgh and shell domain!";
#else
    const char *where = ") is out of cgh domain!";
#endif

    if (!found)
    {
      if (myrank == 0)
      {
        cout << " bssn_class::AH_Interp_Points: point ("
             << pox[0] << "," << pox[1] << "," << pox[2]
             << where << endl;
        if (ErrorMonitor->outfile)
          ErrorMonitor->outfile << " bssn_class::AH_Interp_Points: point ("
                                << pox[0] << "," << pox[1] << "," << pox[2]
                                << where << endl;
      }
      MPI_Abort(MPI_COMM_WORLD, 1);
      return false;
    }
  }
  return true;
}
//================================================================================================
//================================================================================================
// This member function computes apparent horizons
//================================================================================================
// Runs the apparent-horizon search when one is due on this step.
//
// Parameters:
//   lev    - level that has just been stepped; nothing happens unless it is the
//            finest level (GH->levels - 1).
//   dT_lev - time step of that level; int(PhysTime / dT_lev) is used as the
//            step counter that is compared against findeveryl[].
//
// When at least one horizon is due, the routine fills the initial-guess
// centres (xc, yc, zc), radii (xr, yr, zr), trigger flags, dTT and dumpid for
// the individual black holes (slots 0..BH_num-1) and for every black-hole pair
// (slots from BH_num on), then calls AHFinderDirect_find_horizons.
void bssn_class::AH_Step_Find(int lev, double dT_lev)
{
  // Only the finest level drives the horizon search.
  if (lev != GH->levels - 1)
    return;

  const int ncount = int(PhysTime / dT_lev);

  // Is any horizon scheduled for this step? Stop at the first hit.
  bool any_due = false;
  for (int h = 0; h < HN_num && !any_due; h++)
    any_due = (ncount % findeveryl[h] == 0);
  if (!any_due)
    return;

  // Wall-clock bookkeeping is done on rank 0 only.
  clock_t prev_clock, curr_clock;
  if (myrank == 0)
    prev_clock = clock();

  const int cdumpid = int(PhysTime / AHdumptime) + 1;
  for (int h = 0; h < HN_num; h++)
    dumpid[h] = cdumpid;

  // Individual black holes: centred on the puncture, with each axis radius
  // scaled by sqrt(1 - (|P_axis| / M)^2).
  for (int b = 0; b < BH_num; b++)
  {
    xc[b] = Porg0[b][0];
    yc[b] = Porg0[b][1];
    zc[b] = Porg0[b][2];

    double ratio = fabs(Pmom[b * 3]) / (Mass[b]);
    xr[b] = Mass[b] * sqrt(1 - ratio * ratio);
    ratio = fabs(Pmom[b * 3 + 1]) / (Mass[b]);
    yr[b] = Mass[b] * sqrt(1 - ratio * ratio);
    ratio = fabs(Pmom[b * 3 + 2]) / (Mass[b]);
    zr[b] = Mass[b] * sqrt(1 - ratio * ratio);

    const bool due = (ncount % findeveryl[b] == 0);
    trigger[b] = due;
    dTT[b] = -1;
    if (due)
      dTT[b] = findeveryl[b] * dT_lev;

    // Dump at most once per dump interval, and only for triggered horizons.
    if (trigger[b] && (dumpid[b] > lastahdumpid[b]))
      lastahdumpid[b] = dumpid[b];
    else
      dumpid[b] = 0;
  }

  // Pairs: centred on the midpoint, with radius equal to the sum of the two
  // masses. A pair is searched only when the two punctures are closer than
  // twice that radius along every axis and the pair is scheduled this step.
  int slot = BH_num;
  for (int ia = 0; ia < BH_num; ia++)
  {
    for (int ib = ia + 1; ib < BH_num; ib++, slot++)
    {
      xc[slot] = (Porg0[ia][0] + Porg0[ib][0]) / 2;
      yc[slot] = (Porg0[ia][1] + Porg0[ib][1]) / 2;
      zc[slot] = (Porg0[ia][2] + Porg0[ib][2]) / 2;
      xr[slot] = yr[slot] = zr[slot] = Mass[ia] + Mass[ib];

      const bool due = fabs(Porg0[ia][0] - Porg0[ib][0]) < 2 * xr[slot] &&
                       fabs(Porg0[ia][1] - Porg0[ib][1]) < 2 * xr[slot] &&
                       fabs(Porg0[ia][2] - Porg0[ib][2]) < 2 * xr[slot] &&
                       (ncount % findeveryl[slot] == 0);
      trigger[slot] = due;
      dTT[slot] = -1;
      if (due)
        dTT[slot] = findeveryl[slot] * dT_lev;

      if (trigger[slot] && (dumpid[slot] > lastahdumpid[slot]))
        lastahdumpid[slot] = dumpid[slot];
      else
        dumpid[slot] = 0;
    }
  }

#if (ABEtype == 1 || ABEtype == 2)
  // After PhysTime 10 the last filled slot is always searched, with radius 50.
  if (PhysTime > 10)
  {
    slot--;
    trigger[slot] = true;
    xr[slot] = yr[slot] = zr[slot] = 50;
    // if(myrank==0) for(ihn=0;ihn<HN_num;ihn++) cout<<"trigger#"<<ihn<<": "<<trigger[ihn]<<endl;
  }
#endif

#if USE_CUDA_BSSN
  // With resident GPU state, download StateList on every level before the
  // horizon finder runs.
  if (bssn_cuda_use_resident_sync(lev))
  {
    for (int dl = 0; dl < GH->levels; dl++)
      bssn_cuda_download_level_state_if_present(GH->PatL[dl], StateList, myrank);
  }
#endif

  AHFinderDirect::AHFinderDirect_find_horizons(HN_num, dumpid,
                                               xc, yc, zc, xr, yr, zr, trigger, dTT);
  // note rhs and Gamijk have been used as temp storage space

  if (myrank == 0)
  {
    curr_clock = clock();
    cout << " Finding horizon used "
         << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
         << " seconds! " << endl;
  }
}
#endif
//================================================================================================
//================================================================================================
// This member function interpolates constraint data
//================================================================================================
void bssn_class::Interp_Constraint(bool infg)
{
if (infg)
{
// we do not support a_lev != 0 yet.
if (a_lev > 0)
return;
// recompute least the constraint data lost for moved new grid
for (int lev = 0; lev < GH->levels; lev++)
{
// make sure the data consistent for higher levels
if (bssn_constraint_recompute_from_state(lev, cuda_level0_constraint_cache_valid))
{
double TRK4 = PhysTime;
double ndeps = numepsb;
int pre = 0;
MyList<Patch> *Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
{
f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn],
cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn],
cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn],
cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn],
cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn],
cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn],
cg->fgfs[Lap_rhs->sgfn],
cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn],
cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn],
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, ndeps, pre);
}
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
}
Parallel::Sync(GH->PatL[lev], ConstraintList, Symmetry);
}
#ifdef WithShell
if (0) // if the constrait quantities can be reused from the step rhs calculation
{
MyList<ss_patch> *sPp;
sPp = SH->PatL;
while (sPp)
{
double TRK4 = PhysTime;
int pre = 0;
int lev = 0;
MyList<Block> *BP = sPp->data->blb;
int fngfs = sPp->data->fngfs;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
{
f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[fngfs + ShellPatch::gx],
cg->fgfs[fngfs + ShellPatch::gy],
cg->fgfs[fngfs + ShellPatch::gz],
cg->fgfs[fngfs + ShellPatch::drhodx],
cg->fgfs[fngfs + ShellPatch::drhody],
cg->fgfs[fngfs + ShellPatch::drhodz],
cg->fgfs[fngfs + ShellPatch::dsigmadx],
cg->fgfs[fngfs + ShellPatch::dsigmady],
cg->fgfs[fngfs + ShellPatch::dsigmadz],
cg->fgfs[fngfs + ShellPatch::dRdx],
cg->fgfs[fngfs + ShellPatch::dRdy],
cg->fgfs[fngfs + ShellPatch::dRdz],
cg->fgfs[fngfs + ShellPatch::drhodxx],
cg->fgfs[fngfs + ShellPatch::drhodxy],
cg->fgfs[fngfs + ShellPatch::drhodxz],
cg->fgfs[fngfs + ShellPatch::drhodyy],
cg->fgfs[fngfs + ShellPatch::drhodyz],
cg->fgfs[fngfs + ShellPatch::drhodzz],
cg->fgfs[fngfs + ShellPatch::dsigmadxx],
cg->fgfs[fngfs + ShellPatch::dsigmadxy],
cg->fgfs[fngfs + ShellPatch::dsigmadxz],
cg->fgfs[fngfs + ShellPatch::dsigmadyy],
cg->fgfs[fngfs + ShellPatch::dsigmadyz],
cg->fgfs[fngfs + ShellPatch::dsigmadzz],
cg->fgfs[fngfs + ShellPatch::dRdxx],
cg->fgfs[fngfs + ShellPatch::dRdxy],
cg->fgfs[fngfs + ShellPatch::dRdxz],
cg->fgfs[fngfs + ShellPatch::dRdyy],
cg->fgfs[fngfs + ShellPatch::dRdyz],
cg->fgfs[fngfs + ShellPatch::dRdzz],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn],
cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn],
cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn],
cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn],
cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn],
cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn],
cg->fgfs[Lap_rhs->sgfn],
cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn],
cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn],
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, numepsh, sPp->data->sst, pre);
}
if (BP == sPp->data->ble)
break;
BP = BP->next;
}
sPp = sPp->next;
}
}
SH->Synch(ConstraintList, Symmetry);
#endif
}
// interpolate
double *x1, *y1, *z1;
const int n = 1000;
double lmax, lmin, dd;
lmin = 0;
#ifdef WithShell
lmax = SH->Rrange[1];
#else
lmax = GH->bbox[0][0][4];
#endif
#ifdef Vertex
#ifdef Cell
#error Both Cell and Vertex are defined
#endif
dd = (lmax - lmin) / (n - 1);
#else
#ifdef Cell
dd = (lmax - lmin) / n;
#else
#error Not define Vertex nor Cell
#endif
#endif
x1 = new double[n];
y1 = new double[n];
z1 = new double[n];
for (int i = 0; i < n; i++)
{
x1[i] = 0;
#ifdef Vertex
#ifdef Cell
#error Both Cell and Vertex are defined
#endif
y1[i] = lmin + i * dd;
#else
#ifdef Cell
y1[i] = lmin + (i + 0.5) * dd;
#else
#error Not define Vertex nor Cell
#endif
#endif
z1[i] = 0;
}
int InList = 0;
MyList<var> *varl = ConstraintList;
while (varl)
{
InList++;
varl = varl->next;
}
double *shellf;
shellf = new double[n * InList];
for (int i = 0; i < n; i++)
{
double XX[3];
XX[0] = x1[i];
XX[1] = y1[i];
XX[2] = z1[i];
bool fg = GH->Interp_One_Point(ConstraintList, XX, shellf + i * InList, Symmetry);
#ifdef WithShell
if (!fg)
fg = SH->Interp_One_Point(ConstraintList, XX, shellf + i * InList, Symmetry);
#endif
if (!fg && myrank == 0)
{
cout << "bssn_class::Interp_Constraint meets wrong" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
if (myrank == 0)
{
ofstream outfile;
char suffix[64];
sprintf(suffix, "/interp_constraint_%05d.dat", int(PhysTime / dT + 0.5));
string filename = ErrorMonitor->out_dir + suffix;
// 0.5 for round off
outfile.open(filename.c_str());
outfile << "# corrdinate, H_Res, Px_Res, Py_Res, Pz_Res, Gx_Res, Gy_Res, Gz_Res, ...." << endl;
for (int i = 0; i < n; i++)
{
outfile << setw(10) << setprecision(10) << y1[i];
for (int j = 0; j < InList; j++)
outfile << " " << setw(16) << setprecision(15) << shellf[InList * i + j];
outfile << endl;
}
outfile.close();
}
delete[] shellf;
}
//================================================================================================
//================================================================================================
// This member function computes constraint violations
//================================================================================================
void bssn_class::Compute_Constraint()
{
// Calls the BSSN right-hand-side routines on the "0" state variables
// (phi0, trK0, gxx0, ...) for every block owned by this rank, handing them
// the Cons_* fields as the trailing field arguments, then synchronizes and
// restricts/prolongs ConstraintList. Takes no arguments, returns nothing.
// NOTE(review): the same calls also receive the *_rhs, Gam*** and R** arrays;
// presumably those are overwritten as a side effect -- confirm before relying
// on their contents after this function returns.
// NOTE(review): only the f_compute_rhs_bssn / f_compute_rhs_bssn_ss entry
// points are used here, regardless of USE_CUDA_BSSN; if field data can be
// resident on the GPU, confirm the host copies are current before this runs.
// Time value handed to the RHS routines: the current physical time.
double TRK4 = PhysTime;
// numepsb is passed to the Cartesian-patch call; the shell call below passes numepsh instead.
double ndeps = numepsb;
// Last argument of both RHS calls; its meaning is defined by the callee -- TODO confirm.
int pre = 0;
int lev;
// Pass 1: every Cartesian level, every patch, every block.
for (lev = 0; lev < GH->levels; lev++)
{
{
MyList<Patch> *Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
// Only the rank that owns this block evaluates it.
if (myrank == cg->rank)
{
f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn],
cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn],
cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn],
cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn],
cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn],
cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn],
cg->fgfs[Lap_rhs->sgfn],
cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn],
cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn],
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, ndeps, pre);
}
// ble is this patch's end-of-blocks marker: stop once it has been processed.
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
}
// Synchronize the ConstraintList fields over the patches of this level.
Parallel::Sync(GH->PatL[lev], ConstraintList, Symmetry);
}
// Restrict/prolong the constraint quantities, walking from the finest level down to level 1.
for (lev = GH->levels - 1; lev > 0; lev--)
RestrictProlong(lev, 1, false, ConstraintList, ConstraintList, ConstraintList);
#ifdef WithShell
// Pass 2 (shell builds only): the shell patches; the RHS call is given level index 0.
lev = 0;
{
MyList<ss_patch> *sPp;
sPp = SH->PatL;
while (sPp)
{
MyList<Block> *BP = sPp->data->blb;
// Base index in fgfs from which the ShellPatch::* arrays (gx, drhodx, ...) are addressed.
int fngfs = sPp->data->fngfs;
while (BP)
{
Block *cg = BP->data;
// Only the rank that owns this block evaluates it.
if (myrank == cg->rank)
{
f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[fngfs + ShellPatch::gx],
cg->fgfs[fngfs + ShellPatch::gy],
cg->fgfs[fngfs + ShellPatch::gz],
cg->fgfs[fngfs + ShellPatch::drhodx],
cg->fgfs[fngfs + ShellPatch::drhody],
cg->fgfs[fngfs + ShellPatch::drhodz],
cg->fgfs[fngfs + ShellPatch::dsigmadx],
cg->fgfs[fngfs + ShellPatch::dsigmady],
cg->fgfs[fngfs + ShellPatch::dsigmadz],
cg->fgfs[fngfs + ShellPatch::dRdx],
cg->fgfs[fngfs + ShellPatch::dRdy],
cg->fgfs[fngfs + ShellPatch::dRdz],
cg->fgfs[fngfs + ShellPatch::drhodxx],
cg->fgfs[fngfs + ShellPatch::drhodxy],
cg->fgfs[fngfs + ShellPatch::drhodxz],
cg->fgfs[fngfs + ShellPatch::drhodyy],
cg->fgfs[fngfs + ShellPatch::drhodyz],
cg->fgfs[fngfs + ShellPatch::drhodzz],
cg->fgfs[fngfs + ShellPatch::dsigmadxx],
cg->fgfs[fngfs + ShellPatch::dsigmadxy],
cg->fgfs[fngfs + ShellPatch::dsigmadxz],
cg->fgfs[fngfs + ShellPatch::dsigmadyy],
cg->fgfs[fngfs + ShellPatch::dsigmadyz],
cg->fgfs[fngfs + ShellPatch::dsigmadzz],
cg->fgfs[fngfs + ShellPatch::dRdxx],
cg->fgfs[fngfs + ShellPatch::dRdxy],
cg->fgfs[fngfs + ShellPatch::dRdxz],
cg->fgfs[fngfs + ShellPatch::dRdyy],
cg->fgfs[fngfs + ShellPatch::dRdyz],
cg->fgfs[fngfs + ShellPatch::dRdzz],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn],
cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn],
cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn],
cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn],
cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn],
cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn],
cg->fgfs[Lap_rhs->sgfn],
cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn],
cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn],
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, numepsh, sPp->data->sst, pre);
}
// ble is this shell patch's end-of-blocks marker: stop once it has been processed.
if (BP == sPp->data->ble)
break;
BP = BP->next;
}
sPp = sPp->next;
}
}
// Synchronize the ConstraintList fields over the shell patches.
SH->Synch(ConstraintList, Symmetry);
// Interpolate the constraint quantities via ShellPatch::CS_Inter
// (presumably between the Cartesian grid and the shell patches -- confirm).
SH->CS_Inter(ConstraintList, Symmetry);
#endif
}
//================================================================================================
//================================================================================================
void bssn_class::testRestrict()
{
MyList<var> *DG_List = new MyList<var>(phi0);
int lev = 0;
double ZEO = 0, ONE = 1;
MyList<Patch> *Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
{
f_set_value(cg->shape, cg->fgfs[phi0->sgfn], ZEO);
}
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
lev = 1;
Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
{
f_set_value(cg->shape, cg->fgfs[phi0->sgfn], ONE);
}
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], DG_List, DG_List, Symmetry);
Parallel::Sync(GH->PatL[lev - 1], DG_List, Symmetry);
Parallel::Dump_Data(GH->PatL[lev - 1], DG_List, 0, PhysTime, dT);
Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT);
DG_List->clearList();
exit(0);
}
//================================================================================================
//================================================================================================
void bssn_class::testOutBd()
{
MyList<var> *DG_List = new MyList<var>(phi0);
int lev = 1;
double ZEO = 0, ONE = 1;
MyList<Patch> *Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
{
f_set_value(cg->shape, cg->fgfs[phi0->sgfn], ZEO);
}
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
lev = 0;
Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
{
f_set_value(cg->shape, cg->fgfs[phi0->sgfn], ONE);
}
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
lev = 1;
MyList<Patch> *Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, DG_List, DG_List, Symmetry);
Pp = Pp->next;
}
Ppc = Ppc->next;
}
Parallel::Sync(GH->PatL[lev], DG_List, Symmetry);
Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT);
Parallel::Dump_Data(GH->PatL[lev - 1], DG_List, 0, PhysTime, dT);
DG_List->clearList();
exit(0);
}
//================================================================================================
//================================================================================================
// This member function enforces/checks the traceless condition
//================================================================================================
void bssn_class::Enforce_algcon(int lev, int fg)
{
MyList<Patch> *Pp = GH->PatL[lev];
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
{
if (fg == 0)
f_enforce_ga(cg->shape,
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
else
f_enforce_ga(cg->shape,
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
}
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
#ifdef WithShell
if (lev == 0)
{
MyList<ss_patch> *sPp = SH->PatL;
while (sPp)
{
MyList<Block> *BP = sPp->data->blb;
int fngfs = sPp->data->fngfs;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
{
if (fg == 0)
f_enforce_ga(cg->shape,
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
else
f_enforce_ga(cg->shape,
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
}
if (BP == sPp->data->ble)
break;
BP = BP->next;
}
sPp = sPp->next;
}
}
#endif
}
//================================================================================================
//================================================================================================
// This member function polls stdin (non-blocking) for an abort request: the word "stop"
//================================================================================================
bool bssn_class::check_Stdin_Abort()
{
fd_set readfds;
struct timeval timeout;
FD_ZERO(&readfds);
FD_SET(STDIN_FILENO, &readfds);
// Set timeout to 0 — perform a non-blocking check
timeout.tv_sec = 0;
timeout.tv_usec = 0;
int activity = select(STDIN_FILENO + 1, &readfds, nullptr, nullptr, &timeout);
if (activity > 0 && FD_ISSET(STDIN_FILENO, &readfds)) {
string input_abort;
if (cin >> input_abort) {
if (input_abort == "stop") {
return true;
}
}
}
return false;
}
//================================================================================================