Compare commits
4 Commits
lzd-cuda
...
cjy-cassiu
| Author | SHA1 | Date | |
|---|---|---|---|
| d96ca6ed2a | |||
| 60ad63e8cc | |||
| 087d034ee3 | |||
| 5f664716ab |
4
.gitignore
vendored
4
.gitignore
vendored
@@ -1,6 +1,6 @@
|
||||
__pycache__
|
||||
GW150914
|
||||
GW150914*
|
||||
GW150914-origin
|
||||
docs
|
||||
*.tmp
|
||||
.codex
|
||||
|
||||
|
||||
@@ -13,14 +13,17 @@ import numpy
|
||||
|
||||
## Setting MPI processes and the output file directory
|
||||
|
||||
File_directory = "GW150914" ## output file directory
|
||||
Output_directory = "binary_output" ## binary data file directory
|
||||
## The file directory name should not be too long
|
||||
MPI_processes = 8 ## number of mpi processes used in the simulation
|
||||
|
||||
GPU_Calculation = "yes" ## Use GPU or not
|
||||
## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)
|
||||
CPU_Part = 1.0
|
||||
File_directory = "GW150914" ## output file directory
|
||||
Output_directory = "binary_output" ## binary data file directory
|
||||
## The file directory name should not be too long
|
||||
MPI_processes = 64 ## number of mpi processes used in the simulation
|
||||
OMP_Threads = 3 ## number of OpenMP threads used by each MPI process
|
||||
MPI_hosts = ["localhost", "192.168.20.102"] ## MPI hosts for multi-node runs
|
||||
MPI_processes_per_node = 32 ## MPI ranks launched on each node in MPI_hosts
|
||||
|
||||
GPU_Calculation = "no" ## Use GPU or not
|
||||
## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)
|
||||
CPU_Part = 1.0
|
||||
GPU_Part = 0.0
|
||||
|
||||
#################################################
|
||||
@@ -50,7 +53,7 @@ Check_Time = 100.0
|
||||
Dump_Time = 100.0 ## time inteval dT for dumping binary data
|
||||
D2_Dump_Time = 100.0 ## dump the ascii data for 2d surface after dT'
|
||||
Analysis_Time = 0.1 ## dump the puncture position and GW psi4 after dT"
|
||||
Evolution_Step_Number = 10000000 ## stop the calculation after the maximal step number
|
||||
Evolution_Step_Number = 10000000 ## stop the calculation after the maximal step number
|
||||
Courant_Factor = 0.5 ## Courant Factor
|
||||
Dissipation = 0.15 ## Kreiss-Oliger Dissipation Strength
|
||||
|
||||
|
||||
@@ -258,7 +258,7 @@ print()
|
||||
if (input_data.GPU_Calculation == "no"):
|
||||
ABE_file = os.path.join(AMSS_NCKU_source_copy, "ABE")
|
||||
elif (input_data.GPU_Calculation == "yes"):
|
||||
ABE_file = os.path.join(AMSS_NCKU_source_copy, "ABE_CUDA")
|
||||
ABE_file = os.path.join(AMSS_NCKU_source_copy, "ABEGPU")
|
||||
|
||||
if not os.path.exists( ABE_file ):
|
||||
print( )
|
||||
|
||||
@@ -2,18 +2,13 @@
|
||||
"""
|
||||
AMSS-NCKU GW150914 Simulation Regression Test Script (Comprehensive Version)
|
||||
|
||||
Verification Requirements:
|
||||
1. RMS errors < 1% for:
|
||||
- 3D Vector Total RMS
|
||||
- X Component RMS
|
||||
- Y Component RMS
|
||||
- Z Component RMS
|
||||
2. ADM constraint violation < 2 (Grid Level 0)
|
||||
3. The following figure PDFs must match GW150914-origin exactly after rasterization:
|
||||
- ADM_Constraint_Grid_Level_0.pdf
|
||||
- BH_Trajectory_21_XY.pdf
|
||||
- BH_Trajectory_XY.pdf
|
||||
The script also reports the percentage of differing pixels for each figure.
|
||||
Verification Requirements:
|
||||
1. RMS errors < 1% for:
|
||||
- 3D Vector Total RMS
|
||||
- X Component RMS
|
||||
- Y Component RMS
|
||||
- Z Component RMS
|
||||
2. ADM constraint violation < 2 (Grid Level 0)
|
||||
|
||||
RMS Calculation Method:
|
||||
- Computes trajectory deviation on the XY plane independently for BH1 and BH2
|
||||
@@ -25,13 +20,9 @@ Default: output_dir = GW150914/AMSS_NCKU_output
|
||||
Reference: GW150914-origin (baseline simulation)
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import sys
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
import sys
|
||||
import os
|
||||
|
||||
# ANSI Color Codes
|
||||
class Color:
|
||||
@@ -58,143 +49,17 @@ def load_bh_trajectory(filepath):
|
||||
}
|
||||
|
||||
|
||||
def load_constraint_data(filepath):
|
||||
"""Load constraint violation data"""
|
||||
data = []
|
||||
def load_constraint_data(filepath):
|
||||
"""Load constraint violation data"""
|
||||
data = []
|
||||
with open(filepath, 'r') as f:
|
||||
for line in f:
|
||||
if line.startswith('#'):
|
||||
continue
|
||||
parts = line.split()
|
||||
if len(parts) >= 8:
|
||||
data.append([float(x) for x in parts[:8]])
|
||||
return np.array(data)
|
||||
|
||||
|
||||
def resolve_figure_dir(path):
|
||||
"""Resolve the sibling figure directory from an output or figure path."""
|
||||
normalized = os.path.normpath(path)
|
||||
if os.path.basename(normalized) == "figure":
|
||||
return normalized
|
||||
return os.path.join(os.path.dirname(normalized), "figure")
|
||||
|
||||
|
||||
def render_pdf_to_images(pdf_path, dpi=150):
|
||||
"""Render a PDF to RGB images using Ghostscript."""
|
||||
gs_path = shutil.which("gs")
|
||||
if gs_path is None:
|
||||
raise RuntimeError("Ghostscript executable 'gs' was not found in PATH")
|
||||
|
||||
with tempfile.TemporaryDirectory(prefix="amss_verify_pdf_") as temp_dir:
|
||||
output_pattern = os.path.join(temp_dir, "page-%03d.ppm")
|
||||
cmd = [
|
||||
gs_path,
|
||||
"-q",
|
||||
"-dSAFER",
|
||||
"-dBATCH",
|
||||
"-dNOPAUSE",
|
||||
"-sDEVICE=ppmraw",
|
||||
f"-r{dpi}",
|
||||
f"-o{output_pattern}",
|
||||
pdf_path
|
||||
]
|
||||
|
||||
try:
|
||||
subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True)
|
||||
except subprocess.CalledProcessError as exc:
|
||||
message = exc.stderr.strip() or str(exc)
|
||||
raise RuntimeError(f"Failed to render PDF '{pdf_path}': {message}") from exc
|
||||
|
||||
ppm_files = sorted(
|
||||
os.path.join(temp_dir, filename)
|
||||
for filename in os.listdir(temp_dir)
|
||||
if filename.endswith(".ppm")
|
||||
)
|
||||
|
||||
if not ppm_files:
|
||||
raise RuntimeError(f"No rendered pages were produced for '{pdf_path}'")
|
||||
|
||||
images = []
|
||||
for ppm_file in ppm_files:
|
||||
with Image.open(ppm_file) as img:
|
||||
images.append(np.array(img.convert("RGB"), dtype=np.uint8))
|
||||
|
||||
return images
|
||||
|
||||
|
||||
def compare_rendered_pages(ref_img, target_img):
|
||||
"""Return (different_pixels, total_pixels) for two rendered RGB pages."""
|
||||
ref_h, ref_w = ref_img.shape[:2]
|
||||
tgt_h, tgt_w = target_img.shape[:2]
|
||||
total_pixels = max(ref_h, tgt_h) * max(ref_w, tgt_w)
|
||||
|
||||
if ref_h == tgt_h and ref_w == tgt_w:
|
||||
different_pixels = int(np.count_nonzero(np.any(ref_img != target_img, axis=2)))
|
||||
return different_pixels, total_pixels
|
||||
|
||||
diff_mask = np.ones((max(ref_h, tgt_h), max(ref_w, tgt_w)), dtype=bool)
|
||||
overlap_h = min(ref_h, tgt_h)
|
||||
overlap_w = min(ref_w, tgt_w)
|
||||
overlap_diff = np.any(ref_img[:overlap_h, :overlap_w] != target_img[:overlap_h, :overlap_w], axis=2)
|
||||
diff_mask[:overlap_h, :overlap_w] = overlap_diff
|
||||
different_pixels = int(np.count_nonzero(diff_mask))
|
||||
return different_pixels, total_pixels
|
||||
|
||||
|
||||
def compare_pdf_images(ref_pdf, target_pdf, dpi=150, threshold_percent=0.001):
|
||||
"""Compare two PDFs by rasterizing them and counting differing pixels."""
|
||||
ref_pages = render_pdf_to_images(ref_pdf, dpi=dpi)
|
||||
target_pages = render_pdf_to_images(target_pdf, dpi=dpi)
|
||||
|
||||
total_pixels = 0
|
||||
different_pixels = 0
|
||||
max_pages = max(len(ref_pages), len(target_pages))
|
||||
|
||||
for page_idx in range(max_pages):
|
||||
if page_idx < len(ref_pages) and page_idx < len(target_pages):
|
||||
page_diff, page_total = compare_rendered_pages(ref_pages[page_idx], target_pages[page_idx])
|
||||
else:
|
||||
existing_page = ref_pages[page_idx] if page_idx < len(ref_pages) else target_pages[page_idx]
|
||||
page_total = existing_page.shape[0] * existing_page.shape[1]
|
||||
page_diff = page_total
|
||||
|
||||
total_pixels += page_total
|
||||
different_pixels += page_diff
|
||||
|
||||
diff_percent = (different_pixels / total_pixels * 100.0) if total_pixels else 0.0
|
||||
return {
|
||||
"different_pixels": different_pixels,
|
||||
"total_pixels": total_pixels,
|
||||
"diff_percent": diff_percent,
|
||||
"pages_ref": len(ref_pages),
|
||||
"pages_target": len(target_pages),
|
||||
"passed": diff_percent < threshold_percent
|
||||
}
|
||||
|
||||
|
||||
def compare_required_figures(reference_figure_dir, target_figure_dir):
|
||||
"""Compare the required GW150914 figure PDFs."""
|
||||
figure_names = [
|
||||
"ADM_Constraint_Grid_Level_0.pdf",
|
||||
"BH_Trajectory_21_XY.pdf",
|
||||
"BH_Trajectory_XY.pdf"
|
||||
]
|
||||
|
||||
results = []
|
||||
for figure_name in figure_names:
|
||||
ref_pdf = os.path.join(reference_figure_dir, figure_name)
|
||||
target_pdf = os.path.join(target_figure_dir, figure_name)
|
||||
|
||||
if not os.path.exists(ref_pdf):
|
||||
raise FileNotFoundError(f"Reference figure not found: {ref_pdf}")
|
||||
if not os.path.exists(target_pdf):
|
||||
raise FileNotFoundError(f"Target figure not found: {target_pdf}")
|
||||
|
||||
comparison = compare_pdf_images(ref_pdf, target_pdf)
|
||||
comparison["name"] = figure_name
|
||||
results.append(comparison)
|
||||
|
||||
return results
|
||||
data.append([float(x) for x in parts[:8]])
|
||||
return np.array(data)
|
||||
|
||||
def calculate_all_rms_errors(bh_data_ref, bh_data_target):
|
||||
"""
|
||||
@@ -300,7 +165,7 @@ def print_rms_results(rms_dict, error, threshold=1.0):
|
||||
|
||||
return all_passed
|
||||
|
||||
def print_constraint_results(results, threshold=2.0):
|
||||
def print_constraint_results(results, threshold=2.0):
|
||||
print(f"\n{Color.BOLD}2. ADM Constraint Violation Analysis (Grid Level 0){Color.RESET}")
|
||||
print("-" * 65)
|
||||
|
||||
@@ -315,49 +180,22 @@ def print_constraint_results(results, threshold=2.0):
|
||||
print(f"\n Maximum violation: {results['max_violation']:.6f}")
|
||||
print(f" Requirement: < {threshold}")
|
||||
print(f" Status: {get_status_text(passed)}")
|
||||
|
||||
return passed
|
||||
|
||||
|
||||
def print_figure_results(results, threshold_percent=0.001):
|
||||
print(f"\n{Color.BOLD}3. Figure Pixel Comparison (PDF Rasterization){Color.RESET}")
|
||||
print("-" * 65)
|
||||
print(f" Requirement: < {threshold_percent:.3f}% differing pixels\n")
|
||||
|
||||
all_passed = True
|
||||
for result in results:
|
||||
passed = result["passed"]
|
||||
all_passed = all_passed and passed
|
||||
status = get_status_text(passed)
|
||||
print(f" {result['name']:32}: {result['diff_percent']:10.6f}% | Status: {status}")
|
||||
|
||||
if result["pages_ref"] != result["pages_target"]:
|
||||
print(f" {'':32} pages(ref/target): {result['pages_ref']}/{result['pages_target']}")
|
||||
|
||||
return all_passed
|
||||
|
||||
|
||||
def print_figure_error(error_message):
|
||||
print(f"\n{Color.BOLD}3. Figure Pixel Comparison (PDF Rasterization){Color.RESET}")
|
||||
print("-" * 65)
|
||||
print(f" {Color.RED}Error: {error_message}{Color.RESET}")
|
||||
return False
|
||||
|
||||
|
||||
def print_summary(rms_passed, constraint_passed, figure_passed):
|
||||
print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
|
||||
print(Color.BOLD + "Verification Summary" + Color.RESET)
|
||||
print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
|
||||
|
||||
all_passed = rms_passed and constraint_passed and figure_passed
|
||||
|
||||
res_rms = get_status_text(rms_passed)
|
||||
res_con = get_status_text(constraint_passed)
|
||||
res_fig = get_status_text(figure_passed)
|
||||
|
||||
print(f" [1] Comprehensive RMS check: {res_rms}")
|
||||
print(f" [2] ADM constraint check: {res_con}")
|
||||
print(f" [3] Figure pixel comparison: {res_fig}")
|
||||
|
||||
return passed
|
||||
|
||||
|
||||
def print_summary(rms_passed, constraint_passed):
|
||||
print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
|
||||
print(Color.BOLD + "Verification Summary" + Color.RESET)
|
||||
print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
|
||||
|
||||
all_passed = rms_passed and constraint_passed
|
||||
|
||||
res_rms = get_status_text(rms_passed)
|
||||
res_con = get_status_text(constraint_passed)
|
||||
|
||||
print(f" [1] Comprehensive RMS check: {res_rms}")
|
||||
print(f" [2] ADM constraint check: {res_con}")
|
||||
|
||||
final_status = f"{Color.GREEN}{Color.BOLD}ALL CHECKS PASSED{Color.RESET}" if all_passed else f"{Color.RED}{Color.BOLD}SOME CHECKS FAILED{Color.RESET}"
|
||||
print(f"\n Overall result: {final_status}")
|
||||
@@ -372,14 +210,12 @@ def main():
|
||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
target_dir = os.path.join(script_dir, "GW150914/AMSS_NCKU_output")
|
||||
|
||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
reference_dir = os.path.join(script_dir, "GW150914-origin/AMSS_NCKU_output")
|
||||
target_figure_dir = resolve_figure_dir(target_dir)
|
||||
reference_figure_dir = os.path.join(script_dir, "GW150914-origin/figure")
|
||||
|
||||
bh_file_ref = os.path.join(reference_dir, "bssn_BH.dat")
|
||||
bh_file_target = os.path.join(target_dir, "bssn_BH.dat")
|
||||
constraint_file = os.path.join(target_dir, "bssn_constraint.dat")
|
||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
reference_dir = os.path.join(script_dir, "GW150914-origin/AMSS_NCKU_output")
|
||||
|
||||
bh_file_ref = os.path.join(reference_dir, "bssn_BH.dat")
|
||||
bh_file_target = os.path.join(target_dir, "bssn_BH.dat")
|
||||
constraint_file = os.path.join(target_dir, "bssn_constraint.dat")
|
||||
|
||||
if not os.path.exists(bh_file_ref):
|
||||
print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Baseline trajectory file not found: {bh_file_ref}")
|
||||
@@ -391,11 +227,9 @@ def main():
|
||||
print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Constraint data file not found: {constraint_file}")
|
||||
sys.exit(1)
|
||||
|
||||
print_header()
|
||||
print(f"\n{Color.BOLD}Reference (Baseline):{Color.RESET} {Color.BLUE}{reference_dir}{Color.RESET}")
|
||||
print(f"{Color.BOLD}Target (Optimized): {Color.RESET} {Color.BLUE}{target_dir}{Color.RESET}")
|
||||
print(f"{Color.BOLD}Reference Figures: {Color.RESET} {Color.BLUE}{reference_figure_dir}{Color.RESET}")
|
||||
print(f"{Color.BOLD}Target Figures: {Color.RESET} {Color.BLUE}{target_figure_dir}{Color.RESET}")
|
||||
print_header()
|
||||
print(f"\n{Color.BOLD}Reference (Baseline):{Color.RESET} {Color.BLUE}{reference_dir}{Color.RESET}")
|
||||
print(f"{Color.BOLD}Target (Optimized): {Color.RESET} {Color.BLUE}{target_dir}{Color.RESET}")
|
||||
|
||||
bh_data_ref = load_bh_trajectory(bh_file_ref)
|
||||
bh_data_target = load_bh_trajectory(bh_file_target)
|
||||
@@ -405,18 +239,12 @@ def main():
|
||||
rms_dict, error = calculate_all_rms_errors(bh_data_ref, bh_data_target)
|
||||
rms_passed = print_rms_results(rms_dict, error)
|
||||
|
||||
# Output constraint results
|
||||
constraint_results = analyze_constraint_violation(constraint_data)
|
||||
constraint_passed = print_constraint_results(constraint_results)
|
||||
|
||||
try:
|
||||
figure_results = compare_required_figures(reference_figure_dir, target_figure_dir)
|
||||
figure_passed = print_figure_results(figure_results)
|
||||
except (FileNotFoundError, RuntimeError) as exc:
|
||||
figure_passed = print_figure_error(str(exc))
|
||||
|
||||
all_passed = print_summary(rms_passed, constraint_passed, figure_passed)
|
||||
sys.exit(0 if all_passed else 1)
|
||||
# Output constraint results
|
||||
constraint_results = analyze_constraint_violation(constraint_data)
|
||||
constraint_passed = print_constraint_results(constraint_results)
|
||||
|
||||
all_passed = print_summary(rms_passed, constraint_passed)
|
||||
sys.exit(0 if all_passed else 1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -100,14 +100,12 @@ namespace Parallel
|
||||
MyList<gridseg> **combined_dst;
|
||||
int *send_lengths;
|
||||
int *recv_lengths;
|
||||
double **send_bufs;
|
||||
double **recv_bufs;
|
||||
int *send_buf_caps;
|
||||
int *recv_buf_caps;
|
||||
unsigned char *send_buf_pinned;
|
||||
unsigned char *recv_buf_pinned;
|
||||
MPI_Request *reqs;
|
||||
MPI_Status *stats;
|
||||
double **send_bufs;
|
||||
double **recv_bufs;
|
||||
int *send_buf_caps;
|
||||
int *recv_buf_caps;
|
||||
MPI_Request *reqs;
|
||||
MPI_Status *stats;
|
||||
int max_reqs;
|
||||
bool lengths_valid;
|
||||
int *tc_req_node;
|
||||
@@ -118,11 +116,10 @@ namespace Parallel
|
||||
void destroy();
|
||||
};
|
||||
|
||||
void Sync_cached(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, SyncCache &cache);
|
||||
void Sync_ensure_cache(MyList<Patch> *PatL, int Symmetry, SyncCache &cache);
|
||||
void transfer_cached(MyList<gridseg> **src, MyList<gridseg> **dst,
|
||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
||||
int Symmetry, SyncCache &cache);
|
||||
void Sync_cached(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, SyncCache &cache);
|
||||
void transfer_cached(MyList<gridseg> **src, MyList<gridseg> **dst,
|
||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
||||
int Symmetry, SyncCache &cache);
|
||||
|
||||
struct AsyncSyncState {
|
||||
int req_no;
|
||||
@@ -182,13 +179,12 @@ namespace Parallel
|
||||
MyList<Parallel::gridseg> *clone_gsl(MyList<Parallel::gridseg> *p, bool first_only);
|
||||
MyList<Parallel::gridseg> *build_bulk_gsl(Patch *Pat); // similar to build_owned_gsl0 but does not care rank issue
|
||||
MyList<Parallel::gridseg> *build_bulk_gsl(Block *bp, Patch *Pat);
|
||||
void build_PhysBD_gstl(Patch *Pat, MyList<Parallel::gridseg> *srci, MyList<Parallel::gridseg> *dsti,
|
||||
MyList<Parallel::gridseg> **out_src, MyList<Parallel::gridseg> **out_dst);
|
||||
void PeriodicBD(Patch *Pat, MyList<var> *VarList, int Symmetry);
|
||||
double L2Norm(Patch *Pat, var *vf);
|
||||
void L2Norm7(Patch *Pat, var **vf, double *norms);
|
||||
void checkgsl(MyList<Parallel::gridseg> *pp, bool first_only);
|
||||
void checkvarl(MyList<var> *pp, bool first_only);
|
||||
void build_PhysBD_gstl(Patch *Pat, MyList<Parallel::gridseg> *srci, MyList<Parallel::gridseg> *dsti,
|
||||
MyList<Parallel::gridseg> **out_src, MyList<Parallel::gridseg> **out_dst);
|
||||
void PeriodicBD(Patch *Pat, MyList<var> *VarList, int Symmetry);
|
||||
double L2Norm(Patch *Pat, var *vf);
|
||||
void checkgsl(MyList<Parallel::gridseg> *pp, bool first_only);
|
||||
void checkvarl(MyList<var> *pp, bool first_only);
|
||||
MyList<Parallel::gridseg> *divide_gsl(MyList<Parallel::gridseg> *p, Patch *Pat);
|
||||
MyList<Parallel::gridseg> *divide_gs(MyList<Parallel::gridseg> *p, Patch *Pat);
|
||||
void prepare_inter_time_level(Patch *Pat,
|
||||
@@ -220,12 +216,11 @@ namespace Parallel
|
||||
void aligncheck(double *bbox0, double *bboxl, int lev, double *DH0, int *shape);
|
||||
bool point_locat_gsl(double *pox, MyList<Parallel::gridseg> *gsl);
|
||||
void checkpatchlist(MyList<Patch> *PatL, bool buflog);
|
||||
|
||||
double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here);
|
||||
void L2Norm7(Patch *Pat, var **vf, double *norms, MPI_Comm Comm_here);
|
||||
bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
|
||||
int NN, double **XX,
|
||||
double *Shellf, int Symmetry, MPI_Comm Comm_here);
|
||||
|
||||
double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here);
|
||||
bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
|
||||
int NN, double **XX,
|
||||
double *Shellf, int Symmetry, MPI_Comm Comm_here);
|
||||
#if (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
||||
MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
|
||||
bool periodic, int start_rank, int end_rank, int nodes = 0);
|
||||
|
||||
@@ -3439,10 +3439,10 @@ void ShellPatch::write_Pablo_file_ss(int *ext, double xmin, double xmax, double
|
||||
delete[] Z;
|
||||
}
|
||||
|
||||
double ShellPatch::L2Norm(var *vf)
|
||||
{
|
||||
double tvf, dtvf = 0;
|
||||
int BDW = overghost;
|
||||
double ShellPatch::L2Norm(var *vf)
|
||||
{
|
||||
double tvf, dtvf = 0;
|
||||
int BDW = overghost;
|
||||
|
||||
MyList<ss_patch> *sPp = PatL;
|
||||
while (sPp)
|
||||
@@ -3469,50 +3469,13 @@ double ShellPatch::L2Norm(var *vf)
|
||||
MPI_Allreduce(&dtvf, &tvf, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
|
||||
tvf = sqrt(tvf);
|
||||
|
||||
return tvf;
|
||||
}
|
||||
void ShellPatch::L2Norm7(var **vf, double *norms)
|
||||
{
|
||||
double tvf[7], dtvf[7];
|
||||
int BDW = overghost;
|
||||
for (int i = 0; i < 7; i++)
|
||||
dtvf[i] = 0;
|
||||
|
||||
MyList<ss_patch> *sPp = PatL;
|
||||
while (sPp)
|
||||
{
|
||||
MyList<Block> *Bp = sPp->data->blb;
|
||||
while (Bp)
|
||||
{
|
||||
Block *cg = Bp->data;
|
||||
if (myrank == cg->rank)
|
||||
{
|
||||
f_l2normhelper7(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2],
|
||||
sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5],
|
||||
cg->fgfs[vf[0]->sgfn], cg->fgfs[vf[1]->sgfn], cg->fgfs[vf[2]->sgfn],
|
||||
cg->fgfs[vf[3]->sgfn], cg->fgfs[vf[4]->sgfn], cg->fgfs[vf[5]->sgfn],
|
||||
cg->fgfs[vf[6]->sgfn], tvf, BDW);
|
||||
for (int i = 0; i < 7; i++)
|
||||
dtvf[i] += tvf[i];
|
||||
}
|
||||
if (Bp == sPp->data->ble)
|
||||
break;
|
||||
Bp = Bp->next;
|
||||
}
|
||||
sPp = sPp->next;
|
||||
}
|
||||
|
||||
MPI_Allreduce(dtvf, tvf, 7, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
|
||||
for (int i = 0; i < 7; i++)
|
||||
norms[i] = sqrt(tvf[i]);
|
||||
}
|
||||
|
||||
// find maximum of abstract value, XX store position for maximum, Shellf store maximum themselvs
|
||||
void ShellPatch::Find_Maximum(MyList<var> *VarList, double *XX,
|
||||
double *Shellf)
|
||||
|
||||
return tvf;
|
||||
}
|
||||
|
||||
// find maximum of abstract value, XX store position for maximum, Shellf store maximum themselvs
|
||||
void ShellPatch::Find_Maximum(MyList<var> *VarList, double *XX,
|
||||
double *Shellf)
|
||||
{
|
||||
MyList<var> *varl;
|
||||
int num_var = 0;
|
||||
|
||||
@@ -195,11 +195,10 @@ public:
|
||||
bool Interp_One_Point(MyList<var> *VarList,
|
||||
double *XX, /*input global Cartesian coordinate*/
|
||||
double *Shellf, int Symmetry);
|
||||
void write_Pablo_file_ss(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax,
|
||||
char *filename, int sst);
|
||||
double L2Norm(var *vf);
|
||||
void L2Norm7(var **vf, double *norms);
|
||||
void Find_Maximum(MyList<var> *VarList, double *XX, double *Shellf);
|
||||
};
|
||||
void write_Pablo_file_ss(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax,
|
||||
char *filename, int sst);
|
||||
double L2Norm(var *vf);
|
||||
void Find_Maximum(MyList<var> *VarList, double *XX, double *Shellf);
|
||||
};
|
||||
|
||||
#endif /* SHELLPATCH_H */
|
||||
|
||||
@@ -26,20 +26,12 @@ using namespace std;
|
||||
#include "shellfunctions.h"
|
||||
#include "cpbc.h"
|
||||
#include "kodiss.h"
|
||||
#include "parameters.h"
|
||||
|
||||
#ifndef USE_CUDA_Z4C
|
||||
#define USE_CUDA_Z4C 0
|
||||
#endif
|
||||
|
||||
#if USE_CUDA_Z4C && (ABEtype == 2)
|
||||
#include "z4c_rhs_cuda.h"
|
||||
#endif
|
||||
|
||||
#ifdef With_AHF
|
||||
#include "derivatives.h"
|
||||
#include "myglobal.h"
|
||||
#endif
|
||||
#include "parameters.h"
|
||||
|
||||
#ifdef With_AHF
|
||||
#include "derivatives.h"
|
||||
#include "myglobal.h"
|
||||
#endif
|
||||
|
||||
//================================================================================================
|
||||
|
||||
@@ -175,554 +167,12 @@ Z4c_class::~Z4c_class()
|
||||
|
||||
#define MRBD 0 // 0: fix BD for meshrefinement level; 1: sommerfeld_bam for them; 2: sommerfeld_yo for them
|
||||
|
||||
#ifndef CPBC
|
||||
// for sommerfeld boundary
|
||||
|
||||
#if USE_CUDA_Z4C && (ABEtype == 2)
|
||||
#ifdef WithShell
|
||||
#error "USE_CUDA_Z4C resident path currently supports Cartesian non-shell Z4C only"
|
||||
#endif
|
||||
#if (MRBD == 2)
|
||||
#error "USE_CUDA_Z4C resident path does not support MRBD == 2"
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
static const int k_z4c_cuda_bh_state_indices[3] = {18, 19, 20};
|
||||
|
||||
bool fill_z4c_cuda_views(Block *cg, MyList<var> *vars,
|
||||
double **host_views,
|
||||
double *propspeeds = 0,
|
||||
double *soa_flat = 0)
|
||||
{
|
||||
int idx = 0;
|
||||
while (vars && idx < Z4C_CUDA_STATE_COUNT)
|
||||
{
|
||||
host_views[idx] = cg->fgfs[vars->data->sgfn];
|
||||
if (propspeeds)
|
||||
propspeeds[idx] = vars->data->propspeed;
|
||||
if (soa_flat)
|
||||
{
|
||||
soa_flat[3 * idx + 0] = vars->data->SoA[0];
|
||||
soa_flat[3 * idx + 1] = vars->data->SoA[1];
|
||||
soa_flat[3 * idx + 2] = vars->data->SoA[2];
|
||||
}
|
||||
vars = vars->next;
|
||||
++idx;
|
||||
}
|
||||
return idx == Z4C_CUDA_STATE_COUNT && vars == 0;
|
||||
}
|
||||
|
||||
void z4c_cuda_download_level_state(MyList<Patch> *PatL, MyList<var> *vars, int myrank, bool release_ctx)
|
||||
{
|
||||
MyList<Patch> *Pp = PatL;
|
||||
while (Pp)
|
||||
{
|
||||
MyList<Block> *BP = Pp->data->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *cg = BP->data;
|
||||
if (myrank == cg->rank && z4c_cuda_has_resident_state(cg))
|
||||
{
|
||||
double *state_out[Z4C_CUDA_STATE_COUNT];
|
||||
if (!fill_z4c_cuda_views(cg, vars, state_out))
|
||||
{
|
||||
cout << "CUDA Z4C state list mismatch on resident state download" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
if (z4c_cuda_download_resident_state(cg, cg->shape, state_out))
|
||||
{
|
||||
cout << "CUDA Z4C resident state download failed" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
if (release_ctx)
|
||||
z4c_cuda_release_step_ctx(cg);
|
||||
}
|
||||
if (BP == Pp->data->ble)
|
||||
break;
|
||||
BP = BP->next;
|
||||
}
|
||||
Pp = Pp->next;
|
||||
}
|
||||
}
|
||||
|
||||
bool z4c_cuda_patch_contains_point(Patch *patch, const double *point)
|
||||
{
|
||||
if (!patch)
|
||||
return false;
|
||||
for (int d = 0; d < dim; d++)
|
||||
{
|
||||
const double h = patch->getdX(d);
|
||||
const double lo = patch->bbox[d] + patch->lli[d] * h;
|
||||
const double hi = patch->bbox[dim + d] - patch->uui[d] * h;
|
||||
if (point[d] < lo || point[d] > hi)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool z4c_cuda_point_in_block(Patch *patch, Block *block,
|
||||
const double *point, const double *DH)
|
||||
{
|
||||
if (!patch || !block)
|
||||
return false;
|
||||
for (int d = 0; d < dim; d++)
|
||||
{
|
||||
double llb;
|
||||
double uub;
|
||||
#ifdef Vertex
|
||||
#ifdef Cell
|
||||
#error Both Cell and Vertex are defined
|
||||
#endif
|
||||
llb = (feq(block->bbox[d], patch->bbox[d], DH[d] / 2))
|
||||
? block->bbox[d] + patch->lli[d] * DH[d]
|
||||
: block->bbox[d] + (ghost_width - 0.5) * DH[d];
|
||||
uub = (feq(block->bbox[dim + d], patch->bbox[dim + d], DH[d] / 2))
|
||||
? block->bbox[dim + d] - patch->uui[d] * DH[d]
|
||||
: block->bbox[dim + d] - (ghost_width - 0.5) * DH[d];
|
||||
#else
|
||||
#ifdef Cell
|
||||
llb = (feq(block->bbox[d], patch->bbox[d], DH[d] / 2))
|
||||
? block->bbox[d] + patch->lli[d] * DH[d]
|
||||
: block->bbox[d] + ghost_width * DH[d];
|
||||
uub = (feq(block->bbox[dim + d], patch->bbox[dim + d], DH[d] / 2))
|
||||
? block->bbox[dim + d] - patch->uui[d] * DH[d]
|
||||
: block->bbox[dim + d] - ghost_width * DH[d];
|
||||
#else
|
||||
#error Not define Vertex nor Cell
|
||||
#endif
|
||||
#endif
|
||||
if (point[d] - llb < -DH[d] / 2 || point[d] - uub > DH[d] / 2)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
int z4c_cuda_interp_tile_start(const double *coords, int n, double x, double dx, int ordn)
|
||||
{
|
||||
if (!coords || n <= ordn)
|
||||
return 0;
|
||||
int cxi = int((x - coords[0]) / dx + 0.4) + 1;
|
||||
int start = cxi - ordn / 2;
|
||||
if (start < 0)
|
||||
start = 0;
|
||||
const int max_start = n - ordn;
|
||||
if (start > max_start)
|
||||
start = max_start;
|
||||
return start;
|
||||
}
|
||||
|
||||
bool z4c_cuda_interp_bh_point_resident(MyList<Patch> *PatL,
|
||||
int myrank,
|
||||
const double *point,
|
||||
var *forx, var *fory, var *forz,
|
||||
int Symmetry,
|
||||
double *shellf)
|
||||
{
|
||||
const int ordn = 2 * ghost_width;
|
||||
int owner_rank = -1;
|
||||
|
||||
shellf[0] = shellf[1] = shellf[2] = 0.0;
|
||||
|
||||
MyList<Patch> *PL = PatL;
|
||||
while (PL)
|
||||
{
|
||||
Patch *patch = PL->data;
|
||||
if (!z4c_cuda_patch_contains_point(patch, point))
|
||||
{
|
||||
PL = PL->next;
|
||||
continue;
|
||||
}
|
||||
|
||||
double DH[dim];
|
||||
for (int d = 0; d < dim; d++)
|
||||
DH[d] = patch->getdX(d);
|
||||
|
||||
MyList<Block> *BP = patch->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *block = BP->data;
|
||||
if (z4c_cuda_point_in_block(patch, block, point, DH))
|
||||
{
|
||||
owner_rank = block->rank;
|
||||
if (myrank == owner_rank)
|
||||
{
|
||||
int interp_ordn = ordn;
|
||||
int interp_sym = Symmetry;
|
||||
double x = point[0];
|
||||
double y = point[1];
|
||||
double z = point[2];
|
||||
|
||||
if (z4c_cuda_has_resident_state(block) &&
|
||||
block->shape[0] >= ordn && block->shape[1] >= ordn && block->shape[2] >= ordn)
|
||||
{
|
||||
const int sx = ordn;
|
||||
const int sy = ordn;
|
||||
const int sz = ordn;
|
||||
const int region_all = sx * sy * sz;
|
||||
const int i0 = z4c_cuda_interp_tile_start(block->X[0], block->shape[0], x, DH[0], ordn);
|
||||
const int j0 = z4c_cuda_interp_tile_start(block->X[1], block->shape[1], y, DH[1], ordn);
|
||||
const int k0 = z4c_cuda_interp_tile_start(block->X[2], block->shape[2], z, DH[2], ordn);
|
||||
double *packed_fields = new double[3 * region_all];
|
||||
var *vars[3] = {forx, fory, forz};
|
||||
for (int f = 0; f < 3; f++)
|
||||
{
|
||||
if (z4c_cuda_pack_state_region_to_host_buffer(block,
|
||||
k_z4c_cuda_bh_state_indices[f],
|
||||
packed_fields + f * region_all,
|
||||
block->shape,
|
||||
i0, j0, k0,
|
||||
sx, sy, sz) != 0)
|
||||
{
|
||||
delete[] packed_fields;
|
||||
cout << "CUDA Z4C BH tile download failed" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
int tile_shape[3] = {sx, sy, sz};
|
||||
f_global_interp(tile_shape,
|
||||
block->X[0] + i0,
|
||||
block->X[1] + j0,
|
||||
block->X[2] + k0,
|
||||
packed_fields + f * region_all,
|
||||
shellf[f],
|
||||
x, y, z,
|
||||
interp_ordn,
|
||||
vars[f]->SoA,
|
||||
interp_sym);
|
||||
}
|
||||
delete[] packed_fields;
|
||||
}
|
||||
else
|
||||
{
|
||||
f_global_interp(block->shape, block->X[0], block->X[1], block->X[2],
|
||||
block->fgfs[forx->sgfn], shellf[0],
|
||||
x, y, z, interp_ordn, forx->SoA, interp_sym);
|
||||
f_global_interp(block->shape, block->X[0], block->X[1], block->X[2],
|
||||
block->fgfs[fory->sgfn], shellf[1],
|
||||
x, y, z, interp_ordn, fory->SoA, interp_sym);
|
||||
f_global_interp(block->shape, block->X[0], block->X[1], block->X[2],
|
||||
block->fgfs[forz->sgfn], shellf[2],
|
||||
x, y, z, interp_ordn, forz->SoA, interp_sym);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (BP == patch->ble)
|
||||
break;
|
||||
BP = BP->next;
|
||||
}
|
||||
|
||||
if (owner_rank >= 0)
|
||||
break;
|
||||
PL = PL->next;
|
||||
}
|
||||
|
||||
if (owner_rank < 0)
|
||||
return false;
|
||||
|
||||
MPI_Bcast(shellf, 3, MPI_DOUBLE, owner_rank, MPI_COMM_WORLD);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool z4c_cuda_compute_porg_rhs_resident(cgh *GH,
|
||||
int ilev,
|
||||
int myrank,
|
||||
int BH_num,
|
||||
double **BH_PS,
|
||||
double **BH_RHS,
|
||||
var *forx, var *fory, var *forz,
|
||||
int Symmetry)
|
||||
{
|
||||
for (int n = 0; n < BH_num; n++)
|
||||
{
|
||||
double shellf[3] = {0.0, 0.0, 0.0};
|
||||
int lev = ilev;
|
||||
while (lev >= 0 &&
|
||||
!z4c_cuda_interp_bh_point_resident(GH->PatL[lev], myrank, BH_PS[n],
|
||||
forx, fory, forz, Symmetry, shellf))
|
||||
{
|
||||
--lev;
|
||||
}
|
||||
if (lev < 0)
|
||||
return false;
|
||||
BH_RHS[n][0] = -shellf[0];
|
||||
BH_RHS[n][1] = -shellf[1];
|
||||
BH_RHS[n][2] = -shellf[2];
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
#endif
|
||||
|
||||
void Z4c_class::Step(int lev, int YN)
|
||||
{
|
||||
#if USE_CUDA_Z4C && (ABEtype == 2)
|
||||
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
|
||||
#ifdef With_AHF
|
||||
AH_Step_Find(lev, dT_lev);
|
||||
#endif
|
||||
bool BB = fgt(PhysTime, StartTime, dT_lev / 2);
|
||||
double ndeps = numepss;
|
||||
if (lev < GH->movls)
|
||||
ndeps = numepsb;
|
||||
double TRK4 = PhysTime;
|
||||
int iter_count = 0;
|
||||
int pre = 0, cor = 1;
|
||||
int ERROR = 0;
|
||||
|
||||
MyList<Patch> *Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
MyList<Block> *BP = Pp->data->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *cg = BP->data;
|
||||
if (myrank == cg->rank)
|
||||
{
|
||||
double *state_in[Z4C_CUDA_STATE_COUNT];
|
||||
double *state_out[Z4C_CUDA_STATE_COUNT];
|
||||
double propspeed[Z4C_CUDA_STATE_COUNT];
|
||||
double soa_flat[3 * Z4C_CUDA_STATE_COUNT];
|
||||
if (!fill_z4c_cuda_views(cg, StateList, state_in, propspeed, soa_flat) ||
|
||||
!fill_z4c_cuda_views(cg, SynchList_pre, state_out))
|
||||
{
|
||||
cout << "CUDA Z4C state list mismatch on predictor step" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
int apply_bam_bc = 0;
|
||||
#if (MRBD == 0)
|
||||
#if (SommerType == 0)
|
||||
apply_bam_bc = (lev == 0) ? 1 : 0;
|
||||
#endif
|
||||
#elif (MRBD == 1)
|
||||
apply_bam_bc = 1;
|
||||
#endif
|
||||
int keep_resident_state = 1;
|
||||
int apply_enforce_ga = 0;
|
||||
#if (AGM == 0)
|
||||
apply_enforce_ga = 1;
|
||||
#endif
|
||||
if (z4c_cuda_rk4_substep(cg,
|
||||
cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
state_in, state_out,
|
||||
propspeed, soa_flat, Pp->data->bbox,
|
||||
dT_lev, TRK4, iter_count, apply_bam_bc,
|
||||
Symmetry, lev, ndeps, pre,
|
||||
keep_resident_state, apply_enforce_ga, chitiny))
|
||||
{
|
||||
cout << "CUDA Z4C predictor substep failed in domain: ("
|
||||
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
|
||||
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
|
||||
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
|
||||
ERROR = 1;
|
||||
}
|
||||
}
|
||||
if (BP == Pp->data->ble)
|
||||
break;
|
||||
BP = BP->next;
|
||||
}
|
||||
Pp = Pp->next;
|
||||
}
|
||||
|
||||
{
|
||||
int erh = ERROR;
|
||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||
}
|
||||
if (ERROR)
|
||||
{
|
||||
if (myrank == 0 && ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "CUDA Z4C failed in predictor at t = " << PhysTime
|
||||
<< ", lev = " << lev << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
|
||||
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
|
||||
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev);
|
||||
for (int ithBH = 0; ithBH < BH_num; ithBH++)
|
||||
{
|
||||
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg[ithBH][0], Porg_rhs[ithBH][0], iter_count);
|
||||
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg[ithBH][1], Porg_rhs[ithBH][1], iter_count);
|
||||
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg[ithBH][2], Porg_rhs[ithBH][2], iter_count);
|
||||
if (Symmetry > 0)
|
||||
Porg[ithBH][2] = fabs(Porg[ithBH][2]);
|
||||
if (Symmetry == 2)
|
||||
{
|
||||
Porg[ithBH][0] = fabs(Porg[ithBH][0]);
|
||||
Porg[ithBH][1] = fabs(Porg[ithBH][1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ((lev == a_lev) && (LastAnas + dT_lev >= AnasTime))
|
||||
z4c_cuda_download_level_state(GH->PatL[lev], SynchList_pre, myrank, false);
|
||||
if (lev == a_lev)
|
||||
AnalysisStuff(lev, dT_lev);
|
||||
|
||||
for (iter_count = 1; iter_count < 4; iter_count++)
|
||||
{
|
||||
if (iter_count == 1 || iter_count == 3)
|
||||
TRK4 += dT_lev / 2;
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
MyList<Block> *BP = Pp->data->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *cg = BP->data;
|
||||
if (myrank == cg->rank)
|
||||
{
|
||||
double *state_in[Z4C_CUDA_STATE_COUNT];
|
||||
double *state_out[Z4C_CUDA_STATE_COUNT];
|
||||
double propspeed[Z4C_CUDA_STATE_COUNT];
|
||||
double soa_flat[3 * Z4C_CUDA_STATE_COUNT];
|
||||
if (!fill_z4c_cuda_views(cg, SynchList_pre, state_in, propspeed, soa_flat) ||
|
||||
!fill_z4c_cuda_views(cg, SynchList_cor, state_out))
|
||||
{
|
||||
cout << "CUDA Z4C state list mismatch on corrector step" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
int apply_bam_bc = 0;
|
||||
#if (MRBD == 0)
|
||||
#if (SommerType == 0)
|
||||
apply_bam_bc = (lev == 0) ? 1 : 0;
|
||||
#endif
|
||||
#elif (MRBD == 1)
|
||||
apply_bam_bc = 1;
|
||||
#endif
|
||||
int keep_resident_state = 1;
|
||||
int apply_enforce_ga = 0;
|
||||
#if (AGM == 0)
|
||||
apply_enforce_ga = 1;
|
||||
#elif (AGM == 1)
|
||||
apply_enforce_ga = (iter_count == 3) ? 1 : 0;
|
||||
#endif
|
||||
if (z4c_cuda_rk4_substep(cg,
|
||||
cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
state_in, state_out,
|
||||
propspeed, soa_flat, Pp->data->bbox,
|
||||
dT_lev, TRK4, iter_count, apply_bam_bc,
|
||||
Symmetry, lev, ndeps, cor,
|
||||
keep_resident_state, apply_enforce_ga, chitiny))
|
||||
{
|
||||
cout << "CUDA Z4C corrector substep failed in domain: ("
|
||||
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
|
||||
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
|
||||
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
|
||||
ERROR = 1;
|
||||
}
|
||||
}
|
||||
if (BP == Pp->data->ble)
|
||||
break;
|
||||
BP = BP->next;
|
||||
}
|
||||
Pp = Pp->next;
|
||||
}
|
||||
|
||||
{
|
||||
int erh = ERROR;
|
||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||
}
|
||||
if (ERROR)
|
||||
{
|
||||
if (myrank == 0 && ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "CUDA Z4C failed in RK4 substep#" << iter_count
|
||||
<< " at t = " << PhysTime
|
||||
<< ", lev = " << lev << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
|
||||
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
||||
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
if (!z4c_cuda_compute_porg_rhs_resident(GH, lev, myrank, BH_num,
|
||||
Porg, Porg1,
|
||||
Sfx, Sfy, Sfz, Symmetry))
|
||||
{
|
||||
if (myrank == 0 && ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "CUDA Z4C failed to interpolate black-hole shift at t = "
|
||||
<< PhysTime << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
for (int ithBH = 0; ithBH < BH_num; ithBH++)
|
||||
{
|
||||
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg1[ithBH][0], Porg_rhs[ithBH][0], iter_count);
|
||||
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg1[ithBH][1], Porg_rhs[ithBH][1], iter_count);
|
||||
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg1[ithBH][2], Porg_rhs[ithBH][2], iter_count);
|
||||
if (Symmetry > 0)
|
||||
Porg1[ithBH][2] = fabs(Porg1[ithBH][2]);
|
||||
if (Symmetry == 2)
|
||||
{
|
||||
Porg1[ithBH][0] = fabs(Porg1[ithBH][0]);
|
||||
Porg1[ithBH][1] = fabs(Porg1[ithBH][1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (iter_count < 3)
|
||||
{
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
MyList<Block> *BP = Pp->data->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *cg = BP->data;
|
||||
cg->swapList(SynchList_pre, SynchList_cor, myrank);
|
||||
if (BP == Pp->data->ble)
|
||||
break;
|
||||
BP = BP->next;
|
||||
}
|
||||
Pp = Pp->next;
|
||||
}
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
for (int ithBH = 0; ithBH < BH_num; ithBH++)
|
||||
{
|
||||
Porg[ithBH][0] = Porg1[ithBH][0];
|
||||
Porg[ithBH][1] = Porg1[ithBH][1];
|
||||
Porg[ithBH][2] = Porg1[ithBH][2];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
z4c_cuda_download_level_state(GH->PatL[lev], SynchList_cor, myrank, true);
|
||||
|
||||
#if (RPS == 0)
|
||||
RestrictProlong(lev, YN, BB);
|
||||
#endif
|
||||
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
MyList<Block> *BP = Pp->data->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *cg = BP->data;
|
||||
cg->swapList(StateList, SynchList_cor, myrank);
|
||||
cg->swapList(OldStateList, SynchList_cor, myrank);
|
||||
if (BP == Pp->data->ble)
|
||||
break;
|
||||
BP = BP->next;
|
||||
}
|
||||
Pp = Pp->next;
|
||||
}
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
for (int ithBH = 0; ithBH < BH_num; ithBH++)
|
||||
{
|
||||
Porg0[ithBH][0] = Porg1[ithBH][0];
|
||||
Porg0[ithBH][1] = Porg1[ithBH][1];
|
||||
Porg0[ithBH][2] = Porg1[ithBH][2];
|
||||
}
|
||||
}
|
||||
#else
|
||||
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
|
||||
#ifndef CPBC
|
||||
// for sommerfeld boundary
|
||||
|
||||
void Z4c_class::Step(int lev, int YN)
|
||||
{
|
||||
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
|
||||
#ifdef With_AHF
|
||||
AH_Step_Find(lev, dT_lev);
|
||||
#endif
|
||||
@@ -1589,19 +1039,15 @@ void Z4c_class::Step(int lev, int YN)
|
||||
{
|
||||
Porg0[ithBH][0] = Porg1[ithBH][0];
|
||||
Porg0[ithBH][1] = Porg1[ithBH][1];
|
||||
Porg0[ithBH][2] = Porg1[ithBH][2];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
// for constraint preserving boundary (CPBC)
|
||||
#if USE_CUDA_Z4C && (ABEtype == 2)
|
||||
#error "USE_CUDA_Z4C resident path does not support CPBC"
|
||||
#endif
|
||||
#ifndef WithShell
|
||||
#error "CPBC only supports Shell"
|
||||
#endif
|
||||
Porg0[ithBH][2] = Porg1[ithBH][2];
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
// for constraint preserving boundary (CPBC)
|
||||
#ifndef WithShell
|
||||
#error "CPBC only supports Shell"
|
||||
#endif
|
||||
|
||||
// 0: extroplate rhs, 1: extroplate variable
|
||||
// 2: extroplate variable but before RHS calculation
|
||||
|
||||
@@ -94,31 +94,29 @@
|
||||
Hcon,Mxcon,Mycon,Mzcon,Gmxcon,Gmycon,Gmzcon, &
|
||||
Symmetry,Lev,eps,co)
|
||||
|
||||
if (co == 0) then
|
||||
#if (ABV == 0)
|
||||
call ricci_gamma(ex, X, Y, Z, &
|
||||
chi, &
|
||||
dxx , gxy , gxz , dyy , gyz , dzz,&
|
||||
Gamx , Gamy , Gamz , &
|
||||
Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
|
||||
Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
|
||||
Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
|
||||
Symmetry)
|
||||
#endif
|
||||
call constraint_bssn(ex, X, Y, Z,&
|
||||
chi,trK, &
|
||||
dxx,gxy,gxz,dyy,gyz,dzz, &
|
||||
Axx,Axy,Axz,Ayy,Ayz,Azz, &
|
||||
Gamx,Gamy,Gamz,&
|
||||
Lap,betax,betay,betaz,rho,Sx,Sy,Sz,&
|
||||
Gamxxx, Gamxxy, Gamxxz,Gamxyy, Gamxyz, Gamxzz, &
|
||||
Gamyxx, Gamyxy, Gamyxz,Gamyyy, Gamyyz, Gamyzz, &
|
||||
Gamzxx, Gamzxy, Gamzxz,Gamzyy, Gamzyz, Gamzzz, &
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz, &
|
||||
Hcon,Mxcon,Mycon,Mzcon,Gmxcon,Gmycon,Gmzcon, &
|
||||
Symmetry)
|
||||
endif
|
||||
#if (ABV == 0)
|
||||
call ricci_gamma(ex, X, Y, Z, &
|
||||
chi, &
|
||||
dxx , gxy , gxz , dyy , gyz , dzz,&
|
||||
Gamx , Gamy , Gamz , &
|
||||
Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
|
||||
Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
|
||||
Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
|
||||
Symmetry)
|
||||
#endif
|
||||
call constraint_bssn(ex, X, Y, Z,&
|
||||
chi,trK, &
|
||||
dxx,gxy,gxz,dyy,gyz,dzz, &
|
||||
Axx,Axy,Axz,Ayy,Ayz,Azz, &
|
||||
Gamx,Gamy,Gamz,&
|
||||
Lap,betax,betay,betaz,rho,Sx,Sy,Sz,&
|
||||
Gamxxx, Gamxxy, Gamxxz,Gamxyy, Gamxyz, Gamxzz, &
|
||||
Gamyxx, Gamyxy, Gamyxz,Gamyyy, Gamyyz, Gamyzz, &
|
||||
Gamzxx, Gamzxy, Gamzxz,Gamzyy, Gamzyz, Gamzzz, &
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz, &
|
||||
Hcon,Mxcon,Mycon,Mzcon,Gmxcon,Gmycon,Gmzcon, &
|
||||
Symmetry)
|
||||
|
||||
return
|
||||
|
||||
@@ -228,12 +226,11 @@
|
||||
|
||||
call get_Z4cparameters(kappa1,kappa2,kappa3,FF,eta)
|
||||
|
||||
!!! sanity check
|
||||
#ifdef DEBUG
|
||||
dX = sum(chi)+sum(trK)+sum(dxx)+sum(gxy)+sum(gxz)+sum(dyy)+sum(gyz)+sum(dzz) &
|
||||
+sum(Axx)+sum(Axy)+sum(Axz)+sum(Ayy)+sum(Ayz)+sum(Azz) &
|
||||
+sum(Gamx)+sum(Gamy)+sum(Gamz) &
|
||||
+sum(Lap)+sum(betax)+sum(betay)+sum(betaz)+sum(dtSfx)+sum(dtSfy)+sum(dtSfz) &
|
||||
!!! sanity check
|
||||
dX = sum(chi)+sum(trK)+sum(dxx)+sum(gxy)+sum(gxz)+sum(dyy)+sum(gyz)+sum(dzz) &
|
||||
+sum(Axx)+sum(Axy)+sum(Axz)+sum(Ayy)+sum(Ayz)+sum(Azz) &
|
||||
+sum(Gamx)+sum(Gamy)+sum(Gamz) &
|
||||
+sum(Lap)+sum(betax)+sum(betay)+sum(betaz)+sum(dtSfx)+sum(dtSfy)+sum(dtSfz) &
|
||||
+sum(TZ)
|
||||
if(dX.ne.dX) then
|
||||
if(sum(chi).ne.sum(chi))write(*,*)"Z4c_rhs.f90: find NaN in chi"
|
||||
@@ -260,11 +257,10 @@
|
||||
if(sum(dtSfx).ne.sum(dtSfx))write(*,*)"Z4c_rhs.f90: find NaN in dtSfx"
|
||||
if(sum(dtSfy).ne.sum(dtSfy))write(*,*)"Z4c_rhs.f90: find NaN in dtSfy"
|
||||
if(sum(dtSfz).ne.sum(dtSfz))write(*,*)"Z4c_rhs.f90: find NaN in dtSfz"
|
||||
if(sum(TZ).ne.sum(Tz))write(*,*)"Z4c_rhs.f90: find NaN in TZ"
|
||||
gont = 1
|
||||
return
|
||||
endif
|
||||
#endif
|
||||
if(sum(TZ).ne.sum(Tz))write(*,*)"Z4c_rhs.f90: find NaN in TZ"
|
||||
gont = 1
|
||||
return
|
||||
endif
|
||||
|
||||
PI = dacos(-ONE)
|
||||
|
||||
@@ -1267,32 +1263,30 @@
|
||||
|
||||
endif
|
||||
|
||||
if (co == 0) then
|
||||
#if (ABV == 0)
|
||||
call ricci_gamma(ex, X, Y, Z, &
|
||||
chi, &
|
||||
dxx , gxy , gxz , dyy , gyz , dzz,&
|
||||
Gamx , Gamy , Gamz , &
|
||||
Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
|
||||
Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
|
||||
Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
|
||||
Symmetry)
|
||||
#endif
|
||||
|
||||
call constraint_bssn(ex, X, Y, Z,&
|
||||
chi,trK, &
|
||||
dxx,gxy,gxz,dyy,gyz,dzz, &
|
||||
Axx,Axy,Axz,Ayy,Ayz,Azz, &
|
||||
Gamx,Gamy,Gamz,&
|
||||
Lap,betax,betay,betaz,rho,Sx,Sy,Sz,&
|
||||
Gamxxx, Gamxxy, Gamxxz,Gamxyy, Gamxyz, Gamxzz, &
|
||||
Gamyxx, Gamyxy, Gamyxz,Gamyyy, Gamyyz, Gamyzz, &
|
||||
Gamzxx, Gamzxy, Gamzxz,Gamzyy, Gamzyz, Gamzzz, &
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz, &
|
||||
Hcon,Mxcon,Mycon,Mzcon,Gmxcon,Gmycon,Gmzcon, &
|
||||
Symmetry)
|
||||
endif
|
||||
#if (ABV == 0)
|
||||
call ricci_gamma(ex, X, Y, Z, &
|
||||
chi, &
|
||||
dxx , gxy , gxz , dyy , gyz , dzz,&
|
||||
Gamx , Gamy , Gamz , &
|
||||
Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
|
||||
Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
|
||||
Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
|
||||
Symmetry)
|
||||
#endif
|
||||
|
||||
call constraint_bssn(ex, X, Y, Z,&
|
||||
chi,trK, &
|
||||
dxx,gxy,gxz,dyy,gyz,dzz, &
|
||||
Axx,Axy,Axz,Ayy,Ayz,Azz, &
|
||||
Gamx,Gamy,Gamz,&
|
||||
Lap,betax,betay,betaz,rho,Sx,Sy,Sz,&
|
||||
Gamxxx, Gamxxy, Gamxxz,Gamxyy, Gamxyz, Gamxzz, &
|
||||
Gamyxx, Gamyxy, Gamyxz,Gamyyy, Gamyyz, Gamyzz, &
|
||||
Gamzxx, Gamzxy, Gamzxz,Gamzyy, Gamzyz, Gamzzz, &
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz, &
|
||||
Hcon,Mxcon,Mycon,Mzcon,Gmxcon,Gmycon,Gmzcon, &
|
||||
Symmetry)
|
||||
|
||||
gont = 0
|
||||
|
||||
|
||||
@@ -121,12 +121,11 @@
|
||||
|
||||
call get_Z4cparameters(kappa1,kappa2,kappa3,FF,eta)
|
||||
|
||||
!!! sanity check
|
||||
#ifdef DEBUG
|
||||
dX = sum(chi)+sum(trK)+sum(dxx)+sum(gxy)+sum(gxz)+sum(dyy)+sum(gyz)+sum(dzz) &
|
||||
+sum(Axx)+sum(Axy)+sum(Axz)+sum(Ayy)+sum(Ayz)+sum(Azz) &
|
||||
+sum(Gamx)+sum(Gamy)+sum(Gamz) &
|
||||
+sum(Lap)+sum(betax)+sum(betay)+sum(betaz)+sum(dtSfx)+sum(dtSfy)+sum(dtSfz) &
|
||||
!!! sanity check
|
||||
dX = sum(chi)+sum(trK)+sum(dxx)+sum(gxy)+sum(gxz)+sum(dyy)+sum(gyz)+sum(dzz) &
|
||||
+sum(Axx)+sum(Axy)+sum(Axz)+sum(Ayy)+sum(Ayz)+sum(Azz) &
|
||||
+sum(Gamx)+sum(Gamy)+sum(Gamz) &
|
||||
+sum(Lap)+sum(betax)+sum(betay)+sum(betaz)+sum(dtSfx)+sum(dtSfy)+sum(dtSfz) &
|
||||
+sum(TZ)
|
||||
if(dX.ne.dX) then
|
||||
if(sum(chi).ne.sum(chi))write(*,*)"Z4c_rhs_ss.f90: find NaN in chi"
|
||||
@@ -153,11 +152,10 @@
|
||||
if(sum(dtSfx).ne.sum(dtSfx))write(*,*)"Z4c_rhs_ss.f90: find NaN in dtSfx"
|
||||
if(sum(dtSfy).ne.sum(dtSfy))write(*,*)"Z4c_rhs_ss.f90: find NaN in dtSfy"
|
||||
if(sum(dtSfz).ne.sum(dtSfz))write(*,*)"Z4c_rhs_ss.f90: find NaN in dtSfz"
|
||||
if(sum(TZ).ne.sum(Tz))write(*,*)"Z4c_rhs_ss.f90: find NaN in TZ"
|
||||
gont = 1
|
||||
return
|
||||
endif
|
||||
#endif
|
||||
if(sum(TZ).ne.sum(Tz))write(*,*)"Z4c_rhs_ss.f90: find NaN in TZ"
|
||||
gont = 1
|
||||
return
|
||||
endif
|
||||
|
||||
PI = dacos(-ONE)
|
||||
|
||||
@@ -1390,43 +1388,41 @@
|
||||
call kodis_sh(ex,crho,sigma,R,TZ,TZ_rhs,SSS,Symmetry,eps,sst)
|
||||
endif
|
||||
|
||||
if (co == 0) then
|
||||
#if (ABV == 1)
|
||||
call ricci_gamma_ss(ex,crho,sigma,R,X, Y, Z, &
|
||||
drhodx, drhody, drhodz, &
|
||||
dsigmadx,dsigmady,dsigmadz, &
|
||||
dRdx,dRdy,dRdz, &
|
||||
drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz, &
|
||||
dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz, &
|
||||
dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz, &
|
||||
chi, &
|
||||
dxx , gxy , gxz , dyy , gyz , dzz,&
|
||||
Gamx , Gamy , Gamz , &
|
||||
Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
|
||||
Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
|
||||
Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
|
||||
Symmetry,Lev,sst)
|
||||
#endif
|
||||
call constraint_bssn_ss(ex,crho,sigma,R,X, Y, Z, &
|
||||
drhodx, drhody, drhodz, &
|
||||
dsigmadx,dsigmady,dsigmadz, &
|
||||
dRdx,dRdy,dRdz, &
|
||||
drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz, &
|
||||
dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz, &
|
||||
dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz, &
|
||||
chi,trK, &
|
||||
dxx,gxy,gxz,dyy,gyz,dzz, &
|
||||
Axx,Axy,Axz,Ayy,Ayz,Azz, &
|
||||
Gamx,Gamy,Gamz,&
|
||||
Lap,betax,betay,betaz,rho,Sx,Sy,Sz,&
|
||||
Gamxxx, Gamxxy, Gamxxz,Gamxyy, Gamxyz, Gamxzz, &
|
||||
Gamyxx, Gamyxy, Gamyxz,Gamyyy, Gamyyz, Gamyzz, &
|
||||
Gamzxx, Gamzxy, Gamzxz,Gamzyy, Gamzyz, Gamzzz, &
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz, &
|
||||
Hcon,Mxcon,Mycon,Mzcon,Gmxcon,Gmycon,Gmzcon, &
|
||||
Symmetry,Lev,sst)
|
||||
endif
|
||||
#if (ABV == 1)
|
||||
call ricci_gamma_ss(ex,crho,sigma,R,X, Y, Z, &
|
||||
drhodx, drhody, drhodz, &
|
||||
dsigmadx,dsigmady,dsigmadz, &
|
||||
dRdx,dRdy,dRdz, &
|
||||
drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz, &
|
||||
dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz, &
|
||||
dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz, &
|
||||
chi, &
|
||||
dxx , gxy , gxz , dyy , gyz , dzz,&
|
||||
Gamx , Gamy , Gamz , &
|
||||
Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
|
||||
Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
|
||||
Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
|
||||
Symmetry,Lev,sst)
|
||||
call constraint_bssn_ss(ex,crho,sigma,R,X, Y, Z, &
|
||||
drhodx, drhody, drhodz, &
|
||||
dsigmadx,dsigmady,dsigmadz, &
|
||||
dRdx,dRdy,dRdz, &
|
||||
drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz, &
|
||||
dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz, &
|
||||
dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz, &
|
||||
chi,trK, &
|
||||
dxx,gxy,gxz,dyy,gyz,dzz, &
|
||||
Axx,Axy,Axz,Ayy,Ayz,Azz, &
|
||||
Gamx,Gamy,Gamz,&
|
||||
Lap,betax,betay,betaz,rho,Sx,Sy,Sz,&
|
||||
Gamxxx, Gamxxy, Gamxxz,Gamxyy, Gamxyz, Gamxzz, &
|
||||
Gamyxx, Gamyxy, Gamyxz,Gamyyy, Gamyyz, Gamyzz, &
|
||||
Gamzxx, Gamzxy, Gamzxz,Gamzyy, Gamzyz, Gamzzz, &
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz, &
|
||||
Hcon,Mxcon,Mycon,Mzcon,Gmxcon,Gmycon,Gmzcon, &
|
||||
Symmetry,Lev,sst)
|
||||
#endif
|
||||
|
||||
gont = 0
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -45,12 +45,10 @@ public:
|
||||
int checkrun;
|
||||
char checkfilename[50];
|
||||
int Steps;
|
||||
double StartTime, TotalTime;
|
||||
double AnasTime, DumpTime, d2DumpTime, CheckTime;
|
||||
double LastAnas, LastConsOut;
|
||||
bool cuda_level0_constraint_cache_valid;
|
||||
int *ConstraintRefreshLevels;
|
||||
double Courant;
|
||||
double StartTime, TotalTime;
|
||||
double AnasTime, DumpTime, d2DumpTime, CheckTime;
|
||||
double LastAnas, LastConsOut;
|
||||
double Courant;
|
||||
double numepss, numepsb, numepsh;
|
||||
int Symmetry;
|
||||
int maxl, decn;
|
||||
@@ -135,9 +133,9 @@ public:
|
||||
Parallel::SyncCache *sync_cache_restrict; // cached Restrict in RestrictProlong
|
||||
Parallel::SyncCache *sync_cache_outbd; // cached OutBdLow2Hi in RestrictProlong
|
||||
|
||||
monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor;
|
||||
monitor *ConVMonitor, *TimingMonitor;
|
||||
surface_integral *Waveshell;
|
||||
monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor;
|
||||
monitor *ConVMonitor;
|
||||
surface_integral *Waveshell;
|
||||
checkpoint *CheckPoint;
|
||||
|
||||
public:
|
||||
|
||||
2908
AMSS_NCKU_source/bssn_gpu.cu
Normal file
2908
AMSS_NCKU_source/bssn_gpu.cu
Normal file
File diff suppressed because it is too large
Load Diff
73
AMSS_NCKU_source/bssn_gpu.h
Normal file
73
AMSS_NCKU_source/bssn_gpu.h
Normal file
@@ -0,0 +1,73 @@
|
||||
|
||||
#ifndef BSSN_GPU_H_
|
||||
#define BSSN_GPU_H_
|
||||
#include "bssn_macro.h"
|
||||
#include "macrodef.fh"
|
||||
|
||||
#define DEVICE_ID 0
|
||||
// #define DEVICE_ID_BY_MPI_RANK
|
||||
#define GRID_DIM 256
|
||||
#define BLOCK_DIM 128
|
||||
|
||||
#define _FH2_(i, j, k) fh[(i) + (j) * _1D_SIZE[2] + (k) * _2D_SIZE[2]]
|
||||
#define _FH3_(i, j, k) fh[(i) + (j) * _1D_SIZE[3] + (k) * _2D_SIZE[3]]
|
||||
#define pow2(x) ((x) * (x))
|
||||
#define TimeBetween(a, b) ((b.tv_sec - a.tv_sec) + (b.tv_usec - a.tv_usec) / 1000000.0f)
|
||||
#define M_ metac.
|
||||
#define Mh_ meta->
|
||||
#define Ms_ metassc.
|
||||
#define Msh_ metass->
|
||||
|
||||
// #define TIMING
|
||||
|
||||
#define RHS_SS_PARA int calledby, int mpi_rank, int *ex, double &T, double *crho, double *sigma, double *R, double *X, double *Y, double *Z, double *drhodx, double *drhody, double *drhodz, double *dsigmadx, double *dsigmady, double *dsigmadz, double *dRdx, double *dRdy, double *dRdz, double *drhodxx, double *drhodxy, double *drhodxz, double *drhodyy, double *drhodyz, double *drhodzz, double *dsigmadxx, double *dsigmadxy, double *dsigmadxz, double *dsigmadyy, double *dsigmadyz, double *dsigmadzz, double *dRdxx, double *dRdxy, double *dRdxz, double *dRdyy, double *dRdyz, double *dRdzz, double *chi, double *trK, double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz, double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz, double *Gamx, double *Gamy, double *Gamz, double *Lap, double *betax, double *betay, double *betaz, double *dtSfx, double *dtSfy, double *dtSfz, double *chi_rhs, double *trK_rhs, double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs, double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs, double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs, double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs, double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs, double *rho, double *Sx, double *Sy, double *Sz, double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz, double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz, double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz, double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz, double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz, double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res, double *Gmx_Res, double *Gmy_Res, double *Gmz_Res, int &Symmetry, int &Lev, double &eps, int &sst, int &co
|
||||
|
||||
/** main function */
|
||||
int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,
|
||||
double *X, double *Y, double *Z,
|
||||
|
||||
double *chi, double *trK,
|
||||
|
||||
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
|
||||
|
||||
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
|
||||
|
||||
double *Gamx, double *Gamy, double *Gamz,
|
||||
|
||||
double *Lap, double *betax, double *betay, double *betaz,
|
||||
|
||||
double *dtSfx, double *dtSfy, double *dtSfz,
|
||||
|
||||
double *chi_rhs, double *trK_rhs,
|
||||
|
||||
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
|
||||
|
||||
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
|
||||
|
||||
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
|
||||
|
||||
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
|
||||
|
||||
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
|
||||
|
||||
double *rho, double *Sx, double *Sy, double *Sz, double *Sxx,
|
||||
double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
|
||||
|
||||
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
|
||||
|
||||
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
|
||||
|
||||
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
|
||||
|
||||
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
|
||||
|
||||
double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res,
|
||||
double *Gmx_Res, double *Gmy_Res, double *Gmz_Res,
|
||||
int &Symmetry, int &Lev, double &eps, int &co);
|
||||
|
||||
int gpu_rhs_ss(RHS_SS_PARA);
|
||||
|
||||
/** Init GPU side data in GPUMeta. */
|
||||
// void init_fluid_meta_gpu(GPUMeta *gpu_meta);
|
||||
|
||||
#endif
|
||||
7792
AMSS_NCKU_source/bssn_gpu_class.C
Normal file
7792
AMSS_NCKU_source/bssn_gpu_class.C
Normal file
File diff suppressed because it is too large
Load Diff
210
AMSS_NCKU_source/bssn_gpu_class.h
Normal file
210
AMSS_NCKU_source/bssn_gpu_class.h
Normal file
@@ -0,0 +1,210 @@
|
||||
|
||||
#ifndef BSSN_GPU_CLASS_H
|
||||
#define BSSN_GPU_CLASS_H
|
||||
|
||||
#ifdef newc
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
#include <cmath>
|
||||
using namespace std;
|
||||
#else
|
||||
#include <iostream.h>
|
||||
#include <iomanip.h>
|
||||
#include <fstream.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#endif
|
||||
|
||||
#include <mpi.h>
|
||||
|
||||
#include "macrodef.h"
|
||||
#include "cgh.h"
|
||||
#include "ShellPatch.h"
|
||||
#include "misc.h"
|
||||
#include "var.h"
|
||||
#include "MyList.h"
|
||||
#include "monitor.h"
|
||||
#include "surface_integral.h"
|
||||
#include "checkpoint.h"
|
||||
|
||||
// added by yangquan
|
||||
#include "bssn_macro.h"
|
||||
|
||||
extern void setpbh(int iBHN, double **iPBH, double *iMass, int rBHN);
|
||||
|
||||
class bssn_class
|
||||
{
|
||||
public:
|
||||
// added by yangquan
|
||||
//----------------------
|
||||
int gpu_num_mynode;
|
||||
int cpu_core_num_mynode;
|
||||
int mpi_process_num_mynode;
|
||||
int my_sequence_mynode;
|
||||
int mynode_id;
|
||||
int use_gpu;
|
||||
|
||||
virtual void Step_GPU(int lev, int YN);
|
||||
virtual void Get_runtime_envirment();
|
||||
// virtual void Step_OPENMP(int lev,int YN);
|
||||
//----------------------
|
||||
|
||||
int ngfs;
|
||||
int nprocs, myrank;
|
||||
cgh *GH;
|
||||
ShellPatch *SH;
|
||||
double PhysTime;
|
||||
|
||||
int checkrun;
|
||||
char checkfilename[50];
|
||||
int Steps;
|
||||
double StartTime, TotalTime;
|
||||
double AnasTime, DumpTime, d2DumpTime, CheckTime;
|
||||
double LastAnas, LastConsOut;
|
||||
double Courant;
|
||||
double numepss, numepsb, numepsh;
|
||||
int Symmetry;
|
||||
int maxl, decn;
|
||||
double maxrex, drex;
|
||||
int trfls, a_lev;
|
||||
|
||||
double dT;
|
||||
double chitiny;
|
||||
|
||||
double **Porg0, **Porgbr, **Porg, **Porg1, **Porg_rhs;
|
||||
int BH_num, BH_num_input;
|
||||
double *Mass, *Pmom, *Spin;
|
||||
double ADMMass;
|
||||
|
||||
var *phio, *trKo;
|
||||
var *gxxo, *gxyo, *gxzo, *gyyo, *gyzo, *gzzo;
|
||||
var *Axxo, *Axyo, *Axzo, *Ayyo, *Ayzo, *Azzo;
|
||||
var *Gmxo, *Gmyo, *Gmzo;
|
||||
var *Lapo, *Sfxo, *Sfyo, *Sfzo;
|
||||
var *dtSfxo, *dtSfyo, *dtSfzo;
|
||||
|
||||
var *phi0, *trK0;
|
||||
var *gxx0, *gxy0, *gxz0, *gyy0, *gyz0, *gzz0;
|
||||
var *Axx0, *Axy0, *Axz0, *Ayy0, *Ayz0, *Azz0;
|
||||
var *Gmx0, *Gmy0, *Gmz0;
|
||||
var *Lap0, *Sfx0, *Sfy0, *Sfz0;
|
||||
var *dtSfx0, *dtSfy0, *dtSfz0;
|
||||
|
||||
var *phi, *trK;
|
||||
var *gxx, *gxy, *gxz, *gyy, *gyz, *gzz;
|
||||
var *Axx, *Axy, *Axz, *Ayy, *Ayz, *Azz;
|
||||
var *Gmx, *Gmy, *Gmz;
|
||||
var *Lap, *Sfx, *Sfy, *Sfz;
|
||||
var *dtSfx, *dtSfy, *dtSfz;
|
||||
|
||||
var *phi1, *trK1;
|
||||
var *gxx1, *gxy1, *gxz1, *gyy1, *gyz1, *gzz1;
|
||||
var *Axx1, *Axy1, *Axz1, *Ayy1, *Ayz1, *Azz1;
|
||||
var *Gmx1, *Gmy1, *Gmz1;
|
||||
var *Lap1, *Sfx1, *Sfy1, *Sfz1;
|
||||
var *dtSfx1, *dtSfy1, *dtSfz1;
|
||||
|
||||
var *phi_rhs, *trK_rhs;
|
||||
var *gxx_rhs, *gxy_rhs, *gxz_rhs, *gyy_rhs, *gyz_rhs, *gzz_rhs;
|
||||
var *Axx_rhs, *Axy_rhs, *Axz_rhs, *Ayy_rhs, *Ayz_rhs, *Azz_rhs;
|
||||
var *Gmx_rhs, *Gmy_rhs, *Gmz_rhs;
|
||||
var *Lap_rhs, *Sfx_rhs, *Sfy_rhs, *Sfz_rhs;
|
||||
var *dtSfx_rhs, *dtSfy_rhs, *dtSfz_rhs;
|
||||
|
||||
var *rho, *Sx, *Sy, *Sz, *Sxx, *Sxy, *Sxz, *Syy, *Syz, *Szz;
|
||||
|
||||
var *Gamxxx, *Gamxxy, *Gamxxz, *Gamxyy, *Gamxyz, *Gamxzz;
|
||||
var *Gamyxx, *Gamyxy, *Gamyxz, *Gamyyy, *Gamyyz, *Gamyzz;
|
||||
var *Gamzxx, *Gamzxy, *Gamzxz, *Gamzyy, *Gamzyz, *Gamzzz;
|
||||
|
||||
var *Rxx, *Rxy, *Rxz, *Ryy, *Ryz, *Rzz;
|
||||
|
||||
var *Rpsi4, *Ipsi4;
|
||||
var *t1Rpsi4, *t1Ipsi4, *t2Rpsi4, *t2Ipsi4;
|
||||
|
||||
var *Cons_Ham, *Cons_Px, *Cons_Py, *Cons_Pz, *Cons_Gx, *Cons_Gy, *Cons_Gz;
|
||||
|
||||
#ifdef Point_Psi4
|
||||
var *phix, *phiy, *phiz;
|
||||
var *trKx, *trKy, *trKz;
|
||||
var *Axxx, *Axxy, *Axxz;
|
||||
var *Axyx, *Axyy, *Axyz;
|
||||
var *Axzx, *Axzy, *Axzz;
|
||||
var *Ayyx, *Ayyy, *Ayyz;
|
||||
var *Ayzx, *Ayzy, *Ayzz;
|
||||
var *Azzx, *Azzy, *Azzz;
|
||||
#endif
|
||||
// FIXME: uc = StateList, up = OldStateList, upp = SynchList_cor; so never touch these three data
|
||||
MyList<var> *StateList, *SynchList_pre, *SynchList_cor, *RHSList;
|
||||
MyList<var> *OldStateList, *DumpList;
|
||||
MyList<var> *ConstraintList;
|
||||
|
||||
monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor;
|
||||
monitor *ConVMonitor;
|
||||
surface_integral *Waveshell;
|
||||
checkpoint *CheckPoint;
|
||||
|
||||
public:
|
||||
bssn_class(double Couranti, double StartTimei, double TotalTimei, double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei,
|
||||
int Symmetryi, int checkruni, char *checkfilenamei, double numepssi, double numepsbi, double numepshi,
|
||||
int a_levi, int maxli, int decni, double maxrexi, double drexi);
|
||||
~bssn_class();
|
||||
|
||||
void Evolve(int Steps);
|
||||
void RecursiveStep(int lev);
|
||||
#if (PSTR == 1)
|
||||
void ParallelStep();
|
||||
void SHStep();
|
||||
#endif
|
||||
void RestrictProlong(int lev, int YN, bool BB, MyList<var> *SL, MyList<var> *OL, MyList<var> *corL);
|
||||
void RestrictProlong_aux(int lev, int YN, bool BB, MyList<var> *SL, MyList<var> *OL, MyList<var> *corL);
|
||||
void RestrictProlong(int lev, int YN, bool BB);
|
||||
void ProlongRestrict(int lev, int YN, bool BB);
|
||||
void Setup_Black_Hole_position();
|
||||
void compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, var *fory, var *forz, int lev);
|
||||
bool read_Pablo_file(int *ext, double *datain, char *filename);
|
||||
void write_Pablo_file(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax,
|
||||
char *filename);
|
||||
void AnalysisStuff(int lev, double dT_lev);
|
||||
void Setup_KerrSchild();
|
||||
void Enforce_algcon(int lev, int fg);
|
||||
|
||||
void testRestrict();
|
||||
void testOutBd();
|
||||
|
||||
virtual void Setup_Initial_Data_Lousto();
|
||||
virtual void Setup_Initial_Data_Cao();
|
||||
virtual void Initialize();
|
||||
virtual void Read_Ansorg();
|
||||
virtual void Read_Pablo() {};
|
||||
virtual void Compute_Psi4(int lev);
|
||||
virtual void Step(int lev, int YN);
|
||||
virtual void Interp_Constraint(bool infg);
|
||||
virtual void Constraint_Out();
|
||||
virtual void Compute_Constraint();
|
||||
|
||||
#ifdef With_AHF
|
||||
protected:
|
||||
MyList<var> *AHList, *AHDList, *GaugeList;
|
||||
int AHfindevery;
|
||||
double AHdumptime;
|
||||
int *lastahdumpid, HN_num; // number of possible horizons
|
||||
int *findeveryl;
|
||||
double *xc, *yc, *zc, *xr, *yr, *zr;
|
||||
bool *trigger;
|
||||
double *dTT;
|
||||
int *dumpid;
|
||||
|
||||
public:
|
||||
void AH_Prepare_derivatives();
|
||||
bool AH_Interp_Points(MyList<var> *VarList,
|
||||
int NN, double **XX,
|
||||
double *Shellf, int Symmetryi);
|
||||
void AH_Step_Find(int lev, double dT_lev);
|
||||
#endif
|
||||
};
|
||||
#endif /* BSSN_GPU_CLASS_H */
|
||||
@@ -22,32 +22,19 @@
|
||||
#define f_compute_rhs_Z4c_ss COMPUTE_RHS_Z4C_SS
|
||||
#define f_compute_constraint_fr COMPUTE_CONSTRAINT_FR
|
||||
#endif
|
||||
#ifdef fortran3
|
||||
#define f_compute_rhs_bssn compute_rhs_bssn_
|
||||
#ifdef fortran3
|
||||
#define f_compute_rhs_bssn compute_rhs_bssn_
|
||||
#define f_compute_rhs_bssn_ss compute_rhs_bssn_ss_
|
||||
#define f_compute_rhs_bssn_escalar compute_rhs_bssn_escalar_
|
||||
#define f_compute_rhs_bssn_escalar_ss compute_rhs_bssn_escalar_ss_
|
||||
#define f_compute_rhs_Z4c compute_rhs_z4c_
|
||||
#define f_compute_rhs_Z4cnot compute_rhs_z4cnot_
|
||||
#define f_compute_rhs_Z4c_ss compute_rhs_z4c_ss_
|
||||
#define f_compute_constraint_fr compute_constraint_fr_
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
void f_bssn_rhs_kernel_timing_reset();
|
||||
int f_bssn_rhs_kernel_timing_bucket_count();
|
||||
const double *f_bssn_rhs_kernel_timing_local_seconds();
|
||||
const char *f_bssn_rhs_kernel_timing_label(int);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
extern "C"
|
||||
{
|
||||
int f_compute_rhs_bssn(int *, double &, double *, double *, double *, // ex,T,X,Y,Z
|
||||
#define f_compute_constraint_fr compute_constraint_fr_
|
||||
#endif
|
||||
extern "C"
|
||||
{
|
||||
int f_compute_rhs_bssn(int *, double &, double *, double *, double *, // ex,T,X,Y,Z
|
||||
double *, double *, // chi, trK
|
||||
double *, double *, double *, double *, double *, double *, // gij
|
||||
double *, double *, double *, double *, double *, double *, // Aij
|
||||
|
||||
@@ -2,88 +2,30 @@
|
||||
#include "bssn_rhs.h"
|
||||
#include "share_func.h"
|
||||
#include "tool.h"
|
||||
#include <time.h>
|
||||
|
||||
#ifdef _OPENMP
|
||||
#define BSSN_OMP_TASK_GROUP_BEGIN \
|
||||
_Pragma("omp parallel") \
|
||||
{ \
|
||||
_Pragma("omp single nowait") \
|
||||
{
|
||||
#define BSSN_OMP_TASK_CALL(...) \
|
||||
_Pragma("omp task") { __VA_ARGS__; }
|
||||
#define BSSN_OMP_TASK_GROUP_END \
|
||||
_Pragma("omp taskwait") \
|
||||
} \
|
||||
}
|
||||
#else
|
||||
#define BSSN_OMP_TASK_GROUP_BEGIN {
|
||||
#define BSSN_OMP_TASK_CALL(...) { __VA_ARGS__; }
|
||||
#define BSSN_OMP_TASK_GROUP_END }
|
||||
#endif
|
||||
// 0-based i,j,k
|
||||
// #define IDX_F(i,j,k,nx,ny) ((i) + (j)*(nx) + (k)*(nx)*(ny))
|
||||
// ex(1)=nx, ex(2)=ny, ex(3)=nz
|
||||
|
||||
// 用法:a[ IDX_F(i,j,k,nx,ny) ]
|
||||
|
||||
#ifndef BSSN_KERNEL_FINE_TIMING
|
||||
#define BSSN_KERNEL_FINE_TIMING 0
|
||||
#endif
|
||||
|
||||
#if BSSN_KERNEL_FINE_TIMING
|
||||
namespace rhs_kernel_timing
|
||||
{
|
||||
enum Bucket
|
||||
{
|
||||
KB_SETUP_DERIVS = 0,
|
||||
KB_GEOM_GAMMA,
|
||||
KB_RICCI_METRIC,
|
||||
KB_CHI_LAPSE,
|
||||
KB_AIJ_TRK_GAUGE,
|
||||
KB_KO_CONSTRAINT,
|
||||
KB_COUNT
|
||||
};
|
||||
|
||||
static double local_bucket_seconds[KB_COUNT];
|
||||
|
||||
static const char *bucket_labels[KB_COUNT] =
|
||||
{
|
||||
"setup_derivs",
|
||||
"geom_gamma",
|
||||
"ricci_metric",
|
||||
"chi_lapse",
|
||||
"aij_trk_gauge",
|
||||
"ko_constraint"
|
||||
};
|
||||
|
||||
static inline double now_seconds()
|
||||
{
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
return double(ts.tv_sec) + 1.0e-9 * double(ts.tv_nsec);
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void f_bssn_rhs_kernel_timing_reset()
|
||||
{
|
||||
for (int i = 0; i < rhs_kernel_timing::KB_COUNT; ++i)
|
||||
rhs_kernel_timing::local_bucket_seconds[i] = 0.0;
|
||||
}
|
||||
|
||||
extern "C" int f_bssn_rhs_kernel_timing_bucket_count()
|
||||
{
|
||||
return rhs_kernel_timing::KB_COUNT;
|
||||
}
|
||||
|
||||
extern "C" const double *f_bssn_rhs_kernel_timing_local_seconds()
|
||||
{
|
||||
return rhs_kernel_timing::local_bucket_seconds;
|
||||
}
|
||||
|
||||
extern "C" const char *f_bssn_rhs_kernel_timing_label(int bucket_index)
|
||||
{
|
||||
if (bucket_index < 0 || bucket_index >= rhs_kernel_timing::KB_COUNT)
|
||||
return "unknown";
|
||||
return rhs_kernel_timing::bucket_labels[bucket_index];
|
||||
}
|
||||
|
||||
#define RHS_KERNEL_TIMER_DECL(var_name) const double var_name = rhs_kernel_timing::now_seconds()
|
||||
#define RHS_KERNEL_TIMER_ADD(bucket_name, var_name) \
|
||||
rhs_kernel_timing::local_bucket_seconds[int(rhs_kernel_timing::bucket_name)] += \
|
||||
rhs_kernel_timing::now_seconds() - (var_name)
|
||||
#else
|
||||
extern "C" void f_bssn_rhs_kernel_timing_reset() {}
|
||||
extern "C" int f_bssn_rhs_kernel_timing_bucket_count() { return 0; }
|
||||
extern "C" const double *f_bssn_rhs_kernel_timing_local_seconds() { return 0; }
|
||||
extern "C" const char *f_bssn_rhs_kernel_timing_label(int) { return "disabled"; }
|
||||
|
||||
#define RHS_KERNEL_TIMER_DECL(var_name)
|
||||
#define RHS_KERNEL_TIMER_ADD(bucket_name, var_name)
|
||||
#endif
|
||||
|
||||
// C function that calculates the right-hand side for BSSN equations
|
||||
int f_compute_rhs_bssn(int *ex, double &T,
|
||||
double *X, double *Y, double *Z,
|
||||
@@ -178,25 +120,26 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
dY = Y[1] - Y[0];
|
||||
dZ = Z[1] - Z[0];
|
||||
|
||||
RHS_KERNEL_TIMER_DECL(timer_setup_derivs);
|
||||
// 1ms //
|
||||
for(int i=0;i<all;i+=1){
|
||||
alpn1[i] = Lap[i] + 1.0;
|
||||
chin1[i] = chi[i] + 1.0;
|
||||
}
|
||||
// 9ms //
|
||||
fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev);
|
||||
fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev);
|
||||
fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev);
|
||||
fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
|
||||
fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev);
|
||||
fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev);
|
||||
fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev);
|
||||
fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev);
|
||||
fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev);
|
||||
fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev);
|
||||
fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
|
||||
fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
|
||||
BSSN_OMP_TASK_GROUP_BEGIN
|
||||
BSSN_OMP_TASK_CALL(fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev))
|
||||
BSSN_OMP_TASK_CALL(fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev))
|
||||
BSSN_OMP_TASK_CALL(fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev))
|
||||
BSSN_OMP_TASK_CALL(fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev))
|
||||
BSSN_OMP_TASK_CALL(fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev))
|
||||
BSSN_OMP_TASK_CALL(fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev))
|
||||
BSSN_OMP_TASK_CALL(fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev))
|
||||
BSSN_OMP_TASK_CALL(fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev))
|
||||
BSSN_OMP_TASK_CALL(fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev))
|
||||
BSSN_OMP_TASK_CALL(fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev))
|
||||
BSSN_OMP_TASK_CALL(fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev))
|
||||
BSSN_OMP_TASK_CALL(fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev))
|
||||
BSSN_OMP_TASK_GROUP_END
|
||||
|
||||
// 3ms //
|
||||
for(int i=0;i<all;i+=1){
|
||||
@@ -218,8 +161,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
(dxx[i] + ONE) * betaxz[i] + gxy[i] * betayz[i] + gyz[i] * betayx[i]
|
||||
+ (dzz[i] + ONE) * betazx[i] - gxz[i] * betayy[i];
|
||||
}
|
||||
RHS_KERNEL_TIMER_ADD(KB_SETUP_DERIVS, timer_setup_derivs);
|
||||
RHS_KERNEL_TIMER_DECL(timer_geom_gamma);
|
||||
// Fused: inverse metric + Gamma constraint + Christoffel (3 loops -> 1)
|
||||
for(int i=0;i<all;i+=1){
|
||||
double det = (dxx[i] + ONE) * (dyy[i] + ONE) * (dzz[i] + ONE) + gxy[i] * gyz[i] * gxz[i] + gxz[i] * gxy[i] * gyz[i] -
|
||||
@@ -362,6 +303,9 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
+ ( gupxy[i]*gupyz[i] + gupyy[i]*gupxz[i] ) * Axy[i]
|
||||
+ ( gupxy[i]*gupzz[i] + gupyz[i]*gupxz[i] ) * Axz[i]
|
||||
+ ( gupyy[i]*gupzz[i] + gupyz[i]*gupyz[i] ) * Ayz[i];
|
||||
Rxx[i] = axx; Ryy[i] = ayy; Rzz[i] = azz;
|
||||
Rxy[i] = axy; Rxz[i] = axz; Ryz[i] = ayz;
|
||||
|
||||
Gamx_rhs[i] = - TWO * ( Lapx[i]*axx + Lapy[i]*axy + Lapz[i]*axz ) +
|
||||
TWO * alpn1[i] * (
|
||||
-F3o2/chin1[i] * ( chix[i]*axx + chiy[i]*axy + chiz[i]*axz ) -
|
||||
@@ -391,18 +335,15 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
+ TWO * ( Gamzxy[i]*axy + Gamzxz[i]*axz + Gamzyz[i]*ayz )
|
||||
);
|
||||
}
|
||||
RHS_KERNEL_TIMER_ADD(KB_GEOM_GAMMA, timer_geom_gamma);
|
||||
RHS_KERNEL_TIMER_DECL(timer_ricci_metric);
|
||||
// 22.3ms //
|
||||
fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,
|
||||
X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev);
|
||||
fdderivs(ex,betay,gxxy,gxyy,gxzy,gyyy,gyzy,gzzy,
|
||||
X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev);
|
||||
fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,
|
||||
X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev);
|
||||
fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev);
|
||||
fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev);
|
||||
fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev);
|
||||
BSSN_OMP_TASK_GROUP_BEGIN
|
||||
BSSN_OMP_TASK_CALL(fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev))
|
||||
BSSN_OMP_TASK_CALL(fdderivs(ex,betay,gxxy,gxyy,gxzy,gyyy,gyzy,gzzy,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev))
|
||||
BSSN_OMP_TASK_CALL(fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev))
|
||||
BSSN_OMP_TASK_CALL(fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev))
|
||||
BSSN_OMP_TASK_CALL(fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev))
|
||||
BSSN_OMP_TASK_CALL(fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev))
|
||||
BSSN_OMP_TASK_GROUP_END
|
||||
|
||||
// Fused: fxx/Gamxa + Gamma_rhs part 2 (2 loops -> 1)
|
||||
for(int i=0;i<all;i+=1){
|
||||
@@ -410,6 +351,7 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
double lfxx = gxxx[i] + gxyy[i] + gxzz[i];
|
||||
double lfxy = gxyx[i] + gyyy[i] + gyzz[i];
|
||||
double lfxz = gxzx[i] + gyzy[i] + gzzz[i];
|
||||
fxx[i] = lfxx; fxy[i] = lfxy; fxz[i] = lfxz;
|
||||
|
||||
double gxa = gupxx[i]*Gamxxx[i] + gupyy[i]*Gamxyy[i] + gupzz[i]*Gamxzz[i]
|
||||
+ TWO * ( gupxy[i]*Gamxxy[i] + gupxz[i]*Gamxxz[i] + gupyz[i]*Gamxyz[i] );
|
||||
@@ -763,74 +705,69 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
+ Gamxyz[i] * gzzx[i] + Gamyyz[i] * gzzy[i] + Gamzyz[i] * gzzz[i]
|
||||
);
|
||||
}
|
||||
RHS_KERNEL_TIMER_ADD(KB_RICCI_METRIC, timer_ricci_metric);
|
||||
|
||||
RHS_KERNEL_TIMER_DECL(timer_chi_lapse);
|
||||
// 22.3ms //
|
||||
fdderivs(ex,chi,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
|
||||
|
||||
// 7ms //
|
||||
for (int i=0;i<all;i+=1) {
|
||||
const double inv_chin1 = ONE / chin1[i];
|
||||
const double half_inv_chin1 = HALF * inv_chin1;
|
||||
const double scaled_inv = F3o2 * inv_chin1;
|
||||
const double cxx = fxx[i] - Gamxxx[i] * chix[i] - Gamyxx[i] * chiy[i] - Gamzxx[i] * chiz[i];
|
||||
const double cxy = fxy[i] - Gamxxy[i] * chix[i] - Gamyxy[i] * chiy[i] - Gamzxy[i] * chiz[i];
|
||||
const double cxz = fxz[i] - Gamxxz[i] * chix[i] - Gamyxz[i] * chiy[i] - Gamzxz[i] * chiz[i];
|
||||
const double cyy = fyy[i] - Gamxyy[i] * chix[i] - Gamyyy[i] * chiy[i] - Gamzyy[i] * chiz[i];
|
||||
const double cyz = fyz[i] - Gamxyz[i] * chix[i] - Gamyyz[i] * chiy[i] - Gamzyz[i] * chiz[i];
|
||||
const double czz = fzz[i] - Gamxzz[i] * chix[i] - Gamyzz[i] * chiy[i] - Gamzzz[i] * chiz[i];
|
||||
const double ricci_chi =
|
||||
gupxx[i] * (cxx - scaled_inv * chix[i] * chix[i])
|
||||
+ gupyy[i] * (cyy - scaled_inv * chiy[i] * chiy[i])
|
||||
+ gupzz[i] * (czz - scaled_inv * chiz[i] * chiz[i])
|
||||
+ TWO * gupxy[i] * (cxy - scaled_inv * chix[i] * chiy[i])
|
||||
+ TWO * gupxz[i] * (cxz - scaled_inv * chix[i] * chiz[i])
|
||||
+ TWO * gupyz[i] * (cyz - scaled_inv * chiy[i] * chiz[i]);
|
||||
f[i] = ricci_chi;
|
||||
Rxx[i] = Rxx[i] + ( cxx - half_inv_chin1 * chix[i] * chix[i] + (dxx[i] + ONE) * ricci_chi ) * half_inv_chin1;
|
||||
Ryy[i] = Ryy[i] + ( cyy - half_inv_chin1 * chiy[i] * chiy[i] + (dyy[i] + ONE) * ricci_chi ) * half_inv_chin1;
|
||||
Rzz[i] = Rzz[i] + ( czz - half_inv_chin1 * chiz[i] * chiz[i] + (dzz[i] + ONE) * ricci_chi ) * half_inv_chin1;
|
||||
fxx[i] = fxx[i] - Gamxxx[i] * chix[i] - Gamyxx[i] * chiy[i] - Gamzxx[i] * chiz[i];
|
||||
fxy[i] = fxy[i] - Gamxxy[i] * chix[i] - Gamyxy[i] * chiy[i] - Gamzxy[i] * chiz[i];
|
||||
fxz[i] = fxz[i] - Gamxxz[i] * chix[i] - Gamyxz[i] * chiy[i] - Gamzxz[i] * chiz[i];
|
||||
fyy[i] = fyy[i] - Gamxyy[i] * chix[i] - Gamyyy[i] * chiy[i] - Gamzyy[i] * chiz[i];
|
||||
fyz[i] = fyz[i] - Gamxyz[i] * chix[i] - Gamyyz[i] * chiy[i] - Gamzyz[i] * chiz[i];
|
||||
fzz[i] = fzz[i] - Gamxzz[i] * chix[i] - Gamyzz[i] * chiy[i] - Gamzzz[i] * chiz[i];
|
||||
f[i] =
|
||||
gupxx[i] * (fxx[i] - (F3o2 / chin1[i]) * chix[i] * chix[i])
|
||||
+ gupyy[i] * (fyy[i] - (F3o2 / chin1[i]) * chiy[i] * chiy[i])
|
||||
+ gupzz[i] * (fzz[i] - (F3o2 / chin1[i]) * chiz[i] * chiz[i])
|
||||
+ TWO * gupxy[i] * (fxy[i] - (F3o2 / chin1[i]) * chix[i] * chiy[i])
|
||||
+ TWO * gupxz[i] * (fxz[i] - (F3o2 / chin1[i]) * chix[i] * chiz[i])
|
||||
+ TWO * gupyz[i] * (fyz[i] - (F3o2 / chin1[i]) * chiy[i] * chiz[i]);
|
||||
Rxx[i] = Rxx[i] + ( fxx[i] - (chix[i] * chix[i]) / (chin1[i] * TWO) + (dxx[i] + ONE) * f[i] ) / (chin1[i] * TWO);
|
||||
Ryy[i] = Ryy[i] + ( fyy[i] - (chiy[i] * chiy[i]) / (chin1[i] * TWO) + (dyy[i] + ONE) * f[i] ) / (chin1[i] * TWO);
|
||||
Rzz[i] = Rzz[i] + ( fzz[i] - (chiz[i] * chiz[i]) / (chin1[i] * TWO) + (dzz[i] + ONE) * f[i] ) / (chin1[i] * TWO);
|
||||
|
||||
Rxy[i] = Rxy[i] + ( cxy - half_inv_chin1 * chix[i] * chiy[i] + gxy[i] * ricci_chi ) * half_inv_chin1;
|
||||
Rxz[i] = Rxz[i] + ( cxz - half_inv_chin1 * chix[i] * chiz[i] + gxz[i] * ricci_chi ) * half_inv_chin1;
|
||||
Ryz[i] = Ryz[i] + ( cyz - half_inv_chin1 * chiy[i] * chiz[i] + gyz[i] * ricci_chi ) * half_inv_chin1;
|
||||
Rxy[i] = Rxy[i] + ( fxy[i] - (chix[i] * chiy[i]) / (chin1[i] * TWO) + gxy[i] * f[i] ) / (chin1[i] * TWO);
|
||||
Rxz[i] = Rxz[i] + ( fxz[i] - (chix[i] * chiz[i]) / (chin1[i] * TWO) + gxz[i] * f[i] ) / (chin1[i] * TWO);
|
||||
Ryz[i] = Ryz[i] + ( fyz[i] - (chiy[i] * chiz[i]) / (chin1[i] * TWO) + gyz[i] * f[i] ) / (chin1[i] * TWO);
|
||||
}
|
||||
|
||||
// 24ms //
|
||||
fdderivs(ex,Lap,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
|
||||
fderivs(ex,chi,dtSfx_rhs,dtSfy_rhs,dtSfz_rhs,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
|
||||
|
||||
// 6ms //
|
||||
for (int i=0;i<all;i+=1) {
|
||||
const double inv_chin1 = ONE / chin1[i];
|
||||
const double gchi_x = (gupxx[i] * chix[i] + gupxy[i] * chiy[i] + gupxz[i] * chiz[i]) * inv_chin1;
|
||||
const double gchi_y = (gupxy[i] * chix[i] + gupyy[i] * chiy[i] + gupyz[i] * chiz[i]) * inv_chin1;
|
||||
const double gchi_z = (gupxz[i] * chix[i] + gupyz[i] * chiy[i] + gupzz[i] * chiz[i]) * inv_chin1;
|
||||
/* gxxx,gxxy,gxxz (这里是“升指标后的chi导数/chi”那类量,你沿用原变量名即可) */
|
||||
gxxx[i] = (gupxx[i] * chix[i] + gupxy[i] * chiy[i] + gupxz[i] * chiz[i]) / chin1[i];
|
||||
gxxy[i] = (gupxy[i] * chix[i] + gupyy[i] * chiy[i] + gupyz[i] * chiz[i]) / chin1[i];
|
||||
gxxz[i] = (gupxz[i] * chix[i] + gupyz[i] * chiy[i] + gupzz[i] * chiz[i]) / chin1[i];
|
||||
|
||||
/* Christoffel 修正项 */
|
||||
Gamxxx[i] = Gamxxx[i] - ( ((chix[i] + chix[i]) * inv_chin1) - (dxx[i] + ONE) * gchi_x ) * HALF;
|
||||
Gamyxx[i] = Gamyxx[i] - ( 0.0 - (dxx[i] + ONE) * gchi_y ) * HALF; /* 原式只有 -gxx*gxxy */
|
||||
Gamzxx[i] = Gamzxx[i] - ( 0.0 - (dxx[i] + ONE) * gchi_z ) * HALF;
|
||||
Gamxxx[i] = Gamxxx[i] - ( ((chix[i] + chix[i]) / chin1[i]) - (dxx[i] + ONE) * gxxx[i] ) * HALF;
|
||||
Gamyxx[i] = Gamyxx[i] - ( 0.0 - (dxx[i] + ONE) * gxxy[i] ) * HALF; /* 原式只有 -gxx*gxxy */
|
||||
Gamzxx[i] = Gamzxx[i] - ( 0.0 - (dxx[i] + ONE) * gxxz[i] ) * HALF;
|
||||
|
||||
Gamxyy[i] = Gamxyy[i] - ( 0.0 - (dyy[i] + ONE) * gchi_x ) * HALF;
|
||||
Gamyyy[i] = Gamyyy[i] - ( ((chiy[i] + chiy[i]) * inv_chin1) - (dyy[i] + ONE) * gchi_y ) * HALF;
|
||||
Gamzyy[i] = Gamzyy[i] - ( 0.0 - (dyy[i] + ONE) * gchi_z ) * HALF;
|
||||
Gamxyy[i] = Gamxyy[i] - ( 0.0 - (dyy[i] + ONE) * gxxx[i] ) * HALF;
|
||||
Gamyyy[i] = Gamyyy[i] - ( ((chiy[i] + chiy[i]) / chin1[i]) - (dyy[i] + ONE) * gxxy[i] ) * HALF;
|
||||
Gamzyy[i] = Gamzyy[i] - ( 0.0 - (dyy[i] + ONE) * gxxz[i] ) * HALF;
|
||||
|
||||
Gamxzz[i] = Gamxzz[i] - ( 0.0 - (dzz[i] + ONE) * gchi_x ) * HALF;
|
||||
Gamyzz[i] = Gamyzz[i] - ( 0.0 - (dzz[i] + ONE) * gchi_y ) * HALF;
|
||||
Gamzzz[i] = Gamzzz[i] - ( ((chiz[i] + chiz[i]) * inv_chin1) - (dzz[i] + ONE) * gchi_z ) * HALF;
|
||||
Gamxzz[i] = Gamxzz[i] - ( 0.0 - (dzz[i] + ONE) * gxxx[i] ) * HALF;
|
||||
Gamyzz[i] = Gamyzz[i] - ( 0.0 - (dzz[i] + ONE) * gxxy[i] ) * HALF;
|
||||
Gamzzz[i] = Gamzzz[i] - ( ((chiz[i] + chiz[i]) / chin1[i]) - (dzz[i] + ONE) * gxxz[i] ) * HALF;
|
||||
|
||||
Gamxxy[i] = Gamxxy[i] - ( ( chiy[i] * inv_chin1) - gxy[i] * gchi_x ) * HALF;
|
||||
Gamyxy[i] = Gamyxy[i] - ( ( chix[i] * inv_chin1) - gxy[i] * gchi_y ) * HALF;
|
||||
Gamzxy[i] = Gamzxy[i] - ( 0.0 - gxy[i] * gchi_z ) * HALF;
|
||||
Gamxxy[i] = Gamxxy[i] - ( ( chiy[i] / chin1[i]) - gxy[i] * gxxx[i] ) * HALF;
|
||||
Gamyxy[i] = Gamyxy[i] - ( ( chix[i] / chin1[i]) - gxy[i] * gxxy[i] ) * HALF;
|
||||
Gamzxy[i] = Gamzxy[i] - ( 0.0 - gxy[i] * gxxz[i] ) * HALF;
|
||||
|
||||
Gamxxz[i] = Gamxxz[i] - ( ( chiz[i] * inv_chin1) - gxz[i] * gchi_x ) * HALF;
|
||||
Gamyxz[i] = Gamyxz[i] - ( 0.0 - gxz[i] * gchi_y ) * HALF;
|
||||
Gamzxz[i] = Gamzxz[i] - ( ( chix[i] * inv_chin1) - gxz[i] * gchi_z ) * HALF;
|
||||
Gamxxz[i] = Gamxxz[i] - ( ( chiz[i] / chin1[i]) - gxz[i] * gxxx[i] ) * HALF;
|
||||
Gamyxz[i] = Gamyxz[i] - ( 0.0 - gxz[i] * gxxy[i] ) * HALF;
|
||||
Gamzxz[i] = Gamzxz[i] - ( ( chix[i] / chin1[i]) - gxz[i] * gxxz[i] ) * HALF;
|
||||
|
||||
Gamxyz[i] = Gamxyz[i] - ( 0.0 - gyz[i] * gchi_x ) * HALF;
|
||||
Gamyyz[i] = Gamyyz[i] - ( ( chiz[i] * inv_chin1) - gyz[i] * gchi_y ) * HALF;
|
||||
Gamzyz[i] = Gamzyz[i] - ( ( chiy[i] * inv_chin1) - gyz[i] * gchi_z ) * HALF;
|
||||
Gamxyz[i] = Gamxyz[i] - ( 0.0 - gyz[i] * gxxx[i] ) * HALF;
|
||||
Gamyyz[i] = Gamyyz[i] - ( ( chiz[i] / chin1[i]) - gyz[i] * gxxy[i] ) * HALF;
|
||||
Gamzyz[i] = Gamzyz[i] - ( ( chiy[i] / chin1[i]) - gyz[i] * gxxz[i] ) * HALF;
|
||||
|
||||
/* fxx..fyz 修正:减去 Γ * ∂Lap */
|
||||
fxx[i] = fxx[i] - Gamxxx[i] * Lapx[i] - Gamyxx[i] * Lapy[i] - Gamzxx[i] * Lapz[i];
|
||||
@@ -844,8 +781,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
trK_rhs[i] = gupxx[i] * fxx[i] + gupyy[i] * fyy[i] + gupzz[i] * fzz[i]
|
||||
+ TWO * ( gupxy[i] * fxy[i] + gupxz[i] * fxz[i] + gupyz[i] * fyz[i] );
|
||||
}
|
||||
RHS_KERNEL_TIMER_ADD(KB_CHI_LAPSE, timer_chi_lapse);
|
||||
RHS_KERNEL_TIMER_DECL(timer_aij_trk_gauge);
|
||||
// 2.5ms //
|
||||
for (int i=0;i<all;i+=1) {
|
||||
const double divb = betaxx[i] + betayy[i] + betazz[i];
|
||||
@@ -1098,12 +1033,12 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
betaz_rhs[i] = FF * dtSfz[i];
|
||||
|
||||
reta[i] =
|
||||
gupxx[i] * chix[i] * chix[i]
|
||||
+ gupyy[i] * chiy[i] * chiy[i]
|
||||
+ gupzz[i] * chiz[i] * chiz[i]
|
||||
+ TWO * ( gupxy[i] * chix[i] * chiy[i]
|
||||
+ gupxz[i] * chix[i] * chiz[i]
|
||||
+ gupyz[i] * chiy[i] * chiz[i] );
|
||||
gupxx[i] * dtSfx_rhs[i] * dtSfx_rhs[i]
|
||||
+ gupyy[i] * dtSfy_rhs[i] * dtSfy_rhs[i]
|
||||
+ gupzz[i] * dtSfz_rhs[i] * dtSfz_rhs[i]
|
||||
+ TWO * ( gupxy[i] * dtSfx_rhs[i] * dtSfy_rhs[i]
|
||||
+ gupxz[i] * dtSfx_rhs[i] * dtSfz_rhs[i]
|
||||
+ gupyz[i] * dtSfy_rhs[i] * dtSfz_rhs[i] );
|
||||
|
||||
#if (GAUGE == 2)
|
||||
reta[i] = 1.31 / 2.0 * sqrt( reta[i] / chin1[i] ) / pow( (ONE - sqrt(chin1[i])), 2.0 );
|
||||
@@ -1116,12 +1051,12 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
dtSfz_rhs[i] = Gamz_rhs[i] - reta[i] * dtSfz[i];
|
||||
#elif (GAUGE == 4 || GAUGE == 5)
|
||||
reta[i] =
|
||||
gupxx[i] * chix[i] * chix[i]
|
||||
+ gupyy[i] * chiy[i] * chiy[i]
|
||||
+ gupzz[i] * chiz[i] * chiz[i]
|
||||
+ TWO * ( gupxy[i] * chix[i] * chiy[i]
|
||||
+ gupxz[i] * chix[i] * chiz[i]
|
||||
+ gupyz[i] * chiy[i] * chiz[i] );
|
||||
gupxx[i] * dtSfx_rhs[i] * dtSfx_rhs[i]
|
||||
+ gupyy[i] * dtSfy_rhs[i] * dtSfy_rhs[i]
|
||||
+ gupzz[i] * dtSfz_rhs[i] * dtSfz_rhs[i]
|
||||
+ TWO * ( gupxy[i] * dtSfx_rhs[i] * dtSfy_rhs[i]
|
||||
+ gupxz[i] * dtSfx_rhs[i] * dtSfz_rhs[i]
|
||||
+ gupyz[i] * dtSfy_rhs[i] * dtSfz_rhs[i] );
|
||||
|
||||
#if (GAUGE == 4)
|
||||
reta[i] = 1.31 / 2.0 * sqrt( reta[i] / chin1[i] ) / pow( (ONE - sqrt(chin1[i])), 2.0 );
|
||||
@@ -1146,33 +1081,33 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
dtSfz_rhs[i] = Gamz_rhs[i] - reta[i] * dtSfz[i];
|
||||
#endif
|
||||
}
|
||||
RHS_KERNEL_TIMER_ADD(KB_AIJ_TRK_GAUGE, timer_aij_trk_gauge);
|
||||
RHS_KERNEL_TIMER_DECL(timer_ko_constraint);
|
||||
// advection + KO dissipation with shared symmetry buffer
|
||||
lopsided_kodis(ex,X,Y,Z,dxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,dyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,dzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS,eps);
|
||||
BSSN_OMP_TASK_GROUP_BEGIN
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS,eps))
|
||||
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS,eps))
|
||||
BSSN_OMP_TASK_GROUP_END
|
||||
// 2ms //
|
||||
if(co==0){
|
||||
for (int i=0;i<all;i+=1) {
|
||||
@@ -1219,12 +1154,14 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
}
|
||||
|
||||
// 1ms //
|
||||
fderivs(ex,Axx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0);
|
||||
fderivs(ex,Axy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,0);
|
||||
fderivs(ex,Axz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,0);
|
||||
fderivs(ex,Ayy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0);
|
||||
fderivs(ex,Ayz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,0);
|
||||
fderivs(ex,Azz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0);
|
||||
BSSN_OMP_TASK_GROUP_BEGIN
|
||||
BSSN_OMP_TASK_CALL(fderivs(ex,Axx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0))
|
||||
BSSN_OMP_TASK_CALL(fderivs(ex,Axy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,0))
|
||||
BSSN_OMP_TASK_CALL(fderivs(ex,Axz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,0))
|
||||
BSSN_OMP_TASK_CALL(fderivs(ex,Ayy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0))
|
||||
BSSN_OMP_TASK_CALL(fderivs(ex,Ayz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,0))
|
||||
BSSN_OMP_TASK_CALL(fderivs(ex,Azz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0))
|
||||
BSSN_OMP_TASK_GROUP_END
|
||||
// 7ms //
|
||||
for (int i=0;i<all;i+=1) {
|
||||
gxxx[i] = gxxx[i] - ( Gamxxx[i] * Axx[i] + Gamyxx[i] * Axy[i] + Gamzxx[i] * Axz[i]
|
||||
@@ -1279,7 +1216,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
movz_Res[i] = movz_Res[i] - F2o3*Kz[i] - F8*PI*Sz[i];
|
||||
}
|
||||
}
|
||||
RHS_KERNEL_TIMER_ADD(KB_KO_CONSTRAINT, timer_ko_constraint);
|
||||
|
||||
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,127 +0,0 @@
|
||||
#ifndef BSSN_RHS_CUDA_H
|
||||
#define BSSN_RHS_CUDA_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum {
|
||||
BSSN_CUDA_STATE_COUNT = 24,
|
||||
BSSN_CUDA_MATTER_COUNT = 10
|
||||
};
|
||||
|
||||
int f_compute_rhs_bssn(int *ex, double &T,
|
||||
double *X, double *Y, double *Z,
|
||||
double *chi, double *trK,
|
||||
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
|
||||
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
|
||||
double *Gamx, double *Gamy, double *Gamz,
|
||||
double *Lap, double *betax, double *betay, double *betaz,
|
||||
double *dtSfx, double *dtSfy, double *dtSfz,
|
||||
double *chi_rhs, double *trK_rhs,
|
||||
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
|
||||
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
|
||||
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
|
||||
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
|
||||
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
|
||||
double *rho, double *Sx, double *Sy, double *Sz,
|
||||
double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
|
||||
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
|
||||
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
|
||||
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
|
||||
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
|
||||
double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res,
|
||||
double *Gmx_Res, double *Gmy_Res, double *Gmz_Res,
|
||||
int &Symmetry, int &Lev, double &eps, int &co);
|
||||
|
||||
int bssn_cuda_rk4_substep(void *block_tag,
|
||||
int *ex, double *X, double *Y, double *Z,
|
||||
double **state_host_in,
|
||||
double **state_host_out,
|
||||
double **matter_host,
|
||||
const double *propspeed,
|
||||
const double *soa_flat,
|
||||
const double *bbox,
|
||||
double &dT,
|
||||
double &T,
|
||||
int &RK4,
|
||||
int &apply_bam_bc,
|
||||
int &Symmetry,
|
||||
int &Lev,
|
||||
double &eps,
|
||||
int &co,
|
||||
int &use_zero_matter,
|
||||
int &keep_resident_state,
|
||||
int &apply_enforce_ga,
|
||||
double &chitiny);
|
||||
|
||||
int bssn_cuda_copy_state_region_to_host(void *block_tag,
|
||||
int state_index,
|
||||
double *host_state,
|
||||
int *ex,
|
||||
int i0, int j0, int k0,
|
||||
int sx, int sy, int sz);
|
||||
|
||||
int bssn_cuda_copy_state_region_from_host(void *block_tag,
|
||||
int state_index,
|
||||
double *host_state,
|
||||
int *ex,
|
||||
int i0, int j0, int k0,
|
||||
int sx, int sy, int sz);
|
||||
|
||||
int bssn_cuda_download_resident_state(void *block_tag,
|
||||
int *ex,
|
||||
double **state_host_out);
|
||||
|
||||
int bssn_cuda_download_constraint_outputs(int *ex,
|
||||
double **constraint_host_out);
|
||||
|
||||
int bssn_cuda_pack_state_region_to_host_buffer(void *block_tag,
|
||||
int state_index,
|
||||
double *host_buffer,
|
||||
int *ex,
|
||||
int i0, int j0, int k0,
|
||||
int sx, int sy, int sz);
|
||||
|
||||
int bssn_cuda_unpack_state_region_from_host_buffer(void *block_tag,
|
||||
int state_index,
|
||||
double *host_buffer,
|
||||
int *ex,
|
||||
int i0, int j0, int k0,
|
||||
int sx, int sy, int sz);
|
||||
|
||||
int bssn_cuda_pack_state_batch_to_host_buffer(void *block_tag,
|
||||
int state_count,
|
||||
double *host_buffer,
|
||||
int *ex,
|
||||
int i0, int j0, int k0,
|
||||
int sx, int sy, int sz);
|
||||
|
||||
int bssn_cuda_unpack_state_batch_from_host_buffer(void *block_tag,
|
||||
int state_count,
|
||||
double *host_buffer,
|
||||
int *ex,
|
||||
int i0, int j0, int k0,
|
||||
int sx, int sy, int sz);
|
||||
|
||||
int bssn_cuda_download_state_subset(void *block_tag,
|
||||
int *ex,
|
||||
int subset_count,
|
||||
const int *state_indices,
|
||||
double **state_host_out);
|
||||
|
||||
int bssn_cuda_upload_state_subset(void *block_tag,
|
||||
int *ex,
|
||||
int subset_count,
|
||||
const int *state_indices,
|
||||
double **state_host_in);
|
||||
|
||||
int bssn_cuda_has_resident_state(void *block_tag);
|
||||
|
||||
void bssn_cuda_release_step_ctx(void *block_tag);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
1942
AMSS_NCKU_source/bssn_step_gpu.C
Normal file
1942
AMSS_NCKU_source/bssn_step_gpu.C
Normal file
File diff suppressed because it is too large
Load Diff
@@ -41,8 +41,8 @@ void fdderivs(const int ex[3],
|
||||
const size_t nz = (size_t)ex3 + 2;
|
||||
const size_t fh_size = nx * ny * nz;
|
||||
|
||||
static double *fh = NULL;
|
||||
static size_t cap = 0;
|
||||
static thread_local double *fh = NULL;
|
||||
static thread_local size_t cap = 0;
|
||||
|
||||
if (fh_size > cap) {
|
||||
free(fh);
|
||||
|
||||
@@ -50,8 +50,8 @@ void fderivs(const int ex[3],
|
||||
const size_t ny = (size_t)ex2 + 2;
|
||||
const size_t nz = (size_t)ex3 + 2;
|
||||
const size_t fh_size = nx * ny * nz;
|
||||
static double *fh = NULL;
|
||||
static size_t cap = 0;
|
||||
static thread_local double *fh = NULL;
|
||||
static thread_local size_t cap = 0;
|
||||
|
||||
if (fh_size > cap) {
|
||||
free(fh);
|
||||
|
||||
@@ -1511,88 +1511,13 @@ deallocate(f_flat)
|
||||
|
||||
f_out = f_out*dX*dY*dZ
|
||||
|
||||
return
|
||||
|
||||
end subroutine l2normhelper
|
||||
!--------------------------------------------------------------------------------------
|
||||
subroutine l2normhelper7(ex, X, Y, Z,xmin,ymin,zmin,xmax,ymax,zmax,&
|
||||
f1,f2,f3,f4,f5,f6,f7,f_out,gw)
|
||||
|
||||
implicit none
|
||||
!~~~~~~> Input parameters:
|
||||
integer,intent(in ):: ex(1:3)
|
||||
real*8, intent(in ):: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3)),xmin,ymin,zmin,xmax,ymax,zmax
|
||||
integer,intent(in)::gw
|
||||
real*8, dimension(ex(1),ex(2),ex(3)),intent(in) :: f1,f2,f3,f4,f5,f6,f7
|
||||
real*8, intent(out) :: f_out(7)
|
||||
!~~~~~~> Other variables:
|
||||
|
||||
real*8 :: dX, dY, dZ
|
||||
integer::imin,jmin,kmin
|
||||
integer::imax,jmax,kmax
|
||||
integer::i,j,k
|
||||
real*8 :: s1,s2,s3,s4,s5,s6,s7
|
||||
|
||||
dX = X(2) - X(1)
|
||||
dY = Y(2) - Y(1)
|
||||
dZ = Z(2) - Z(1)
|
||||
|
||||
! for ghost zone
|
||||
imin = gw+1
|
||||
jmin = gw+1
|
||||
kmin = gw+1
|
||||
|
||||
imax = ex(1) - gw
|
||||
jmax = ex(2) - gw
|
||||
kmax = ex(3) - gw
|
||||
|
||||
!for patch boundary (i.e., not ghost boundary)
|
||||
|
||||
if(dabs(X(ex(1))-xmax) < dX) imax = ex(1)
|
||||
if(dabs(Y(ex(2))-ymax) < dY) jmax = ex(2)
|
||||
if(dabs(Z(ex(3))-zmax) < dZ) kmax = ex(3)
|
||||
if(dabs(X(1)-xmin) < dX) imin = 1
|
||||
if(dabs(Y(1)-ymin) < dY) jmin = 1
|
||||
if(dabs(Z(1)-zmin) < dZ) kmin = 1
|
||||
|
||||
s1 = 0.d0
|
||||
s2 = 0.d0
|
||||
s3 = 0.d0
|
||||
s4 = 0.d0
|
||||
s5 = 0.d0
|
||||
s6 = 0.d0
|
||||
s7 = 0.d0
|
||||
|
||||
do k=kmin,kmax
|
||||
do j=jmin,jmax
|
||||
!DIR$ SIMD REDUCTION(+:s1,s2,s3,s4,s5,s6,s7)
|
||||
do i=imin,imax
|
||||
s1 = s1 + f1(i,j,k)*f1(i,j,k)
|
||||
s2 = s2 + f2(i,j,k)*f2(i,j,k)
|
||||
s3 = s3 + f3(i,j,k)*f3(i,j,k)
|
||||
s4 = s4 + f4(i,j,k)*f4(i,j,k)
|
||||
s5 = s5 + f5(i,j,k)*f5(i,j,k)
|
||||
s6 = s6 + f6(i,j,k)*f6(i,j,k)
|
||||
s7 = s7 + f7(i,j,k)*f7(i,j,k)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
f_out(1) = s1*dX*dY*dZ
|
||||
f_out(2) = s2*dX*dY*dZ
|
||||
f_out(3) = s3*dX*dY*dZ
|
||||
f_out(4) = s4*dX*dY*dZ
|
||||
f_out(5) = s5*dX*dY*dZ
|
||||
f_out(6) = s6*dX*dY*dZ
|
||||
f_out(7) = s7*dX*dY*dZ
|
||||
|
||||
return
|
||||
|
||||
end subroutine l2normhelper7
|
||||
!--------------------------------------------------------------------------------------
|
||||
! calculate L2norm especially for shell Blocks
|
||||
subroutine l2normhelper_sh(ex, X, Y, Z,xmin,ymin,zmin,xmax,ymax,zmax,&
|
||||
f,f_out,gw,ogw,Symmetry)
|
||||
return
|
||||
|
||||
end subroutine l2normhelper
|
||||
!--------------------------------------------------------------------------------------
|
||||
! calculate L2norm especially for shell Blocks
|
||||
subroutine l2normhelper_sh(ex, X, Y, Z,xmin,ymin,zmin,xmax,ymax,zmax,&
|
||||
f,f_out,gw,ogw,Symmetry)
|
||||
|
||||
implicit none
|
||||
!~~~~~~> Input parameters:
|
||||
|
||||
@@ -12,10 +12,9 @@
|
||||
#define f_global_interpind global_interpind
|
||||
#define f_global_interpind2d global_interpind2d
|
||||
#define f_global_interpind1d global_interpind1d
|
||||
#define f_l2normhelper l2normhelper
|
||||
#define f_l2normhelper7 l2normhelper7
|
||||
#define f_l2normhelper_sh l2normhelper_sh
|
||||
#define f_l2normhelper_sh_rms l2normhelper_sh_rms
|
||||
#define f_l2normhelper l2normhelper
|
||||
#define f_l2normhelper_sh l2normhelper_sh
|
||||
#define f_l2normhelper_sh_rms l2normhelper_sh_rms
|
||||
#define f_average average
|
||||
#define f_average3 average3
|
||||
#define f_average2 average2
|
||||
@@ -42,10 +41,9 @@
|
||||
#define f_global_interpind GLOBAL_INTERPIND
|
||||
#define f_global_interpind2d GLOBAL_INTERPIND2D
|
||||
#define f_global_interpind1d GLOBAL_INTERPIND1D
|
||||
#define f_l2normhelper L2NORMHELPER
|
||||
#define f_l2normhelper7 L2NORMHELPER7
|
||||
#define f_l2normhelper_sh L2NORMHELPER_SH
|
||||
#define f_l2normhelper_sh_rms L2NORMHELPER_SH_RMS
|
||||
#define f_l2normhelper L2NORMHELPER
|
||||
#define f_l2normhelper_sh L2NORMHELPER_SH
|
||||
#define f_l2normhelper_sh_rms L2NORMHELPER_SH_RMS
|
||||
#define f_average AVERAGE
|
||||
#define f_average3 AVERAGE3
|
||||
#define f_average2 AVERAGE2
|
||||
@@ -72,10 +70,9 @@
|
||||
#define f_global_interpind global_interpind_
|
||||
#define f_global_interpind2d global_interpind2d_
|
||||
#define f_global_interpind1d global_interpind1d_
|
||||
#define f_l2normhelper l2normhelper_
|
||||
#define f_l2normhelper7 l2normhelper7_
|
||||
#define f_l2normhelper_sh l2normhelper_sh_
|
||||
#define f_l2normhelper_sh_rms l2normhelper_sh_rms_
|
||||
#define f_l2normhelper l2normhelper_
|
||||
#define f_l2normhelper_sh l2normhelper_sh_
|
||||
#define f_l2normhelper_sh_rms l2normhelper_sh_rms_
|
||||
#define f_average average_
|
||||
#define f_average3 average3_
|
||||
#define f_average2 average2_
|
||||
@@ -159,29 +156,20 @@ extern "C"
|
||||
int *, double *, int &, int &);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
{
|
||||
void f_l2normhelper(int *, double *, double *, double *,
|
||||
double &, double &, double &,
|
||||
double &, double &, double &,
|
||||
double *, double &, int &);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
{
|
||||
void f_l2normhelper7(int *, double *, double *, double *,
|
||||
double &, double &, double &,
|
||||
double &, double &, double &,
|
||||
double *, double *, double *, double *,
|
||||
double *, double *, double *, double *, int &);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
{
|
||||
void f_l2normhelper_sh(int *, double *, double *, double *,
|
||||
double &, double &, double &,
|
||||
double &, double &, double &,
|
||||
double *, double &, int &, int &, int &);
|
||||
extern "C"
|
||||
{
|
||||
void f_l2normhelper(int *, double *, double *, double *,
|
||||
double &, double &, double &,
|
||||
double &, double &, double &,
|
||||
double *, double &, int &);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
{
|
||||
void f_l2normhelper_sh(int *, double *, double *, double *,
|
||||
double &, double &, double &,
|
||||
double &, double &, double &,
|
||||
double *, double &, int &, int &, int &);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
|
||||
@@ -43,7 +43,13 @@ void lopsided_kodis(const int ex[3],
|
||||
const size_t nz = (size_t)ex3 + 3;
|
||||
const size_t fh_size = nx * ny * nz;
|
||||
|
||||
double *fh = (double*)malloc(fh_size * sizeof(double));
|
||||
static thread_local double *fh = NULL;
|
||||
static thread_local size_t cap = 0;
|
||||
if (fh_size > cap) {
|
||||
free(fh);
|
||||
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
||||
cap = fh_size;
|
||||
}
|
||||
if (!fh) return;
|
||||
|
||||
symmetry_bd(3, ex, f, fh, SoA);
|
||||
@@ -243,6 +249,4 @@ void lopsided_kodis(const int ex[3],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(fh);
|
||||
}
|
||||
|
||||
@@ -29,16 +29,6 @@
|
||||
|
||||
#define REGLEV 0
|
||||
|
||||
#define BSSN_FINE_TIMING 0
|
||||
|
||||
#define BSSN_FINE_TIMING_EVERY 1
|
||||
|
||||
#define BSSN_FINE_TIMING_TOPN 8
|
||||
|
||||
#define BSSN_KERNEL_FINE_TIMING 0
|
||||
|
||||
#define BSSN_ENABLE_STDIN_ABORT_POLL 0
|
||||
|
||||
//#define USE_GPU
|
||||
|
||||
//#define CHECKDETAIL
|
||||
@@ -98,21 +88,6 @@
|
||||
// 0: for every level;
|
||||
// 1: for all
|
||||
//
|
||||
// define BSSN_FINE_TIMING
|
||||
// enable fine-grained per-timestep timing monitor
|
||||
//
|
||||
// define BSSN_FINE_TIMING_EVERY
|
||||
// report timing every N coarse timesteps
|
||||
//
|
||||
// define BSSN_FINE_TIMING_TOPN
|
||||
// number of hottest timing buckets shown in stdout
|
||||
//
|
||||
// define BSSN_KERNEL_FINE_TIMING
|
||||
// enable split timing inside compute_rhs_bssn
|
||||
//
|
||||
// define BSSN_ENABLE_STDIN_ABORT_POLL
|
||||
// poll stdin and broadcast abort flag every coarse step
|
||||
//
|
||||
// define USE_GPU
|
||||
// use gpu or not
|
||||
//
|
||||
@@ -167,3 +142,4 @@
|
||||
#define TINY 1e-10
|
||||
|
||||
#endif /* MICRODEF_H */
|
||||
|
||||
|
||||
@@ -1,35 +1,35 @@
|
||||
|
||||
|
||||
include makefile.inc
|
||||
|
||||
## polint(ordn=6) kernel selector:
|
||||
## 1 (default): barycentric fast path
|
||||
## 0 : fallback to Neville path
|
||||
POLINT6_USE_BARY ?= 1
|
||||
POLINT6_FLAG = -DPOLINT6_USE_BARYCENTRIC=$(POLINT6_USE_BARY)
|
||||
|
||||
## ABE build flags selected by PGO_MODE (set in makefile.inc, default: opt)
|
||||
## make -> opt (PGO-guided, maximum performance)
|
||||
## make PGO_MODE=instrument -> instrument (Phase 1: collect fresh profile data)
|
||||
PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata
|
||||
include makefile.inc
|
||||
|
||||
## polint(ordn=6) kernel selector:
|
||||
## 1 (default): barycentric fast path
|
||||
## 0 : fallback to Neville path
|
||||
POLINT6_USE_BARY ?= 1
|
||||
POLINT6_FLAG = -DPOLINT6_USE_BARYCENTRIC=$(POLINT6_USE_BARY)
|
||||
|
||||
## ABE build flags selected by PGO_MODE (set in makefile.inc, default: opt)
|
||||
## make -> opt (PGO-guided, maximum performance)
|
||||
## make PGO_MODE=instrument -> instrument (Phase 1: collect fresh profile data)
|
||||
PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata
|
||||
|
||||
ifeq ($(PGO_MODE),instrument)
|
||||
## Phase 1: instrumentation — omit -ipo/-fp-model fast=2 for faster build and numerical stability
|
||||
CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
||||
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
|
||||
f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
||||
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
||||
else
|
||||
## opt (default): maximum performance with PGO profile data -fprofile-instr-use=$(PROFDATA) \
|
||||
## PGO has been turned off, now tested and found to be negative optimization
|
||||
## INTERP_LB_FLAGS has been turned off too, now tested and found to be negative optimization
|
||||
|
||||
|
||||
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
|
||||
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
||||
endif
|
||||
CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
||||
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) $(OPENMP_FLAGS)
|
||||
f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
||||
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
||||
else
|
||||
## opt (default): maximum performance with PGO profile data -fprofile-instr-use=$(PROFDATA) \
|
||||
## PGO has been turned off, now tested and found to be negative optimization
|
||||
## INTERP_LB_FLAGS has been turned off too, now tested and found to be negative optimization
|
||||
|
||||
|
||||
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) $(OPENMP_FLAGS)
|
||||
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
||||
endif
|
||||
|
||||
.SUFFIXES: .o .f90 .C .for .cu
|
||||
|
||||
@@ -45,14 +45,6 @@ endif
|
||||
.cu.o:
|
||||
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
||||
|
||||
# CUDA rewrite of BSSN RHS (drop-in replacement for bssn_rhs_c + stencil helpers)
|
||||
bssn_rhs_cuda.o: bssn_rhs_cuda.cu bssn_rhs.h macrodef.h
|
||||
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
||||
|
||||
# CUDA rewrite of Z4C Cartesian RHS
|
||||
z4c_rhs_cuda.o: z4c_rhs_cuda.cu z4c_rhs_cuda.h bssn_rhs.h macrodef.h ricci_gamma.h
|
||||
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
||||
|
||||
# C rewrite of BSSN RHS kernel and helpers
|
||||
bssn_rhs_c.o: bssn_rhs_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
@@ -66,17 +58,14 @@ fdderivs_c.o: fdderivs_c.C
|
||||
kodiss_c.o: kodiss_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
lopsided_c.o: lopsided_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
lopsided_kodis_c.o: lopsided_kodis_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
z4c_rhs_c.o: z4c_rhs_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
#interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h
|
||||
# ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
lopsided_c.o: lopsided_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
lopsided_kodis_c.o: lopsided_kodis_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
#interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h
|
||||
# ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS
|
||||
TP_PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata
|
||||
@@ -92,63 +81,22 @@ TwoPunctureABE.o: TwoPunctureABE.C
|
||||
|
||||
# Input files
|
||||
|
||||
## CUDA BSSN RHS switch
|
||||
## 1 : use the rewritten CUDA bssn_rhs backend
|
||||
## 0 : keep the normal CPU/Fortran selection below
|
||||
USE_CUDA_BSSN ?= 0
|
||||
USE_CUDA_Z4C ?= 0
|
||||
|
||||
CXXAPPFLAGS += -DUSE_CUDA_BSSN=$(USE_CUDA_BSSN)
|
||||
CUDA_APP_FLAGS += -DUSE_CUDA_BSSN=$(USE_CUDA_BSSN)
|
||||
CXXAPPFLAGS += -DUSE_CUDA_Z4C=$(USE_CUDA_Z4C)
|
||||
CUDA_APP_FLAGS += -DUSE_CUDA_Z4C=$(USE_CUDA_Z4C)
|
||||
|
||||
## Kernel implementation switch (set USE_CXX_KERNELS=0 to fall back to Fortran)
|
||||
ifeq ($(USE_CXX_KERNELS),0)
|
||||
# Fortran mode: no C rewrite files; bssn_rhs.o is included via F90FILES below
|
||||
CFILES_CPU =
|
||||
else
|
||||
# C++ mode (default): C rewrite of bssn_rhs and helper kernels
|
||||
CFILES_CPU = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_kodis_c.o
|
||||
endif
|
||||
|
||||
CFILES_CUDA_BSSN = bssn_rhs_cuda.o
|
||||
|
||||
ifeq ($(USE_CUDA_BSSN),1)
|
||||
CFILES = $(CFILES_CUDA_BSSN)
|
||||
else
|
||||
CFILES = $(CFILES_CPU)
|
||||
endif
|
||||
|
||||
ifeq ($(USE_CUDA_Z4C),1)
|
||||
CFILES += z4c_rhs_cuda.o
|
||||
Z4C_F90_OBJ =
|
||||
else ifeq ($(USE_CXX_Z4C_KERNELS),1)
|
||||
CFILES += z4c_rhs_c.o
|
||||
Z4C_F90_OBJ =
|
||||
else
|
||||
Z4C_F90_OBJ = Z4c_rhs.o
|
||||
endif
|
||||
|
||||
## RK4 kernel switch (independent from USE_CXX_KERNELS)
|
||||
ifeq ($(USE_CXX_RK4),1)
|
||||
RK4_C_OBJ = rungekutta4_rout_c.o
|
||||
RK4_F90_OBJ =
|
||||
else
|
||||
RK4_C_OBJ =
|
||||
RK4_F90_OBJ = rungekutta4_rout.o
|
||||
endif
|
||||
|
||||
CFILES += $(RK4_C_OBJ)
|
||||
ABE_CUDA_CFILES = $(CFILES_CUDA_BSSN) z4c_rhs_cuda.o $(RK4_C_OBJ)
|
||||
|
||||
ABE_LDLIBS = $(LDLIBS)
|
||||
ifeq ($(USE_CUDA_BSSN),1)
|
||||
ABE_LDLIBS += -lcudart $(CUDA_LIB_PATH)
|
||||
endif
|
||||
ifeq ($(USE_CUDA_Z4C),1)
|
||||
ABE_LDLIBS += -lcudart $(CUDA_LIB_PATH)
|
||||
endif
|
||||
ifeq ($(USE_CXX_KERNELS),0)
|
||||
# Fortran mode: no C rewrite files; bssn_rhs.o is included via F90FILES below
|
||||
CFILES =
|
||||
else
|
||||
# C++ mode (default): C rewrite of bssn_rhs and helper kernels
|
||||
CFILES = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_kodis_c.o
|
||||
endif
|
||||
|
||||
## RK4 kernel switch (independent from USE_CXX_KERNELS)
|
||||
ifeq ($(USE_CXX_RK4),1)
|
||||
CFILES += rungekutta4_rout_c.o
|
||||
RK4_F90_OBJ =
|
||||
else
|
||||
RK4_F90_OBJ = rungekutta4_rout.o
|
||||
endif
|
||||
|
||||
C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
||||
cgh.o bssn_class.o surface_integral.o ShellPatch.o\
|
||||
@@ -157,7 +105,7 @@ C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
||||
Parallel_bam.o scalar_class.o transpbh.o NullShellPatch2.o\
|
||||
NullShellPatch2_Evo.o writefile_f.o interp_lb_profile.o
|
||||
|
||||
#C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
||||
C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
||||
cgh.o surface_integral.o ShellPatch.o\
|
||||
bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o\
|
||||
bssnEM_class.o cpbc_util.o z4c_rhs_point.o checkpoint.o\
|
||||
@@ -165,13 +113,13 @@ C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
||||
NullShellPatch2_Evo.o \
|
||||
bssn_gpu_class.o bssn_step_gpu.o bssn_macro.o writefile_f.o
|
||||
|
||||
F90FILES_BASE = enforce_algebra.o fmisc.o initial_puncture.o prolongrestrict.o\
|
||||
prolongrestrict_cell.o prolongrestrict_vertex.o\
|
||||
$(RK4_F90_OBJ) diff_new.o kodiss.o kodiss_sh.o\
|
||||
lopsidediff.o sommerfeld_rout.o getnp4.o diff_new_sh.o\
|
||||
shellfunctions.o bssn_rhs_ss.o Set_Rho_ADM.o\
|
||||
getnp4EScalar.o bssnEScalar_rhs.o bssn_constraint.o ricci_gamma.o\
|
||||
fadmquantites_bssn.o $(Z4C_F90_OBJ) Z4c_rhs_ss.o point_diff_new_sh.o\
|
||||
F90FILES_BASE = enforce_algebra.o fmisc.o initial_puncture.o prolongrestrict.o\
|
||||
prolongrestrict_cell.o prolongrestrict_vertex.o\
|
||||
$(RK4_F90_OBJ) diff_new.o kodiss.o kodiss_sh.o\
|
||||
lopsidediff.o sommerfeld_rout.o getnp4.o diff_new_sh.o\
|
||||
shellfunctions.o bssn_rhs_ss.o Set_Rho_ADM.o\
|
||||
getnp4EScalar.o bssnEScalar_rhs.o bssn_constraint.o ricci_gamma.o\
|
||||
fadmquantites_bssn.o Z4c_rhs.o Z4c_rhs_ss.o point_diff_new_sh.o\
|
||||
cpbc.o getnp4old.o NullEvol.o initial_null.o initial_maxwell.o\
|
||||
getnpem2.o empart.o NullNews.o fourdcurvature.o\
|
||||
bssn2adm.o adm_constraint.o adm_ricci_gamma.o\
|
||||
@@ -195,10 +143,10 @@ initial_guess.o Newton.o Jacobian.o ilucg.o IntPnts0.o IntPnts.o
|
||||
|
||||
TwoPunctureFILES = TwoPunctureABE.o TwoPunctures.o
|
||||
|
||||
#CUDAFILES = bssn_gpu.o bssn_gpu_rhs_ss.o
|
||||
CUDAFILES = bssn_gpu.o bssn_gpu_rhs_ss.o
|
||||
|
||||
# file dependences
|
||||
$(C++FILES) $(C++FILES_GPU) $(F90FILES) $(CFILES) $(ABE_CUDA_CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.fh
|
||||
$(C++FILES) $(C++FILES_GPU) $(F90FILES) $(CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.fh
|
||||
|
||||
$(C++FILES): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
|
||||
misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\
|
||||
@@ -209,7 +157,7 @@ $(C++FILES): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
|
||||
empart.h NullNews.h kodiss.h Parallel_bam.h ricci_gamma.h\
|
||||
initial_null2.h NullShellPatch2.h
|
||||
|
||||
#$(C++FILES_GPU): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
|
||||
$(C++FILES_GPU): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
|
||||
misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\
|
||||
rungekutta4_rout.h var.h bssn_rhs.h sommerfeld_rout.h\
|
||||
cgh.h surface_integral.h ShellPatch.h shellfunctions.h perf.h\
|
||||
@@ -221,7 +169,7 @@ $(C++FILES): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
|
||||
|
||||
$(AHFDOBJS): cctk.h cctk_Config.h cctk_Types.h cctk_Constants.h myglobal.h
|
||||
|
||||
$(C++FILES) $(C++FILES_GPU) $(CFILES) $(ABE_CUDA_CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.h
|
||||
$(C++FILES) $(C++FILES_GPU) $(CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.h
|
||||
|
||||
TwoPunctureFILES: TwoPunctures.h
|
||||
|
||||
@@ -231,18 +179,13 @@ misc.o : zbesh.o
|
||||
|
||||
# projects
|
||||
ABE: $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
|
||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(ABE_LDLIBS)
|
||||
|
||||
ABE_CUDA: USE_CUDA_BSSN=1
|
||||
ABE_CUDA: USE_CUDA_Z4C=1
|
||||
ABE_CUDA: $(C++FILES) $(ABE_CUDA_CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
|
||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(ABE_CUDA_CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS) -lcudart $(CUDA_LIB_PATH)
|
||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS)
|
||||
|
||||
#ABEGPU: $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
|
||||
# $(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
|
||||
ABEGPU: $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
|
||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
|
||||
|
||||
TwoPunctureABE: $(TwoPunctureFILES)
|
||||
$(CLINKER) $(TP_OPTFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
|
||||
|
||||
clean:
|
||||
rm *.o ABE ABE_CUDA ABEGPU TwoPunctureABE make.log -f
|
||||
rm *.o ABE ABEGPU TwoPunctureABE make.log -f
|
||||
|
||||
@@ -29,6 +29,9 @@ endif
|
||||
## instrument : PGO Phase 1 instrumentation to collect fresh profile data
|
||||
PGO_MODE ?= opt
|
||||
|
||||
## OpenMP switch for C/C++ kernels
|
||||
OPENMP_FLAGS ?= -qopenmp
|
||||
|
||||
## Interp_Points load balance profiling mode
|
||||
## off : (default) no load balance instrumentation
|
||||
## profile : Pass 1 — instrument Interp_Points to collect timing profile
|
||||
@@ -48,11 +51,6 @@ endif
|
||||
## 0 : fall back to original Fortran kernels
|
||||
USE_CXX_KERNELS ?= 1
|
||||
|
||||
## Z4C Cartesian RHS kernel switch
|
||||
## 1 (default) : use C++ rewrite of Z4c_rhs (main Cartesian path faster)
|
||||
## 0 : use original Fortran Z4c_rhs.o
|
||||
USE_CXX_Z4C_KERNELS ?= 1
|
||||
|
||||
## RK4 kernel implementation switch
|
||||
## 1 (default) : use C/C++ rewrite of rungekutta4_rout (for optimization experiments)
|
||||
## 0 : use original Fortran rungekutta4_rout.o
|
||||
@@ -68,7 +66,3 @@ Cu = nvcc
|
||||
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include
|
||||
#CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -arch compute_13 -code compute_13,sm_13 -Dfortran3 -Dnewc
|
||||
CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc
|
||||
CUDA_ARCH ?= sm_80
|
||||
ifneq ($(strip $(CUDA_ARCH)),)
|
||||
CUDA_APP_FLAGS += -arch=$(CUDA_ARCH)
|
||||
endif
|
||||
|
||||
@@ -317,9 +317,9 @@ void scalar_class::Setup_Initial_Data()
|
||||
#endif
|
||||
}
|
||||
}
|
||||
void scalar_class::Evolve(int Steps)
|
||||
{
|
||||
clock_t prev_clock, curr_clock;
|
||||
void scalar_class::Evolve(int Steps)
|
||||
{
|
||||
double prev_clock = 0.0, curr_clock = 0.0;
|
||||
double LastDump = 0.0, LastCheck = 0.0;
|
||||
LastAnas = 0;
|
||||
|
||||
@@ -327,8 +327,8 @@ void scalar_class::Evolve(int Steps)
|
||||
|
||||
for (int ncount = 1; ncount < Steps + 1; ncount++)
|
||||
{
|
||||
if (myrank == 0)
|
||||
curr_clock = clock();
|
||||
if (myrank == 0)
|
||||
curr_clock = MPI_Wtime();
|
||||
RecursiveStep(0);
|
||||
|
||||
LastDump += dT_mon;
|
||||
@@ -343,13 +343,13 @@ void scalar_class::Evolve(int Steps)
|
||||
#endif
|
||||
LastDump = 0;
|
||||
}
|
||||
if (myrank == 0)
|
||||
{
|
||||
prev_clock = curr_clock;
|
||||
curr_clock = clock();
|
||||
cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime
|
||||
<< " Computer used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds! " << endl;
|
||||
}
|
||||
if (myrank == 0)
|
||||
{
|
||||
prev_clock = curr_clock;
|
||||
curr_clock = MPI_Wtime();
|
||||
cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime
|
||||
<< " Computer used " << (curr_clock - prev_clock) << " seconds! " << endl;
|
||||
}
|
||||
if (PhysTime >= TotalTime)
|
||||
break;
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -27,24 +27,19 @@ using namespace std;
|
||||
class surface_integral
|
||||
{
|
||||
|
||||
private:
|
||||
int Symmetry, factor;
|
||||
int N_theta, N_phi; // Number of points in Theta & Phi directions
|
||||
double dphi, dcostheta;
|
||||
double *arcostheta, *wtcostheta;
|
||||
int n_tot; // size of arrays
|
||||
|
||||
double *nx_g, *ny_g, *nz_g; // global list of unit normals
|
||||
int myrank, cpusize;
|
||||
int wave_cache_spinw, wave_cache_maxl, wave_cache_modes;
|
||||
double *wave_theta_pos, *wave_theta_neg;
|
||||
double *wave_phi_cos, *wave_phi_sin;
|
||||
void clear_wave_cache();
|
||||
void build_wave_cache(int spinw, int maxl);
|
||||
|
||||
public:
|
||||
surface_integral(int iSymmetry);
|
||||
~surface_integral();
|
||||
private:
|
||||
int Symmetry, factor;
|
||||
int N_theta, N_phi; // Number of points in Theta & Phi directions
|
||||
double dphi, dcostheta;
|
||||
double *arcostheta, *wtcostheta;
|
||||
int n_tot; // size of arrays
|
||||
|
||||
double *nx_g, *ny_g, *nz_g; // global list of unit normals
|
||||
int myrank, cpusize;
|
||||
|
||||
public:
|
||||
surface_integral(int iSymmetry);
|
||||
~surface_integral();
|
||||
|
||||
void surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *Ipsi4,
|
||||
int spinw, int maxl, int NN, double *RP, double *IP,
|
||||
@@ -82,37 +77,21 @@ public:
|
||||
double &, double &, double &, double &, double &, double &, double &,
|
||||
double &, double &, double &, double &, double &, double &,
|
||||
double &, double &)); // NN is the length of RP and IP
|
||||
void surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var *trK,
|
||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||
var *Gmx, var *Gmy, var *Gmz,
|
||||
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs,
|
||||
double *Rout, monitor *Monitor, bool refresh_mass_fields = true);
|
||||
void surf_MassPAng(double rex, int lev, ShellPatch *GH, var *chi, var *trK,
|
||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||
var *Gmx, var *Gmy, var *Gmz,
|
||||
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs,
|
||||
double *Rout, monitor *Monitor, bool refresh_mass_fields = true);
|
||||
void surf_WaveMassPAng(double rex, int lev, cgh *GH,
|
||||
var *Rpsi4, var *Ipsi4, int spinw, int maxl, int NN, double *RP, double *IP,
|
||||
var *chi, var *trK,
|
||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||
var *Gmx, var *Gmy, var *Gmz,
|
||||
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs,
|
||||
double *Rout, monitor *Monitor, bool refresh_mass_fields = true);
|
||||
void surf_WaveMassPAng(double rex, int lev, ShellPatch *GH,
|
||||
var *Rpsi4, var *Ipsi4, int spinw, int maxl, int NN, double *RP, double *IP,
|
||||
var *chi, var *trK,
|
||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||
var *Gmx, var *Gmy, var *Gmz,
|
||||
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs,
|
||||
double *Rout, monitor *Monitor, bool refresh_mass_fields = true);
|
||||
void surf_Wave(double rex, cgh *GH, ShellPatch *SH,
|
||||
var *chi, var *trK,
|
||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||
void surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var *trK,
|
||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||
var *Gmx, var *Gmy, var *Gmz,
|
||||
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs,
|
||||
double *Rout, monitor *Monitor);
|
||||
void surf_MassPAng(double rex, int lev, ShellPatch *GH, var *chi, var *trK,
|
||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||
var *Gmx, var *Gmy, var *Gmz,
|
||||
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs,
|
||||
double *Rout, monitor *Monitor);
|
||||
void surf_Wave(double rex, cgh *GH, ShellPatch *SH,
|
||||
var *chi, var *trK,
|
||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||
var *chix, var *chiy, var *chiz,
|
||||
var *trKx, var *trKy, var *trKz,
|
||||
@@ -131,12 +110,12 @@ public:
|
||||
bool SR_Interp_Points(MyList<var> *VarList, cgh *GH, ShellPatch *SH,
|
||||
int NN, double **XX, double *Shellf);
|
||||
|
||||
void surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var *trK,
|
||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||
var *Gmx, var *Gmy, var *Gmz,
|
||||
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs, // temparay memory for mass^i
|
||||
double *Rout, monitor *Monitor, MPI_Comm Comm_here, bool refresh_mass_fields = true);
|
||||
void surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var *trK,
|
||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||
var *Gmx, var *Gmy, var *Gmz,
|
||||
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs, // temparay memory for mass^i
|
||||
double *Rout, monitor *Monitor, MPI_Comm Comm_here);
|
||||
void surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *Ipsi4,
|
||||
int spinw, int maxl, int NN, double *RP, double *IP,
|
||||
monitor *Monitor, MPI_Comm Comm_here);
|
||||
|
||||
@@ -1,725 +0,0 @@
|
||||
#include "macrodef.h"
|
||||
#include "bssn_rhs.h"
|
||||
#include "fmisc.h"
|
||||
#include "ricci_gamma.h"
|
||||
#include "share_func.h"
|
||||
#include "tool.h"
|
||||
#include <vector>
|
||||
|
||||
#ifdef fortran1
|
||||
#define f_constraint_bssn constraint_bssn
|
||||
#define f_z4c_rhs_point z4c_rhs_point
|
||||
#endif
|
||||
#ifdef fortran2
|
||||
#define f_constraint_bssn CONSTRAINT_BSSN
|
||||
#define f_z4c_rhs_point Z4C_RHS_POINT
|
||||
#endif
|
||||
#ifdef fortran3
|
||||
#define f_constraint_bssn constraint_bssn_
|
||||
#define f_z4c_rhs_point z4c_rhs_point_
|
||||
#endif
|
||||
|
||||
extern "C" void f_constraint_bssn(int *, double *, double *, double *,
|
||||
double *, double *,
|
||||
double *, double *, double *, double *, double *, double *,
|
||||
double *, double *, double *, double *, double *, double *,
|
||||
double *, double *, double *,
|
||||
double *, double *, double *, double *, double *, double *, double *, double *, double *, double *,
|
||||
double *, double *, double *, double *, double *, double *,
|
||||
double *, double *, double *, double *, double *, double *,
|
||||
double *, double *, double *, double *, double *, double *,
|
||||
double *, double *, double *, double *, double *, double *, double *, double *,
|
||||
double *, double *, double *,
|
||||
int &);
|
||||
|
||||
extern "C" void f_z4c_rhs_point(
|
||||
double &A11,
|
||||
double &A12,
|
||||
double &A13,
|
||||
double &A22,
|
||||
double &A23,
|
||||
double &A33,
|
||||
double &alpha,
|
||||
double &B1,
|
||||
double &B2,
|
||||
double &B3,
|
||||
double &beta1,
|
||||
double &beta2,
|
||||
double &beta3,
|
||||
double &chi,
|
||||
double &chiDivFloor,
|
||||
double &da1,
|
||||
double &dA111,
|
||||
double &dA112,
|
||||
double &dA113,
|
||||
double &dA122,
|
||||
double &dA123,
|
||||
double &dA133,
|
||||
double &da2,
|
||||
double &dA211,
|
||||
double &dA212,
|
||||
double &dA213,
|
||||
double &dA222,
|
||||
double &dA223,
|
||||
double &dA233,
|
||||
double &da3,
|
||||
double &dA311,
|
||||
double &dA312,
|
||||
double &dA313,
|
||||
double &dA322,
|
||||
double &dA323,
|
||||
double &dA333,
|
||||
double &db11,
|
||||
double &dB11,
|
||||
double &db12,
|
||||
double &dB12,
|
||||
double &db13,
|
||||
double &dB13,
|
||||
double &db21,
|
||||
double &dB21,
|
||||
double &db22,
|
||||
double &dB22,
|
||||
double &db23,
|
||||
double &dB23,
|
||||
double &db31,
|
||||
double &dB31,
|
||||
double &db32,
|
||||
double &dB32,
|
||||
double &db33,
|
||||
double &dB33,
|
||||
double &dchi1,
|
||||
double &dchi2,
|
||||
double &dchi3,
|
||||
double &dda11,
|
||||
double &dda12,
|
||||
double &dda13,
|
||||
double &dda22,
|
||||
double &dda23,
|
||||
double &dda33,
|
||||
double &ddb111,
|
||||
double &ddb112,
|
||||
double &ddb113,
|
||||
double &ddb121,
|
||||
double &ddb122,
|
||||
double &ddb123,
|
||||
double &ddb131,
|
||||
double &ddb132,
|
||||
double &ddb133,
|
||||
double &ddb221,
|
||||
double &ddb222,
|
||||
double &ddb223,
|
||||
double &ddb231,
|
||||
double &ddb232,
|
||||
double &ddb233,
|
||||
double &ddb331,
|
||||
double &ddb332,
|
||||
double &ddb333,
|
||||
double &ddchi11,
|
||||
double &ddchi12,
|
||||
double &ddchi13,
|
||||
double &ddchi22,
|
||||
double &ddchi23,
|
||||
double &ddchi33,
|
||||
double &deldelg1111,
|
||||
double &deldelg1112,
|
||||
double &deldelg1113,
|
||||
double &deldelg1122,
|
||||
double &deldelg1123,
|
||||
double &deldelg1133,
|
||||
double &deldelg1211,
|
||||
double &deldelg1212,
|
||||
double &deldelg1213,
|
||||
double &deldelg1222,
|
||||
double &deldelg1223,
|
||||
double &deldelg1233,
|
||||
double &deldelg1311,
|
||||
double &deldelg1312,
|
||||
double &deldelg1313,
|
||||
double &deldelg1322,
|
||||
double &deldelg1323,
|
||||
double &deldelg1333,
|
||||
double &deldelg2211,
|
||||
double &deldelg2212,
|
||||
double &deldelg2213,
|
||||
double &deldelg2222,
|
||||
double &deldelg2223,
|
||||
double &deldelg2233,
|
||||
double &deldelg2311,
|
||||
double &deldelg2312,
|
||||
double &deldelg2313,
|
||||
double &deldelg2322,
|
||||
double &deldelg2323,
|
||||
double &deldelg2333,
|
||||
double &deldelg3311,
|
||||
double &deldelg3312,
|
||||
double &deldelg3313,
|
||||
double &deldelg3322,
|
||||
double &deldelg3323,
|
||||
double &deldelg3333,
|
||||
double &delG11,
|
||||
double &delg111,
|
||||
double &delg112,
|
||||
double &delg113,
|
||||
double &delG12,
|
||||
double &delg122,
|
||||
double &delg123,
|
||||
double &delG13,
|
||||
double &delg133,
|
||||
double &delG21,
|
||||
double &delg211,
|
||||
double &delg212,
|
||||
double &delg213,
|
||||
double &delG22,
|
||||
double &delg222,
|
||||
double &delg223,
|
||||
double &delG23,
|
||||
double &delg233,
|
||||
double &delG31,
|
||||
double &delg311,
|
||||
double &delg312,
|
||||
double &delg313,
|
||||
double &delG32,
|
||||
double &delg322,
|
||||
double &delg323,
|
||||
double &delG33,
|
||||
double &delg333,
|
||||
double &dKhat1,
|
||||
double &dKhat2,
|
||||
double &dKhat3,
|
||||
double &dTheta1,
|
||||
double &dTheta2,
|
||||
double &dTheta3,
|
||||
double &G1,
|
||||
double &g11,
|
||||
double &g12,
|
||||
double &g13,
|
||||
double &G2,
|
||||
double &g22,
|
||||
double &g23,
|
||||
double &G3,
|
||||
double &g33,
|
||||
double &kappa1,
|
||||
double &kappa2,
|
||||
double &Khat,
|
||||
double &rA11,
|
||||
double &rA12,
|
||||
double &rA13,
|
||||
double &rA22,
|
||||
double &rA23,
|
||||
double &rA33,
|
||||
double &rchi,
|
||||
double &rG1,
|
||||
double &rg11,
|
||||
double &rg12,
|
||||
double &rg13,
|
||||
double &rG2,
|
||||
double &rg22,
|
||||
double &rg23,
|
||||
double &rG3,
|
||||
double &rg33,
|
||||
double &rKhat,
|
||||
double &rTheta,
|
||||
double &Theta);
|
||||
|
||||
static inline void z4c_contract_gamma(
|
||||
const double gxx, const double gxy, const double gxz,
|
||||
const double gyy, const double gyz, const double gzz,
|
||||
const double gxxx, const double gxyx, const double gxzx,
|
||||
const double gyyx, const double gyzx, const double gzzx,
|
||||
const double gxxy, const double gxyy, const double gxzy,
|
||||
const double gyyy, const double gyzy, const double gzzy,
|
||||
const double gxxz, const double gxyz, const double gxzz,
|
||||
const double gyyz, const double gyzz, const double gzzz,
|
||||
double &Gamxa, double &Gamya, double &Gamza)
|
||||
{
|
||||
double det = gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz -
|
||||
gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz;
|
||||
const double gupxx = (gyy * gzz - gyz * gyz) / det;
|
||||
const double gupxy = -(gxy * gzz - gyz * gxz) / det;
|
||||
const double gupxz = (gxy * gyz - gyy * gxz) / det;
|
||||
const double gupyy = (gxx * gzz - gxz * gxz) / det;
|
||||
const double gupyz = -(gxx * gyz - gxy * gxz) / det;
|
||||
const double gupzz = (gxx * gyy - gxy * gxy) / det;
|
||||
|
||||
const double Gamxxx = 0.5 * (gupxx * gxxx + gupxy * (2.0 * gxyx - gxxy) + gupxz * (2.0 * gxzx - gxxz));
|
||||
const double Gamyxx = 0.5 * (gupxy * gxxx + gupyy * (2.0 * gxyx - gxxy) + gupyz * (2.0 * gxzx - gxxz));
|
||||
const double Gamzxx = 0.5 * (gupxz * gxxx + gupyz * (2.0 * gxyx - gxxy) + gupzz * (2.0 * gxzx - gxxz));
|
||||
|
||||
const double Gamxyy = 0.5 * (gupxx * (2.0 * gxyy - gyyx) + gupxy * gyyy + gupxz * (2.0 * gyzy - gyyz));
|
||||
const double Gamyyy = 0.5 * (gupxy * (2.0 * gxyy - gyyx) + gupyy * gyyy + gupyz * (2.0 * gyzy - gyyz));
|
||||
const double Gamzyy = 0.5 * (gupxz * (2.0 * gxyy - gyyx) + gupyz * gyyy + gupzz * (2.0 * gyzy - gyyz));
|
||||
|
||||
const double Gamxzz = 0.5 * (gupxx * (2.0 * gxzz - gzzx) + gupxy * (2.0 * gyzz - gzzy) + gupxz * gzzz);
|
||||
const double Gamyzz = 0.5 * (gupxy * (2.0 * gxzz - gzzx) + gupyy * (2.0 * gyzz - gzzy) + gupyz * gzzz);
|
||||
const double Gamzzz = 0.5 * (gupxz * (2.0 * gxzz - gzzx) + gupyz * (2.0 * gyzz - gzzy) + gupzz * gzzz);
|
||||
|
||||
const double Gamxxy = 0.5 * (gupxx * gxxy + gupxy * gyyx + gupxz * (gxzy + gyzx - gxyz));
|
||||
const double Gamyxy = 0.5 * (gupxy * gxxy + gupyy * gyyx + gupyz * (gxzy + gyzx - gxyz));
|
||||
const double Gamzxy = 0.5 * (gupxz * gxxy + gupyz * gyyx + gupzz * (gxzy + gyzx - gxyz));
|
||||
|
||||
const double Gamxxz = 0.5 * (gupxx * gxxz + gupxy * (gxyz + gyzx - gxzy) + gupxz * gzzx);
|
||||
const double Gamyxz = 0.5 * (gupxy * gxxz + gupyy * (gxyz + gyzx - gxzy) + gupyz * gzzx);
|
||||
const double Gamzxz = 0.5 * (gupxz * gxxz + gupyz * (gxyz + gyzx - gxzy) + gupzz * gzzx);
|
||||
|
||||
const double Gamxyz = 0.5 * (gupxx * (gxyz + gxzy - gyzx) + gupxy * gyyz + gupxz * gzzy);
|
||||
const double Gamyyz = 0.5 * (gupxy * (gxyz + gxzy - gyzx) + gupyy * gyyz + gupyz * gzzy);
|
||||
const double Gamzyz = 0.5 * (gupxz * (gxyz + gxzy - gyzx) + gupyz * gyyz + gupzz * gzzy);
|
||||
|
||||
Gamxa = gupxx * Gamxxx + gupyy * Gamxyy + gupzz * Gamxzz +
|
||||
2.0 * (gupxy * Gamxxy + gupxz * Gamxxz + gupyz * Gamxyz);
|
||||
Gamya = gupxx * Gamyxx + gupyy * Gamyyy + gupzz * Gamyzz +
|
||||
2.0 * (gupxy * Gamyxy + gupxz * Gamyxz + gupyz * Gamyyz);
|
||||
Gamza = gupxx * Gamzxx + gupyy * Gamzyy + gupzz * Gamzzz +
|
||||
2.0 * (gupxy * Gamzxy + gupxz * Gamzxz + gupyz * Gamzyz);
|
||||
}
|
||||
|
||||
static int compute_rhs_z4c_cartesian(
|
||||
int *ex, double &T, double *X, double *Y, double *Z,
|
||||
double *chi_state, double *chi_constraints, double *trK,
|
||||
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
|
||||
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
|
||||
double *Gamx, double *Gamy, double *Gamz,
|
||||
double *Lap, double *betax, double *betay, double *betaz,
|
||||
double *dtSfx, double *dtSfy, double *dtSfz,
|
||||
double *TZ,
|
||||
double *chi_rhs, double *trK_rhs,
|
||||
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
|
||||
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
|
||||
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
|
||||
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
|
||||
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
|
||||
double *TZ_rhs,
|
||||
double *rho, double *Sx, double *Sy, double *Sz,
|
||||
double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
|
||||
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
|
||||
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
|
||||
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
|
||||
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
|
||||
double *Hcon, double *Mxcon, double *Mycon, double *Mzcon, double *Gmxcon, double *Gmycon, double *Gmzcon,
|
||||
int &Symmetry, int &Lev, double &eps, int &co)
|
||||
{
|
||||
(void)T;
|
||||
|
||||
const int nx = ex[0];
|
||||
const int ny = ex[1];
|
||||
const int nz = ex[2];
|
||||
const int all = nx * ny * nz;
|
||||
|
||||
double alpn1[all], chin1[all], gxx[all], gyy[all], gzz[all];
|
||||
double chix[all], chiy[all], chiz[all], chixx[all], chixy[all], chixz[all], chiyy[all], chiyz[all], chizz[all];
|
||||
double gxxx[all], gxyx[all], gxzx[all], gyyx[all], gyzx[all], gzzx[all];
|
||||
double gxxy[all], gxyy[all], gxzy[all], gyyy[all], gyzy[all], gzzy[all];
|
||||
double gxxz[all], gxyz[all], gxzz[all], gyyz[all], gyzz[all], gzzz[all];
|
||||
double gxxxx[all], gxxxy[all], gxxxz[all], gxxyy[all], gxxyz[all], gxxzz[all];
|
||||
double gxyxx[all], gxyxy[all], gxyxz[all], gxyyy[all], gxyyz[all], gxyzz[all];
|
||||
double gxzxx[all], gxzxy[all], gxzxz[all], gxzyy[all], gxzyz[all], gxzzz[all];
|
||||
double gyyxx[all], gyyxy[all], gyyxz[all], gyyyy[all], gyyyz[all], gyyzz[all];
|
||||
double gyzxx[all], gyzxy[all], gyzxz[all], gyzyy[all], gyzyz[all], gyzzz[all];
|
||||
double gzzxx[all], gzzxy[all], gzzxz[all], gzzyy[all], gzzyz[all], gzzzz[all];
|
||||
double Lapx[all], Lapy[all], Lapz[all], Lapxx[all], Lapxy[all], Lapxz[all], Lapyy[all], Lapyz[all], Lapzz[all];
|
||||
double betaxx[all], betaxy[all], betaxz[all], betayx[all], betayy[all], betayz[all], betazx[all], betazy[all], betazz[all];
|
||||
double dBxx[all], dBxy[all], dBxz[all], dByx[all], dByy[all], dByz[all], dBzx[all], dBzy[all], dBzz[all];
|
||||
double sfxxx[all], sfxxy[all], sfxxz[all], sfxyy[all], sfxyz[all], sfxzz[all];
|
||||
double sfyxx[all], sfyxy[all], sfyxz[all], sfyyy[all], sfyyz[all], sfyzz[all];
|
||||
double sfzxx[all], sfzxy[all], sfzxz[all], sfzyy[all], sfzyz[all], sfzzz[all];
|
||||
double Gamxx[all], Gamxy[all], Gamxz[all], Gamyx[all], Gamyy[all], Gamyz[all], Gamzx[all], Gamzy[all], Gamzz[all];
|
||||
double Kx[all], Ky[all], Kz[all], TZx[all], TZy[all], TZz[all];
|
||||
double Axxx[all], Axxy[all], Axxz[all], Axyx[all], Axyy[all], Axyz[all];
|
||||
double Axzx[all], Axzy[all], Axzz[all], Ayyx[all], Ayyy[all], Ayyz[all];
|
||||
double Ayzx[all], Ayzy[all], Ayzz[all], Azzx[all], Azzy[all], Azzz[all];
|
||||
|
||||
const double SSS[3] = {1.0, 1.0, 1.0};
|
||||
const double AAS[3] = {-1.0, -1.0, 1.0};
|
||||
const double ASA[3] = {-1.0, 1.0, -1.0};
|
||||
const double SAA[3] = {1.0, -1.0, -1.0};
|
||||
const double ASS[3] = {-1.0, 1.0, 1.0};
|
||||
const double SAS[3] = {1.0, -1.0, 1.0};
|
||||
const double SSA[3] = {1.0, 1.0, -1.0};
|
||||
|
||||
const double ONE = 1.0;
|
||||
const double TWO = 2.0;
|
||||
const double ZEO = 0.0;
|
||||
double chiDivfloor = 1.0e-5;
|
||||
|
||||
double kappa1 = 2.0e-2;
|
||||
double kappa2 = 0.0;
|
||||
double FF = 0.75;
|
||||
double eta = 2.0;
|
||||
|
||||
for (int idx = 0; idx < all; ++idx)
|
||||
{
|
||||
alpn1[idx] = Lap[idx] + ONE;
|
||||
chin1[idx] = chi_state[idx] + ONE;
|
||||
gxx[idx] = dxx[idx] + ONE;
|
||||
gyy[idx] = dyy[idx] + ONE;
|
||||
gzz[idx] = dzz[idx] + ONE;
|
||||
}
|
||||
|
||||
fderivs(ex, betax, betaxx, betaxy, betaxz, X, Y, Z, -1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, betay, betayx, betayy, betayz, X, Y, Z, 1.0, -1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, betaz, betazx, betazy, betazz, X, Y, Z, 1.0, 1.0, -1.0, Symmetry, Lev);
|
||||
fderivs(ex, dtSfx, dBxx, dBxy, dBxz, X, Y, Z, -1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, dtSfy, dByx, dByy, dByz, X, Y, Z, 1.0, -1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, dtSfz, dBzx, dBzy, dBzz, X, Y, Z, 1.0, 1.0, -1.0, Symmetry, Lev);
|
||||
fderivs(ex, chi_state, chix, chiy, chiz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, dxx, gxxx, gxxy, gxxz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, gxy, gxyx, gxyy, gxyz, X, Y, Z, -1.0, -1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, gxz, gxzx, gxzy, gxzz, X, Y, Z, -1.0, 1.0, -1.0, Symmetry, Lev);
|
||||
fderivs(ex, dyy, gyyx, gyyy, gyyz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, gyz, gyzx, gyzy, gyzz, X, Y, Z, 1.0, -1.0, -1.0, Symmetry, Lev);
|
||||
fderivs(ex, dzz, gzzx, gzzy, gzzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
|
||||
fdderivs(ex, dxx, gxxxx, gxxxy, gxxxz, gxxyy, gxxyz, gxxzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fdderivs(ex, dyy, gyyxx, gyyxy, gyyxz, gyyyy, gyyyz, gyyzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fdderivs(ex, dzz, gzzxx, gzzxy, gzzxz, gzzyy, gzzyz, gzzzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fdderivs(ex, gxy, gxyxx, gxyxy, gxyxz, gxyyy, gxyyz, gxyzz, X, Y, Z, -1.0, -1.0, 1.0, Symmetry, Lev);
|
||||
fdderivs(ex, gxz, gxzxx, gxzxy, gxzxz, gxzyy, gxzyz, gxzzz, X, Y, Z, -1.0, 1.0, -1.0, Symmetry, Lev);
|
||||
fdderivs(ex, gyz, gyzxx, gyzxy, gyzxz, gyzyy, gyzyz, gyzzz, X, Y, Z, 1.0, -1.0, -1.0, Symmetry, Lev);
|
||||
|
||||
fderivs(ex, Gamx, Gamxx, Gamxy, Gamxz, X, Y, Z, -1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, Gamy, Gamyx, Gamyy, Gamyz, X, Y, Z, 1.0, -1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, Gamz, Gamzx, Gamzy, Gamzz, X, Y, Z, 1.0, 1.0, -1.0, Symmetry, Lev);
|
||||
|
||||
fderivs(ex, Lap, Lapx, Lapy, Lapz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, trK, Kx, Ky, Kz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, TZ, TZx, TZy, TZz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
|
||||
fdderivs(ex, betax, sfxxx, sfxxy, sfxxz, sfxyy, sfxyz, sfxzz, X, Y, Z, -1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fdderivs(ex, betay, sfyxx, sfyxy, sfyxz, sfyyy, sfyyz, sfyzz, X, Y, Z, 1.0, -1.0, 1.0, Symmetry, Lev);
|
||||
fdderivs(ex, betaz, sfzxx, sfzxy, sfzxz, sfzyy, sfzyz, sfzzz, X, Y, Z, 1.0, 1.0, -1.0, Symmetry, Lev);
|
||||
|
||||
fdderivs(ex, chi_state, chixx, chixy, chixz, chiyy, chiyz, chizz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fdderivs(ex, Lap, Lapxx, Lapxy, Lapxz, Lapyy, Lapyz, Lapzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
|
||||
fderivs(ex, Axx, Axxx, Axxy, Axxz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, Axy, Axyx, Axyy, Axyz, X, Y, Z, -1.0, -1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, Axz, Axzx, Axzy, Axzz, X, Y, Z, -1.0, 1.0, -1.0, Symmetry, Lev);
|
||||
fderivs(ex, Ayy, Ayyx, Ayyy, Ayyz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, Ayz, Ayzx, Ayzy, Ayzz, X, Y, Z, 1.0, -1.0, -1.0, Symmetry, Lev);
|
||||
fderivs(ex, Azz, Azzx, Azzy, Azzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
|
||||
for (int idx = 0; idx < all; ++idx)
|
||||
{
|
||||
double point_kappa1 = 0.0;
|
||||
f_z4c_rhs_point(
|
||||
Axx[idx], Axy[idx], Axz[idx], Ayy[idx], Ayz[idx], Azz[idx],
|
||||
alpn1[idx], dtSfx[idx], dtSfy[idx], dtSfz[idx],
|
||||
betax[idx], betay[idx], betaz[idx],
|
||||
chin1[idx], chiDivfloor,
|
||||
Lapx[idx],
|
||||
Axxx[idx], Axyx[idx], Axzx[idx], Ayyx[idx], Ayzx[idx], Azzx[idx],
|
||||
Lapy[idx],
|
||||
Axxy[idx], Axyy[idx], Axzy[idx], Ayyy[idx], Ayzy[idx], Azzy[idx],
|
||||
Lapz[idx],
|
||||
Axxz[idx], Axyz[idx], Axzz[idx], Ayyz[idx], Ayzz[idx], Azzz[idx],
|
||||
betaxx[idx], dBxx[idx], betayx[idx], dByx[idx], betazx[idx], dBzx[idx],
|
||||
betaxy[idx], dBxy[idx], betayy[idx], dByy[idx], betazy[idx], dBzy[idx],
|
||||
betaxz[idx], dBxz[idx], betayz[idx], dByz[idx], betazz[idx], dBzz[idx],
|
||||
chix[idx], chiy[idx], chiz[idx],
|
||||
Lapxx[idx], Lapxy[idx], Lapxz[idx], Lapyy[idx], Lapyz[idx], Lapzz[idx],
|
||||
sfxxx[idx], sfyxx[idx], sfzxx[idx],
|
||||
sfxxy[idx], sfyxy[idx], sfzxy[idx],
|
||||
sfxxz[idx], sfyxz[idx], sfzxz[idx],
|
||||
sfxyy[idx], sfyyy[idx], sfzyy[idx],
|
||||
sfxyz[idx], sfyyz[idx], sfzyz[idx],
|
||||
sfxzz[idx], sfyzz[idx], sfzzz[idx],
|
||||
chixx[idx], chixy[idx], chixz[idx], chiyy[idx], chiyz[idx], chizz[idx],
|
||||
gxxxx[idx], gxyxx[idx], gxzxx[idx], gyyxx[idx], gyzxx[idx], gzzxx[idx],
|
||||
gxxxy[idx], gxyxy[idx], gxzxy[idx], gyyxy[idx], gyzxy[idx], gzzxy[idx],
|
||||
gxxxz[idx], gxyxz[idx], gxzxz[idx], gyyxz[idx], gyzxz[idx], gzzxz[idx],
|
||||
gxxyy[idx], gxyyy[idx], gxzyy[idx], gyyyy[idx], gyzyy[idx], gzzyy[idx],
|
||||
gxxyz[idx], gxyyz[idx], gxzyz[idx], gyyyz[idx], gyzyz[idx], gzzyz[idx],
|
||||
gxxzz[idx], gxyzz[idx], gxzzz[idx], gyyzz[idx], gyzzz[idx], gzzzz[idx],
|
||||
Gamxx[idx], gxxx[idx], gxyx[idx], gxzx[idx],
|
||||
Gamyx[idx], gyyx[idx], gyzx[idx],
|
||||
Gamzx[idx], gzzx[idx],
|
||||
Gamxy[idx], gxxy[idx], gxyy[idx], gxzy[idx],
|
||||
Gamyy[idx], gyyy[idx], gyzy[idx],
|
||||
Gamzy[idx], gzzy[idx],
|
||||
Gamxz[idx], gxxz[idx], gxyz[idx], gxzz[idx],
|
||||
Gamyz[idx], gyyz[idx], gyzz[idx],
|
||||
Gamzz[idx], gzzz[idx],
|
||||
Kx[idx], Ky[idx], Kz[idx],
|
||||
TZx[idx], TZy[idx], TZz[idx],
|
||||
Gamx[idx], gxx[idx], gxy[idx], gxz[idx],
|
||||
Gamy[idx], gyy[idx], gyz[idx],
|
||||
Gamz[idx], gzz[idx],
|
||||
point_kappa1, kappa2,
|
||||
trK[idx],
|
||||
Axx_rhs[idx], Axy_rhs[idx], Axz_rhs[idx], Ayy_rhs[idx], Ayz_rhs[idx], Azz_rhs[idx],
|
||||
chi_rhs[idx],
|
||||
Gamx_rhs[idx], gxx_rhs[idx], gxy_rhs[idx], gxz_rhs[idx],
|
||||
Gamy_rhs[idx], gyy_rhs[idx], gyz_rhs[idx],
|
||||
Gamz_rhs[idx], gzz_rhs[idx], trK_rhs[idx], TZ_rhs[idx], TZ[idx]);
|
||||
}
|
||||
|
||||
for (int idx = 0; idx < all; ++idx)
|
||||
Lap_rhs[idx] = -TWO * alpn1[idx] * trK[idx];
|
||||
|
||||
#if (GAUGE == 0)
|
||||
for (int idx = 0; idx < all; ++idx)
|
||||
{
|
||||
betax_rhs[idx] = FF * dtSfx[idx];
|
||||
betay_rhs[idx] = FF * dtSfy[idx];
|
||||
betaz_rhs[idx] = FF * dtSfz[idx];
|
||||
dtSfx_rhs[idx] = Gamx_rhs[idx] - eta * dtSfx[idx];
|
||||
dtSfy_rhs[idx] = Gamy_rhs[idx] - eta * dtSfy[idx];
|
||||
dtSfz_rhs[idx] = Gamz_rhs[idx] - eta * dtSfz[idx];
|
||||
}
|
||||
#elif (GAUGE == 1)
|
||||
for (int idx = 0; idx < all; ++idx)
|
||||
{
|
||||
betax_rhs[idx] = Gamx[idx] - eta * betax[idx];
|
||||
betay_rhs[idx] = Gamy[idx] - eta * betay[idx];
|
||||
betaz_rhs[idx] = Gamz[idx] - eta * betaz[idx];
|
||||
dtSfx_rhs[idx] = ZEO;
|
||||
dtSfy_rhs[idx] = ZEO;
|
||||
dtSfz_rhs[idx] = ZEO;
|
||||
}
|
||||
#else
|
||||
#error "z4c_rhs_c.C currently supports GAUGE == 0 or GAUGE == 1 for Z4C"
|
||||
#endif
|
||||
|
||||
lopsided(ex, X, Y, Z, gxx, gxx_rhs, betax, betay, betaz, Symmetry, SSS);
|
||||
lopsided(ex, X, Y, Z, gxy, gxy_rhs, betax, betay, betaz, Symmetry, AAS);
|
||||
lopsided(ex, X, Y, Z, gxz, gxz_rhs, betax, betay, betaz, Symmetry, ASA);
|
||||
lopsided(ex, X, Y, Z, gyy, gyy_rhs, betax, betay, betaz, Symmetry, SSS);
|
||||
lopsided(ex, X, Y, Z, gyz, gyz_rhs, betax, betay, betaz, Symmetry, SAA);
|
||||
lopsided(ex, X, Y, Z, gzz, gzz_rhs, betax, betay, betaz, Symmetry, SSS);
|
||||
|
||||
lopsided(ex, X, Y, Z, Axx, Axx_rhs, betax, betay, betaz, Symmetry, SSS);
|
||||
lopsided(ex, X, Y, Z, Axy, Axy_rhs, betax, betay, betaz, Symmetry, AAS);
|
||||
lopsided(ex, X, Y, Z, Axz, Axz_rhs, betax, betay, betaz, Symmetry, ASA);
|
||||
lopsided(ex, X, Y, Z, Ayy, Ayy_rhs, betax, betay, betaz, Symmetry, SSS);
|
||||
lopsided(ex, X, Y, Z, Ayz, Ayz_rhs, betax, betay, betaz, Symmetry, SAA);
|
||||
lopsided(ex, X, Y, Z, Azz, Azz_rhs, betax, betay, betaz, Symmetry, SSS);
|
||||
|
||||
lopsided(ex, X, Y, Z, chi_state, chi_rhs, betax, betay, betaz, Symmetry, SSS);
|
||||
lopsided(ex, X, Y, Z, trK, trK_rhs, betax, betay, betaz, Symmetry, SSS);
|
||||
|
||||
lopsided(ex, X, Y, Z, Gamx, Gamx_rhs, betax, betay, betaz, Symmetry, ASS);
|
||||
lopsided(ex, X, Y, Z, Gamy, Gamy_rhs, betax, betay, betaz, Symmetry, SAS);
|
||||
lopsided(ex, X, Y, Z, Gamz, Gamz_rhs, betax, betay, betaz, Symmetry, SSA);
|
||||
|
||||
lopsided(ex, X, Y, Z, Lap, Lap_rhs, betax, betay, betaz, Symmetry, SSS);
|
||||
lopsided(ex, X, Y, Z, betax, betax_rhs, betax, betay, betaz, Symmetry, ASS);
|
||||
lopsided(ex, X, Y, Z, betay, betay_rhs, betax, betay, betaz, Symmetry, SAS);
|
||||
lopsided(ex, X, Y, Z, betaz, betaz_rhs, betax, betay, betaz, Symmetry, SSA);
|
||||
#if (GAUGE == 0)
|
||||
lopsided(ex, X, Y, Z, dtSfx, dtSfx_rhs, betax, betay, betaz, Symmetry, ASS);
|
||||
lopsided(ex, X, Y, Z, dtSfy, dtSfy_rhs, betax, betay, betaz, Symmetry, SAS);
|
||||
lopsided(ex, X, Y, Z, dtSfz, dtSfz_rhs, betax, betay, betaz, Symmetry, SSA);
|
||||
#endif
|
||||
lopsided(ex, X, Y, Z, TZ, TZ_rhs, betax, betay, betaz, Symmetry, SSS);
|
||||
|
||||
for (int idx = 0; idx < all; ++idx)
|
||||
{
|
||||
double Gamxa = 0.0, Gamya = 0.0, Gamza = 0.0;
|
||||
z4c_contract_gamma(
|
||||
gxx[idx], gxy[idx], gxz[idx], gyy[idx], gyz[idx], gzz[idx],
|
||||
gxxx[idx], gxyx[idx], gxzx[idx], gyyx[idx], gyzx[idx], gzzx[idx],
|
||||
gxxy[idx], gxyy[idx], gxzy[idx], gyyy[idx], gyzy[idx], gzzy[idx],
|
||||
gxxz[idx], gxyz[idx], gxzz[idx], gyyz[idx], gyzz[idx], gzzz[idx],
|
||||
Gamxa, Gamya, Gamza);
|
||||
|
||||
TZ_rhs[idx] -= alpn1[idx] * (TWO + kappa2) * kappa1 * TZ[idx];
|
||||
trK_rhs[idx] += alpn1[idx] * kappa1 * (ONE - kappa2) * TZ[idx];
|
||||
Gamx_rhs[idx] -= TWO * alpn1[idx] * kappa1 * (Gamx[idx] - Gamxa);
|
||||
Gamy_rhs[idx] -= TWO * alpn1[idx] * kappa1 * (Gamy[idx] - Gamya);
|
||||
Gamz_rhs[idx] -= TWO * alpn1[idx] * kappa1 * (Gamz[idx] - Gamza);
|
||||
}
|
||||
|
||||
if (eps > 0.0)
|
||||
{
|
||||
kodis(ex, X, Y, Z, chi_state, chi_rhs, SSS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, trK, trK_rhs, SSS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, gxx, gxx_rhs, SSS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, gxy, gxy_rhs, AAS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, gxz, gxz_rhs, ASA, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, gyy, gyy_rhs, SSS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, gyz, gyz_rhs, SAA, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, gzz, gzz_rhs, SSS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, Axx, Axx_rhs, SSS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, Axy, Axy_rhs, AAS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, Axz, Axz_rhs, ASA, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, Ayy, Ayy_rhs, SSS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, Ayz, Ayz_rhs, SAA, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, Azz, Azz_rhs, SSS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, Gamx, Gamx_rhs, ASS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, Gamy, Gamy_rhs, SAS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, Gamz, Gamz_rhs, SSA, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, Lap, Lap_rhs, SSS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, betax, betax_rhs, ASS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, betay, betay_rhs, SAS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, betaz, betaz_rhs, SSA, Symmetry, eps);
|
||||
#if (GAUGE == 0)
|
||||
kodis(ex, X, Y, Z, dtSfx, dtSfx_rhs, ASS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, dtSfy, dtSfy_rhs, SAS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, dtSfz, dtSfz_rhs, SSA, Symmetry, eps);
|
||||
#endif
|
||||
kodis(ex, X, Y, Z, TZ, TZ_rhs, SSS, Symmetry, eps);
|
||||
}
|
||||
|
||||
if (co == 0)
|
||||
{
|
||||
#if (ABV == 0)
|
||||
f_ricci_gamma(ex, X, Y, Z,
|
||||
chi_constraints,
|
||||
dxx, gxy, gxz, dyy, gyz, dzz,
|
||||
Gamx, Gamy, Gamz,
|
||||
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
|
||||
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
|
||||
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
|
||||
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
|
||||
Symmetry);
|
||||
#endif
|
||||
f_constraint_bssn(ex, X, Y, Z,
|
||||
chi_constraints, trK,
|
||||
dxx, gxy, gxz, dyy, gyz, dzz,
|
||||
Axx, Axy, Axz, Ayy, Ayz, Azz,
|
||||
Gamx, Gamy, Gamz,
|
||||
Lap, betax, betay, betaz, rho, Sx, Sy, Sz,
|
||||
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
|
||||
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
|
||||
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
|
||||
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
|
||||
Hcon, Mxcon, Mycon, Mzcon, Gmxcon, Gmycon, Gmzcon,
|
||||
Symmetry);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern "C" int f_compute_rhs_Z4c(int *ex, double &T,
|
||||
double *X, double *Y, double *Z,
|
||||
double *chi, double *trK,
|
||||
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
|
||||
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
|
||||
double *Gamx, double *Gamy, double *Gamz,
|
||||
double *Lap, double *betax, double *betay, double *betaz,
|
||||
double *dtSfx, double *dtSfy, double *dtSfz,
|
||||
double *TZ,
|
||||
double *chi_rhs, double *trK_rhs,
|
||||
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
|
||||
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
|
||||
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
|
||||
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
|
||||
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
|
||||
double *TZ_rhs,
|
||||
double *rho, double *Sx, double *Sy, double *Sz,
|
||||
double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
|
||||
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
|
||||
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
|
||||
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
|
||||
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
|
||||
double *Hcon, double *Mxcon, double *Mycon, double *Mzcon, double *Gmxcon, double *Gmycon, double *Gmzcon,
|
||||
int &Symmetry, int &Lev, double &eps, int &co)
|
||||
{
|
||||
return compute_rhs_z4c_cartesian(
|
||||
ex, T, X, Y, Z,
|
||||
chi, chi, trK,
|
||||
dxx, gxy, gxz, dyy, gyz, dzz,
|
||||
Axx, Axy, Axz, Ayy, Ayz, Azz,
|
||||
Gamx, Gamy, Gamz,
|
||||
Lap, betax, betay, betaz,
|
||||
dtSfx, dtSfy, dtSfz,
|
||||
TZ,
|
||||
chi_rhs, trK_rhs,
|
||||
gxx_rhs, gxy_rhs, gxz_rhs, gyy_rhs, gyz_rhs, gzz_rhs,
|
||||
Axx_rhs, Axy_rhs, Axz_rhs, Ayy_rhs, Ayz_rhs, Azz_rhs,
|
||||
Gamx_rhs, Gamy_rhs, Gamz_rhs,
|
||||
Lap_rhs, betax_rhs, betay_rhs, betaz_rhs,
|
||||
dtSfx_rhs, dtSfy_rhs, dtSfz_rhs,
|
||||
TZ_rhs,
|
||||
rho, Sx, Sy, Sz,
|
||||
Sxx, Sxy, Sxz, Syy, Syz, Szz,
|
||||
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
|
||||
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
|
||||
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
|
||||
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
|
||||
Hcon, Mxcon, Mycon, Mzcon, Gmxcon, Gmycon, Gmzcon,
|
||||
Symmetry, Lev, eps, co);
|
||||
}
|
||||
|
||||
extern "C" int f_compute_rhs_Z4cnot(int *ex, double &T,
|
||||
double *X, double *Y, double *Z,
|
||||
double *chi, double *trK,
|
||||
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
|
||||
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
|
||||
double *Gamx, double *Gamy, double *Gamz,
|
||||
double *Lap, double *betax, double *betay, double *betaz,
|
||||
double *dtSfx, double *dtSfy, double *dtSfz,
|
||||
double *TZ,
|
||||
double *chi_rhs, double *trK_rhs,
|
||||
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
|
||||
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
|
||||
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
|
||||
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
|
||||
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
|
||||
double *TZ_rhs,
|
||||
double *rho, double *Sx, double *Sy, double *Sz,
|
||||
double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
|
||||
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
|
||||
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
|
||||
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
|
||||
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
|
||||
double *Hcon, double *Mxcon, double *Mycon, double *Mzcon, double *Gmxcon, double *Gmycon, double *Gmzcon,
|
||||
int &Symmetry, int &Lev, double &eps, int &co, double &chitiny)
|
||||
{
|
||||
const int all = ex[0] * ex[1] * ex[2];
|
||||
std::vector<double> chi_clamped(chi, chi + all);
|
||||
f_lowerboundset(ex, chi_clamped.data(), chitiny);
|
||||
|
||||
const int ret = compute_rhs_z4c_cartesian(
|
||||
ex, T, X, Y, Z,
|
||||
chi_clamped.data(), chi, trK,
|
||||
dxx, gxy, gxz, dyy, gyz, dzz,
|
||||
Axx, Axy, Axz, Ayy, Ayz, Azz,
|
||||
Gamx, Gamy, Gamz,
|
||||
Lap, betax, betay, betaz,
|
||||
dtSfx, dtSfy, dtSfz,
|
||||
TZ,
|
||||
chi_rhs, trK_rhs,
|
||||
gxx_rhs, gxy_rhs, gxz_rhs, gyy_rhs, gyz_rhs, gzz_rhs,
|
||||
Axx_rhs, Axy_rhs, Axz_rhs, Ayy_rhs, Ayz_rhs, Azz_rhs,
|
||||
Gamx_rhs, Gamy_rhs, Gamz_rhs,
|
||||
Lap_rhs, betax_rhs, betay_rhs, betaz_rhs,
|
||||
dtSfx_rhs, dtSfy_rhs, dtSfz_rhs,
|
||||
TZ_rhs,
|
||||
rho, Sx, Sy, Sz,
|
||||
Sxx, Sxy, Sxz, Syy, Syz, Szz,
|
||||
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
|
||||
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
|
||||
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
|
||||
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
|
||||
Hcon, Mxcon, Mycon, Mzcon, Gmxcon, Gmycon, Gmzcon,
|
||||
Symmetry, Lev, eps, co);
|
||||
|
||||
if (ret != 0 || co != 0)
|
||||
return ret;
|
||||
|
||||
#if (ABV == 0)
|
||||
f_ricci_gamma(ex, X, Y, Z,
|
||||
chi,
|
||||
dxx, gxy, gxz, dyy, gyz, dzz,
|
||||
Gamx, Gamy, Gamz,
|
||||
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
|
||||
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
|
||||
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
|
||||
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
|
||||
Symmetry);
|
||||
#endif
|
||||
f_constraint_bssn(ex, X, Y, Z,
|
||||
chi, trK,
|
||||
dxx, gxy, gxz, dyy, gyz, dzz,
|
||||
Axx, Axy, Axz, Ayy, Ayz, Azz,
|
||||
Gamx, Gamy, Gamz,
|
||||
Lap, betax, betay, betaz, rho, Sx, Sy, Sz,
|
||||
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
|
||||
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
|
||||
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
|
||||
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
|
||||
Hcon, Mxcon, Mycon, Mzcon, Gmxcon, Gmycon, Gmzcon,
|
||||
Symmetry);
|
||||
return ret;
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,83 +0,0 @@
|
||||
#ifndef Z4C_RHS_CUDA_H
|
||||
#define Z4C_RHS_CUDA_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum {
|
||||
Z4C_CUDA_STATE_COUNT = 25
|
||||
};
|
||||
|
||||
int z4c_cuda_rk4_substep(void *block_tag,
|
||||
int *ex, double *X, double *Y, double *Z,
|
||||
double **state_host_in,
|
||||
double **state_host_out,
|
||||
const double *propspeed,
|
||||
const double *soa_flat,
|
||||
const double *bbox,
|
||||
double &dT,
|
||||
double &T,
|
||||
int &RK4,
|
||||
int &apply_bam_bc,
|
||||
int &Symmetry,
|
||||
int &Lev,
|
||||
double &eps,
|
||||
int &co,
|
||||
int &keep_resident_state,
|
||||
int &apply_enforce_ga,
|
||||
double &chitiny);
|
||||
|
||||
int z4c_cuda_download_resident_state(void *block_tag,
|
||||
int *ex,
|
||||
double **state_host_out);
|
||||
|
||||
int z4c_cuda_pack_state_region_to_host_buffer(void *block_tag,
|
||||
int state_index,
|
||||
double *host_buffer,
|
||||
int *ex,
|
||||
int i0, int j0, int k0,
|
||||
int sx, int sy, int sz);
|
||||
|
||||
int z4c_cuda_unpack_state_region_from_host_buffer(void *block_tag,
|
||||
int state_index,
|
||||
double *host_buffer,
|
||||
int *ex,
|
||||
int i0, int j0, int k0,
|
||||
int sx, int sy, int sz);
|
||||
|
||||
int z4c_cuda_pack_state_batch_to_host_buffer(void *block_tag,
|
||||
int state_count,
|
||||
double *host_buffer,
|
||||
int *ex,
|
||||
int i0, int j0, int k0,
|
||||
int sx, int sy, int sz);
|
||||
|
||||
int z4c_cuda_unpack_state_batch_from_host_buffer(void *block_tag,
|
||||
int state_count,
|
||||
double *host_buffer,
|
||||
int *ex,
|
||||
int i0, int j0, int k0,
|
||||
int sx, int sy, int sz);
|
||||
|
||||
int z4c_cuda_download_state_subset(void *block_tag,
|
||||
int *ex,
|
||||
int subset_count,
|
||||
const int *state_indices,
|
||||
double **state_host_out);
|
||||
|
||||
int z4c_cuda_upload_state_subset(void *block_tag,
|
||||
int *ex,
|
||||
int subset_count,
|
||||
const int *state_indices,
|
||||
double **state_host_in);
|
||||
|
||||
int z4c_cuda_has_resident_state(void *block_tag);
|
||||
|
||||
void z4c_cuda_release_step_ctx(void *block_tag);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -144,67 +144,11 @@ def generate_macrodef_h():
|
||||
print( "#define REGLEV 0", file=file1 )
|
||||
print( file=file1 )
|
||||
|
||||
# Define fine-grained timing/debug macros.
|
||||
# All of them default to OFF so production builds do not pay profiling overhead.
|
||||
|
||||
fine_timing = getattr(input_data, "Fine_Timing",
|
||||
getattr(input_data, "Finegrained_Timing", "no"))
|
||||
kernel_fine_timing = getattr(input_data, "Kernel_Fine_Timing",
|
||||
getattr(input_data, "BSSN_Kernel_Fine_Timing", "no"))
|
||||
stdin_abort_poll = getattr(input_data, "Enable_Stdin_Abort_Poll",
|
||||
getattr(input_data, "Stdin_Abort_Poll", "no"))
|
||||
timing_report_every = max(1, int(getattr(
|
||||
input_data, "Timing_Every_Steps",
|
||||
getattr(input_data, "Timing_Report_Every", 1))))
|
||||
timing_top_hotspots = max(1, int(getattr(
|
||||
input_data, "Timing_Top_Hotspots", 8)))
|
||||
|
||||
if ( fine_timing == "yes" ):
|
||||
print( "#define BSSN_FINE_TIMING 1", file=file1 )
|
||||
print( file=file1 )
|
||||
elif ( fine_timing == "no" ):
|
||||
print( "#define BSSN_FINE_TIMING 0", file=file1 )
|
||||
print( file=file1 )
|
||||
else:
|
||||
print( "Fine_Timing setting error!!!" )
|
||||
print()
|
||||
print( "# Fine_Timing setting error!!!", file=file1 )
|
||||
print( file=file1 )
|
||||
|
||||
print( f"#define BSSN_FINE_TIMING_EVERY {timing_report_every}", file=file1 )
|
||||
print( file=file1 )
|
||||
print( f"#define BSSN_FINE_TIMING_TOPN {timing_top_hotspots}", file=file1 )
|
||||
print( file=file1 )
|
||||
|
||||
if ( kernel_fine_timing == "yes" ):
|
||||
print( "#define BSSN_KERNEL_FINE_TIMING 1", file=file1 )
|
||||
print( file=file1 )
|
||||
elif ( kernel_fine_timing == "no" ):
|
||||
print( "#define BSSN_KERNEL_FINE_TIMING 0", file=file1 )
|
||||
print( file=file1 )
|
||||
else:
|
||||
print( "Kernel_Fine_Timing setting error!!!" )
|
||||
print()
|
||||
print( "# Kernel_Fine_Timing setting error!!!", file=file1 )
|
||||
print( file=file1 )
|
||||
|
||||
if ( stdin_abort_poll == "yes" ):
|
||||
print( "#define BSSN_ENABLE_STDIN_ABORT_POLL 1", file=file1 )
|
||||
print( file=file1 )
|
||||
elif ( stdin_abort_poll == "no" ):
|
||||
print( "#define BSSN_ENABLE_STDIN_ABORT_POLL 0", file=file1 )
|
||||
print( file=file1 )
|
||||
else:
|
||||
print( "Enable_Stdin_Abort_Poll setting error!!!" )
|
||||
print()
|
||||
print( "# Enable_Stdin_Abort_Poll setting error!!!", file=file1 )
|
||||
print( file=file1 )
|
||||
|
||||
# Define macro USE_GPU
|
||||
# use GPU or not
|
||||
|
||||
if ( input_data.GPU_Calculation == "yes"):
|
||||
print( "//#define USE_GPU", file=file1 )
|
||||
print( "#define USE_GPU", file=file1 )
|
||||
print( file=file1 )
|
||||
elif ( input_data.GPU_Calculation == "no"):
|
||||
print( "//#define USE_GPU", file=file1 )
|
||||
@@ -280,21 +224,6 @@ def generate_macrodef_h():
|
||||
print( "// 0: for every level;", file=file1 )
|
||||
print( "// 1: for all", file=file1 )
|
||||
print( "//", file=file1 )
|
||||
print( "// define BSSN_FINE_TIMING", file=file1 )
|
||||
print( "// enable fine-grained per-timestep timing monitor", file=file1 )
|
||||
print( "//", file=file1 )
|
||||
print( "// define BSSN_FINE_TIMING_EVERY", file=file1 )
|
||||
print( "// report timing every N coarse timesteps", file=file1 )
|
||||
print( "//", file=file1 )
|
||||
print( "// define BSSN_FINE_TIMING_TOPN", file=file1 )
|
||||
print( "// number of hottest timing buckets shown in stdout", file=file1 )
|
||||
print( "//", file=file1 )
|
||||
print( "// define BSSN_KERNEL_FINE_TIMING", file=file1 )
|
||||
print( "// enable split timing inside compute_rhs_bssn", file=file1 )
|
||||
print( "//", file=file1 )
|
||||
print( "// define BSSN_ENABLE_STDIN_ABORT_POLL", file=file1 )
|
||||
print( "// poll stdin and broadcast abort flag every coarse step", file=file1 )
|
||||
print( "//", file=file1 )
|
||||
print( "// define USE_GPU", file=file1 )
|
||||
print( "// use gpu or not", file=file1 )
|
||||
print( "//", file=file1 )
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
|
||||
|
||||
import AMSS_NCKU_Input as input_data
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
@@ -54,6 +55,44 @@ NUMACTL_CPU_BIND = get_last_n_cores_per_socket(n=32)
|
||||
BUILD_JOBS = 64
|
||||
|
||||
|
||||
def build_abe_runtime_env():
|
||||
"""Inject OpenMP runtime settings only for the main ABE evolution run."""
|
||||
runtime_env = os.environ.copy()
|
||||
omp_threads = max(1, int(getattr(input_data, "OMP_Threads", 1)))
|
||||
runtime_env["OMP_NUM_THREADS"] = str(omp_threads)
|
||||
return runtime_env
|
||||
|
||||
|
||||
def build_twopuncture_runtime_env():
|
||||
"""Let TwoPunctureABE use the runtime default instead of the ABE OMP override."""
|
||||
runtime_env = os.environ.copy()
|
||||
runtime_env.pop("OMP_NUM_THREADS", None)
|
||||
runtime_env.pop("OMP_THREAD_LIMIT", None)
|
||||
return runtime_env
|
||||
|
||||
|
||||
def build_mpi_launch_args():
|
||||
"""Build optional host-distribution arguments for mpirun."""
|
||||
hosts = list(getattr(input_data, "MPI_hosts", []))
|
||||
ppn = int(getattr(input_data, "MPI_processes_per_node", 0))
|
||||
|
||||
if not hosts:
|
||||
return ""
|
||||
|
||||
if ppn > 0:
|
||||
expected = len(hosts) * ppn
|
||||
if int(input_data.MPI_processes) != expected:
|
||||
raise ValueError(
|
||||
f"MPI_processes={input_data.MPI_processes} does not match "
|
||||
f"len(MPI_hosts) * MPI_processes_per_node = {expected}"
|
||||
)
|
||||
|
||||
launch_args = f"-hosts {','.join(hosts)}"
|
||||
if ppn > 0:
|
||||
launch_args += f" -ppn {ppn}"
|
||||
return launch_args
|
||||
|
||||
|
||||
##################################################################
|
||||
|
||||
|
||||
@@ -70,9 +109,9 @@ def makefile_ABE():
|
||||
|
||||
## Build command with CPU binding to nohz_full cores
|
||||
if (input_data.GPU_Calculation == "no"):
|
||||
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} INTERP_LB_MODE=off USE_CUDA_BSSN=0 USE_CUDA_Z4C=0 ABE"
|
||||
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} INTERP_LB_MODE=off ABE"
|
||||
elif (input_data.GPU_Calculation == "yes"):
|
||||
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} INTERP_LB_MODE=off USE_CUDA_BSSN=1 USE_CUDA_Z4C=1 ABE_CUDA"
|
||||
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABEGPU"
|
||||
else:
|
||||
print( " CPU/GPU numerical calculation setting is wrong " )
|
||||
print( )
|
||||
@@ -143,19 +182,38 @@ def run_ABE():
|
||||
print( )
|
||||
print( " Running the AMSS-NCKU executable file ABE/ABEGPU " )
|
||||
print( )
|
||||
print( f" MPI processes = {input_data.MPI_processes}, OMP threads per process = {max(1, int(getattr(input_data, 'OMP_Threads', 1)))}" )
|
||||
if getattr(input_data, "MPI_hosts", []):
|
||||
print( f" MPI hosts = {getattr(input_data, 'MPI_hosts', [])}, MPI ranks per node = {int(getattr(input_data, 'MPI_processes_per_node', 0))}" )
|
||||
print( " Multi-node runs require the working directory to be visible on all MPI hosts. " )
|
||||
print( )
|
||||
|
||||
## Define the command to run; cast other values to strings as needed
|
||||
mpi_launch_args = build_mpi_launch_args()
|
||||
|
||||
if (input_data.GPU_Calculation == "no"):
|
||||
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
||||
mpi_command = NUMACTL_CPU_BIND + " mpirun "
|
||||
if mpi_launch_args:
|
||||
mpi_command += mpi_launch_args + " "
|
||||
mpi_command += "-np " + str(input_data.MPI_processes) + " ./ABE"
|
||||
#mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
||||
mpi_command_outfile = "ABE_out.log"
|
||||
elif (input_data.GPU_Calculation == "yes"):
|
||||
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE_CUDA"
|
||||
mpi_command = NUMACTL_CPU_BIND + " mpirun "
|
||||
if mpi_launch_args:
|
||||
mpi_command += mpi_launch_args + " "
|
||||
mpi_command += "-np " + str(input_data.MPI_processes) + " ./ABEGPU"
|
||||
mpi_command_outfile = "ABEGPU_out.log"
|
||||
|
||||
## Execute the MPI command and stream output
|
||||
mpi_process = subprocess.Popen(mpi_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
|
||||
mpi_process = subprocess.Popen(
|
||||
mpi_command,
|
||||
shell=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
text=True,
|
||||
env=build_abe_runtime_env(),
|
||||
)
|
||||
|
||||
## Write ABE run output to file while printing to stdout
|
||||
with open(mpi_command_outfile, 'w') as file0:
|
||||
@@ -195,7 +253,14 @@ def run_TwoPunctureABE():
|
||||
TwoPuncture_command_outfile = "TwoPunctureABE_out.log"
|
||||
|
||||
## Execute the command with subprocess.Popen and stream output
|
||||
TwoPuncture_process = subprocess.Popen(TwoPuncture_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
|
||||
TwoPuncture_process = subprocess.Popen(
|
||||
TwoPuncture_command,
|
||||
shell=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
text=True,
|
||||
env=build_twopuncture_runtime_env(),
|
||||
)
|
||||
|
||||
## Write TwoPunctureABE run output to file while printing to stdout
|
||||
with open(TwoPuncture_command_outfile, 'w') as file0:
|
||||
|
||||
24
setup.py
24
setup.py
@@ -65,10 +65,14 @@ def print_input_data( File_directory ):
|
||||
|
||||
print( "------------------------------------------------------------------------------------------" )
|
||||
print( )
|
||||
print( " Printing the basic parameter and setting in the AMSS-NCKU simulation " )
|
||||
print( )
|
||||
print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes )
|
||||
print( )
|
||||
print( " Printing the basic parameter and setting in the AMSS-NCKU simulation " )
|
||||
print( )
|
||||
print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes )
|
||||
print( " The number of OMP threads per MPI process = ", input_data.OMP_Threads )
|
||||
if getattr(input_data, "MPI_hosts", []):
|
||||
print( " The MPI host list in the AMSS-NCKU simulation = ", input_data.MPI_hosts )
|
||||
print( " The number of MPI ranks launched per host = ", input_data.MPI_processes_per_node )
|
||||
print( )
|
||||
print( " The form of computational equation = ", input_data.Equation_Class )
|
||||
print( " The initial data in this simulation = ", input_data.Initial_Data_Method )
|
||||
print( )
|
||||
@@ -140,10 +144,14 @@ def print_input_data( File_directory ):
|
||||
file0 = open(filepath, 'w')
|
||||
|
||||
print( file=file0 )
|
||||
print( " Printing the basic parameter and setting in the AMSS-NCKU simulation ", file=file0 )
|
||||
print( file=file0 )
|
||||
print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes, file=file0 )
|
||||
print( file=file0 )
|
||||
print( " Printing the basic parameter and setting in the AMSS-NCKU simulation ", file=file0 )
|
||||
print( file=file0 )
|
||||
print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes, file=file0 )
|
||||
print( " The number of OMP threads per MPI process = ", input_data.OMP_Threads, file=file0 )
|
||||
if getattr(input_data, "MPI_hosts", []):
|
||||
print( " The MPI host list in the AMSS-NCKU simulation = ", input_data.MPI_hosts, file=file0 )
|
||||
print( " The number of MPI ranks launched per host = ", input_data.MPI_processes_per_node, file=file0 )
|
||||
print( file=file0 )
|
||||
print( " The form of computational equation = ", input_data.Equation_Class, file=file0 )
|
||||
print( " The initial data in this simulation = ", input_data.Initial_Data_Method, file=file0 )
|
||||
print( file=file0 )
|
||||
|
||||
Reference in New Issue
Block a user