[TEST]UPSTREAM: Pick some source changes from 48080d0a97

* Sync new folder structure
UPSTREAM: Pick source changes from a5b2dd9e3c
2026-04-23 20:55:40 +08:00 · 2026-04-23 20:18:44 +08:00 · 2026-04-23 20:10:12 +08:00 · 2026-04-15 00:49:46 +08:00 · 2026-04-13 16:51:06 +08:00 · 2026-04-13 15:55:43 +08:00
233 changed files with 196637 additions and 190114 deletions
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>
--- a/AMSS_NCKU_Program.py
+++ b/AMSS_NCKU_Program.py
@@ -8,6 +8,14 @@
 ##
 ##################################################################

+## Guard against re-execution by multiprocessing child processes.
+## Without this, using 'spawn' or 'forkserver' context would cause every
+## worker to re-run the entire script, spawning exponentially more
+## workers (fork bomb).
+if __name__ != '__main__':
+    import sys as _sys
+    _sys.exit(0)
+

 ##################################################################

@@ -118,12 +126,7 @@ setup.generate_AMSSNCKU_input()
 #inputvalue = input()  ## Wait for user input (press Enter) to proceed
 #print()

-setup.print_puncture_information()
-
-
-##################################################################
-
-## Generate AMSS-NCKU program input files based on the configured parameters
+## Generate AMSS-NCKU program input files based on the configured parameters

 print(                                                                                   )
 print( " Generating the AMSS-NCKU input parfile for the ABE executable. " ) 
@@ -262,6 +265,12 @@ if not os.path.exists( ABE_file ):
 ## Copy the executable ABE (or ABEGPU) into the run directory
 shutil.copy2(ABE_file, output_directory)

+## Copy interp load balance profile if present (for optimize pass)
+interp_lb_profile = os.path.join(AMSS_NCKU_source_copy, "interp_lb_profile.bin")
+if os.path.exists(interp_lb_profile):
+    shutil.copy2(interp_lb_profile, output_directory)
+    print( " Copied interp_lb_profile.bin to run directory " )
+
 ###########################

 ## If the initial-data method is TwoPuncture, copy the TwoPunctureABE executable to the run directory
@@ -298,7 +307,7 @@ if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ):
    
    import generate_TwoPuncture_input
    
-    generate_TwoPuncture_input.generate_AMSSNCKU_TwoPuncture_input()
+    generate_TwoPuncture_input.generate_AMSSNCKU_TwoPuncture_input(numerical_grid.puncture_data)
    
    print(                                                                                              )
    print( " The input parfile for the TwoPunctureABE executable has been generated. " )
@@ -340,7 +349,7 @@ if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ):

 import renew_puncture_parameter
    
-renew_puncture_parameter.append_AMSSNCKU_BSSN_input(File_directory, output_directory)
+renew_puncture_parameter.append_AMSSNCKU_BSSN_input(File_directory, output_directory, numerical_grid.puncture_data)


 ## Generated AMSS-NCKU input filename
@@ -424,26 +433,31 @@ print(

 import plot_xiaoqu
 import plot_GW_strain_amplitude_xiaoqu
+from parallel_plot_helper import run_plot_tasks_parallel
+
+plot_tasks = []

 ## Plot black hole trajectory
-plot_xiaoqu.generate_puncture_orbit_plot(   binary_results_directory, figure_directory )
-plot_xiaoqu.generate_puncture_orbit_plot3D( binary_results_directory, figure_directory )
+plot_tasks.append( ( plot_xiaoqu.generate_puncture_orbit_plot,   (binary_results_directory, figure_directory) ) )
+plot_tasks.append( ( plot_xiaoqu.generate_puncture_orbit_plot3D, (binary_results_directory, figure_directory) ) )

 ## Plot black hole separation vs. time
-plot_xiaoqu.generate_puncture_distence_plot( binary_results_directory, figure_directory )
+plot_tasks.append( ( plot_xiaoqu.generate_puncture_distence_plot, (binary_results_directory, figure_directory) ) )

 ## Plot gravitational waveforms (psi4 and strain amplitude)
 for i in range(input_data.Detector_Number):
-    plot_xiaoqu.generate_gravitational_wave_psi4_plot( binary_results_directory, figure_directory, i )
-    plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot( binary_results_directory, figure_directory, i )
+    plot_tasks.append( ( plot_xiaoqu.generate_gravitational_wave_psi4_plot, (binary_results_directory, figure_directory, i) ) )
+    plot_tasks.append( ( plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot, (binary_results_directory, figure_directory, i) ) )

 ## Plot ADM mass evolution
 for i in range(input_data.Detector_Number):
-    plot_xiaoqu.generate_ADMmass_plot( binary_results_directory, figure_directory, i )
+    plot_tasks.append( ( plot_xiaoqu.generate_ADMmass_plot, (binary_results_directory, figure_directory, i) ) )

 ## Plot Hamiltonian constraint violation over time
 for i in range(input_data.grid_level):
-    plot_xiaoqu.generate_constraint_check_plot( binary_results_directory, figure_directory, i )
+    plot_tasks.append( ( plot_xiaoqu.generate_constraint_check_plot, (binary_results_directory, figure_directory, i) ) )
+
+run_plot_tasks_parallel(plot_tasks)

 ## Plot stored binary data
 plot_xiaoqu.generate_binary_data_plot( binary_results_directory, figure_directory )
--- a/AMSS_NCKU_Verify_ASC26.py
+++ b/AMSS_NCKU_Verify_ASC26.py
@@ -1,10 +1,19 @@
 #!/usr/bin/env python3
 """
-AMSS-NCKU GW150914 Simulation Regression Test Script
+AMSS-NCKU GW150914 Simulation Regression Test Script (Comprehensive Version)

-Verification Requirements:
-1. XY-plane trajectory RMS error < 1% (Optimized vs. baseline, max of BH1 and BH2)
-2. ADM constraint violation < 2 (Grid Level 0)
+Verification Requirements:
+1. RMS errors < 1% for:
+   - 3D Vector Total RMS
+   - X Component RMS
+   - Y Component RMS
+   - Z Component RMS
+2. ADM constraint violation < 2 (Grid Level 0)
+3. The following figure PDFs must match GW150914-origin after rasterization (fewer than 0.001% differing pixels):
+   - ADM_Constraint_Grid_Level_0.pdf
+   - BH_Trajectory_21_XY.pdf
+   - BH_Trajectory_XY.pdf
+   The script also reports the percentage of differing pixels for each figure.

 RMS Calculation Method:
 - Computes trajectory deviation on the XY plane independently for BH1 and BH2
@@ -16,9 +25,13 @@ Default: output_dir = GW150914/AMSS_NCKU_output
 Reference: GW150914-origin (baseline simulation)
 """

-import numpy as np
-import sys
-import os
+import numpy as np
+import sys
+import os
+import shutil
+import subprocess
+import tempfile
+from PIL import Image

 # ANSI Color Codes
 class Color:
@@ -45,91 +58,200 @@ def load_bh_trajectory(filepath):
    }


-def load_constraint_data(filepath):
-    """Load constraint violation data"""
-    data = []
+def load_constraint_data(filepath):
+    """Load constraint violation data"""
+    data = []
    with open(filepath, 'r') as f:
        for line in f:
            if line.startswith('#'):
                continue
            parts = line.split()
            if len(parts) >= 8:
-                data.append([float(x) for x in parts[:8]])
-    return np.array(data)
+                data.append([float(x) for x in parts[:8]])
+    return np.array(data)
+
+
+def resolve_figure_dir(path):
+    """Resolve the sibling figure directory from an output or figure path."""
+    normalized = os.path.normpath(path)
+    if os.path.basename(normalized) == "figure":
+        return normalized
+    return os.path.join(os.path.dirname(normalized), "figure")
+
+
+def render_pdf_to_images(pdf_path, dpi=150):
+    """Render a PDF to RGB images using Ghostscript."""
+    gs_path = shutil.which("gs")
+    if gs_path is None:
+        raise RuntimeError("Ghostscript executable 'gs' was not found in PATH")
+
+    with tempfile.TemporaryDirectory(prefix="amss_verify_pdf_") as temp_dir:
+        output_pattern = os.path.join(temp_dir, "page-%03d.ppm")
+        cmd = [
+            gs_path,
+            "-q",
+            "-dSAFER",
+            "-dBATCH",
+            "-dNOPAUSE",
+            "-sDEVICE=ppmraw",
+            f"-r{dpi}",
+            f"-o{output_pattern}",
+            pdf_path
+        ]
+
+        try:
+            subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True)
+        except subprocess.CalledProcessError as exc:
+            message = exc.stderr.strip() or str(exc)
+            raise RuntimeError(f"Failed to render PDF '{pdf_path}': {message}") from exc
+
+        ppm_files = sorted(
+            os.path.join(temp_dir, filename)
+            for filename in os.listdir(temp_dir)
+            if filename.endswith(".ppm")
+        )
+
+        if not ppm_files:
+            raise RuntimeError(f"No rendered pages were produced for '{pdf_path}'")
+
+        images = []
+        for ppm_file in ppm_files:
+            with Image.open(ppm_file) as img:
+                images.append(np.array(img.convert("RGB"), dtype=np.uint8))
+
+        return images
+
+
+def compare_rendered_pages(ref_img, target_img):
+    """Return (different_pixels, total_pixels) for two rendered RGB pages."""
+    ref_h, ref_w = ref_img.shape[:2]
+    tgt_h, tgt_w = target_img.shape[:2]
+    total_pixels = max(ref_h, tgt_h) * max(ref_w, tgt_w)
+
+    if ref_h == tgt_h and ref_w == tgt_w:
+        different_pixels = int(np.count_nonzero(np.any(ref_img != target_img, axis=2)))
+        return different_pixels, total_pixels
+
+    diff_mask = np.ones((max(ref_h, tgt_h), max(ref_w, tgt_w)), dtype=bool)
+    overlap_h = min(ref_h, tgt_h)
+    overlap_w = min(ref_w, tgt_w)
+    overlap_diff = np.any(ref_img[:overlap_h, :overlap_w] != target_img[:overlap_h, :overlap_w], axis=2)
+    diff_mask[:overlap_h, :overlap_w] = overlap_diff
+    different_pixels = int(np.count_nonzero(diff_mask))
+    return different_pixels, total_pixels
+
+
+def compare_pdf_images(ref_pdf, target_pdf, dpi=150, threshold_percent=0.001):
+    """Compare two PDFs by rasterizing them and counting differing pixels."""
+    ref_pages = render_pdf_to_images(ref_pdf, dpi=dpi)
+    target_pages = render_pdf_to_images(target_pdf, dpi=dpi)
+
+    total_pixels = 0
+    different_pixels = 0
+    max_pages = max(len(ref_pages), len(target_pages))
+
+    for page_idx in range(max_pages):
+        if page_idx < len(ref_pages) and page_idx < len(target_pages):
+            page_diff, page_total = compare_rendered_pages(ref_pages[page_idx], target_pages[page_idx])
+        else:
+            existing_page = ref_pages[page_idx] if page_idx < len(ref_pages) else target_pages[page_idx]
+            page_total = existing_page.shape[0] * existing_page.shape[1]
+            page_diff = page_total
+
+        total_pixels += page_total
+        different_pixels += page_diff
+
+    diff_percent = (different_pixels / total_pixels * 100.0) if total_pixels else 0.0
+    return {
+        "different_pixels": different_pixels,
+        "total_pixels": total_pixels,
+        "diff_percent": diff_percent,
+        "pages_ref": len(ref_pages),
+        "pages_target": len(target_pages),
+        "passed": diff_percent < threshold_percent
+    }
+
+
+def compare_required_figures(reference_figure_dir, target_figure_dir):
+    """Compare the required GW150914 figure PDFs."""
+    figure_names = [
+        "ADM_Constraint_Grid_Level_0.pdf",
+        "BH_Trajectory_21_XY.pdf",
+        "BH_Trajectory_XY.pdf"
+    ]
+
+    results = []
+    for figure_name in figure_names:
+        ref_pdf = os.path.join(reference_figure_dir, figure_name)
+        target_pdf = os.path.join(target_figure_dir, figure_name)
+
+        if not os.path.exists(ref_pdf):
+            raise FileNotFoundError(f"Reference figure not found: {ref_pdf}")
+        if not os.path.exists(target_pdf):
+            raise FileNotFoundError(f"Target figure not found: {target_pdf}")
+
+        comparison = compare_pdf_images(ref_pdf, target_pdf)
+        comparison["name"] = figure_name
+        results.append(comparison)
+
+    return results

-
-def calculate_rms_error(bh_data_ref, bh_data_target):
+def calculate_all_rms_errors(bh_data_ref, bh_data_target):
    """
-    Calculate trajectory-based RMS error on the XY plane between baseline and optimized simulations.
-
-    This function computes the RMS error independently for BH1 and BH2 trajectories,
-    then returns the maximum of the two as the final RMS error metric.
-
-    For each black hole, the RMS is calculated as:
-        RMS = sqrt( (1/M) * sum( (Δr_i / r_i^max)^2 ) ) × 100%
-
-    where:
-        Δr_i = sqrt((x_ref,i - x_new,i)^2 + (y_ref,i - y_new,i)^2)
-        r_i^max = max(sqrt(x_ref,i^2 + y_ref,i^2), sqrt(x_new,i^2 + y_new,i^2))
-
-    Args:
-        bh_data_ref: Reference (baseline) trajectory data
-        bh_data_target: Target (optimized) trajectory data
-
-    Returns:
-        rms_value: Final RMS error as a percentage (max of BH1 and BH2)
-        error: Error message if any
+    Calculate 3D Vector RMS and component-wise RMS (X, Y, Z) independently.
+    Uses max(r_ref, r_new), where r = sqrt(x^2 + y^2), as the denominator for all error normalizations.
+    Returns the maximum error between BH1 and BH2 for each category.
    """
-    # Align data: truncate to the length of the shorter dataset
    M = min(len(bh_data_ref['time']), len(bh_data_target['time']))

    if M < 10:
        return None, "Insufficient data points for comparison"

-    # Extract XY coordinates for both black holes
-    x1_ref = bh_data_ref['x1'][:M]
-    y1_ref = bh_data_ref['y1'][:M]
-    x2_ref = bh_data_ref['x2'][:M]
-    y2_ref = bh_data_ref['y2'][:M]
+    results = {}

-    x1_new = bh_data_target['x1'][:M]
-    y1_new = bh_data_target['y1'][:M]
-    x2_new = bh_data_target['x2'][:M]
-    y2_new = bh_data_target['y2'][:M]
+    for bh in ['1', '2']:
+        x_r, y_r, z_r = bh_data_ref[f'x{bh}'][:M], bh_data_ref[f'y{bh}'][:M], bh_data_ref[f'z{bh}'][:M]
+        x_n, y_n, z_n = bh_data_target[f'x{bh}'][:M], bh_data_target[f'y{bh}'][:M], bh_data_target[f'z{bh}'][:M]

-    # Calculate RMS for BH1
-    delta_r1 = np.sqrt((x1_ref - x1_new)**2 + (y1_ref - y1_new)**2)
-    r1_ref = np.sqrt(x1_ref**2 + y1_ref**2)
-    r1_new = np.sqrt(x1_new**2 + y1_new**2)
-    r1_max = np.maximum(r1_ref, r1_new)
+        # Key change: per the organizing committee's email, every error is normalized by r = sqrt(x^2 + y^2)
+        r_ref = np.sqrt(x_r**2 + y_r**2)
+        r_new = np.sqrt(x_n**2 + y_n**2)
+        denom_max = np.maximum(r_ref, r_new)

-    # Calculate RMS for BH2
-    delta_r2 = np.sqrt((x2_ref - x2_new)**2 + (y2_ref - y2_new)**2)
-    r2_ref = np.sqrt(x2_ref**2 + y2_ref**2)
-    r2_new = np.sqrt(x2_new**2 + y2_new**2)
-    r2_max = np.maximum(r2_ref, r2_new)
+        valid = denom_max > 1e-15
+        if np.sum(valid) < 10:
+            results[f'BH{bh}'] = { '3D_Vector': 0.0, 'X_Component': 0.0, 'Y_Component': 0.0, 'Z_Component': 0.0 }
+            continue

-    # Avoid division by zero for BH1
-    valid_mask1 = r1_max > 1e-15
-    if np.sum(valid_mask1) < 10:
-        return None, "Insufficient valid data points for BH1"
+        def calc_rms(delta):
+            # Divide the component's deviation by the shared orbital-radius denominator denom_max
+            return np.sqrt(np.mean((delta[valid] / denom_max[valid])**2)) * 100

-    terms1 = (delta_r1[valid_mask1] / r1_max[valid_mask1])**2
-    rms_bh1 = np.sqrt(np.mean(terms1)) * 100
+        # 1. Total 3D Vector RMS
+        delta_vec = np.sqrt((x_r - x_n)**2 + (y_r - y_n)**2 + (z_r - z_n)**2)
+        rms_3d = calc_rms(delta_vec)

-    # Avoid division by zero for BH2
-    valid_mask2 = r2_max > 1e-15
-    if np.sum(valid_mask2) < 10:
-        return None, "Insufficient valid data points for BH2"
+        # 2. Component-wise RMS (each axis computed separately, all sharing the radius denominator)
+        rms_x = calc_rms(np.abs(x_r - x_n))
+        rms_y = calc_rms(np.abs(y_r - y_n))
+        rms_z = calc_rms(np.abs(z_r - z_n))

-    terms2 = (delta_r2[valid_mask2] / r2_max[valid_mask2])**2
-    rms_bh2 = np.sqrt(np.mean(terms2)) * 100
+        results[f'BH{bh}'] = {
+            '3D_Vector': rms_3d,
+            'X_Component': rms_x,
+            'Y_Component': rms_y,
+            'Z_Component': rms_z
+        }

-    # Final RMS is the maximum of BH1 and BH2
-    rms_final = max(rms_bh1, rms_bh2)
-
-    return rms_final, None
+    # Take the larger of the BH1 and BH2 errors for each category
+    max_rms = {
+        '3D_Vector': max(results['BH1']['3D_Vector'], results['BH2']['3D_Vector']),
+        'X_Component': max(results['BH1']['X_Component'], results['BH2']['X_Component']),
+        'Y_Component': max(results['BH1']['Y_Component'], results['BH2']['Y_Component']),
+        'Z_Component': max(results['BH1']['Z_Component'], results['BH2']['Z_Component'])
+    }

+    return max_rms, None

 def analyze_constraint_violation(constraint_data, n_levels=9):
    """
@@ -155,34 +277,32 @@ def analyze_constraint_violation(constraint_data, n_levels=9):


 def print_header():
-    """Print report header"""
    print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
-    print(Color.BOLD + "   AMSS-NCKU GW150914 Simulation Regression Test Report" + Color.RESET)
+    print(Color.BOLD + "   AMSS-NCKU GW150914 Comprehensive Regression Test" + Color.RESET)
    print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)

-
-def print_rms_results(rms_rel, error, threshold=1.0):
-    """Print RMS error results"""
-    print(f"\n{Color.BOLD}1. RMS Error Analysis (Baseline vs Optimized){Color.RESET}")
-    print("-" * 45)
+def print_rms_results(rms_dict, error, threshold=1.0):
+    print(f"\n{Color.BOLD}1. RMS Error Analysis (Maximums of BH1 & BH2){Color.RESET}")
+    print("-" * 65)

    if error:
        print(f"   {Color.RED}Error: {error}{Color.RESET}")
        return False

-    passed = rms_rel < threshold
+    all_passed = True
+    print(f"   Requirement: < {threshold}%\n")

-    print(f"   RMS relative error: {rms_rel:.4f}%")
-    print(f"   Requirement:        < {threshold}%")
-    print(f"   Status:             {get_status_text(passed)}")
+    for key, val in rms_dict.items():
+        passed = val < threshold
+        all_passed = all_passed and passed
+        status = get_status_text(passed)
+        print(f"   {key:15}: {val:8.4f}%   |   Status: {status}")

-    return passed
+    return all_passed

-
-def print_constraint_results(results, threshold=2.0):
-    """Print constraint violation results"""
+def print_constraint_results(results, threshold=2.0):
    print(f"\n{Color.BOLD}2. ADM Constraint Violation Analysis (Grid Level 0){Color.RESET}")
-    print("-" * 45)
+    print("-" * 65)

    names = ['Ham', 'Px', 'Py', 'Pz', 'Gx', 'Gy', 'Gz']
    for i, name in enumerate(names):
@@ -195,23 +315,49 @@ def print_constraint_results(results, threshold=2.0):
    print(f"\n   Maximum violation:  {results['max_violation']:.6f}")
    print(f"   Requirement:        < {threshold}")
    print(f"   Status:             {get_status_text(passed)}")
-
-    return passed
-
-
-def print_summary(rms_passed, constraint_passed):
-    """Print summary"""
-    print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
-    print(Color.BOLD + "Verification Summary" + Color.RESET)
-    print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
-
-    all_passed = rms_passed and constraint_passed
-    
-    res_rms = get_status_text(rms_passed)
-    res_con = get_status_text(constraint_passed)
-
-    print(f"   [1] RMS trajectory check:         {res_rms}")
-    print(f"   [2] ADM constraint check:         {res_con}")
+
+    return passed
+
+
+def print_figure_results(results, threshold_percent=0.001):
+    print(f"\n{Color.BOLD}3. Figure Pixel Comparison (PDF Rasterization){Color.RESET}")
+    print("-" * 65)
+    print(f"   Requirement: < {threshold_percent:.3f}% differing pixels\n")
+
+    all_passed = True
+    for result in results:
+        passed = result["passed"]
+        all_passed = all_passed and passed
+        status = get_status_text(passed)
+        print(f"   {result['name']:32}: {result['diff_percent']:10.6f}%   |   Status: {status}")
+
+        if result["pages_ref"] != result["pages_target"]:
+            print(f"   {'':32}  pages(ref/target): {result['pages_ref']}/{result['pages_target']}")
+
+    return all_passed
+
+
+def print_figure_error(error_message):
+    print(f"\n{Color.BOLD}3. Figure Pixel Comparison (PDF Rasterization){Color.RESET}")
+    print("-" * 65)
+    print(f"   {Color.RED}Error: {error_message}{Color.RESET}")
+    return False
+
+
+def print_summary(rms_passed, constraint_passed, figure_passed):
+    print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
+    print(Color.BOLD + "Verification Summary" + Color.RESET)
+    print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
+
+    all_passed = rms_passed and constraint_passed and figure_passed
+    
+    res_rms = get_status_text(rms_passed)
+    res_con = get_status_text(constraint_passed)
+    res_fig = get_status_text(figure_passed)
+
+    print(f"   [1] Comprehensive RMS check:      {res_rms}")
+    print(f"   [2] ADM constraint check:         {res_con}")
+    print(f"   [3] Figure pixel comparison:      {res_fig}")
    
    final_status = f"{Color.GREEN}{Color.BOLD}ALL CHECKS PASSED{Color.RESET}" if all_passed else f"{Color.RED}{Color.BOLD}SOME CHECKS FAILED{Color.RESET}"
    print(f"\n   Overall result: {final_status}")
@@ -219,61 +365,58 @@ def print_summary(rms_passed, constraint_passed):

    return all_passed

-
 def main():
-    # Determine target (optimized) output directory
    if len(sys.argv) > 1:
        target_dir = sys.argv[1]
    else:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        target_dir = os.path.join(script_dir, "GW150914/AMSS_NCKU_output")

-    # Determine reference (baseline) directory
-    script_dir = os.path.dirname(os.path.abspath(__file__))
-    reference_dir = os.path.join(script_dir, "GW150914-origin/AMSS_NCKU_output")
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    reference_dir = os.path.join(script_dir, "GW150914-origin/AMSS_NCKU_output")
+    target_figure_dir = resolve_figure_dir(target_dir)
+    reference_figure_dir = os.path.join(script_dir, "GW150914-origin/figure")
+
+    bh_file_ref = os.path.join(reference_dir, "bssn_BH.dat")
+    bh_file_target = os.path.join(target_dir, "bssn_BH.dat")
+    constraint_file = os.path.join(target_dir, "bssn_constraint.dat")

-    # Data file paths
-    bh_file_ref = os.path.join(reference_dir, "bssn_BH.dat")
-    bh_file_target = os.path.join(target_dir, "bssn_BH.dat")
-    constraint_file = os.path.join(target_dir, "bssn_constraint.dat")
-
-    # Check if files exist
    if not os.path.exists(bh_file_ref):
        print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Baseline trajectory file not found: {bh_file_ref}")
        sys.exit(1)
-
    if not os.path.exists(bh_file_target):
        print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Target trajectory file not found: {bh_file_target}")
        sys.exit(1)
-
    if not os.path.exists(constraint_file):
        print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Constraint data file not found: {constraint_file}")
        sys.exit(1)

-    # Print header
-    print_header()
-    print(f"\n{Color.BOLD}Reference (Baseline):{Color.RESET} {Color.BLUE}{reference_dir}{Color.RESET}")
-    print(f"{Color.BOLD}Target (Optimized):  {Color.RESET} {Color.BLUE}{target_dir}{Color.RESET}")
+    print_header()
+    print(f"\n{Color.BOLD}Reference (Baseline):{Color.RESET} {Color.BLUE}{reference_dir}{Color.RESET}")
+    print(f"{Color.BOLD}Target (Optimized):  {Color.RESET} {Color.BLUE}{target_dir}{Color.RESET}")
+    print(f"{Color.BOLD}Reference Figures:   {Color.RESET} {Color.BLUE}{reference_figure_dir}{Color.RESET}")
+    print(f"{Color.BOLD}Target Figures:      {Color.RESET} {Color.BLUE}{target_figure_dir}{Color.RESET}")

-    # Load data
    bh_data_ref = load_bh_trajectory(bh_file_ref)
    bh_data_target = load_bh_trajectory(bh_file_target)
    constraint_data = load_constraint_data(constraint_file)

-    # Calculate RMS error
-    rms_rel, error = calculate_rms_error(bh_data_ref, bh_data_target)
-    rms_passed = print_rms_results(rms_rel, error)
-
-    # Analyze constraint violation
-    constraint_results = analyze_constraint_violation(constraint_data)
-    constraint_passed = print_constraint_results(constraint_results)
-
-    # Print summary
-    all_passed = print_summary(rms_passed, constraint_passed)
-
-    # Return exit code
-    sys.exit(0 if all_passed else 1)
+    # Output modified RMS results
+    rms_dict, error = calculate_all_rms_errors(bh_data_ref, bh_data_target)
+    rms_passed = print_rms_results(rms_dict, error)

+    # Output constraint results
+    constraint_results = analyze_constraint_violation(constraint_data)
+    constraint_passed = print_constraint_results(constraint_results)
+
+    try:
+        figure_results = compare_required_figures(reference_figure_dir, target_figure_dir)
+        figure_passed = print_figure_results(figure_results)
+    except (FileNotFoundError, RuntimeError) as exc:
+        figure_passed = print_figure_error(str(exc))
+
+    all_passed = print_summary(rms_passed, constraint_passed, figure_passed)
+    sys.exit(0 if all_passed else 1)

 if __name__ == "__main__":
    main()
--- a/AMSS_NCKU_source/AHF_Direct/BH_diagnostics.C
+++ b/AMSS_NCKU_source/AHF_Direct/BH_diagnostics.C
--- a/AMSS_NCKU_source/AHF_Direct/BH_diagnostics.h
+++ b/AMSS_NCKU_source/AHF_Direct/BH_diagnostics.h
@@ -1,101 +1,101 @@
-#ifndef BH_DIAGNOSTICS_H
-#define BH_DIAGNOSTICS_H
-namespace AHFinderDirect
-{
-
-	struct BH_diagnostics
-	{
-	public:
-		// mean x,y,z
-		fp centroid_x, centroid_y, centroid_z;
-
-		// these are quadrupole moments about the centroid, i.e.
-		// mean(xi*xj) - centroid_i*centroid_j
-		fp quadrupole_xx, quadrupole_xy, quadrupole_xz,
-			quadrupole_yy, quadrupole_yz,
-			quadrupole_zz;
-
-		// min,max,mean surface radius about local coordinate origin
-		fp min_radius, max_radius, mean_radius;
-
-		// xyz bounding box
-		fp min_x, max_x,
-			min_y, max_y,
-			min_z, max_z;
-
-		// proper circumference
-		// (computed using induced metric along these local-coordinate planes)
-		fp circumference_xy,
-			circumference_xz,
-			circumference_yz;
-
-		// surface area (computed using induced metric)
-		// and quantities derived from it
-		fp area, irreducible_mass, areal_radius;
-
-		double Px, Py, Pz, Sx, Sy, Sz;
-
-	public:
-		// position of diagnostics in buffer and number of diagnostics
-		enum
-		{
-			posn__centroid_x = 0,
-			posn__centroid_y,
-			posn__centroid_z,
-			posn__quadrupole_xx,
-			posn__quadrupole_xy,
-			posn__quadrupole_xz,
-			posn__quadrupole_yy,
-			posn__quadrupole_yz,
-			posn__quadrupole_zz,
-			posn__min_radius,
-			posn__max_radius,
-			posn__mean_radius,
-
-			posn__min_x,
-			posn__max_x,
-			posn__min_y,
-			posn__max_y,
-			posn__min_z,
-			posn__max_z,
-
-			posn__circumference_xy,
-			posn__circumference_xz,
-			posn__circumference_yz,
-
-			posn__area,
-			posn__irreducible_mass,
-			posn__areal_radius,
-
-			N_buffer // no comma	// size of buffer
-		};
-
-		// copy diagnostics to/from buffer
-		void copy_to_buffer(double buffer[N_buffer]) const;
-		void copy_from_buffer(const double buffer[N_buffer]);
-
-	public:
-		void compute(patch_system &ps);
-
-		void compute_signature(patch_system &ps, const double dT);
-
-		FILE *setup_output_file(int N_horizons, int hn)
-			const;
-
-		void output(FILE *fileptr, double time)
-			const;
-
-		BH_diagnostics();
-
-	private:
-		static double surface_integral(const patch_system &ps,
-									   int src_gfn, bool src_gfn_is_even_across_xy_plane,
-									   bool src_gfn_is_even_across_xz_plane,
-									   bool src_gfn_is_even_across_yz_plane,
-									   enum patch::integration_method method);
-	};
-
-	//******************************************************************************
-
-} // namespace AHFinderDirect
-#endif /* BH_DIAGNOSTICS_H */
+#ifndef BH_DIAGNOSTICS_H
+#define BH_DIAGNOSTICS_H
+namespace AHFinderDirect
+{
+
+	struct BH_diagnostics
+	{
+	public:
+		// mean x,y,z
+		fp centroid_x, centroid_y, centroid_z;
+
+		// these are quadrupole moments about the centroid, i.e.
+		// mean(xi*xj) - centroid_i*centroid_j
+		fp quadrupole_xx, quadrupole_xy, quadrupole_xz,
+			quadrupole_yy, quadrupole_yz,
+			quadrupole_zz;
+
+		// min,max,mean surface radius about local coordinate origin
+		fp min_radius, max_radius, mean_radius;
+
+		// xyz bounding box
+		fp min_x, max_x,
+			min_y, max_y,
+			min_z, max_z;
+
+		// proper circumference
+		// (computed using induced metric along these local-coordinate planes)
+		fp circumference_xy,
+			circumference_xz,
+			circumference_yz;
+
+		// surface area (computed using induced metric)
+		// and quantities derived from it
+		fp area, irreducible_mass, areal_radius;
+
+		double Px, Py, Pz, Sx, Sy, Sz;
+
+	public:
+		// position of diagnostics in buffer and number of diagnostics
+		enum
+		{
+			posn__centroid_x = 0,
+			posn__centroid_y,
+			posn__centroid_z,
+			posn__quadrupole_xx,
+			posn__quadrupole_xy,
+			posn__quadrupole_xz,
+			posn__quadrupole_yy,
+			posn__quadrupole_yz,
+			posn__quadrupole_zz,
+			posn__min_radius,
+			posn__max_radius,
+			posn__mean_radius,
+
+			posn__min_x,
+			posn__max_x,
+			posn__min_y,
+			posn__max_y,
+			posn__min_z,
+			posn__max_z,
+
+			posn__circumference_xy,
+			posn__circumference_xz,
+			posn__circumference_yz,
+
+			posn__area,
+			posn__irreducible_mass,
+			posn__areal_radius,
+
+			N_buffer // no comma	// size of buffer
+		};
+
+		// copy diagnostics to/from buffer
+		void copy_to_buffer(double buffer[N_buffer]) const;
+		void copy_from_buffer(const double buffer[N_buffer]);
+
+	public:
+		void compute(patch_system &ps);
+
+		void compute_signature(patch_system &ps, const double dT);
+
+		FILE *setup_output_file(int N_horizons, int hn)
+			const;
+
+		void output(FILE *fileptr, double time)
+			const;
+
+		BH_diagnostics();
+
+	private:
+		static double surface_integral(const patch_system &ps,
+									   int src_gfn, bool src_gfn_is_even_across_xy_plane,
+									   bool src_gfn_is_even_across_xz_plane,
+									   bool src_gfn_is_even_across_yz_plane,
+									   enum patch::integration_method method);
+	};
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /* BH_DIAGNOSTICS_H */
--- a/AMSS_NCKU_source/AHF_Direct/FFT.f90
+++ b/AMSS_NCKU_source/AHF_Direct/FFT.f90
@@ -1,87 +1,87 @@
-
-
-#if 0
-program checkFFT
-use dfport
-implicit none
-double precision::x
-integer,parameter::N=256
-double precision,dimension(N*2)::p
-double precision,dimension(N/2)::s
-integer::ncount,j,idum
-character(len=8)::tt
-tt=clock()
-idum=iachar(tt(8:8))-48
-p=0.0
-open(77,file='prime.dat',status='unknown')
-loop1:do ncount=1,N
-   x=ran(idum)
-   p(2*ncount-1)=x
-   write(77,'(f15.3)')x
-enddo loop1
-close(77)
-call four1(p,N,1)
-do j=1,N/2
-  s(j)=p(2*j)*p(2*j)+p(2*j-1)*p(2*j-1)
-enddo
-x=0.0
-do j=1,N/2
-  x=x+s(j)
-enddo
-s=s/x
-open(77,file='power.dat',status='unknown')
-do j=1,N/2
-  write(77,'(2(1x,f15.3))')dble(j-1)/dble(N),s(j)
-enddo
-close(77)
-end program checkFFT
-#endif
-
-!-------------
-! Optimized FFT using Intel oneMKL DFTI
-! Mathematical equivalence: Standard DFT definition
-!   Forward (isign=1):  X[k] = sum_{n=0}^{N-1} x[n] * exp(-2*pi*i*k*n/N)
-!   Backward (isign=-1): X[k] = sum_{n=0}^{N-1} x[n] * exp(+2*pi*i*k*n/N)
-! Input/Output: dataa is interleaved complex array [Re(0),Im(0),Re(1),Im(1),...]
-!-------------
-SUBROUTINE four1(dataa,nn,isign)
-use MKL_DFTI
-implicit none
-INTEGER, intent(in) :: isign, nn
-DOUBLE PRECISION, dimension(2*nn), intent(inout) :: dataa
-
-type(DFTI_DESCRIPTOR), pointer :: desc
-integer :: status
-
-! Create DFTI descriptor for 1D complex-to-complex transform
-status = DftiCreateDescriptor(desc, DFTI_DOUBLE, DFTI_COMPLEX, 1, nn)
-if (status /= 0) return
-
-! Set input/output storage as interleaved complex (default)
-status = DftiSetValue(desc, DFTI_PLACEMENT, DFTI_INPLACE)
-if (status /= 0) then
-   status = DftiFreeDescriptor(desc)
-   return
-endif
-
-! Commit the descriptor
-status = DftiCommitDescriptor(desc)
-if (status /= 0) then
-   status = DftiFreeDescriptor(desc)
-   return
-endif
-
-! Execute FFT based on direction
-if (isign == 1) then
-   ! Forward FFT: exp(-2*pi*i*k*n/N)
-   status = DftiComputeForward(desc, dataa)
-else
-   ! Backward FFT: exp(+2*pi*i*k*n/N)
-   status = DftiComputeBackward(desc, dataa)
-endif
-
-! Free descriptor
-status = DftiFreeDescriptor(desc)
-
-return
-END SUBROUTINE four1
+
+
+#if 0
+program checkFFT
+use dfport
+implicit none
+double precision::x
+integer,parameter::N=256
+double precision,dimension(N*2)::p
+double precision,dimension(N/2)::s
+integer::ncount,j,idum
+character(len=8)::tt
+tt=clock()
+idum=iachar(tt(8:8))-48
+p=0.0
+open(77,file='prime.dat',status='unknown')
+loop1:do ncount=1,N
+   x=ran(idum)
+   p(2*ncount-1)=x
+   write(77,'(f15.3)')x
+enddo loop1
+close(77)
+call four1(p,N,1)
+do j=1,N/2
+  s(j)=p(2*j)*p(2*j)+p(2*j-1)*p(2*j-1)
+enddo
+x=0.0
+do j=1,N/2
+  x=x+s(j)
+enddo
+s=s/x
+open(77,file='power.dat',status='unknown')
+do j=1,N/2
+  write(77,'(2(1x,f15.3))')dble(j-1)/dble(N),s(j)
+enddo
+close(77)
+end program checkFFT
+#endif
+
+!-------------
+! Optimized FFT using Intel oneMKL DFTI
+! Mathematical equivalence: Standard DFT definition
+!   Forward (isign=1):  X[k] = sum_{n=0}^{N-1} x[n] * exp(-2*pi*i*k*n/N)
+!   Backward (isign=-1): X[k] = sum_{n=0}^{N-1} x[n] * exp(+2*pi*i*k*n/N)
+! Input/Output: dataa is interleaved complex array [Re(0),Im(0),Re(1),Im(1),...]
+!-------------
+SUBROUTINE four1(dataa,nn,isign)
+use MKL_DFTI
+implicit none
+INTEGER, intent(in) :: isign, nn
+DOUBLE PRECISION, dimension(2*nn), intent(inout) :: dataa
+
+type(DFTI_DESCRIPTOR), pointer :: desc
+integer :: status
+
+! Create DFTI descriptor for 1D complex-to-complex transform
+status = DftiCreateDescriptor(desc, DFTI_DOUBLE, DFTI_COMPLEX, 1, nn)
+if (status /= 0) return
+
+! Compute the transform in place: dataa is overwritten (DFTI_INPLACE is also the default placement)
+status = DftiSetValue(desc, DFTI_PLACEMENT, DFTI_INPLACE)
+if (status /= 0) then
+   status = DftiFreeDescriptor(desc)
+   return
+endif
+
+! Commit the descriptor
+status = DftiCommitDescriptor(desc)
+if (status /= 0) then
+   status = DftiFreeDescriptor(desc)
+   return
+endif
+
+! Execute FFT based on direction
+if (isign == 1) then
+   ! Forward FFT: exp(-2*pi*i*k*n/N)
+   status = DftiComputeForward(desc, dataa)
+else
+   ! Backward FFT: exp(+2*pi*i*k*n/N)
+   status = DftiComputeBackward(desc, dataa)
+endif
+
+! Free descriptor
+status = DftiFreeDescriptor(desc)
+
+return
+END SUBROUTINE four1
--- a/AMSS_NCKU_source/AHF_Direct/IntPnts.C
+++ b/AMSS_NCKU_source/AHF_Direct/IntPnts.C
@@ -1,97 +1,97 @@
-//$Id: IntPnts.C,v 1.1 2012/04/03 10:49:42 zjcao Exp $
-
-#include "macrodef.h"
-#ifdef With_AHF
-
-#include <math.h>
-#include <stdio.h>
-
-#include <iostream>
-using namespace std;
-
-#include "myglobal.h"
-
-namespace AHFinderDirect
-{
-  extern struct state state;
-  int globalInterpGFL(double *X, double *Y, double *Z, int Ns,
-                      double *Data)
-  {
-    if (Ns == 0)
-      return 0;
-    int n;
-    double *pox[3];
-    for (int i = 0; i < 3; i++)
-      pox[i] = new double[Ns];
-    for (n = 0; n < Ns; n++)
-    {
-      pox[0][n] = X[n];
-      pox[1][n] = Y[n];
-      pox[2][n] = Z[n];
-    }
-
-    const int InList = 35;
-
-    double *datap;
-    datap = new double[Ns * InList];
-    if (!(state.ADM->AH_Interp_Points(state.AHList, Ns, pox, datap, state.Symmetry)))
-      return 0;
-    // reform data
-    for (int pnt = 0; pnt < Ns; pnt++)
-      for (int ii = 0; ii < InList; ii++)
-      {
-        if (ii == 0 || ii == 12 || ii == 20)
-          Data[pnt + ii * Ns] = datap[ii + pnt * InList] + 1;
-        else if (ii == 24) // from chi-1 to psi
-          Data[pnt + ii * Ns] = pow(datap[ii + pnt * InList] + 1, -0.25);
-        else if (ii == 25 || ii == 26 || ii == 27) // from chi,i to psi,i
-          Data[pnt + ii * Ns] = -pow(datap[24 + pnt * InList] + 1, -1.25) / 4 * datap[ii + pnt * InList];
-        else
-          Data[pnt + ii * Ns] = datap[ii + pnt * InList];
-      }
-    delete[] datap;
-
-    delete[] pox[0];
-    delete[] pox[1];
-    delete[] pox[2];
-
-    return 1;
-  }
-  // inerpolate lapse and shift
-  int globalInterpGFLlash(double *X, double *Y, double *Z, int Ns,
-                          double *Data)
-  {
-    if (Ns == 0)
-      return 0;
-    int n;
-    double *pox[3];
-    for (int i = 0; i < 3; i++)
-      pox[i] = new double[Ns];
-    for (n = 0; n < Ns; n++)
-    {
-      pox[0][n] = X[n];
-      pox[1][n] = Y[n];
-      pox[2][n] = Z[n];
-    }
-
-    double SYM = 1.0, ANT = -1.0;
-    const int InList = 4;
-
-    double *datap;
-    datap = new double[Ns * InList];
-    state.ADM->AH_Interp_Points(state.GaugeList, Ns, pox, datap, state.Symmetry);
-    // reform data
-    for (int pnt = 0; pnt < Ns; pnt++)
-      for (int ii = 0; ii < InList; ii++)
-        Data[pnt + ii * Ns] = datap[ii + pnt * InList];
-
-    delete[] datap;
-    delete[] pox[0];
-    delete[] pox[1];
-    delete[] pox[2];
-
-    return 1;
-  }
-
-} // namespace AHFinderDirect
-#endif
+//$Id: IntPnts.C,v 1.1 2012/04/03 10:49:42 zjcao Exp $
+
+#include "macrodef.h"
+#ifdef With_AHF
+
+#include <math.h>
+#include <stdio.h>
+
+#include <iostream>
+using namespace std;
+
+#include "myglobal.h"
+
+namespace AHFinderDirect
+{
+  extern struct state state;
+  // Interpolate the state.AHList quantities at the Ns points (X,Y,Z); value ii of point pnt
+  // is stored in Data[pnt + ii * Ns].  Returns 1 on success, 0 if Ns == 0 or interpolation fails.
+  int globalInterpGFL(double *X, double *Y, double *Z, int Ns,
+                      double *Data)
+  {
+    if (Ns == 0)
+      return 0;
+    double *pox[3];
+    for (int i = 0; i < 3; i++)
+      pox[i] = new double[Ns];
+    for (int n = 0; n < Ns; n++)
+    {
+      pox[0][n] = X[n];
+      pox[1][n] = Y[n];
+      pox[2][n] = Z[n];
+    }
+
+    const int InList = 35;
+    double *datap = new double[Ns * InList];
+
+    // remember the status instead of returning early, so every buffer below is always freed
+    const bool failed = !(state.ADM->AH_Interp_Points(state.AHList, Ns, pox, datap, state.Symmetry));
+    // reform data (skipped entirely when the interpolation failed)
+    for (int pnt = 0; !failed && pnt < Ns; pnt++)
+      for (int ii = 0; ii < InList; ii++)
+      {
+        if (ii == 0 || ii == 12 || ii == 20)
+          Data[pnt + ii * Ns] = datap[ii + pnt * InList] + 1;
+        else if (ii == 24) // from chi-1 to psi
+          Data[pnt + ii * Ns] = pow(datap[ii + pnt * InList] + 1, -0.25);
+        else if (ii == 25 || ii == 26 || ii == 27) // from chi,i to psi,i
+          Data[pnt + ii * Ns] = -pow(datap[24 + pnt * InList] + 1, -1.25) / 4 * datap[ii + pnt * InList];
+        else
+          Data[pnt + ii * Ns] = datap[ii + pnt * InList];
+      }
+    delete[] datap;
+
+    delete[] pox[0];
+    delete[] pox[1];
+    delete[] pox[2];
+
+    return failed ? 0 : 1;
+  }
+  // interpolate lapse and shift
+  int globalInterpGFLlash(double *X, double *Y, double *Z, int Ns,
+                          double *Data)
+  {
+    if (Ns == 0)
+      return 0;
+    int n;
+    double *pox[3];
+    for (int i = 0; i < 3; i++)
+      pox[i] = new double[Ns];
+    for (n = 0; n < Ns; n++)
+    {
+      pox[0][n] = X[n];
+      pox[1][n] = Y[n];
+      pox[2][n] = Z[n];
+    }
+
+    double SYM = 1.0, ANT = -1.0;
+    const int InList = 4;
+
+    double *datap;
+    datap = new double[Ns * InList];
+    state.ADM->AH_Interp_Points(state.GaugeList, Ns, pox, datap, state.Symmetry);
+    // reform data
+    for (int pnt = 0; pnt < Ns; pnt++)
+      for (int ii = 0; ii < InList; ii++)
+        Data[pnt + ii * Ns] = datap[ii + pnt * InList];
+
+    delete[] datap;
+    delete[] pox[0];
+    delete[] pox[1];
+    delete[] pox[2];
+
+    return 1;
+  }
+
+} // namespace AHFinderDirect
+#endif
--- a/AMSS_NCKU_source/AHF_Direct/IntPnts0.C
+++ b/AMSS_NCKU_source/AHF_Direct/IntPnts0.C
@@ -1,43 +1,43 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-
-#include <mpi.h>
-
-#include "myglobal.h"
-
-int CCTK_VInfo(const char *thorn, const char *format, ...)
-{
-   int myrank;
-   MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
-   if (myrank !=0) return 0;
-   
-   va_list ap;
-   va_start (ap, format);
-   fprintf (stdout, "INFO (%s): ", thorn);
-   vfprintf (stdout, format, ap);
-   fprintf (stdout, "\n");
-   va_end (ap);
-   return 0;
-}
-int CCTK_VWarn (int level,
-                int line,
-                const char *file,
-                const char *thorn,
-                const char *format,
-                ...) 
-{  
-   int myrank;
-   MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
-   if (myrank !=0) return 0;
-   
-   va_list ap;
-   va_start (ap, format);
-   fprintf (stdout, "WARN (%s): ", thorn);
-   vfprintf (stdout, format, ap);
-   fprintf (stdout, "\n");
-   va_end (ap);
-   return 0;
-}
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+
+#include <mpi.h>
+
+#include "myglobal.h"
+
+int CCTK_VInfo(const char *thorn, const char *format, ...)
+{
+   int myrank;
+   MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
+   if (myrank !=0) return 0;
+   
+   va_list ap;
+   va_start (ap, format);
+   fprintf (stdout, "INFO (%s): ", thorn);
+   vfprintf (stdout, format, ap);
+   fprintf (stdout, "\n");
+   va_end (ap);
+   return 0;
+}
+int CCTK_VWarn (int level,
+                int line,
+                const char *file,
+                const char *thorn,
+                const char *format,
+                ...) 
+{  
+   int myrank;
+   MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
+   if (myrank !=0) return 0;
+   
+   va_list ap;
+   va_start (ap, format);
+   fprintf (stdout, "WARN (%s): ", thorn);
+   vfprintf (stdout, format, ap);
+   fprintf (stdout, "\n");
+   va_end (ap);
+   return 0;
+}
--- a/AMSS_NCKU_source/AHF_Direct/Jacobian.C
+++ b/AMSS_NCKU_source/AHF_Direct/Jacobian.C
@@ -1,270 +1,270 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <assert.h>
-#include <math.h>
-#include <string.h>
-
-#include "util_Table.h"
-#include "cctk.h"
-
-#include "config.h"
-#include "stdc.h"
-
-#include "util.h"
-#include "array.h"
-#include "cpm_map.h"
-#include "linear_map.h"
-
-#include "coords.h"
-#include "tgrid.h"
-#include "fd_grid.h"
-#include "patch.h"
-#include "patch_edge.h"
-#include "patch_interp.h"
-#include "ghost_zone.h"
-#include "patch_system.h"
-
-#include "Jacobian.h"
-#include "ilucg.h"
-// all the code in this file is inside this namespace
-namespace AHFinderDirect
-{
-	// this represents a single element stored in the matrix for
-	// sort_row_into_column_order()  and  sort_row_into_column_order__cmp()
-	struct matrix_element
-	{
-		int JA;
-		fp A;
-	};
-
-	Jacobian::Jacobian(patch_system &ps)
-		: ps_(ps),
-		  N_rows_(ps.N_grid_points()),
-		  N_nonzeros_(0), current_N_rows_(0), N_nonzeros_allocated_(0),
-		  IA_(new integer[N_rows_ + 1]), JA_(NULL), A_(NULL),
-		  itemp_(NULL), rtemp_(NULL)
-	{
-		IO_ = 1;
-		zero_matrix();
-	}
-
-	Jacobian::~Jacobian()
-	{
-		if (A_)
-			delete[] A_;
-		if (JA_)
-			delete[] JA_;
-		if (IA_)
-			delete[] IA_;
-		if (rtemp_)
-			delete[] rtemp_;
-		if (itemp_)
-			delete[] itemp_;
-	}
-
-	double Jacobian::element(int II, int JJ)
-		const
-	{
-		const int posn = find_element(II, JJ);
-		return (posn >= 0) ? A_[posn] : 0.0;
-	}
-
-	void Jacobian::zero_matrix()
-	{
-
-		N_nonzeros_ = 0;
-		current_N_rows_ = 0;
-		IA_[0] = IO_;
-	}
-
-	void Jacobian::set_element(int II, int JJ, fp value)
-	{
-		const int posn = find_element(II, JJ);
-		if (posn >= 0)
-			then A_[posn] = value;
-		else
-			insert_element(II, JJ, value);
-	}
-
-	void Jacobian::sum_into_element(int II, int JJ, fp value)
-	{
-		const int posn = find_element(II, JJ);
-		if (posn >= 0)
-			then A_[posn] += value;
-		else
-			insert_element(II, JJ, value);
-	}
-
-	int Jacobian::find_element(int II, int JJ)
-		const
-	{
-		if (II >= current_N_rows_)
-			then return -1; // this row not defined yet
-
-		const int start = IA_[II] - IO_;
-		const int stop = IA_[II + 1] - IO_;
-		for (int posn = start; posn < stop; ++posn)
-		{
-			if (JA_[posn] - IO_ == JJ)
-				then return posn; // found
-		}
-
-		return -1; // not found
-	}
-
-	int Jacobian::insert_element(int II, int JJ, double value)
-	{
-		if (!((II == current_N_rows_ - 1) || (II == current_N_rows_)))
-		{
-			printf(
-				"***** row_sparse_Jacobian::insert_element(II=%d, JJ=%d, value=%g):\n"
-				"        attempt to insert element elsewhere than {last row, last row+1}!\n"
-				"        N_rows_=%d   current_N_rows_=%d   IO_=%d\n"
-				"        N_nonzeros_=%d   N_nonzeros_allocated_=%d\n",
-				II, JJ, double(value),
-				N_rows_, current_N_rows_, IO_,
-				N_nonzeros_, N_nonzeros_allocated_);
-			abort();
-		}
-
-		// start a new row if necessary
-		if (II == current_N_rows_)
-			then
-			{
-				assert(current_N_rows_ < N_rows_);
-				IA_[current_N_rows_ + 1] = IA_[current_N_rows_];
-				++current_N_rows_;
-			}
-
-		// insert into current row
-		assert(II == current_N_rows_ - 1);
-		if (IA_[II + 1] - IO_ >= N_nonzeros_allocated_)
-			then grow_arrays();
-		const int posn = IA_[II + 1] - IO_;
-		assert(posn < N_nonzeros_allocated_);
-		JA_[posn] = JJ + IO_;
-		A_[posn] = value;
-		++IA_[II + 1];
-		++N_nonzeros_;
-
-		return posn;
-	}
-
-	void Jacobian::grow_arrays()
-	{
-		N_nonzeros_allocated_ += base_growth_amount + (N_nonzeros_allocated_ >> 1);
-
-		int *const new_JA = new int[N_nonzeros_allocated_];
-		double *const new_A = new double[N_nonzeros_allocated_];
-		for (int posn = 0; posn < N_nonzeros_; ++posn)
-		{
-			new_JA[posn] = JA_[posn];
-			new_A[posn] = A_[posn];
-		}
-		delete[] A_;
-		delete[] JA_;
-		JA_ = new_JA;
-		A_ = new_A;
-	}
-
-	int compare_matrix_elements(const void *x, const void *y)
-	{
-		const struct matrix_element *const px = static_cast<const struct matrix_element *>(x);
-		const struct matrix_element *const py = static_cast<const struct matrix_element *>(y);
-
-		return px->JA - py->JA;
-	}
-
-	void Jacobian::sort_each_row_into_column_order()
-	{
-		// buffer must be big enough to hold the largest row
-		int max_N_in_row = 0;
-		{
-			for (int II = 0; II < N_rows_; ++II)
-			{
-				max_N_in_row = max(max_N_in_row, IA_[II + 1] - IA_[II]);
-			}
-		}
-
-		// contiguous buffer for sorting
-		struct matrix_element *const buffer = new struct matrix_element[max_N_in_row];
-
-		{
-			for (int II = 0; II < N_rows_; ++II)
-			{
-				const int N_in_row = IA_[II + 1] - IA_[II];
-
-				// copy this row's JA_[] and A_[] values to the buffer
-				const int start = IA_[II] - IO_;
-				for (int p = 0; p < N_in_row; ++p)
-				{
-					const int posn = start + p;
-					buffer[p].JA = JA_[posn];
-					buffer[p].A = A_[posn];
-				}
-
-				// sort the buffer
-				qsort(static_cast<void *>(buffer), N_in_row, sizeof(buffer[0]),
-					  &compare_matrix_elements);
-
-				// copy the buffer values back to this row's JA_[] and A_[]
-				for (int p = 0; p < N_in_row; ++p)
-				{
-					const int posn = start + p;
-					JA_[posn] = buffer[p].JA;
-					A_[posn] = buffer[p].A;
-				}
-			}
-		}
-
-		delete[] buffer;
-	}
-
-	double Jacobian::solve_linear_system(int rhs_gfn, int x_gfn, bool print_msg_flag)
-	{
-		assert(IO_ == Fortran_index_origin);
-		assert(current_N_rows_ == N_rows_);
-
-		if (itemp_ == NULL)
-			then
-			{
-				itemp_ = new int[3 * N_rows_ + 3 * N_nonzeros_ + 2];
-				rtemp_ = new double[4 * N_rows_ + N_nonzeros_];
-			}
-
-		// initial guess = all zeros
-		double *x = ps_.gridfn_data(x_gfn);
-		for (int II = 0; II < N_rows_; ++II)
-		{
-			x[II] = 0.0;
-		}
-
-		const int N = N_rows_;
-		const double *rhs = ps_.gridfn_data(rhs_gfn);
-		const double eps = 1e-10;
-		const int max_iterations = N_rows_;
-		int istatus;
-
-		// the actual linear solution
-		f_ilucg(N,
-				IA_, JA_, A_,
-				rhs, x,
-				itemp_, rtemp_,
-				eps, max_iterations,
-				istatus);
-
-		if (istatus < 0)
-		{
-			printf(
-				"***** row_sparse_Jacobian__ILUCG::solve_linear_system(rhs_gfn=%d, x_gfn=%d):\n"
-				"        error return from [sd]ilucg() routine!\n"
-				"        istatus=%d < 0 ==> bad matrix structure, eg. zero diagonal element!\n",
-				rhs_gfn, x_gfn,
-				int(istatus));
-			abort();
-		}
-
-		return -1.0;
-	}
-
-} // namespace AHFinderDirect
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+
+#include "util_Table.h"
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+
+#include "util.h"
+#include "array.h"
+#include "cpm_map.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "fd_grid.h"
+#include "patch.h"
+#include "patch_edge.h"
+#include "patch_interp.h"
+#include "ghost_zone.h"
+#include "patch_system.h"
+
+#include "Jacobian.h"
+#include "ilucg.h"
+// all the code in this file is inside this namespace
+namespace AHFinderDirect
+{
+	// this represents a single element stored in the matrix for
+	// sort_each_row_into_column_order()  and  compare_matrix_elements()
+	struct matrix_element
+	{
+		int JA;
+		fp A;
+	};
+
+	Jacobian::Jacobian(patch_system &ps)
+		: ps_(ps),
+		  N_rows_(ps.N_grid_points()),
+		  N_nonzeros_(0), current_N_rows_(0), N_nonzeros_allocated_(0),
+		  IA_(new integer[N_rows_ + 1]), JA_(NULL), A_(NULL),
+		  itemp_(NULL), rtemp_(NULL)
+	{
+		IO_ = 1;
+		zero_matrix();
+	}
+
+	Jacobian::~Jacobian()
+	{
+		if (A_)
+			delete[] A_;
+		if (JA_)
+			delete[] JA_;
+		if (IA_)
+			delete[] IA_;
+		if (rtemp_)
+			delete[] rtemp_;
+		if (itemp_)
+			delete[] itemp_;
+	}
+
+	double Jacobian::element(int II, int JJ)
+		const
+	{
+		const int posn = find_element(II, JJ);
+		return (posn >= 0) ? A_[posn] : 0.0;
+	}
+
+	void Jacobian::zero_matrix()
+	{
+
+		N_nonzeros_ = 0;
+		current_N_rows_ = 0;
+		IA_[0] = IO_;
+	}
+
+	void Jacobian::set_element(int II, int JJ, fp value)
+	{
+		const int posn = find_element(II, JJ);
+		if (posn >= 0)
+			then A_[posn] = value;
+		else
+			insert_element(II, JJ, value);
+	}
+
+	void Jacobian::sum_into_element(int II, int JJ, fp value)
+	{
+		const int posn = find_element(II, JJ);
+		if (posn >= 0)
+			then A_[posn] += value;
+		else
+			insert_element(II, JJ, value);
+	}
+
+	int Jacobian::find_element(int II, int JJ)
+		const
+	{
+		if (II >= current_N_rows_)
+			then return -1; // this row not defined yet
+
+		const int start = IA_[II] - IO_;
+		const int stop = IA_[II + 1] - IO_;
+		for (int posn = start; posn < stop; ++posn)
+		{
+			if (JA_[posn] - IO_ == JJ)
+				then return posn; // found
+		}
+
+		return -1; // not found
+	}
+
+	int Jacobian::insert_element(int II, int JJ, double value)
+	{
+		if (!((II == current_N_rows_ - 1) || (II == current_N_rows_)))
+		{
+			printf(
+				"***** row_sparse_Jacobian::insert_element(II=%d, JJ=%d, value=%g):\n"
+				"        attempt to insert element elsewhere than {last row, last row+1}!\n"
+				"        N_rows_=%d   current_N_rows_=%d   IO_=%d\n"
+				"        N_nonzeros_=%d   N_nonzeros_allocated_=%d\n",
+				II, JJ, double(value),
+				N_rows_, current_N_rows_, IO_,
+				N_nonzeros_, N_nonzeros_allocated_);
+			abort();
+		}
+
+		// start a new row if necessary
+		if (II == current_N_rows_)
+			then
+			{
+				assert(current_N_rows_ < N_rows_);
+				IA_[current_N_rows_ + 1] = IA_[current_N_rows_];
+				++current_N_rows_;
+			}
+
+		// insert into current row
+		assert(II == current_N_rows_ - 1);
+		if (IA_[II + 1] - IO_ >= N_nonzeros_allocated_)
+			then grow_arrays();
+		const int posn = IA_[II + 1] - IO_;
+		assert(posn < N_nonzeros_allocated_);
+		JA_[posn] = JJ + IO_;
+		A_[posn] = value;
+		++IA_[II + 1];
+		++N_nonzeros_;
+
+		return posn;
+	}
+
+	void Jacobian::grow_arrays()
+	{
+		N_nonzeros_allocated_ += base_growth_amount + (N_nonzeros_allocated_ >> 1);
+
+		int *const new_JA = new int[N_nonzeros_allocated_];
+		double *const new_A = new double[N_nonzeros_allocated_];
+		for (int posn = 0; posn < N_nonzeros_; ++posn)
+		{
+			new_JA[posn] = JA_[posn];
+			new_A[posn] = A_[posn];
+		}
+		delete[] A_;
+		delete[] JA_;
+		JA_ = new_JA;
+		A_ = new_A;
+	}
+
+	int compare_matrix_elements(const void *x, const void *y)
+	{
+		const struct matrix_element *const px = static_cast<const struct matrix_element *>(x);
+		const struct matrix_element *const py = static_cast<const struct matrix_element *>(y);
+
+		return px->JA - py->JA;
+	}
+
+	void Jacobian::sort_each_row_into_column_order()
+	{
+		// buffer must be big enough to hold the largest row
+		int max_N_in_row = 0;
+		{
+			for (int II = 0; II < N_rows_; ++II)
+			{
+				max_N_in_row = max(max_N_in_row, IA_[II + 1] - IA_[II]);
+			}
+		}
+
+		// contiguous buffer for sorting
+		struct matrix_element *const buffer = new struct matrix_element[max_N_in_row];
+
+		{
+			for (int II = 0; II < N_rows_; ++II)
+			{
+				const int N_in_row = IA_[II + 1] - IA_[II];
+
+				// copy this row's JA_[] and A_[] values to the buffer
+				const int start = IA_[II] - IO_;
+				for (int p = 0; p < N_in_row; ++p)
+				{
+					const int posn = start + p;
+					buffer[p].JA = JA_[posn];
+					buffer[p].A = A_[posn];
+				}
+
+				// sort the buffer
+				qsort(static_cast<void *>(buffer), N_in_row, sizeof(buffer[0]),
+					  &compare_matrix_elements);
+
+				// copy the buffer values back to this row's JA_[] and A_[]
+				for (int p = 0; p < N_in_row; ++p)
+				{
+					const int posn = start + p;
+					JA_[posn] = buffer[p].JA;
+					A_[posn] = buffer[p].A;
+				}
+			}
+		}
+
+		delete[] buffer;
+	}
+
+	double Jacobian::solve_linear_system(int rhs_gfn, int x_gfn, bool print_msg_flag)
+	{
+		assert(IO_ == Fortran_index_origin);
+		assert(current_N_rows_ == N_rows_);
+
+		if (itemp_ == NULL)
+			then
+			{
+				itemp_ = new int[3 * N_rows_ + 3 * N_nonzeros_ + 2];
+				rtemp_ = new double[4 * N_rows_ + N_nonzeros_];
+			}
+
+		// initial guess = all zeros
+		double *x = ps_.gridfn_data(x_gfn);
+		for (int II = 0; II < N_rows_; ++II)
+		{
+			x[II] = 0.0;
+		}
+
+		const int N = N_rows_;
+		const double *rhs = ps_.gridfn_data(rhs_gfn);
+		const double eps = 1e-10;
+		const int max_iterations = N_rows_;
+		int istatus;
+
+		// the actual linear solution
+		f_ilucg(N,
+				IA_, JA_, A_,
+				rhs, x,
+				itemp_, rtemp_,
+				eps, max_iterations,
+				istatus);
+
+		if (istatus < 0)
+		{
+			printf(
+				"***** row_sparse_Jacobian__ILUCG::solve_linear_system(rhs_gfn=%d, x_gfn=%d):\n"
+				"        error return from [sd]ilucg() routine!\n"
+				"        istatus=%d < 0 ==> bad matrix structure, eg. zero diagonal element!\n",
+				rhs_gfn, x_gfn,
+				int(istatus));
+			abort();
+		}
+
+		return -1.0;
+	}
+
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/Jacobian.h
+++ b/AMSS_NCKU_source/AHF_Direct/Jacobian.h
@@ -1,90 +1,90 @@
-#ifndef AHFINDERDIRECT__JACOBIAN_HH
-#define AHFINDERDIRECT__JACOBIAN_HH
-
-namespace AHFinderDirect
-{
-	class Jacobian
-	{
-	public:
-		// basic meta-info
-		patch_system &my_patch_system() const { return ps_; }
-		int N_rows() const { return N_rows_; }
-
-		// convert (patch,irho,isigma) <--> row/column index
-		int II_of_patch_irho_isigma(const patch &p, int irho, int isigma)
-			const
-		{
-			return ps_.gpn_of_patch_irho_isigma(p, irho, isigma);
-		}
-		const patch &patch_irho_isigma_of_II(int II, int &irho, int &isigma)
-			const
-		{
-			return ps_.patch_irho_isigma_of_gpn(II, irho, isigma);
-		}
-
-		double element(int II, int JJ) const;
-
-		// is the matrix element (II,JJ) stored explicitly?
-		bool is_explicitly_stored(int II, int JJ) const
-		{
-			return find_element(II, JJ) > 0;
-		}
-
-		int IO() const { return IO_; }
-		enum
-		{
-			C_index_origin = 0,
-			Fortran_index_origin = 1
-		};
-
-		void zero_matrix();
-
-		void set_element(int II, int JJ, fp value);
-
-		void sum_into_element(int II, int JJ, fp value);
-
-		int find_element(int II, int JJ) const;
-
-		int insert_element(int II, int JJ, fp value);
-
-		void grow_arrays();
-
-		enum
-		{
-			base_growth_amount = 1000
-		};
-
-		void sort_each_row_into_column_order();
-
-		double solve_linear_system(int rhs_gfn, int x_gfn,
-								   bool print_msg_flag);
-
-	public:
-		Jacobian(patch_system &ps);
-		~Jacobian();
-
-	protected:
-		patch_system &ps_;
-		int N_rows_;
-
-		int IO_;
-
-		int N_nonzeros_;
-		int current_N_rows_;
-
-		int N_nonzeros_allocated_;
-
-		int *IA_;
-
-		int *JA_;
-
-		double *A_;
-
-		int *itemp_;
-		double *rtemp_;
-	};
-
-	//******************************************************************************
-
-} // namespace AHFinderDirect
-#endif /* AHFINDERDIRECT__JACOBIAN_HH */
+#ifndef AHFINDERDIRECT__JACOBIAN_HH
+#define AHFINDERDIRECT__JACOBIAN_HH
+
+namespace AHFinderDirect
+{
+	class Jacobian
+	{
+	public:
+		// basic meta-info
+		patch_system &my_patch_system() const { return ps_; }
+		int N_rows() const { return N_rows_; }
+
+		// convert (patch,irho,isigma) <--> row/column index
+		int II_of_patch_irho_isigma(const patch &p, int irho, int isigma)
+			const
+		{
+			return ps_.gpn_of_patch_irho_isigma(p, irho, isigma);
+		}
+		const patch &patch_irho_isigma_of_II(int II, int &irho, int &isigma)
+			const
+		{
+			return ps_.patch_irho_isigma_of_gpn(II, irho, isigma);
+		}
+
+		double element(int II, int JJ) const;
+
+		// is (II,JJ) stored explicitly?  find_element() gives a 0-based slot or -1, so slot 0 counts
+		bool is_explicitly_stored(int II, int JJ) const
+		{
+			return find_element(II, JJ) >= 0;
+		}
+
+		int IO() const { return IO_; }
+		enum
+		{
+			C_index_origin = 0,
+			Fortran_index_origin = 1
+		};
+
+		void zero_matrix();
+
+		void set_element(int II, int JJ, fp value);
+
+		void sum_into_element(int II, int JJ, fp value);
+
+		int find_element(int II, int JJ) const;
+
+		int insert_element(int II, int JJ, fp value);
+
+		void grow_arrays();
+
+		enum
+		{
+			base_growth_amount = 1000
+		};
+
+		void sort_each_row_into_column_order();
+
+		double solve_linear_system(int rhs_gfn, int x_gfn,
+								   bool print_msg_flag);
+
+	public:
+		Jacobian(patch_system &ps);
+		~Jacobian();
+
+	protected:
+		patch_system &ps_;
+		int N_rows_;
+
+		int IO_;
+
+		int N_nonzeros_;
+		int current_N_rows_;
+
+		int N_nonzeros_allocated_;
+
+		int *IA_;
+
+		int *JA_;
+
+		double *A_;
+
+		int *itemp_;
+		double *rtemp_;
+	};
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /* AHFINDERDIRECT__JACOBIAN_HH */
--- a/AMSS_NCKU_source/AHF_Direct/Newton.C
+++ b/AMSS_NCKU_source/AHF_Direct/Newton.C
--- a/AMSS_NCKU_source/AHF_Direct/array.C
+++ b/AMSS_NCKU_source/AHF_Direct/array.C
@@ -1,186 +1,186 @@
-#include <assert.h>
-#include <stddef.h> // NULL
-#include <stdlib.h> // size_t
-
-#include "cctk.h"
-
-#include "stdc.h"
-#include "util.h"
-#include "array.h"
-
-namespace AHFinderDirect
-{
-	namespace jtutil
-	{
-
-		template <typename T>
-		array1d<T>::array1d(int min_i_in, int max_i_in,
-							T *array_in /* = NULL */,
-							int stride_i_in /* = 0 */)
-			: array_(array_in),
-			  offset_(0), // temp value, changed below
-			  stride_i_(stride_i_in),
-			  min_i_(min_i_in), max_i_(max_i_in),
-			  we_own_array_(array_in == NULL)
-		{
-			if (stride_i_ == 0)
-				then stride_i_ = 1;
-
-			// must use unchecked subscripting here since setup isn't done yet
-			offset_ = -subscript_unchecked(min_i_); // RHS uses offset_ = 0
-			assert(subscript_unchecked(min_i_) == 0);
-			max_subscript_ = subscript_unchecked(max_i_);
-
-			if (we_own_array_)
-				then
-				{
-					// allocate it
-					const int N_allocate = N_i();
-					array_ = new T[N_allocate];
-				}
-
-			// explicitly initialize array (new[] *doesn't* do this automagically)
-			for (int i = min_i(); i <= max_i(); ++i)
-			{
-				operator()(i) = T(0);
-			}
-		}
-
-		//
-		// This function destroys an  array1d  object.
-		//
-		template <typename T>
-		array1d<T>::~array1d()
-		{
-			if (we_own_array_)
-				then delete[] array_;
-		}
-
-		//
-		// This function constructs an  array2d  object.
-		//
-		template <typename T>
-		array2d<T>::array2d(int min_i_in, int max_i_in,
-							int min_j_in, int max_j_in,
-							T *array_in /* = NULL */,
-							int stride_i_in /* = 0 */, int stride_j_in /* = 0 */)
-			: array_(array_in),
-			  offset_(0), // temp value, changed below
-			  stride_i_(stride_i_in), stride_j_(stride_j_in),
-			  min_i_(min_i_in), max_i_(max_i_in),
-			  min_j_(min_j_in), max_j_(max_j_in),
-			  we_own_array_(array_in == NULL)
-		{
-			if (stride_j_ == 0)
-				then stride_j_ = 1;
-			if (stride_i_ == 0)
-				then stride_i_ = N_j();
-
-			// must use unchecked subscripting here since setup isn't done yet
-			offset_ = -subscript_unchecked(min_i_, min_j_); // RHS uses offset_ = 0
-			assert(subscript_unchecked(min_i_, min_j_) == 0);
-			max_subscript_ = subscript_unchecked(max_i_, max_j_);
-
-			if (we_own_array_)
-				then
-				{
-					// allocate it
-					const int N_allocate = N_i() * N_j();
-					array_ = new T[N_allocate];
-				}
-
-			// explicitly initialize array (new[] *doesn't* do this automagically)
-			for (int i = min_i(); i <= max_i(); ++i)
-			{
-				for (int j = min_j(); j <= max_j(); ++j)
-				{
-					operator()(i, j) = T(0);
-				}
-			}
-		}
-
-		//
-		// This function destroys an  array2d  object.
-		//
-		template <typename T>
-		array2d<T>::~array2d()
-		{
-			if (we_own_array_)
-				then delete[] array_;
-		}
-
-		//
-		// This function constructs an  array3d  object.
-		//
-		template <typename T>
-		array3d<T>::array3d(int min_i_in, int max_i_in,
-							int min_j_in, int max_j_in,
-							int min_k_in, int max_k_in,
-							T *array_in /* = NULL */,
-							int stride_i_in /* = 0 */, int stride_j_in /* = 0 */,
-							int stride_k_in /* = 0 */)
-			: array_(array_in),
-			  offset_(0), // temp value, changed below
-			  stride_i_(stride_i_in), stride_j_(stride_j_in),
-			  stride_k_(stride_k_in),
-			  min_i_(min_i_in), max_i_(max_i_in),
-			  min_j_(min_j_in), max_j_(max_j_in),
-			  min_k_(min_k_in), max_k_(max_k_in),
-			  we_own_array_(array_in == NULL)
-		{
-			if (stride_k_ == 0)
-				then stride_k_ = 1;
-			if (stride_j_ == 0)
-				then stride_j_ = N_k();
-			if (stride_i_ == 0)
-				then stride_i_ = N_j() * N_k();
-
-			// must use unchecked subscripting here since setup isn't done yet
-			offset_ = -subscript_unchecked(min_i_, min_j_, min_k_); // RHS uses offset_ = 0
-			assert(subscript_unchecked(min_i_, min_j_, min_k_) == 0);
-			max_subscript_ = subscript_unchecked(max_i_, max_j_, max_k_);
-
-			if (we_own_array_)
-				then
-				{
-					// allocate it
-					const int N_allocate = N_i() * N_j() * N_k();
-					array_ = new T[N_allocate];
-				}
-
-			// explicitly initialize array (new[] *doesn't* do this automagically)
-			for (int i = min_i(); i <= max_i(); ++i)
-			{
-				for (int j = min_j(); j <= max_j(); ++j)
-				{
-					for (int k = min_k(); k <= max_k(); ++k)
-					{
-						operator()(i, j, k) = T(0);
-					}
-				}
-			}
-		}
-		//
-		// This function destroys an  array3d  object.
-		//
-		template <typename T>
-		array3d<T>::~array3d()
-		{
-			if (we_own_array_)
-				then delete[] array_;
-		}
-
-		template class array1d<int>;
-
-		// FIXME: we shouldn't have to instantiate these both, the const one
-		//	  is actually trivially derivable from the non-const one. :(
-		template class array1d<void *>;
-		template class array1d<const void *>;
-
-		template class array1d<CCTK_REAL>;
-		template class array2d<CCTK_INT>;
-		template class array2d<CCTK_REAL>;
-		template class array3d<CCTK_REAL>;
-
-	} // namespace jtutil
-} // namespace AHFinderDirect
+#include <assert.h>
+#include <stddef.h> // NULL
+#include <stdlib.h> // size_t
+
+#include "cctk.h"
+
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+
+		template <typename T>
+		array1d<T>::array1d(int min_i_in, int max_i_in,
+							T *array_in /* = NULL */,
+							int stride_i_in /* = 0 */)
+			: array_(array_in),
+			  offset_(0), // temp value, changed below
+			  stride_i_(stride_i_in),
+			  min_i_(min_i_in), max_i_(max_i_in),
+			  we_own_array_(array_in == NULL) // no caller-provided storage ==> we allocate it below and free it in the destructor
+		{
+			if (stride_i_ == 0) // 0 means "stride omitted": default to unit stride
+				then stride_i_ = 1; // "then" is not a C++ keyword; presumably an empty macro from an included header — confirm
+
+			// choose offset_ so that min_i_ maps to storage position 0 (unchecked subscripting, since setup isn't done yet)
+			offset_ = -subscript_unchecked(min_i_); // RHS uses offset_ = 0
+			assert(subscript_unchecked(min_i_) == 0);
+			max_subscript_ = subscript_unchecked(max_i_); // storage position of the last element
+
+			if (we_own_array_)
+				then
+				{
+					// allocate it; NOTE(review): N_i() elements only suffice for stride 1 — confirm nobody passes a larger stride with array_in == NULL
+					const int N_allocate = N_i();
+					array_ = new T[N_allocate];
+				}
+
+			// zero every element (new[] *doesn't* do this for us); this also overwrites caller-provided storage
+			for (int i = min_i(); i <= max_i(); ++i)
+			{
+				operator()(i) = T(0);
+			}
+		}
+
+		//
+		// Destroys an array1d object; the storage is freed only if the constructor allocated it.
+		//
+		template <typename T>
+		array1d<T>::~array1d()
+		{
+			if (we_own_array_) // caller-provided storage is left untouched
+				then delete[] array_;
+		}
+
+		//
+		// Constructs an array2d object over [min_i,max_i] x [min_j,max_j]; strides passed as 0 default to C (row-major) order.
+		//
+		template <typename T>
+		array2d<T>::array2d(int min_i_in, int max_i_in,
+							int min_j_in, int max_j_in,
+							T *array_in /* = NULL */,
+							int stride_i_in /* = 0 */, int stride_j_in /* = 0 */)
+			: array_(array_in),
+			  offset_(0), // temp value, changed below
+			  stride_i_(stride_i_in), stride_j_(stride_j_in),
+			  min_i_(min_i_in), max_i_(max_i_in),
+			  min_j_(min_j_in), max_j_(max_j_in),
+			  we_own_array_(array_in == NULL) // no caller-provided storage ==> we allocate it below and free it in the destructor
+		{
+			if (stride_j_ == 0) // j is the fastest-varying index by default
+				then stride_j_ = 1;
+			if (stride_i_ == 0) // default: one full row of N_j() elements per step in i
+				then stride_i_ = N_j();
+
+			// choose offset_ so that (min_i_, min_j_) maps to storage position 0 (unchecked subscripting, since setup isn't done yet)
+			offset_ = -subscript_unchecked(min_i_, min_j_); // RHS uses offset_ = 0
+			assert(subscript_unchecked(min_i_, min_j_) == 0);
+			max_subscript_ = subscript_unchecked(max_i_, max_j_); // storage position of the last element
+
+			if (we_own_array_)
+				then
+				{
+					// allocate it; NOTE(review): this size only suffices for the default strides — confirm nobody passes larger strides with array_in == NULL
+					const int N_allocate = N_i() * N_j();
+					array_ = new T[N_allocate];
+				}
+
+			// zero every element (new[] *doesn't* do this for us); this also overwrites caller-provided storage
+			for (int i = min_i(); i <= max_i(); ++i)
+			{
+				for (int j = min_j(); j <= max_j(); ++j)
+				{
+					operator()(i, j) = T(0);
+				}
+			}
+		}
+
+		//
+		// Destroys an array2d object; the storage is freed only if the constructor allocated it.
+		//
+		template <typename T>
+		array2d<T>::~array2d()
+		{
+			if (we_own_array_) // caller-provided storage is left untouched
+				then delete[] array_;
+		}
+
+		//
+		// Constructs an array3d object over [min_i,max_i] x [min_j,max_j] x [min_k,max_k]; strides passed as 0 default to C order.
+		//
+		template <typename T>
+		array3d<T>::array3d(int min_i_in, int max_i_in,
+							int min_j_in, int max_j_in,
+							int min_k_in, int max_k_in,
+							T *array_in /* = NULL */,
+							int stride_i_in /* = 0 */, int stride_j_in /* = 0 */,
+							int stride_k_in /* = 0 */)
+			: array_(array_in),
+			  offset_(0), // temp value, changed below
+			  stride_i_(stride_i_in), stride_j_(stride_j_in),
+			  stride_k_(stride_k_in),
+			  min_i_(min_i_in), max_i_(max_i_in),
+			  min_j_(min_j_in), max_j_(max_j_in),
+			  min_k_(min_k_in), max_k_(max_k_in),
+			  we_own_array_(array_in == NULL) // no caller-provided storage ==> we allocate it below and free it in the destructor
+		{
+			if (stride_k_ == 0) // k is the fastest-varying index by default
+				then stride_k_ = 1;
+			if (stride_j_ == 0) // default: N_k() elements per step in j
+				then stride_j_ = N_k();
+			if (stride_i_ == 0) // default: N_j()*N_k() elements per step in i
+				then stride_i_ = N_j() * N_k();
+
+			// choose offset_ so that (min_i_, min_j_, min_k_) maps to storage position 0 (unchecked subscripting, since setup isn't done yet)
+			offset_ = -subscript_unchecked(min_i_, min_j_, min_k_); // RHS uses offset_ = 0
+			assert(subscript_unchecked(min_i_, min_j_, min_k_) == 0);
+			max_subscript_ = subscript_unchecked(max_i_, max_j_, max_k_); // storage position of the last element
+
+			if (we_own_array_)
+				then
+				{
+					// allocate it; NOTE(review): this size only suffices for the default strides — confirm nobody passes larger strides with array_in == NULL
+					const int N_allocate = N_i() * N_j() * N_k();
+					array_ = new T[N_allocate];
+				}
+
+			// zero every element (new[] *doesn't* do this for us); this also overwrites caller-provided storage
+			for (int i = min_i(); i <= max_i(); ++i)
+			{
+				for (int j = min_j(); j <= max_j(); ++j)
+				{
+					for (int k = min_k(); k <= max_k(); ++k)
+					{
+						operator()(i, j, k) = T(0);
+					}
+				}
+			}
+		}
+		//
+		// Destroys an array3d object; the storage is freed only if the constructor allocated it.
+		//
+		template <typename T>
+		array3d<T>::~array3d()
+		{
+			if (we_own_array_) // caller-provided storage is left untouched
+				then delete[] array_;
+		}
+
+		template class array1d<int>; // explicit instantiations: the constructors/destructors are defined in this file, so each element type in use is listed here
+
+		// FIXME: we shouldn't have to instantiate both of these; the const one
+		//	  is trivially derivable from the non-const one. :(
+		template class array1d<void *>;
+		template class array1d<const void *>;
+
+		template class array1d<CCTK_REAL>;
+		template class array2d<CCTK_INT>;
+		template class array2d<CCTK_REAL>;
+		template class array3d<CCTK_REAL>;
+
+	} // namespace jtutil
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/array.h
+++ b/AMSS_NCKU_source/AHF_Direct/array.h
@@ -1,292 +1,292 @@
-#ifndef AHFINDERDIRECT__ARRAY_HH
-#define AHFINDERDIRECT__ARRAY_HH
-
-namespace AHFinderDirect
-{
-	namespace jtutil
-	{
-
-		//******************************************************************************
-
-		template <typename T>
-		class array1d
-		{
-		public:
-			int min_i() const { return min_i_; }
-			int max_i() const { return max_i_; }
-			int N_i() const { return jtutil::how_many_in_range(min_i_, max_i_); }
-			bool is_valid_i(int i) const { return (i >= min_i_) && (i <= max_i_); }
-
-			int subscript_unchecked(int i) const
-			{
-				return offset_ + stride_i_ * i;
-			}
-			int subscript(int i) const
-			{
-				assert(is_valid_i(i));
-				const int posn = subscript_unchecked(i);
-				assert(posn >= 0);
-				assert(posn <= max_subscript_);
-				return posn;
-			}
-			int subscript_offset() const { return offset_; }
-			int subscript_stride_i() const { return stride_i_; }
-
-			// normal-use access functions
-			// ... rvalue
-			const T &operator()(int i) const { return array_[subscript(i)]; }
-			// ... lvalue
-			T &operator()(int i) { return array_[subscript(i)]; }
-
-			// get access to internal 0-origin 1D storage array
-			// (low-level, dangerous, use with caution!)
-			// ... semantics of N_array() may not be what you want
-			//     if strides specify noncontiguous storage
-			int N_array() const { return max_subscript_ + stride_i_; }
-			const T *data_array() const { return const_cast<const T *>(array_); }
-			T *data_array() { return array_; }
-
-			// constructor, destructor
-			// ... constructor initializes all array elements to T(0.0)
-			// ... omitted strides default to C storage order
-			array1d(int min_i_in, int max_i_in,
-					T *array_in = NULL, // caller-provided storage array
-										// if non-NULL
-					int stride_i_in = 0);
-			~array1d();
-
-		private:
-			// we forbid copying and passing by value
-			// by declaring the copy constructor and assignment operator
-			// private, but never defining them
-			array1d(const array1d<T> &rhs);
-			array1d<T> &operator=(const array1d<T> &rhs);
-
-		private:
-			// n.b. we declare the array pointer first in the class
-			// ==> it's probably at 0 offset
-			// ==> we may get slightly faster array access
-			T *array_; // --> new-allocated 1D storage array
-
-			// subscripting info
-			// n.b. put this next in class so it should be in the same
-			//	cpu cache line as  array_  ==> faster array access
-			int offset_, stride_i_;
-
-			// min/max array bounds
-			const int min_i_, max_i_;
-			int max_subscript_;
-
-			// n.b. put this at end of class since performance doesn't matter
-			bool we_own_array_; // true ==> array_ --> new[] array which we own
-								// false ==> array_ --> client-owned storage
-		};
-
-		//******************************************************************************
-
-		template <typename T>
-		class array2d
-		{
-		public:
-			// array info
-			int min_i() const { return min_i_; }
-			int max_i() const { return max_i_; }
-			int min_j() const { return min_j_; }
-			int max_j() const { return max_j_; }
-			int N_i() const { return jtutil::how_many_in_range(min_i_, max_i_); }
-			int N_j() const { return jtutil::how_many_in_range(min_j_, max_j_); }
-			bool is_valid_i(int i) const { return (i >= min_i_) && (i <= max_i_); }
-			bool is_valid_j(int j) const { return (j >= min_j_) && (j <= max_j_); }
-			bool is_valid_ij(int i, int j) const
-			{
-				return is_valid_i(i) && is_valid_j(j);
-			}
-
-			int subscript_unchecked(int i, int j) const
-			{
-				return offset_ + stride_i_ * i + stride_j_ * j;
-			}
-			int subscript(int i, int j) const
-			{
-				// n.b. we want each assert() here to be on a separate
-				//	source line, so an assert() failure message can
-				//	pinpoint *which* index is bad
-				assert(is_valid_i(i));
-				assert(is_valid_j(j));
-				const int posn = subscript_unchecked(i, j);
-				assert(posn >= 0);
-				assert(posn <= max_subscript_);
-				return posn;
-			}
-			int subscript_offset() const { return offset_; }
-			int subscript_stride_i() const { return stride_i_; }
-			int subscript_stride_j() const { return stride_j_; }
-
-			// normal-use access functions
-			// ... rvalue
-			const T &operator()(int i, int j) const
-			{
-				return array_[subscript(i, j)];
-			}
-			// ... lvalue
-			T &operator()(int i, int j)
-			{
-				return array_[subscript(i, j)];
-			}
-
-			// get access to internal 0-origin 1D storage array
-			// (low-level, dangerous, use with caution!)
-			// ... semantics of N_array() may not be what you want
-			//     if strides specify noncontiguous storage
-			int N_array() const { return max_subscript_ + stride_j_; }
-			const T *data_array() const { return const_cast<const T *>(array_); }
-			T *data_array() { return array_; }
-
-			// constructor, destructor
-			// ... constructor initializes all array elements to T(0.0)
-			// ... omitted strides default to C storage order
-			array2d(int min_i_in, int max_i_in,
-					int min_j_in, int max_j_in,
-					T *array_in = NULL, // caller-provided storage array
-										// if non-NULL
-					int stride_i_in = 0, int stride_j_in = 0);
-			~array2d();
-
-		private:
-			// we forbid copying and passing by value
-			// by declaring the copy constructor and assignment operator
-			// private, but never defining them
-			array2d(const array2d<T> &rhs);
-			array2d<T> &operator=(const array2d<T> &rhs);
-
-		private:
-			// n.b. we declare the array pointer first in the class
-			// ==> it's probably at 0 offset
-			// ==> we may get slightly faster array access
-			T *array_; // --> new-allocated 1D storage array
-
-			// subscripting info
-			// n.b. put this next in class so it should be in the same
-			//	cpu cache line as  array_  ==> faster array access
-			int offset_, stride_i_, stride_j_;
-
-			// min/max array bounds
-			const int min_i_, max_i_;
-			const int min_j_, max_j_;
-			int max_subscript_;
-
-			// n.b. put this at end of class since performance doesn't matter
-			bool we_own_array_; // true ==> array_ --> new[] array which we own
-								// false ==> array_ --> client-owned storage
-		};
-
-		//******************************************************************************
-
-		template <typename T>
-		class array3d
-		{
-		public:
-			// array info
-			int min_i() const { return min_i_; }
-			int max_i() const { return max_i_; }
-			int min_j() const { return min_j_; }
-			int max_j() const { return max_j_; }
-			int min_k() const { return min_k_; }
-			int max_k() const { return max_k_; }
-			int N_i() const { return jtutil::how_many_in_range(min_i_, max_i_); }
-			int N_j() const { return jtutil::how_many_in_range(min_j_, max_j_); }
-			int N_k() const { return jtutil::how_many_in_range(min_k_, max_k_); }
-			bool is_valid_i(int i) const { return (i >= min_i_) && (i <= max_i_); }
-			bool is_valid_j(int j) const { return (j >= min_j_) && (j <= max_j_); }
-			bool is_valid_k(int k) const { return (k >= min_k_) && (k <= max_k_); }
-			bool is_valid_ijk(int i, int j, int k) const
-			{
-				return is_valid_i(i) && is_valid_j(j) && is_valid_k(k);
-			}
-
-			int subscript_unchecked(int i, int j, int k) const
-			{
-				return offset_ + stride_i_ * i + stride_j_ * j + stride_k_ * k;
-			}
-			int subscript(int i, int j, int k) const
-			{
-				// n.b. we want each assert() here to be on a separate
-				//	source line, so an assert() failure message can
-				//	pinpoint *which* index is bad
-				assert(is_valid_i(i));
-				assert(is_valid_j(j));
-				assert(is_valid_k(k));
-				const int posn = subscript_unchecked(i, j, k);
-				assert(posn >= 0);
-				assert(posn <= max_subscript_);
-				return posn;
-			}
-			int subscript_offset() const { return offset_; }
-			int subscript_stride_i() const { return stride_i_; }
-			int subscript_stride_j() const { return stride_j_; }
-			int subscript_stride_k() const { return stride_k_; }
-
-			// normal-use access functions
-			// ... rvalue
-			const T &operator()(int i, int j, int k) const
-			{
-				return array_[subscript(i, j, k)];
-			}
-			// ... lvalue
-			T &operator()(int i, int j, int k)
-			{
-				return array_[subscript(i, j, k)];
-			}
-
-			// get access to internal 0-origin 1D storage array
-			// (low-level, dangerous, use with caution!)
-			// ... semantics of N_array() may not be what you want
-			//     if strides specify noncontiguous storage
-			int N_array() const { return max_subscript_ + stride_k_; }
-			const T *data_array() const { return const_cast<const T *>(array_); }
-			T *data_array() { return array_; }
-
-			// constructor, destructor
-			// ... constructor initializes all array elements to T(0.0)
-			// ... omitted strides default to C storage order
-			array3d(int min_i_in, int max_i_in,
-					int min_j_in, int max_j_in,
-					int min_k_in, int max_k_in,
-					T *array_in = NULL, // caller-provided storage array
-										// if non-NULL
-					int stride_i_in = 0, int stride_j_in = 0, int stride_k_in = 0);
-			~array3d();
-
-		private:
-			// we forbid copying and passing by value
-			// by declaring the copy constructor and assignment operator
-			// private, but never defining them
-			array3d(const array3d<T> &rhs);
-			array3d<T> &operator=(const array3d<T> &rhs);
-
-		private:
-			// n.b. we declare the array pointer first in the class
-			// ==> it's probably at 0 offset
-			// ==> we may get slightly faster array access
-			T *array_; // --> new-allocated 1D storage array
-
-			// subscripting info
-			// n.b. put this next in class so it should be in the same
-			//	cpu cache line as  array_  ==> faster array access
-			int offset_, stride_i_, stride_j_, stride_k_;
-
-			// min/max array bounds
-			const int min_i_, max_i_;
-			const int min_j_, max_j_;
-			const int min_k_, max_k_;
-			int max_subscript_;
-
-			// n.b. put this at end of class since performance doesn't matter
-			bool we_own_array_; // true ==> array_ --> new[] array which we own
-								// false ==> array_ --> client-owned storage
-		};
-
-	} // namespace jtutil
-} // namespace AHFinderDirect
-
-#endif /* AHFINDERDIRECT__ARRAY_HH */
+#ifndef AHFINDERDIRECT__ARRAY_HH
+#define AHFINDERDIRECT__ARRAY_HH
+
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+
+		//******************************************************************************
+
+		template <typename T>
+		class array1d // 1D array of T with caller-chosen index bounds [min_i, max_i] and stride
+		{
+		public:
+			int min_i() const { return min_i_; }
+			int max_i() const { return max_i_; }
+			int N_i() const { return jtutil::how_many_in_range(min_i_, max_i_); }
+			bool is_valid_i(int i) const { return (i >= min_i_) && (i <= max_i_); }
+
+			int subscript_unchecked(int i) const // index --> storage position, with no validity checks
+			{
+				return offset_ + stride_i_ * i;
+			}
+			int subscript(int i) const // same mapping, but asserts that the index and the resulting position are in range
+			{
+				assert(is_valid_i(i));
+				const int posn = subscript_unchecked(i);
+				assert(posn >= 0);
+				assert(posn <= max_subscript_);
+				return posn;
+			}
+			int subscript_offset() const { return offset_; }
+			int subscript_stride_i() const { return stride_i_; }
+
+			// normal-use access functions (range-checked via the asserts in subscript())
+			// ... rvalue
+			const T &operator()(int i) const { return array_[subscript(i)]; }
+			// ... lvalue
+			T &operator()(int i) { return array_[subscript(i)]; }
+
+			// get access to internal 0-origin 1D storage array
+			// (low-level, dangerous, use with caution!)
+			// ... semantics of N_array() may not be what you want
+			//     if strides specify noncontiguous storage
+			int N_array() const { return max_subscript_ + stride_i_; }
+			const T *data_array() const { return const_cast<const T *>(array_); }
+			T *data_array() { return array_; }
+
+			// constructor, destructor
+			// ... constructor sets all array elements to T(0), including caller-provided storage
+			// ... an omitted (0) stride defaults to 1
+			array1d(int min_i_in, int max_i_in,
+					T *array_in = NULL, // caller-provided storage array
+										// if non-NULL
+					int stride_i_in = 0);
+			~array1d();
+
+		private:
+			// we forbid copying and passing by value
+			// by declaring the copy constructor and assignment operator
+			// private, but never defining them
+			array1d(const array1d<T> &rhs);
+			array1d<T> &operator=(const array1d<T> &rhs);
+
+		private:
+			// n.b. we declare the array pointer first in the class
+			// ==> it's probably at 0 offset
+			// ==> we may get slightly faster array access
+			T *array_; // --> 1D storage array: new[]-allocated by us, or caller-provided (see we_own_array_)
+
+			// subscripting info
+			// n.b. put this next in class so it should be in the same
+			//	cpu cache line as  array_  ==> faster array access
+			int offset_, stride_i_;
+
+			// min/max array bounds
+			const int min_i_, max_i_;
+			int max_subscript_; // storage position of element max_i_, set by the constructor
+
+			// n.b. put this at end of class since performance doesn't matter
+			bool we_own_array_; // true ==> array_ --> new[] array which we own
+								// false ==> array_ --> client-owned storage
+		};
+
+		//******************************************************************************
+
+		template <typename T>
+		class array2d // 2D array of T with caller-chosen index bounds and strides
+		{
+		public:
+			// array info
+			int min_i() const { return min_i_; }
+			int max_i() const { return max_i_; }
+			int min_j() const { return min_j_; }
+			int max_j() const { return max_j_; }
+			int N_i() const { return jtutil::how_many_in_range(min_i_, max_i_); }
+			int N_j() const { return jtutil::how_many_in_range(min_j_, max_j_); }
+			bool is_valid_i(int i) const { return (i >= min_i_) && (i <= max_i_); }
+			bool is_valid_j(int j) const { return (j >= min_j_) && (j <= max_j_); }
+			bool is_valid_ij(int i, int j) const
+			{
+				return is_valid_i(i) && is_valid_j(j);
+			}
+
+			int subscript_unchecked(int i, int j) const // indices --> storage position, with no validity checks
+			{
+				return offset_ + stride_i_ * i + stride_j_ * j;
+			}
+			int subscript(int i, int j) const // same mapping, but asserts that the indices and the resulting position are in range
+			{
+				// n.b. we want each assert() here to be on a separate
+				//	source line, so an assert() failure message can
+				//	pinpoint *which* index is bad
+				assert(is_valid_i(i));
+				assert(is_valid_j(j));
+				const int posn = subscript_unchecked(i, j);
+				assert(posn >= 0);
+				assert(posn <= max_subscript_);
+				return posn;
+			}
+			int subscript_offset() const { return offset_; }
+			int subscript_stride_i() const { return stride_i_; }
+			int subscript_stride_j() const { return stride_j_; }
+
+			// normal-use access functions (range-checked via the asserts in subscript())
+			// ... rvalue
+			const T &operator()(int i, int j) const
+			{
+				return array_[subscript(i, j)];
+			}
+			// ... lvalue
+			T &operator()(int i, int j)
+			{
+				return array_[subscript(i, j)];
+			}
+
+			// get access to internal 0-origin 1D storage array
+			// (low-level, dangerous, use with caution!)
+			// ... semantics of N_array() may not be what you want
+			//     if strides specify noncontiguous storage
+			int N_array() const { return max_subscript_ + stride_j_; }
+			const T *data_array() const { return const_cast<const T *>(array_); }
+			T *data_array() { return array_; }
+
+			// constructor, destructor
+			// ... constructor sets all array elements to T(0), including caller-provided storage
+			// ... omitted (0) strides default to C storage order
+			array2d(int min_i_in, int max_i_in,
+					int min_j_in, int max_j_in,
+					T *array_in = NULL, // caller-provided storage array
+										// if non-NULL
+					int stride_i_in = 0, int stride_j_in = 0);
+			~array2d();
+
+		private:
+			// we forbid copying and passing by value
+			// by declaring the copy constructor and assignment operator
+			// private, but never defining them
+			array2d(const array2d<T> &rhs);
+			array2d<T> &operator=(const array2d<T> &rhs);
+
+		private:
+			// n.b. we declare the array pointer first in the class
+			// ==> it's probably at 0 offset
+			// ==> we may get slightly faster array access
+			T *array_; // --> 1D storage array: new[]-allocated by us, or caller-provided (see we_own_array_)
+
+			// subscripting info
+			// n.b. put this next in class so it should be in the same
+			//	cpu cache line as  array_  ==> faster array access
+			int offset_, stride_i_, stride_j_;
+
+			// min/max array bounds
+			const int min_i_, max_i_;
+			const int min_j_, max_j_;
+			int max_subscript_; // storage position of element (max_i_, max_j_), set by the constructor
+
+			// n.b. put this at end of class since performance doesn't matter
+			bool we_own_array_; // true ==> array_ --> new[] array which we own
+								// false ==> array_ --> client-owned storage
+		};
+
+		//******************************************************************************
+
+		template <typename T>
+		class array3d // 3D array of T with caller-chosen index bounds and strides
+		{
+		public:
+			// array info
+			int min_i() const { return min_i_; }
+			int max_i() const { return max_i_; }
+			int min_j() const { return min_j_; }
+			int max_j() const { return max_j_; }
+			int min_k() const { return min_k_; }
+			int max_k() const { return max_k_; }
+			int N_i() const { return jtutil::how_many_in_range(min_i_, max_i_); }
+			int N_j() const { return jtutil::how_many_in_range(min_j_, max_j_); }
+			int N_k() const { return jtutil::how_many_in_range(min_k_, max_k_); }
+			bool is_valid_i(int i) const { return (i >= min_i_) && (i <= max_i_); }
+			bool is_valid_j(int j) const { return (j >= min_j_) && (j <= max_j_); }
+			bool is_valid_k(int k) const { return (k >= min_k_) && (k <= max_k_); }
+			bool is_valid_ijk(int i, int j, int k) const
+			{
+				return is_valid_i(i) && is_valid_j(j) && is_valid_k(k);
+			}
+
+			int subscript_unchecked(int i, int j, int k) const // indices --> storage position, with no validity checks
+			{
+				return offset_ + stride_i_ * i + stride_j_ * j + stride_k_ * k;
+			}
+			int subscript(int i, int j, int k) const // same mapping, but asserts that the indices and the resulting position are in range
+			{
+				// n.b. we want each assert() here to be on a separate
+				//	source line, so an assert() failure message can
+				//	pinpoint *which* index is bad
+				assert(is_valid_i(i));
+				assert(is_valid_j(j));
+				assert(is_valid_k(k));
+				const int posn = subscript_unchecked(i, j, k);
+				assert(posn >= 0);
+				assert(posn <= max_subscript_);
+				return posn;
+			}
+			int subscript_offset() const { return offset_; }
+			int subscript_stride_i() const { return stride_i_; }
+			int subscript_stride_j() const { return stride_j_; }
+			int subscript_stride_k() const { return stride_k_; }
+
+			// normal-use access functions (range-checked via the asserts in subscript())
+			// ... rvalue
+			const T &operator()(int i, int j, int k) const
+			{
+				return array_[subscript(i, j, k)];
+			}
+			// ... lvalue
+			T &operator()(int i, int j, int k)
+			{
+				return array_[subscript(i, j, k)];
+			}
+
+			// get access to internal 0-origin 1D storage array
+			// (low-level, dangerous, use with caution!)
+			// ... semantics of N_array() may not be what you want
+			//     if strides specify noncontiguous storage
+			int N_array() const { return max_subscript_ + stride_k_; }
+			const T *data_array() const { return const_cast<const T *>(array_); }
+			T *data_array() { return array_; }
+
+			// constructor, destructor
+			// ... constructor sets all array elements to T(0), including caller-provided storage
+			// ... omitted (0) strides default to C storage order
+			array3d(int min_i_in, int max_i_in,
+					int min_j_in, int max_j_in,
+					int min_k_in, int max_k_in,
+					T *array_in = NULL, // caller-provided storage array
+										// if non-NULL
+					int stride_i_in = 0, int stride_j_in = 0, int stride_k_in = 0);
+			~array3d();
+
+		private:
+			// we forbid copying and passing by value
+			// by declaring the copy constructor and assignment operator
+			// private, but never defining them
+			array3d(const array3d<T> &rhs);
+			array3d<T> &operator=(const array3d<T> &rhs);
+
+		private:
+			// n.b. we declare the array pointer first in the class
+			// ==> it's probably at 0 offset
+			// ==> we may get slightly faster array access
+			T *array_; // --> 1D storage array: new[]-allocated by us, or caller-provided (see we_own_array_)
+
+			// subscripting info
+			// n.b. put this next in class so it should be in the same
+			//	cpu cache line as  array_  ==> faster array access
+			int offset_, stride_i_, stride_j_, stride_k_;
+
+			// min/max array bounds
+			const int min_i_, max_i_;
+			const int min_j_, max_j_;
+			const int min_k_, max_k_;
+			int max_subscript_; // storage position of element (max_i_, max_j_, max_k_), set by the constructor
+
+			// n.b. put this at end of class since performance doesn't matter
+			bool we_own_array_; // true ==> array_ --> new[] array which we own
+								// false ==> array_ --> client-owned storage
+		};
+
+	} // namespace jtutil
+} // namespace AHFinderDirect
+
+#endif /* AHFINDERDIRECT__ARRAY_HH */
--- a/AMSS_NCKU_source/AHF_Direct/cctk.h
+++ b/AMSS_NCKU_source/AHF_Direct/cctk.h
@@ -1,58 +1,58 @@
-#ifndef _CCTK_H_
-#define _CCTK_H_ 1
-
-/* Grab the main configuration info. */
-#include "cctk_Config.h"
-
-#define CCTK_THORNSTRING "AHFinderDirect"
-
-/* Include the constants */
-#include "cctk_Constants.h"
-
-/* get the definition of ptrdiff_t */
-#include <stddef.h>
-int CCTK_VInfo(const char *thorn, const char *format, ...);
-int CCTK_VWarn(int level,
-               int line,
-               const char *file,
-               const char *thorn,
-               const char *format,
-               ...);
-#define CCTK_ERROR_INTERP_GHOST_SIZE_TOO_SMALL (-1001)
-#ifdef __cplusplus
-#define HAVE_INLINE
-#else
-#ifndef inline
-#define HAVE_INLINE
-#endif
-#endif
-
-#define CCTK_PRINTSEPARATOR \
-  printf("--------------------------------------------------------------------------------\n");
-
-#define _DECLARE_CCTK_ARGUMENTS _DECLARE_CCTK_CARGUMENTS
-#define _DECLARE_CCTK_CARGUMENTS          \
-  ptrdiff_t cctki_dummy_int;              \
-  CCTK_REAL cctk_time = cctkGH->PhysTime; \
-  int cctk_iteration = 1;                 \
-  int cctk_dim = 3;
-
-#define CCTK_EQUALS(a, b) (CCTK_Equals((a), (b)))
-
-#define CCTK_PASS_CTOC cctkGH
-
-#define CCTK_ORIGIN_SPACE(x) (cctk_origin_space[x] + cctk_delta_space[x] / cctk_levfac[x] * cctk_levoff[x] / cctk_levoffdenom[x])
-#define CCTK_DELTA_SPACE(x) (cctk_delta_space[x] / cctk_levfac[x])
-#define CCTK_DELTA_TIME (cctk_delta_time / cctk_timefac)
-#define CCTK_LSSH(stag, dim) cctk_lssh[(stag) + CCTK_NSTAGGER * (dim)]
-#define CCTK_LSSH_IDX(stag, dim) ((stag) + CCTK_NSTAGGER * (dim))
-
-#define CCTK_WARN(a, b) CCTK_Warn(a, __LINE__, __FILE__, CCTK_THORNSTRING, b)
-
-#define CCTK_MALLOC(s) CCTKi_Malloc(s, __LINE__, __FILE__)
-#define CCTK_FREE(p) CCTKi_Free(p)
-
-#define CCTK_INFO(a) CCTK_Info(CCTK_THORNSTRING, (a))
-#define CCTK_PARAMWARN(a) CCTK_ParamWarn(CCTK_THORNSTRING, (a))
-
-#endif
+#ifndef _CCTK_H_
+#define _CCTK_H_ 1
+
+/* Grab the main configuration info. */
+#include "cctk_Config.h"
+
+#define CCTK_THORNSTRING "AHFinderDirect"
+
+/* Include the constants */
+#include "cctk_Constants.h"
+
+/* get the definition of ptrdiff_t */
+#include <stddef.h>
+int CCTK_VInfo(const char *thorn, const char *format, ...);
+int CCTK_VWarn(int level,
+               int line,
+               const char *file,
+               const char *thorn,
+               const char *format,
+               ...);
+#define CCTK_ERROR_INTERP_GHOST_SIZE_TOO_SMALL (-1001)
+#ifdef __cplusplus
+#define HAVE_INLINE
+#else
+#ifndef inline
+#define HAVE_INLINE
+#endif
+#endif
+
+#define CCTK_PRINTSEPARATOR \
+  printf("--------------------------------------------------------------------------------\n");
+
+#define _DECLARE_CCTK_ARGUMENTS _DECLARE_CCTK_CARGUMENTS
+#define _DECLARE_CCTK_CARGUMENTS          \
+  ptrdiff_t cctki_dummy_int;              \
+  CCTK_REAL cctk_time = cctkGH->PhysTime; \
+  int cctk_iteration = 1;                 \
+  int cctk_dim = 3;
+
+#define CCTK_EQUALS(a, b) (CCTK_Equals((a), (b)))
+
+#define CCTK_PASS_CTOC cctkGH
+
+#define CCTK_ORIGIN_SPACE(x) (cctk_origin_space[x] + cctk_delta_space[x] / cctk_levfac[x] * cctk_levoff[x] / cctk_levoffdenom[x])
+#define CCTK_DELTA_SPACE(x) (cctk_delta_space[x] / cctk_levfac[x])
+#define CCTK_DELTA_TIME (cctk_delta_time / cctk_timefac)
+#define CCTK_LSSH(stag, dim) cctk_lssh[(stag) + CCTK_NSTAGGER * (dim)]
+#define CCTK_LSSH_IDX(stag, dim) ((stag) + CCTK_NSTAGGER * (dim))
+
+#define CCTK_WARN(a, b) CCTK_Warn(a, __LINE__, __FILE__, CCTK_THORNSTRING, b)
+
+#define CCTK_MALLOC(s) CCTKi_Malloc(s, __LINE__, __FILE__)
+#define CCTK_FREE(p) CCTKi_Free(p)
+
+#define CCTK_INFO(a) CCTK_Info(CCTK_THORNSTRING, (a))
+#define CCTK_PARAMWARN(a) CCTK_ParamWarn(CCTK_THORNSTRING, (a))
+
+#endif
--- a/AMSS_NCKU_source/AHF_Direct/cctk_Config.h
+++ b/AMSS_NCKU_source/AHF_Direct/cctk_Config.h
@@ -1,168 +1,168 @@
-#ifndef _CCTK_CONFIG_H_
-#define _CCTK_CONFIG_H_
-
-#define STDC_HEADERS 1
-
-#define CCTK_FCALL 
-
-#define HAVE_GETHOSTBYNAME 1
-#define HAVE_GETOPT_LONG_ONLY 1
-#define HAVE_CRYPT 1
-#define HAVE_FINITE 1
-#define HAVE_ISNAN 1
-#define HAVE_ISINF 1
-#define HAVE_MKSTEMP 1
-#define HAVE_VA_COPY 1
-
-/* Do we have mode_t ? */
-#define HAVE_MODE_T 1
-
-#define HAVE_SOCKLEN_T 1
-#ifdef HAVE_SOCKLEN_T
-#  define CCTK_SOCKLEN_T socklen_t
-#else
-#  define CCTK_SOCKLEN_T int
-#endif
-
-#define HAVE_TIME_H 1
-#define HAVE_SYS_IOCTL_H 1
-#define HAVE_SYS_SOCKET_H 1
-#define HAVE_SYS_TIME_H 1
-#define HAVE_SYS_TYPES_H 1
-#define HAVE_UNISTD_H 1
-#define HAVE_STRING_H 1
-#define HAVE_ASSERT_H 1
-#define HAVE_TGMATH_H 1
-#define HAVE_SYS_STAT_H 1
-#define HAVE_GETOPT_H 1
-#define HAVE_REGEX_H 1
-#define HAVE_NETINET_IN_H 1
-#define HAVE_NETDB_H 1
-#define HAVE_ARPA_INET_H 1
-#define HAVE_CRYPT_H 1
-#define HAVE_DIRENT_H 1
-#define HAVE_SIGNAL_H 1
-#define HAVE_MALLOC_H 1
-#define HAVE_MALLINFO 1
-#define HAVE_MALLOPT 1
-#define HAVE_M_MMAP_THRESHOLD_VALUE 1
-
-#define TIME_WITH_SYS_TIME 1
-
-#define HAVE_VECTOR 1
-#define HAVE_VECTOR_H 1
-
-#define GETTIMEOFDAY_NEEDS_TIMEZONE 1
-
-#define CCTK_CACHELINE_BYTES 64
-#define CCTK_CACHE_SIZE 1024*1024
-
-#define NULL_DEVICE "/dev/null"
-
-#define CCTK_BUILD_OS "linux-gnu"
-#define CCTK_BUILD_CPU "x86_64"
-#define CCTK_BUILD_VENDOR "unknown"
-
-#define SIZEOF_SHORT_INT 2
-#define SIZEOF_INT 4
-#define SIZEOF_LONG_INT 8
-#define SIZEOF_LONG_LONG 8
-#define SIZEOF_LONG_DOUBLE 16
-#define SIZEOF_DOUBLE 8
-#define SIZEOF_FLOAT 4
-#define SIZEOF_CHAR_P 8
-
-#define CCTK_REAL_PRECISION_8 1
-
-#define CCTK_INTEGER_PRECISION_4 1
-
-#define HAVE_CCTK_INT8 1
-#define HAVE_CCTK_INT4 1
-#define HAVE_CCTK_INT2 1
-#define HAVE_CCTK_INT1 1
-
-#define HAVE_CCTK_REAL16 1
-#define HAVE_CCTK_REAL8 1
-#define HAVE_CCTK_REAL4 1
-
-#define CCTK_INT8 long int
-#define CCTK_INT4 int
-#define CCTK_INT2 short int
-#define CCTK_INT1 signed char
-
-#define CCTK_REAL16 long double
-#define CCTK_REAL8 double
-#define CCTK_REAL4 float
-
-#ifndef __cplusplus
-
-#ifdef CCTK_C_RESTRICT
-#define restrict CCTK_C_RESTRICT
-#endif
-
-/* Allow the use of CCTK_RESTRICT as a qualifier always. */
-#ifdef CCTK_C_RESTRICT
-#define CCTK_RESTRICT CCTK_C_RESTRICT
-#else
-#define CCTK_RESTRICT restrict
-#endif
-
-#ifdef HAVE_CCTK_C_BOOL
-#define CCTK_HAVE_C_BOOL
-#endif
-
-#endif /* ! defined __cplusplus */
-/****************************************************************************/
-
-/****************************************************************************/
-/* C++ specific stuff */
-/****************************************************************************/
-#ifdef __cplusplus
-
-/* Some C++ compilers don't have bool ! */
-#define HAVE_CCTK_CXX_BOOL 1
-
-#ifndef HAVE_CCTK_CXX_BOOL
-typedef enum {false, true} bool;
-#else
-/* deprecated in beta15 */
-#define CCTK_HAVE_CXX_BOOL
-#endif
-
-/* Some C++ compilers recognise the restrict keyword */
-#define CCTK_CXX_RESTRICT __restrict__
-
-/* Since this is non-standard leave commented out for the moment */
-#if 0
-/* Define to empty if the keyword does not work. */
-#ifdef CCTK_CXX_RESTRICT
-#define restrict CCTK_CXX_RESTRICT
-#endif
-#endif
-
-/* Allow the use of CCTK_RESTRICT as a qualifier always. */
-#ifdef CCTK_CXX_RESTRICT
-#define CCTK_RESTRICT CCTK_CXX_RESTRICT
-#else
-#define CCTK_RESTRICT restrict
-#endif
-
-#endif /* __cplusplus */
-/****************************************************************************/
-
-#ifdef FCODE
-
-#define HAVE_CCTK_FORTRAN_REAL4 1
-#define HAVE_CCTK_FORTRAN_REAL8 1
-#define HAVE_CCTK_FORTRAN_REAL16 1
-
-#define HAVE_CCTK_FORTRAN_COMPLEX8 1
-#define HAVE_CCTK_FORTRAN_COMPLEX16 1
-#define HAVE_CCTK_FORTRAN_COMPLEX32 1
-
-#endif /* FCODE */
-
-/* Now include the code to pick an appropriate precison for reals and ints */
-#include "cctk_Types.h"
-
-#endif /* _CCTK_CONFIG_H_ */
+#ifndef _CCTK_CONFIG_H_
+#define _CCTK_CONFIG_H_
+
+#define STDC_HEADERS 1
+
+#define CCTK_FCALL 
+
+#define HAVE_GETHOSTBYNAME 1
+#define HAVE_GETOPT_LONG_ONLY 1
+#define HAVE_CRYPT 1
+#define HAVE_FINITE 1
+#define HAVE_ISNAN 1
+#define HAVE_ISINF 1
+#define HAVE_MKSTEMP 1
+#define HAVE_VA_COPY 1
+
+/* Do we have mode_t ? */
+#define HAVE_MODE_T 1
+
+#define HAVE_SOCKLEN_T 1
+#ifdef HAVE_SOCKLEN_T
+#  define CCTK_SOCKLEN_T socklen_t
+#else
+#  define CCTK_SOCKLEN_T int
+#endif
+
+#define HAVE_TIME_H 1
+#define HAVE_SYS_IOCTL_H 1
+#define HAVE_SYS_SOCKET_H 1
+#define HAVE_SYS_TIME_H 1
+#define HAVE_SYS_TYPES_H 1
+#define HAVE_UNISTD_H 1
+#define HAVE_STRING_H 1
+#define HAVE_ASSERT_H 1
+#define HAVE_TGMATH_H 1
+#define HAVE_SYS_STAT_H 1
+#define HAVE_GETOPT_H 1
+#define HAVE_REGEX_H 1
+#define HAVE_NETINET_IN_H 1
+#define HAVE_NETDB_H 1
+#define HAVE_ARPA_INET_H 1
+#define HAVE_CRYPT_H 1
+#define HAVE_DIRENT_H 1
+#define HAVE_SIGNAL_H 1
+#define HAVE_MALLOC_H 1
+#define HAVE_MALLINFO 1
+#define HAVE_MALLOPT 1
+#define HAVE_M_MMAP_THRESHOLD_VALUE 1
+
+#define TIME_WITH_SYS_TIME 1
+
+#define HAVE_VECTOR 1
+#define HAVE_VECTOR_H 1
+
+#define GETTIMEOFDAY_NEEDS_TIMEZONE 1
+
+#define CCTK_CACHELINE_BYTES 64
+#define CCTK_CACHE_SIZE 1024*1024
+
+#define NULL_DEVICE "/dev/null"
+
+#define CCTK_BUILD_OS "linux-gnu"
+#define CCTK_BUILD_CPU "x86_64"
+#define CCTK_BUILD_VENDOR "unknown"
+
+#define SIZEOF_SHORT_INT 2
+#define SIZEOF_INT 4
+#define SIZEOF_LONG_INT 8
+#define SIZEOF_LONG_LONG 8
+#define SIZEOF_LONG_DOUBLE 16
+#define SIZEOF_DOUBLE 8
+#define SIZEOF_FLOAT 4
+#define SIZEOF_CHAR_P 8
+
+#define CCTK_REAL_PRECISION_8 1
+
+#define CCTK_INTEGER_PRECISION_4 1
+
+#define HAVE_CCTK_INT8 1
+#define HAVE_CCTK_INT4 1
+#define HAVE_CCTK_INT2 1
+#define HAVE_CCTK_INT1 1
+
+#define HAVE_CCTK_REAL16 1
+#define HAVE_CCTK_REAL8 1
+#define HAVE_CCTK_REAL4 1
+
+#define CCTK_INT8 long int
+#define CCTK_INT4 int
+#define CCTK_INT2 short int
+#define CCTK_INT1 signed char
+
+#define CCTK_REAL16 long double
+#define CCTK_REAL8 double
+#define CCTK_REAL4 float
+
+#ifndef __cplusplus
+
+#ifdef CCTK_C_RESTRICT
+#define restrict CCTK_C_RESTRICT
+#endif
+
+/* Allow the use of CCTK_RESTRICT as a qualifier always. */
+#ifdef CCTK_C_RESTRICT
+#define CCTK_RESTRICT CCTK_C_RESTRICT
+#else
+#define CCTK_RESTRICT restrict
+#endif
+
+#ifdef HAVE_CCTK_C_BOOL
+#define CCTK_HAVE_C_BOOL
+#endif
+
+#endif /* ! defined __cplusplus */
+/****************************************************************************/
+
+/****************************************************************************/
+/* C++ specific stuff */
+/****************************************************************************/
+#ifdef __cplusplus
+
+/* Some C++ compilers don't have bool ! */
+#define HAVE_CCTK_CXX_BOOL 1
+
+#ifndef HAVE_CCTK_CXX_BOOL
+typedef enum {false, true} bool;
+#else
+/* deprecated in beta15 */
+#define CCTK_HAVE_CXX_BOOL
+#endif
+
+/* Some C++ compilers recognise the restrict keyword */
+#define CCTK_CXX_RESTRICT __restrict__
+
+/* Since this is non-standard leave commented out for the moment */
+#if 0
+/* Define to empty if the keyword does not work. */
+#ifdef CCTK_CXX_RESTRICT
+#define restrict CCTK_CXX_RESTRICT
+#endif
+#endif
+
+/* Allow the use of CCTK_RESTRICT as a qualifier always. */
+#ifdef CCTK_CXX_RESTRICT
+#define CCTK_RESTRICT CCTK_CXX_RESTRICT
+#else
+#define CCTK_RESTRICT restrict
+#endif
+
+#endif /* __cplusplus */
+/****************************************************************************/
+
+#ifdef FCODE
+
+#define HAVE_CCTK_FORTRAN_REAL4 1
+#define HAVE_CCTK_FORTRAN_REAL8 1
+#define HAVE_CCTK_FORTRAN_REAL16 1
+
+#define HAVE_CCTK_FORTRAN_COMPLEX8 1
+#define HAVE_CCTK_FORTRAN_COMPLEX16 1
+#define HAVE_CCTK_FORTRAN_COMPLEX32 1
+
+#endif /* FCODE */
+
+/* Now include the code to pick an appropriate precision for reals and ints */
+#include "cctk_Types.h"
+
+#endif /* _CCTK_CONFIG_H_ */
--- a/AMSS_NCKU_source/AHF_Direct/cctk_Constants.h
+++ b/AMSS_NCKU_source/AHF_Direct/cctk_Constants.h
@@ -1,57 +1,57 @@
-#ifndef _CCTK_CONSTANTS_H_
-#define _CCTK_CONSTANTS_H_
-
-#define CCTK_VARIABLE_VOID             100
-#define CCTK_VARIABLE_BYTE             101
-#define CCTK_VARIABLE_INT              102
-#define CCTK_VARIABLE_INT1             103
-#define CCTK_VARIABLE_INT2             104
-#define CCTK_VARIABLE_INT4             105
-#define CCTK_VARIABLE_INT8             106
-#define CCTK_VARIABLE_REAL             107
-#define CCTK_VARIABLE_REAL4            108
-#define CCTK_VARIABLE_REAL8            109
-#define CCTK_VARIABLE_REAL16           110
-#define CCTK_VARIABLE_COMPLEX          111
-#define CCTK_VARIABLE_COMPLEX8         112
-#define CCTK_VARIABLE_COMPLEX16        113
-#define CCTK_VARIABLE_COMPLEX32        114
-#define CCTK_VARIABLE_CHAR             115
-#define CCTK_VARIABLE_STRING           116
-#define CCTK_VARIABLE_POINTER          117
-#define CCTK_VARIABLE_POINTER_TO_CONST 118
-#define CCTK_VARIABLE_FPOINTER         119
-
-/* DEPRECATED IN BETA 12 */
-#define CCTK_VARIABLE_FN_POINTER CCTK_VARIABLE_FPOINTER
-
-/* steerable status of parameters */
-#define CCTK_STEERABLE_NEVER   200
-#define CCTK_STEERABLE_ALWAYS  201
-#define CCTK_STEERABLE_RECOVER 202
-
-/* number of staggerings */
-#define CCTK_NSTAGGER      3
-
-/* group distributions */
-#define CCTK_DISTRIB_CONSTANT 301
-#define CCTK_DISTRIB_DEFAULT  302
-
-/* group types */
-#define CCTK_SCALAR 401
-#define CCTK_GF     402
-#define CCTK_ARRAY  403
-
-/* group scopes */
-#define CCTK_PRIVATE   501
-#define CCTK_PROTECTED 502
-#define CCTK_PUBLIC    503
-
-/* constants for CCTK_TraverseString() */
-#define CCTK_VAR          601
-#define CCTK_GROUP        602
-#define CCTK_GROUP_OR_VAR 603
-
-
-#endif /* _CCTK_CONSTANTS_ */
-
+#ifndef _CCTK_CONSTANTS_H_
+#define _CCTK_CONSTANTS_H_
+
+#define CCTK_VARIABLE_VOID             100
+#define CCTK_VARIABLE_BYTE             101
+#define CCTK_VARIABLE_INT              102
+#define CCTK_VARIABLE_INT1             103
+#define CCTK_VARIABLE_INT2             104
+#define CCTK_VARIABLE_INT4             105
+#define CCTK_VARIABLE_INT8             106
+#define CCTK_VARIABLE_REAL             107
+#define CCTK_VARIABLE_REAL4            108
+#define CCTK_VARIABLE_REAL8            109
+#define CCTK_VARIABLE_REAL16           110
+#define CCTK_VARIABLE_COMPLEX          111
+#define CCTK_VARIABLE_COMPLEX8         112
+#define CCTK_VARIABLE_COMPLEX16        113
+#define CCTK_VARIABLE_COMPLEX32        114
+#define CCTK_VARIABLE_CHAR             115
+#define CCTK_VARIABLE_STRING           116
+#define CCTK_VARIABLE_POINTER          117
+#define CCTK_VARIABLE_POINTER_TO_CONST 118
+#define CCTK_VARIABLE_FPOINTER         119
+
+/* DEPRECATED IN BETA 12 */
+#define CCTK_VARIABLE_FN_POINTER CCTK_VARIABLE_FPOINTER
+
+/* steerable status of parameters */
+#define CCTK_STEERABLE_NEVER   200
+#define CCTK_STEERABLE_ALWAYS  201
+#define CCTK_STEERABLE_RECOVER 202
+
+/* number of staggerings */
+#define CCTK_NSTAGGER      3
+
+/* group distributions */
+#define CCTK_DISTRIB_CONSTANT 301
+#define CCTK_DISTRIB_DEFAULT  302
+
+/* group types */
+#define CCTK_SCALAR 401
+#define CCTK_GF     402
+#define CCTK_ARRAY  403
+
+/* group scopes */
+#define CCTK_PRIVATE   501
+#define CCTK_PROTECTED 502
+#define CCTK_PUBLIC    503
+
+/* constants for CCTK_TraverseString() */
+#define CCTK_VAR          601
+#define CCTK_GROUP        602
+#define CCTK_GROUP_OR_VAR 603
+
+
+#endif /* _CCTK_CONSTANTS_H_ */
+
--- a/AMSS_NCKU_source/AHF_Direct/cctk_Types.h
+++ b/AMSS_NCKU_source/AHF_Direct/cctk_Types.h
@@ -1,180 +1,180 @@
-#ifndef _CCTK_TYPES_H_
-#define _CCTK_TYPES_H_
-
-#ifndef _CCTK_CONFIG_H_
-#include "cctk_Config.h"
-#endif
-
-typedef void *CCTK_POINTER;
-typedef const void *CCTK_POINTER_TO_CONST;
-typedef void (*CCTK_FPOINTER)(void);
-#define HAVE_CCTK_POINTER 1
-#define HAVE_CCTK_POINTER_TO_CONST 1
-#define HAVE_CCTK_FPOINTER 1
-
-/* Character types */
-typedef char CCTK_CHAR;
-typedef const char * CCTK_STRING;
-#define HAVE_CCTK_CHAR 1
-#define HAVE_CCTK_STRING 1
-
-/* Structures for complex types */
-
-#ifdef HAVE_CCTK_REAL16
-#define HAVE_CCTK_COMPLEX32 1
-typedef struct CCTK_COMPLEX32
-{
-  CCTK_REAL16 Re;
-  CCTK_REAL16 Im;
-#ifdef __cplusplus
-  CCTK_REAL16 real() const { return Re; }
-  CCTK_REAL16 imag() const { return Im; }
-#endif
-} CCTK_COMPLEX32;
-#endif
-
-#ifdef HAVE_CCTK_REAL8
-#define HAVE_CCTK_COMPLEX16 1
-typedef struct CCTK_COMPLEX16
-{
-  CCTK_REAL8 Re;
-  CCTK_REAL8 Im;
-#ifdef __cplusplus
-  CCTK_REAL8 real() const { return Re; }
-  CCTK_REAL8 imag() const { return Im; }
-#endif
-} CCTK_COMPLEX16;
-#endif
-
-#ifdef HAVE_CCTK_REAL4
-#define HAVE_CCTK_COMPLEX8 1
-typedef struct CCTK_COMPLEX8
-{
-  CCTK_REAL4 Re;
-  CCTK_REAL4 Im;
-#ifdef __cplusplus
-  CCTK_REAL4 real() const { return Re; }
-  CCTK_REAL4 imag() const { return Im; }
-#endif
-} CCTK_COMPLEX8;
-#endif
-
-/* Small positive integer type */
-typedef unsigned char CCTK_BYTE;
-#define HAVE_CCTK_BYTE 1
-
-/* Define stuff for fortran. */
-#ifdef FCODE
-
-#define CCTK_POINTER          integer*SIZEOF_CHAR_P
-#define CCTK_POINTER_TO_CONST integer*SIZEOF_CHAR_P
-/* TODO: add autoconf for determining the size of function pointers */
-#define CCTK_FPOINTER         integer*SIZEOF_CHAR_P
-#define HAVE_CCTK_POINTER 1
-#define HAVE_CCTK_POINTER_TO_CONST 1
-#define HAVE_CCTK_FPOINTER 1
-
-/* Character types */
-/* A single character does not exist in Fortran; in Fortran, all
-   character types are strings.  Hence we do not define CCTK_CHAR.  */
-/* #define CCTK_CHAR   CHARACTER */
-/* #define HAVE_CCTK_CHAR 1 */
-/* This is a C-string, i.e., only a pointer */
-#define CCTK_STRING CCTK_POINTER_TO_CONST
-#define HAVE_CCTK_STRING 1
-
-#ifdef HAVE_CCTK_INT8
-#define CCTK_INT8 INTEGER*8
-#endif
-#ifdef HAVE_CCTK_INT4
-#define CCTK_INT4 INTEGER*4
-#endif
-#ifdef HAVE_CCTK_INT2
-#define CCTK_INT2 INTEGER*2
-#endif
-#ifdef HAVE_CCTK_INT1
-#define CCTK_INT1 INTEGER*1
-#endif
-
-#ifdef HAVE_CCTK_REAL16
-#define CCTK_REAL16 REAL*16
-#define HAVE_CCTK_COMPLEX32 1
-#define CCTK_COMPLEX32  COMPLEX*32
-#endif
-
-#ifdef HAVE_CCTK_REAL8
-#define CCTK_REAL8  REAL*8
-#define HAVE_CCTK_COMPLEX16 1
-#define CCTK_COMPLEX16  COMPLEX*16
-#endif
-
-#ifdef HAVE_CCTK_REAL4
-#define CCTK_REAL4  REAL*4
-#define HAVE_CCTK_COMPLEX8 1
-#define CCTK_COMPLEX8   COMPLEX*8
-#endif
-
-/* Should be unsigned, but Fortran doesn't have that */
-#define CCTK_BYTE INTEGER*1
-#define HAVE_CCTK_BYTE 1
-
-#endif /*FCODE */
-
-/* Now pick the types based upon the precision variable. */
-
-/* Floating point precision */
-#ifdef CCTK_REAL_PRECISION_16
-#define CCTK_REAL_PRECISION 16
-#define CCTK_REAL CCTK_REAL16
-#endif
-
-#ifdef CCTK_REAL_PRECISION_8
-#define CCTK_REAL_PRECISION 8
-#define CCTK_REAL CCTK_REAL8
-#endif
-
-#ifdef CCTK_REAL_PRECISION_4
-#define CCTK_REAL_PRECISION 4
-#define CCTK_REAL CCTK_REAL4
-#endif
-
-/* Integer precision */
-
-#ifdef CCTK_INTEGER_PRECISION_8
-#define CCTK_INTEGER_PRECISION 8
-#define CCTK_INT CCTK_INT8
-#endif
-
-#ifdef CCTK_INTEGER_PRECISION_4
-#define CCTK_INTEGER_PRECISION 4
-#define CCTK_INT CCTK_INT4
-#endif
-
-#ifdef CCTK_INTEGER_PRECISION_2
-#define CCTK_INTEGER_PRECISION 2
-#define CCTK_INT CCTK_INT2
-#endif
-
-#ifdef CCTK_INTEGER_PRECISION_1
-#define CCTK_INTEGER_PRECISION 1
-#define CCTK_INT CCTK_INT1
-#endif
-
-/* Complex precision */
-#ifdef CCTK_REAL_PRECISION_16
-#define CCTK_COMPLEX_PRECISION 32
-#define CCTK_COMPLEX CCTK_COMPLEX32
-#endif
-
-#ifdef CCTK_REAL_PRECISION_8
-#define CCTK_COMPLEX_PRECISION 16
-#define CCTK_COMPLEX CCTK_COMPLEX16
-#endif
-
-#ifdef CCTK_REAL_PRECISION_4
-#define CCTK_COMPLEX_PRECISION 8
-#define CCTK_COMPLEX CCTK_COMPLEX8
-#endif
-
-#endif /*_CCTK_TYPES_H_ */
-
+#ifndef _CCTK_TYPES_H_
+#define _CCTK_TYPES_H_
+
+#ifndef _CCTK_CONFIG_H_
+#include "cctk_Config.h"
+#endif
+
+typedef void *CCTK_POINTER;
+typedef const void *CCTK_POINTER_TO_CONST;
+typedef void (*CCTK_FPOINTER)(void);
+#define HAVE_CCTK_POINTER 1
+#define HAVE_CCTK_POINTER_TO_CONST 1
+#define HAVE_CCTK_FPOINTER 1
+
+/* Character types */
+typedef char CCTK_CHAR;
+typedef const char * CCTK_STRING;
+#define HAVE_CCTK_CHAR 1
+#define HAVE_CCTK_STRING 1
+
+/* Structures for complex types */
+
+#ifdef HAVE_CCTK_REAL16
+#define HAVE_CCTK_COMPLEX32 1
+typedef struct CCTK_COMPLEX32
+{
+  CCTK_REAL16 Re;
+  CCTK_REAL16 Im;
+#ifdef __cplusplus
+  CCTK_REAL16 real() const { return Re; }
+  CCTK_REAL16 imag() const { return Im; }
+#endif
+} CCTK_COMPLEX32;
+#endif
+
+#ifdef HAVE_CCTK_REAL8
+#define HAVE_CCTK_COMPLEX16 1
+typedef struct CCTK_COMPLEX16
+{
+  CCTK_REAL8 Re;
+  CCTK_REAL8 Im;
+#ifdef __cplusplus
+  CCTK_REAL8 real() const { return Re; }
+  CCTK_REAL8 imag() const { return Im; }
+#endif
+} CCTK_COMPLEX16;
+#endif
+
+#ifdef HAVE_CCTK_REAL4
+#define HAVE_CCTK_COMPLEX8 1
+typedef struct CCTK_COMPLEX8
+{
+  CCTK_REAL4 Re;
+  CCTK_REAL4 Im;
+#ifdef __cplusplus
+  CCTK_REAL4 real() const { return Re; }
+  CCTK_REAL4 imag() const { return Im; }
+#endif
+} CCTK_COMPLEX8;
+#endif
+
+/* Small positive integer type */
+typedef unsigned char CCTK_BYTE;
+#define HAVE_CCTK_BYTE 1
+
+/* Define stuff for fortran. */
+#ifdef FCODE
+
+#define CCTK_POINTER          integer*SIZEOF_CHAR_P
+#define CCTK_POINTER_TO_CONST integer*SIZEOF_CHAR_P
+/* TODO: add autoconf for determining the size of function pointers */
+#define CCTK_FPOINTER         integer*SIZEOF_CHAR_P
+#define HAVE_CCTK_POINTER 1
+#define HAVE_CCTK_POINTER_TO_CONST 1
+#define HAVE_CCTK_FPOINTER 1
+
+/* Character types */
+/* A single character does not exist in Fortran; in Fortran, all
+   character types are strings.  Hence we do not define CCTK_CHAR.  */
+/* #define CCTK_CHAR   CHARACTER */
+/* #define HAVE_CCTK_CHAR 1 */
+/* This is a C-string, i.e., only a pointer */
+#define CCTK_STRING CCTK_POINTER_TO_CONST
+#define HAVE_CCTK_STRING 1
+
+#ifdef HAVE_CCTK_INT8
+#define CCTK_INT8 INTEGER*8
+#endif
+#ifdef HAVE_CCTK_INT4
+#define CCTK_INT4 INTEGER*4
+#endif
+#ifdef HAVE_CCTK_INT2
+#define CCTK_INT2 INTEGER*2
+#endif
+#ifdef HAVE_CCTK_INT1
+#define CCTK_INT1 INTEGER*1
+#endif
+
+#ifdef HAVE_CCTK_REAL16
+#define CCTK_REAL16 REAL*16
+#define HAVE_CCTK_COMPLEX32 1
+#define CCTK_COMPLEX32  COMPLEX*32
+#endif
+
+#ifdef HAVE_CCTK_REAL8
+#define CCTK_REAL8  REAL*8
+#define HAVE_CCTK_COMPLEX16 1
+#define CCTK_COMPLEX16  COMPLEX*16
+#endif
+
+#ifdef HAVE_CCTK_REAL4
+#define CCTK_REAL4  REAL*4
+#define HAVE_CCTK_COMPLEX8 1
+#define CCTK_COMPLEX8   COMPLEX*8
+#endif
+
+/* Should be unsigned, but Fortran doesn't have that */
+#define CCTK_BYTE INTEGER*1
+#define HAVE_CCTK_BYTE 1
+
+#endif /*FCODE */
+
+/* Now pick the types based upon the precision variable. */
+
+/* Floating point precision */
+#ifdef CCTK_REAL_PRECISION_16
+#define CCTK_REAL_PRECISION 16
+#define CCTK_REAL CCTK_REAL16
+#endif
+
+#ifdef CCTK_REAL_PRECISION_8
+#define CCTK_REAL_PRECISION 8
+#define CCTK_REAL CCTK_REAL8
+#endif
+
+#ifdef CCTK_REAL_PRECISION_4
+#define CCTK_REAL_PRECISION 4
+#define CCTK_REAL CCTK_REAL4
+#endif
+
+/* Integer precision */
+
+#ifdef CCTK_INTEGER_PRECISION_8
+#define CCTK_INTEGER_PRECISION 8
+#define CCTK_INT CCTK_INT8
+#endif
+
+#ifdef CCTK_INTEGER_PRECISION_4
+#define CCTK_INTEGER_PRECISION 4
+#define CCTK_INT CCTK_INT4
+#endif
+
+#ifdef CCTK_INTEGER_PRECISION_2
+#define CCTK_INTEGER_PRECISION 2
+#define CCTK_INT CCTK_INT2
+#endif
+
+#ifdef CCTK_INTEGER_PRECISION_1
+#define CCTK_INTEGER_PRECISION 1
+#define CCTK_INT CCTK_INT1
+#endif
+
+/* Complex precision */
+#ifdef CCTK_REAL_PRECISION_16
+#define CCTK_COMPLEX_PRECISION 32
+#define CCTK_COMPLEX CCTK_COMPLEX32
+#endif
+
+#ifdef CCTK_REAL_PRECISION_8
+#define CCTK_COMPLEX_PRECISION 16
+#define CCTK_COMPLEX CCTK_COMPLEX16
+#endif
+
+#ifdef CCTK_REAL_PRECISION_4
+#define CCTK_COMPLEX_PRECISION 8
+#define CCTK_COMPLEX CCTK_COMPLEX8
+#endif
+
+#endif /*_CCTK_TYPES_H_ */
+
--- a/AMSS_NCKU_source/AHF_Direct/config.h
+++ b/AMSS_NCKU_source/AHF_Direct/config.h
@@ -1,16 +1,16 @@
-#ifndef AHFINDERDIRECT__CONFIG_H
-#define AHFINDERDIRECT__CONFIG_H
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-
-size_t Util_Strlcat(char* dst, const char* src, size_t dst_size);
-size_t Util_Strlcpy(char* dst, const char* src, size_t dst_size);
-
-typedef CCTK_REAL fp;
-
-typedef CCTK_INT integer;
-
-#endif	/* AHFINDERDIRECT__CONFIG_H */
+#ifndef AHFINDERDIRECT__CONFIG_H
+#define AHFINDERDIRECT__CONFIG_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+
+size_t Util_Strlcat(char* dst, const char* src, size_t dst_size);
+size_t Util_Strlcpy(char* dst, const char* src, size_t dst_size);
+
+typedef CCTK_REAL fp;
+
+typedef CCTK_INT integer;
+
+#endif	/* AHFINDERDIRECT__CONFIG_H */
--- a/AMSS_NCKU_source/AHF_Direct/coords.C
+++ b/AMSS_NCKU_source/AHF_Direct/coords.C
--- a/AMSS_NCKU_source/AHF_Direct/coords.h
+++ b/AMSS_NCKU_source/AHF_Direct/coords.h
@@ -1,173 +1,173 @@
-#ifndef COORDS_H
-#define COORDS_H
-namespace AHFinderDirect
-{
-	namespace local_coords
-	{
-
-		// compare if two angles are fuzzily equal mod 2*pi radians (360 degrees)
-		bool fuzzy_EQ_ang(fp ang1, fp ang2);	// radians
-		bool fuzzy_EQ_dang(fp dang1, fp dang2); // degrees
-
-		// modulo-reduce  {ang,dang}  to be (fuzzily) within the range
-		// [min,max]_{ang,dang}, or error_exit() if no such value exists
-		fp modulo_reduce_ang(fp ang, fp min_ang, fp max_ang);
-		fp modulo_reduce_dang(fp dang, fp min_dang, fp max_dang);
-
-	} // close namespace local_coords::
-
-	namespace local_coords
-	{
-		// (r,(mu,nu,phi)) <--> (x,y,z)
-		void xyz_of_r_mu_nu(fp r, fp mu, fp nu, fp &x, fp &y, fp &z);
-		void xyz_of_r_mu_phi(fp r, fp mu, fp phi, fp &x, fp &y, fp &z);
-		void xyz_of_r_nu_phi(fp r, fp nu, fp phi, fp &x, fp &y, fp &z);
-		fp r_of_xyz(fp x, fp y, fp z);
-		fp mu_of_yz(fp y, fp z);
-		fp nu_of_xz(fp x, fp z);
-		fp phi_of_xy(fp x, fp y);
-
-		// ((mu,nu,phi)) --> the 3rd
-		fp phi_of_mu_nu(fp mu, fp nu);
-		fp nu_of_mu_phi(fp mu, fp phi);
-		fp mu_of_nu_phi(fp nu, fp phi);
-
-		// partial {x,y,z} / partial {mu,nu,phi}
-		void partial_xyz_wrt_r_mu_nu(fp r, fp mu, fp nu,
-									 fp &partial_x_wrt_r, fp &partial_x_wrt_mu, fp &partial_x_wrt_nu,
-									 fp &partial_y_wrt_r, fp &partial_y_wrt_mu, fp &partial_y_wrt_nu,
-									 fp &partial_z_wrt_r, fp &partial_z_wrt_mu, fp &partial_z_wrt_nu);
-		void partial_xyz_wrt_r_mu_phi(fp r, fp mu, fp phi,
-									  fp &partial_x_wrt_r, fp &partial_x_wrt_mu, fp &partial_x_wrt_phi,
-									  fp &partial_y_wrt_r, fp &partial_y_wrt_mu, fp &partial_y_wrt_phi,
-									  fp &partial_z_wrt_r, fp &partial_z_wrt_mu, fp &partial_z_wrt_phi);
-		void partial_xyz_wrt_r_nu_phi(fp r, fp nu, fp phi,
-									  fp &partial_x_wrt_r, fp &partial_x_wrt_nu, fp &partial_x_wrt_phi,
-									  fp &partial_y_wrt_r, fp &partial_y_wrt_nu, fp &partial_y_wrt_phi,
-									  fp &partial_z_wrt_r, fp &partial_z_wrt_nu, fp &partial_z_wrt_phi);
-
-		// partial {mu,nu,phi} / partial {x,y,z}
-		fp partial_mu_wrt_y(fp y, fp z);
-		fp partial_mu_wrt_z(fp y, fp z);
-		fp partial_nu_wrt_x(fp x, fp z);
-		fp partial_nu_wrt_z(fp x, fp z);
-		fp partial_phi_wrt_x(fp x, fp y);
-		fp partial_phi_wrt_y(fp x, fp y);
-
-		// partial^2 {mu,nu,phi} / partial {x,y,z}{x,y,z}
-		fp partial2_mu_wrt_yy(fp y, fp z);
-		fp partial2_mu_wrt_yz(fp y, fp z);
-		fp partial2_mu_wrt_zz(fp y, fp z);
-		fp partial2_nu_wrt_xx(fp x, fp z);
-		fp partial2_nu_wrt_xz(fp x, fp z);
-		fp partial2_nu_wrt_zz(fp x, fp z);
-		fp partial2_phi_wrt_xx(fp x, fp y);
-		fp partial2_phi_wrt_xy(fp x, fp y);
-		fp partial2_phi_wrt_yy(fp x, fp y);
-
-		// usual polar spherical (r,theta,phi) <--> (x,y,z)
-		void xyz_of_r_theta_phi(fp r, fp theta, fp phi, fp &x, fp &y, fp &z);
-		void r_theta_phi_of_xyz(fp x, fp y, fp z, fp &r, fp &theta, fp &phi);
-		// ... already have r_of_xyz()
-		// ... already have phi_of_xy()
-		fp theta_of_xyz(fp x, fp y, fp z);
-
-		// ((mu,nu,phi)) <--> usual polar spherical (theta,phi)
-		// ... note phi is the same coordinate in both systems
-		void theta_phi_of_mu_nu(fp mu, fp nu, fp &ps_theta, fp &ps_phi);
-		void theta_phi_of_mu_phi(fp mu, fp phi, fp &ps_theta, fp &ps_phi);
-		void theta_phi_of_nu_phi(fp nu, fp phi, fp &ps_theta, fp &ps_phi);
-		void mu_nu_of_theta_phi(fp ps_theta, fp ps_phi, fp &mu, fp &nu);
-		void mu_phi_of_theta_phi(fp ps_theta, fp ps_phi, fp &mu, fp &phi);
-		void nu_phi_of_theta_phi(fp ps_theta, fp ps_phi, fp &nu, fp &phi);
-
-		// ((mu,nu,phi)) --> direction cosines (xcos,ycos,zcos)
-		void xyzcos_of_mu_nu(fp mu, fp nu, fp &xcos, fp &ycos, fp &zcos);
-		void xyzcos_of_mu_phi(fp mu, fp phi, fp &xcos, fp &ycos, fp &zcos);
-		void xyzcos_of_nu_phi(fp nu, fp phi, fp &xcos, fp &ycos, fp &zcos);
-	} // close namespace local_coords::
-
-	//*****************************************************************************
-
-	//
-	// ***** bit masks for coordinates ****
-	//
-
-	//
-	// We need to manipulate coordinates to do calculations like "which
-	// coordinate do these two patches have in common".  We do these by
-	// Boolean operations on integers using the following bit masks:
-	//
-
-	namespace local_coords
-	{
-
-		typedef int coords_set;
-
-		enum
-		{
-			coords_set_mu = 0x1,
-			coords_set_nu = 0x2,
-			coords_set_phi = 0x4,
-
-			coords_set_empty = 0x0,
-			coords_set_all = coords_set_mu | coords_set_nu | coords_set_phi // no comma
-		};
-
-		// human-readable coordinate names for debugging etc
-		const char *name_of_coords_set(coords_set S);
-
-		// set complement of coordinates
-		inline coords_set coords_set_not(coords_set S)
-		{
-			return coords_set_all & ~S;
-		}
-
-	} // close namespace local_coords::
-
-	//******************************************************************************
-
-	//
-	// This class stores the origin point of our local coordinates, and
-	// provides conversions between local and global coordinates.
-	//
-	class global_coords
-	{
-	public:
-		// get global (x,y,z) coordinates of local origin point
-		fp origin_x() const { return origin_x_; }
-		fp origin_y() const { return origin_y_; }
-		fp origin_z() const { return origin_z_; }
-
-		// constructor: specify global (x,y,z) coordinates of local origin point
-		global_coords(fp origin_x_in, fp origin_y_in, fp origin_z_in)
-			: origin_x_(origin_x_in),
-			  origin_y_(origin_y_in),
-			  origin_z_(origin_z_in)
-		{
-		}
-		// destructor: compiler-generated no-op is ok
-
-		void recentering(fp x, fp y, fp z)
-		{
-			origin_x_ = x;
-			origin_y_ = y;
-			origin_z_ = z;
-		}
-
-	private:
-		// we forbid copying and passing by value
-		// by declaring the copy constructor and assignment operator
-		// private, but never defining them
-		global_coords(const global_coords &rhs);
-		global_coords &operator=(const global_coords &rhs);
-
-	private:
-		// global (x,y,z) coordinates of local origin point
-		fp origin_x_, origin_y_, origin_z_;
-	};
-
-	//******************************************************************************
-
-} // namespace AHFinderDirect
-#endif /*  COORDS_H  */
+#ifndef COORDS_H
+#define COORDS_H
+namespace AHFinderDirect
+{
+	namespace local_coords
+	{
+
+		// compare if two angles are fuzzily equal mod 2*pi radians (360 degrees)
+		bool fuzzy_EQ_ang(fp ang1, fp ang2);	// radians
+		bool fuzzy_EQ_dang(fp dang1, fp dang2); // degrees
+
+		// modulo-reduce  {ang,dang}  to be (fuzzily) within the range
+		// [min,max]_{ang,dang}, or error_exit() if no such value exists
+		fp modulo_reduce_ang(fp ang, fp min_ang, fp max_ang);
+		fp modulo_reduce_dang(fp dang, fp min_dang, fp max_dang);
+
+	} // close namespace local_coords::
+
+	namespace local_coords
+	{
+		// (r,(mu,nu,phi)) <--> (x,y,z)
+		void xyz_of_r_mu_nu(fp r, fp mu, fp nu, fp &x, fp &y, fp &z);
+		void xyz_of_r_mu_phi(fp r, fp mu, fp phi, fp &x, fp &y, fp &z);
+		void xyz_of_r_nu_phi(fp r, fp nu, fp phi, fp &x, fp &y, fp &z);
+		fp r_of_xyz(fp x, fp y, fp z);
+		fp mu_of_yz(fp y, fp z);
+		fp nu_of_xz(fp x, fp z);
+		fp phi_of_xy(fp x, fp y);
+
+		// any two of (mu,nu,phi) --> the 3rd
+		fp phi_of_mu_nu(fp mu, fp nu);
+		fp nu_of_mu_phi(fp mu, fp phi);
+		fp mu_of_nu_phi(fp nu, fp phi);
+
+		// partial {x,y,z} / partial {r,mu,nu,phi}
+		void partial_xyz_wrt_r_mu_nu(fp r, fp mu, fp nu,
+									 fp &partial_x_wrt_r, fp &partial_x_wrt_mu, fp &partial_x_wrt_nu,
+									 fp &partial_y_wrt_r, fp &partial_y_wrt_mu, fp &partial_y_wrt_nu,
+									 fp &partial_z_wrt_r, fp &partial_z_wrt_mu, fp &partial_z_wrt_nu);
+		void partial_xyz_wrt_r_mu_phi(fp r, fp mu, fp phi,
+									  fp &partial_x_wrt_r, fp &partial_x_wrt_mu, fp &partial_x_wrt_phi,
+									  fp &partial_y_wrt_r, fp &partial_y_wrt_mu, fp &partial_y_wrt_phi,
+									  fp &partial_z_wrt_r, fp &partial_z_wrt_mu, fp &partial_z_wrt_phi);
+		void partial_xyz_wrt_r_nu_phi(fp r, fp nu, fp phi,
+									  fp &partial_x_wrt_r, fp &partial_x_wrt_nu, fp &partial_x_wrt_phi,
+									  fp &partial_y_wrt_r, fp &partial_y_wrt_nu, fp &partial_y_wrt_phi,
+									  fp &partial_z_wrt_r, fp &partial_z_wrt_nu, fp &partial_z_wrt_phi);
+
+		// partial {mu,nu,phi} / partial {x,y,z}
+		fp partial_mu_wrt_y(fp y, fp z);
+		fp partial_mu_wrt_z(fp y, fp z);
+		fp partial_nu_wrt_x(fp x, fp z);
+		fp partial_nu_wrt_z(fp x, fp z);
+		fp partial_phi_wrt_x(fp x, fp y);
+		fp partial_phi_wrt_y(fp x, fp y);
+
+		// partial^2 {mu,nu,phi} / partial {x,y,z}{x,y,z}
+		fp partial2_mu_wrt_yy(fp y, fp z);
+		fp partial2_mu_wrt_yz(fp y, fp z);
+		fp partial2_mu_wrt_zz(fp y, fp z);
+		fp partial2_nu_wrt_xx(fp x, fp z);
+		fp partial2_nu_wrt_xz(fp x, fp z);
+		fp partial2_nu_wrt_zz(fp x, fp z);
+		fp partial2_phi_wrt_xx(fp x, fp y);
+		fp partial2_phi_wrt_xy(fp x, fp y);
+		fp partial2_phi_wrt_yy(fp x, fp y);
+
+		// usual polar spherical (r,theta,phi) <--> (x,y,z)
+		void xyz_of_r_theta_phi(fp r, fp theta, fp phi, fp &x, fp &y, fp &z);
+		void r_theta_phi_of_xyz(fp x, fp y, fp z, fp &r, fp &theta, fp &phi);
+		// ... already have r_of_xyz()
+		// ... already have phi_of_xy()
+		fp theta_of_xyz(fp x, fp y, fp z);
+
+		// ((mu,nu,phi)) <--> usual polar spherical (theta,phi)
+		// ... note phi is the same coordinate in both systems
+		void theta_phi_of_mu_nu(fp mu, fp nu, fp &ps_theta, fp &ps_phi);
+		void theta_phi_of_mu_phi(fp mu, fp phi, fp &ps_theta, fp &ps_phi);
+		void theta_phi_of_nu_phi(fp nu, fp phi, fp &ps_theta, fp &ps_phi);
+		void mu_nu_of_theta_phi(fp ps_theta, fp ps_phi, fp &mu, fp &nu);
+		void mu_phi_of_theta_phi(fp ps_theta, fp ps_phi, fp &mu, fp &phi);
+		void nu_phi_of_theta_phi(fp ps_theta, fp ps_phi, fp &nu, fp &phi);
+
+		// ((mu,nu,phi)) --> direction cosines (xcos,ycos,zcos)
+		void xyzcos_of_mu_nu(fp mu, fp nu, fp &xcos, fp &ycos, fp &zcos);
+		void xyzcos_of_mu_phi(fp mu, fp phi, fp &xcos, fp &ycos, fp &zcos);
+		void xyzcos_of_nu_phi(fp nu, fp phi, fp &xcos, fp &ycos, fp &zcos);
+	} // close namespace local_coords::
+
+	//*****************************************************************************
+
+	//
+	// ***** bit masks for coordinates ****
+	//
+
+	//
+	// We need to manipulate coordinates to do calculations like "which
+	// coordinate do these two patches have in common".  We do these by
+	// Boolean operations on integers using the following bit masks:
+	//
+
+	namespace local_coords
+	{
+
+		typedef int coords_set;
+
+		enum
+		{
+			coords_set_mu = 0x1,
+			coords_set_nu = 0x2,
+			coords_set_phi = 0x4,
+
+			coords_set_empty = 0x0,
+			coords_set_all = coords_set_mu | coords_set_nu | coords_set_phi // no comma
+		};
+
+		// human-readable coordinate names for debugging etc
+		const char *name_of_coords_set(coords_set S);
+
+		// set complement of coordinates
+		inline coords_set coords_set_not(coords_set S)
+		{
+			return coords_set_all & ~S;
+		}
+
+	} // close namespace local_coords::
+
+	//******************************************************************************
+
+	//
+	// This class stores the origin point of our local coordinates, and
+	// provides conversions between local and global coordinates.
+	//
+	class global_coords
+	{
+	public:
+		// get global (x,y,z) coordinates of local origin point
+		fp origin_x() const { return origin_x_; }
+		fp origin_y() const { return origin_y_; }
+		fp origin_z() const { return origin_z_; }
+
+		// constructor: specify global (x,y,z) coordinates of local origin point
+		global_coords(fp origin_x_in, fp origin_y_in, fp origin_z_in)
+			: origin_x_(origin_x_in),
+			  origin_y_(origin_y_in),
+			  origin_z_(origin_z_in)
+		{
+		}
+		// destructor: compiler-generated no-op is ok
+
+		void recentering(fp x, fp y, fp z)
+		{
+			origin_x_ = x;
+			origin_y_ = y;
+			origin_z_ = z;
+		}
+
+	private:
+		// we forbid copying and passing by value
+		// by declaring the copy constructor and assignment operator
+		// private, but never defining them
+		global_coords(const global_coords &rhs);
+		global_coords &operator=(const global_coords &rhs);
+
+	private:
+		// global (x,y,z) coordinates of local origin point
+		fp origin_x_, origin_y_, origin_z_;
+	};
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /*  COORDS_H  */
--- a/AMSS_NCKU_source/AHF_Direct/cpm_map.C
+++ b/AMSS_NCKU_source/AHF_Direct/cpm_map.C
@@ -1,93 +1,93 @@
-#include <assert.h>
-#include <stdio.h>
-
-#include "stdc.h"
-#include "util.h"
-#include "cpm_map.h"
-
-namespace AHFinderDirect
-{
-	namespace jtutil
-	{
-
-		template <typename fp_t>
-		cpm_map<fp_t>::cpm_map(int min_i_in, int max_i_in,
-							   fp_t fixed_point)
-			: min_i_(min_i_in), max_i_(max_i_in),
-			  map_is_plus_(false)
-		{
-			const fp_t d_offset = 2.0 * fixed_point;
-			if (!fuzzy<fp_t>::is_integer(d_offset))
-				then error_exit(ERROR_EXIT,
-								"***** cpm_map::cpm_map (mirror):\n"
-								"        fixed_point=%g isn't (fuzzily) integral or half-integral!\n",
-								double(fixed_point)); /*NOTREACHED*/
-
-			offset_ = round<fp_t>::to_integer(d_offset);
-
-			assert(
-				map_unchecked(fuzzy<fp_t>::floor(fixed_point)) ==
-				fuzzy<fp_t>::ceiling(fixed_point));
-		}
-
-		//******************************************************************************
-
-		//
-		// This function constructs a generic  cpm_map  object, with the mapping
-		// specified by a sample point  sample_i --> sample_j  and by sign.
-		// The sample point need not be in the map's domain/range.
-		//
-		template <typename fp_t>
-		cpm_map<fp_t>::cpm_map(int min_i_in, int max_i_in,
-							   int sample_i, int sample_j,
-							   bool map_is_plus_in)
-			: min_i_(min_i_in), max_i_(max_i_in),
-			  offset_(map_is_plus_in ? sample_j - sample_i
-									 : sample_j + sample_i),
-			  map_is_plus_(map_is_plus_in)
-		{
-			assert(map_unchecked(sample_i) == sample_j);
-		}
-
-		//******************************************************************************
-
-		//
-		// This function constructs a generic  cpm_map  object, with the mapping
-		// specified by a *fp* sample point  sample_i --> sample_j  (which
-		// must specify an  integer --> integer  mapping, i.e.  4.2 --> 4.2  is
-		// ok for a + map, and 4.5 --> 4.5 is ok for a minus map, but  4.2 --> 4.7
-		// is never ok) and by sign.  The sample point need not be in the map's
-		// domain/range.
-		//
-		template <typename fp_t>
-		cpm_map<fp_t>::cpm_map(int min_i_in, int max_i_in,
-							   fp_t sample_i, fp_t sample_j,
-							   bool map_is_plus_in)
-			: min_i_(min_i_in), max_i_(max_i_in),
-			  map_is_plus_(map_is_plus_in)
-		{
-			const fp_t fp_offset = map_is_plus_in ? sample_j - sample_i
-												  : sample_j + sample_i;
-			if (!fuzzy<fp_t>::is_integer(fp_offset))
-				then error_exit(ERROR_EXIT,
-								"***** cpm_map::cpm_map (generic via fp sample point):\n"
-								"        fp_offset=%g isn't fuzzily integral!\n"
-								"        ==> sample_i=%g --> sample_j=%g\n"
-								"            doesn't fuzzily specify an  integer --> integer  mapping!\n",
-								double(fp_offset),
-								double(sample_i), double(sample_j)); /*NOTREACHED*/
-
-			offset_ = round<fp_t>::to_integer(fp_offset);
-
-			// verify that we have setup correct
-			assert(
-				map_unchecked(fuzzy<fp_t>::floor(sample_i)) ==
-				(map_is_plus_in ? fuzzy<fp_t>::floor(sample_j)
-								: fuzzy<fp_t>::ceiling(sample_j)));
-		}
-
-		template class cpm_map<float>;
-		template class cpm_map<double>;
-
-	} // namespace jtutil
-} // namespace AHFinderDirect
+#include <assert.h>
+#include <stdio.h>
+
+#include "stdc.h"
+#include "util.h"
+#include "cpm_map.h"
+
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+
+		template <typename fp_t>
+		cpm_map<fp_t>::cpm_map(int min_i_in, int max_i_in,
+							   fp_t fixed_point)
+			: min_i_(min_i_in), max_i_(max_i_in),
+			  map_is_plus_(false)
+		{
+			const fp_t d_offset = 2.0 * fixed_point;
+			if (!fuzzy<fp_t>::is_integer(d_offset))
+				then error_exit(ERROR_EXIT,
+								"***** cpm_map::cpm_map (mirror):\n"
+								"        fixed_point=%g isn't (fuzzily) integral or half-integral!\n",
+								double(fixed_point)); /*NOTREACHED*/
+
+			offset_ = round<fp_t>::to_integer(d_offset);
+
+			assert(
+				map_unchecked(fuzzy<fp_t>::floor(fixed_point)) ==
+				fuzzy<fp_t>::ceiling(fixed_point));
+		}
+
+		//******************************************************************************
+
+		//
+		// This function constructs a generic  cpm_map  object, with the mapping
+		// specified by a sample point  sample_i --> sample_j  and by sign.
+		// The sample point need not be in the map's domain/range.
+		//
+		template <typename fp_t>
+		cpm_map<fp_t>::cpm_map(int min_i_in, int max_i_in,
+							   int sample_i, int sample_j,
+							   bool map_is_plus_in)
+			: min_i_(min_i_in), max_i_(max_i_in),
+			  offset_(map_is_plus_in ? sample_j - sample_i
+									 : sample_j + sample_i),
+			  map_is_plus_(map_is_plus_in)
+		{
+			assert(map_unchecked(sample_i) == sample_j);
+		}
+
+		//******************************************************************************
+
+		//
+		// This function constructs a generic  cpm_map  object, with the mapping
+		// specified by a *fp* sample point  sample_i --> sample_j  (which
+		// must specify an  integer --> integer  mapping, e.g.  4.2 --> 4.2  is
+		// ok for a + map, and 4.5 --> 4.5 is ok for a minus map, but  4.2 --> 4.7
+		// is never ok) and by sign.  The sample point need not be in the map's
+		// domain/range.
+		//
+		template <typename fp_t>
+		cpm_map<fp_t>::cpm_map(int min_i_in, int max_i_in,
+							   fp_t sample_i, fp_t sample_j,
+							   bool map_is_plus_in)
+			: min_i_(min_i_in), max_i_(max_i_in),
+			  map_is_plus_(map_is_plus_in)
+		{
+			const fp_t fp_offset = map_is_plus_in ? sample_j - sample_i
+												  : sample_j + sample_i;
+			if (!fuzzy<fp_t>::is_integer(fp_offset))
+				then error_exit(ERROR_EXIT,
+								"***** cpm_map::cpm_map (generic via fp sample point):\n"
+								"        fp_offset=%g isn't fuzzily integral!\n"
+								"        ==> sample_i=%g --> sample_j=%g\n"
+								"            doesn't fuzzily specify an  integer --> integer  mapping!\n",
+								double(fp_offset),
+								double(sample_i), double(sample_j)); /*NOTREACHED*/
+
+			offset_ = round<fp_t>::to_integer(fp_offset);
+
+			// verify that we have set up the mapping correctly
+			assert(
+				map_unchecked(fuzzy<fp_t>::floor(sample_i)) ==
+				(map_is_plus_in ? fuzzy<fp_t>::floor(sample_j)
+								: fuzzy<fp_t>::ceiling(sample_j)));
+		}
+
+		template class cpm_map<float>;
+		template class cpm_map<double>;
+
+	} // namespace jtutil
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/cpm_map.h
+++ b/AMSS_NCKU_source/AHF_Direct/cpm_map.h
@@ -1,120 +1,120 @@
-#ifndef AHFINDERDIRECT__CPM_MAP_HH
-#define AHFINDERDIRECT__CPM_MAP_HH
-namespace AHFinderDirect
-{
-	namespace jtutil
-	{
-
-		template <typename fp_t>
-		class cpm_map
-		{
-		public:
-			// bounds info -- domain
-			int min_i() const { return min_i_; }
-			int max_i() const { return max_i_; }
-			int N_points() const
-			{
-				return jtutil::how_many_in_range(min_i_, max_i_);
-			}
-			bool in_domain(int i) const { return (i >= min_i_) && (i <= max_i_); }
-
-			// is the mapping + or - ?
-			bool is_plus() const { return map_is_plus_; }
-			bool is_minus() const { return !map_is_plus_; }
-			int sign() const { return map_is_plus_ ? +1 : -1; }
-			fp_t fp_sign() const { return map_is_plus_ ? +1.0 : -1.0; }
-
-			// the mapping itself
-			int map_unchecked(int i) const
-			{
-				return map_is_plus_ ? offset_ + i
-									: offset_ - i;
-			}
-			int inv_map_unchecked(int j) const
-			{
-				return map_is_plus_ ? j - offset_
-									: offset_ - j;
-			}
-			int map(int i) const
-			{
-				assert(in_domain(i));
-				return map_unchecked(i);
-			}
-			int inv_map(int j) const
-			{
-				int i = inv_map_unchecked(j);
-				assert(in_domain(i));
-				return i;
-			}
-
-			// bounds info -- range
-			// ... we use the unchecked map here in case the domain is empty
-			int min_j() const
-			{
-				return map_is_plus_ ? map_unchecked(min_i_)
-									: map_unchecked(max_i_);
-			}
-			int max_j() const
-			{
-				return map_is_plus_ ? map_unchecked(max_i_)
-									: map_unchecked(min_i_);
-			}
-			bool in_range(int j) const { return in_domain(inv_map_unchecked(j)); }
-
-			//
-			// constructors
-			//
-
-			// "mirror" map: i --> const - i
-			// ... map specified by fixed point (must be integer or half-integer)
-			// ... fixed point need not be in domain/range
-			cpm_map(int min_i_in, int max_i_in,
-					fp_t fixed_point);
-
-			// "shift" map: i --> const + i
-			// ... map specified by shift amount
-			// ... default is identity map
-			cpm_map(int min_i_in, int max_i_in,
-					int shift_amount = 0)
-				: min_i_(min_i_in), max_i_(max_i_in),
-				  offset_(shift_amount), map_is_plus_(true)
-			{
-			}
-
-			// generic map: i --> const +/- i
-			// ... map specified by sample point sample_i --> sample_j
-			//     and by sign (one of  {plus,minus}_map )
-			// ... sample point need not be in domain/range
-			cpm_map(int min_i_in, int max_i_in,
-					int sample_i, int sample_j,
-					bool map_is_plus_in);
-
-			// generic map: i --> const +/- i
-			// ... map specified by *fp* sample point sample_i --> sample_j
-			//     (must specify an integer --> integer mapping)
-			//     and by sign (one of  {plus,minus}_map )
-			// ... hence if sign is -1, then sample_i and sample_j
-			//     must both be half-integral
-			// ... sample point need *not* be in domain/range
-			cpm_map(int min_i_in, int max_i_in,
-					fp_t sample_i, fp_t sample_j,
-					bool map_is_plus_in);
-
-			// no need for explicit destructor, compiler-generated no-op is ok
-			// ditto for copy constructor and assignment operator
-
-		private:
-			// bounds (inclusive)
-			int min_i_, max_i_;
-
-			// these define the actual mapping
-			int offset_;
-			bool map_is_plus_;
-		};
-
-		//******************************************************************************
-
-	} // namespace jtutil
-} // namespace AHFinderDirect
-
-#endif /* AHFINDERDIRECT__CPM_MAP_HH */
+#ifndef AHFINDERDIRECT__CPM_MAP_HH
+#define AHFINDERDIRECT__CPM_MAP_HH
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+
+		template <typename fp_t>
+		class cpm_map
+		{
+		public:
+			// bounds info -- domain
+			int min_i() const { return min_i_; }
+			int max_i() const { return max_i_; }
+			int N_points() const
+			{
+				return jtutil::how_many_in_range(min_i_, max_i_);
+			}
+			bool in_domain(int i) const { return (i >= min_i_) && (i <= max_i_); }
+
+			// is the mapping + or - ?
+			bool is_plus() const { return map_is_plus_; }
+			bool is_minus() const { return !map_is_plus_; }
+			int sign() const { return map_is_plus_ ? +1 : -1; }
+			fp_t fp_sign() const { return map_is_plus_ ? +1.0 : -1.0; }
+
+			// the mapping itself
+			int map_unchecked(int i) const
+			{
+				return map_is_plus_ ? offset_ + i
+									: offset_ - i;
+			}
+			int inv_map_unchecked(int j) const
+			{
+				return map_is_plus_ ? j - offset_
+									: offset_ - j;
+			}
+			int map(int i) const
+			{
+				assert(in_domain(i));
+				return map_unchecked(i);
+			}
+			int inv_map(int j) const
+			{
+				int i = inv_map_unchecked(j);
+				assert(in_domain(i));
+				return i;
+			}
+
+			// bounds info -- range
+			// ... we use the unchecked map here in case the domain is empty
+			int min_j() const
+			{
+				return map_is_plus_ ? map_unchecked(min_i_)
+									: map_unchecked(max_i_);
+			}
+			int max_j() const
+			{
+				return map_is_plus_ ? map_unchecked(max_i_)
+									: map_unchecked(min_i_);
+			}
+			bool in_range(int j) const { return in_domain(inv_map_unchecked(j)); }
+
+			//
+			// constructors
+			//
+
+			// "mirror" map: i --> const - i
+			// ... map specified by fixed point (must be integer or half-integer)
+			// ... fixed point need not be in domain/range
+			cpm_map(int min_i_in, int max_i_in,
+					fp_t fixed_point);
+
+			// "shift" map: i --> const + i
+			// ... map specified by shift amount
+			// ... default is identity map
+			cpm_map(int min_i_in, int max_i_in,
+					int shift_amount = 0)
+				: min_i_(min_i_in), max_i_(max_i_in),
+				  offset_(shift_amount), map_is_plus_(true)
+			{
+			}
+
+			// generic map: i --> const +/- i
+			// ... map specified by sample point sample_i --> sample_j
+			//     and by sign (one of  {plus,minus}_map )
+			// ... sample point need not be in domain/range
+			cpm_map(int min_i_in, int max_i_in,
+					int sample_i, int sample_j,
+					bool map_is_plus_in);
+
+			// generic map: i --> const +/- i
+			// ... map specified by *fp* sample point sample_i --> sample_j
+			//     (must specify an integer --> integer mapping)
+			//     and by sign (one of  {plus,minus}_map )
+			// ... hence if sign is -1, then sample_i and sample_j
+			//     must both be half-integral
+			// ... sample point need *not* be in domain/range
+			cpm_map(int min_i_in, int max_i_in,
+					fp_t sample_i, fp_t sample_j,
+					bool map_is_plus_in);
+
+			// no need for explicit destructor, compiler-generated no-op is ok
+			// ditto for copy constructor and assignment operator
+
+		private:
+			// bounds (inclusive)
+			int min_i_, max_i_;
+
+			// these define the actual mapping
+			int offset_;
+			bool map_is_plus_;
+		};
+
+		//******************************************************************************
+
+	} // namespace jtutil
+} // namespace AHFinderDirect
+
+#endif /* AHFINDERDIRECT__CPM_MAP_HH */
--- a/AMSS_NCKU_source/AHF_Direct/driver.h
+++ b/AMSS_NCKU_source/AHF_Direct/driver.h
@@ -1,108 +1,108 @@
-#ifndef DRIVER_H
-#define DRIVER_H
-#include <stdio.h>
-#include <assert.h>
-#include <math.h>
-#include <string.h>
-
-#include "util_Table.h"
-#include "cctk.h"
-#include "config.h"
-#include "stdc.h"
-#include "util.h"
-#include "array.h"
-#include "cpm_map.h"
-#include "linear_map.h"
-
-#include "coords.h"
-#include "tgrid.h"
-#include "fd_grid.h"
-#include "patch.h"
-#include "patch_edge.h"
-#include "patch_interp.h"
-#include "ghost_zone.h"
-#include "patch_system.h"
-
-#include "Jacobian.h"
-
-#include "gfns.h"
-#include "gr.h"
-
-#include "horizon_sequence.h"
-#include "BH_diagnostics.h"
-
-namespace AHFinderDirect
-{
-	struct iteration_status_buffers
-	{
-		int *hn_buffer;
-		int *iteration_buffer;
-		enum expansion_status *expansion_status_buffer;
-		fp *mean_horizon_radius_buffer;
-		fp *Theta_infinity_norm_buffer;
-		bool *found_horizon_buffer;
-
-		jtutil::array2d<CCTK_REAL> *send_buffer_ptr;
-		jtutil::array2d<CCTK_REAL> *receive_buffer_ptr;
-
-		iteration_status_buffers()
-			: hn_buffer(NULL), iteration_buffer(NULL),
-			  expansion_status_buffer(NULL),
-			  mean_horizon_radius_buffer(NULL),
-			  Theta_infinity_norm_buffer(NULL),
-			  found_horizon_buffer(NULL),
-			  send_buffer_ptr(NULL), receive_buffer_ptr(NULL)
-		{
-		}
-	};
-
-	//
-	// This struct holds interprocessor-communication buffers for broadcasting
-	// the BH diagnostics and horizon shape from the processor which finds a
-	// given horizon, to all processors.
-	//
-	struct horizon_buffers
-	{
-		int N_buffer;
-		double *send_buffer;
-		double *receive_buffer;
-
-		horizon_buffers()
-			: N_buffer(0),
-			  send_buffer(NULL),
-			  receive_buffer(NULL)
-		{
-		}
-	};
-	//
-	struct AH_data
-	{
-		patch_system *ps_ptr;
-		Jacobian *Jac_ptr;
-		double surface_expansion;
-
-		bool initial_find_flag;
-		bool recentering_flag, stop_finding, find_trigger;
-
-		bool found_flag; // did we find this horizon (successfully)
-
-		struct BH_diagnostics BH_diagnostics;
-		FILE *BH_diagnostics_fileptr;
-
-		// interprocessor-communication buffers
-		// for this horizon's BH diagnostics and (optionally) horizon shape
-		struct horizon_buffers horizon_buffers;
-	};
-
-	// initial_guess.cc
-	void setup_initial_guess(patch_system &ps,
-							 fp x_center, fp y_center, fp z_center,
-							 fp x_radius, fp y_radius, fp z_radius);
-
-	// Newton.cc
-	void Newton(int N_procs, int N_active_procs, int my_proc,
-				horizon_sequence &hs, struct AH_data *const AH_data_array[],
-				struct iteration_status_buffers &isb, int *dumpid, double *);
-
-} // namespace AHFinderDirect
-#endif /*     DRIVER_H    */
+#ifndef DRIVER_H
+#define DRIVER_H
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+
+#include "util_Table.h"
+#include "cctk.h"
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "cpm_map.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "fd_grid.h"
+#include "patch.h"
+#include "patch_edge.h"
+#include "patch_interp.h"
+#include "ghost_zone.h"
+#include "patch_system.h"
+
+#include "Jacobian.h"
+
+#include "gfns.h"
+#include "gr.h"
+
+#include "horizon_sequence.h"
+#include "BH_diagnostics.h"
+
+namespace AHFinderDirect
+{
+	struct iteration_status_buffers
+	{
+		int *hn_buffer;
+		int *iteration_buffer;
+		enum expansion_status *expansion_status_buffer;
+		fp *mean_horizon_radius_buffer;
+		fp *Theta_infinity_norm_buffer;
+		bool *found_horizon_buffer;
+
+		jtutil::array2d<CCTK_REAL> *send_buffer_ptr;
+		jtutil::array2d<CCTK_REAL> *receive_buffer_ptr;
+
+		iteration_status_buffers()
+			: hn_buffer(NULL), iteration_buffer(NULL),
+			  expansion_status_buffer(NULL),
+			  mean_horizon_radius_buffer(NULL),
+			  Theta_infinity_norm_buffer(NULL),
+			  found_horizon_buffer(NULL),
+			  send_buffer_ptr(NULL), receive_buffer_ptr(NULL)
+		{
+		}
+	};
+
+	//
+	// This struct holds interprocessor-communication buffers for broadcasting
+	// the BH diagnostics and horizon shape from the processor which finds a
+	// given horizon, to all processors.
+	//
+	struct horizon_buffers
+	{
+		int N_buffer;
+		double *send_buffer;
+		double *receive_buffer;
+
+		horizon_buffers()
+			: N_buffer(0),
+			  send_buffer(NULL),
+			  receive_buffer(NULL)
+		{
+		}
+	};
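+	// This struct holds the state kept for a single horizon: its patch system, Jacobian, status flags, BH diagnostics, and communication buffers.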
+	struct AH_data
+	{
+		patch_system *ps_ptr;
+		Jacobian *Jac_ptr;
+		double surface_expansion;
+
+		bool initial_find_flag;
+		bool recentering_flag, stop_finding, find_trigger;
+
+		bool found_flag; // did we find this horizon (successfully)
+
+		struct BH_diagnostics BH_diagnostics;
+		FILE *BH_diagnostics_fileptr;
+
+		// interprocessor-communication buffers
+		// for this horizon's BH diagnostics and (optionally) horizon shape
+		struct horizon_buffers horizon_buffers;
+	};
+
+	// initial_guess.cc
+	void setup_initial_guess(patch_system &ps,
+							 fp x_center, fp y_center, fp z_center,
+							 fp x_radius, fp y_radius, fp z_radius);
+
+	// Newton.cc
+	void Newton(int N_procs, int N_active_procs, int my_proc,
+				horizon_sequence &hs, struct AH_data *const AH_data_array[],
+				struct iteration_status_buffers &isb, int *dumpid, double *);
+
+} // namespace AHFinderDirect
+#endif /*     DRIVER_H    */
--- a/AMSS_NCKU_source/AHF_Direct/error_exit.C
+++ b/AMSS_NCKU_source/AHF_Direct/error_exit.C
@@ -1,38 +1,38 @@
-#include <stdio.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "cctk.h"
-
-#include "config.h"
-#include "stdc.h"
-
-namespace AHFinderDirect
-{
-	namespace jtutil
-	{
-		int error_exit(int msg_level, const char *format, ...)
-		{
-			const int N_buffer = 2000;
-			char buffer[N_buffer];
-
-			va_list ap;
-			va_start(ap, format);
-			vsnprintf(buffer, N_buffer, format, ap);
-			va_end(ap);
-
-			const int len = strlen(buffer);
-			if ((len > 0) && (buffer[len - 1] == '\n'))
-				then buffer[len - 1] = '\0';
-
-			CCTK_VWarn(msg_level, __LINE__, __FILE__, CCTK_THORNSTRING, "%s", buffer);
-
-			// if we got here, evidently  msg_level  wasn't drastic enough
-			abort(); /*NOTREACHED*/
-		}
-
-		//******************************************************************************
-
-	} // namespace jtutil
-} // namespace AHFinderDirect
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+		int error_exit(int msg_level, const char *format, ...)
+		{
+			const int N_buffer = 2000;
+			char buffer[N_buffer];
+
+			va_list ap;
+			va_start(ap, format);
+			vsnprintf(buffer, N_buffer, format, ap);
+			va_end(ap);
+
+			const int len = strlen(buffer);
+			if ((len > 0) && (buffer[len - 1] == '\n'))
+				then buffer[len - 1] = '\0';
+
+			CCTK_VWarn(msg_level, __LINE__, __FILE__, CCTK_THORNSTRING, "%s", buffer);
+
+			// if we got here, evidently  msg_level  wasn't drastic enough
+			abort(); /*NOTREACHED*/
+		}
+
+		//******************************************************************************
+
+	} // namespace jtutil
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/expansion.C
+++ b/AMSS_NCKU_source/AHF_Direct/expansion.C
--- a/AMSS_NCKU_source/AHF_Direct/expansion_Jacobian.C
+++ b/AMSS_NCKU_source/AHF_Direct/expansion_Jacobian.C
@@ -1,386 +1,386 @@
-
-
-#include "macrodef.h"
-#ifdef With_AHF
-
-#include <stdio.h>
-#include <assert.h>
-#include <math.h>
-
-#include "util_Table.h"
-#include "cctk.h"
-
-#include "config.h"
-#include "stdc.h"
-#include "util.h"
-#include "array.h"
-#include "cpm_map.h"
-#include "linear_map.h"
-
-#include "coords.h"
-#include "tgrid.h"
-#include "fd_grid.h"
-#include "patch.h"
-#include "patch_edge.h"
-#include "patch_interp.h"
-#include "ghost_zone.h"
-#include "patch_system.h"
-
-#include "Jacobian.h"
-
-#include "gfns.h"
-#include "gr.h"
-
-namespace AHFinderDirect
-{
-	using jtutil::error_exit;
-
-	namespace
-	{
-
-		void expansion_Jacobian_partial_SD(patch_system &ps, Jacobian &Jac,
-										   bool print_msg_flag);
-
-		void add_ghost_zone_Jacobian(const patch_system &ps,
-									 Jacobian &Jac,
-									 fp mol,
-									 const patch &xp, const ghost_zone &xmgz,
-									 int x_II,
-									 int xm_irho, int xm_isigma);
-
-		enum expansion_status
-		expansion_Jacobian_dr_FD(patch_system *ps_ptr, Jacobian *Jac_ptr, fp add_to_expansion,
-								 bool initial_flag,
-								 bool print_msg_flag);
-	}
-
-	//******************************************************************************
-
-	//
-	// If ps_ptr != NULL and Jac_ptr != NULL, this function computes the
-	// Jacobian matrix J[Theta(h)] of the expansion Theta(h).  We assume
-	// that Theta(h) has already been computed.
-	//
-	// If ps_ptr == NULL and Jac_ptr == NULL, this function does a dummy
-	// computation, in which only any expansion() (and hence geometry
-	// interpolator) calls are done, these with the number of interpolation
-	// points set to 0 and all the output array pointers set to NULL.
-	//
-	// It's illegal for one but not both of ps_ptr and Jac_ptr to be NULL.
-	//
-	// Arguments:
-	// ps_ptr --> The patch system, or == NULL to do (only) a dummy computation.
-	// Jac_ptr --> The Jacobian, or == NULL to do (only) a dummy computation.
-	// add_to_expansion = A real number to add to the expansion.
-	//
-	// Results:
-	// This function returns a status code indicating whether the computation
-	// succeeded or failed, and if the latter, what caused the failure.
-	//
-	enum expansion_status
-	expansion_Jacobian(patch_system *ps_ptr, Jacobian *Jac_ptr,
-					   fp add_to_expansion,
-					   bool initial_flag,
-					   bool print_msg_flag /* = false */)
-	{
-		const bool active_flag = (ps_ptr != NULL) && (Jac_ptr != NULL);
-		enum expansion_status status;
-
-		if (active_flag)
-			then expansion_Jacobian_partial_SD(*ps_ptr, *Jac_ptr,
-											   print_msg_flag);
-		// this function looks at ps_ptr and Jac_ptr (non-NULL vs NULL)
-		// to choose a normal vs dummy computation
-		{
-			status = expansion_Jacobian_dr_FD(ps_ptr, Jac_ptr, add_to_expansion,
-											  initial_flag,
-											  print_msg_flag);
-			if (status != expansion_success)
-				then return status; // *** ERROR RETURN ***
-		}
-
-		return expansion_success; // *** NORMAL RETURN ***
-	}
-	//
-	// This function computes the partial derivative terms in the Jacobian
-	// matrix of the expansion Theta(h), by symbolic differentiation from
-	// the Jacobian coefficient (angular) gridfns.  The Jacobian is traversed
-	// by rows, using equation (25) of my 1996 apparent horizon finding paper.
-	//
-	// Inputs (angular gridfns, on ghosted grid):
-	//	h			# shape of trial surface
-	//	Theta			# Theta(h) assumed to already be computed
-	//	partial_Theta_wrt_partial_d_h	# Jacobian coefficients
-	//	partial_Theta_wrt_partial_dd_h	# (also assumed to already be computed)
-	//
-	// Outputs:
-	//	The Jacobian matrix is stored in the Jacobian object Jac.
-	//
-	namespace
-	{
-		void expansion_Jacobian_partial_SD(patch_system &ps, Jacobian &Jac,
-										   bool print_msg_flag)
-		{
-			Jac.zero_matrix();
-			ps.compute_synchronize_Jacobian();
-
-			for (int xpn = 0; xpn < ps.N_patches(); ++xpn)
-			{
-				patch &xp = ps.ith_patch(xpn);
-
-				for (int x_irho = xp.min_irho(); x_irho <= xp.max_irho(); ++x_irho)
-				{
-					for (int x_isigma = xp.min_isigma(); x_isigma <= xp.max_isigma(); ++x_isigma)
-					{
-						//
-						// compute the main Jacobian terms for this grid point, i.e.
-						//	partial Theta(this point x, Jacobian row II)
-						//	---------------------------------------------
-						//	partial h(other points y, Jacobian column JJ)
-						//
-
-						// Jacobian row index
-						const int II = ps.gpn_of_patch_irho_isigma(xp, x_irho, x_isigma);
-
-						// Jacobian coefficients for this point
-						const fp Jacobian_coeff_rho = xp.gridfn(gfns::gfn__partial_Theta_wrt_partial_d_h_1,
-																x_irho, x_isigma);
-						const fp Jacobian_coeff_sigma = xp.gridfn(gfns::gfn__partial_Theta_wrt_partial_d_h_2,
-																  x_irho, x_isigma);
-						const fp Jacobian_coeff_rho_rho = xp.gridfn(gfns::gfn__partial_Theta_wrt_partial_dd_h_11,
-																	x_irho, x_isigma);
-						const fp Jacobian_coeff_rho_sigma = xp.gridfn(gfns::gfn__partial_Theta_wrt_partial_dd_h_12,
-																	  x_irho, x_isigma);
-						const fp Jacobian_coeff_sigma_sigma = xp.gridfn(gfns::gfn__partial_Theta_wrt_partial_dd_h_22,
-																		x_irho, x_isigma);
-
-						// partial_rho, partial_rho_rho
-						{
-							for (int m_irho = xp.molecule_min_m();
-								 m_irho <= xp.molecule_max_m();
-								 ++m_irho)
-							{
-								const int xm_irho = x_irho + m_irho;
-								const fp Jac_rho = Jacobian_coeff_rho * xp.partial_rho_coeff(m_irho);
-								const fp Jac_rho_rho = Jacobian_coeff_rho_rho * xp.partial_rho_rho_coeff(m_irho);
-								const fp Jac_sum = Jac_rho + Jac_rho_rho;
-								if (xp.is_in_nominal_grid(xm_irho, x_isigma))
-									then
-									{
-										const int xm_JJ = Jac.II_of_patch_irho_isigma(xp, xm_irho, x_isigma);
-										Jac.sum_into_element(II, xm_JJ, Jac_sum);
-									}
-								else
-									add_ghost_zone_Jacobian(ps, Jac,
-															Jac_sum,
-															xp, xp.minmax_rho_ghost_zone(m_irho < 0),
-															II, xm_irho, x_isigma);
-							}
-						}
-
-						// partial_sigma, partial_sigma_sigma
-						{
-							for (int m_isigma = xp.molecule_min_m();
-								 m_isigma <= xp.molecule_max_m();
-								 ++m_isigma)
-							{
-								const int xm_isigma = x_isigma + m_isigma;
-								const fp Jac_sigma = Jacobian_coeff_sigma * xp.partial_sigma_coeff(m_isigma);
-								const fp Jac_sigma_sigma = Jacobian_coeff_sigma_sigma * xp.partial_sigma_sigma_coeff(m_isigma);
-								const fp Jac_sum = Jac_sigma + Jac_sigma_sigma;
-								if (xp.is_in_nominal_grid(x_irho, xm_isigma))
-									then
-									{
-										const int xm_JJ = Jac.II_of_patch_irho_isigma(xp, x_irho, xm_isigma);
-										Jac.sum_into_element(II, xm_JJ, Jac_sum);
-									}
-								else
-									add_ghost_zone_Jacobian(ps, Jac,
-															Jac_sum,
-															xp, xp.minmax_sigma_ghost_zone(m_isigma < 0),
-															II, x_irho, xm_isigma);
-							}
-						}
-
-						// partial_rho_sigma
-						{
-							for (int m_irho = xp.molecule_min_m();
-								 m_irho <= xp.molecule_max_m();
-								 ++m_irho)
-							{
-								for (int m_isigma = xp.molecule_min_m();
-									 m_isigma <= xp.molecule_max_m();
-									 ++m_isigma)
-								{
-									const int xm_irho = x_irho + m_irho;
-									const int xm_isigma = x_isigma + m_isigma;
-									const fp Jac_rho_sigma = Jacobian_coeff_rho_sigma * xp.partial_rho_sigma_coeff(m_irho, m_isigma);
-									if (xp.is_in_nominal_grid(xm_irho, xm_isigma))
-										then
-										{
-											const int xm_JJ = Jac.II_of_patch_irho_isigma(xp, xm_irho, xm_isigma);
-											Jac.sum_into_element(II, xm_JJ, Jac_rho_sigma);
-										}
-									else
-									{
-										const ghost_zone &xmgz = xp.corner_ghost_zone_containing_point(m_irho < 0, m_isigma < 0,
-																									   xm_irho, xm_isigma);
-										add_ghost_zone_Jacobian(ps, Jac,
-																Jac_rho_sigma,
-																xp, xmgz,
-																II, xm_irho, xm_isigma);
-									}
-								}
-							}
-						}
-					}
-				}
-			}
-		}
-	}
-
-	//******************************************************************************
-
-	//
-	// This function adds the ghost-zone Jacobian dependency contributions
-	// for a single ghost-zone point, to a Jacobian matrix.
-	//
-	// Arguments:
-	// ps = The patch system.
-	// Jac = (out) The Jacobian matrix.
-	// mol = The molecule coefficient.
-	// xp = The patch containing the center point of the molecule.
-	// xmgz = If the x+m point is in a ghost zone, this must be that ghost zone.
-	//	  If the x+m point is not in a ghost zone, this argument is ignored.
-	// x_II = The Jacobian row of the x point.
-	// xm_(irho,isigma) = The coordinates (in xp) of the x+m point of the molecule.
-
-	namespace
-	{
-		void add_ghost_zone_Jacobian(const patch_system &ps,
-									 Jacobian &Jac,
-									 fp mol,
-									 const patch &xp, const ghost_zone &xmgz,
-									 int x_II,
-									 int xm_irho, int xm_isigma)
-		{
-			const patch_edge &xme = xmgz.my_edge();
-			const int xm_iperp = xme.iperp_of_irho_isigma(xm_irho, xm_isigma);
-			const int xm_ipar = xme.ipar_of_irho_isigma(xm_irho, xm_isigma);
-
-			// FIXME: this won't change from one call to another
-			//        ==> it would be more efficient to reuse the same buffer
-			//            across multiple calls on this function
-			int global_min_ym, global_max_ym;
-			ps.synchronize_Jacobian_global_minmax_ym(global_min_ym, global_max_ym);
-			jtutil::array1d<fp> Jacobian_buffer(global_min_ym, global_max_ym);
-
-			// on what other points y does this molecule point xm depend
-			// via the patch_system::synchronize() operation?
-			int y_iperp;
-			int y_posn, min_ym, max_ym;
-			const patch_edge &ye = ps.synchronize_Jacobian(xmgz,
-														   xm_iperp, xm_ipar,
-														   y_iperp,
-														   y_posn, min_ym, max_ym,
-														   Jacobian_buffer);
-			patch &yp = ye.my_patch();
-
-			// add the Jacobian contributions from the ym points
-			for (int ym = min_ym; ym <= max_ym; ++ym)
-			{
-				const int y_ipar = y_posn + ym;
-				const int y_irho = ye.irho_of_iperp_ipar(y_iperp, y_ipar);
-				const int y_isigma = ye.isigma_of_iperp_ipar(y_iperp, y_ipar);
-				const int y_JJ = Jac.II_of_patch_irho_isigma(yp, y_irho, y_isigma);
-				Jac.sum_into_element(x_II, y_JJ, mol * Jacobian_buffer(ym));
-			}
-		}
-	}
-
-	//******************************************************************************
-
-	//
-	// If ps_ptr != NULL and Jac_ptr != NULL, this function sums the d/dr
-	// terms into the Jacobian matrix of the expansion Theta(h), computing
-	// those terms by finite differencing.
-	//
-	// If ps_ptr == NULL and Jac_ptr == NULL, this function does a dummy
-	// computation, in which only any expansion() (and hence geometry
-	// interpolator) calls are done, these with the number of interpolation
-	// points set to 0 and all the output array pointers set to NULL.
-	//
-	// It's illegal for one but not both of ps_ptr and Jac_ptr to be NULL.
-	//
-	// The basic algorithm is that
-	//	Jac += diag[ (Theta(h+epsilon) - Theta(h)) / epsilon ]
-	//
-	// Inputs (angular gridfns, on ghosted grid):
-	//	h			# shape of trial surface
-	//	Theta			# Theta(h) assumed to already be computed
-	//
-	// Outputs:
-	//	Jac += d/dr terms
-	//
-	// Results:
-	// This function returns a status code indicating whether the computation
-	// succeeded or failed, and if the latter, what caused the failure.
-	//
-	namespace
-	{
-		enum expansion_status
-		expansion_Jacobian_dr_FD(patch_system *ps_ptr, Jacobian *Jac_ptr, fp add_to_expansion,
-								 bool initial_flag,
-								 bool print_msg_flag)
-		{
-			const bool active_flag = (ps_ptr != NULL) && (Jac_ptr != NULL);
-
-			const double epsilon = 1e-6;
-			// compute Theta(h+epsilon)
-			if (active_flag)
-				then
-				{
-					ps_ptr->gridfn_copy(gfns::gfn__Theta, gfns::gfn__save_Theta);
-					ps_ptr->add_to_ghosted_gridfn(epsilon, gfns::gfn__h);
-				}
-			const enum expansion_status status = expansion(ps_ptr, add_to_expansion,
-														   initial_flag);
-			if (status != expansion_success)
-				then return status; // *** ERROR RETURN ***
-
-			if (active_flag)
-				then
-				{
-					for (int pn = 0; pn < ps_ptr->N_patches(); ++pn)
-					{
-						patch &p = ps_ptr->ith_patch(pn);
-						for (int irho = p.min_irho(); irho <= p.max_irho(); ++irho)
-						{
-							for (int isigma = p.min_isigma();
-								 isigma <= p.max_isigma();
-								 ++isigma)
-							{
-								const int II = ps_ptr->gpn_of_patch_irho_isigma(p, irho, isigma);
-								const fp old_Theta = p.gridfn(gfns::gfn__save_Theta,
-															  irho, isigma);
-								const fp new_Theta = p.gridfn(gfns::gfn__Theta,
-															  irho, isigma);
-								const fp d_dr_term = (new_Theta - old_Theta) / epsilon;
-								Jac_ptr->sum_into_element(II, II, d_dr_term);
-							}
-						}
-					}
-
-					// restore h and Theta
-					ps_ptr->add_to_ghosted_gridfn(-epsilon, gfns::gfn__h);
-					ps_ptr->gridfn_copy(gfns::gfn__save_Theta, gfns::gfn__Theta);
-				}
-
-			return expansion_success; // *** NORMAL RETURN ***
-		}
-	}
-
-	//******************************************************************************
-
-} // namespace AHFinderDirect
-#endif
+
+
+#include "macrodef.h"
+#ifdef With_AHF
+
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+
+#include "util_Table.h"
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "cpm_map.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "fd_grid.h"
+#include "patch.h"
+#include "patch_edge.h"
+#include "patch_interp.h"
+#include "ghost_zone.h"
+#include "patch_system.h"
+
+#include "Jacobian.h"
+
+#include "gfns.h"
+#include "gr.h"
+
+namespace AHFinderDirect
+{
+	using jtutil::error_exit;
+
+	namespace
+	{
+
+		void expansion_Jacobian_partial_SD(patch_system &ps, Jacobian &Jac,
+										   bool print_msg_flag);
+
+		void add_ghost_zone_Jacobian(const patch_system &ps,
+									 Jacobian &Jac,
+									 fp mol,
+									 const patch &xp, const ghost_zone &xmgz,
+									 int x_II,
+									 int xm_irho, int xm_isigma);
+
+		enum expansion_status
+		expansion_Jacobian_dr_FD(patch_system *ps_ptr, Jacobian *Jac_ptr, fp add_to_expansion,
+								 bool initial_flag,
+								 bool print_msg_flag);
+	}
+
+	//******************************************************************************
+
+	//
+	// If ps_ptr != NULL and Jac_ptr != NULL, this function computes the
+	// Jacobian matrix J[Theta(h)] of the expansion Theta(h).  We assume
+	// that Theta(h) has already been computed.
+	//
+	// If ps_ptr == NULL and Jac_ptr == NULL, this function does a dummy
+	// computation, in which only any expansion() (and hence geometry
+	// interpolator) calls are done, these with the number of interpolation
+	// points set to 0 and all the output array pointers set to NULL.
+	//
+	// It's illegal for one but not both of ps_ptr and Jac_ptr to be NULL.
+	//
+	// Arguments:
+	// ps_ptr --> The patch system, or == NULL to do (only) a dummy computation.
+	// Jac_ptr --> The Jacobian, or == NULL to do (only) a dummy computation.
+	// add_to_expansion = A real number to add to the expansion.
+	//
+	// Results:
+	// This function returns a status code indicating whether the computation
+	// succeeded or failed, and if the latter, what caused the failure.
+	//
+	enum expansion_status
+	expansion_Jacobian(patch_system *ps_ptr, Jacobian *Jac_ptr,
+					   fp add_to_expansion,
+					   bool initial_flag,
+					   bool print_msg_flag /* = false */)
+	{
+		const bool active_flag = (ps_ptr != NULL) && (Jac_ptr != NULL); // both non-NULL ==> real (non-dummy) computation
+		enum expansion_status status;
+
+		if (active_flag)
+			then expansion_Jacobian_partial_SD(*ps_ptr, *Jac_ptr,
+											   print_msg_flag);
+		// expansion_Jacobian_dr_FD() is always called: it looks at ps_ptr and Jac_ptr
+		// (non-NULL vs NULL) itself to choose a normal vs dummy computation
+		{
+			status = expansion_Jacobian_dr_FD(ps_ptr, Jac_ptr, add_to_expansion,
+											  initial_flag,
+											  print_msg_flag);
+			if (status != expansion_success)
+				then return status; // *** ERROR RETURN *** (status propagated unchanged)
+		}
+
+		return expansion_success; // *** NORMAL RETURN ***
+	}
+	//
+	// This function computes the partial derivative terms in the Jacobian
+	// matrix of the expansion Theta(h), by symbolic differentiation from
+	// the Jacobian coefficient (angular) gridfns.  The Jacobian is traversed
+	// by rows, using equation (25) of my 1996 apparent horizon finding paper.
+	//
+	// Inputs (angular gridfns, on ghosted grid):
+	//	h			# shape of trial surface
+	//	Theta			# Theta(h) assumed to already be computed
+	//	partial_Theta_wrt_partial_d_h	# Jacobian coefficients
+	//	partial_Theta_wrt_partial_dd_h	# (also assumed to already be computed)
+	//
+	// Outputs:
+	//	The Jacobian matrix is stored in the Jacobian object Jac.
+	//
+	namespace
+	{
+		void expansion_Jacobian_partial_SD(patch_system &ps, Jacobian &Jac,
+										   bool print_msg_flag) // print_msg_flag is not referenced in the body below
+		{
+			Jac.zero_matrix(); // every term below is accumulated with sum_into_element()
+			ps.compute_synchronize_Jacobian(); // presumably prepares what ps.synchronize_Jacobian() hands back later -- confirm
+
+			for (int xpn = 0; xpn < ps.N_patches(); ++xpn)
+			{
+				patch &xp = ps.ith_patch(xpn);
+
+				for (int x_irho = xp.min_irho(); x_irho <= xp.max_irho(); ++x_irho)
+				{
+					for (int x_isigma = xp.min_isigma(); x_isigma <= xp.max_isigma(); ++x_isigma)
+					{
+						//
+						// compute the main Jacobian terms for this grid point, i.e.
+						//	partial Theta(this point x, Jacobian row II)
+						//	---------------------------------------------
+						//	partial h(other points y, Jacobian column JJ)
+						//
+
+						// Jacobian row index (NOTE(review): rows come from ps.gpn_of_patch_irho_isigma(), columns from Jac.II_of_patch_irho_isigma(); presumably the same numbering -- confirm)
+						const int II = ps.gpn_of_patch_irho_isigma(xp, x_irho, x_isigma);
+
+						// Jacobian coefficients for this point
+						const fp Jacobian_coeff_rho = xp.gridfn(gfns::gfn__partial_Theta_wrt_partial_d_h_1,
+																x_irho, x_isigma);
+						const fp Jacobian_coeff_sigma = xp.gridfn(gfns::gfn__partial_Theta_wrt_partial_d_h_2,
+																  x_irho, x_isigma);
+						const fp Jacobian_coeff_rho_rho = xp.gridfn(gfns::gfn__partial_Theta_wrt_partial_dd_h_11,
+																	x_irho, x_isigma);
+						const fp Jacobian_coeff_rho_sigma = xp.gridfn(gfns::gfn__partial_Theta_wrt_partial_dd_h_12,
+																	  x_irho, x_isigma);
+						const fp Jacobian_coeff_sigma_sigma = xp.gridfn(gfns::gfn__partial_Theta_wrt_partial_dd_h_22,
+																		x_irho, x_isigma);
+
+						// partial_rho, partial_rho_rho: both molecules run along rho, so their terms are summed per molecule point
+						{
+							for (int m_irho = xp.molecule_min_m();
+								 m_irho <= xp.molecule_max_m();
+								 ++m_irho)
+							{
+								const int xm_irho = x_irho + m_irho;
+								const fp Jac_rho = Jacobian_coeff_rho * xp.partial_rho_coeff(m_irho);
+								const fp Jac_rho_rho = Jacobian_coeff_rho_rho * xp.partial_rho_rho_coeff(m_irho);
+								const fp Jac_sum = Jac_rho + Jac_rho_rho;
+								if (xp.is_in_nominal_grid(xm_irho, x_isigma))
+									then
+									{
+										const int xm_JJ = Jac.II_of_patch_irho_isigma(xp, xm_irho, x_isigma);
+										Jac.sum_into_element(II, xm_JJ, Jac_sum);
+									}
+								else // molecule point lies outside the nominal grid: route it through the ghost-zone helper
+									add_ghost_zone_Jacobian(ps, Jac,
+															Jac_sum,
+															xp, xp.minmax_rho_ghost_zone(m_irho < 0),
+															II, xm_irho, x_isigma);
+							}
+						}
+
+						// partial_sigma, partial_sigma_sigma: same scheme as above, along sigma
+						{
+							for (int m_isigma = xp.molecule_min_m();
+								 m_isigma <= xp.molecule_max_m();
+								 ++m_isigma)
+							{
+								const int xm_isigma = x_isigma + m_isigma;
+								const fp Jac_sigma = Jacobian_coeff_sigma * xp.partial_sigma_coeff(m_isigma);
+								const fp Jac_sigma_sigma = Jacobian_coeff_sigma_sigma * xp.partial_sigma_sigma_coeff(m_isigma);
+								const fp Jac_sum = Jac_sigma + Jac_sigma_sigma;
+								if (xp.is_in_nominal_grid(x_irho, xm_isigma))
+									then
+									{
+										const int xm_JJ = Jac.II_of_patch_irho_isigma(xp, x_irho, xm_isigma);
+										Jac.sum_into_element(II, xm_JJ, Jac_sum);
+									}
+								else // molecule point lies outside the nominal grid: route it through the ghost-zone helper
+									add_ghost_zone_Jacobian(ps, Jac,
+															Jac_sum,
+															xp, xp.minmax_sigma_ghost_zone(m_isigma < 0),
+															II, x_irho, xm_isigma);
+							}
+						}
+
+						// partial_rho_sigma: mixed derivative, so loop over every (m_irho, m_isigma) pair of the molecule
+						{
+							for (int m_irho = xp.molecule_min_m();
+								 m_irho <= xp.molecule_max_m();
+								 ++m_irho)
+							{
+								for (int m_isigma = xp.molecule_min_m();
+									 m_isigma <= xp.molecule_max_m();
+									 ++m_isigma)
+								{
+									const int xm_irho = x_irho + m_irho;
+									const int xm_isigma = x_isigma + m_isigma;
+									const fp Jac_rho_sigma = Jacobian_coeff_rho_sigma * xp.partial_rho_sigma_coeff(m_irho, m_isigma);
+									if (xp.is_in_nominal_grid(xm_irho, xm_isigma))
+										then
+										{
+											const int xm_JJ = Jac.II_of_patch_irho_isigma(xp, xm_irho, xm_isigma);
+											Jac.sum_into_element(II, xm_JJ, Jac_rho_sigma);
+										}
+									else
+									{
+										// the ghost zone is looked up from the signs of both offsets plus the point itself
+										const ghost_zone &xmgz = xp.corner_ghost_zone_containing_point(m_irho < 0, m_isigma < 0,
+																									   xm_irho, xm_isigma);
+										add_ghost_zone_Jacobian(ps, Jac,
+																Jac_rho_sigma,
+																xp, xmgz,
+																II, xm_irho, xm_isigma);
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+
+	//******************************************************************************
+
+	//
+	// This function adds the ghost-zone Jacobian dependency contributions
+	// for a single ghost-zone point, to a Jacobian matrix.
+	//
+	// Arguments:
+	// ps = The patch system.
+	// Jac = (out) The Jacobian matrix.
+	// mol = The molecule coefficient.
+	// xp = The patch containing the center point of the molecule.
+	// xmgz = The ghost zone containing the x+m point.  This function always
+	//	  uses it (it is never ignored), so it must be a valid ghost zone.
+	// x_II = The Jacobian row of the x point.
+	// xm_(irho,isigma) = The coordinates (in xp) of the x+m point of the molecule.
+
+	namespace
+	{
+		void add_ghost_zone_Jacobian(const patch_system &ps,
+									 Jacobian &Jac,
+									 fp mol,
+									 const patch &xp, const ghost_zone &xmgz, // xp is not referenced in the body below
+									 int x_II,
+									 int xm_irho, int xm_isigma)
+		{
+			const patch_edge &xme = xmgz.my_edge(); // xmgz is dereferenced unconditionally
+			const int xm_iperp = xme.iperp_of_irho_isigma(xm_irho, xm_isigma); // (irho,isigma) --> (iperp,ipar) relative to that edge
+			const int xm_ipar = xme.ipar_of_irho_isigma(xm_irho, xm_isigma);
+
+			// FIXME: this won't change from one call to another
+			//        ==> it would be more efficient to reuse the same buffer
+			//            across multiple calls on this function
+			int global_min_ym, global_max_ym;
+			ps.synchronize_Jacobian_global_minmax_ym(global_min_ym, global_max_ym);
+			jtutil::array1d<fp> Jacobian_buffer(global_min_ym, global_max_ym); // constructed afresh on every call (see FIXME)
+
+			// on what other points y does this molecule point xm depend
+			// via the patch_system::synchronize() operation?
+			int y_iperp;
+			int y_posn, min_ym, max_ym;
+			const patch_edge &ye = ps.synchronize_Jacobian(xmgz,
+														   xm_iperp, xm_ipar,
+														   y_iperp,
+														   y_posn, min_ym, max_ym,
+														   Jacobian_buffer);
+			patch &yp = ye.my_patch(); // patch that owns the y points (taken from the returned edge, not from xp)
+
+			// add the Jacobian contributions from the ym points
+			for (int ym = min_ym; ym <= max_ym; ++ym)
+			{
+				const int y_ipar = y_posn + ym;
+				const int y_irho = ye.irho_of_iperp_ipar(y_iperp, y_ipar);
+				const int y_isigma = ye.isigma_of_iperp_ipar(y_iperp, y_ipar);
+				const int y_JJ = Jac.II_of_patch_irho_isigma(yp, y_irho, y_isigma);
+				Jac.sum_into_element(x_II, y_JJ, mol * Jacobian_buffer(ym)); // presumably Jacobian_buffer(ym) = partial(xm)/partial(y) -- confirm
+			}
+		}
+	}
+
+	//******************************************************************************
+
+	//
+	// If ps_ptr != NULL and Jac_ptr != NULL, this function sums the d/dr
+	// terms into the Jacobian matrix of the expansion Theta(h), computing
+	// those terms by finite differencing.
+	//
+	// If ps_ptr == NULL and Jac_ptr == NULL, this function does a dummy
+	// computation, in which only any expansion() (and hence geometry
+	// interpolator) calls are done, these with the number of interpolation
+	// points set to 0 and all the output array pointers set to NULL.
+	//
+	// It's illegal for one but not both of ps_ptr and Jac_ptr to be NULL.
+	//
+	// The basic algorithm is that
+	//	Jac += diag[ (Theta(h+epsilon) - Theta(h)) / epsilon ]
+	//
+	// Inputs (angular gridfns, on ghosted grid):
+	//	h			# shape of trial surface
+	//	Theta			# Theta(h) assumed to already be computed
+	//
+	// Outputs:
+	//	Jac += d/dr terms
+	//
+	// Results:
+	// This function returns a status code indicating whether the computation
+	// succeeded or failed, and if the latter, what caused the failure.
+	//
+	namespace
+	{
+		enum expansion_status
+		expansion_Jacobian_dr_FD(patch_system *ps_ptr, Jacobian *Jac_ptr, fp add_to_expansion,
+								 bool initial_flag,
+								 bool print_msg_flag) // print_msg_flag is not referenced in the body below
+		{
+			const bool active_flag = (ps_ptr != NULL) && (Jac_ptr != NULL); // both non-NULL ==> real (non-dummy) computation
+
+			const double epsilon = 1e-6; // finite-difference step added to h (declared double, not fp)
+			// compute Theta(h+epsilon)
+			if (active_flag)
+				then
+				{
+					ps_ptr->gridfn_copy(gfns::gfn__Theta, gfns::gfn__save_Theta); // keep Theta(h) in save_Theta for the difference below
+					ps_ptr->add_to_ghosted_gridfn(epsilon, gfns::gfn__h);
+				}
+			const enum expansion_status status = expansion(ps_ptr, add_to_expansion, // called in the dummy case too (ps_ptr may be NULL)
+														   initial_flag);
+			if (status != expansion_success)
+				then return status; // *** ERROR RETURN ***  NOTE(review): on this path h is left at h+epsilon and Theta is not restored -- confirm callers tolerate that
+
+			if (active_flag)
+				then
+				{
+					for (int pn = 0; pn < ps_ptr->N_patches(); ++pn)
+					{
+						patch &p = ps_ptr->ith_patch(pn);
+						for (int irho = p.min_irho(); irho <= p.max_irho(); ++irho)
+						{
+							for (int isigma = p.min_isigma();
+								 isigma <= p.max_isigma();
+								 ++isigma)
+							{
+								const int II = ps_ptr->gpn_of_patch_irho_isigma(p, irho, isigma);
+								const fp old_Theta = p.gridfn(gfns::gfn__save_Theta,
+															  irho, isigma);
+								const fp new_Theta = p.gridfn(gfns::gfn__Theta,
+															  irho, isigma);
+								const fp d_dr_term = (new_Theta - old_Theta) / epsilon; // one-sided (forward) difference
+								Jac_ptr->sum_into_element(II, II, d_dr_term); // diagonal element only
+							}
+						}
+					}
+
+					// restore h and Theta
+					ps_ptr->add_to_ghosted_gridfn(-epsilon, gfns::gfn__h);
+					ps_ptr->gridfn_copy(gfns::gfn__save_Theta, gfns::gfn__Theta);
+				}
+
+			return expansion_success; // *** NORMAL RETURN ***
+		}
+	}
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif
--- a/AMSS_NCKU_source/AHF_Direct/fd_grid.C
+++ b/AMSS_NCKU_source/AHF_Direct/fd_grid.C
@@ -1,79 +1,79 @@
-#include <stdio.h>
-#include <assert.h>
-#include <math.h>
-
-#include "cctk.h"
-
-#include "config.h"
-#include "stdc.h"
-#include "util.h"
-#include "array.h"
-#include "linear_map.h"
-
-#include "coords.h"
-#include "tgrid.h"
-#include "fd_grid.h"
-
-namespace AHFinderDirect
-{
-  using jtutil::error_exit;
-
-  //*****************************************************************************
-
-  //
-  // This function computes a single coefficient of a 1st derivative
-  // molecule, for unit grid spacing.
-  //
-  // static
-  fp fd_grid::dx_coeff(int m)
-  {
-    switch (m)
-    {
-    case -2:
-      return FD_GRID__ORDER4__DX__COEFF_M2;
-    case -1:
-      return FD_GRID__ORDER4__DX__COEFF_M1;
-    case 0:
-      return FD_GRID__ORDER4__DX__COEFF_0;
-    case +1:
-      return FD_GRID__ORDER4__DX__COEFF_P1;
-    case +2:
-      return FD_GRID__ORDER4__DX__COEFF_P2;
-
-    default:
-      cout << "***** fd_grid::dx_coeff(): m=" << m << " is outside order=4 molecule radius=" << FD_GRID__MOL_RADIUS << endl;
-      abort();
-    }
-  }
-
-  //*****************************************************************************
-
-  //
-  // This function computes a single coefficient of a 2nd derivative
-  // molecule, for unit grid spacing.
-  //
-  // static
-  fp fd_grid::dxx_coeff(int m)
-  {
-    switch (m)
-    {
-    case -2:
-      return FD_GRID__ORDER4__DXX__COEFF_M2;
-    case -1:
-      return FD_GRID__ORDER4__DXX__COEFF_M1;
-    case 0:
-      return FD_GRID__ORDER4__DXX__COEFF_0;
-    case +1:
-      return FD_GRID__ORDER4__DXX__COEFF_P1;
-    case +2:
-      return FD_GRID__ORDER4__DXX__COEFF_P2;
-
-    default:
-      cout << "***** fd_grid::dx_coeff(): m=" << m << " is outside order=4 molecule radius=" << FD_GRID__MOL_RADIUS << endl;
-      abort();
-    }
-  }
-
-  //******************************************************************************
-
-} // namespace AHFinderDirect
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "fd_grid.h"
+
+namespace AHFinderDirect
+{
+  using jtutil::error_exit;
+
+  //*****************************************************************************
+
+  //
+  // This function computes a single coefficient of a 1st derivative
+  // molecule, for unit grid spacing.
+  //
+  // static
+  fp fd_grid::dx_coeff(int m)
+  {
+    switch (m) // m = relative offset within the molecule; only -2..+2 are handled
+    {
+    case -2:
+      return FD_GRID__ORDER4__DX__COEFF_M2;
+    case -1:
+      return FD_GRID__ORDER4__DX__COEFF_M1;
+    case 0:
+      return FD_GRID__ORDER4__DX__COEFF_0;
+    case +1:
+      return FD_GRID__ORDER4__DX__COEFF_P1;
+    case +2:
+      return FD_GRID__ORDER4__DX__COEFF_P2;
+
+    default: // any other m: report on cout, then abort() (no value is returned)
+      cout << "***** fd_grid::dx_coeff(): m=" << m << " is outside order=4 molecule radius=" << FD_GRID__MOL_RADIUS << endl;
+      abort();
+    }
+  }
+
+  //*****************************************************************************
+
+  //
+  // This function computes a single coefficient of a 2nd derivative
+  // molecule, for unit grid spacing.
+  //
+  // static
+  fp fd_grid::dxx_coeff(int m)
+  {
+    switch (m) // m = relative offset within the molecule; only -2..+2 are handled
+    {
+    case -2:
+      return FD_GRID__ORDER4__DXX__COEFF_M2;
+    case -1:
+      return FD_GRID__ORDER4__DXX__COEFF_M1;
+    case 0:
+      return FD_GRID__ORDER4__DXX__COEFF_0;
+    case +1:
+      return FD_GRID__ORDER4__DXX__COEFF_P1;
+    case +2:
+      return FD_GRID__ORDER4__DXX__COEFF_P2;
+
+    default: // any other m: report on cout, then abort() (no value is returned)
+      cout << "***** fd_grid::dxx_coeff(): m=" << m << " is outside order=4 molecule radius=" << FD_GRID__MOL_RADIUS << endl;
+      abort();
+    }
+  }
+
+  //******************************************************************************
+
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/fd_grid.h
+++ b/AMSS_NCKU_source/AHF_Direct/fd_grid.h
@@ -1,459 +1,459 @@
-#ifndef FD_GRID_H
-#define FD_GRID_H
-namespace AHFinderDirect
-{
-
-	//******************************************************************************
-
-	//
-	// *** Implementation Notes -- Overview ***
-	//
-
-	//
-	// The key design problem for our finite differencing is how to
-	// implement an entire family of 5(9) finite difference operations in
-	// 2D(3D)
-	//
-	//	partial_rho		partial_sigma
-	//	partial_{rho,rho}	partial_{rho,sigma}
-	//				partial_{sigma,sigma}
-	//
-	//	partial_x		partial_y		partial_z
-	//	partial_xx		partial_xy		partial_xz
-	//				partial_yy		partial_yz
-	//							partial_zz
-	//
-	// without having to write out the finite differencing molecules multiple
-	// times, and while still preserving maximum inline-function efficiency.
-	// In particular, mixed 2nd-order derivative operations like partial_xy
-	// should be automatically composed from the two individual 1st derivative
-	// operations (partial_x and partial_y).
-	//
-
-	//
-	// Our basic approach is to define each finite difference molecule in
-	// a generic 1-dimensional form using an abstract "data(m)" interface.
-	// Here we use the terminology that a finite difference molecule is
-	// defined as
-	//	out[k] = sum(m) c[m] * in[k+m]
-	// where c[] is the vector/matrix of molecule coefficients, and m is
-	// the (integer) relative grid coordinate within a molecule.
-	//
-	// That is, for example, we define the usual 2nd order centered 1st
-	// derivative operator as
-	//	diff = 0.5*inv_delta_x*(data(+1) - data(-1))
-	// leaving unspecified just what the data source is.  We then use this
-	// with an appropriate data source (indexing along that gridfn array axis)
-	// for each directional derivative operation, and we compose two of
-	// these, using the first along x as the data source for the second
-	// along y, for the mixed 2nd-order derivative operation.
-	//
-
-	//******************************************************************************
-
-	//
-	// *** Implementation Notes -- Techniques using C++ Templates ***
-	//
-
-	//
-	// There are two plausible ways to use C++ templates
-	//	[C++ templates are described in detail in chapter 13 of
-	//	Stroustrup's "The C++ Programming Language" (3rd Edition),
-	//	hereinafter "C++PL", and chapter 15 of Stroustrup's
-	//	"The Design and Evolution of C++", hereinafter "D&EC++".]
-	// to write the sort of generic-at-compile-time code we want:
-	// - Template specializations for each axis, as discussed in D&EC++
-	//   section 15.10.3.
-	// - Overloaded functions for each axis, with an argument type
-	//   (possibly that of an extra unused argument) selecting the
-	//   appropriate axis and hence the appropriate function.  This
-	//   technique is discussed in D&EC++ section 15.6.3.1.
-	//
-	// Quoting from D&EC++ (section 15.6.3.1),
-	//
-	//	The fundamental observation is that every property
-	//	of a type or an algorithm can be represented by a
-	//	type (possibly defined specificaly to do exactly
-	//	that).  That done, such a type can be used to guide
-	//	the overload resolution to select a function that
-	//	depends on the desired property.  [...]
-	//
-	//	Please note that thanks to inlining this resolution
-	//	is done at compile-time, so the appropriate [...]
-	//	function will be called directly without any run-time
-	//	overhead.
-	//
-	// Quoting from C++PL3 (section 13.4),
-	//
-	//	Passing [...] operations as a template parameter has two
-	//	significant benefits compared to alternatives such as
-	//	passing pointers to functions.  Several operations can
-	//	be passed as a single argument with no run-time cost.
-	//	In addition, the [...] operators [passed this way] are
-	//	trivial to inline, whereas inlininkg a call through a
-	//	pointer to function requires exceptional attention from
-	//	a compiler.
-	//
-
-	//
-	// In my opinion the template-specialization design is cleaner, and it
-	// clearly has no run-time cost (whereas the overloaded-function design
-	// may have a run-time cost for constructing and passing unused objects),
-	// so we use it here.
-	//
-	// There are, however, two (non-fatal) problema with this approach:
-	// - Unfortunately, it appears C++ (or at least gcc 2.95.1) forbids
-	//   template specialization within a class, so some of the functions
-	//   which whould logically be class members, must instead be defined
-	//   outside any class.  We use the namespace  fd_stuff::  to hide
-	//   these from the outside world.
-	// - C++PL3, section C.13.3, states that
-	//	Only class templates can be template arguments.
-	//   so we have to use dummy classes around some of our template
-	//   functions.  To avoid extra constructor/destructor overhead, we
-	//   make these template functions static.
-	//
-
-	//******************************************************************************
-
-	//
-	// *** Implementation Notes -- Techniques using the C/C++ Preprocessor ***
-	//
-
-	//
-	// The fundamental problem with the template approaches is portability:
-	// Although the C++ standard describes powerful template facilities, not
-	// all C++ compilers yet fully support these.  As an alternative, we can
-	// use the C/C++ preprocessor.  This is ugly and dangerous (global names!),
-	// but is probably simpler than any of the template approaches.  It can
-	// provide the same finite differencing functionality and efficiency as
-	// the template-based approaches.
-	//
-	// Because of its greater portability, we use the preprocessor-based
-	// approach here.
-	//
-
-	//******************************************************************************
-
-	//
-	// *** Implementation Notes -- Run-Time Choice of Molecules ***
-	//
-	// *If* we want to allow the finite differencing scheme to be changed
-	// at run-time (e.g. from a parameter file), there are three plausible
-	// ways to do this:
-	// - Using  switch(molecule_type) , as is standard in C.  This is
-	//   simple, and for this particular application quite well-structured
-	//   and maintainable (there are only a few different molecule types,
-	//   all centralized in this file).
-	// - Using virtual functions, with  molecule  a virtual base class
-	//   and individual molecules derived from it.  This is elegant, but
-	//   may have some performance problems (below).  It also requires some
-	//   sort of switch-based "object factory" to interface with with the
-	//   molecule-choice parameters.
-	// - Write all the finite differencing code multiple times, once for
-	//   each finite differencing scheme.
-	//
-	// The typical use of these functions will be from within a loop over
-	// a whole grid.  In both cases we can expect excellent accuracy from
-	// modern hardware branch prediction (and thus minimal performance loss
-	// from the branching).  It's reasonable to expect a compiler to fully
-	// inline the switch-based code, exposing all the gridfn array subscriptings
-	// to strength reduction etc, but this is much trickier for the
-	// virtual-function--based code.  For this reason, the switch-based
-	// design seems superior to the virtual-function--based one.
-	//
-	// However, at present we don't implement any run-time selection: we
-	// "just" fix the finite differencing scheme at compile time via the
-	// preprocessor.
-	//
-
-	//******************************************************************************
-
-	//
-	// *** finite difference molecules ***
-	//
-
-	//**************************************
-
-	//
-	// define the actual molecules
-	//
-	// In the following macros, we first define all the distinct floating-
-	// -point numbers appearing in a molecules as "K" constants (all > 0),
-	// then define the actual derivative and its molecule coefficients
-	// using +/- the "K" constants, with multiplies by 1.0 elided and 0
-	// terms skipped in computing the derivative.  This (hopefully) gives
-	// maximum efficiency by avoiding the generated code loading the same
-	// constants multiple times.
-	//
-
-	//
-	// The molecule macros all take the following arguments:
-	// inv_delta_x_ = inverse of grid spacing in the finite differencing
-	//		  direction
-	// data_= a data-fetching function or macro: data_(ghosted_gfn, irho, isigma)
-	//	  is the data to be finite differenced
-	// irho_plus_m_ = a function or macro: irho_plus_m_(irho,m) returns the
-	//		  rho coordinate to be passed to data_() for the [m]
-	//		  molecule coefficient
-	// isigma_plus_m_ = same thing, for the sigma coordinate
-	//
-	// n.b. We grab the variables ghosted_gfn, irho, and isigma from the calling
-	//      environment, and we define assorted local variables as needed!
-	//
-
-	//**************************************
-
-	//
-	// 2nd order
-	//
-
-#define FD_GRID__ORDER2__MOL_RADIUS 1
-#define FD_GRID__ORDER2__MOL_DIAMETER 3
-
-#define FD_GRID__ORDER2__DX__KPM1 0.5
-#define FD_GRID__ORDER2__DX(inv_delta_x_, data_,                    \
-							irho_plus_m_, isigma_plus_m_)           \
-	const fp data_p1 = data_(ghosted_gfn,                           \
-							 irho_plus_m_(irho, +1),                \
-							 isigma_plus_m_(isigma, +1));           \
-	const fp data_m1 = data_(ghosted_gfn,                           \
-							 irho_plus_m_(irho, -1),                \
-							 isigma_plus_m_(isigma, -1));           \
-	const fp sum = FD_GRID__ORDER2__DX__KPM1 * (data_p1 - data_m1); \
-	return inv_delta_x_ * sum; /* end macro */
-#define FD_GRID__ORDER2__DX__COEFF_M1 (-FD_GRID__ORDER2__DX__KPM1)
-#define FD_GRID__ORDER2__DX__COEFF_0 0.0
-#define FD_GRID__ORDER2__DX__COEFF_P1 (+FD_GRID__ORDER2__DX__KPM1)
-
-#define FD_GRID__ORDER2__DXX__K0 2.0
-#define FD_GRID__ORDER2__DXX(inv_delta_x_, data_,                         \
-							 irho_plus_m_, isigma_plus_m_)                \
-	const fp data_p1 = data_(ghosted_gfn,                                 \
-							 irho_plus_m_(irho, +1),                      \
-							 isigma_plus_m_(isigma, +1));                 \
-	const fp data_0 = data_(ghosted_gfn,                                  \
-							irho_plus_m_(irho, 0),                        \
-							isigma_plus_m_(isigma, 0));                   \
-	const fp data_m1 = data_(ghosted_gfn,                                 \
-							 irho_plus_m_(irho, -1),                      \
-							 isigma_plus_m_(isigma, -1));                 \
-	const fp sum = data_m1 - FD_GRID__ORDER2__DXX__K0 * data_0 + data_p1; \
-	return jtutil::pow2(inv_delta_x_) * sum; /* end macro */
-#define FD_GRID__ORDER2__DXX__COEFF_M1 1.0
-#define FD_GRID__ORDER2__DXX__COEFF_0 (-FD_GRID__ORDER2__DXX__K0)
-#define FD_GRID__ORDER2__DXX__COEFF_P1 1.0
-
-	//**************************************
-
-	//
-	// 4th order
-	//
-
-#define FD_GRID__ORDER4__MOL_RADIUS 2
-#define FD_GRID__ORDER4__MOL_DIAMETER 5
-
-#define FD_GRID__ORDER4__DX__KPM2 (1.0 / 12.0)
-#define FD_GRID__ORDER4__DX__KPM1 (8.0 / 12.0)
-#define FD_GRID__ORDER4__DX(inv_delta_x_, data_,                                                                      \
-							irho_plus_m_, isigma_plus_m_)                                                             \
-	const fp data_p2 = data_(ghosted_gfn,                                                                             \
-							 irho_plus_m_(irho, +2),                                                                  \
-							 isigma_plus_m_(isigma, +2));                                                             \
-	const fp data_p1 = data_(ghosted_gfn,                                                                             \
-							 irho_plus_m_(irho, +1),                                                                  \
-							 isigma_plus_m_(isigma, +1));                                                             \
-	const fp data_m1 = data_(ghosted_gfn,                                                                             \
-							 irho_plus_m_(irho, -1),                                                                  \
-							 isigma_plus_m_(isigma, -1));                                                             \
-	const fp data_m2 = data_(ghosted_gfn,                                                                             \
-							 irho_plus_m_(irho, -2),                                                                  \
-							 isigma_plus_m_(isigma, -2));                                                             \
-	const fp sum = FD_GRID__ORDER4__DX__KPM1 * (data_p1 - data_m1) + FD_GRID__ORDER4__DX__KPM2 * (data_m2 - data_p2); \
-	/*  printf("(%2d %2d) %f %f %f %f\n",irho, isigma,data_m2, data_m1,data_p1, data_p2);*/                           \
-	return inv_delta_x_ * sum; /* end macro */
-#define FD_GRID__ORDER4__DX__COEFF_M2 (+FD_GRID__ORDER4__DX__KPM2)
-#define FD_GRID__ORDER4__DX__COEFF_M1 (-FD_GRID__ORDER4__DX__KPM1)
-#define FD_GRID__ORDER4__DX__COEFF_0 0.0
-#define FD_GRID__ORDER4__DX__COEFF_P1 (+FD_GRID__ORDER4__DX__KPM1)
-#define FD_GRID__ORDER4__DX__COEFF_P2 (-FD_GRID__ORDER4__DX__KPM2)
-
-	//**************************************
-
-#define FD_GRID__ORDER4__DXX__KPM2 (1.0 / 12.0)
-#define FD_GRID__ORDER4__DXX__KPM1 (16.0 / 12.0)
-#define FD_GRID__ORDER4__DXX__K0 (30.0 / 12.0)
-#define FD_GRID__ORDER4__DXX(inv_delta_x_, data_,                                                                                                            \
-							 irho_plus_m_, isigma_plus_m_)                                                                                                   \
-	const fp data_p2 = data_(ghosted_gfn,                                                                                                                    \
-							 irho_plus_m_(irho, +2),                                                                                                         \
-							 isigma_plus_m_(isigma, +2));                                                                                                    \
-	const fp data_p1 = data_(ghosted_gfn,                                                                                                                    \
-							 irho_plus_m_(irho, +1),                                                                                                         \
-							 isigma_plus_m_(isigma, +1));                                                                                                    \
-	const fp data_0 = data_(ghosted_gfn,                                                                                                                     \
-							irho_plus_m_(irho, 0),                                                                                                           \
-							isigma_plus_m_(isigma, 0));                                                                                                      \
-	const fp data_m1 = data_(ghosted_gfn,                                                                                                                    \
-							 irho_plus_m_(irho, -1),                                                                                                         \
-							 isigma_plus_m_(isigma, -1));                                                                                                    \
-	const fp data_m2 = data_(ghosted_gfn,                                                                                                                    \
-							 irho_plus_m_(irho, -2),                                                                                                         \
-							 isigma_plus_m_(isigma, -2));                                                                                                    \
-	const fp sum = -FD_GRID__ORDER4__DXX__K0 * data_0 + FD_GRID__ORDER4__DXX__KPM1 * (data_m1 + data_p1) - FD_GRID__ORDER4__DXX__KPM2 * (data_m2 + data_p2); \
-	return jtutil::pow2(inv_delta_x_) * sum; /* end macro */
-#define FD_GRID__ORDER4__DXX__COEFF_M2 (-FD_GRID__ORDER4__DXX__KPM2)
-#define FD_GRID__ORDER4__DXX__COEFF_M1 (+FD_GRID__ORDER4__DXX__KPM1)
-#define FD_GRID__ORDER4__DXX__COEFF_0 (-FD_GRID__ORDER4__DXX__K0)
-#define FD_GRID__ORDER4__DXX__COEFF_P1 (+FD_GRID__ORDER4__DXX__KPM1)
-#define FD_GRID__ORDER4__DXX__COEFF_P2 (-FD_GRID__ORDER4__DXX__KPM2)
-
-	//******************************************************************************
-#define FD_GRID__MOL_RADIUS FD_GRID__ORDER4__MOL_RADIUS
-#define FD_GRID__MOL_DIAMETER FD_GRID__ORDER4__MOL_DIAMETER
-#define FD_GRID__DX FD_GRID__ORDER4__DX
-#define FD_GRID__DXX FD_GRID__ORDER4__DXX
-
-#define FD_GRID__MOL_AREA (FD_GRID__MOL_DIAMETER * FD_GRID__MOL_DIAMETER)
-
-	//******************************************************************************
-
-	//
-	// ***** fd_grid - grid with finite differencing operations *****
-	//
-	// An  fd_grid  is identical to a  grid  except that it also defines
-	// (rho,sigma)-coordinate finite differencing operations on gridfns.
-	//
-
-	class fd_grid
-		: public grid
-	{
-		//
-		// molecule sizes
-		//
-	public:
-		// n.b. this interface implicitly assumes that all molecules
-		//      are centered and are the same order and size
-		static int finite_diff_order() { return 4; }
-		static int molecule_radius() { return FD_GRID__MOL_RADIUS; }
-		static int molecule_diameter() { return FD_GRID__MOL_DIAMETER; }
-		static int molecule_min_m() { return -FD_GRID__MOL_RADIUS; }
-		static int molecule_max_m() { return FD_GRID__MOL_RADIUS; }
-
-		//
-		// helper functions to compute (irho,isigma) + [m]
-		// along each axis
-		//
-	private:
-		static int rho_axis__irho_plus_m(int irho, int m) { return irho + m; }
-		static int rho_axis__isigma_plus_m(int isigma, int m) { return isigma; }
-		static int sigma_axis__irho_plus_m(int irho, int m) { return irho; }
-		static int sigma_axis__isigma_plus_m(int isigma, int m) { return isigma + m; }
-
-		//
-		// ***** finite differencing *****
-		//
-	public:
-		// 1st derivatives
-		fp partial_rho(int ghosted_gfn, int irho, int isigma)
-			const
-		{
-			FD_GRID__DX(inverse_delta_rho(),
-						ghosted_gridfn,
-						rho_axis__irho_plus_m,
-						rho_axis__isigma_plus_m);
-		}
-		fp partial_sigma(int ghosted_gfn, int irho, int isigma)
-			const
-		{
-			FD_GRID__DX(inverse_delta_sigma(),
-						ghosted_gridfn,
-						sigma_axis__irho_plus_m,
-						sigma_axis__isigma_plus_m);
-		}
-
-		// "pure" 2nd derivatives
-		fp partial_rho_rho(int ghosted_gfn, int irho, int isigma)
-			const
-		{
-			FD_GRID__DXX(inverse_delta_rho(),
-						 ghosted_gridfn,
-						 rho_axis__irho_plus_m,
-						 rho_axis__isigma_plus_m);
-		}
-		fp partial_sigma_sigma(int ghosted_gfn, int irho, int isigma)
-			const
-		{
-			FD_GRID__DXX(inverse_delta_sigma(),
-						 ghosted_gridfn,
-						 sigma_axis__irho_plus_m,
-						 sigma_axis__isigma_plus_m);
-		}
-
-		// mixed 2nd partial derivative
-		fp partial_rho_sigma(int ghosted_gfn, int irho, int isigma)
-			const
-		{
-			FD_GRID__DX(inverse_delta_rho(),
-						partial_sigma,
-						rho_axis__irho_plus_m,
-						rho_axis__isigma_plus_m);
-		}
-
-		//
-		// ***** molecule coefficients *****
-		//
-	public:
-		// molecule coefficients
-		// n.b. this interface implicitly assumes that all molecules
-		//      are position-independent
-		fp partial_rho_coeff(int m) const
-		{
-			return inverse_delta_rho() * dx_coeff(m);
-		}
-		fp partial_sigma_coeff(int m) const
-		{
-			return inverse_delta_sigma() * dx_coeff(m);
-		}
-		fp partial_rho_rho_coeff(int m) const
-		{
-			return jtutil::pow2(inverse_delta_rho()) * dxx_coeff(m);
-		}
-		fp partial_sigma_sigma_coeff(int m) const
-		{
-			return jtutil::pow2(inverse_delta_sigma()) * dxx_coeff(m);
-		}
-		fp partial_rho_sigma_coeff(int m_rho, int m_sigma) const
-		{
-			return partial_rho_coeff(m_rho) * partial_sigma_coeff(m_sigma);
-		}
-
-		// worker functions: molecule coefficients for unit grid spacing
-	private:
-		static fp dx_coeff(int m);
-		static fp dxx_coeff(int m);
-
-		//
-		// ***** constructor, destructor *****
-		//
-	public:
-		// constructor: pass through to grid:: constructor
-		fd_grid(const grid_array_pars &grid_array_pars_in,
-				const grid_pars &grid_pars_in)
-			: grid(grid_array_pars_in, grid_pars_in)
-		{
-		}
-		// compiler-generated default destructor is ok
-
-	private:
-		// we forbid copying and passing by value
-		// by declaring the copy constructor and assignment operator
-		// private, but never defining them
-		fd_grid(const fd_grid &rhs);
-		fd_grid &operator=(const fd_grid &rhs);
-	};
-
-	//******************************************************************************
-
-} // namespace AHFinderDirect
-#endif /* FD_GRID_H  */
+#ifndef FD_GRID_H
+#define FD_GRID_H
+namespace AHFinderDirect
+{
+
+	//******************************************************************************
+
+	//
+	// *** Implementation Notes -- Overview ***
+	//
+
+	//
+	// The key design problem for our finite differencing is how to
+	// implement an entire family of 5(9) finite difference operations in
+	// 2D(3D)
+	//
+	//	partial_rho		partial_sigma
+	//	partial_{rho,rho}	partial_{rho,sigma}
+	//				partial_{sigma,sigma}
+	//
+	//	partial_x		partial_y		partial_z
+	//	partial_xx		partial_xy		partial_xz
+	//				partial_yy		partial_yz
+	//							partial_zz
+	//
+	// without having to write out the finite differencing molecules multiple
+	// times, and while still preserving maximum inline-function efficiency.
+	// In particular, mixed 2nd-order derivative operations like partial_xy
+	// should be automatically composed from the two individual 1st derivative
+	// operations (partial_x and partial_y).
+	//
+
+	//
+	// Our basic approach is to define each finite difference molecule in
+	// a generic 1-dimensional form using an abstract "data(m)" interface.
+	// Here we use the terminology that a finite difference molecule is
+	// defined as
+	//	out[k] = sum(m) c[m] * in[k+m]
+	// where c[] is the vector/matrix of molecule coefficients, and m is
+	// the (integer) relative grid coordinate within a molecule.
+	//
+	// That is, for example, we define the usual 2nd order centered 1st
+	// derivative operator as
+	//	diff = 0.5*inv_delta_x*(data(+1) - data(-1))
+	// leaving unspecified just what the data source is.  We then use this
+	// with an appropriate data source (indexing along that gridfn array axis)
+	// for each directional derivative operation, and we compose two of
+	// these, using the first along x as the data source for the second
+	// along y, for the mixed 2nd-order derivative operation.
+	//
+
+	//******************************************************************************
+
+	//
+	// *** Implementation Notes -- Techniques using C++ Templates ***
+	//
+
+	//
+	// There are two plausible ways to use C++ templates
+	//	[C++ templates are described in detail in chapter 13 of
+	//	Stroustrup's "The C++ Programming Language" (3rd Edition),
+	//	hereinafter "C++PL", and chapter 15 of Stroustrup's
+	//	"The Design and Evolution of C++", hereinafter "D&EC++".]
+	// to write the sort of generic-at-compile-time code we want:
+	// - Template specializations for each axis, as discussed in D&EC++
+	//   section 15.10.3.
+	// - Overloaded functions for each axis, with an argument type
+	//   (possibly that of an extra unused argument) selecting the
+	//   appropriate axis and hence the appropriate function.  This
+	//   technique is discussed in D&EC++ section 15.6.3.1.
+	//
+	// Quoting from D&EC++ (section 15.6.3.1),
+	//
+	//	The fundamental observation is that every property
+	//	of a type or an algorithm can be represented by a
+	//	type (possibly defined specifically to do exactly
+	//	that).  That done, such a type can be used to guide
+	//	the overload resolution to select a function that
+	//	depends on the desired property.  [...]
+	//
+	//	Please note that thanks to inlining this resolution
+	//	is done at compile-time, so the appropriate [...]
+	//	function will be called directly without any run-time
+	//	overhead.
+	//
+	// Quoting from C++PL3 (section 13.4),
+	//
+	//	Passing [...] operations as a template parameter has two
+	//	significant benefits compared to alternatives such as
+	//	passing pointers to functions.  Several operations can
+	//	be passed as a single argument with no run-time cost.
+	//	In addition, the [...] operators [passed this way] are
+	//	trivial to inline, whereas inlining a call through a
+	//	pointer to function requires exceptional attention from
+	//	a compiler.
+	//
+
+	//
+	// In my opinion the template-specialization design is cleaner, and it
+	// clearly has no run-time cost (whereas the overloaded-function design
+	// may have a run-time cost for constructing and passing unused objects),
+	// so we use it here.
+	//
+	// There are, however, two (non-fatal) problems with this approach:
+	// - Unfortunately, it appears C++ (or at least gcc 2.95.1) forbids
+	//   template specialization within a class, so some of the functions
+	//   which would logically be class members, must instead be defined
+	//   outside any class.  We use the namespace  fd_stuff::  to hide
+	//   these from the outside world.
+	// - C++PL3, section C.13.3, states that
+	//	Only class templates can be template arguments.
+	//   so we have to use dummy classes around some of our template
+	//   functions.  To avoid extra constructor/destructor overhead, we
+	//   make these template functions static.
+	//
+
+	//******************************************************************************
+
+	//
+	// *** Implementation Notes -- Techniques using the C/C++ Preprocessor ***
+	//
+
+	//
+	// The fundamental problem with the template approaches is portability:
+	// Although the C++ standard describes powerful template facilities, not
+	// all C++ compilers yet fully support these.  As an alternative, we can
+	// use the C/C++ preprocessor.  This is ugly and dangerous (global names!),
+	// but is probably simpler than any of the template approaches.  It can
+	// provide the same finite differencing functionality and efficiency as
+	// the template-based approaches.
+	//
+	// Because of its greater portability, we use the preprocessor-based
+	// approach here.
+	//
+
+	//******************************************************************************
+
+	//
+	// *** Implementation Notes -- Run-Time Choice of Molecules ***
+	//
+	// *If* we want to allow the finite differencing scheme to be changed
+	// at run-time (e.g. from a parameter file), there are three plausible
+	// ways to do this:
+	// - Using  switch(molecule_type) , as is standard in C.  This is
+	//   simple, and for this particular application quite well-structured
+	//   and maintainable (there are only a few different molecule types,
+	//   all centralized in this file).
+	// - Using virtual functions, with  molecule  a virtual base class
+	//   and individual molecules derived from it.  This is elegant, but
+	//   may have some performance problems (below).  It also requires some
+	//   sort of switch-based "object factory" to interface with the
+	//   molecule-choice parameters.
+	// - Write all the finite differencing code multiple times, once for
+	//   each finite differencing scheme.
+	//
+	// The typical use of these functions will be from within a loop over
+	// a whole grid.  In both cases we can expect excellent accuracy from
+	// modern hardware branch prediction (and thus minimal performance loss
+	// from the branching).  It's reasonable to expect a compiler to fully
+	// inline the switch-based code, exposing all the gridfn array subscriptings
+	// to strength reduction etc, but this is much trickier for the
+	// virtual-function--based code.  For this reason, the switch-based
+	// design seems superior to the virtual-function--based one.
+	//
+	// However, at present we don't implement any run-time selection: we
+	// "just" fix the finite differencing scheme at compile time via the
+	// preprocessor.
+	//
+
+	//******************************************************************************
+
+	//
+	// *** finite difference molecules ***
+	//
+
+	//**************************************
+
+	//
+	// define the actual molecules
+	//
+	// In the following macros, we first define all the distinct floating-
+	// point numbers appearing in a molecule as "K" constants (all > 0),
+	// then define the actual derivative and its molecule coefficients
+	// using +/- the "K" constants, with multiplies by 1.0 elided and 0
+	// terms skipped in computing the derivative.  This (hopefully) gives
+	// maximum efficiency by avoiding the generated code loading the same
+	// constants multiple times.
+	//
+
+	//
+	// The molecule macros all take the following arguments:
+	// inv_delta_x_ = inverse of grid spacing in the finite differencing
+	//		  direction
+	// data_= a data-fetching function or macro: data_(ghosted_gfn, irho, isigma)
+	//	  is the data to be finite differenced
+	// irho_plus_m_ = a function or macro: irho_plus_m_(irho,m) returns the
+	//		  rho coordinate to be passed to data_() for the [m]
+	//		  molecule coefficient
+	// isigma_plus_m_ = same thing, for the sigma coordinate
+	//
+	// n.b. We grab the variables ghosted_gfn, irho, and isigma from the calling
+	//      environment, and we define assorted local variables as needed!
+	//
+
+	//**************************************
+
+	//
+	// 2nd order
+	//
+
+#define FD_GRID__ORDER2__MOL_RADIUS 1
+#define FD_GRID__ORDER2__MOL_DIAMETER 3
+
+#define FD_GRID__ORDER2__DX__KPM1 0.5
+#define FD_GRID__ORDER2__DX(inv_delta_x_, data_,                    \
+							irho_plus_m_, isigma_plus_m_)           \
+	const fp data_p1 = data_(ghosted_gfn,                           \
+							 irho_plus_m_(irho, +1),                \
+							 isigma_plus_m_(isigma, +1));           \
+	const fp data_m1 = data_(ghosted_gfn,                           \
+							 irho_plus_m_(irho, -1),                \
+							 isigma_plus_m_(isigma, -1));           \
+	const fp sum = FD_GRID__ORDER2__DX__KPM1 * (data_p1 - data_m1); \
+	return inv_delta_x_ * sum; /* end macro */
+#define FD_GRID__ORDER2__DX__COEFF_M1 (-FD_GRID__ORDER2__DX__KPM1)
+#define FD_GRID__ORDER2__DX__COEFF_0 0.0
+#define FD_GRID__ORDER2__DX__COEFF_P1 (+FD_GRID__ORDER2__DX__KPM1)
+
+#define FD_GRID__ORDER2__DXX__K0 2.0
+#define FD_GRID__ORDER2__DXX(inv_delta_x_, data_,                         \
+							 irho_plus_m_, isigma_plus_m_)                \
+	const fp data_p1 = data_(ghosted_gfn,                                 \
+							 irho_plus_m_(irho, +1),                      \
+							 isigma_plus_m_(isigma, +1));                 \
+	const fp data_0 = data_(ghosted_gfn,                                  \
+							irho_plus_m_(irho, 0),                        \
+							isigma_plus_m_(isigma, 0));                   \
+	const fp data_m1 = data_(ghosted_gfn,                                 \
+							 irho_plus_m_(irho, -1),                      \
+							 isigma_plus_m_(isigma, -1));                 \
+	const fp sum = data_m1 - FD_GRID__ORDER2__DXX__K0 * data_0 + data_p1; \
+	return jtutil::pow2(inv_delta_x_) * sum; /* end macro */
+#define FD_GRID__ORDER2__DXX__COEFF_M1 1.0
+#define FD_GRID__ORDER2__DXX__COEFF_0 (-FD_GRID__ORDER2__DXX__K0)
+#define FD_GRID__ORDER2__DXX__COEFF_P1 1.0
+
+	//**************************************
+
+	//
+	// 4th order
+	//
+
+#define FD_GRID__ORDER4__MOL_RADIUS 2
+#define FD_GRID__ORDER4__MOL_DIAMETER 5
+
+#define FD_GRID__ORDER4__DX__KPM2 (1.0 / 12.0)
+#define FD_GRID__ORDER4__DX__KPM1 (8.0 / 12.0)
+#define FD_GRID__ORDER4__DX(inv_delta_x_, data_,                                                                      \
+							irho_plus_m_, isigma_plus_m_)                                                             \
+	const fp data_p2 = data_(ghosted_gfn,                                                                             \
+							 irho_plus_m_(irho, +2),                                                                  \
+							 isigma_plus_m_(isigma, +2));                                                             \
+	const fp data_p1 = data_(ghosted_gfn,                                                                             \
+							 irho_plus_m_(irho, +1),                                                                  \
+							 isigma_plus_m_(isigma, +1));                                                             \
+	const fp data_m1 = data_(ghosted_gfn,                                                                             \
+							 irho_plus_m_(irho, -1),                                                                  \
+							 isigma_plus_m_(isigma, -1));                                                             \
+	const fp data_m2 = data_(ghosted_gfn,                                                                             \
+							 irho_plus_m_(irho, -2),                                                                  \
+							 isigma_plus_m_(isigma, -2));                                                             \
+	const fp sum = FD_GRID__ORDER4__DX__KPM1 * (data_p1 - data_m1) + FD_GRID__ORDER4__DX__KPM2 * (data_m2 - data_p2); \
+	/*  printf("(%2d %2d) %f %f %f %f\n",irho, isigma,data_m2, data_m1,data_p1, data_p2);*/                           \
+	return inv_delta_x_ * sum; /* end macro */
+#define FD_GRID__ORDER4__DX__COEFF_M2 (+FD_GRID__ORDER4__DX__KPM2)
+#define FD_GRID__ORDER4__DX__COEFF_M1 (-FD_GRID__ORDER4__DX__KPM1)
+#define FD_GRID__ORDER4__DX__COEFF_0 0.0
+#define FD_GRID__ORDER4__DX__COEFF_P1 (+FD_GRID__ORDER4__DX__KPM1)
+#define FD_GRID__ORDER4__DX__COEFF_P2 (-FD_GRID__ORDER4__DX__KPM2)
+
+	//**************************************
+
+#define FD_GRID__ORDER4__DXX__KPM2 (1.0 / 12.0)
+#define FD_GRID__ORDER4__DXX__KPM1 (16.0 / 12.0)
+#define FD_GRID__ORDER4__DXX__K0 (30.0 / 12.0)
+#define FD_GRID__ORDER4__DXX(inv_delta_x_, data_,                                                                                                            \
+							 irho_plus_m_, isigma_plus_m_)                                                                                                   \
+	const fp data_p2 = data_(ghosted_gfn,                                                                                                                    \
+							 irho_plus_m_(irho, +2),                                                                                                         \
+							 isigma_plus_m_(isigma, +2));                                                                                                    \
+	const fp data_p1 = data_(ghosted_gfn,                                                                                                                    \
+							 irho_plus_m_(irho, +1),                                                                                                         \
+							 isigma_plus_m_(isigma, +1));                                                                                                    \
+	const fp data_0 = data_(ghosted_gfn,                                                                                                                     \
+							irho_plus_m_(irho, 0),                                                                                                           \
+							isigma_plus_m_(isigma, 0));                                                                                                      \
+	const fp data_m1 = data_(ghosted_gfn,                                                                                                                    \
+							 irho_plus_m_(irho, -1),                                                                                                         \
+							 isigma_plus_m_(isigma, -1));                                                                                                    \
+	const fp data_m2 = data_(ghosted_gfn,                                                                                                                    \
+							 irho_plus_m_(irho, -2),                                                                                                         \
+							 isigma_plus_m_(isigma, -2));                                                                                                    \
+	const fp sum = -FD_GRID__ORDER4__DXX__K0 * data_0 + FD_GRID__ORDER4__DXX__KPM1 * (data_m1 + data_p1) - FD_GRID__ORDER4__DXX__KPM2 * (data_m2 + data_p2); \
+	return jtutil::pow2(inv_delta_x_) * sum; /* end macro */
+#define FD_GRID__ORDER4__DXX__COEFF_M2 (-FD_GRID__ORDER4__DXX__KPM2)
+#define FD_GRID__ORDER4__DXX__COEFF_M1 (+FD_GRID__ORDER4__DXX__KPM1)
+#define FD_GRID__ORDER4__DXX__COEFF_0 (-FD_GRID__ORDER4__DXX__K0)
+#define FD_GRID__ORDER4__DXX__COEFF_P1 (+FD_GRID__ORDER4__DXX__KPM1)
+#define FD_GRID__ORDER4__DXX__COEFF_P2 (-FD_GRID__ORDER4__DXX__KPM2)
+
+	//******************************************************************************
+#define FD_GRID__MOL_RADIUS FD_GRID__ORDER4__MOL_RADIUS
+#define FD_GRID__MOL_DIAMETER FD_GRID__ORDER4__MOL_DIAMETER
+#define FD_GRID__DX FD_GRID__ORDER4__DX
+#define FD_GRID__DXX FD_GRID__ORDER4__DXX
+
+#define FD_GRID__MOL_AREA (FD_GRID__MOL_DIAMETER * FD_GRID__MOL_DIAMETER)
+
+	//******************************************************************************
+
+	//
+	// ***** fd_grid - grid with finite differencing operations *****
+	//
+	// An  fd_grid  is identical to a  grid  except that it also defines
+	// (rho,sigma)-coordinate finite differencing operations on gridfns.
+	//
+
+	class fd_grid
+		: public grid
+	{
+		//
+		// molecule sizes (radius/diameter come from the FD_GRID__MOL_* macros)
+		//
+	public:
+		// n.b. this interface implicitly assumes that all molecules
+		//      are centered and are the same order and size
+		static int finite_diff_order() { return 4; } // literal, not derived from the FD_GRID__* selection macros
+		static int molecule_radius() { return FD_GRID__MOL_RADIUS; }
+		static int molecule_diameter() { return FD_GRID__MOL_DIAMETER; }
+		static int molecule_min_m() { return -FD_GRID__MOL_RADIUS; }
+		static int molecule_max_m() { return FD_GRID__MOL_RADIUS; }
+
+		//
+		// helper functions to compute (irho,isigma) + [m]
+		// along each axis; the off-axis coordinate is returned unchanged
+		//
+	private:
+		static int rho_axis__irho_plus_m(int irho, int m) { return irho + m; }
+		static int rho_axis__isigma_plus_m(int isigma, int m) { return isigma; }
+		static int sigma_axis__irho_plus_m(int irho, int m) { return irho; }
+		static int sigma_axis__isigma_plus_m(int isigma, int m) { return isigma + m; }
+
+		//
+		// ***** finite differencing *****
+		// (each FD_GRID__* macro expands to local declarations plus the return statement)
+	public:
+		// 1st derivatives: FD_GRID__DX applied to ghosted_gridfn along one axis
+		fp partial_rho(int ghosted_gfn, int irho, int isigma)
+			const
+		{
+			FD_GRID__DX(inverse_delta_rho(),
+						ghosted_gridfn,
+						rho_axis__irho_plus_m,
+						rho_axis__isigma_plus_m);
+		}
+		fp partial_sigma(int ghosted_gfn, int irho, int isigma)
+			const
+		{
+			FD_GRID__DX(inverse_delta_sigma(),
+						ghosted_gridfn,
+						sigma_axis__irho_plus_m,
+						sigma_axis__isigma_plus_m);
+		}
+
+		// "pure" 2nd derivatives: FD_GRID__DXX applied to ghosted_gridfn along one axis
+		fp partial_rho_rho(int ghosted_gfn, int irho, int isigma)
+			const
+		{
+			FD_GRID__DXX(inverse_delta_rho(),
+						 ghosted_gridfn,
+						 rho_axis__irho_plus_m,
+						 rho_axis__isigma_plus_m);
+		}
+		fp partial_sigma_sigma(int ghosted_gfn, int irho, int isigma)
+			const
+		{
+			FD_GRID__DXX(inverse_delta_sigma(),
+						 ghosted_gridfn,
+						 sigma_axis__irho_plus_m,
+						 sigma_axis__isigma_plus_m);
+		}
+
+		// mixed 2nd partial derivative: FD_GRID__DX along rho, with partial_sigma as the data source
+		fp partial_rho_sigma(int ghosted_gfn, int irho, int isigma)
+			const
+		{
+			FD_GRID__DX(inverse_delta_rho(),
+						partial_sigma,
+						rho_axis__irho_plus_m,
+						rho_axis__isigma_plus_m);
+		}
+
+		//
+		// ***** molecule coefficients *****
+		//
+	public:
+		// molecule coefficients: unit-spacing coefficient times the matching power of the inverse grid spacing
+		// n.b. this interface implicitly assumes that all molecules
+		//      are position-independent
+		fp partial_rho_coeff(int m) const
+		{
+			return inverse_delta_rho() * dx_coeff(m);
+		}
+		fp partial_sigma_coeff(int m) const
+		{
+			return inverse_delta_sigma() * dx_coeff(m);
+		}
+		fp partial_rho_rho_coeff(int m) const
+		{
+			return jtutil::pow2(inverse_delta_rho()) * dxx_coeff(m);
+		}
+		fp partial_sigma_sigma_coeff(int m) const
+		{
+			return jtutil::pow2(inverse_delta_sigma()) * dxx_coeff(m);
+		}
+		fp partial_rho_sigma_coeff(int m_rho, int m_sigma) const
+		{
+			return partial_rho_coeff(m_rho) * partial_sigma_coeff(m_sigma);
+		}
+
+		// worker functions: molecule coefficients for unit grid spacing (only declared in this class body)
+	private:
+		static fp dx_coeff(int m);
+		static fp dxx_coeff(int m);
+
+		//
+		// ***** constructor, destructor *****
+		//
+	public:
+		// constructor: pass through to grid:: constructor
+		fd_grid(const grid_array_pars &grid_array_pars_in,
+				const grid_pars &grid_pars_in)
+			: grid(grid_array_pars_in, grid_pars_in)
+		{
+		}
+		// compiler-generated default destructor is ok
+
+	private:
+		// we forbid copying and passing by value
+		// by declaring the copy constructor and assignment operator
+		// private, but never defining them
+		fd_grid(const fd_grid &rhs);
+		fd_grid &operator=(const fd_grid &rhs);
+	};
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /* FD_GRID_H  */
--- a/AMSS_NCKU_source/AHF_Direct/find_horizons.C
+++ b/AMSS_NCKU_source/AHF_Direct/find_horizons.C
@@ -1,137 +1,137 @@
-
-
-#include "macrodef.h"
-#ifdef With_AHF
-
-#include <stdio.h>
-#include <assert.h>
-#include <math.h>
-#include <mpi.h>
-
-#include "cctk.h"
-
-#include "config.h"
-#include "stdc.h"
-#include "util.h"
-#include "array.h"
-#include "cpm_map.h"
-#include "linear_map.h"
-
-#include "coords.h"
-#include "tgrid.h"
-#include "fd_grid.h"
-#include "patch.h"
-#include "patch_edge.h"
-#include "patch_interp.h"
-#include "ghost_zone.h"
-#include "patch_system.h"
-
-#include "Jacobian.h"
-
-#include "gfns.h"
-#include "gr.h"
-
-#include "horizon_sequence.h"
-#include "BH_diagnostics.h"
-#include "myglobal.h"
-
-namespace AHFinderDirect
-{
-	void recentering(patch_system &ps, double max_x, double max_y, double max_z,
-					 double min_x, double min_y, double min_z,
-					 double centroid_x, double centroid_y, double centroid_z);
-	extern struct state state;
-
-	void AHFinderDirect_find_horizons(int HN, int *dumpid,
-									  double *xc, double *yc, double *zc, double *xr, double *yr, double *zr,
-									  bool *trigger, double *dT)
-	{
-		const int my_proc = state.my_proc;
-		horizon_sequence &hs = *state.my_hs;
-		if (my_proc == 0 && hs.N_horizons() != HN)
-		{
-			cout << "input number " << HN << " != " << "number of wanted horizons " << hs.N_horizons() << endl;
-			MPI_Abort(MPI_COMM_WORLD, 1);
-		}
-
-		state.ADM->AH_Prepare_derivatives();
-
-		for (int hn = hs.init_hn(); hs.is_genuine(); hn = hs.next_hn())
-		{
-			int ihn = hs.get_hn();
-			assert(ihn > 0 && ihn <= HN);
-			ihn = ihn - 1;
-
-			struct AH_data &AH_data = *state.AH_data_array[hn];
-
-			AH_data.find_trigger = trigger[ihn];
-			if (AH_data.find_trigger)
-			{
-				if (AH_data.found_flag)
-					AH_data.initial_find_flag = false;
-				else if (AH_data.recentering_flag == false)
-				{
-					patch_system &ps = *AH_data.ps_ptr;
-					recentering(ps, xc[ihn] + xr[ihn] / 2, yc[ihn] + yr[ihn] / 2, zc[ihn] + zr[ihn] / 2,
-								xc[ihn] - xr[ihn] / 2, yc[ihn] - yr[ihn] / 2, zc[ihn] - zr[ihn] / 2,
-								xc[ihn], yc[ihn], zc[ihn]);
-					setup_initial_guess(ps, xc[ihn], yc[ihn], zc[ihn], xr[ihn], yr[ihn], zr[ihn]);
-					AH_data.initial_find_flag = true;
-				}
-				else
-					AH_data.stop_finding == true;
-			}
-
-		} // end for hn
-
-		Newton(state.N_procs, state.N_active_procs, my_proc,
-			   *state.my_hs, state.AH_data_array,
-			   state.isb, dumpid, dT);
-	}
-
-	void AHFinderDirect_enforcefind(int HN,
-									double *xc, double *yc, double *zc, double *xr, double *yr, double *zr)
-	{
-		const int my_proc = state.my_proc;
-		horizon_sequence &hs = *state.my_hs;
-		if (my_proc == 0 && hs.N_horizons() != HN)
-		{
-			cout << "input number " << HN << " != " << "number of wanted horizons " << hs.N_horizons() << endl;
-			MPI_Abort(MPI_COMM_WORLD, 1);
-		}
-		bool *trigger;
-		int *dumpid;
-		double *dTT;
-		trigger = new bool[HN];
-		dumpid = new int[HN];
-		dTT = new double[HN];
-		for (int ihn = 0; ihn < HN; ihn++)
-		{
-			trigger[ihn] = true;
-			dumpid[ihn] = 1;
-			dTT[ihn] = 1;
-		}
-
-		for (int hn = hs.init_hn(); hs.is_genuine(); hn = hs.next_hn())
-		{
-			int ihn = hs.get_hn();
-			assert(ihn > 0 && ihn <= HN);
-
-			struct AH_data &AH_data = *state.AH_data_array[hn];
-
-			AH_data.find_trigger = true;
-			AH_data.stop_finding = false;
-			AH_data.found_flag = false;
-			AH_data.recentering_flag = false;
-			AH_data.initial_find_flag = true;
-
-		} // end for hn
-
-		AHFinderDirect_find_horizons(HN, dumpid, xc, yc, zc, xr, yr, zr, trigger, dTT);
-
-		delete[] trigger;
-		delete[] dumpid;
-		delete[] dTT;
-	}
-} // namespace AHFinderDirect
-#endif
+
+
+#include "macrodef.h"
+#ifdef With_AHF
+
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+#include <mpi.h>
+
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "cpm_map.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "fd_grid.h"
+#include "patch.h"
+#include "patch_edge.h"
+#include "patch_interp.h"
+#include "ghost_zone.h"
+#include "patch_system.h"
+
+#include "Jacobian.h"
+
+#include "gfns.h"
+#include "gr.h"
+
+#include "horizon_sequence.h"
+#include "BH_diagnostics.h"
+#include "myglobal.h"
+
+namespace AHFinderDirect
+{
+	void recentering(patch_system &ps, double max_x, double max_y, double max_z,
+					 double min_x, double min_y, double min_z,
+					 double centroid_x, double centroid_y, double centroid_z);
+	extern struct state state;
+
+	// Prepare every genuine horizon in this processor's sequence for a search
+	// (re-centre the patch system and build an ellipsoidal initial guess when a
+	// triggered horizon was not found and has not been re-centred), then call
+	// Newton() on the whole sequence.  xc..zr and trigger are indexed by hn - 1.
+	void AHFinderDirect_find_horizons(int HN, int *dumpid,
+									  double *xc, double *yc, double *zc, double *xr, double *yr, double *zr,
+									  bool *trigger, double *dT)
+	{
+		const int my_proc = state.my_proc;
+		horizon_sequence &hs = *state.my_hs;
+		if (my_proc == 0 && hs.N_horizons() != HN)
+		{
+			cout << "input number " << HN << " != " << "number of wanted horizons " << hs.N_horizons() << endl;
+			MPI_Abort(MPI_COMM_WORLD, 1);
+		}
+
+		state.ADM->AH_Prepare_derivatives();
+		for (int hn = hs.init_hn(); hs.is_genuine(); hn = hs.next_hn())
+		{
+			int ihn = hs.get_hn();
+			assert(ihn > 0 && ihn <= HN);
+			ihn = ihn - 1; // horizon numbers are 1-based, the argument arrays 0-based
+			struct AH_data &AH_data = *state.AH_data_array[hn];
+			AH_data.find_trigger = trigger[ihn];
+			if (AH_data.find_trigger)
+			{
+				if (AH_data.found_flag)
+					AH_data.initial_find_flag = false;
+				else if (AH_data.recentering_flag == false)
+				{
+					patch_system &ps = *AH_data.ps_ptr;
+					recentering(ps, xc[ihn] + xr[ihn] / 2, yc[ihn] + yr[ihn] / 2, zc[ihn] + zr[ihn] / 2,
+								xc[ihn] - xr[ihn] / 2, yc[ihn] - yr[ihn] / 2, zc[ihn] - zr[ihn] / 2,
+								xc[ihn], yc[ihn], zc[ihn]);
+					setup_initial_guess(ps, xc[ihn], yc[ihn], zc[ihn], xr[ihn], yr[ihn], zr[ihn]);
+					AH_data.initial_find_flag = true;
+				}
+				else
+					AH_data.stop_finding = true; // was "== true": a comparison whose result was discarded
+			}
+		} // end for hn
+
+		Newton(state.N_procs, state.N_active_procs, my_proc,
+			   *state.my_hs, state.AH_data_array,
+			   state.isb, dumpid, dT);
+	}
+
+	// Force a fresh search of all horizons: reset the search flags of every
+	// genuine horizon in this processor's sequence, then run the finder with
+	// every trigger set (dumpid and dT entries are all passed as 1).
+	void AHFinderDirect_enforcefind(int HN,
+									double *xc, double *yc, double *zc, double *xr, double *yr, double *zr)
+	{
+		const int my_proc = state.my_proc;
+		horizon_sequence &hs = *state.my_hs;
+		if (my_proc == 0 && hs.N_horizons() != HN)
+		{
+			cout << "input number " << HN << " != " << "number of wanted horizons " << hs.N_horizons() << endl;
+			MPI_Abort(MPI_COMM_WORLD, 1);
+		}
+		bool *trigger;
+		int *dumpid;
+		double *dTT;
+		trigger = new bool[HN];
+		dumpid = new int[HN];
+		dTT = new double[HN];
+		for (int ihn = 0; ihn < HN; ihn++)
+		{
+			trigger[ihn] = true;
+			dumpid[ihn] = 1;
+			dTT[ihn] = 1;
+		}
+
+		for (int hn = hs.init_hn(); hs.is_genuine(); hn = hs.next_hn())
+		{
+			int ihn = hs.get_hn();
+			assert(ihn > 0 && ihn <= HN); // ihn is only used for this sanity check
+			struct AH_data &AH_data = *state.AH_data_array[hn];
+			// found_flag and recentering_flag both false: the finder call below re-centres and rebuilds the initial guess
+			AH_data.find_trigger = true;
+			AH_data.stop_finding = false;
+			AH_data.found_flag = false;
+			AH_data.recentering_flag = false;
+			AH_data.initial_find_flag = true;
+		} // end for hn
+
+		AHFinderDirect_find_horizons(HN, dumpid, xc, yc, zc, xr, yr, zr, trigger, dTT);
+		delete[] trigger;
+		delete[] dumpid;
+		delete[] dTT;
+	}
+} // namespace AHFinderDirect
+#endif
--- a/AMSS_NCKU_source/AHF_Direct/fuzzy.C
+++ b/AMSS_NCKU_source/AHF_Direct/fuzzy.C
@@ -1,63 +1,63 @@
-#include <stdlib.h>
-#include <stdio.h>
-
-#include "stdc.h"
-#include "util.h"
-
-namespace AHFinderDirect
-{
-  namespace jtutil
-  {
-    template <typename fp_t>
-    bool fuzzy<fp_t>::EQ(fp_t x, fp_t y)
-    {
-      fp_t max_abs = jtutil::tmax(jtutil::abs(x), jtutil::abs(y));
-      fp_t epsilon = jtutil::tmax(tolerance_, tolerance_ * max_abs);
-
-      return jtutil::abs(x - y) <= epsilon;
-    }
-
-    //******************************************************************************
-
-    template <typename fp_t>
-    bool fuzzy<fp_t>::is_integer(fp_t x)
-    {
-      int i = round<fp_t>::to_integer(x);
-      return EQ(x, fp_t(i));
-    }
-
-    //******************************************************************************
-
-    template <typename fp_t>
-    int fuzzy<fp_t>::floor(fp_t x)
-    {
-      return fuzzy<fp_t>::is_integer(x)
-                 ? round<fp_t>::to_integer(x)
-                 : round<fp_t>::floor(x);
-    }
-
-    //******************************************************************************
-
-    template <typename fp_t>
-    int fuzzy<fp_t>::ceiling(fp_t x)
-    {
-      return fuzzy<fp_t>::is_integer(x)
-                 ? round<fp_t>::to_integer(x)
-                 : round<fp_t>::ceiling(x);
-    }
-    template <>
-    float fuzzy<float>::tolerance_ = 1.0e-5; // about 100 * FLT_EPSILON
-
-    template <>
-    double fuzzy<double>::tolerance_ = 1.0e-12; // about 1e4 * DBL_EPSILON
-
-    // template instantiations
-    template class fuzzy<float>;
-    template class fuzzy<double>;
-
-    //******************************************************************************
-    //******************************************************************************
-    //******************************************************************************
-
-  } // namespace jtutil
-} // namespace AHFinderDirect
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "stdc.h"
+#include "util.h"
+
+namespace AHFinderDirect
+{
+  namespace jtutil
+  {
+    template <typename fp_t>
+    bool fuzzy<fp_t>::EQ(fp_t x, fp_t y)
+    {
+      // allowed gap is tolerance_ itself, or tolerance_ scaled by the larger magnitude when that is bigger
+      const fp_t larger_magnitude = jtutil::tmax(jtutil::abs(x), jtutil::abs(y));
+      const fp_t allowed_gap = jtutil::tmax(tolerance_, tolerance_ * larger_magnitude);
+      return jtutil::abs(x - y) <= allowed_gap;
+    }
+
+    //******************************************************************************
+
+    template <typename fp_t>
+    bool fuzzy<fp_t>::is_integer(fp_t x)
+    {
+      const int rounded = round<fp_t>::to_integer(x); // candidate integer value of x
+      return EQ(x, fp_t(rounded));                     // fuzzy comparison against it
+    }
+
+    //******************************************************************************
+
+    template <typename fp_t>
+    int fuzzy<fp_t>::floor(fp_t x)
+    {
+      if (fuzzy<fp_t>::is_integer(x))
+        return round<fp_t>::to_integer(x); // fuzzily integral: snap to that integer
+      return round<fp_t>::floor(x);
+    }
+
+    //******************************************************************************
+
+    template <typename fp_t>
+    int fuzzy<fp_t>::ceiling(fp_t x)
+    {
+      if (fuzzy<fp_t>::is_integer(x))
+        return round<fp_t>::to_integer(x); // fuzzily integral: snap to that integer
+      return round<fp_t>::ceiling(x);
+    }
+    template <>
+    float fuzzy<float>::tolerance_ = 1.0e-5; // about 100 * FLT_EPSILON
+
+    template <>
+    double fuzzy<double>::tolerance_ = 1.0e-12; // about 1e4 * DBL_EPSILON
+
+    // template instantiations
+    template class fuzzy<float>;
+    template class fuzzy<double>;
+
+    //******************************************************************************
+    //******************************************************************************
+    //******************************************************************************
+
+  } // namespace jtutil
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/gfns.h
+++ b/AMSS_NCKU_source/AHF_Direct/gfns.h
@@ -1,98 +1,98 @@
-#ifndef GFNS_H
-#define GFNS_H
-namespace AHFinderDirect
-{
-
-	namespace gfns
-	{
-
-		// ghosted gridfns
-		enum
-		{
-			ghosted_min_gfn = -1, // must set this by hand so
-								  // ghosted_max_gfn is still < 0
-			gfn__h = ghosted_min_gfn,
-			ghosted_max_gfn = gfn__h
-		};
-
-		// nominal gridfns
-		enum
-		{
-			nominal_min_gfn = 1,
-
-			//
-			// for a skeletal patch system we don't need any nominal gridfns
-			//
-			skeletal_nominal_max_gfn = nominal_min_gfn - 1,
-
-			//
-			// most of these gridfns have access macros in "cg.hh";
-			// the ones that don't are marked explicitly
-			//
-			gfn__global_x = nominal_min_gfn, // no access macro
-			gfn__global_y,					 // no access macro
-			gfn__global_z,					 // no access macro
-
-			gfn__global_xx, // no access macro
-			gfn__global_xy, // no access macro
-			gfn__global_xz, // no access macro
-			gfn__global_yy, // no access macro
-			gfn__global_yz, // no access macro
-			gfn__global_zz, // no access macro
-
-			gfn__g_dd_11,
-			gfn__g_dd_12,
-			gfn__g_dd_13,
-			gfn__g_dd_22,
-			gfn__g_dd_23,
-			gfn__g_dd_33,
-			gfn__partial_d_g_dd_111,
-			gfn__partial_d_g_dd_112,
-			gfn__partial_d_g_dd_113,
-			gfn__partial_d_g_dd_122,
-			gfn__partial_d_g_dd_123,
-			gfn__partial_d_g_dd_133,
-			gfn__partial_d_g_dd_211,
-			gfn__partial_d_g_dd_212,
-			gfn__partial_d_g_dd_213,
-			gfn__partial_d_g_dd_222,
-			gfn__partial_d_g_dd_223,
-			gfn__partial_d_g_dd_233,
-			gfn__partial_d_g_dd_311,
-			gfn__partial_d_g_dd_312,
-			gfn__partial_d_g_dd_313,
-			gfn__partial_d_g_dd_322,
-			gfn__partial_d_g_dd_323,
-			gfn__partial_d_g_dd_333,
-			gfn__K_dd_11,
-			gfn__K_dd_12,
-			gfn__K_dd_13,
-			gfn__K_dd_22,
-			gfn__K_dd_23,
-			gfn__K_dd_33,
-			gfn__trK,
-
-			gfn__psi,			  // no access macro
-			gfn__partial_d_psi_1, // no access macro
-			gfn__partial_d_psi_2, // no access macro
-			gfn__partial_d_psi_3, // no access macro
-
-			gfn__Theta,
-			gfn__partial_Theta_wrt_partial_d_h_1,
-			gfn__partial_Theta_wrt_partial_d_h_2,
-			gfn__partial_Theta_wrt_partial_dd_h_11,
-			gfn__partial_Theta_wrt_partial_dd_h_12,
-			gfn__partial_Theta_wrt_partial_dd_h_22,
-			gfn__Delta_h,
-			gfn__save_Theta,
-			gfn__oldh, // used for dh/dt
-			gfn__one,
-			nominal_max_gfn = gfn__one // no comma
-		};
-
-	} // namespace gfns::
-
-	//******************************************************************************
-
-} // namespace AHFinderDirect
-#endif /* GFNS_H  */
+#ifndef GFNS_H
+#define GFNS_H
+namespace AHFinderDirect
+{
+
+	namespace gfns
+	{
+
+		// ghosted gridfns
+		enum
+		{
+			ghosted_min_gfn = -1, // must set this by hand so
+								  // ghosted_max_gfn is still < 0
+			gfn__h = ghosted_min_gfn,
+			ghosted_max_gfn = gfn__h
+		};
+
+		// nominal gridfns
+		enum
+		{
+			nominal_min_gfn = 1,
+
+			//
+			// for a skeletal patch system we don't need any nominal gridfns
+			//
+			skeletal_nominal_max_gfn = nominal_min_gfn - 1,
+
+			//
+			// most of these gridfns have access macros in "cg.hh";
+			// the ones that don't are marked explicitly
+			//
+			gfn__global_x = nominal_min_gfn, // no access macro
+			gfn__global_y,					 // no access macro
+			gfn__global_z,					 // no access macro
+
+			gfn__global_xx, // no access macro
+			gfn__global_xy, // no access macro
+			gfn__global_xz, // no access macro
+			gfn__global_yy, // no access macro
+			gfn__global_yz, // no access macro
+			gfn__global_zz, // no access macro
+
+			gfn__g_dd_11,
+			gfn__g_dd_12,
+			gfn__g_dd_13,
+			gfn__g_dd_22,
+			gfn__g_dd_23,
+			gfn__g_dd_33,
+			gfn__partial_d_g_dd_111,
+			gfn__partial_d_g_dd_112,
+			gfn__partial_d_g_dd_113,
+			gfn__partial_d_g_dd_122,
+			gfn__partial_d_g_dd_123,
+			gfn__partial_d_g_dd_133,
+			gfn__partial_d_g_dd_211,
+			gfn__partial_d_g_dd_212,
+			gfn__partial_d_g_dd_213,
+			gfn__partial_d_g_dd_222,
+			gfn__partial_d_g_dd_223,
+			gfn__partial_d_g_dd_233,
+			gfn__partial_d_g_dd_311,
+			gfn__partial_d_g_dd_312,
+			gfn__partial_d_g_dd_313,
+			gfn__partial_d_g_dd_322,
+			gfn__partial_d_g_dd_323,
+			gfn__partial_d_g_dd_333,
+			gfn__K_dd_11,
+			gfn__K_dd_12,
+			gfn__K_dd_13,
+			gfn__K_dd_22,
+			gfn__K_dd_23,
+			gfn__K_dd_33,
+			gfn__trK,
+
+			gfn__psi,			  // no access macro
+			gfn__partial_d_psi_1, // no access macro
+			gfn__partial_d_psi_2, // no access macro
+			gfn__partial_d_psi_3, // no access macro
+
+			gfn__Theta,
+			gfn__partial_Theta_wrt_partial_d_h_1,
+			gfn__partial_Theta_wrt_partial_d_h_2,
+			gfn__partial_Theta_wrt_partial_dd_h_11,
+			gfn__partial_Theta_wrt_partial_dd_h_12,
+			gfn__partial_Theta_wrt_partial_dd_h_22,
+			gfn__Delta_h,
+			gfn__save_Theta,
+			gfn__oldh, // used for dh/dt
+			gfn__one,
+			nominal_max_gfn = gfn__one // no comma
+		};
+
+	} // namespace gfns::
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /* GFNS_H  */
--- a/AMSS_NCKU_source/AHF_Direct/ghost_zone.C
+++ b/AMSS_NCKU_source/AHF_Direct/ghost_zone.C
--- a/AMSS_NCKU_source/AHF_Direct/ghost_zone.h
+++ b/AMSS_NCKU_source/AHF_Direct/ghost_zone.h
--- a/AMSS_NCKU_source/AHF_Direct/gr.h
+++ b/AMSS_NCKU_source/AHF_Direct/gr.h
@@ -1,40 +1,40 @@
-#ifndef GR_H
-#define GR_H
-namespace AHFinderDirect
-{
-
-	enum expansion_status
-	{
-		expansion_success,
-
-		expansion_failure__surface_nonfinite,
-
-		expansion_failure__surface_too_large,
-
-		expansion_failure__surface_outside_grid,
-
-		expansion_failure__surface_in_excised_region,
-
-		expansion_failure__geometry_nonfinite,
-
-		expansion_failure__gij_not_positive_definite // no comma
-	};
-
-	// expansion.cc
-	enum expansion_status
-	expansion(patch_system *ps_ptr, fp add_to_expansion,
-			  bool initial_flag,
-			  bool Jacobian_flag = false,
-			  jtutil::norm<fp> *H_norms_ptr = NULL);
-
-	// expansion_Jacobian.cc
-	enum expansion_status
-	expansion_Jacobian(patch_system *ps_ptr, Jacobian *Jac_ptr,
-					   fp add_to_expansion,
-					   bool initial_flag,
-					   bool print_msg_flag = false);
-
-	//******************************************************************************
-
-} // namespace AHFinderDirect
-#endif /* GR_H  */
+#ifndef GR_H
+#define GR_H
+namespace AHFinderDirect
+{
+
+	enum expansion_status
+	{
+		expansion_success,
+
+		expansion_failure__surface_nonfinite,
+
+		expansion_failure__surface_too_large,
+
+		expansion_failure__surface_outside_grid,
+
+		expansion_failure__surface_in_excised_region,
+
+		expansion_failure__geometry_nonfinite,
+
+		expansion_failure__gij_not_positive_definite // no comma
+	};
+
+	// expansion.cc
+	enum expansion_status
+	expansion(patch_system *ps_ptr, fp add_to_expansion,
+			  bool initial_flag,
+			  bool Jacobian_flag = false,
+			  jtutil::norm<fp> *H_norms_ptr = NULL);
+
+	// expansion_Jacobian.cc
+	enum expansion_status
+	expansion_Jacobian(patch_system *ps_ptr, Jacobian *Jac_ptr,
+					   fp add_to_expansion,
+					   bool initial_flag,
+					   bool print_msg_flag = false);
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /* GR_H  */
--- a/AMSS_NCKU_source/AHF_Direct/horizon_sequence.C
+++ b/AMSS_NCKU_source/AHF_Direct/horizon_sequence.C
@@ -1,76 +1,76 @@
-#include <stdio.h>
-#include <assert.h>
-
-#include "stdc.h"
-#include "util.h"
-
-#include "horizon_sequence.h"
-
-namespace AHFinderDirect
-{
-
-	horizon_sequence::horizon_sequence(int N_horizons_in)
-		: N_horizons_(N_horizons_in),
-		  my_N_horizons_(0), // sequence starts out empty
-		  posn_(-1),
-		  my_hn_(new int[N_horizons_in])
-	{
-	}
-
-	horizon_sequence::~horizon_sequence()
-	{
-		delete[] my_hn_;
-	}
-	//
-	// This function appends  hn  to the sequence.  It returns the new value
-	// of my_N_horizons().
-	//
-	int horizon_sequence::append_hn(int hn)
-	{
-		assert(hn > 0);						  // can only append genuine horizons
-		assert(my_N_horizons_ < N_horizons_); // make sure there's space for it
-		my_hn_[my_N_horizons_++] = hn;
-		posn_ = 0;
-		return my_N_horizons_;
-	}
-
-	//******************************************************************************
-
-	//
-	// This function computes the internal position immediately following
-	// a given internal position in the sequence.
-	//
-	// Arguments:
-	// p = (in) The current internal position, with posn_ semantics
-	//
-	// Results:
-	// This function returns the next internal position after p.
-	//
-	int horizon_sequence::next_posn(int pos)
-		const
-	{
-		return (pos < 0)					? pos - 1
-			   : (pos + 1 < my_N_horizons_) ? pos + 1
-											: -1;
-	}
-
-	//******************************************************************************
-
-	//
-	// This function determines whether or not a given  hn  is genuine.
-	//
-	bool horizon_sequence::is_hn_genuine(int hn)
-		const
-	{
-		for (int pos = 0; pos < my_N_horizons_; ++pos)
-		{
-			if (my_hn_[pos] == hn)
-				then return true;
-		}
-
-		return false;
-	}
-
-	//******************************************************************************
-
-} // namespace AHFinderDirect
+#include <stdio.h>
+#include <assert.h>
+
+#include "stdc.h"
+#include "util.h"
+
+#include "horizon_sequence.h"
+
+namespace AHFinderDirect
+{
+
+	horizon_sequence::horizon_sequence(int N_horizons_in)
+		: N_horizons_(N_horizons_in),
+		  my_N_horizons_(0), // sequence starts out empty
+		  posn_(-1),
+		  my_hn_(new int[N_horizons_in])
+	{
+	}
+
+	horizon_sequence::~horizon_sequence()
+	{
+		delete[] my_hn_;
+	}
+	//
+	// This function appends  hn  to the sequence.  It returns the new value
+	// of my_N_horizons().
+	//
+	int horizon_sequence::append_hn(int hn)
+	{
+		assert(hn > 0);						  // only positive horizon numbers may be stored
+		assert(my_N_horizons_ < N_horizons_); // the buffer must still have a free slot
+		my_hn_[my_N_horizons_] = hn;
+		posn_ = 0; // cursor goes back to the first stored entry
+		return ++my_N_horizons_;
+	}
+
+	//******************************************************************************
+
+	//
+	// This function computes the internal position immediately following
+	// a given internal position in the sequence.
+	//
+	// Arguments:
+	// p = (in) The current internal position, with posn_ semantics
+	//
+	// Results:
+	// This function returns the next internal position after p.
+	//
+	int horizon_sequence::next_posn(int pos) const
+	{
+		if (pos < 0)
+			return pos - 1; // negative positions keep counting downwards
+		const int following = pos + 1;
+		return (following < my_N_horizons_) ? following : -1; // past the last entry: -1
+	}
+
+	//******************************************************************************
+
+	//
+	// This function determines whether or not a given  hn  is genuine.
+	//
+	bool horizon_sequence::is_hn_genuine(int hn) const
+	{
+		// linear scan over the horizon numbers appended so far
+		const int *const first = my_hn_;
+		const int *const last = my_hn_ + my_N_horizons_;
+		for (const int *it = first; it != last; ++it)
+			if (*it == hn)
+				return true;
+
+		return false;
+	}
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/horizon_sequence.h
+++ b/AMSS_NCKU_source/AHF_Direct/horizon_sequence.h
@@ -1,72 +1,72 @@
-#ifndef HORIZON_SEQUENCE_H
-#define HORIZON_SEQUENCE_H
-namespace AHFinderDirect
-{
-	class horizon_sequence
-	{
-	public:
-		int N_horizons() const { return N_horizons_; }
-
-		int my_N_horizons() const { return my_N_horizons_; }
-
-		bool has_genuine_horizons() const { return my_N_horizons_ > 0; }
-
-		bool is_dummy() const { return posn_is_dummy(posn_); }
-		bool is_genuine() const { return posn_is_genuine(posn_); }
-
-		bool is_next_genuine() const
-		{
-			return posn_is_genuine(next_posn(posn_));
-		}
-
-		int dummy_number() const { return is_genuine() ? 0 : -posn_; }
-
-		int get_hn() const
-		{
-			return posn_is_genuine(posn_) ? my_hn_[posn_] : 0;
-		}
-
-		bool is_hn_genuine(int hn) const;
-
-		int init_hn()
-		{
-			posn_ = (my_N_horizons_ == 0) ? -1 : 0;
-			return get_hn();
-		}
-
-		int next_hn()
-		{
-			posn_ = next_posn(posn_);
-			return get_hn();
-		}
-
-		horizon_sequence(int N_horizons);
-		~horizon_sequence();
-
-		int append_hn(int hn);
-
-	private:
-		bool posn_is_genuine(int pos) const
-		{
-			return (pos >= 0) && (pos < my_N_horizons_);
-		}
-		bool posn_is_dummy(int pos) const
-		{
-			return !posn_is_genuine(pos);
-		}
-
-		int next_posn(int pos) const;
-
-	private:
-		const int N_horizons_;
-		int my_N_horizons_;
-
-		int posn_;
-
-		int *my_hn_;
-	};
-
-	//******************************************************************************
-
-} // namespace AHFinderDirect
-#endif /* HORIZON_SEQUENCE_H */
+#ifndef HORIZON_SEQUENCE_H
+#define HORIZON_SEQUENCE_H
+namespace AHFinderDirect
+{
+	// Sequence of the horizon numbers appended for this object, plus a cursor (posn_) into it.
+	class horizon_sequence
+	{
+	public:
+		// capacity given at construction / number of horizon numbers appended so far
+		int N_horizons() const { return N_horizons_; }
+		int my_N_horizons() const { return my_N_horizons_; }
+		bool has_genuine_horizons() const { return my_N_horizons_ > 0; }
+		// is the cursor on a stored ("genuine") entry, or on a dummy position?
+		bool is_dummy() const { return posn_is_dummy(posn_); }
+		bool is_genuine() const { return posn_is_genuine(posn_); }
+		bool is_next_genuine() const
+		{
+			return posn_is_genuine(next_posn(posn_));
+		}
+		// 0 while the cursor is on a genuine entry, otherwise -posn_
+		int dummy_number() const { return is_genuine() ? 0 : -posn_; }
+		// horizon number under the cursor, or 0 when the cursor is on a dummy position
+		int get_hn() const
+		{
+			return posn_is_genuine(posn_) ? my_hn_[posn_] : 0;
+		}
+		bool is_hn_genuine(int hn) const;
+
+		// rewind the cursor (to -1 if the sequence is empty) and return get_hn()
+		int init_hn()
+		{
+			posn_ = (my_N_horizons_ == 0) ? -1 : 0;
+			return get_hn();
+		}
+
+		// advance the cursor and return get_hn() at the new position
+		int next_hn()
+		{
+			posn_ = next_posn(posn_);
+			return get_hn();
+		}
+
+		horizon_sequence(int N_horizons);
+		~horizon_sequence();
+		int append_hn(int hn);
+
+	private:
+		bool posn_is_genuine(int pos) const
+		{
+			return (pos >= 0) && (pos < my_N_horizons_);
+		}
+		bool posn_is_dummy(int pos) const
+		{
+			return !posn_is_genuine(pos);
+		}
+		int next_posn(int pos) const;
+		// my_hn_ is an owning new[] buffer released by the destructor, so a copy would
+		// double-delete it: copying is forbidden (declared private, never defined)
+		horizon_sequence(const horizon_sequence &rhs);
+		horizon_sequence &operator=(const horizon_sequence &rhs);
+
+	private:
+		const int N_horizons_;
+		int my_N_horizons_;
+		int posn_;
+		int *my_hn_;
+	};
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /* HORIZON_SEQUENCE_H */
--- a/AMSS_NCKU_source/AHF_Direct/ilucg.f90
+++ b/AMSS_NCKU_source/AHF_Direct/ilucg.f90
--- a/AMSS_NCKU_source/AHF_Direct/ilucg.h
+++ b/AMSS_NCKU_source/AHF_Direct/ilucg.h
@@ -1,24 +1,24 @@
-
-#ifndef ILUCG_H
-#define ILUCG_H
-
-#ifdef fortran1
-#define f_ilucg ilucg
-#endif
-#ifdef fortran2
-#define f_ilucg ILUCG
-#endif
-#ifdef fortran3
-#define f_ilucg ilucg_
-#endif
-
-extern "C"
-{
-	void f_ilucg(const int &N,
-				 const int *IA, const int *JA, const double *A,
-				 const double *B, double *X,
-				 int *ITEMP, double *RTEMP,
-				 const double &EPS, const int &ITER, int &ISTATUS);
-}
-
-#endif /* ILUCG_H */
+
+#ifndef ILUCG_H
+#define ILUCG_H
+
+#ifdef fortran1
+#define f_ilucg ilucg
+#endif
+#ifdef fortran2
+#define f_ilucg ILUCG
+#endif
+#ifdef fortran3
+#define f_ilucg ilucg_
+#endif
+
+extern "C"
+{
+	void f_ilucg(const int &N,
+				 const int *IA, const int *JA, const double *A,
+				 const double *B, double *X,
+				 int *ITEMP, double *RTEMP,
+				 const double &EPS, const int &ITER, int &ISTATUS);
+}
+
+#endif /* ILUCG_H */
--- a/AMSS_NCKU_source/AHF_Direct/initial_guess.C
+++ b/AMSS_NCKU_source/AHF_Direct/initial_guess.C
@@ -1,132 +1,132 @@
-#include <stdio.h>
-#include <assert.h>
-#include <math.h>
-#include <string.h>
-
-#include "util_Table.h"
-#include "cctk.h"
-
-#include "config.h"
-#include "stdc.h"
-#include "util.h"
-#include "array.h"
-#include "cpm_map.h"
-#include "linear_map.h"
-
-#include "coords.h"
-#include "tgrid.h"
-#include "fd_grid.h"
-#include "patch.h"
-#include "patch_edge.h"
-#include "patch_interp.h"
-#include "ghost_zone.h"
-#include "patch_system.h"
-
-#include "Jacobian.h"
-
-#include "gfns.h"
-#include "gr.h"
-
-#include "horizon_sequence.h"
-#include "BH_diagnostics.h"
-#include "myglobal.h"
-
-namespace AHFinderDirect
-{
-	extern struct state state;
-	//******************************************************************************
-
-	// ellipsoid has global-coordinates center (A,B,C), radius (a,b,c)
-	// angular coordinate system has center (U,V,W)
-	//
-	// direction cosines wrt angular coordinate center are (xcos,ycos,zcos)
-	// i.e. a point has coordinates (U+xcos*r, V+ycos*r, W+zcos*r)
-	//
-	// then the equation of the ellipsoid is
-	//	(U+xcos*r - A)^2     (V+ycos*r - B)^2     (W+zcos*r - C)^2
-	//	-----------------  +  ----------------  +  -----------------  =  1
-	//	        a^2                  b^2                   c^2
-	//
-	// to solve this, we introduce intermediate variables
-	//	AU = A - U
-	//	BV = B - V
-	//	CW = C - W
-	//
-	void setup_initial_guess(patch_system &ps,
-							 fp x_center, fp y_center, fp z_center,
-							 fp x_radius, fp y_radius, fp z_radius)
-	{
-		for (int pn = 0; pn < ps.N_patches(); ++pn)
-		{
-			patch &p = ps.ith_patch(pn);
-
-			for (int irho = p.min_irho(); irho <= p.max_irho(); ++irho)
-			{
-				for (int isigma = p.min_isigma();
-					 isigma <= p.max_isigma();
-					 ++isigma)
-				{
-					const fp rho = p.rho_of_irho(irho);
-					const fp sigma = p.sigma_of_isigma(isigma);
-					fp xcos, ycos, zcos;
-					p.xyzcos_of_rho_sigma(rho, sigma, xcos, ycos, zcos);
-
-					// set up variables used by Maple-generated code
-					const fp AU = x_center - ps.origin_x();
-					const fp BV = y_center - ps.origin_y();
-					const fp CW = z_center - ps.origin_z();
-					const fp a = x_radius;
-					const fp b = y_radius;
-					const fp c = z_radius;
-
-					// compute the solutions r_plus and r_minus
-					fp r_plus, r_minus;
-					{
-						fp t1, t2, t3, t5, t6, t7, t9, t10, t12, t28;
-						fp t30, t33, t35, t36, t40, t42, t43, t48, t49, t52;
-						fp t55;
-						t1 = a * a;
-						t2 = b * b;
-						t3 = t1 * t2;
-						t5 = t3 * zcos * CW;
-						t6 = c * c;
-						t7 = t1 * t6;
-						t9 = t7 * ycos * BV;
-						t10 = t2 * t6;
-						t12 = t10 * xcos * AU;
-						t28 = xcos * xcos;
-						t30 = CW * CW;
-						t33 = BV * BV;
-						t35 = t10 * t28;
-						t36 = ycos * ycos;
-						t40 = AU * AU;
-						t42 = t7 * t36;
-						t43 = zcos * zcos;
-						t48 = t3 * t43;
-						t49 = -2.0 * t1 * zcos * CW * ycos * BV - 2.0 * t2 * zcos * CW * xcos * AU - 2.0 * t6 * ycos * BV * xcos * AU + t2 * t28 * t30 + t6 * t28 * t33 - t35 + t1 * t36 * t30 + t6 * t36 * t40 - t42 + t1 * t43 * t33 + t2 * t43 * t40 -
-							  t48;
-						t52 = sqrt(-t3 * t6 * t49);
-						t55 = 1 / (t35 + t42 + t48);
-						r_plus = (t5 + t9 + t12 + t52) * t55;
-						r_minus = (t5 + t9 + t12 - t52) * t55;
-					}
-
-					// exactly one of the solutions (call it r) should be positive
-					fp r;
-					if ((r_plus > 0.0) && (r_minus < 0.0))
-						then r = r_plus;
-					else if ((r_plus < 0.0) && (r_minus > 0.0))
-						then r = r_minus;
-					else if (state.my_proc == 0)
-						printf("\nsetup_coord_ellipsoid():\nexpected exactly one r>0 solution to quadratic, got 0 or 2!\n%s patch (irho,isigma)=(%d,%d) ==> (rho,sigma)=(%g,%g)\ndirection cosines (xcos,ycos,zcos)=(%g,%g,%g)\nr_plus=%g r_minus=%g\n==> this probably means the initial guess surface doesn't contain\nthe local origin point, or more generally that the initial\nguess surface isn't a Strahlkoerper (\"star-shaped region\")\nwith respect to the local origin point\n", p.name(), irho, isigma, double(rho), double(sigma), double(xcos), double(ycos), double(zcos), double(r_plus), double(r_minus));
-
-					// r = horizon radius at this grid point
-					p.ghosted_gridfn(gfns::gfn__h, irho, isigma) = r;
-				}
-			}
-		}
-	}
-
-	//******************************************************************************
-
-} // namespace AHFinderDirect
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+
+#include "util_Table.h"
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "cpm_map.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "fd_grid.h"
+#include "patch.h"
+#include "patch_edge.h"
+#include "patch_interp.h"
+#include "ghost_zone.h"
+#include "patch_system.h"
+
+#include "Jacobian.h"
+
+#include "gfns.h"
+#include "gr.h"
+
+#include "horizon_sequence.h"
+#include "BH_diagnostics.h"
+#include "myglobal.h"
+
+namespace AHFinderDirect
+{
+	extern struct state state;
+	//******************************************************************************
+
+	// ellipsoid has global-coordinates center (A,B,C), radius (a,b,c)
+	// angular coordinate system has center (U,V,W)
+	//
+	// direction cosines wrt angular coordinate center are (xcos,ycos,zcos)
+	// i.e. a point has coordinates (U+xcos*r, V+ycos*r, W+zcos*r)
+	//
+	// then the equation of the ellipsoid is
+	//	(U+xcos*r - A)^2     (V+ycos*r - B)^2     (W+zcos*r - C)^2
+	//	-----------------  +  ----------------  +  -----------------  =  1
+	//	        a^2                  b^2                   c^2
+	//
+	// to solve this, we introduce intermediate variables
+	//	AU = A - U
+	//	BV = B - V
+	//	CW = C - W
+	//
+	// Set gfn__h at each patch grid point to the radius where the ray from the patch-system origin along (xcos,ycos,zcos) meets the given ellipsoid.
+	void setup_initial_guess(patch_system &ps,
+							 fp x_center, fp y_center, fp z_center,
+							 fp x_radius, fp y_radius, fp z_radius)
+	{
+		for (int pn = 0; pn < ps.N_patches(); ++pn)
+		{
+			patch &p = ps.ith_patch(pn);
+			for (int irho = p.min_irho(); irho <= p.max_irho(); ++irho)
+			{
+				for (int isigma = p.min_isigma();
+					 isigma <= p.max_isigma();
+					 ++isigma)
+				{
+					const fp rho = p.rho_of_irho(irho);
+					const fp sigma = p.sigma_of_isigma(isigma);
+					fp xcos, ycos, zcos;
+					p.xyzcos_of_rho_sigma(rho, sigma, xcos, ycos, zcos);
+					// set up variables used by Maple-generated code
+					const fp AU = x_center - ps.origin_x();
+					const fp BV = y_center - ps.origin_y();
+					const fp CW = z_center - ps.origin_z();
+					const fp a = x_radius;
+					const fp b = y_radius;
+					const fp c = z_radius;
+					// compute the solutions r_plus and r_minus
+					fp r_plus, r_minus;
+					{
+						fp t1, t2, t3, t5, t6, t7, t9, t10, t12, t28;
+						fp t30, t33, t35, t36, t40, t42, t43, t48, t49, t52;
+						fp t55;
+						t1 = a * a;
+						t2 = b * b;
+						t3 = t1 * t2;
+						t5 = t3 * zcos * CW;
+						t6 = c * c;
+						t7 = t1 * t6;
+						t9 = t7 * ycos * BV;
+						t10 = t2 * t6;
+						t12 = t10 * xcos * AU;
+						t28 = xcos * xcos;
+						t30 = CW * CW;
+						t33 = BV * BV;
+						t35 = t10 * t28;
+						t36 = ycos * ycos;
+						t40 = AU * AU;
+						t42 = t7 * t36;
+						t43 = zcos * zcos;
+						t48 = t3 * t43;
+						t49 = -2.0 * t1 * zcos * CW * ycos * BV - 2.0 * t2 * zcos * CW * xcos * AU - 2.0 * t6 * ycos * BV * xcos * AU + t2 * t28 * t30 + t6 * t28 * t33 - t35 + t1 * t36 * t30 + t6 * t36 * t40 - t42 + t1 * t43 * t33 + t2 * t43 * t40 - t48;
+						t52 = sqrt(-t3 * t6 * t49);
+						t55 = 1 / (t35 + t42 + t48);
+						r_plus = (t5 + t9 + t12 + t52) * t55;
+						r_minus = (t5 + t9 + t12 - t52) * t55;
+					}
+					// exactly one of the solutions (call it r) should be positive
+					fp r;
+					if ((r_plus > 0.0) && (r_minus < 0.0))
+						then r = r_plus;
+					else if ((r_plus < 0.0) && (r_minus > 0.0))
+						then r = r_minus;
+					else
+					{
+						// no usable root (this also catches NaN roots): report on processor 0 and leave h at this point untouched instead of storing an uninitialised r
+						if (state.my_proc == 0)
+							printf("\nsetup_coord_ellipsoid():\nexpected exactly one r>0 solution to quadratic, got 0 or 2!\n%s patch (irho,isigma)=(%d,%d) ==> (rho,sigma)=(%g,%g)\ndirection cosines (xcos,ycos,zcos)=(%g,%g,%g)\nr_plus=%g r_minus=%g\n==> this probably means the initial guess surface doesn't contain\nthe local origin point, or more generally that the initial\nguess surface isn't a Strahlkoerper (\"star-shaped region\")\nwith respect to the local origin point\n", p.name(), irho, isigma, double(rho), double(sigma), double(xcos), double(ycos), double(zcos), double(r_plus), double(r_minus));
+						continue;
+					}
+					// r = horizon radius at this grid point
+					p.ghosted_gridfn(gfns::gfn__h, irho, isigma) = r;
+				}
+			}
+		}
+	}
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/linear_map.C
+++ b/AMSS_NCKU_source/AHF_Direct/linear_map.C
@@ -1,244 +1,244 @@
-#include <assert.h>
-#include <stdio.h>
-
-#include "stdc.h"
-#include "util.h"
-#include "linear_map.h"
-
-namespace AHFinderDirect
-{
-	namespace jtutil
-	{
-		template <typename fp_t>
-		linear_map<fp_t>::linear_map(int min_int_in, int max_int_in,
-									 fp_t min_fp_in, fp_t delta_fp_in, fp_t max_fp_in)
-			: delta_(delta_fp_in), inverse_delta_(1.0 / delta_fp_in),
-			  min_int_(min_int_in), max_int_(max_int_in)
-		{
-			constructor_common(min_fp_in, max_fp_in);
-		}
-
-		template <typename fp_t>
-		linear_map<fp_t>::linear_map(const linear_map<fp_t> &lm_in,
-									 int min_int_in, int max_int_in) // subrange
-			: delta_(lm_in.delta_fp()), inverse_delta_(lm_in.inverse_delta_fp()),
-			  min_int_(min_int_in), max_int_(max_int_in)
-		{
-			if (!(is_in_range(min_int_in) && is_in_range(max_int_in)))
-				then error_exit(ERROR_EXIT,
-								"***** linear_map<fp_t>::linear_map:\n"
-								"        min_int_in=%d and/or max_int_in=%d\n"
-								"        aren't in integer range [%d,%d] of existing linear_map!\n",
-								min_int_, max_int_,
-								lm_in.min_int(), lm_in.max_int()); /*NOTREACHED*/
-
-			constructor_common(lm_in.fp_of_int_unchecked(min_int_in),
-							   lm_in.fp_of_int_unchecked(max_int_in));
-		}
-
-		//******************************************************************************
-
-		//
-		// This function does the common argument validation and setup for
-		// all the constructors of class  linear_map<fp_t>:: .
-		//
-		template <typename fp_t>
-		void linear_map<fp_t>::constructor_common(fp_t min_fp_in, fp_t max_fp_in)
-		// assumes
-		//	min_int_, max_int_, delta_, inverse_delta_
-		// are already initialized
-		// ==> ok to use min_int(), max_int(), delta_fp(), inverse_delta_fp()
-		// ... other class members *not* yet initialized
-		{
-			origin_ = 0.0; // temp value
-			origin_ = min_fp_in - fp_of_int_unchecked(min_int());
-
-			// this should be guaranteed by the above calculation
-			assert(fuzzy<fp_t>::EQ(fp_of_int_unchecked(min_int()), min_fp_in));
-
-			// this is a test of the consistency of the input arguments
-			if (fuzzy<fp_t>::NE(fp_of_int_unchecked(max_int()), max_fp_in))
-				then error_exit(ERROR_EXIT,
-								"***** linear_map<fp_t>::linear_map:\n"
-								"        int range [%d,%d]\n"
-								"        and fp range [%g(%g)%g]\n"
-								"        are (fuzzily) inconsistent!\n",
-								min_int(), max_int(),
-								double(min_fp_in), double(delta_fp()), double(max_fp_in));
-			/*NOTREACHED*/
-		}
-
-		//******************************************************************************
-
-		//
-		// This function converts  fp  --> int  coordinate, returning the result
-		// as an fp (which need not be fuzzily integral).
-		//
-		template <typename fp_t>
-		fp_t linear_map<fp_t>::fp_int_of_fp(fp_t x)
-			const
-		{
-			if (!is_in_range(x))
-				then error_exit(ERROR_EXIT,
-								"***** linear_map<fp_t>::fp_int_of_fp:\n"
-								"        fp value x=%g is (fuzzily) outside the grid!\n"
-								"        {min(delta)max}_fp = %g(%g)%g\n",
-								double(x),
-								double(min_fp()), double(delta_fp()), double(max_fp()));
-			/*NOTREACHED*/
-
-			return inverse_delta_ * (x - origin_);
-		}
-
-		//******************************************************************************
-
-		//
-		// This function converts  fp  --> int  and checks that the result is
-		// fuzzily integral.  (The  nia  argument specifies what to do if the
-		// result *isn't* fuzzily integral.)
-		//
-		// FIXME:
-		// Having to explicitly specify the namespace for jtutil::round<fp_t>::
-		// is ++ugly. :(
-		//
-		template <typename fp_t>
-		int linear_map<fp_t>::int_of_fp(fp_t x, noninteger_action nia /* = nia_error */)
-			const
-		{
-			const fp_t fp_int = fp_int_of_fp(x);
-
-			if (fuzzy<fp_t>::is_integer(fp_int))
-				then
-				{
-					// x is (fuzzily) a grid point ==> return that
-					return jtutil::round<fp_t>::to_integer(fp_int); // *** EARLY RETURN ***
-				}
-
-			// get to here ==> x isn't (fuzzily) a grid point
-			static const char *const noninteger_msg =
-				"%s linear_map<fp_t>::int_of_fp:\n"
-				"        x=%g isn't (fuzzily) a grid point!\n"
-				"        {min(delta)max}_fp() = %g(%g)%g\n";
-			switch (nia)
-			{
-			case nia_error:
-				error_exit(ERROR_EXIT,
-						   noninteger_msg,
-						   "*****",
-						   double(x),
-						   double(min_fp()), double(delta_fp()), double(max_fp()));
-				/*NOTREACHED*/
-
-			case nia_warning:
-				printf(noninteger_msg,
-					   "---",
-					   double(x),
-					   double(min_fp()), double(delta_fp()), double(max_fp()));
-				// fall through
-
-			case nia_round:
-				return jtutil::round<fp_t>::to_integer(fp_int); // *** EARLY RETURN ***
-
-			case nia_floor:
-				return jtutil::round<fp_t>::floor(fp_int); // *** EARLY RETURN ***
-
-			case nia_ceiling:
-				return jtutil::round<fp_t>::ceiling(fp_int); // *** EARLY RETURN ***
-
-			default:
-				error_exit(PANIC_EXIT,
-						   "***** linear_map<fp_t>::int_of_fp: illegal nia=(int)%d\n"
-						   "                                   (this should never happen!)\n",
-						   int(nia)); /*NOTREACHED*/
-			}
-			return 0; // dummy return to quiet gcc
-					  // (which doesn't grok that error_exit() never returns)
-		}
-
-		//******************************************************************************
-
-		//
-		// This function converts "delta" spacings in the fp coordinate to
-		// corresponding "delta" spacings in the int coordinate, and checks that
-		// the result is fuzzily integral.  (The  nia  argument specifies what to
-		// do if the result *isn't* fuzzily integral.)
-		//
-		// FIXME:
-		// Having to explicitly specify the namespace for jtutil::round<fp_t>::
-		// is ++ugly. :(
-		//
-		template <typename fp_t>
-		int linear_map<fp_t>::delta_int_of_delta_fp(fp_t delta_x, noninteger_action nia /* = nia_error */)
-			const
-		{
-			const fp_t fp_delta_int = inverse_delta_ * delta_x;
-
-			if (fuzzy<fp_t>::is_integer(fp_delta_int))
-				then
-				{
-					// delta_x is (fuzzily) an integer number of grid spacings
-					// ==> return that
-					return jtutil::round<fp_t>::to_integer(fp_delta_int);
-					// *** EARLY RETURN ***
-				}
-
-			// get to here ==> delta_x isn't (fuzzily) an integer number of grid spacings
-			static const char *const noninteger_msg =
-				"%s linear_map<fp_t>::delta_int_of_delta_fp:\n"
-				"        delta_x=%g isn't (fuzzily) an integer number of grid spacings!\n"
-				"        {min(delta)max}_fp() = %g(%g)%g\n";
-			switch (nia)
-			{
-			case nia_error:
-				error_exit(ERROR_EXIT,
-						   noninteger_msg,
-						   "*****",
-						   double(delta_x),
-						   double(min_fp()), double(delta_fp()), double(max_fp()));
-				/*NOTREACHED*/
-
-			case nia_warning:
-				printf(noninteger_msg,
-					   "---",
-					   double(delta_x),
-					   double(min_fp()), double(delta_fp()), double(max_fp()));
-				// fall through
-
-			case nia_round:
-				return jtutil::round<fp_t>::to_integer(fp_delta_int);
-				// *** EARLY RETURN ***
-
-			case nia_floor:
-				return jtutil::round<fp_t>::floor(fp_delta_int); // *** EARLY RETURN ***
-
-			case nia_ceiling:
-				return jtutil::round<fp_t>::ceiling(fp_delta_int);
-				// *** EARLY RETURN ***
-
-			default:
-				error_exit(PANIC_EXIT,
-						   "***** linear_map<fp_t>::delta_int_of_delta_fp: illegal nia=(int)%d\n"
-						   "                                               (this should never happen!)\n",
-						   int(nia)); /*NOTREACHED*/
-			}
-			return 0; // dummy return to quiet gcc
-					  // (which doesn't grok that error_exit() never returns)
-		}
-
-		//******************************************************************************
-		//******************************************************************************
-		//******************************************************************************
-
-		//
-		// ***** template instantiation *****
-		//
-
-		template class linear_map<float>;
-		template class linear_map<double>;
-
-		//******************************************************************************
-		//******************************************************************************
-		//******************************************************************************
-
-	} // namespace jtutil
-} // namespace AHFinderDirect
+#include <assert.h>
+#include <stdio.h>
+
+#include "stdc.h"
+#include "util.h"
+#include "linear_map.h"
+
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+		//
+		// Construct a linear_map from an explicit integer range
+		// [min_int_in,max_int_in] and fp range min_fp_in(delta_fp_in)max_fp_in.
+		// The spacing and its reciprocal are stored here; constructor_common()
+		// then sets origin_ from min_fp_in and checks max_fp_in for consistency.
+		// NOTE(review): delta_fp_in == 0 is not rejected here, so inverse_delta_
+		// would be non-finite in that case -- confirm callers never pass 0.
+		//
+		template <typename fp_t>
+		linear_map<fp_t>::linear_map(int min_int_in, int max_int_in,
+									 fp_t min_fp_in, fp_t delta_fp_in, fp_t max_fp_in)
+			: delta_(delta_fp_in), inverse_delta_(1.0 / delta_fp_in),
+			  min_int_(min_int_in), max_int_(max_int_in)
+		{
+			constructor_common(min_fp_in, max_fp_in);
+		}
+
+		//
+		// Construct a linear_map covering the integer subrange
+		// [min_int_in,max_int_in] of an existing linear_map  lm_in .
+		// The spacing (and its cached reciprocal) are copied from  lm_in , and
+		// the fp endpoints are taken from  lm_in  at the two integer bounds, so
+		// each integer maps to the same fp value as in  lm_in  (up to roundoff).
+		// Calls error_exit() if either bound lies outside  lm_in 's integer range.
+		//
+		template <typename fp_t>
+		linear_map<fp_t>::linear_map(const linear_map<fp_t> &lm_in,
+									 int min_int_in, int max_int_in) // subrange
+			: delta_(lm_in.delta_fp()), inverse_delta_(lm_in.inverse_delta_fp()),
+			  min_int_(min_int_in), max_int_(max_int_in)
+		{
+			// The range check must be made against  lm_in : this object's own
+			// min_int_/max_int_ were just set to the requested bounds, so
+			// this->is_in_range() could only fail when min_int_in > max_int_in
+			// and would never notice a subrange sticking out of  lm_in .
+			if (!(lm_in.is_in_range(min_int_in) && lm_in.is_in_range(max_int_in)))
+				then error_exit(ERROR_EXIT,
+								"***** linear_map<fp_t>::linear_map:\n"
+								"        min_int_in=%d and/or max_int_in=%d\n"
+								"        aren't in integer range [%d,%d] of existing linear_map!\n",
+								min_int_in, max_int_in,
+								lm_in.min_int(), lm_in.max_int()); /*NOTREACHED*/
+
+			constructor_common(lm_in.fp_of_int_unchecked(min_int_in),
+							   lm_in.fp_of_int_unchecked(max_int_in));
+		}
+
+		//******************************************************************************
+
+		//
+		// This function does the common argument validation and setup for
+		// all the constructors of class  linear_map<fp_t>:: .
+		//
+		template <typename fp_t>
+		void linear_map<fp_t>::constructor_common(fp_t min_fp_in, fp_t max_fp_in)
+		// assumes
+		//	min_int_, max_int_, delta_, inverse_delta_
+		// are already initialized
+		// ==> ok to use min_int(), max_int(), delta_fp(), inverse_delta_fp()
+		// ... other class members *not* yet initialized
+		//
+		// On return origin_ is set so that min_int() maps to min_fp_in.
+		// Calls error_exit() if max_int() then doesn't (fuzzily) map to max_fp_in.
+		{
+			// fp_of_int_unchecked() reads origin_, so it is zeroed first:
+			// the call below then yields just the  delta_ * min_int()  term,
+			// and subtracting that from min_fp_in gives the true origin.
+			// (The order of these two statements matters.)
+			origin_ = 0.0; // temp value
+			origin_ = min_fp_in - fp_of_int_unchecked(min_int());
+
+			// this should be guaranteed by the above calculation
+			assert(fuzzy<fp_t>::EQ(fp_of_int_unchecked(min_int()), min_fp_in));
+
+			// this is a test of the consistency of the input arguments
+			// (i.e. that the int range, delta, and fp range agree with each other)
+			if (fuzzy<fp_t>::NE(fp_of_int_unchecked(max_int()), max_fp_in))
+				then error_exit(ERROR_EXIT,
+								"***** linear_map<fp_t>::linear_map:\n"
+								"        int range [%d,%d]\n"
+								"        and fp range [%g(%g)%g]\n"
+								"        are (fuzzily) inconsistent!\n",
+								min_int(), max_int(),
+								double(min_fp_in), double(delta_fp()), double(max_fp_in));
+			/*NOTREACHED*/
+		}
+
+		//******************************************************************************
+
+		//
+		// This function converts  fp  --> int  coordinate, returning the result
+		// as an fp (which need not be fuzzily integral).
+		//
+		template <typename fp_t>
+		fp_t linear_map<fp_t>::fp_int_of_fp(fp_t x) const
+		{
+			// refuse any x lying (fuzzily) outside [min_fp(),max_fp()]
+			const bool x_is_on_grid = is_in_range(x);
+			if (!x_is_on_grid)
+				then error_exit(ERROR_EXIT,
+								"***** linear_map<fp_t>::fp_int_of_fp:\n"
+								"        fp value x=%g is (fuzzily) outside the grid!\n"
+								"        {min(delta)max}_fp = %g(%g)%g\n",
+								double(x),
+								double(min_fp()), double(delta_fp()), double(max_fp()));
+			/*NOTREACHED*/
+
+			// invert  fp = origin_ + delta_*int  using the cached 1/delta_
+			const fp_t offset_from_origin = x - origin_;
+			return inverse_delta_ * offset_from_origin;
+		}
+
+		//******************************************************************************
+
+		//
+		// This function converts  fp  --> int  and checks that the result is
+		// fuzzily integral.  (The  nia  argument specifies what to do if the
+		// result *isn't* fuzzily integral.)
+		//
+		// FIXME:
+		// Having to explicitly specify the namespace for jtutil::round<fp_t>::
+		// is ++ugly. :(
+		//
+		template <typename fp_t>
+		int linear_map<fp_t>::int_of_fp(fp_t x, noninteger_action nia /* = nia_error */)
+			const
+		{
+			// fp-valued integer coordinate of x
+			// (fp_int_of_fp() also rejects x lying fuzzily outside the grid)
+			const fp_t fp_int = fp_int_of_fp(x);
+
+			if (fuzzy<fp_t>::is_integer(fp_int))
+				then
+				{
+					// x is (fuzzily) a grid point ==> return that
+					return jtutil::round<fp_t>::to_integer(fp_int); // *** EARLY RETURN ***
+				}
+
+			// get to here ==> x isn't (fuzzily) a grid point
+			// (the leading %s is filled in below with a severity marker,
+			//  "*****" for an error or "---" for a warning)
+			static const char *const noninteger_msg =
+				"%s linear_map<fp_t>::int_of_fp:\n"
+				"        x=%g isn't (fuzzily) a grid point!\n"
+				"        {min(delta)max}_fp() = %g(%g)%g\n";
+			switch (nia)
+			{
+			case nia_error:
+				// no  break  here: this relies on error_exit() not returning
+				error_exit(ERROR_EXIT,
+						   noninteger_msg,
+						   "*****",
+						   double(x),
+						   double(min_fp()), double(delta_fp()), double(max_fp()));
+				/*NOTREACHED*/
+
+			case nia_warning:
+				// print the message, then deliberately share the rounding
+				// code of the  nia_round  case
+				printf(noninteger_msg,
+					   "---",
+					   double(x),
+					   double(min_fp()), double(delta_fp()), double(max_fp()));
+				// fall through
+
+			case nia_round:
+				return jtutil::round<fp_t>::to_integer(fp_int); // *** EARLY RETURN ***
+
+			case nia_floor:
+				return jtutil::round<fp_t>::floor(fp_int); // *** EARLY RETURN ***
+
+			case nia_ceiling:
+				return jtutil::round<fp_t>::ceiling(fp_int); // *** EARLY RETURN ***
+
+			default:
+				error_exit(PANIC_EXIT,
+						   "***** linear_map<fp_t>::int_of_fp: illegal nia=(int)%d\n"
+						   "                                   (this should never happen!)\n",
+						   int(nia)); /*NOTREACHED*/
+			}
+			return 0; // dummy return to quiet gcc
+					  // (which doesn't grok that error_exit() never returns)
+		}
+
+		//******************************************************************************
+
+		//
+		// This function converts "delta" spacings in the fp coordinate to
+		// corresponding "delta" spacings in the int coordinate, and checks that
+		// the result is fuzzily integral.  (The  nia  argument specifies what to
+		// do if the result *isn't* fuzzily integral.)
+		//
+		// FIXME:
+		// Having to explicitly specify the namespace for jtutil::round<fp_t>::
+		// is ++ugly. :(
+		//
+		template <typename fp_t>
+		int linear_map<fp_t>::delta_int_of_delta_fp(fp_t delta_x, noninteger_action nia /* = nia_error */)
+			const
+		{
+			// number of grid spacings in delta_x, as an fp value
+			// (only the spacing is used: origin_ doesn't enter, and unlike
+			//  int_of_fp() there is no range check on the argument)
+			const fp_t fp_delta_int = inverse_delta_ * delta_x;
+
+			if (fuzzy<fp_t>::is_integer(fp_delta_int))
+				then
+				{
+					// delta_x is (fuzzily) an integer number of grid spacings
+					// ==> return that
+					return jtutil::round<fp_t>::to_integer(fp_delta_int);
+					// *** EARLY RETURN ***
+				}
+
+			// get to here ==> delta_x isn't (fuzzily) an integer number of grid spacings
+			// (the leading %s is filled in below with a severity marker,
+			//  "*****" for an error or "---" for a warning)
+			static const char *const noninteger_msg =
+				"%s linear_map<fp_t>::delta_int_of_delta_fp:\n"
+				"        delta_x=%g isn't (fuzzily) an integer number of grid spacings!\n"
+				"        {min(delta)max}_fp() = %g(%g)%g\n";
+			switch (nia)
+			{
+			case nia_error:
+				// no  break  here: this relies on error_exit() not returning
+				error_exit(ERROR_EXIT,
+						   noninteger_msg,
+						   "*****",
+						   double(delta_x),
+						   double(min_fp()), double(delta_fp()), double(max_fp()));
+				/*NOTREACHED*/
+
+			case nia_warning:
+				// print the message, then deliberately share the rounding
+				// code of the  nia_round  case
+				printf(noninteger_msg,
+					   "---",
+					   double(delta_x),
+					   double(min_fp()), double(delta_fp()), double(max_fp()));
+				// fall through
+
+			case nia_round:
+				return jtutil::round<fp_t>::to_integer(fp_delta_int);
+				// *** EARLY RETURN ***
+
+			case nia_floor:
+				return jtutil::round<fp_t>::floor(fp_delta_int); // *** EARLY RETURN ***
+
+			case nia_ceiling:
+				return jtutil::round<fp_t>::ceiling(fp_delta_int);
+				// *** EARLY RETURN ***
+
+			default:
+				error_exit(PANIC_EXIT,
+						   "***** linear_map<fp_t>::delta_int_of_delta_fp: illegal nia=(int)%d\n"
+						   "                                               (this should never happen!)\n",
+						   int(nia)); /*NOTREACHED*/
+			}
+			return 0; // dummy return to quiet gcc
+					  // (which doesn't grok that error_exit() never returns)
+		}
+
+		//******************************************************************************
+		//******************************************************************************
+		//******************************************************************************
+
+		//
+		// ***** template instantiation *****
+		//
+
+		template class linear_map<float>;
+		template class linear_map<double>;
+
+		//******************************************************************************
+		//******************************************************************************
+		//******************************************************************************
+
+	} // namespace jtutil
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/linear_map.h
+++ b/AMSS_NCKU_source/AHF_Direct/linear_map.h
@@ -1,131 +1,131 @@
-#ifndef AHFINDERDIRECT__LINEAR_MAP_HH
-#define AHFINDERDIRECT__LINEAR_MAP_HH
-namespace AHFinderDirect
-{
-	namespace jtutil
-	{
-
-		template <typename fp_t>
-		class linear_map
-		{
-		public:
-			// integer bounds info
-			int min_int() const { return min_int_; }
-			int max_int() const { return max_int_; }
-			int N_points() const
-			{
-				return jtutil::how_many_in_range(min_int_, max_int_);
-			}
-			bool is_in_range(int i) const
-			{
-				return (i >= min_int()) && (i <= max_int());
-			}
-			int clamp(int i) const
-			{
-				if (i < min_int())
-					then return min_int();
-				else if (i > max_int())
-					then return max_int();
-				else
-					return i;
-			}
-
-			// convert int --> fp
-			fp_t fp_of_int_unchecked(int i) const
-			{
-				return origin_ + delta_ * i;
-			}
-			fp_t fp_of_int(int i) const
-			{
-				assert(is_in_range(i));
-				return fp_of_int_unchecked(i);
-			}
-
-			// converg delta_int --> delta_fp
-			fp_t delta_fp_of_delta_int(int delta_i) const
-			{
-				return delta_ * delta_i;
-			}
-
-			// fp bounds info
-			fp_t origin() const { return origin_; }
-			fp_t delta_fp() const { return delta_; }
-			fp_t inverse_delta_fp() const { return inverse_delta_; }
-			fp_t min_fp() const { return fp_of_int_unchecked(min_int_); }
-			fp_t max_fp() const { return fp_of_int_unchecked(max_int_); }
-			bool is_in_range(fp_t x) const
-			{
-				return fuzzy<fp_t>::GE(x, min_fp()) && fuzzy<fp_t>::LE(x, max_fp());
-			}
-			fp_t clamp(fp_t x) const
-			{
-				if (x < min_fp())
-					then return min_fp();
-				else if (x > max_fp())
-					then return max_fp();
-				else
-					return x;
-			}
-
-			// convert linear map indices <--> C-style 0-origin indices
-			int zero_origin_int(int i) const { return i - min_int(); }
-			int map_int(int zero_origin_i) { return zero_origin_i + min_int(); }
-
-			// convert fp --> int coordinate, but return result as fp
-			// (which need not be fuzzily integral)
-			fp_t fp_int_of_fp(fp_t x) const;
-
-			// convert fp --> int, check being fuzzily integral
-			enum noninteger_action // what to do if "int"
-								   // isn't fuzzily integral?
-			{
-				nia_error,	 // jtutil::error_exit(...)
-				nia_warning, // print warning msg,
-							 // then round to nearest
-				nia_round,	 // (silently) round to nearest
-				nia_floor,	 // (silently) round to -infinity
-				nia_ceiling	 // (silently) round to +infinity
-			};
-			int int_of_fp(fp_t x, noninteger_action nia = nia_error) const;
-
-			// convert delta_fp --> delta_int, check being fuzzily integral
-			int delta_int_of_delta_fp(fp_t delta_x,
-									  noninteger_action nia = nia_error)
-				const;
-
-			// constructors
-			linear_map(int min_int_in, int max_int_in,
-					   fp_t min_fp_in, fp_t delta_fp_in, fp_t max_fp_in);
-			// ... construct with subrange of existing linear_map
-			linear_map(const linear_map<fp_t> &lm_in,
-					   int min_int_in, int max_int_in);
-
-			// no need for explicit destructor, compiler-generated no-op is ok
-
-			// no need for copy constructor or assignment operator,
-			// compiler-generated defaults are ok
-
-		private:
-			// common code (argument validation & setup) for all constructors
-			// assumes min_int_, max_int_, delta_ already initialized,
-			//         other class members *not* initialized
-			void constructor_common(fp_t min_fp_in, fp_t max_fp_in);
-
-			// these define the actual mapping
-			// via the  fp_of_int()  function (above)
-			fp_t origin_, delta_;
-
-			// cache of 1.0/delta_
-			// ==> avoids fp divide in inverse_delta_fp()
-			// ==> also makes fp --> int conversions slightly faster
-			fp_t inverse_delta_;
-
-			const int min_int_, max_int_;
-		};
-
-		//******************************************************************************
-
-	} // namespace jtutil
-} // namespace AHFinderDirect
-
-#endif /* AHFINDERDIRECT__LINEAR_MAP_HH */
+#ifndef AHFINDERDIRECT__LINEAR_MAP_HH
+#define AHFINDERDIRECT__LINEAR_MAP_HH
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+
+		//
+		// linear_map<fp_t> describes a linear mapping between an inclusive
+		// range of integers [min_int(),max_int()] and floating-point
+		// coordinates, defined by
+		//	fp = origin_ + delta_ * i
+		// It provides conversions in both directions, range tests, and
+		// clamping.
+		//
+		// This header uses  assert() ,  fuzzy<fp_t> ,  jtutil::how_many_in_range()
+		// and the  then  keyword-macro without including anything itself;
+		// presumably these come from headers the includer pulls in first
+		// (linear_map.C includes <assert.h>, "stdc.h" and "util.h") -- TODO confirm.
+		//
+		template <typename fp_t>
+		class linear_map
+		{
+		public:
+			// integer bounds info
+			int min_int() const { return min_int_; }
+			int max_int() const { return max_int_; }
+			int N_points() const
+			{
+				return jtutil::how_many_in_range(min_int_, max_int_);
+			}
+			bool is_in_range(int i) const
+			{
+				return (i >= min_int()) && (i <= max_int());
+			}
+			int clamp(int i) const
+			{
+				if (i < min_int())
+					then return min_int();
+				else if (i > max_int())
+					then return max_int();
+				else
+					return i;
+			}
+
+			// convert int --> fp
+			// (the _unchecked variant does no range check on  i )
+			fp_t fp_of_int_unchecked(int i) const
+			{
+				return origin_ + delta_ * i;
+			}
+			fp_t fp_of_int(int i) const
+			{
+				assert(is_in_range(i));
+				return fp_of_int_unchecked(i);
+			}
+
+			// convert delta_int --> delta_fp
+			fp_t delta_fp_of_delta_int(int delta_i) const
+			{
+				return delta_ * delta_i;
+			}
+
+			// fp bounds info
+			fp_t origin() const { return origin_; }
+			fp_t delta_fp() const { return delta_; }
+			fp_t inverse_delta_fp() const { return inverse_delta_; }
+			fp_t min_fp() const { return fp_of_int_unchecked(min_int_); }
+			fp_t max_fp() const { return fp_of_int_unchecked(max_int_); }
+			// n.b. this range test is fuzzy ...
+			bool is_in_range(fp_t x) const
+			{
+				return fuzzy<fp_t>::GE(x, min_fp()) && fuzzy<fp_t>::LE(x, max_fp());
+			}
+			// ... whereas clamping uses exact (non-fuzzy) comparisons
+			fp_t clamp(fp_t x) const
+			{
+				if (x < min_fp())
+					then return min_fp();
+				else if (x > max_fp())
+					then return max_fp();
+				else
+					return x;
+			}
+
+			// convert linear map indices <--> C-style 0-origin indices
+			// (both are read-only, so both are const member functions)
+			int zero_origin_int(int i) const { return i - min_int(); }
+			int map_int(int zero_origin_i) const { return zero_origin_i + min_int(); }
+
+			// convert fp --> int coordinate, but return result as fp
+			// (which need not be fuzzily integral)
+			fp_t fp_int_of_fp(fp_t x) const;
+
+			// convert fp --> int, check being fuzzily integral
+			enum noninteger_action // what to do if "int"
+								   // isn't fuzzily integral?
+			{
+				nia_error,	 // jtutil::error_exit(...)
+				nia_warning, // print warning msg,
+							 // then round to nearest
+				nia_round,	 // (silently) round to nearest
+				nia_floor,	 // (silently) round to -infinity
+				nia_ceiling	 // (silently) round to +infinity
+			};
+			int int_of_fp(fp_t x, noninteger_action nia = nia_error) const;
+
+			// convert delta_fp --> delta_int, check being fuzzily integral
+			int delta_int_of_delta_fp(fp_t delta_x,
+									  noninteger_action nia = nia_error)
+				const;
+
+			// constructors
+			linear_map(int min_int_in, int max_int_in,
+					   fp_t min_fp_in, fp_t delta_fp_in, fp_t max_fp_in);
+			// ... construct with subrange of existing linear_map
+			linear_map(const linear_map<fp_t> &lm_in,
+					   int min_int_in, int max_int_in);
+
+			// no need for explicit destructor, compiler-generated no-op is ok
+
+			// no need for an explicit copy constructor,
+			// the compiler-generated default is ok
+			// (n.b. objects of this class can't be assigned to: the const
+			//  data members below rule out a compiler-generated
+			//  assignment operator)
+
+		private:
+			// common code (argument validation & setup) for all constructors
+			// assumes min_int_, max_int_, delta_ already initialized,
+			//         other class members *not* initialized
+			void constructor_common(fp_t min_fp_in, fp_t max_fp_in);
+
+			// these define the actual mapping
+			// via the  fp_of_int()  function (above)
+			fp_t origin_, delta_;
+
+			// cache of 1.0/delta_
+			// ==> avoids fp divide in inverse_delta_fp()
+			// ==> also makes fp --> int conversions slightly faster
+			fp_t inverse_delta_;
+
+			// inclusive bounds of the integer range, fixed at construction
+			const int min_int_, max_int_;
+		};
+
+		//******************************************************************************
+
+	} // namespace jtutil
+} // namespace AHFinderDirect
+
+#endif /* AHFINDERDIRECT__LINEAR_MAP_HH */
--- a/AMSS_NCKU_source/AHF_Direct/miscfp.C
+++ b/AMSS_NCKU_source/AHF_Direct/miscfp.C
@@ -1,66 +1,66 @@
-#include <math.h>
-#include <stdlib.h>
-
-#include "cctk.h"
-
-#include "stdc.h"
-#include "util.h"
-
-namespace AHFinderDirect
-{
-	namespace jtutil
-	{
-		double signum(double x)
-		{
-			if (x == 0.0)
-				then return 0.0;
-			else
-				return (x > 0.0) ? 1.0 : -1.0;
-		}
-		double hypot3(double x, double y, double z)
-		{
-			return sqrt(x * x + y * y + z * z);
-		}
-		double arctan_xy(double x, double y)
-		{
-			return ((x == 0.0) && (y == 0.0)) ? 0.0 : atan2(y, x);
-		}
-		double modulo_reduce(double x, double xmod, double xmin, double xmax)
-		{
-			double xx = x;
-
-			while (fuzzy<double>::LT(xx, xmin))
-			{
-				xx += xmod;
-			}
-
-			while (fuzzy<double>::GT(xx, xmax))
-			{
-				xx -= xmod;
-			}
-
-			if (!(fuzzy<double>::GE(xx, xmin) && fuzzy<double>::LE(xx, xmax)))
-				then error_exit(ERROR_EXIT,
-								"***** modulo_reduce(): no modulo value is fuzzily within specified range!\n"
-								"                       x = %g   xmod = %g\n"
-								"                       [xmin,xmax] = [%g,%g]\n"
-								"                       ==> xx = %g\n",
-								x, xmod,
-								xmin, xmax,
-								xx); /*NOTREACHED*/
-
-			return xx;
-		}
-		template <typename fp_t>
-		void zero_C_array(int N, fp_t array[])
-		{
-			for (int i = 0; i < N; ++i)
-			{
-				array[i] = 0;
-			}
-		}
-
-		template void zero_C_array<CCTK_REAL>(int, CCTK_REAL[]);
-
-	} // namespace jtutil
-} // namespace AHFinderDirect
+#include <math.h>
+#include <stdlib.h>
+
+#include "cctk.h"
+
+#include "stdc.h"
+#include "util.h"
+
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+		// Sign of x as a double: 1.0 for x > 0, 0.0 for x == 0 (of either sign),
+		// and -1.0 for everything else.
+		double signum(double x)
+		{
+			if (x > 0.0)
+			{
+				return 1.0;
+			}
+			if (x == 0.0)
+			{
+				return 0.0;
+			}
+			return -1.0;
+		}
+		// Euclidean length of the 3-vector (x,y,z), computed directly as the
+		// square root of the sum of squares (no rescaling of the arguments).
+		double hypot3(double x, double y, double z)
+		{
+			const double sum_of_squares = x * x + y * y + z * z;
+			return sqrt(sum_of_squares);
+		}
+		// Angle of the point (x,y) as given by atan2(y,x), except that the
+		// origin (x == 0 and y == 0) is special-cased to return 0.0.
+		double arctan_xy(double x, double y)
+		{
+			const bool at_origin = (x == 0.0) && (y == 0.0);
+			if (at_origin)
+			{
+				return 0.0;
+			}
+			return atan2(y, x);
+		}
+		// Shift x by whole multiples of xmod until it lies (fuzzily) within
+		// [xmin,xmax], and return the shifted value.  Calls error_exit() if the
+		// shifting ends with a value that is not (fuzzily) inside that range.
+		double modulo_reduce(double x, double xmod, double xmin, double xmax)
+		{
+			double reduced = x;
+
+			// first step upwards for as long as we are (fuzzily) below xmin ...
+			for (; fuzzy<double>::LT(reduced, xmin); reduced += xmod)
+			{
+			}
+
+			// ... then step downwards for as long as we are (fuzzily) above xmax
+			for (; fuzzy<double>::GT(reduced, xmax); reduced -= xmod)
+			{
+			}
+
+			// the downward steps may have carried us back below xmin,
+			// in which case no shifted value fits
+			if (!fuzzy<double>::GE(reduced, xmin) || !fuzzy<double>::LE(reduced, xmax))
+				then error_exit(ERROR_EXIT,
+								"***** modulo_reduce(): no modulo value is fuzzily within specified range!\n"
+								"                       x = %g   xmod = %g\n"
+								"                       [xmin,xmax] = [%g,%g]\n"
+								"                       ==> xx = %g\n",
+								x, xmod,
+								xmin, xmax,
+								reduced); /*NOTREACHED*/
+
+			return reduced;
+		}
+		// Store zero into array[0] ... array[N-1]; does nothing if N <= 0.
+		template <typename fp_t>
+		void zero_C_array(int N, fp_t array[])
+		{
+			int idx = 0;
+			while (idx < N)
+			{
+				array[idx] = 0;
+				++idx;
+			}
+		}
+
+		template void zero_C_array<CCTK_REAL>(int, CCTK_REAL[]);
+
+	} // namespace jtutil
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/myglobal.h
+++ b/AMSS_NCKU_source/AHF_Direct/myglobal.h
@@ -1,65 +1,65 @@
-#ifndef MYGLOBAL_H
-#define MYGLOBAL_H
-
-#include "var.h"
-#include "MyList.h"
-
-#ifdef USE_GPU
-#include "bssn_gpu_class.h"
-#else
-#include "bssn_class.h"
-#endif
-
-#include "driver.h"
-
-namespace AHFinderDirect
-{
-
-	int globalInterpGFL(double *X, double *Y, double *Z, int Ns,
-						double *Data);
-
-	int globalInterpGFLlash(double *X, double *Y, double *Z, int Ns,
-							double *Data);
-
-	void AHFinderDirect_setup(MyList<var> *AHList, MyList<var> *GaugeList, bssn_class *ADM,
-							  int Symmetry, int HN, double *PhysTime);
-
-	void AHFinderDirect_cleanup();
-
-	void AHFinderDirect_find_horizons(int HN, int *dumpid,
-									  double *xc, double *yc, double *zc, double *xr, double *yr, double *zr,
-									  bool *trigger, double *);
-
-	void AHFinderDirect_enforcefind(int HN,
-									double *xc, double *yc, double *zc, double *xr, double *yr, double *zr);
-	//
-	struct state
-	{
-		int N_procs; // total number of processors
-		int my_proc; // processor number of this processor
-					 // (0 to N_procs-1)
-
-		int Symmetry;
-		double *PhysTime;
-
-		MyList<var> *AHList;
-		MyList<var> *GaugeList;
-
-		bssn_class *ADM;
-
-		int N_horizons; // total number of genuine horizons
-						// being searched for
-		int N_active_procs; // total number of active processors
-							// (the active processors are processor
-							//  numbers 0 to N_active_procs-1)
-
-		struct iteration_status_buffers isb;
-
-		horizon_sequence *my_hs;
-
-		struct AH_data **AH_data_array;
-
-		double *Data, *oX, *oY, *oZ;
-	};
-}
-#endif /* MYGLOBAL_H */
+#ifndef MYGLOBAL_H
+#define MYGLOBAL_H
+
+#include "var.h"
+#include "MyList.h"
+
+#ifdef USE_GPU
+#include "bssn_gpu_class.h"
+#else
+#include "bssn_class.h"
+#endif
+
+#include "driver.h"
+
+namespace AHFinderDirect
+{
+
+	int globalInterpGFL(double *X, double *Y, double *Z, int Ns,
+						double *Data);
+
+	int globalInterpGFLlash(double *X, double *Y, double *Z, int Ns,
+							double *Data);
+
+	void AHFinderDirect_setup(MyList<var> *AHList, MyList<var> *GaugeList, bssn_class *ADM,
+							  int Symmetry, int HN, double *PhysTime);
+
+	void AHFinderDirect_cleanup();
+
+	void AHFinderDirect_find_horizons(int HN, int *dumpid,
+									  double *xc, double *yc, double *zc, double *xr, double *yr, double *zr,
+									  bool *trigger, double *);
+
+	void AHFinderDirect_enforcefind(int HN,
+									double *xc, double *yc, double *zc, double *xr, double *yr, double *zr);
+	//
+	// Bundle of the data shared by the horizon-finder routines declared above.
+	// NOTE(review): the field meanings marked "presumably" below are inferred
+	// from names and from the AHFinderDirect_setup() parameter list only --
+	// confirm against the code that fills this struct in.
+	struct state
+	{
+		int N_procs; // total number of processors
+		int my_proc; // processor number of this processor
+					 // (0 to N_procs-1)
+
+		// presumably copies of the like-named AHFinderDirect_setup() arguments
+		int Symmetry;
+		double *PhysTime;
+
+		MyList<var> *AHList;
+		MyList<var> *GaugeList;
+
+		// bssn_class comes from bssn_gpu_class.h when USE_GPU is defined,
+		// otherwise from bssn_class.h (see the #ifdef at the top of this file)
+		bssn_class *ADM;
+
+		int N_horizons; // total number of genuine horizons
+						// being searched for
+		int N_active_procs; // total number of active processors
+							// (the active processors are processor
+							//  numbers 0 to N_active_procs-1)
+
+		struct iteration_status_buffers isb;
+
+		horizon_sequence *my_hs;
+
+		// presumably one entry per horizon -- TODO confirm indexing/ownership
+		struct AH_data **AH_data_array;
+
+		// presumably work buffers for the globalInterpGFL*() calls
+		// (coordinates in oX/oY/oZ, results in Data) -- TODO confirm
+		double *Data, *oX, *oY, *oZ;
+	};
+}
+#endif /* MYGLOBAL_H */
--- a/AMSS_NCKU_source/AHF_Direct/norm.C
+++ b/AMSS_NCKU_source/AHF_Direct/norm.C
@@ -1,68 +1,68 @@
-#include <math.h>
-#include <assert.h>
-#include <stdlib.h>
-
-#include "util.h"
-
-namespace AHFinderDirect
-{
-	namespace jtutil
-	{
-
-		template <typename fp_t>
-		norm<fp_t>::norm()
-			: N_(0L),
-			  sum_(0.0), sum2_(0.0),
-			  max_abs_value_(0.0), min_abs_value_(0.0),
-			  max_value_(0.0), min_value_(0.0)
-		{
-		}
-
-		template <typename fp_t>
-		void norm<fp_t>::reset()
-		{
-			N_ = 0L;
-			sum_ = 0.0;
-			sum2_ = 0.0;
-			max_abs_value_ = 0.0;
-			min_abs_value_ = 0.0;
-			max_value_ = 0.0;
-			min_value_ = 0.0;
-		}
-
-		template <typename fp_t>
-		void norm<fp_t>::data(fp_t x)
-		{
-			sum_ += x;
-			sum2_ += x * x;
-
-			const fp_t abs_x = jtutil::abs<fp_t>(x);
-			max_abs_value_ = jtutil::tmax(max_abs_value_, abs_x);
-			min_abs_value_ = (N_ == 0) ? abs_x : jtutil::tmin(min_abs_value_, abs_x);
-
-			min_value_ = (N_ == 0) ? x : jtutil::tmin(min_value_, x);
-			max_value_ = (N_ == 0) ? x : jtutil::tmax(max_value_, x);
-
-			++N_;
-		}
-
-		template <typename fp_t>
-		fp_t norm<fp_t>::mean() const { return sum_ / fp_t(N_); }
-		template <typename fp_t>
-		fp_t norm<fp_t>::two_norm() const { return sqrt(sum2_); }
-		template <typename fp_t>
-		fp_t norm<fp_t>::rms_norm() const
-		{
-			assert(is_nonempty());
-			return sqrt(sum2_ / fp_t(N_));
-		}
-
-		template class jtutil::norm<float>;
-		template class jtutil::norm<double>;
-
-		//******************************************************************************
-		//******************************************************************************
-		//******************************************************************************
-
-	} // namespace jtutil
-} // namespace AHFinderDirect
+#include <math.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#include "util.h"
+
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+
+		template <typename fp_t>
+		norm<fp_t>::norm()
+			: N_(0L),
+			  sum_(0.0), sum2_(0.0),
+			  max_abs_value_(0.0), min_abs_value_(0.0),
+			  max_value_(0.0), min_value_(0.0)
+		{
+		}
+
+		template <typename fp_t>
+		void norm<fp_t>::reset()
+		{
+			N_ = 0L;
+			sum_ = 0.0;
+			sum2_ = 0.0;
+			max_abs_value_ = 0.0;
+			min_abs_value_ = 0.0;
+			max_value_ = 0.0;
+			min_value_ = 0.0;
+		}
+
+		template <typename fp_t>
+		void norm<fp_t>::data(fp_t x)
+		{
+			sum_ += x;
+			sum2_ += x * x;
+
+			const fp_t abs_x = jtutil::abs<fp_t>(x);
+			max_abs_value_ = jtutil::tmax(max_abs_value_, abs_x);
+			min_abs_value_ = (N_ == 0) ? abs_x : jtutil::tmin(min_abs_value_, abs_x);
+
+			min_value_ = (N_ == 0) ? x : jtutil::tmin(min_value_, x);
+			max_value_ = (N_ == 0) ? x : jtutil::tmax(max_value_, x);
+
+			++N_;
+		}
+
+		template <typename fp_t>
+		fp_t norm<fp_t>::mean() const { return sum_ / fp_t(N_); }
+		template <typename fp_t>
+		fp_t norm<fp_t>::two_norm() const { return sqrt(sum2_); }
+		template <typename fp_t>
+		fp_t norm<fp_t>::rms_norm() const
+		{
+			assert(is_nonempty());
+			return sqrt(sum2_ / fp_t(N_));
+		}
+
+		template class jtutil::norm<float>;
+		template class jtutil::norm<double>;
+
+		//******************************************************************************
+		//******************************************************************************
+		//******************************************************************************
+
+	} // namespace jtutil
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/patch.C
+++ b/AMSS_NCKU_source/AHF_Direct/patch.C
--- a/AMSS_NCKU_source/AHF_Direct/patch.h
+++ b/AMSS_NCKU_source/AHF_Direct/patch.h
--- a/AMSS_NCKU_source/AHF_Direct/patch_edge.h
+++ b/AMSS_NCKU_source/AHF_Direct/patch_edge.h
@@ -1,320 +1,320 @@
-#ifndef TPATCH_EDGE_H
-#define TPATCH_EDGE_H
-namespace AHFinderDirect
-{
-
-	//*****************************************************************************
-
-	//
-	// patch_edge -- perpendicular/parallel geometry of one side of a patch
-	//
-	// A  patch_edge  object is a very light-weight object which represents
-	// the basic geometry of a min/max rho/sigma side of a patch, i.e. it
-	// provides which-side-am-I predicates, coordinate conversions between
-	// (perp,par) and (rho,sigma), etc.  Every patch has (points to) 4  patch_edge
-	//  objects, one for each of the patch's sides.  See the comments in
-	// "patch.hh" for a "big picture" discussion of patches, patch edges,
-	// ghost zones, and patch interpolation regions.
-	//
-	// Note that since  patch_edge  has only  const  member functions
-	// (and members!), a  patch_edge  object is effectively always  const .
-	// This means there's no harm in always declaring  patch_edge  objects
-	// to be  const .
-	//
-
-	class patch_edge
-	{
-	public:
-		//
-		// ***** meta-info *****
-		//
-
-		// meta-info about patch
-		patch &my_patch() const { return my_patch_; }
-
-		// meta-info about edge
-		bool is_rho() const { return is_rho_; }
-		bool is_min() const { return is_min_; }
-		bool perp_is_rho() const { return is_rho(); }
-		bool par_is_rho() const { return !is_rho(); }
-
-		// human-readable {min,max}_{rho,sigma} name (for debugging etc)
-		const char *name() const
-		{
-			return is_min()
-					   ? (is_rho() ? "min_rho" : "min_sigma")
-					   : (is_rho() ? "max_rho" : "max_sigma");
-		}
-
-		// are two edges really the same edge?
-		bool operator==(const patch_edge &other_edge) const
-		{
-			return (my_patch() == other_edge.my_patch()) && (is_rho() == other_edge.is_rho()) && (is_min() == other_edge.is_min());
-		}
-		bool operator!=(const patch_edge &other_edge) const
-		{
-			return !operator==(other_edge);
-		}
-
-		//
-		// ***** adjacent edges *****
-		//
-
-		// get adjacent edges to our min/max par corners
-		const patch_edge &min_par_adjacent_edge() const
-		{
-			return my_patch()
-				.minmax_ang_patch_edge(grid::side_is_min, par_is_rho());
-		}
-		const patch_edge &max_par_adjacent_edge() const
-		{
-			return my_patch()
-				.minmax_ang_patch_edge(grid::side_is_max, par_is_rho());
-		}
-		const patch_edge &minmax_par_adjacent_edge(bool want_min) const
-		{
-			return want_min ? min_par_adjacent_edge()
-							: max_par_adjacent_edge();
-		}
-
-		//
-		// ***** gridfn subscripting and coordinate maps *****
-		//
-
-		// gridfn strides perpendicular/parallel to the edge
-		int perp_stride() const
-		{
-			return my_patch().iang_stride(perp_is_rho());
-		}
-		int par_stride() const
-		{
-			return my_patch().iang_stride(par_is_rho());
-		}
-		int ghosted_perp_stride() const
-		{
-			return my_patch().ghosted_iang_stride(perp_is_rho());
-		}
-		int ghosted_par_stride() const
-		{
-			return my_patch().ghosted_iang_stride(par_is_rho());
-		}
-
-		// coordinate maps perpendicular/parallel to the edge
-		// ... range is that of the grid *including* ghost zones
-		const jtutil::linear_map<fp> &perp_map() const
-		{
-			return my_patch().ang_map(perp_is_rho());
-		}
-		const jtutil::linear_map<fp> &par_map() const
-		{
-			return my_patch().ang_map(par_is_rho());
-		}
-
-		// meta-info about perp/par coordinates
-		// ... as (mu,nu,phi) tensor indices
-		local_coords::coords_set coords_set_perp() const
-		{
-			return perp_is_rho() ? my_patch().coords_set_rho()
-								 : my_patch().coords_set_sigma();
-		}
-		local_coords::coords_set coords_set_par() const
-		{
-			return par_is_rho() ? my_patch().coords_set_rho()
-								: my_patch().coords_set_sigma();
-		}
-
-		//
-		// ***** coordinate conversions *****
-		//
-
-		// coordinate conversions based on ghost zone direction
-		// ... (iperp,ipar) <--> (perp,par)
-		fp perp_of_iperp(int iperp) const
-		{
-			return my_patch().ang_of_iang(perp_is_rho(), iperp);
-		}
-		fp par_of_ipar(int ipar) const
-		{
-			return my_patch().ang_of_iang(par_is_rho(), ipar);
-		}
-		fp fp_iperp_of_perp(fp perp) const
-		{
-			return my_patch().fp_iang_of_ang(perp_is_rho(), perp);
-		}
-		fp fp_ipar_of_par(fp par) const
-		{
-			return my_patch().fp_iang_of_ang(par_is_rho(), par);
-		}
-		int iperp_of_perp(fp perp, jtutil::linear_map<fp>::noninteger_action
-									   nia = jtutil::linear_map<fp>::nia_error)
-		{
-			return my_patch().iang_of_ang(perp_is_rho(), perp, nia);
-		}
-		int ipar_of_par(fp par, jtutil::linear_map<fp>::noninteger_action
-									nia = jtutil::linear_map<fp>::nia_error)
-		{
-			return my_patch().iang_of_ang(par_is_rho(), par, nia);
-		}
-
-		// ... (perp,par) --> (rho,sigma)
-		int irho_of_iperp_ipar(int iperp, int ipar) const
-		{
-			return perp_is_rho() ? iperp : ipar;
-		}
-		int isigma_of_iperp_ipar(int iperp, int ipar) const
-		{
-			return perp_is_rho() ? ipar : iperp;
-		}
-		fp rho_of_perp_par(fp perp, fp par) const
-		{
-			return perp_is_rho() ? perp : par;
-		}
-		fp sigma_of_perp_par(fp perp, fp par) const
-		{
-			return perp_is_rho() ? par : perp;
-		}
-		// ... (rho,sigma) --> (perp,par)
-		int iperp_of_irho_isigma(int irho, int isigma) const
-		{
-			return perp_is_rho() ? irho : isigma;
-		}
-		int ipar_of_irho_isigma(int irho, int isigma) const
-		{
-			return par_is_rho() ? irho : isigma;
-		}
-		fp perp_of_rho_sigma(fp rho, fp sigma) const
-		{
-			return perp_is_rho() ? rho : sigma;
-		}
-		fp par_of_rho_sigma(fp rho, fp sigma) const
-		{
-			return par_is_rho() ? rho : sigma;
-		}
-
-		// outer perp of nominal grid on this edge
-		// ... this is outermost *grid point*
-		fp grid_outer_iperp() const
-		{
-			return my_patch().minmax_iang(is_min(), is_rho());
-		}
-		// ... this is actual outer edge of grid
-		//     (might be halfway between two grid points)
-		fp grid_outer_perp() const
-		{
-			return my_patch().minmax_ang(is_min(), is_rho());
-		}
-		// ... this is grid_outer_perp() converted back to the iperp
-		//     coordinate, but still returned as floating-point;
-		//     it will be either integer or half-integer
-		fp fp_grid_outer_iperp() const
-		{
-			return fp_iperp_of_perp(grid_outer_perp());
-		}
-
-		//
-		// ***** min/max/outer coordinates of edge *****
-		//
-
-		// min/max/size ipar of the edge
-		// (these are exteme limits for any iperp, a given ghost zone
-		//  or interpolation region may have tighter and/or iperp-dependent
-		// limits)
-		// ... not including corners
-		int min_ipar_without_corners() const
-		{
-			return my_patch().min_iang(par_is_rho());
-		}
-		int max_ipar_without_corners() const
-		{
-			return my_patch().max_iang(par_is_rho());
-		}
-		// ... including corners
-		int min_ipar_with_corners() const
-		{
-			return my_patch().ghosted_min_iang(par_is_rho());
-		}
-		int max_ipar_with_corners() const
-		{
-			return my_patch().ghosted_max_iang(par_is_rho());
-		}
-		// ... of the corners themselves
-		int min_ipar_corner__min_ipar() const
-		{
-			return min_ipar_with_corners();
-		}
-		int min_ipar_corner__max_ipar() const
-		{
-			return min_ipar_without_corners() - 1;
-		}
-		int max_ipar_corner__min_ipar() const
-		{
-			return max_ipar_without_corners() + 1;
-		}
-		int max_ipar_corner__max_ipar() const
-		{
-			return max_ipar_with_corners();
-		}
-
-		// membership predicates for ipar corners, non-corners
-		bool ipar_is_in_min_ipar_corner(int ipar) const
-		{
-			return (ipar >= min_ipar_corner__min_ipar()) && (ipar <= min_ipar_corner__max_ipar());
-		}
-		bool ipar_is_in_max_ipar_corner(int ipar) const
-		{
-			return (ipar >= max_ipar_corner__min_ipar()) && (ipar <= max_ipar_corner__max_ipar());
-		}
-		bool ipar_is_in_corner(int ipar) const
-		{
-			return ipar_is_in_min_ipar_corner(ipar) || ipar_is_in_max_ipar_corner(ipar);
-		}
-		bool ipar_is_in_noncorner(int ipar) const
-		{
-			return (ipar >= min_ipar_without_corners()) && (ipar <= max_ipar_without_corners());
-		}
-
-		// convenience function selecting amongst the above
-		// membership predicates
-		bool ipar_is_in_selected_part(bool want_corners,
-									  bool want_noncorner,
-									  int ipar)
-			const
-		{
-			return (want_corners && ipar_is_in_corner(ipar)) || (want_noncorner && ipar_is_in_noncorner(ipar));
-		}
-
-		// outer (farthest from patch center) iperp of nominal grid
-		int nominal_grid_outer_iperp() const
-		{
-			return my_patch()
-				.minmax_iang(is_min(), is_rho());
-		}
-
-		//
-		// ***** constructor, destructor *****
-		//
-
-		patch_edge(patch &my_patch_in,
-				   bool is_min_in, bool is_rho_in)
-			: my_patch_(my_patch_in),
-			  is_min_(is_min_in), is_rho_(is_rho_in)
-		{
-		}
-		// compiler-synthesized (no-op) destructor is fine
-
-	private:
-		// we forbid copying and passing by value
-		// by declaring the copy constructor and assignment operator
-		// private, but never defining them
-		patch_edge(const patch_edge &rhs);
-		patch_edge &operator=(const patch_edge &rhs);
-
-	private:
-		patch &my_patch_;
-		const bool is_min_, is_rho_;
-	};
-
-	//******************************************************************************
-
-} // namespace AHFinderDirect
-#endif /* TPATCH_EDGE_H */
+#ifndef TPATCH_EDGE_H
+#define TPATCH_EDGE_H
+namespace AHFinderDirect
+{
+
+	//*****************************************************************************
+
+	//
+	// patch_edge -- perpendicular/parallel geometry of one side of a patch
+	//
+	// A  patch_edge  object is a very light-weight object which represents
+	// the basic geometry of a min/max rho/sigma side of a patch, i.e. it
+	// provides which-side-am-I predicates, coordinate conversions between
+	// (perp,par) and (rho,sigma), etc.  Every patch has (points to) 4  patch_edge
+	//  objects, one for each of the patch's sides.  See the comments in
+	// "patch.h" for a "big picture" discussion of patches, patch edges,
+	// ghost zones, and patch interpolation regions.
+	//
+	// Note that since  patch_edge  has only  const  member functions
+	// (and members!), a  patch_edge  object is effectively always  const .
+	// This means there's no harm in always declaring  patch_edge  objects
+	// to be  const .
+	//
+
+	class patch_edge
+	{
+	public:
+		//
+		// ***** meta-info *****
+		//
+
+		// meta-info about patch
+		patch &my_patch() const { return my_patch_; }
+
+		// meta-info about edge
+		bool is_rho() const { return is_rho_; }
+		bool is_min() const { return is_min_; }
+		bool perp_is_rho() const { return is_rho(); }
+		bool par_is_rho() const { return !is_rho(); }
+
+		// human-readable {min,max}_{rho,sigma} name (for debugging etc)
+		const char *name() const
+		{
+			return is_min()
+					   ? (is_rho() ? "min_rho" : "min_sigma")
+					   : (is_rho() ? "max_rho" : "max_sigma");
+		}
+
+		// are two edges really the same edge?
+		bool operator==(const patch_edge &other_edge) const
+		{
+			return (my_patch() == other_edge.my_patch()) && (is_rho() == other_edge.is_rho()) && (is_min() == other_edge.is_min());
+		}
+		bool operator!=(const patch_edge &other_edge) const
+		{
+			return !operator==(other_edge);
+		}
+
+		//
+		// ***** adjacent edges *****
+		//
+
+		// get adjacent edges to our min/max par corners
+		const patch_edge &min_par_adjacent_edge() const
+		{
+			return my_patch()
+				.minmax_ang_patch_edge(grid::side_is_min, par_is_rho());
+		}
+		const patch_edge &max_par_adjacent_edge() const
+		{
+			return my_patch()
+				.minmax_ang_patch_edge(grid::side_is_max, par_is_rho());
+		}
+		const patch_edge &minmax_par_adjacent_edge(bool want_min) const
+		{
+			return want_min ? min_par_adjacent_edge()
+							: max_par_adjacent_edge();
+		}
+
+		//
+		// ***** gridfn subscripting and coordinate maps *****
+		//
+
+		// gridfn strides perpendicular/parallel to the edge
+		int perp_stride() const
+		{
+			return my_patch().iang_stride(perp_is_rho());
+		}
+		int par_stride() const
+		{
+			return my_patch().iang_stride(par_is_rho());
+		}
+		int ghosted_perp_stride() const
+		{
+			return my_patch().ghosted_iang_stride(perp_is_rho());
+		}
+		int ghosted_par_stride() const
+		{
+			return my_patch().ghosted_iang_stride(par_is_rho());
+		}
+
+		// coordinate maps perpendicular/parallel to the edge
+		// ... range is that of the grid *including* ghost zones
+		const jtutil::linear_map<fp> &perp_map() const
+		{
+			return my_patch().ang_map(perp_is_rho());
+		}
+		const jtutil::linear_map<fp> &par_map() const
+		{
+			return my_patch().ang_map(par_is_rho());
+		}
+
+		// meta-info about perp/par coordinates
+		// ... as (mu,nu,phi) tensor indices
+		local_coords::coords_set coords_set_perp() const
+		{
+			return perp_is_rho() ? my_patch().coords_set_rho()
+								 : my_patch().coords_set_sigma();
+		}
+		local_coords::coords_set coords_set_par() const
+		{
+			return par_is_rho() ? my_patch().coords_set_rho()
+								: my_patch().coords_set_sigma();
+		}
+
+		//
+		// ***** coordinate conversions *****
+		//
+
+		// coordinate conversions based on ghost zone direction
+		// ... (iperp,ipar) <--> (perp,par)
+		fp perp_of_iperp(int iperp) const
+		{
+			return my_patch().ang_of_iang(perp_is_rho(), iperp);
+		}
+		fp par_of_ipar(int ipar) const
+		{
+			return my_patch().ang_of_iang(par_is_rho(), ipar);
+		}
+		fp fp_iperp_of_perp(fp perp) const
+		{
+			return my_patch().fp_iang_of_ang(perp_is_rho(), perp);
+		}
+		fp fp_ipar_of_par(fp par) const
+		{
+			return my_patch().fp_iang_of_ang(par_is_rho(), par);
+		}
+		int iperp_of_perp(fp perp, jtutil::linear_map<fp>::noninteger_action
+									   nia = jtutil::linear_map<fp>::nia_error) const
+		{ // const like the other conversions, so it can be called through a const patch_edge&
+			return my_patch().iang_of_ang(perp_is_rho(), perp, nia); // forwards perp and nia to the patch, perp axis
+		}
+		int ipar_of_par(fp par, jtutil::linear_map<fp>::noninteger_action
+									nia = jtutil::linear_map<fp>::nia_error) const
+		{ // const like the other conversions, so it can be called through a const patch_edge&
+			return my_patch().iang_of_ang(par_is_rho(), par, nia); // forwards par and nia to the patch, par axis
+		}
+
+		// ... (perp,par) --> (rho,sigma)
+		int irho_of_iperp_ipar(int iperp, int ipar) const
+		{
+			return perp_is_rho() ? iperp : ipar;
+		}
+		int isigma_of_iperp_ipar(int iperp, int ipar) const
+		{
+			return perp_is_rho() ? ipar : iperp;
+		}
+		fp rho_of_perp_par(fp perp, fp par) const
+		{
+			return perp_is_rho() ? perp : par;
+		}
+		fp sigma_of_perp_par(fp perp, fp par) const
+		{
+			return perp_is_rho() ? par : perp;
+		}
+		// ... (rho,sigma) --> (perp,par)
+		int iperp_of_irho_isigma(int irho, int isigma) const
+		{
+			return perp_is_rho() ? irho : isigma;
+		}
+		int ipar_of_irho_isigma(int irho, int isigma) const
+		{
+			return par_is_rho() ? irho : isigma;
+		}
+		fp perp_of_rho_sigma(fp rho, fp sigma) const
+		{
+			return perp_is_rho() ? rho : sigma;
+		}
+		fp par_of_rho_sigma(fp rho, fp sigma) const
+		{
+			return par_is_rho() ? rho : sigma;
+		}
+
+		// outer perp of nominal grid on this edge
+		// ... this is outermost *grid point*
+		fp grid_outer_iperp() const
+		{
+			return my_patch().minmax_iang(is_min(), is_rho());
+		}
+		// ... this is actual outer edge of grid
+		//     (might be halfway between two grid points)
+		fp grid_outer_perp() const
+		{
+			return my_patch().minmax_ang(is_min(), is_rho());
+		}
+		// ... this is grid_outer_perp() converted back to the iperp
+		//     coordinate, but still returned as floating-point;
+		//     it will be either integer or half-integer
+		fp fp_grid_outer_iperp() const
+		{
+			return fp_iperp_of_perp(grid_outer_perp());
+		}
+
+		//
+		// ***** min/max/outer coordinates of edge *****
+		//
+
+		// min/max/size ipar of the edge
+		// (these are extreme limits for any iperp, a given ghost zone
+		//  or interpolation region may have tighter and/or iperp-dependent
+		// limits)
+		// ... not including corners
+		int min_ipar_without_corners() const
+		{
+			return my_patch().min_iang(par_is_rho());
+		}
+		int max_ipar_without_corners() const
+		{
+			return my_patch().max_iang(par_is_rho());
+		}
+		// ... including corners
+		int min_ipar_with_corners() const
+		{
+			return my_patch().ghosted_min_iang(par_is_rho());
+		}
+		int max_ipar_with_corners() const
+		{
+			return my_patch().ghosted_max_iang(par_is_rho());
+		}
+		// ... of the corners themselves
+		int min_ipar_corner__min_ipar() const
+		{
+			return min_ipar_with_corners();
+		}
+		int min_ipar_corner__max_ipar() const
+		{
+			return min_ipar_without_corners() - 1;
+		}
+		int max_ipar_corner__min_ipar() const
+		{
+			return max_ipar_without_corners() + 1;
+		}
+		int max_ipar_corner__max_ipar() const
+		{
+			return max_ipar_with_corners();
+		}
+
+		// membership predicates for ipar corners, non-corners
+		bool ipar_is_in_min_ipar_corner(int ipar) const
+		{
+			return (ipar >= min_ipar_corner__min_ipar()) && (ipar <= min_ipar_corner__max_ipar());
+		}
+		bool ipar_is_in_max_ipar_corner(int ipar) const
+		{
+			return (ipar >= max_ipar_corner__min_ipar()) && (ipar <= max_ipar_corner__max_ipar());
+		}
+		bool ipar_is_in_corner(int ipar) const
+		{
+			return ipar_is_in_min_ipar_corner(ipar) || ipar_is_in_max_ipar_corner(ipar);
+		}
+		bool ipar_is_in_noncorner(int ipar) const
+		{
+			return (ipar >= min_ipar_without_corners()) && (ipar <= max_ipar_without_corners());
+		}
+
+		// convenience function selecting amongst the above
+		// membership predicates
+		bool ipar_is_in_selected_part(bool want_corners,
+									  bool want_noncorner,
+									  int ipar)
+			const
+		{
+			return (want_corners && ipar_is_in_corner(ipar)) || (want_noncorner && ipar_is_in_noncorner(ipar));
+		}
+
+		// outer (farthest from patch center) iperp of nominal grid
+		int nominal_grid_outer_iperp() const
+		{
+			return my_patch()
+				.minmax_iang(is_min(), is_rho());
+		}
+
+		//
+		// ***** constructor, destructor *****
+		//
+
+		patch_edge(patch &my_patch_in,
+				   bool is_min_in, bool is_rho_in)
+			: my_patch_(my_patch_in),
+			  is_min_(is_min_in), is_rho_(is_rho_in)
+		{
+		}
+		// compiler-synthesized (no-op) destructor is fine
+
+	private:
+		// we forbid copying and passing by value
+		// by declaring the copy constructor and assignment operator
+		// private, but never defining them
+		patch_edge(const patch_edge &rhs);
+		patch_edge &operator=(const patch_edge &rhs);
+
+	private:
+		patch &my_patch_;
+		const bool is_min_, is_rho_;
+	};
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /* TPATCH_EDGE_H */
--- a/AMSS_NCKU_source/AHF_Direct/patch_info.C
+++ b/AMSS_NCKU_source/AHF_Direct/patch_info.C
@@ -1,187 +1,187 @@
-#include <stdio.h>
-#include <math.h>
-#include <assert.h>
-
-#include "cctk.h"
-
-#include "config.h"
-#include "stdc.h"
-#include "util.h"
-#include "array.h"
-#include "cpm_map.h"
-#include "linear_map.h"
-
-#include "coords.h"
-#include "tgrid.h"
-#include "patch_info.h"
-
-namespace AHFinderDirect
-	  {
-using jtutil::error_exit;
-
-//******************************************************************************
-//******************************************************************************
-//******************************************************************************
-
-//
-// This function computes, and returns a reference to, a
-//  struct grid_arrays::grid_array_pars  from the info in a
-//  struct patch_info  and the additional information in the arguments.
-//
-// The result refers to an internal static buffer in this function; the
-// usual caveats about lifetimes/overwriting apply.
-//
-// Arguments:
-// ghost_zone_width = Width in grid points of all ghost zones.
-// patch_extend_width = Number of grid points to extend each patch past
-//		     "just touching" so as to overlap neighboring patches.
-//		     Thus patches overlap by
-//			patch_overlap_width = 2*patch_extend_width + 1
-//		     grid points.  For example, with patch_extend_width == 2,
-//		     here are the grid points of two neighboring patches:
-//			x   x   x   x   x   X   X
-//                                      |
-//			        O   O   o   o   o   o   o
-//		     Here | marks the "just touching" boundary,
-//		     x and o the grid points before this extension,
-//		     and X and O the extra grid points added by this
-//		     extension.
-// N_zones_per_right_angle = This sets the grid spacing (same in both
-//			     directions) to 90.0 / N_zones_per_right_angle.
-//			     It's a fatal error (error_exit()) if this
-//			     doesn't evenly divide the grid sizes in both
-//			     directions.
-//
-const grid_arrays::grid_array_pars&
-  patch_info::grid_array_pars(int ghost_zone_width, int patch_extend_width,
-			      int N_zones_per_right_angle)
-	const
-{
-static
-  struct grid_arrays::grid_array_pars grid_array_pars_buffer;
-
-//
-// the values of min_(irho,isigma) are actually arbitrary, but for
-// debugging convenience it's handy to have (irho,isigma) ranges map
-// one-to-one with (rho,sigma) ranges across all patches; the assignments
-// here have this property
-//
-const fp delta_drho_dsigma = 90.0 / fp(N_zones_per_right_angle);
-grid_array_pars_buffer.min_irho
-	= jtutil::round<fp>::to_integer(min_drho  /delta_drho_dsigma);
-grid_array_pars_buffer.min_isigma
-	= jtutil::round<fp>::to_integer(min_dsigma/delta_drho_dsigma);
-
-verify_grid_spacing_ok(N_zones_per_right_angle);
-const int N_irho_zones
-	= jtutil::round<fp>::to_integer(
-		   fp(N_zones_per_right_angle) * (max_drho  -min_drho  ) / 90.0
-				       );
-const int N_isigma_zones
-	= jtutil::round<fp>::to_integer(
-		   fp(N_zones_per_right_angle) * (max_dsigma-min_dsigma) / 90.0
-				       );
-
-grid_array_pars_buffer.max_irho
-	= grid_array_pars_buffer.min_irho   + N_irho_zones;
-grid_array_pars_buffer.max_isigma
-	= grid_array_pars_buffer.min_isigma + N_isigma_zones;
-
-grid_array_pars_buffer.min_irho   -= patch_extend_width;
-grid_array_pars_buffer.min_isigma -= patch_extend_width;
-grid_array_pars_buffer.max_irho   += patch_extend_width;
-grid_array_pars_buffer.max_isigma += patch_extend_width;
-
-grid_array_pars_buffer.min_rho_ghost_zone_width = ghost_zone_width;
-grid_array_pars_buffer.max_rho_ghost_zone_width = ghost_zone_width;
-grid_array_pars_buffer.min_sigma_ghost_zone_width = ghost_zone_width;
-grid_array_pars_buffer.max_sigma_ghost_zone_width = ghost_zone_width;
-
-return grid_array_pars_buffer;
-}
-
-//******************************************************************************
-//
-//
-// This function computes, and returns a reference to, a
-//  struct grid_arrays::grid_pars  from the info in a  struct patch_info
-// and the additional information in the arguments.
-//
-// The result refers to an internal static buffer in this function; the
-// usual caveats about lifetimes/overwriting apply.
-//
-// Arguments:
-// patch_extend_width = Number of grid points to extend each patch past
-//		     "just touching" so as to overlap neighboring patches.
-//		     Thus patches overlap by  2*patch_extend_width + 1  grid
-//		     points.  For example, with patch_extend_width == 2, here
-//		     are the grid points of two neighboring patches:
-//			x   x   x   x   x   X   X
-//                                      |
-//			        O   O   o   o   o   o   o
-//		     Here | marks the "just touching" boundary,
-//		     x and o the grid points before this extension,
-//		     and X and O the extra grid points added by this
-//		     extension.
-// N_zones_per_right_angle = This sets the grid spacing (same in both
-//			     directions) to 90.0 / N_zones_per_right_angle.
-//			     It's a fatal error (error_exit()) if this
-//			     doesn't evenly divide the grid sizes in both
-//			     directions.
-//
-const grid::grid_pars& patch_info::grid_pars(int patch_extend_width,
-					     int N_zones_per_right_angle)
-	const
-{
-static
-  struct grid::grid_pars grid_pars_buffer;
-
-verify_grid_spacing_ok(N_zones_per_right_angle);
-const fp delta_drho_dsigma = 90.0 / fp(N_zones_per_right_angle);
-const fp extend_drho_dsigma = fp(patch_extend_width) * delta_drho_dsigma;
-
-grid_pars_buffer.  min_drho   = min_drho   - extend_drho_dsigma;
-grid_pars_buffer.delta_drho   = delta_drho_dsigma;
-grid_pars_buffer.  max_drho   = max_drho   + extend_drho_dsigma;
-grid_pars_buffer.  min_dsigma = min_dsigma - extend_drho_dsigma;
-grid_pars_buffer.delta_dsigma = delta_drho_dsigma;
-grid_pars_buffer.  max_dsigma = max_dsigma + extend_drho_dsigma;
-
-return grid_pars_buffer;
-}
-
-//******************************************************************************
-
-//
-// This function verifies that the grid spacing evenly divides the
-// grid sizes in both directions, and does an  error_exit()  if not.
-//
-// Arguments:
-// N_zones_per_right_angle = This sets the grid spacing (same in both
-//			     directions) to 90.0 / N_zones_per_right_angle.
-//
-void patch_info::verify_grid_spacing_ok(int N_zones_per_right_angle)
-	const
-{
-const fp N_irho_zones_fp
-	= fp(N_zones_per_right_angle) * (max_drho  -min_drho  ) / 90.0;
-const fp N_isigma_zones_fp
-	= fp(N_zones_per_right_angle) * (max_dsigma-min_dsigma) / 90.0;
-
-if (! (    jtutil::fuzzy<fp>::is_integer(N_irho_zones_fp)
-	&& jtutil::fuzzy<fp>::is_integer(N_isigma_zones_fp)    ) )
-   then error_exit(ERROR_EXIT,
-"***** patch_info::verify_grid_spacing_ok():\n"
-"        N_zones_per_right_angle=%d gives grid spacing which\n"
-"        doesn't evenly divide grid sizes!\n"
-"        [min,max]_drho=[%g,%g] [min,max]_dsigma=[%g,%g]\n"
-"        ==> N_irho_zones_fp=%g N_isigma_zones_fp=%g\n"
-		   ,
-		   N_zones_per_right_angle,
-		   double(min_drho), double(max_drho),
-		   double(min_dsigma), double(max_dsigma),
-		   double(N_irho_zones_fp), double(N_isigma_zones_fp));
-								/*NOTREACHED*/
-}
-
-	  }	// namespace AHFinderDirect
+#include <stdio.h>
+#include <math.h>
+#include <assert.h>
+
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "cpm_map.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "patch_info.h"
+
+namespace AHFinderDirect
+	  {
+using jtutil::error_exit;
+
+//******************************************************************************
+//******************************************************************************
+//******************************************************************************
+
+//
+// This function computes, and returns a reference to, a
+//  struct grid_arrays::grid_array_pars  from the info in a
+//  struct patch_info  and the additional information in the arguments.
+//
+// The result refers to an internal static buffer in this function; the
+// usual caveats about lifetimes/overwriting apply.
+//
+// Arguments:
+// ghost_zone_width = Width in grid points of all ghost zones.
+// patch_extend_width = Number of grid points to extend each patch past
+//		     "just touching" so as to overlap neighboring patches.
+//		     Thus patches overlap by
+//			patch_overlap_width = 2*patch_extend_width + 1
+//		     grid points.  For example, with patch_extend_width == 2,
+//		     here are the grid points of two neighboring patches:
+//			x   x   x   x   x   X   X
+//                                      |
+//			        O   O   o   o   o   o   o
+//		     Here | marks the "just touching" boundary,
+//		     x and o the grid points before this extension,
+//		     and X and O the extra grid points added by this
+//		     extension.
+// N_zones_per_right_angle = This sets the grid spacing (same in both
+//			     directions) to 90.0 / N_zones_per_right_angle.
+//			     It's a fatal error (error_exit()) if this
+//			     doesn't evenly divide the grid sizes in both
+//			     directions.
+//
+const grid_arrays::grid_array_pars&
+  patch_info::grid_array_pars(int ghost_zone_width, int patch_extend_width,
+			      int N_zones_per_right_angle)
+	const
+{
+static
+  struct grid_arrays::grid_array_pars grid_array_pars_buffer;
+// (one static buffer shared by all calls: each call overwrites the previous result)
+//
+// the values of min_(irho,isigma) are actually arbitrary, but for
+// debugging convenience it's handy to have (irho,isigma) ranges map
+// one-to-one with (rho,sigma) ranges across all patches; the assignments
+// here have this property
+//
+const fp delta_drho_dsigma = 90.0 / fp(N_zones_per_right_angle);
+grid_array_pars_buffer.min_irho
+	= jtutil::round<fp>::to_integer(min_drho  /delta_drho_dsigma);
+grid_array_pars_buffer.min_isigma
+	= jtutil::round<fp>::to_integer(min_dsigma/delta_drho_dsigma);
+// error_exit() here unless the grid spacing evenly divides both patch sizes
+verify_grid_spacing_ok(N_zones_per_right_angle);
+const int N_irho_zones
+	= jtutil::round<fp>::to_integer(
+		   fp(N_zones_per_right_angle) * (max_drho  -min_drho  ) / 90.0
+				       );
+const int N_isigma_zones
+	= jtutil::round<fp>::to_integer(
+		   fp(N_zones_per_right_angle) * (max_dsigma-min_dsigma) / 90.0
+				       );
+// max index = min index + number of zones (before the overlap extension below)
+grid_array_pars_buffer.max_irho
+	= grid_array_pars_buffer.min_irho   + N_irho_zones;
+grid_array_pars_buffer.max_isigma
+	= grid_array_pars_buffer.min_isigma + N_isigma_zones;
+// widen the index range by patch_extend_width grid points on every side
+grid_array_pars_buffer.min_irho   -= patch_extend_width;
+grid_array_pars_buffer.min_isigma -= patch_extend_width;
+grid_array_pars_buffer.max_irho   += patch_extend_width;
+grid_array_pars_buffer.max_isigma += patch_extend_width;
+// all four ghost zones get the same width
+grid_array_pars_buffer.min_rho_ghost_zone_width = ghost_zone_width;
+grid_array_pars_buffer.max_rho_ghost_zone_width = ghost_zone_width;
+grid_array_pars_buffer.min_sigma_ghost_zone_width = ghost_zone_width;
+grid_array_pars_buffer.max_sigma_ghost_zone_width = ghost_zone_width;
+
+return grid_array_pars_buffer;
+}
+
+//******************************************************************************
+//
+//
+// This function computes, and returns a reference to, a
+//  struct grid_arrays::grid_pars  from the info in a  struct patch_info
+// and the additional information in the arguments.
+//
+// The result refers to an internal static buffer in this function; the
+// usual caveats about lifetimes/overwriting apply.
+//
+// Arguments:
+// patch_extend_width = Number of grid points to extend each patch past
+//		     "just touching" so as to overlap neighboring patches.
+//		     Thus patches overlap by  2*patch_extend_width + 1  grid
+//		     points.  For example, with patch_extend_width == 2, here
+//		     are the grid points of two neighboring patches:
+//			x   x   x   x   x   X   X
+//                                      |
+//			        O   O   o   o   o   o   o
+//		     Here | marks the "just touching" boundary,
+//		     x and o the grid points before this extension,
+//		     and X and O the extra grid points added by this
+//		     extension.
+// N_zones_per_right_angle = This sets the grid spacing (same in both
+//			     directions) to 90.0 / N_zones_per_right_angle.
+//			     It's a fatal error (error_exit()) if this
+//			     doesn't evenly divide the grid sizes in both
+//			     directions.
+//
+const grid::grid_pars& patch_info::grid_pars(int patch_extend_width,
+					     int N_zones_per_right_angle)
+	const
+{
+static
+  struct grid::grid_pars grid_pars_buffer;
+// (one static buffer shared by all calls: each call overwrites the previous result)
+verify_grid_spacing_ok(N_zones_per_right_angle);
+const fp delta_drho_dsigma = 90.0 / fp(N_zones_per_right_angle);
+const fp extend_drho_dsigma = fp(patch_extend_width) * delta_drho_dsigma;
+// push each [min,max] bound outwards by the extension; rho and sigma share one grid spacing
+grid_pars_buffer.  min_drho   = min_drho   - extend_drho_dsigma;
+grid_pars_buffer.delta_drho   = delta_drho_dsigma;
+grid_pars_buffer.  max_drho   = max_drho   + extend_drho_dsigma;
+grid_pars_buffer.  min_dsigma = min_dsigma - extend_drho_dsigma;
+grid_pars_buffer.delta_dsigma = delta_drho_dsigma;
+grid_pars_buffer.  max_dsigma = max_dsigma + extend_drho_dsigma;
+
+return grid_pars_buffer;
+}
+
+//******************************************************************************
+
+//
+// This function verifies that the grid spacing evenly divides the
+// grid sizes in both directions, and does an  error_exit()  if not.
+//
+// Arguments:
+// N_zones_per_right_angle = This sets the grid spacing (same in both
+//			     directions) to 90.0 / N_zones_per_right_angle.
+//
+void patch_info::verify_grid_spacing_ok(int N_zones_per_right_angle)
+	const
+{
+const fp N_irho_zones_fp
+	= fp(N_zones_per_right_angle) * (max_drho  -min_drho  ) / 90.0; // zone count in rho, kept as floating point
+const fp N_isigma_zones_fp
+	= fp(N_zones_per_right_angle) * (max_dsigma-min_dsigma) / 90.0; // zone count in sigma, kept as floating point
+// fatal error unless both zone counts are integers (to whatever tolerance jtutil::fuzzy<fp> applies -- not visible here)
+if (! (    jtutil::fuzzy<fp>::is_integer(N_irho_zones_fp)
+	&& jtutil::fuzzy<fp>::is_integer(N_isigma_zones_fp)    ) )
+   then error_exit(ERROR_EXIT,
+"***** patch_info::verify_grid_spacing_ok():\n"
+"        N_zones_per_right_angle=%d gives grid spacing which\n"
+"        doesn't evenly divide grid sizes!\n"
+"        [min,max]_drho=[%g,%g] [min,max]_dsigma=[%g,%g]\n"
+"        ==> N_irho_zones_fp=%g N_isigma_zones_fp=%g\n"
+		   ,
+		   N_zones_per_right_angle,
+		   double(min_drho), double(max_drho), // fp values are cast to double to match the %g conversions
+		   double(min_dsigma), double(max_dsigma),
+		   double(N_irho_zones_fp), double(N_isigma_zones_fp));
+								/*NOTREACHED*/
+}
+
+	  }	// namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/patch_info.h
+++ b/AMSS_NCKU_source/AHF_Direct/patch_info.h
@@ -1,70 +1,70 @@
-namespace AHFinderDirect
-{
-
-	//*****************************************************************************
-
-	//
-	// This (POD, and hence static-initializable) struct gives a minimal
-	// set of information which varies from one patch to another.
-	//
-	// The member functions allow computing all the grid:: constructor
-	// arguments; with these in hand it's fairly easy to construct the
-	// patch itself.  This scheme doesn't allow the most general possible
-	// type of patch (eg it constrains all ghost zones to have the same width,
-	// and it requires the grid spacing to evenly divide 90 degrees), but
-	// it does cover all the cases that seem to come up in practice.
-	//
-	// Arguments for member functions:
-	// ghost_zone_width = Width in grid points of all ghost zones.
-	// patch_extend_width = Number of grid points to extend each patch past
-	//		     "just touching" so as to overlap neighboring patches.
-	//		     Thus patches overlap by
-	//			patch_overlap_width = 2*patch_extend_width + 1
-	//		     grid points.  For example, with patch_extend_width == 2,
-	//		     here are the grid points of two neighboring patches:
-	//			x   x   x   x   x   X   X
-	//                                      |
-	//			        O   O   o   o   o   o   o
-	//		     Here | marks the "just touching" boundary,
-	//		     x and o the grid points before this extension,
-	//		     and X and O the extra grid points added by this
-	//		     extension.
-	// N_zones_per_right_angle = This sets the grid spacing (same in both
-	//			     directions) to 90.0 / N_zones_per_right_angle.
-	//			     It's a fatal error (error_exit()) if this
-	//			     doesn't evenly divide the grid sizes in both
-	//			     directions.
-	//
-	struct patch_info
-	{
-		const char *name;
-		bool is_plus;
-		char ctype;
-		fp min_drho, max_drho;
-		fp min_dsigma, max_dsigma;
-
-		// compute and return reference to  struct grid_arrays::grid_array_pars
-		// ... result refers to internal static buffer;
-		//     the usual caveats about lifetimes/overwriting apply
-		const grid_arrays::grid_array_pars &
-		grid_array_pars(int ghost_zone_width, int patch_extend_width,
-						int N_zones_per_right_angle)
-			const;
-
-		// compute and return reference to  struct grid::grid_pars
-		// ... result refers to internal static buffer;
-		//     the usual caveats about lifetimes/overwriting apply
-		const grid::grid_pars &grid_pars(int patch_extend_width,
-										 int N_zones_per_right_angle)
-			const;
-
-	private:
-		// verify that grid spacing evenly divides grid sizes
-		// in both directions; no-op if ok, error_exit() if not ok
-		void verify_grid_spacing_ok(int N_zones_per_right_angle)
-			const;
-	};
-
-	//******************************************************************************
-
-} // namespace AHFinderDirect
+namespace AHFinderDirect
+{
+
+	//*****************************************************************************
+
+	//
+	// This (POD, and hence static-initializable) struct gives a minimal
+	// set of information which varies from one patch to another.
+	//
+	// The member functions allow computing all the grid:: constructor
+	// arguments; with these in hand it's fairly easy to construct the
+	// patch itself.  This scheme doesn't allow the most general possible
+	// type of patch (eg it constrains all ghost zones to have the same width,
+	// and it requires the grid spacing to evenly divide 90 degrees), but
+	// it does cover all the cases that seem to come up in practice.
+	//
+	// Arguments for member functions:
+	// ghost_zone_width = Width in grid points of all ghost zones.
+	// patch_extend_width = Number of grid points to extend each patch past
+	//		     "just touching" so as to overlap neighboring patches.
+	//		     Thus patches overlap by
+	//			patch_overlap_width = 2*patch_extend_width + 1
+	//		     grid points.  For example, with patch_extend_width == 2,
+	//		     here are the grid points of two neighboring patches:
+	//			x   x   x   x   x   X   X
+	//                                      |
+	//			        O   O   o   o   o   o   o
+	//		     Here | marks the "just touching" boundary,
+	//		     x and o the grid points before this extension,
+	//		     and X and O the extra grid points added by this
+	//		     extension.
+	// N_zones_per_right_angle = This sets the grid spacing (same in both
+	//			     directions) to 90.0 / N_zones_per_right_angle.
+	//			     It's a fatal error (error_exit()) if this
+	//			     doesn't evenly divide the grid sizes in both
+	//			     directions.
+	//
+	struct patch_info
+	{
+		const char *name; // presumably a human-readable patch name -- TODO confirm against users of this struct
+		bool is_plus; // NOTE(review): meaning not evident from this header -- confirm
+		char ctype; // NOTE(review): meaning not evident from this header -- confirm
+		fp min_drho, max_drho; // [min,max] drho range of the patch, before any patch_extend_width extension
+		fp min_dsigma, max_dsigma; // [min,max] dsigma range of the patch, before any patch_extend_width extension
+
+		// compute and return reference to  struct grid_arrays::grid_array_pars
+		// ... result refers to internal static buffer;
+		//     the usual caveats about lifetimes/overwriting apply
+		const grid_arrays::grid_array_pars &
+		grid_array_pars(int ghost_zone_width, int patch_extend_width,
+						int N_zones_per_right_angle)
+			const;
+
+		// compute and return reference to  struct grid::grid_pars
+		// ... result refers to internal static buffer;
+		//     the usual caveats about lifetimes/overwriting apply
+		const grid::grid_pars &grid_pars(int patch_extend_width,
+										 int N_zones_per_right_angle)
+			const;
+
+	private:
+		// verify that grid spacing evenly divides grid sizes
+		// in both directions; no-op if ok, error_exit() if not ok
+		void verify_grid_spacing_ok(int N_zones_per_right_angle)
+			const;
+	};
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/patch_interp.C
+++ b/AMSS_NCKU_source/AHF_Direct/patch_interp.C
@@ -1,360 +1,360 @@
-#include <stdio.h>
-#include <assert.h>
-#include <math.h>
-
-#include "util_Table.h"
-#include "cctk.h"
-
-#include "config.h"
-#include "stdc.h"
-#include "util.h"
-#include "array.h"
-#include "cpm_map.h"
-#include "linear_map.h"
-
-#include "coords.h"
-#include "tgrid.h"
-#include "fd_grid.h"
-#include "patch.h"
-#include "patch_edge.h"
-#include "patch_interp.h"
-#include "ghost_zone.h"
-
-namespace AHFinderDirect
-{
-	int lagrange_interp(double coor_orin, double dx, double *gf,
-						int PTS, double ipx, double *out, int *mposn, double *Jac,
-						int ORD) // ORD-1 order lagrange interpolation
-	{
-		assert(PTS >= ORD);
-		int mi, mf;
-
-		double *L, *x;
-		L = new double[PTS];
-		x = new double[PTS];
-		int i, j, k;
-
-		//-- Determine molecular range
-		//   for odd points, say 5, the molecular is
-		//             |
-		//   +-----+---x-+-----+-----+
-		//
-		mi = jtutil::round<double>::ceiling((ipx - coor_orin) / dx) - ORD / 2;
-		mf = mi + ORD;
-		if (mi < 0)
-		{
-			mi = 0;
-			mf = ORD;
-		}
-		else if (mf > PTS)
-		{
-			mf = PTS;
-			mi = PTS - ORD;
-		}
-
-		//-- Setup coordinate by input origin, dx
-		for (j = mi; j < mf; j++)
-			x[j] = coor_orin + j * dx;
-
-		//-- Lagrange basis function
-		*out = 0;
-		for (i = mi; i < mf; i++)
-		{
-			L[i] = 1.0;
-			for (k = mi; k < mf; k++)
-				if (k != i)
-				{
-					L[i] *= (ipx - x[k]) / (x[i] - x[k]);
-				}
-			*out += *(gf + i) * L[i];
-			*Jac = L[i];
-			Jac++;
-		}
-
-		*mposn = mi;
-
-		delete[] L;
-		delete[] x;
-
-		return 0; // Normal retrun
-	}
-
-	using jtutil::error_exit;
-
-	patch_interp::patch_interp(const patch_edge &my_edge_in,
-							   int min_iperp_in, int max_iperp_in,
-							   const jtutil::array1d<int> &min_parindex_array_in,
-							   const jtutil::array1d<int> &max_parindex_array_in,
-							   const jtutil::array2d<fp> &interp_par_in,
-							   bool ok_to_use_min_par_ghost_zone,
-							   bool ok_to_use_max_par_ghost_zone,
-							   int interp_handle_in, int interp_par_table_handle_in)
-		: my_patch_(my_edge_in.my_patch()),
-		  my_edge_(my_edge_in),
-		  min_gfn_(my_patch().ghosted_min_gfn()),
-		  max_gfn_(my_patch().ghosted_max_gfn()),
-		  ok_to_use_min_par_ghost_zone_(ok_to_use_min_par_ghost_zone),
-		  ok_to_use_max_par_ghost_zone_(ok_to_use_max_par_ghost_zone),
-		  min_iperp_(min_iperp_in), max_iperp_(max_iperp_in),
-		  min_ipar_(ok_to_use_min_par_ghost_zone
-						? my_edge_in.min_ipar_with_corners()
-						: my_edge_in.min_ipar_without_corners()),
-		  max_ipar_(ok_to_use_max_par_ghost_zone
-						? my_edge_in.max_ipar_with_corners()
-						: my_edge_in.max_ipar_without_corners()),
-		  min_parindex_array_(min_parindex_array_in),
-		  max_parindex_array_(max_parindex_array_in),
-		  interp_par_(interp_par_in),
-		  interp_handle_(interp_handle_in),
-		  interp_par_table_handle_(1),
-		  gridfn_coord_origin_(my_edge().par_map().fp_of_int(min_ipar_)),
-		  gridfn_coord_delta_(my_edge().par_map().delta_fp()),
-		  gridfn_data_ptrs_(min_gfn_, max_gfn_),
-		  interp_data_buffer_ptrs_(min_gfn_, max_gfn_) // no comma
-	{
-		int status;
-
-		const CCTK_INT stride = my_edge().ghosted_par_stride();
-
-		status = 0;
-		if (status < 0)
-			then error_exit(ERROR_EXIT,
-							"***** patch_interp::patch_interp():\n"
-							"        can't set gridfn stride in interpolator parmameter table!\n"
-							"        error status=%d\n",
-							status); /*NOTREACHED*/
-	}
-
-	patch_interp::~patch_interp()
-	{
-	}
-
-	void patch_interp::interpolate(int ghosted_min_gfn_to_interp,
-								   int ghosted_max_gfn_to_interp,
-								   jtutil::array3d<fp> &data_buffer,
-								   jtutil::array2d<CCTK_INT> &posn_buffer,
-								   jtutil::array3d<fp> &Jacobian_buffer)
-		const
-
-	{
-		int status;
-
-		const int N_dims = 1;
-		const int N_gridfns = jtutil::how_many_in_range(ghosted_min_gfn_to_interp,
-														ghosted_max_gfn_to_interp);
-		const CCTK_INT N_gridfn_data_points = jtutil::how_many_in_range(min_ipar(), max_ipar());
-
-		//--  Jacobian
-		const int Jacobian_interp_point_stride = Jacobian_buffer.subscript_stride_j();
-
-		//
-		// do the interpolations at each iperp
-		//
-		for (int iperp = min_iperp(); iperp <= max_iperp(); ++iperp)
-		{
-			//
-			// interpolation-point coordinates
-			//
-			const int min_parindex = min_parindex_array_(iperp);
-			const int max_parindex = max_parindex_array_(iperp);
-			const CCTK_INT N_interp_points = jtutil::how_many_in_range(min_parindex, max_parindex);
-			const fp *const interp_coords_ptr = &interp_par_(iperp, min_parindex);
-			const void *const interp_coords[N_dims] = {static_cast<const void *>(interp_coords_ptr)};
-
-			//
-			// pointers to gridfn data to interpolate, and to result buffer
-			//
-			for (int ghosted_gfn = ghosted_min_gfn_to_interp;
-				 ghosted_gfn <= ghosted_max_gfn_to_interp;
-				 ++ghosted_gfn)
-			{
-				// set up data pointer to --> (iperp,min_ipar) gridfn
-				const int start_irho = my_edge().irho_of_iperp_ipar(iperp, min_ipar());
-				const int start_isigma = my_edge().isigma_of_iperp_ipar(iperp, min_ipar());
-				gridfn_data_ptrs_(ghosted_gfn) = static_cast<const void *>(
-					&my_patch()
-						 .ghosted_gridfn(ghosted_gfn,
-										 start_irho, start_isigma));
-				interp_data_buffer_ptrs_(ghosted_gfn) = static_cast<void *>(
-					&data_buffer(ghosted_gfn, iperp, min_parindex));
-			}
-			const void *const *const gridfn_data = &gridfn_data_ptrs_(ghosted_min_gfn_to_interp);
-			void *const *const interp_buffer = &interp_data_buffer_ptrs_(ghosted_min_gfn_to_interp);
-
-			//--  molecule position
-			CCTK_POINTER molecule_posn_ptrs[N_dims] = {static_cast<CCTK_POINTER>(&posn_buffer(iperp, min_parindex))};
-			//--  Jacobian
-			CCTK_POINTER const Jacobian_ptrs[1] //[N_gridfns]
-				= {static_cast<CCTK_POINTER>(
-					&Jacobian_buffer(iperp, min_parindex, 0))};
-			// Jacobian_buffer has continuous memory allocation.
-
-			const CCTK_INT stride = my_edge().ghosted_par_stride();
-			double y[N_gridfn_data_points];
-
-			for (int i = 0; i < N_gridfn_data_points; i++)
-			{
-				y[i] = *((double *)(*gridfn_data) + stride * i);
-			}
-
-			const int ORD = 6;
-			double Jac[ORD];
-			int posn; // of molecular, starting from 0
-			for (int i = 0; i < N_interp_points; i++)
-			{
-				status = lagrange_interp(gridfn_coord_origin_, gridfn_coord_delta_,
-										 y, N_gridfn_data_points,
-										 *((double *)interp_coords[0] + i), ((double *)(*interp_buffer) + i),
-										 &posn, Jac, ORD);
-
-				*((int *)molecule_posn_ptrs[0] + i) = posn + 2;
-
-				memcpy((double *)(Jacobian_ptrs[0]) + Jacobian_buffer.min_k() +
-						   Jacobian_interp_point_stride * i,
-					   Jac, sizeof(Jac));
-			}
-
-			// convert the molecule positions from  parindex-min_ipar
-			// to  parindex  values (again, cf comments on array subscripting
-			// at the start of "patch_interp.hh")
-			for (int parindex = min_parindex;
-				 parindex <= max_parindex;
-				 ++parindex)
-			{
-				posn_buffer(iperp, parindex) += min_ipar();
-			}
-
-			if (status < 0)
-				then error_exit(ERROR_EXIT,
-								"***** patch_interp::interpolate():\n"
-								"        error return %d from interpolator at iperp=%d of [%d,%d]!\n"
-								"        my_patch()=\"%s\" my_edge()=\"%s\"\n",
-								status, iperp, min_iperp(), max_iperp(),
-								my_patch().name(), my_edge().name()); /*NOTREACHED*/
-
-		} // end for iperp
-	}
-
-	void patch_interp::verify_Jacobian_sparsity_pattern_ok()
-		const
-	{
-		CCTK_INT MSS_is_fn_of_interp_coords = 0, MSS_is_fn_of_input_array_values = 0;
-		CCTK_INT Jacobian_is_fn_of_input_array_values = 0;
-
-		//
-		// verify that we grok the Jacobian sparsity pattern
-		//
-		if (MSS_is_fn_of_interp_coords || MSS_is_fn_of_input_array_values || Jacobian_is_fn_of_input_array_values)
-			then error_exit(ERROR_EXIT,
-							"***** patch_interp::verify_Jacobian_sparsity_pattern_ok():\n"
-							"        implementation restriction: we only grok Jacobians with\n"
-							"        fixed-sized hypercube-shaped molecules, independent of\n"
-							"        the interpolation coordinates and the floating-point values!\n"
-							"        MSS_is_fn_of_interp_coords=(int)%d (we only grok 0)\n"
-							"        MSS_is_fn_of_input_array_values=(int)%d (we only grok 0)\n"
-							"        Jacobian_is_fn_of_input_array_values=(int)%d (we only grok 0)\n",
-							MSS_is_fn_of_interp_coords,
-							MSS_is_fn_of_input_array_values,
-							Jacobian_is_fn_of_input_array_values);
-	}
-
-	//******************************************************************************
-
-	//
-	// This function queries the interpolator to get the [min,max] ipar m
-	// coordinates of the interpolation molecules.
-	//
-	// (This API implicitly assumes that the Jacobian sparsity is one which
-	// is "ok" as verified by  verify_Jacobian_sparsity_pattern_ok() .)
-	//
-	void patch_interp::molecule_minmax_ipar_m(int &min_ipar_m, int &max_ipar_m)
-		const
-	{
-		min_ipar_m = -2;
-		max_ipar_m = 3;
-	}
-
-	//******************************************************************************
-
-	//
-	// This function queries the interpolator at each iperp to find out the
-	// molecule ipar positions (which we implicitly assume to be independent
-	// of ghosted_gfn), and stores these in  posn_buffer(iperp, parindex) .
-	//
-	// (This API implicitly assumes that the Jacobian sparsity is one which
-	// is "ok" as verified by  verify_Jacobian_sparsity_pattern_ok() .)
-	//
-	void patch_interp::molecule_posn(jtutil::array2d<CCTK_INT> &posn_buffer)
-		const
-	{
-		const int N_dims = 1;
-		int status;
-
-		for (int iperp = min_iperp(); iperp <= max_iperp(); ++iperp)
-		{
-			const int min_parindex = min_parindex_array_(iperp);
-			const int max_parindex = max_parindex_array_(iperp);
-
-			// set up the molecule-position query in the parameter table
-			CCTK_POINTER molecule_posn_ptrs[N_dims] = {static_cast<CCTK_POINTER>(&posn_buffer(iperp, min_parindex))};
-			status = 0; // Util_TableSetPointerArray(interp_par_table_handle_, N_dims,
-						//               molecule_posn_ptrs, "molecule_positions");
-
-			if (status < 0)
-				then error_exit(ERROR_EXIT,
-								"***** patch_interp::molecule_posn():\n"
-								"        can't set molecule position query\n"
-								"        in interpolator parmameter table at iperp=%d of [%d,%d]!\n"
-								"        error status=%d\n",
-								iperp, min_iperp(), max_iperp(),
-								status); /*NOTREACHED*/
-
-			for (int parindex = min_parindex;
-				 parindex <= max_parindex;
-				 ++parindex)
-			{
-				posn_buffer(iperp, parindex) += min_ipar();
-			}
-		}
-	}
-
-	void patch_interp::Jacobian(jtutil::array3d<fp> &Jacobian_buffer)
-		const
-	{
-		const int N_dims = 1;
-		const int N_gridfns = 1;
-
-		int status1, status2;
-
-		//
-		// set Jacobian stride info in parameter table
-		//
-		const int Jacobian_interp_point_stride = Jacobian_buffer.subscript_stride_j();
-
-		status1 = 0;
-
-		status2 = 0;
-
-		if ((status1 < 0) || (status2 < 0))
-			then error_exit(ERROR_EXIT,
-							"***** patch_interp::Jacobian():\n"
-							"        can't set Jacobian stride info in interpolator parmameter table!\n"
-							"        error status1=%d status2=%d\n",
-							status1, status2);
-
-		//
-		// query the Jacobians at each iperp
-		//
-		for (int iperp = min_iperp(); iperp <= max_iperp(); ++iperp)
-		{
-			const int min_parindex = min_parindex_array_(iperp);
-			const int max_parindex = max_parindex_array_(iperp);
-
-			//
-			// set up the Jacobian query in the parameter table
-			//
-			CCTK_POINTER const Jacobian_ptrs[N_gridfns] = {static_cast<CCTK_POINTER>(
-				&Jacobian_buffer(iperp, min_parindex, 0))};
-		}
-	}
-} // namespace AHFinderDirect
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+
+#include "util_Table.h"
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "cpm_map.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "fd_grid.h"
+#include "patch.h"
+#include "patch_edge.h"
+#include "patch_interp.h"
+#include "ghost_zone.h"
+
+namespace AHFinderDirect
+{
+	int lagrange_interp(double coor_orin, double dx, double *gf,
+						int PTS, double ipx, double *out, int *mposn, double *Jac,
+						int ORD) // ORD-1 order lagrange interpolation
+	{
+		// 1-D Lagrange interpolation of uniformly spaced samples.
+		//   coor_orin, dx : coordinate of gf[0] and the sample spacing
+		//   gf, PTS       : sample values and how many there are
+		//   ipx           : coordinate to interpolate at
+		//   out           : receives the interpolated value
+		//   mposn         : receives the index (into gf) of the first
+		//                   sample in the interpolation molecule
+		//   Jac           : receives the ORD Lagrange weights, i.e.
+		//                   d(*out)/d(gf[*mposn + m]) for m = 0 .. ORD-1
+		//   ORD           : number of samples in the molecule
+		// Always returns 0.
+		// The weights are accumulated in a scalar and the node
+		// coordinates are recomputed from (coor_orin, dx) on the fly, so
+		// no scratch arrays are heap-allocated per interpolation point.
+		assert(PTS >= ORD);
+		//-- Determine molecular range
+		//   for odd points, say 5, the molecular is
+		//             |
+		//   +-----+---x-+-----+-----+
+		//
+		int mi = jtutil::round<double>::ceiling((ipx - coor_orin) / dx) - ORD / 2;
+		int mf = mi + ORD;
+		if (mi < 0)
+		{
+			// molecule would stick out below the data: clamp to the first ORD samples
+			mi = 0;
+			mf = ORD;
+		}
+		else if (mf > PTS)
+		{
+			// molecule would stick out above the data: clamp to the last ORD samples
+			mf = PTS;
+			mi = PTS - ORD;
+		}
+
+		//-- Lagrange basis function
+		*out = 0;
+		for (int i = mi; i < mf; i++)
+		{
+			const double xi = coor_orin + i * dx;
+			double Li = 1.0;
+			for (int k = mi; k < mf; k++)
+				if (k != i)
+					Li *= (ipx - (coor_orin + k * dx)) / (xi - (coor_orin + k * dx));
+			*out += gf[i] * Li;
+			Jac[i - mi] = Li;
+		}
+
+		*mposn = mi;
+
+		return 0; // Normal return
+	}
+
+	using jtutil::error_exit;
+
+	patch_interp::patch_interp(const patch_edge &my_edge_in, // set up interpolation from the patch that owns my_edge_in
+							   int min_iperp_in, int max_iperp_in,
+							   const jtutil::array1d<int> &min_parindex_array_in,
+							   const jtutil::array1d<int> &max_parindex_array_in,
+							   const jtutil::array2d<fp> &interp_par_in,
+							   bool ok_to_use_min_par_ghost_zone,
+							   bool ok_to_use_max_par_ghost_zone,
+							   int interp_handle_in, int interp_par_table_handle_in)
+		: my_patch_(my_edge_in.my_patch()),
+		  my_edge_(my_edge_in),
+		  min_gfn_(my_patch().ghosted_min_gfn()),
+		  max_gfn_(my_patch().ghosted_max_gfn()),
+		  ok_to_use_min_par_ghost_zone_(ok_to_use_min_par_ghost_zone),
+		  ok_to_use_max_par_ghost_zone_(ok_to_use_max_par_ghost_zone),
+		  min_iperp_(min_iperp_in), max_iperp_(max_iperp_in),
+		  min_ipar_(ok_to_use_min_par_ghost_zone // ipar range includes the corners only when the ghost zone may be used
+						? my_edge_in.min_ipar_with_corners()
+						: my_edge_in.min_ipar_without_corners()),
+		  max_ipar_(ok_to_use_max_par_ghost_zone
+						? my_edge_in.max_ipar_with_corners()
+						: my_edge_in.max_ipar_without_corners()),
+		  min_parindex_array_(min_parindex_array_in),
+		  max_parindex_array_(max_parindex_array_in),
+		  interp_par_(interp_par_in),
+		  interp_handle_(interp_handle_in),
+		  interp_par_table_handle_(1), // NOTE(review): hard-wired to 1; interp_par_table_handle_in is not used
+		  gridfn_coord_origin_(my_edge().par_map().fp_of_int(min_ipar_)), // par coordinate of the first data point (ipar = min_ipar_)
+		  gridfn_coord_delta_(my_edge().par_map().delta_fp()),
+		  gridfn_data_ptrs_(min_gfn_, max_gfn_),
+		  interp_data_buffer_ptrs_(min_gfn_, max_gfn_) // no comma
+	{
+		int status;
+		// NOTE(review): stride is computed but never used, and status is fixed at 0, so the error branch below cannot fire as written
+		const CCTK_INT stride = my_edge().ghosted_par_stride();
+
+		status = 0;
+		if (status < 0)
+			then error_exit(ERROR_EXIT,
+							"***** patch_interp::patch_interp():\n"
+							"        can't set gridfn stride in interpolator parmameter table!\n"
+							"        error status=%d\n",
+							status); /*NOTREACHED*/
+	}
+
+	patch_interp::~patch_interp() // empty body: no explicit cleanup is performed here
+	{
+	}
+
+	void patch_interp::interpolate(int ghosted_min_gfn_to_interp,
+								   int ghosted_max_gfn_to_interp,
+								   jtutil::array3d<fp> &data_buffer,
+								   jtutil::array2d<CCTK_INT> &posn_buffer,
+								   jtutil::array3d<fp> &Jacobian_buffer)
+		const
+		// for each iperp: 6-point Lagrange interpolation in par, filling data_buffer, posn_buffer and Jacobian_buffer
+	{
+		int status = 0; // initialised so the check after the point loop is well defined even when an iperp has no interpolation points
+		// NOTE(review): only the first gridfn of the requested range is actually interpolated (see *gridfn_data / *interp_buffer below)
+		const int N_dims = 1;
+		const int N_gridfns = jtutil::how_many_in_range(ghosted_min_gfn_to_interp,
+														ghosted_max_gfn_to_interp);
+		const CCTK_INT N_gridfn_data_points = jtutil::how_many_in_range(min_ipar(), max_ipar());
+
+		//--  Jacobian
+		const int Jacobian_interp_point_stride = Jacobian_buffer.subscript_stride_j();
+
+		//
+		// do the interpolations at each iperp
+		//
+		for (int iperp = min_iperp(); iperp <= max_iperp(); ++iperp)
+		{
+			//
+			// interpolation-point coordinates
+			//
+			const int min_parindex = min_parindex_array_(iperp);
+			const int max_parindex = max_parindex_array_(iperp);
+			const CCTK_INT N_interp_points = jtutil::how_many_in_range(min_parindex, max_parindex);
+			const fp *const interp_coords_ptr = &interp_par_(iperp, min_parindex);
+			const void *const interp_coords[N_dims] = {static_cast<const void *>(interp_coords_ptr)};
+
+			//
+			// pointers to gridfn data to interpolate, and to result buffer
+			//
+			for (int ghosted_gfn = ghosted_min_gfn_to_interp;
+				 ghosted_gfn <= ghosted_max_gfn_to_interp;
+				 ++ghosted_gfn)
+			{
+				// set up data pointer to --> (iperp,min_ipar) gridfn
+				const int start_irho = my_edge().irho_of_iperp_ipar(iperp, min_ipar());
+				const int start_isigma = my_edge().isigma_of_iperp_ipar(iperp, min_ipar());
+				gridfn_data_ptrs_(ghosted_gfn) = static_cast<const void *>(
+					&my_patch()
+						 .ghosted_gridfn(ghosted_gfn,
+										 start_irho, start_isigma));
+				interp_data_buffer_ptrs_(ghosted_gfn) = static_cast<void *>(
+					&data_buffer(ghosted_gfn, iperp, min_parindex));
+			}
+			const void *const *const gridfn_data = &gridfn_data_ptrs_(ghosted_min_gfn_to_interp);
+			void *const *const interp_buffer = &interp_data_buffer_ptrs_(ghosted_min_gfn_to_interp);
+
+			//--  molecule position
+			CCTK_POINTER molecule_posn_ptrs[N_dims] = {static_cast<CCTK_POINTER>(&posn_buffer(iperp, min_parindex))};
+			//--  Jacobian
+			CCTK_POINTER const Jacobian_ptrs[1] //[N_gridfns]
+				= {static_cast<CCTK_POINTER>(
+					&Jacobian_buffer(iperp, min_parindex, 0))};
+			// Jacobian_buffer has continuous memory allocation.
+
+			const CCTK_INT stride = my_edge().ghosted_par_stride();
+			double y[N_gridfn_data_points]; // NOTE(review): variable-length array, a compiler extension in C++
+			// gather the strided gridfn values along this iperp line into the contiguous scratch array y
+			for (int i = 0; i < N_gridfn_data_points; i++)
+			{
+				y[i] = *((double *)(*gridfn_data) + stride * i);
+			}
+
+			const int ORD = 6;
+			double Jac[ORD];
+			int posn; // of molecular, starting from 0
+			for (int i = 0; i < N_interp_points; i++)
+			{
+				status = lagrange_interp(gridfn_coord_origin_, gridfn_coord_delta_,
+										 y, N_gridfn_data_points,
+										 *((double *)interp_coords[0] + i), ((double *)(*interp_buffer) + i),
+										 &posn, Jac, ORD);
+				// posn is the molecule's first data point; +2 presumably makes the stored position the m=0 point of the [-2,3] molecule (cf molecule_minmax_ipar_m())
+				*((int *)molecule_posn_ptrs[0] + i) = posn + 2; // NOTE(review): the cast assumes CCTK_INT is int
+				// copy the ORD weights for this point, starting at k = min_k() (assumes unit stride in k -- TODO confirm)
+				memcpy((double *)(Jacobian_ptrs[0]) + Jacobian_buffer.min_k() +
+						   Jacobian_interp_point_stride * i,
+					   Jac, sizeof(Jac));
+			}
+
+			// convert the molecule positions from  parindex-min_ipar
+			// to  parindex  values (again, cf comments on array subscripting
+			// at the start of "patch_interp.hh")
+			for (int parindex = min_parindex;
+				 parindex <= max_parindex;
+				 ++parindex)
+			{
+				posn_buffer(iperp, parindex) += min_ipar();
+			}
+			// status holds the return value of the last lagrange_interp() call only (or 0 if there was none)
+			if (status < 0)
+				then error_exit(ERROR_EXIT,
+								"***** patch_interp::interpolate():\n"
+								"        error return %d from interpolator at iperp=%d of [%d,%d]!\n"
+								"        my_patch()=\"%s\" my_edge()=\"%s\"\n",
+								status, iperp, min_iperp(), max_iperp(),
+								my_patch().name(), my_edge().name()); /*NOTREACHED*/
+
+		} // end for iperp
+	}
+
+	void patch_interp::verify_Jacobian_sparsity_pattern_ok()
+		const
+	{
+		CCTK_INT MSS_is_fn_of_interp_coords = 0, MSS_is_fn_of_input_array_values = 0;
+		CCTK_INT Jacobian_is_fn_of_input_array_values = 0;
+		// NOTE(review): all three flags are hard-wired to 0 here, so the check below cannot fail as written
+		//
+		// verify that we grok the Jacobian sparsity pattern
+		//
+		if (MSS_is_fn_of_interp_coords || MSS_is_fn_of_input_array_values || Jacobian_is_fn_of_input_array_values)
+			then error_exit(ERROR_EXIT,
+							"***** patch_interp::verify_Jacobian_sparsity_pattern_ok():\n"
+							"        implementation restriction: we only grok Jacobians with\n"
+							"        fixed-sized hypercube-shaped molecules, independent of\n"
+							"        the interpolation coordinates and the floating-point values!\n"
+							"        MSS_is_fn_of_interp_coords=(int)%d (we only grok 0)\n"
+							"        MSS_is_fn_of_input_array_values=(int)%d (we only grok 0)\n"
+							"        Jacobian_is_fn_of_input_array_values=(int)%d (we only grok 0)\n",
+							MSS_is_fn_of_interp_coords,
+							MSS_is_fn_of_input_array_values,
+							Jacobian_is_fn_of_input_array_values);
+	}
+
+	//******************************************************************************
+
+	//
+	// This function returns the [min,max] ipar m coordinates of the
+	// interpolation molecules; they are hard-wired to [-2,3] here.
+	//
+	// (This API implicitly assumes that the Jacobian sparsity is one which
+	// is "ok" as verified by  verify_Jacobian_sparsity_pattern_ok() .)
+	//
+	void patch_interp::molecule_minmax_ipar_m(int &min_ipar_m, int &max_ipar_m)
+		const
+	{
+		min_ipar_m = -2; // constants, not queried: [-2,3] spans 6 points, the same count as ORD = 6 in interpolate()
+		max_ipar_m = 3;
+	}
+
+	//******************************************************************************
+
+	//
+	// This function queries the interpolator at each iperp to find out the
+	// molecule ipar positions (which we implicitly assume to be independent
+	// of ghosted_gfn), and stores these in  posn_buffer(iperp, parindex) .
+	//
+	// (This API implicitly assumes that the Jacobian sparsity is one which
+	// is "ok" as verified by  verify_Jacobian_sparsity_pattern_ok() .)
+	//
+	void patch_interp::molecule_posn(jtutil::array2d<CCTK_INT> &posn_buffer)
+		const
+	{
+		const int N_dims = 1;
+		int status;
+		// NOTE(review): the interpolator query below is commented out, so this function only adds min_ipar() to whatever posn_buffer already holds
+		for (int iperp = min_iperp(); iperp <= max_iperp(); ++iperp)
+		{
+			const int min_parindex = min_parindex_array_(iperp);
+			const int max_parindex = max_parindex_array_(iperp);
+
+			// set up the molecule-position query in the parameter table
+			CCTK_POINTER molecule_posn_ptrs[N_dims] = {static_cast<CCTK_POINTER>(&posn_buffer(iperp, min_parindex))};
+			status = 0; // Util_TableSetPointerArray(interp_par_table_handle_, N_dims,
+						//               molecule_posn_ptrs, "molecule_positions");
+			// status is fixed at 0 above, so this error branch cannot fire as written
+			if (status < 0)
+				then error_exit(ERROR_EXIT,
+								"***** patch_interp::molecule_posn():\n"
+								"        can't set molecule position query\n"
+								"        in interpolator parmameter table at iperp=%d of [%d,%d]!\n"
+								"        error status=%d\n",
+								iperp, min_iperp(), max_iperp(),
+								status); /*NOTREACHED*/
+			// shift every entry of this iperp row by min_ipar()
+			for (int parindex = min_parindex;
+				 parindex <= max_parindex;
+				 ++parindex)
+			{
+				posn_buffer(iperp, parindex) += min_ipar();
+			}
+		}
+	}
+
+	void patch_interp::Jacobian(jtutil::array3d<fp> &Jacobian_buffer)
+		const
+	{
+		const int N_dims = 1;
+		const int N_gridfns = 1;
+		// NOTE(review): as written this writes nothing to Jacobian_buffer: both status values are fixed at 0 and Jacobian_ptrs is built but never used
+		int status1, status2;
+
+		//
+		// set Jacobian stride info in parameter table
+		//
+		const int Jacobian_interp_point_stride = Jacobian_buffer.subscript_stride_j();
+
+		status1 = 0;
+
+		status2 = 0;
+		// both statuses are 0, so this error branch cannot fire as written
+		if ((status1 < 0) || (status2 < 0))
+			then error_exit(ERROR_EXIT,
+							"***** patch_interp::Jacobian():\n"
+							"        can't set Jacobian stride info in interpolator parmameter table!\n"
+							"        error status1=%d status2=%d\n",
+							status1, status2);
+
+		//
+		// query the Jacobians at each iperp
+		//
+		for (int iperp = min_iperp(); iperp <= max_iperp(); ++iperp)
+		{
+			const int min_parindex = min_parindex_array_(iperp);
+			const int max_parindex = max_parindex_array_(iperp);
+
+			//
+			// set up the Jacobian query in the parameter table
+			//
+			CCTK_POINTER const Jacobian_ptrs[N_gridfns] = {static_cast<CCTK_POINTER>(
+				&Jacobian_buffer(iperp, min_parindex, 0))};
+		}
+	}
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/patch_interp.h
+++ b/AMSS_NCKU_source/AHF_Direct/patch_interp.h
@@ -1,293 +1,293 @@
-#ifndef TPATCH_INTERP_H
-#define TPATCH_INTERP_H
-namespace AHFinderDirect
-	  {
-
-//
-// patch_interp - interpolation from a patch
-//
-
-//
-// A patch_interp object is responsible for interpolating gridfn data
-// from its owning patch for use by another patch's ghost_zone object
-// (in setting up the gridfn in the other ghost zone).  A patch_interp
-// object deals only in its own patch's coordinates; other code elsewhere
-// (in practice in interpatch_ghost_zone::) is responsible for translating
-// other patch's coordinates into our coordinates.
-//
-
-//
-// A patch_interp defines a "patch interpolation region", the region of
-// its owning patch from which this interpolation will use gridfn data.
-//
-
-//
-// The way the patch coordnates are constructed, any two adjacent patches
-// share a common (perpendicular) coordinate.  Thus we only have to do
-// 1-dimensional interpolation here (in the parallel direction).  In
-// other words, for each iperp we interpolate in par.
-//
-// In general we interpolate each gridfn at a number of distinct par
-// for each iperp; the integer "parindex" indexes these points.  We
-// attach no particular semantics to parindex, and it need not be
-// 0-origin or have the same range for each iperp.  [In practice,
-// parindex will be the other patch's ipar coordinate.]  However,
-// we assume that the range of parindex is roughly similar for each
-// iperp, so it's ok to use (iperp,parindex) as a 2-D rectangular
-// index space.
-//
-// For example, we might interpolate at the points
-//            ipar ipar ipar ipar ipar ipar ipar ipar ipar
-//              1    2    3    4    5    6    7    8    9
-// iperp=10           (2a)   (3b)   (4c)
-// iperp=11          (2d)   (3e)  (4f)   (5g)
-// where the (2a)-(5g) are the interpolation points, with 2-5 being the
-// parindex values and a-g being unique identifiers used in our description
-// below.  In terms of our member data, this interpolation region would
-// be described by
-//	[min,max]_iperp_=[10,11]
-//	[min,max]_ipar_=[1,9]
-//	[min,max]_parindex_array_(10)=[2,5]
-//	[min,max]_parindex_array_(11)=[2,6]
-//	interp_par_(10,2) = x[a]
-//	interp_par_(10,3) = x[b]
-//	interp_par_(10,4) = x[c]
-//	interp_par_(11,2) = x[d]
-//	interp_par_(11,3) = x[e]
-//	interp_par_(11,4) = x[f]
-//	interp_par_(11,5) = x[g]
-//
-
-//
-// We use the Cactus local interpolator CCTK_InterpLocalUniform()
-// to do the interpolation.  To minimize interpolator overheads, we
-// interpolate all the gridfns at each iperp in a single interpolator
-// call.  [Different iperp values involve different sets of (1-D)
-// gridfn data, and so inherently require distinct interpolator calls.]
-//
-// Setting up the array subscripting for the interpolator to access
-// the gridfn data is a bit tricky:  The interpolator accesses the
-// gridfn data using the generic (1-D) subscripting expression
-//	data[offset + i*stride]
-// where  i  is the data array index.  However, we'd rather not use
-//  offset , because it has to be supplied in the parameter table as
-// an array subscripted by  gfn , and so would require changing the
-// parameter table for each call on  interpolate()  (with potentially
-// different numbers of gridfns being interpolated).  Instead, at each
-//  iperp  we use  i = ipar-min_ipar , so the default  offset=0  makes
-// the subscripting expression zero for  ipar = min_ipar .  This also
-// makes the interpolator's  min_i = 0  and  max_i  be  dims-1  (both
-// the defaults), so those also don't have to be set in the parameter
-// table either.  We set the interpolator's data coordinate origin to
-// the  par  coordinate for  min_ipar , so it correctly maps  i --> par .
-// With this strategy we can share the interpolator parameter table
-// across all the  iperp  values, and we don't need to modify the
-// parameter table at all after the initial setup in our constructor.
-// However, we do have to adjust the molecule positions in
-//  patch_interp::molecule_posn() , since the interpolator will return
-//  i  values, while  molecule_posn()  needs  ipar  values.
-//
-
-class	patch_interp
-	{
-public:
-	// to which patch/edge do we belong?
-	const patch& my_patch() const { return my_patch_; }
-	const patch_edge& my_edge() const { return my_edge_; }
-
-
-public:
-	//
-	// ***** main client interface *****
-	//
-	// interpolate specified range of ghosted gridfns
-	// at all the coordinates specified when we were constructed,
-	// store interpolated data in
-	//	data_buffer(ghosted_gfn, iperp, parindex)
-	void interpolate(int ghosted_min_gfn_to_interp,
-			 int ghosted_max_gfn_to_interp,
-			 jtutil::array3d<fp>& data_buffer)
-		const;
-	void interpolate(int ghosted_min_gfn_to_interp,
-			 int ghosted_max_gfn_to_interp,
-			 jtutil::array3d<fp>& data_buffer,
-			 jtutil::array2d<CCTK_INT>& posn_buffer,
-                         jtutil::array3d<fp>& Jacobian_buffe)
-		const;
-
-public:
-	//
-	// ***** Jacobian of interpolate() *****
-	//
-
-	// verify (no-op if ok, error_exit() if not) that interpolator
-	// has a Jacobian sparsity pattern which we grok: at present this
-	// means molecules are fixed-sized hypercubes, with size/shape
-	// independent of interpolation coordinates and the floating-point
-	// values in the input arrays
-	void verify_Jacobian_sparsity_pattern_ok() const;
-
-	//
-	// The API for the remaining Jacobian functions implicitly
-	// assumes that the Jacobian sparsity pattern is "ok" as
-	// verified by  verify_Jacobian_sparsity_pattern_ok() ,
-	// and in particular that  [min,max]_ipar_m  are independent
-	// of iperp and parindex.
-	//
-
-	// get [min,max] ipar m coordinates of interpolation molecules
-	void molecule_minmax_ipar_m(int& min_ipar_m, int& max_ipar_m) const;
-
-	// get interpolation molecule ipar positions in
-	//  molecule_posn_buffer(iperp, parindex)
-	// ... array type is CCTK_INT so we can pass by reference
-	//     to interpolator
-	void molecule_posn(jtutil::array2d<CCTK_INT>& posn_buffer) const;
-
-	// get Jacobian of interpolated data with respect to this patch's
-	// ghosted gridfns,
-	//	partial interpolate() data_buffer(ghosted_gfn, iperp, parindex)
-	//	---------------------------------------------------------------
-	//	    partial ghosted_gridfn(ghosted_gfn, iperp, posn+ipar_m)
-	// store Jacobian in
-	//	Jacobian_buffer(iperp, parindex, ipar_m)
-	// where we implicitly assume the Jacobian to be independent of
-	// ghosted_gfn, and where
-	//	posn = posn_buffer(iperp, parindex)
-	// as returned by  molecule_posn()
-	void Jacobian(jtutil::array3d<fp>& Jacobian_buffer) const;
-
-	//
-	// ***** internal functions *****
-	//
-private:
-	// [min,max] iperp for interpolation and gridfn data
-	int min_iperp() const { return min_iperp_; }
-	int max_iperp() const { return max_iperp_; }
-
-	// min/max (iperp,ipar) of the gridfn data to use for interpolation
-	int min_ipar() const { return min_ipar_; }
-	int max_ipar() const { return max_ipar_; }
-
-	//
-	// ***** constructor, destructor, et al *****
-	//
-public:
-	//
-	// Constructor arguments:
-	// my_edge_in = Identifies the patch/edge to which this
-	//		interpolation region is to belong.
-	// [min,max]_iperp_in = The range of iperp for this interpolation
-	//			region
-	// [min,max]_parindex_array_in(iperp)
-	//	= [min,max] range of parindex actually used at each iperp.
-	//	  We keep references to these arrays, so they should have
-	//	  lifetimes at last as long as that of this object.
-	// interp_par_in(iperp,parindex)
-	//	= Gives the par coordinates at which we will interpolate;
-	//	  array entries outside the range [min,max]_parindex_in
-	//	  are unused.  We keep a reference to this array, so it
-	//	  should have a lifetime at last as long as that of this
-	//	  object.
-	// ok_to_use_[min,max]_par_ghost_zone
-	//	= Boolean flags saying whether or not we should use gridfn
-	//	  data from the [min,max]_par ghost zones in the interpolation.
-	// interp_handle_in = Cactus handle to the interpatch interpolation
-	//		      operator.
-	// interp_par_table_handle_in
-	//	= Cactus handle to a Cactus key/value table giving
-	//	  parameters (eg order) for the interpatch interpolation
-	//	  operator.  This class internally clones this table and
-	//	  modifies the clone, so the original table is not modified
-	//	  by any actions of this class.
-	//
-	// This constructor requires that this patch's gridfns already
-	// exist, since we size various arrays based on the patch's min/max
-	// ghosted gfn.
-	//
-	patch_interp(const patch_edge& my_edge_in,
-		     int min_iperp_in, int max_iperp_in,
-		     const jtutil::array1d<int>& min_parindex_array_in,
-		     const jtutil::array1d<int>& max_parindex_array_in,
-		     const jtutil::array2d<fp>& interp_par_in,
-		     bool ok_to_use_min_par_ghost_zone,
-		     bool ok_to_use_max_par_ghost_zone,
-		     int interp_handle_in, int interp_par_table_handle_in);
-	~patch_interp();
-
-private:
-        // we forbid copying and passing by value
-        // by declaring the copy constructor and assignment operator
-        // private, but never defining them
-	patch_interp(const patch_interp& rhs);
-	patch_interp& operator=(const patch_interp& rhs);
-
-
-	//
-	// ***** data members *****
-	//
-private:
-	const patch& my_patch_;
-	const patch_edge& my_edge_;
-
-	// range of gfn we can handle
-	// (any given interpolate() call may specify a subrange)
-	const int min_gfn_, max_gfn_;
-
-	// these are strictly speaking redundant
-	// but we keep them for use in debugging
-	bool ok_to_use_min_par_ghost_zone_, ok_to_use_max_par_ghost_zone_;
-
-	// patch interpolation region,
-	// i.e. range of (iperp,ipar) in this patch from which
-	// we will use gridfn data in interpolation
-	const int min_iperp_, max_iperp_;
-	const int min_ipar_, max_ipar_;
-
-	// [min,max] parindex at each iperp
-	// ... these are references to arrays passed in to our constructor
-	//     ==> we do *not* own them!
-	// ... indices are (iperp)
-	const jtutil::array1d<int>& min_parindex_array_;
-	const jtutil::array1d<int>& max_parindex_array_;
-
-	// interp_par_(iperp,parindex)
-	//	= Gives the par coordinates at which we will interpolate;
-	//	  array entries outside the range [min,max]_parindex_in
-	//	  are unused (n.b. this interface implicitly takes the
-	//	  par coordinates to be independent of ghosted_gfn).
-	// ... this is a reference to an array passed in to our constructor
-	//     ==> we do *not* own this!
-	const jtutil::array2d<fp>& interp_par_;	// indices (iperp,parindex)
-
-	// Cactus handle to the interpolation operator
-	int interp_handle_;
-
-	// Cactus handle to our private Cactus key/value table
-	// giving parameters for the interpolation operator
-	// ... this starts out as a copy of the passed-in table,
-	//     then gets extra stuff added to it specific to this
-	//     interpolation region; it's shared across all iperp
-	// ... we own this table
-	const int interp_par_table_handle_;
-
-	// (par) origin and delta values of the gridfn data
-	const fp gridfn_coord_origin_, gridfn_coord_delta_;
-
-	// --> start of gridfn data to use for interpolation
-	//     (reset for each iperp)
-	// ... we do *not* own the pointed-to data!
-	// ... index is (gfn)
-	mutable jtutil::array1d<const void*> gridfn_data_ptrs_;
-
-	// --> start of interpolation data buffer for each gridfn
-	//     (reset for each iperp)
-	// ... we do *not* own the pointed-to data!
-	// ... index is (gfn)
-	mutable jtutil::array1d<void*> interp_data_buffer_ptrs_;
-	};
-
-//******************************************************************************
-
-	  }	// namespace AHFinderDirect
-#endif  /* TPATCH_INTERP_H */
+#ifndef TPATCH_INTERP_H
+#define TPATCH_INTERP_H
+namespace AHFinderDirect
+	  {
+
+//
+// patch_interp - interpolation from a patch
+//
+
+//
+// A patch_interp object is responsible for interpolating gridfn data
+// from its owning patch for use by another patch's ghost_zone object
+// (in setting up the gridfn in the other ghost zone).  A patch_interp
+// object deals only in its own patch's coordinates; other code elsewhere
+// (in practice in interpatch_ghost_zone::) is responsible for translating
+// other patch's coordinates into our coordinates.
+//
+
+//
+// A patch_interp defines a "patch interpolation region", the region of
+// its owning patch from which this interpolation will use gridfn data.
+//
+
+//
+// The way the patch coordinates are constructed, any two adjacent patches
+// share a common (perpendicular) coordinate.  Thus we only have to do
+// 1-dimensional interpolation here (in the parallel direction).  In
+// other words, for each iperp we interpolate in par.
+//
+// In general we interpolate each gridfn at a number of distinct par
+// for each iperp; the integer "parindex" indexes these points.  We
+// attach no particular semantics to parindex, and it need not be
+// 0-origin or have the same range for each iperp.  [In practice,
+// parindex will be the other patch's ipar coordinate.]  However,
+// we assume that the range of parindex is roughly similar for each
+// iperp, so it's ok to use (iperp,parindex) as a 2-D rectangular
+// index space.
+//
+// For example, we might interpolate at the points
+//            ipar ipar ipar ipar ipar ipar ipar ipar ipar
+//              1    2    3    4    5    6    7    8    9
+// iperp=10           (2a)   (3b)   (4c)
+// iperp=11          (2d)   (3e)  (4f)   (5g)
+// where the (2a)-(5g) are the interpolation points, with 2-5 being the
+// parindex values and a-g being unique identifiers used in our description
+// below.  In terms of our member data, this interpolation region would
+// be described by
+//	[min,max]_iperp_=[10,11]
+//	[min,max]_ipar_=[1,9]
+//	[min,max]_parindex_array_(10)=[2,4]
+//	[min,max]_parindex_array_(11)=[2,5]
+//	interp_par_(10,2) = x[a]
+//	interp_par_(10,3) = x[b]
+//	interp_par_(10,4) = x[c]
+//	interp_par_(11,2) = x[d]
+//	interp_par_(11,3) = x[e]
+//	interp_par_(11,4) = x[f]
+//	interp_par_(11,5) = x[g]
+//
+
+//
+// We use the Cactus local interpolator CCTK_InterpLocalUniform()
+// to do the interpolation.  To minimize interpolator overheads, we
+// interpolate all the gridfns at each iperp in a single interpolator
+// call.  [Different iperp values involve different sets of (1-D)
+// gridfn data, and so inherently require distinct interpolator calls.]
+//
+// Setting up the array subscripting for the interpolator to access
+// the gridfn data is a bit tricky:  The interpolator accesses the
+// gridfn data using the generic (1-D) subscripting expression
+//	data[offset + i*stride]
+// where  i  is the data array index.  However, we'd rather not use
+//  offset , because it has to be supplied in the parameter table as
+// an array subscripted by  gfn , and so would require changing the
+// parameter table for each call on  interpolate()  (with potentially
+// different numbers of gridfns being interpolated).  Instead, at each
+//  iperp  we use  i = ipar-min_ipar , so the default  offset=0  makes
+// the subscripting expression zero for  ipar = min_ipar .  This also
+// makes the interpolator's  min_i = 0  and  max_i  be  dims-1  (both
+// the defaults), so those also don't have to be set in the parameter
+// table either.  We set the interpolator's data coordinate origin to
+// the  par  coordinate for  min_ipar , so it correctly maps  i --> par .
+// With this strategy we can share the interpolator parameter table
+// across all the  iperp  values, and we don't need to modify the
+// parameter table at all after the initial setup in our constructor.
+// However, we do have to adjust the molecule positions in
+//  patch_interp::molecule_posn() , since the interpolator will return
+//  i  values, while  molecule_posn()  needs  ipar  values.
+//
+
+class	patch_interp
+	{
+public:
+	// to which patch/edge do we belong?
+	const patch& my_patch() const { return my_patch_; }
+	const patch_edge& my_edge() const { return my_edge_; }
+
+
+public:
+	//
+	// ***** main client interface *****
+	//
+	// interpolate specified range of ghosted gridfns
+	// at all the coordinates specified when we were constructed,
+	// store interpolated data in
+	//	data_buffer(ghosted_gfn, iperp, parindex)
+	void interpolate(int ghosted_min_gfn_to_interp,
+			 int ghosted_max_gfn_to_interp,
+			 jtutil::array3d<fp>& data_buffer)
+		const;
+	void interpolate(int ghosted_min_gfn_to_interp,
+			 int ghosted_max_gfn_to_interp,
+			 jtutil::array3d<fp>& data_buffer,
+			 jtutil::array2d<CCTK_INT>& posn_buffer,
+                         jtutil::array3d<fp>& Jacobian_buffe)	// NOTE(review): name looks truncated ("Jacobian_buffer"?); harmless in a declaration
+		const;
+
+public:
+	//
+	// ***** Jacobian of interpolate() *****
+	//
+
+	// verify (no-op if ok, error_exit() if not) that interpolator
+	// has a Jacobian sparsity pattern which we grok: at present this
+	// means molecules are fixed-sized hypercubes, with size/shape
+	// independent of interpolation coordinates and the floating-point
+	// values in the input arrays
+	void verify_Jacobian_sparsity_pattern_ok() const;
+
+	//
+	// The API for the remaining Jacobian functions implicitly
+	// assumes that the Jacobian sparsity pattern is "ok" as
+	// verified by  verify_Jacobian_sparsity_pattern_ok() ,
+	// and in particular that  [min,max]_ipar_m  are independent
+	// of iperp and parindex.
+	//
+
+	// get [min,max] ipar m coordinates of interpolation molecules
+	void molecule_minmax_ipar_m(int& min_ipar_m, int& max_ipar_m) const;
+
+	// get interpolation molecule ipar positions in
+	//  posn_buffer(iperp, parindex)
+	// ... array type is CCTK_INT so we can pass by reference
+	//     to interpolator
+	void molecule_posn(jtutil::array2d<CCTK_INT>& posn_buffer) const;
+
+	// get Jacobian of interpolated data with respect to this patch's
+	// ghosted gridfns,
+	//	partial interpolate() data_buffer(ghosted_gfn, iperp, parindex)
+	//	---------------------------------------------------------------
+	//	    partial ghosted_gridfn(ghosted_gfn, iperp, posn+ipar_m)
+	// store Jacobian in
+	//	Jacobian_buffer(iperp, parindex, ipar_m)
+	// where we implicitly assume the Jacobian to be independent of
+	// ghosted_gfn, and where
+	//	posn = posn_buffer(iperp, parindex)
+	// as returned by  molecule_posn()
+	void Jacobian(jtutil::array3d<fp>& Jacobian_buffer) const;
+
+	//
+	// ***** internal functions *****
+	//
+private:
+	// [min,max] iperp for interpolation and gridfn data
+	int min_iperp() const { return min_iperp_; }
+	int max_iperp() const { return max_iperp_; }
+
+	// [min,max] ipar of the gridfn data to use for interpolation
+	int min_ipar() const { return min_ipar_; }
+	int max_ipar() const { return max_ipar_; }
+
+	//
+	// ***** constructor, destructor, et al *****
+	//
+public:
+	//
+	// Constructor arguments:
+	// my_edge_in = Identifies the patch/edge to which this
+	//		interpolation region is to belong.
+	// [min,max]_iperp_in = The range of iperp for this interpolation
+	//			region
+	// [min,max]_parindex_array_in(iperp)
+	//	= [min,max] range of parindex actually used at each iperp.
+	//	  We keep references to these arrays, so they should have
+	//	  lifetimes at least as long as that of this object.
+	// interp_par_in(iperp,parindex)
+	//	= Gives the par coordinates at which we will interpolate;
+	//	  array entries outside the range [min,max]_parindex_in
+	//	  are unused.  We keep a reference to this array, so it
+	//	  should have a lifetime at least as long as that of this
+	//	  object.
+	// ok_to_use_[min,max]_par_ghost_zone
+	//	= Boolean flags saying whether or not we should use gridfn
+	//	  data from the [min,max]_par ghost zones in the interpolation.
+	// interp_handle_in = Cactus handle to the interpatch interpolation
+	//		      operator.
+	// interp_par_table_handle_in
+	//	= Cactus handle to a Cactus key/value table giving
+	//	  parameters (eg order) for the interpatch interpolation
+	//	  operator.  This class internally clones this table and
+	//	  modifies the clone, so the original table is not modified
+	//	  by any actions of this class.
+	//
+	// This constructor requires that this patch's gridfns already
+	// exist, since we size various arrays based on the patch's min/max
+	// ghosted gfn.
+	//
+	patch_interp(const patch_edge& my_edge_in,
+		     int min_iperp_in, int max_iperp_in,
+		     const jtutil::array1d<int>& min_parindex_array_in,
+		     const jtutil::array1d<int>& max_parindex_array_in,
+		     const jtutil::array2d<fp>& interp_par_in,
+		     bool ok_to_use_min_par_ghost_zone,
+		     bool ok_to_use_max_par_ghost_zone,
+		     int interp_handle_in, int interp_par_table_handle_in);
+	~patch_interp();
+
+private:
+        // we forbid copying and passing by value
+        // by declaring the copy constructor and assignment operator
+        // private, but never defining them
+	patch_interp(const patch_interp& rhs);
+	patch_interp& operator=(const patch_interp& rhs);
+
+
+	//
+	// ***** data members *****
+	//
+private:
+	const patch& my_patch_;
+	const patch_edge& my_edge_;
+
+	// range of gfn we can handle
+	// (any given interpolate() call may specify a subrange)
+	const int min_gfn_, max_gfn_;
+
+	// these are strictly speaking redundant
+	// but we keep them for use in debugging
+	bool ok_to_use_min_par_ghost_zone_, ok_to_use_max_par_ghost_zone_;
+
+	// patch interpolation region,
+	// i.e. range of (iperp,ipar) in this patch from which
+	// we will use gridfn data in interpolation
+	const int min_iperp_, max_iperp_;
+	const int min_ipar_, max_ipar_;
+
+	// [min,max] parindex at each iperp
+	// ... these are references to arrays passed in to our constructor
+	//     ==> we do *not* own them!
+	// ... indices are (iperp)
+	const jtutil::array1d<int>& min_parindex_array_;
+	const jtutil::array1d<int>& max_parindex_array_;
+
+	// interp_par_(iperp,parindex)
+	//	= Gives the par coordinates at which we will interpolate;
+	//	  array entries outside the range [min,max]_parindex_in
+	//	  are unused (n.b. this interface implicitly takes the
+	//	  par coordinates to be independent of ghosted_gfn).
+	// ... this is a reference to an array passed in to our constructor
+	//     ==> we do *not* own this!
+	const jtutil::array2d<fp>& interp_par_;	// indices (iperp,parindex)
+
+	// Cactus handle to the interpolation operator
+	int interp_handle_;
+
+	// Cactus handle to our private Cactus key/value table
+	// giving parameters for the interpolation operator
+	// ... this starts out as a copy of the passed-in table,
+	//     then gets extra stuff added to it specific to this
+	//     interpolation region; it's shared across all iperp
+	// ... we own this table
+	const int interp_par_table_handle_;
+
+	// (par) origin and delta values of the gridfn data
+	const fp gridfn_coord_origin_, gridfn_coord_delta_;
+
+	// --> start of gridfn data to use for interpolation
+	//     (reset for each iperp)
+	// ... we do *not* own the pointed-to data!
+	// ... index is (gfn)
+	mutable jtutil::array1d<const void*> gridfn_data_ptrs_;
+
+	// --> start of interpolation data buffer for each gridfn
+	//     (reset for each iperp)
+	// ... we do *not* own the pointed-to data!
+	// ... index is (gfn)
+	mutable jtutil::array1d<void*> interp_data_buffer_ptrs_;
+	};
+
+//******************************************************************************
+
+	  }	// namespace AHFinderDirect
+#endif  /* TPATCH_INTERP_H */
--- a/AMSS_NCKU_source/AHF_Direct/patch_system.C
+++ b/AMSS_NCKU_source/AHF_Direct/patch_system.C
--- a/AMSS_NCKU_source/AHF_Direct/patch_system.h
+++ b/AMSS_NCKU_source/AHF_Direct/patch_system.h
--- a/AMSS_NCKU_source/AHF_Direct/patch_system_info.h
+++ b/AMSS_NCKU_source/AHF_Direct/patch_system_info.h
@@ -1,183 +1,183 @@
-#ifndef TPATCH_SYSTEM_INFO_H
-#define TPATCH_SYSTEM_INFO_H
-namespace AHFinderDirect
-{
-
-	//******************************************************************************
-
-	//
-	// This namespace contains static data describing the patch sizes and
-	// shapes for each type of patch system.  Since this data only describes
-	// the patch sizes/shapes, we don't distinguish between the different
-	// boundary conditions.
-	//
-
-	namespace patch_system_info
-	{
-		//
-		// full-sphere patch system
-		// ... covers all 4pi steradians
-		//
-		namespace full_sphere
-		{
-			enum
-			{
-				patch_number__pz = 0,
-				patch_number__px,
-				patch_number__py,
-				patch_number__mx,
-				patch_number__my,
-				patch_number__mz,
-				N_patches // no comma
-			};
-			static const struct patch_info patch_info_array[N_patches] = {
-				// +z patch (90 x 90 degrees): dmu [ -45,    45], dnu  [ -45,    45]
-				{"+z", patch::patch_is_plus, 'z', -45.0, 45.0, -45.0, 45.0},
-
-				// +x patch (90 x 90 degrees): dnu [  45,   135], dphi [ -45,    45]
-				{"+x", patch::patch_is_plus, 'x', 45.0, 135.0, -45.0, 45.0},
-
-				// +y patch (90 x 90 degrees): dmu [  45,   135], dphi [  45,   135]
-				{"+y", patch::patch_is_plus, 'y', 45.0, 135.0, 45.0, 135.0},
-
-				// -x patch (90 x 90 degrees): dnu [-135,   -45], dphi [ 135,   225]
-				{"-x", patch::patch_is_minus, 'x', -135.0, -45.0, 135.0, 225.0},
-
-				// -y patch (90 x 90 degrees): dmu [-135,   -45], dphi [-135,   -45]
-				{"-y", patch::patch_is_minus, 'y', -135.0, -45.0, -135.0, -45.0},
-
-				// -z patch (90 x 90 degrees): dmu [ 135,   225], dnu  [ 135,   225]
-				{"-z", patch::patch_is_minus, 'z', 135.0, 225.0, 135.0, 225.0},
-			};
-		} // namespace patch_system_info::full_sphere
-
-		//
-		// +z hemisphere (half) patch system
-		// ... mirror symmetry across z=0 plane
-		//
-		namespace plus_z_hemisphere
-		{
-			enum
-			{
-				patch_number__pz = 0,
-				patch_number__px,
-				patch_number__py,
-				patch_number__mx,
-				patch_number__my,
-				N_patches // no comma
-			};
-			static const struct patch_info patch_info_array[N_patches] = {
-				// +z patch (90 x 90 degrees): dmu [ -45,    45], dnu  [ -45,    45]
-				{"+z", patch::patch_is_plus, 'z', -45.0, 45.0, -45.0, 45.0},
-
-				// +x patch (45 x 90 degrees): dnu [  45,    90], dphi [ -45,    45]
-				{"+x", patch::patch_is_plus, 'x', 45.0, 90.0, -45.0, 45.0},
-
-				// +y patch (45 x 90 degrees): dmu [  45,    90], dphi [  45,   135]
-				{"+y", patch::patch_is_plus, 'y', 45.0, 90.0, 45.0, 135.0},
-
-				// -x patch (45 x 90 degrees): dnu [ -90,   -45], dphi [ 135,   225]
-				{"-x", patch::patch_is_minus, 'x', -90.0, -45.0, 135.0, 225.0},
-
-				// -y patch (45 x 90 degrees): dmu [ -90,   -45], dphi [-135,   -45]
-				{"-y", patch::patch_is_minus, 'y', -90.0, -45.0, -135.0, -45.0},
-			};
-		} // namespace patch_system_info::plus_z_hemisphere
-
-		//
-		// +[xy] "vertical" quarter-grid (quadrant) patch system
-		// two types of boundary conditions:
-		// ... mirror symmetry across x=0 and y=0 planes
-		// ... 90 degree periodic rotation symmetry about z axis
-		//
-		namespace plus_xy_quadrant
-		{
-			enum
-			{
-				patch_number__pz = 0,
-				patch_number__px,
-				patch_number__py,
-				patch_number__mz,
-				N_patches // no comma
-			};
-			static const struct patch_info patch_info_array[N_patches] = {
-				// +z patch (45 x 45 degrees): dmu [   0,    45], dnu  [   0,    45]
-				{"+z", patch::patch_is_plus, 'z', 0.0, 45.0, 0.0, 45.0},
-
-				// +x patch (90 x 45 degrees): dnu [  45,   135], dphi [   0,    45]
-				{"+x", patch::patch_is_plus, 'x', 45.0, 135.0, 0.0, 45.0},
-
-				// +y patch (90 x 45 degrees): dmu [  45,   135], dphi [  45,    90]
-				{"+y", patch::patch_is_plus, 'y', 45.0, 135.0, 45.0, 90.0},
-
-				// -z patch (45 x 45 degrees): dmu [ 135,   180], dnu  [ 135,   180]
-				{"-z", patch::patch_is_minus, 'z', 135.0, 180.0, 135.0, 180.0},
-			};
-		} // namespace patch_system_info::plus_xy_quadrant
-
-		//
-		// +[xz] "horizontal" quarter-grid (quadrant) patch system
-		// two types of boundary conditions
-		// ... mirror symmetry across x=0 plane, z=0 plane
-		// ... 180 degree periodic rotation symmetry about z axis,
-		//     mirror symmetry across z=0 plane
-		//
-		namespace plus_xz_quadrant
-		{
-			enum
-			{
-				patch_number__pz = 0,
-				patch_number__px,
-				patch_number__py,
-				patch_number__my,
-				N_patches // no comma
-			};
-			static const struct patch_info patch_info_array[N_patches] = {
-				// +z patch (90 x 45 degrees): dmu [ -45,    45], dnu  [   0,    45]
-				{"+z", patch::patch_is_plus, 'z', -45.0, 45.0, 0.0, 45.0},
-
-				// +x patch (45 x 90 degrees): dnu [  45,    90], dphi [ -45,    45]
-				{"+x", patch::patch_is_plus, 'x', 45.0, 90.0, -45.0, 45.0},
-
-				// +y patch (45 x 45 degrees): dmu [  45,    90], dphi [  45,    90]
-				{"+y", patch::patch_is_plus, 'y', 45.0, 90.0, 45.0, 90.0},
-
-				// -y patch (45 x 45 degrees): dmu [ -90,   -45], dphi [ -90,   -45]
-				{"-y", patch::patch_is_minus, 'y', -90.0, -45.0, -90.0, -45.0},
-			};
-		} // namespace patch_system_info::plus_xz_quadrant_rotating
-
-		//
-		// +[xyz] (octant) patch system
-		// two types of boundary conditions:
-		// ... mirror symmetry across x=0 plane, y=0 plane, z=0 plane
-		// ... 90 degree periodic rotation symmetry about z axis,
-		//     mirror symmetry across z=0 plane
-		//
-		namespace plus_xyz_octant
-		{
-			enum
-			{
-				patch_number__pz = 0,
-				patch_number__px,
-				patch_number__py,
-				N_patches // no comma
-			};
-			static const struct patch_info patch_info_array[N_patches] = {
-				// +z patch (45 x 45 degrees): dmu [   0,    45], dnu  [   0,    45]
-				{"+z", patch::patch_is_plus, 'z', 0.0, 45.0, 0.0, 45.0},
-
-				// +x patch (45 x 45 degrees): dnu [  45,    90], dphi [   0,    45]
-				{"+x", patch::patch_is_plus, 'x', 45.0, 90.0, 0.0, 45.0},
-
-				// +y patch (45 x 45 degrees): dmu [  45,    90], dphi [  45,    90]
-				{"+y", patch::patch_is_plus, 'y', 45.0, 90.0, 45.0, 90.0},
-			};
-		} // namespace patch_system_info::octant_mirrored
-
-	} // namespace patch_system_info::
-
-	//******************************************************************************
-
-} // namespace AHFinderDirect
-#endif /*  TPATCH_SYSTEM_INFO_H */
+#ifndef TPATCH_SYSTEM_INFO_H
+#define TPATCH_SYSTEM_INFO_H
+namespace AHFinderDirect
+{
+
+	//******************************************************************************
+
+	//
+	// This namespace contains static data describing the patch sizes and
+	// shapes for each type of patch system.  Since this data only describes
+	// the patch sizes/shapes, we don't distinguish between the different
+	// boundary conditions.
+	//
+
+	namespace patch_system_info
+	{
+		//
+		// full-sphere patch system
+		// ... covers all 4pi steradians
+		//
+		namespace full_sphere
+		{
+			enum // patch numbers, listed in the same order as the patch_info_array[] entries below
+			{
+				patch_number__pz = 0,
+				patch_number__px,
+				patch_number__py,
+				patch_number__mx,
+				patch_number__my,
+				patch_number__mz,
+				N_patches // last enumerator, so it equals the number of patches (6 here); no trailing comma
+			};
+			static const struct patch_info patch_info_array[N_patches] = {
+				// +z patch (90 x 90 degrees): dmu [ -45,    45], dnu  [ -45,    45]
+				{"+z", patch::patch_is_plus, 'z', -45.0, 45.0, -45.0, 45.0},
+
+				// +x patch (90 x 90 degrees): dnu [  45,   135], dphi [ -45,    45]
+				{"+x", patch::patch_is_plus, 'x', 45.0, 135.0, -45.0, 45.0},
+
+				// +y patch (90 x 90 degrees): dmu [  45,   135], dphi [  45,   135]
+				{"+y", patch::patch_is_plus, 'y', 45.0, 135.0, 45.0, 135.0},
+
+				// -x patch (90 x 90 degrees): dnu [-135,   -45], dphi [ 135,   225]
+				{"-x", patch::patch_is_minus, 'x', -135.0, -45.0, 135.0, 225.0},
+
+				// -y patch (90 x 90 degrees): dmu [-135,   -45], dphi [-135,   -45]
+				{"-y", patch::patch_is_minus, 'y', -135.0, -45.0, -135.0, -45.0},
+
+				// -z patch (90 x 90 degrees): dmu [ 135,   225], dnu  [ 135,   225]
+				{"-z", patch::patch_is_minus, 'z', 135.0, 225.0, 135.0, 225.0},
+			};
+		} // namespace patch_system_info::full_sphere
+
+		//
+		// +z hemisphere (half) patch system
+		// ... mirror symmetry across z=0 plane
+		//
+		namespace plus_z_hemisphere
+		{
+			enum // patch numbers, listed in the same order as the patch_info_array[] entries below
+			{
+				patch_number__pz = 0,
+				patch_number__px,
+				patch_number__py,
+				patch_number__mx,
+				patch_number__my,
+				N_patches // last enumerator, so it equals the number of patches (5 here); no trailing comma
+			};
+			static const struct patch_info patch_info_array[N_patches] = {
+				// +z patch (90 x 90 degrees): dmu [ -45,    45], dnu  [ -45,    45]
+				{"+z", patch::patch_is_plus, 'z', -45.0, 45.0, -45.0, 45.0},
+
+				// +x patch (45 x 90 degrees): dnu [  45,    90], dphi [ -45,    45]
+				{"+x", patch::patch_is_plus, 'x', 45.0, 90.0, -45.0, 45.0},
+
+				// +y patch (45 x 90 degrees): dmu [  45,    90], dphi [  45,   135]
+				{"+y", patch::patch_is_plus, 'y', 45.0, 90.0, 45.0, 135.0},
+
+				// -x patch (45 x 90 degrees): dnu [ -90,   -45], dphi [ 135,   225]
+				{"-x", patch::patch_is_minus, 'x', -90.0, -45.0, 135.0, 225.0},
+
+				// -y patch (45 x 90 degrees): dmu [ -90,   -45], dphi [-135,   -45]
+				{"-y", patch::patch_is_minus, 'y', -90.0, -45.0, -135.0, -45.0},
+			};
+		} // namespace patch_system_info::plus_z_hemisphere
+
+		//
+		// +[xy] "vertical" quarter-grid (quadrant) patch system
+		// two types of boundary conditions:
+		// ... mirror symmetry across x=0 and y=0 planes
+		// ... 90 degree periodic rotation symmetry about z axis
+		//
+		namespace plus_xy_quadrant
+		{
+			enum // patch numbers, listed in the same order as the patch_info_array[] entries below (no -x / -y patches)
+			{
+				patch_number__pz = 0,
+				patch_number__px,
+				patch_number__py,
+				patch_number__mz,
+				N_patches // = 4, the number of patches; last enumerator, so no comma
+			};
+			static const struct patch_info patch_info_array[N_patches] = { // per entry: name, plus/minus flag, axis letter, then the two [min, max] angle ranges (degrees) quoted in its comment
+				// +z patch (45 x 45 degrees): dmu [   0,    45], dnu  [   0,    45]
+				{"+z", patch::patch_is_plus, 'z', 0.0, 45.0, 0.0, 45.0},
+
+				// +x patch (90 x 45 degrees): dnu [  45,   135], dphi [   0,    45]
+				{"+x", patch::patch_is_plus, 'x', 45.0, 135.0, 0.0, 45.0},
+
+				// +y patch (90 x 45 degrees): dmu [  45,   135], dphi [  45,    90]
+				{"+y", patch::patch_is_plus, 'y', 45.0, 135.0, 45.0, 90.0},
+
+				// -z patch (45 x 45 degrees): dmu [ 135,   180], dnu  [ 135,   180]
+				{"-z", patch::patch_is_minus, 'z', 135.0, 180.0, 135.0, 180.0},
+			};
+		} // namespace patch_system_info::plus_xy_quadrant
+
+		//
+		// +[xz] "horizontal" quarter-grid (quadrant) patch system
+		// two types of boundary conditions
+		// ... mirror symmetry across x=0 plane, z=0 plane
+		// ... 180 degree periodic rotation symmetry about z axis,
+		//     mirror symmetry across z=0 plane
+		//
+		namespace plus_xz_quadrant
+		{
+			enum // patch numbers, listed in the same order as the patch_info_array[] entries below (no -x / -z patches)
+			{
+				patch_number__pz = 0,
+				patch_number__px,
+				patch_number__py,
+				patch_number__my,
+				N_patches // = 4, the number of patches; last enumerator, so no comma
+			};
+			static const struct patch_info patch_info_array[N_patches] = { // per entry: name, plus/minus flag, axis letter, then the two [min, max] angle ranges (degrees) quoted in its comment
+				// +z patch (90 x 45 degrees): dmu [ -45,    45], dnu  [   0,    45]
+				{"+z", patch::patch_is_plus, 'z', -45.0, 45.0, 0.0, 45.0},
+
+				// +x patch (45 x 90 degrees): dnu [  45,    90], dphi [ -45,    45]
+				{"+x", patch::patch_is_plus, 'x', 45.0, 90.0, -45.0, 45.0},
+
+				// +y patch (45 x 45 degrees): dmu [  45,    90], dphi [  45,    90]
+				{"+y", patch::patch_is_plus, 'y', 45.0, 90.0, 45.0, 90.0},
+
+				// -y patch (45 x 45 degrees): dmu [ -90,   -45], dphi [ -90,   -45]
+				{"-y", patch::patch_is_minus, 'y', -90.0, -45.0, -90.0, -45.0},
+			};
+		} // namespace patch_system_info::plus_xz_quadrant
+
+		//
+		// +[xyz] (octant) patch system
+		// two types of boundary conditions:
+		// ... mirror symmetry across x=0 plane, y=0 plane, z=0 plane
+		// ... 90 degree periodic rotation symmetry about z axis,
+		//     mirror symmetry across z=0 plane
+		//
+		namespace plus_xyz_octant
+		{
+			enum // patch numbers, listed in the same order as the patch_info_array[] entries below (only the three "plus" patches)
+			{
+				patch_number__pz = 0,
+				patch_number__px,
+				patch_number__py,
+				N_patches // = 3, the number of patches; last enumerator, so no comma
+			};
+			static const struct patch_info patch_info_array[N_patches] = { // per entry: name, plus/minus flag, axis letter, then the two [min, max] angle ranges (degrees) quoted in its comment
+				// +z patch (45 x 45 degrees): dmu [   0,    45], dnu  [   0,    45]
+				{"+z", patch::patch_is_plus, 'z', 0.0, 45.0, 0.0, 45.0},
+
+				// +x patch (45 x 45 degrees): dnu [  45,    90], dphi [   0,    45]
+				{"+x", patch::patch_is_plus, 'x', 45.0, 90.0, 0.0, 45.0},
+
+				// +y patch (45 x 45 degrees): dmu [  45,    90], dphi [  45,    90]
+				{"+y", patch::patch_is_plus, 'y', 45.0, 90.0, 45.0, 90.0},
+			};
+		} // namespace patch_system_info::plus_xyz_octant
+
+	} // namespace patch_system_info
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /*  TPATCH_SYSTEM_INFO_H */
--- a/AMSS_NCKU_source/AHF_Direct/round.C
+++ b/AMSS_NCKU_source/AHF_Direct/round.C
@@ -1,38 +1,38 @@
-#include <stdlib.h>
-
-#include "stdc.h"
-#include "util.h"
-
-namespace AHFinderDirect
-{
-       namespace jtutil
-       {
-              template <typename fp_t>
-              int round<fp_t>::to_integer(fp_t x)
-              {
-                     return (x >= 0.0)
-                                ? int(x + 0.5)      // eg 3.6 --> int(4.1) = 4
-                                : -int((-x) + 0.5); // eg -3.6 --> - int(4.1) = -4
-              }
-
-              template <typename fp_t>
-              int round<fp_t>::floor(fp_t x)
-              {
-                     return (x >= 0.0)
-                                ? int(x)
-                                : -ceiling(-x);
-              }
-
-              template <typename fp_t>
-              int round<fp_t>::ceiling(fp_t x)
-              {
-                     return (x >= 0.0)
-                                ? int(x) + (x != fp_t(int(x)))
-                                : -floor(-x);
-              }
-
-              template class round<float>;
-              template class round<double>;
-
-       } // namespace jtutil
-} // namespace AHFinderDirect
+#include <stdlib.h>
+
+#include "stdc.h"
+#include "util.h"
+
+namespace AHFinderDirect
+{
+       namespace jtutil
+       {
+              template <typename fp_t>
+              int round<fp_t>::to_integer(fp_t x) // nearest int to x; exact halves round away from zero
+              {
+                     return (x >= 0.0)
+                                ? int(x + 0.5)      // eg 3.6 --> int(4.1) = 4
+                                : -int((-x) + 0.5); // eg -3.6 --> - int(4.1) = -4
+              }
+
+              template <typename fp_t>
+              int round<fp_t>::floor(fp_t x) // largest int <= x
+              {
+                     return (x >= 0.0)
+                                ? int(x)        // int() truncates toward zero, which is the floor for x >= 0
+                                : -ceiling(-x); // x < 0: floor(x) == -ceiling(-x), eg -3.6 --> -ceiling(3.6) = -4
+              }
+
+              template <typename fp_t>
+              int round<fp_t>::ceiling(fp_t x) // smallest int >= x
+              {
+                     return (x >= 0.0)
+                                ? int(x) + (x != fp_t(int(x))) // truncate, then add 1 when x is not already a whole number
+                                : -floor(-x);                  // x < 0: ceiling(x) == -floor(-x), eg -3.6 --> -floor(3.6) = -3
+              }
+
+              template class round<float>;
+              template class round<double>;
+
+       } // namespace jtutil
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/setup.C
+++ b/AMSS_NCKU_source/AHF_Direct/setup.C
@@ -1,188 +1,188 @@
-#include <stdio.h>
-#include <assert.h>
-#include <math.h>
-#include <string.h>
-
-#include <mpi.h>
-
-#include "util_Table.h"
-#include "cctk.h"
-#include "config.h"
-#include "stdc.h"
-#include "util.h"
-#include "array.h"
-#include "cpm_map.h"
-#include "linear_map.h"
-
-#include "coords.h"
-#include "tgrid.h"
-#include "fd_grid.h"
-#include "patch.h"
-#include "patch_edge.h"
-#include "patch_interp.h"
-#include "ghost_zone.h"
-#include "patch_system.h"
-
-#include "Jacobian.h"
-
-#include "gfns.h"
-#include "gr.h"
-
-#include "horizon_sequence.h"
-#include "BH_diagnostics.h"
-#include "driver.h"
-using namespace std;
-
-#include "myglobal.h"
-#include "bssn_class.h"
-
-namespace AHFinderDirect
-{
-	struct state state;
-
-	using jtutil::error_exit;
-
-	namespace
-	{
-		int allocate_horizons_to_processor(int N_procs, int my_proc,
-										   int N_horizons, bool multiproc_flag,
-										   horizon_sequence &my_hs)
-		{
-			const int N_active_procs = multiproc_flag ? Mymin(N_procs, N_horizons)
-													  : 1;
-			// Implementation note:
-			// We allocate the horizons to active processors in round-robin order.
-			//
-			int proc = 0;
-			for (int hn = 1; hn <= N_horizons; ++hn)
-			{
-				if (proc == my_proc)
-					my_hs.append_hn(hn);
-				if (++proc >= N_active_procs)
-					proc = 0;
-			}
-
-			return N_active_procs;
-		}
-	}
-
-	extern struct state state;
-
-	void AHFinderDirect_setup(MyList<var> *AHList, MyList<var> *GaugeList, bssn_class *ADM,
-							  int Symmetry, int HN, double *PhysTime)
-	{
-		enum patch_system::patch_system_type ps_type;
-
-		switch (Symmetry)
-		{
-		case 2:
-			ps_type = patch_system::patch_system__plus_xyz_octant_mirrored;
-			break;
-		case 1:
-			ps_type = patch_system::patch_system__plus_z_hemisphere;
-			break;
-		case 0:
-			ps_type = patch_system::patch_system__full_sphere;
-			break;
-		default:
-			jtutil::error_exit(ERROR_EXIT, "** Symmetry=%d is not support by AHFD yet.", Symmetry);
-		}
-
-		int nprocs = 1, myrank = 0;
-		MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
-		MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
-
-		state.PhysTime = PhysTime; // Synchonize the PhysTime
-		state.Symmetry = Symmetry;
-		state.AHList = AHList;
-		state.GaugeList = GaugeList;
-		state.ADM = ADM;
-		state.N_procs = nprocs;
-		state.my_proc = myrank;
-
-		state.N_horizons = HN;
-
-		//
-		// (genuine) horizon sequence for this processor
-		//
-		state.my_hs = new horizon_sequence(state.N_horizons);
-		horizon_sequence &hs = *state.my_hs;
-
-		const bool multiproc_flag = true;
-		state.N_active_procs = allocate_horizons_to_processor(state.N_procs, state.my_proc,
-															  state.N_horizons, multiproc_flag,
-															  hs);
-
-		// ... horizon numbers run from 1 to N_horizons inclusive
-		//     so the array size is N_horizons+1
-		state.AH_data_array = new AH_data *[HN + 1];
-		for (int hn = 0; hn <= HN; ++hn)
-		{
-			state.AH_data_array[hn] = NULL;
-		}
-
-		int NNP = 0, NNP_out;
-		for (int hn = 1; hn <= hs.N_horizons(); ++hn)
-		{
-			const bool genuine_flag = hs.is_hn_genuine(hn);
-			state.AH_data_array[hn] = new AH_data;
-			struct AH_data &AH_data = *state.AH_data_array[hn];
-
-			AH_data.recentering_flag = false;
-			AH_data.stop_finding = false;
-
-			// create the patch system
-			AH_data.ps_ptr = new patch_system(0, 0, 0, // just dummy set, we will recenter it when setting initial guess
-											  ps_type, 2, 1,
-											  20, 1,
-											  //			      (genuine_flag ? 53 : 0),
-											  (genuine_flag ? gfns::nominal_max_gfn
-															: gfns::skeletal_nominal_max_gfn),
-											  -1, -1,
-											  1, 1,
-											  1, 1,
-											  true, false);
-			patch_system &ps = *AH_data.ps_ptr;
-
-			if (genuine_flag)
-				ps.set_gridfn_to_constant(1.0, gfns::gfn__one);
-
-			AH_data.Jac_ptr = genuine_flag ? new Jacobian(ps) : NULL;
-
-			AH_data.surface_expansion = 0;
-
-			AH_data.initial_find_flag = genuine_flag;
-
-			AH_data.found_flag = false;
-			AH_data.BH_diagnostics_fileptr = NULL;
-
-			NNP = Mymax(NNP, AH_data.ps_ptr->N_grid_points());
-		} // end of for hn
-
-		MPI_Allreduce(&NNP, &NNP_out, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
-
-		state.Data = new double[NNP_out * 35];
-		state.oX = new double[NNP_out];
-		state.oY = new double[NNP_out];
-		state.oZ = new double[NNP_out];
-	}
-	void AHFinderDirect_cleanup()
-	{
-		horizon_sequence &hs = *state.my_hs;
-		for (int hn = 1; hn <= hs.N_horizons(); ++hn)
-		{
-			struct AH_data &AH_data = *state.AH_data_array[hn];
-			if (AH_data.ps_ptr)
-				delete AH_data.ps_ptr;
-			if (AH_data.Jac_ptr)
-				delete AH_data.Jac_ptr;
-			delete state.AH_data_array[hn];
-		} // end of for hn
-		delete[] state.AH_data_array;
-		delete state.my_hs;
-		delete[] state.oX;
-		delete[] state.oY;
-		delete[] state.oZ;
-		delete[] state.Data;
-	}
-} // namespace AHFinderDirect
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+
+#include <mpi.h>
+
+#include "util_Table.h"
+#include "cctk.h"
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "cpm_map.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "fd_grid.h"
+#include "patch.h"
+#include "patch_edge.h"
+#include "patch_interp.h"
+#include "ghost_zone.h"
+#include "patch_system.h"
+
+#include "Jacobian.h"
+
+#include "gfns.h"
+#include "gr.h"
+
+#include "horizon_sequence.h"
+#include "BH_diagnostics.h"
+#include "driver.h"
+using namespace std;
+
+#include "myglobal.h"
+#include "bssn_class.h"
+
+namespace AHFinderDirect
+{
+	struct state state;
+
+	using jtutil::error_exit;
+
+	namespace
+	{
+		int allocate_horizons_to_processor(int N_procs, int my_proc,
+										   int N_horizons, bool multiproc_flag,
+										   horizon_sequence &my_hs)
+		{
+			const int N_active_procs = multiproc_flag ? Mymin(N_procs, N_horizons)
+													  : 1; // single-processor mode: every horizon lands on processor 0
+			// Deal horizon numbers 1..N_horizons out to processors
+			// 0..N_active_procs-1 in round-robin order; only those that land on
+			// my_proc are appended to my_hs.  Returns N_active_procs.
+			int proc = 0;
+			for (int hn = 1; hn <= N_horizons; ++hn)
+			{
+				if (proc == my_proc)
+					my_hs.append_hn(hn);
+				if (++proc >= N_active_procs) // wrap around to the first processor
+					proc = 0;
+			}
+
+			return N_active_procs;
+		}
+	}
+
+	extern struct state state;
+
+	void AHFinderDirect_setup(MyList<var> *AHList, MyList<var> *GaugeList, bssn_class *ADM,
+							  int Symmetry, int HN, double *PhysTime)
+	{
+		enum patch_system::patch_system_type ps_type; // chosen from Symmetry by the switch below
+
+		switch (Symmetry)
+		{
+		case 2: // Symmetry 2 -> mirrored +xyz octant patch system
+			ps_type = patch_system::patch_system__plus_xyz_octant_mirrored;
+			break;
+		case 1: // Symmetry 1 -> +z hemisphere patch system
+			ps_type = patch_system::patch_system__plus_z_hemisphere;
+			break;
+		case 0: // Symmetry 0 -> full-sphere patch system
+			ps_type = patch_system::patch_system__full_sphere;
+			break;
+		default: // NOTE(review): ps_type stays unset on this path, so this relies on error_exit() not returning -- confirm
+			jtutil::error_exit(ERROR_EXIT, "** Symmetry=%d is not support by AHFD yet.", Symmetry);
+		}
+
+		int nprocs = 1, myrank = 0; // defaults, overwritten by the MPI queries below
+		MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+		MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+
+		state.PhysTime = PhysTime; // store the caller's pointer (not a copy of the value)
+		state.Symmetry = Symmetry;
+		state.AHList = AHList;
+		state.GaugeList = GaugeList;
+		state.ADM = ADM;
+		state.N_procs = nprocs;
+		state.my_proc = myrank;
+
+		state.N_horizons = HN;
+
+		//
+		// (genuine) horizon sequence for this processor
+		//
+		state.my_hs = new horizon_sequence(state.N_horizons);
+		horizon_sequence &hs = *state.my_hs;
+
+		const bool multiproc_flag = true; // hard-wired: horizons are always dealt out across the MPI ranks
+		state.N_active_procs = allocate_horizons_to_processor(state.N_procs, state.my_proc,
+															  state.N_horizons, multiproc_flag,
+															  hs);
+
+		// ... horizon numbers run from 1 to N_horizons inclusive
+		//     so the array size is N_horizons+1
+		state.AH_data_array = new AH_data *[HN + 1];
+		for (int hn = 0; hn <= HN; ++hn)
+		{
+			state.AH_data_array[hn] = NULL;
+		}
+
+		int NNP = 0, NNP_out; // NNP: largest N_grid_points() among this rank's patch systems; NNP_out: maximum of NNP over all ranks
+		for (int hn = 1; hn <= hs.N_horizons(); ++hn)
+		{
+			const bool genuine_flag = hs.is_hn_genuine(hn);
+			state.AH_data_array[hn] = new AH_data;
+			struct AH_data &AH_data = *state.AH_data_array[hn];
+
+			AH_data.recentering_flag = false;
+			AH_data.stop_finding = false;
+
+			// create the patch system (for every horizon number, genuine or not)
+			AH_data.ps_ptr = new patch_system(0, 0, 0, // dummy origin; per the original author it is recentered when the initial guess is set
+											  ps_type, 2, 1,
+											  20, 1,
+											  //			      (genuine_flag ? 53 : 0),
+											  (genuine_flag ? gfns::nominal_max_gfn
+															: gfns::skeletal_nominal_max_gfn),
+											  -1, -1,
+											  1, 1,
+											  1, 1,
+											  true, false);
+			patch_system &ps = *AH_data.ps_ptr;
+
+			if (genuine_flag)
+				ps.set_gridfn_to_constant(1.0, gfns::gfn__one);
+
+			AH_data.Jac_ptr = genuine_flag ? new Jacobian(ps) : NULL; // only genuine horizons get a Jacobian
+
+			AH_data.surface_expansion = 0;
+
+			AH_data.initial_find_flag = genuine_flag;
+
+			AH_data.found_flag = false;
+			AH_data.BH_diagnostics_fileptr = NULL;
+
+			NNP = Mymax(NNP, AH_data.ps_ptr->N_grid_points());
+		} // end of for hn
+
+		MPI_Allreduce(&NNP, &NNP_out, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); // every rank sizes its buffers from the global maximum
+
+		state.Data = new double[NNP_out * 35]; // 35 doubles per grid point; NOTE(review): the meaning of 35 is not visible here
+		state.oX = new double[NNP_out]; // oX / oY / oZ: one double per grid point each
+		state.oY = new double[NNP_out];
+		state.oZ = new double[NNP_out];
+	}
+	void AHFinderDirect_cleanup() // frees what AHFinderDirect_setup() allocated; dereferences state.my_hs, so setup must have run first
+	{
+		horizon_sequence &hs = *state.my_hs;
+		for (int hn = 1; hn <= hs.N_horizons(); ++hn)
+		{
+			struct AH_data &AH_data = *state.AH_data_array[hn];
+			if (AH_data.ps_ptr)
+				delete AH_data.ps_ptr;
+			if (AH_data.Jac_ptr) // NULL for non-genuine horizons (see AHFinderDirect_setup)
+				delete AH_data.Jac_ptr;
+			delete state.AH_data_array[hn]; // NOTE(review): AH_data.BH_diagnostics_fileptr is not closed here -- confirm it is closed elsewhere
+		} // end of for hn
+		delete[] state.AH_data_array;
+		delete state.my_hs; // hs above is a reference to this object and must not be used past this point
+		delete[] state.oX;
+		delete[] state.oY;
+		delete[] state.oZ;
+		delete[] state.Data;
+	}
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/stdc.h
+++ b/AMSS_NCKU_source/AHF_Direct/stdc.h
@@ -1,24 +1,24 @@
-#ifndef AHFINDERDIRECT__STDC_H
-#define AHFINDERDIRECT__STDC_H
-
-#define then /* empty */
-
-#ifdef M_PI
-#define PI M_PI
-#endif
-
-#define iabs(x_) abs(x_)
-
-namespace AHFinderDirect
-{
-	namespace jtutil
-	{
-
-		int error_exit(int msg_level, const char *format, ...);
-
-#define ERROR_EXIT (-1)
-#define PANIC_EXIT (-2)
-	}
-}
-
-#endif /* AHFINDERDIRECT__STDC_H */
+#ifndef AHFINDERDIRECT__STDC_H
+#define AHFINDERDIRECT__STDC_H
+
+#define then /* expands to nothing, so a "then" written after an if-condition is dropped by the preprocessor */
+
+#ifdef M_PI // PI is only defined here when M_PI is already visible at this point
+#define PI M_PI
+#endif
+
+#define iabs(x_) abs(x_) // forwards to whichever abs() is visible where the macro is used
+
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+
+		int error_exit(int msg_level, const char *format, ...); // declaration only; takes a format string plus variadic arguments
+
+#define ERROR_EXIT (-1) // passed as msg_level to error_exit(); note that macros are not scoped by the enclosing namespace
+#define PANIC_EXIT (-2)
+	}
+}
+
+#endif /* AHFINDERDIRECT__STDC_H */
--- a/AMSS_NCKU_source/AHF_Direct/tgrid.C
+++ b/AMSS_NCKU_source/AHF_Direct/tgrid.C
@@ -1,128 +1,128 @@
-#include <stdio.h>
-#include <assert.h>
-#include <math.h>
-
-#include "cctk.h"
-
-#include "config.h"
-#include "stdc.h"
-#include "util.h"
-#include "array.h"
-#include "linear_map.h"
-
-#include "coords.h"
-#include "tgrid.h"
-
-namespace AHFinderDirect
-{
-
-	//*****************************************************************************
-	//*****************************************************************************
-	//*****************************************************************************
-
-	//
-	// This function constructs a  grid_arrays  object.
-	//
-	grid_arrays::grid_arrays(const grid_array_pars &grid_array_pars_in)
-
-		: gridfn_data_(NULL),
-		  ghosted_gridfn_data_(NULL),
-
-		  // these are all set properly by setup_gridfn_storage()
-		  min_gfn_(0), max_gfn_(0),
-		  ghosted_min_gfn_(0), ghosted_max_gfn_(0),
-
-		  min_irho_(grid_array_pars_in.min_irho),
-		  max_irho_(grid_array_pars_in.max_irho),
-		  min_isigma_(grid_array_pars_in.min_isigma),
-		  max_isigma_(grid_array_pars_in.max_isigma),
-
-		  ghosted_min_irho_(grid_array_pars_in.min_irho - grid_array_pars_in.min_rho_ghost_zone_width),
-		  ghosted_max_irho_(grid_array_pars_in.max_irho + grid_array_pars_in.max_rho_ghost_zone_width),
-		  ghosted_min_isigma_(grid_array_pars_in.min_isigma - grid_array_pars_in.min_sigma_ghost_zone_width),
-		  ghosted_max_isigma_(grid_array_pars_in.max_isigma + grid_array_pars_in.max_sigma_ghost_zone_width)
-	// no comma
-	{
-	}
-
-	//*****************************************************************************
-
-	//
-	// This function sets up the gridfn storage arrays in a  grid_arrays  object.
-	//
-	void grid_arrays::setup_gridfn_storage(const gridfn_pars &gridfn_pars_in,
-										   const gridfn_pars &ghosted_gridfn_pars_in)
-	{
-		assert(gridfn_data_ == NULL);
-		gridfn_data_ = new jtutil::array3d<fp>(gridfn_pars_in.min_gfn,
-											   gridfn_pars_in.max_gfn,
-											   min_irho(), max_irho(),
-											   min_isigma(), max_isigma(),
-											   gridfn_pars_in.storage_array,
-											   gridfn_pars_in.gfn_stride,
-											   gridfn_pars_in.irho_stride,
-											   gridfn_pars_in.isigma_stride);
-
-		assert(ghosted_gridfn_data_ == NULL);
-		ghosted_gridfn_data_ = new jtutil::array3d<fp>(ghosted_gridfn_pars_in.min_gfn,
-													   ghosted_gridfn_pars_in.max_gfn,
-													   ghosted_min_irho(), ghosted_max_irho(),
-													   ghosted_min_isigma(), ghosted_max_isigma(),
-													   ghosted_gridfn_pars_in.storage_array,
-													   ghosted_gridfn_pars_in.gfn_stride,
-													   ghosted_gridfn_pars_in.irho_stride,
-													   ghosted_gridfn_pars_in.isigma_stride);
-	}
-
-	//******************************************************************************
-
-	//
-	// This function destroys a  grid_arrays  object.
-	//
-	grid_arrays::~grid_arrays()
-	{
-		delete ghosted_gridfn_data_;
-		delete gridfn_data_;
-	}
-
-	//*****************************************************************************
-	//*****************************************************************************
-	//*****************************************************************************
-
-	//
-	// This function constructs a  grid  object.
-	//
-	grid::grid(const grid_array_pars &grid_array_pars_in,
-			   const grid_pars &grid_pars_in)
-
-		: grid_arrays(grid_array_pars_in),
-
-		  rho_map_(grid_array_pars_in.min_irho - grid_array_pars_in.min_rho_ghost_zone_width,
-				   grid_array_pars_in.max_irho + grid_array_pars_in.max_rho_ghost_zone_width,
-				   jtutil::radians_of_degrees(
-					   grid_pars_in.min_drho - grid_array_pars_in.min_rho_ghost_zone_width * grid_pars_in.delta_drho),
-				   jtutil::radians_of_degrees(grid_pars_in.delta_drho),
-				   jtutil::radians_of_degrees(
-					   grid_pars_in.max_drho + grid_array_pars_in.max_rho_ghost_zone_width * grid_pars_in.delta_drho)),
-
-		  sigma_map_(grid_array_pars_in.min_isigma - grid_array_pars_in.min_sigma_ghost_zone_width,
-					 grid_array_pars_in.max_isigma + grid_array_pars_in.max_sigma_ghost_zone_width,
-					 jtutil::radians_of_degrees(
-						 grid_pars_in.min_dsigma - grid_array_pars_in.min_sigma_ghost_zone_width * grid_pars_in.delta_dsigma),
-					 jtutil::radians_of_degrees(grid_pars_in.delta_dsigma),
-					 jtutil::radians_of_degrees(
-						 grid_pars_in.max_dsigma + grid_array_pars_in.max_sigma_ghost_zone_width * grid_pars_in.delta_dsigma)),
-
-		  min_rho_(jtutil::radians_of_degrees(grid_pars_in.min_drho)),
-		  max_rho_(jtutil::radians_of_degrees(grid_pars_in.max_drho)),
-		  min_sigma_(jtutil::radians_of_degrees(grid_pars_in.min_dsigma)),
-		  max_sigma_(jtutil::radians_of_degrees(grid_pars_in.max_dsigma))
-	// no comma
-	{
-	}
-
-	//******************************************************************************
-	//******************************************************************************
-	//******************************************************************************
-
-} // namespace AHFinderDirect
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+
+namespace AHFinderDirect
+{
+
+	//*****************************************************************************
+	//*****************************************************************************
+	//*****************************************************************************
+
+	//
+	// This function constructs a  grid_arrays  object.
+	//
+	grid_arrays::grid_arrays(const grid_array_pars &grid_array_pars_in)
+
+		: gridfn_data_(NULL), // no storage yet; allocated later by setup_gridfn_storage()
+		  ghosted_gridfn_data_(NULL),
+
+		  // zero placeholders, meant to be set by setup_gridfn_storage() -- NOTE(review): that function as defined in this file does not assign them; confirm
+		  min_gfn_(0), max_gfn_(0),
+		  ghosted_min_gfn_(0), ghosted_max_gfn_(0),
+
+		  min_irho_(grid_array_pars_in.min_irho), // nominal index bounds, copied straight from the parameters
+		  max_irho_(grid_array_pars_in.max_irho),
+		  min_isigma_(grid_array_pars_in.min_isigma),
+		  max_isigma_(grid_array_pars_in.max_isigma),
+
+		  ghosted_min_irho_(grid_array_pars_in.min_irho - grid_array_pars_in.min_rho_ghost_zone_width), // ghosted bounds = nominal bounds widened by the ghost-zone width on each side
+		  ghosted_max_irho_(grid_array_pars_in.max_irho + grid_array_pars_in.max_rho_ghost_zone_width),
+		  ghosted_min_isigma_(grid_array_pars_in.min_isigma - grid_array_pars_in.min_sigma_ghost_zone_width),
+		  ghosted_max_isigma_(grid_array_pars_in.max_isigma + grid_array_pars_in.max_sigma_ghost_zone_width)
+	// no comma
+	{
+	}
+
+	//*****************************************************************************
+
+	//
+	// This function sets up the gridfn storage arrays in a  grid_arrays  object.
+	//
+	void grid_arrays::setup_gridfn_storage(const gridfn_pars &gridfn_pars_in,
+										   const gridfn_pars &ghosted_gridfn_pars_in)
+	{
+		assert(gridfn_data_ == NULL); // guards against setting up storage twice (only when asserts are enabled)
+		gridfn_data_ = new jtutil::array3d<fp>(gridfn_pars_in.min_gfn,
+											   gridfn_pars_in.max_gfn,
+											   min_irho(), max_irho(), // nominal (un-ghosted) index ranges
+											   min_isigma(), max_isigma(),
+											   gridfn_pars_in.storage_array,
+											   gridfn_pars_in.gfn_stride,
+											   gridfn_pars_in.irho_stride,
+											   gridfn_pars_in.isigma_stride);
+
+		assert(ghosted_gridfn_data_ == NULL); // same double-setup guard for the ghosted array
+		ghosted_gridfn_data_ = new jtutil::array3d<fp>(ghosted_gridfn_pars_in.min_gfn,
+													   ghosted_gridfn_pars_in.max_gfn,
+													   ghosted_min_irho(), ghosted_max_irho(), // index ranges including the ghost zones
+													   ghosted_min_isigma(), ghosted_max_isigma(),
+													   ghosted_gridfn_pars_in.storage_array,
+													   ghosted_gridfn_pars_in.gfn_stride,
+													   ghosted_gridfn_pars_in.irho_stride,
+													   ghosted_gridfn_pars_in.isigma_stride);
+	}
+
+	//******************************************************************************
+
+	//
+	// This function destroys a  grid_arrays  object.
+	//
+	grid_arrays::~grid_arrays()
+	{
+		delete ghosted_gridfn_data_; // deleting NULL is a no-op, so this is safe even if setup_gridfn_storage() never ran
+		delete gridfn_data_;         // released in the reverse of the order setup_gridfn_storage() allocates them
+	}
+
+	//*****************************************************************************
+	//*****************************************************************************
+	//*****************************************************************************
+
+	//
+	// This function constructs a  grid  object.
+	//
+	grid::grid(const grid_array_pars &grid_array_pars_in,
+			   const grid_pars &grid_pars_in)
+
+		: grid_arrays(grid_array_pars_in),
+
+		  rho_map_(grid_array_pars_in.min_irho - grid_array_pars_in.min_rho_ghost_zone_width, // first and last index of the ghosted rho range
+				   grid_array_pars_in.max_irho + grid_array_pars_in.max_rho_ghost_zone_width,
+				   jtutil::radians_of_degrees( // angle at the first ghosted index: nominal minimum moved out by the ghost-zone width
+					   grid_pars_in.min_drho - grid_array_pars_in.min_rho_ghost_zone_width * grid_pars_in.delta_drho),
+				   jtutil::radians_of_degrees(grid_pars_in.delta_drho), // grid spacing
+				   jtutil::radians_of_degrees( // angle at the last ghosted index
+					   grid_pars_in.max_drho + grid_array_pars_in.max_rho_ghost_zone_width * grid_pars_in.delta_drho)),
+
+		  sigma_map_(grid_array_pars_in.min_isigma - grid_array_pars_in.min_sigma_ghost_zone_width, // same construction for sigma
+					 grid_array_pars_in.max_isigma + grid_array_pars_in.max_sigma_ghost_zone_width,
+					 jtutil::radians_of_degrees(
+						 grid_pars_in.min_dsigma - grid_array_pars_in.min_sigma_ghost_zone_width * grid_pars_in.delta_dsigma),
+					 jtutil::radians_of_degrees(grid_pars_in.delta_dsigma),
+					 jtutil::radians_of_degrees(
+						 grid_pars_in.max_dsigma + grid_array_pars_in.max_sigma_ghost_zone_width * grid_pars_in.delta_dsigma)),
+
+		  min_rho_(jtutil::radians_of_degrees(grid_pars_in.min_drho)), // nominal (un-ghosted) bounds, converted from degrees to radians
+		  max_rho_(jtutil::radians_of_degrees(grid_pars_in.max_drho)),
+		  min_sigma_(jtutil::radians_of_degrees(grid_pars_in.min_dsigma)),
+		  max_sigma_(jtutil::radians_of_degrees(grid_pars_in.max_dsigma))
+	// no comma
+	{
+	}
+
+	//******************************************************************************
+	//******************************************************************************
+	//******************************************************************************
+
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/tgrid.h
+++ b/AMSS_NCKU_source/AHF_Direct/tgrid.h
--- a/AMSS_NCKU_source/AHF_Direct/util.h
+++ b/AMSS_NCKU_source/AHF_Direct/util.h
@@ -1,157 +1,157 @@
-#ifndef AHFINDERDIRECT__UTIL_HH
-#define AHFINDERDIRECT__UTIL_HH
-#ifdef newc
-#include <iostream>
-#include <iomanip>
-#include <fstream>
-#include <strstream>
-#include <cmath>
-using namespace std;
-#else
-#include <iostream.h>
-#include <iomanip.h>
-#include <fstream.h>
-#include <string.h>
-#include <math.h>
-#endif
-
-#define PI M_PI
-
-namespace AHFinderDirect
-{
-	namespace jtutil
-	{
-		inline int how_many_in_range(int low, int high) { return high - low + 1; }
-
-		inline int is_even(int i) { return !(i & 0x1); }
-		inline int is_odd(int i) { return (i & 0x1); }
-
-		template <typename T>
-		inline T tmin(T x, T y) { return (x < y) ? x : y; }
-		template <typename T>
-		inline T tmax(T x, T y) { return (x > y) ? x : y; }
-		template <typename T>
-		inline T abs(T x) { return (x > 0) ? x : -x; }
-
-		template <typename T>
-		inline T pow2(T x) { return x * x; }
-		template <typename T>
-		inline T pow3(T x) { return x * x * x; }
-		template <typename T>
-		inline T pow4(T x) { return pow2(pow2(x)); }
-
-		template <typename fp_t>
-		inline fp_t degrees_of_radians(fp_t radians) { return (180.0 / PI) * radians; }
-		template <typename fp_t>
-		inline fp_t radians_of_degrees(fp_t degrees) { return (PI / 180.0) * degrees; }
-
-		// in miscfp.cc
-		//-----------------------------------------------------
-		double signum(double x);
-		double hypot3(double x, double y, double z);
-		double arctan_xy(double x, double y);
-
-		double modulo_reduce(double x, double xmod, double xmin, double xmax);
-
-		template <typename fp_t>
-		void zero_C_array(int N, fp_t array[]);
-
-		// in error_exit.cc
-		// ------------------------------------------------------
-		int error_exit(int msg_level, const char *format, ...);
-
-		// in norm.cc
-		//
-		template <typename fp_t>
-		class norm
-		{
-		public:
-			// get norms etc
-			fp_t mean() const;
-			fp_t two_norm() const; // sqrt(sum x_i^2)
-			fp_t rms_norm() const; // sqrt(average of x_i^2)
-			fp_t infinity_norm() const { return max_abs_value_; }
-
-			fp_t max_abs_value() const { return max_abs_value_; }
-			fp_t min_abs_value() const { return min_abs_value_; }
-
-			fp_t max_value() const { return max_value_; }
-			fp_t min_value() const { return min_value_; }
-
-			// specify data point
-			void data(fp_t x);
-
-			// have any data points been specified?
-			bool is_empty() const { return N_ == 0; }
-			bool is_nonempty() const { return N_ > 0; }
-
-			// reset ==> just like newly-constructed object
-			void reset();
-
-			// constructor, destructor
-			// ... compiler-generated no-op destructor is ok
-			norm();
-
-		private:
-			// we forbid copying and passing by value
-			// by declaring the copy constructor and assignment operator
-			// private, but never defining them
-			norm(const norm &rhs);
-			norm &operator=(const norm &rhs);
-
-		private:
-			long N_;			 // # of data points
-			fp_t sum_;			 // sum(data)
-			fp_t sum2_;			 // sum(data^2)
-			fp_t max_abs_value_; // max |data|
-			fp_t min_abs_value_; // min |data|
-			fp_t max_value_;	 // max data
-			fp_t min_value_;	 // min data
-		};
-
-		// in fuzzy.cc
-		template <typename fp_t>
-		class fuzzy
-		{
-		public:
-			// comparison tolerance (may be modified by user code if needed)
-			static fp_t get_tolerance() { return tolerance_; }
-			static void set_tolerance(fp_t new_tolerance)
-			{
-				tolerance_ = new_tolerance;
-			}
-
-			// fuzzy commparisons
-			static bool EQ(fp_t x, fp_t y);
-			static bool NE(fp_t x, fp_t y) { return !EQ(x, y); }
-			static bool LT(fp_t x, fp_t y) { return EQ(x, y) ? false : (x < y); }
-			static bool LE(fp_t x, fp_t y) { return EQ(x, y) ? true : (x < y); }
-			static bool GT(fp_t x, fp_t y) { return EQ(x, y) ? false : (x > y); }
-			static bool GE(fp_t x, fp_t y) { return EQ(x, y) ? true : (x > y); }
-
-			static bool is_integer(fp_t x); // is x fuzzily an integer?
-			static int floor(fp_t x);		// round x fuzzily down to integer
-			static int ceiling(fp_t x);		// round x fuzzily up to integer
-
-		private:
-			// comparison tolerance
-			// ... must be explicitly initialized when instantiating
-			//     for a new <fp_t> type, see "fuzzy.cc" for details/examples
-			static fp_t tolerance_;
-		};
-
-		// in round.cc
-		template <typename fp_t>
-		class round
-		{
-		public:
-			static int to_integer(fp_t x); // round to nearest integer
-
-			static int floor(fp_t x);	// round down to integer
-			static int ceiling(fp_t x); // round up to integer
-		};
-
-	} // namespace jtutil
-} // namespace AHFinderDirect
-
-#endif /* AHFINDERDIRECT__UTIL_HH */
+#ifndef AHFINDERDIRECT__UTIL_HH
+#define AHFINDERDIRECT__UTIL_HH
+#ifdef newc
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <strstream>
+#include <cmath>
+using namespace std;
+#else
+#include <iostream.h>
+#include <iomanip.h>
+#include <fstream.h>
+#include <string.h>
+#include <math.h>
+#endif
+
+#define PI M_PI
+
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+		inline int how_many_in_range(int low, int high) { return high - low + 1; } // count of integers in the inclusive range [low, high]
+
+		inline int is_even(int i) { return !(i & 0x1); } // 1 if the low bit of i is clear, else 0
+		inline int is_odd(int i) { return (i & 0x1); } // low bit of i: 1 if odd, 0 if even
+
+		template <typename T>
+		inline T tmin(T x, T y) { return (x < y) ? x : y; } // smaller of x and y (y when neither is smaller)
+		template <typename T>
+		inline T tmax(T x, T y) { return (x > y) ? x : y; } // larger of x and y (y when neither is larger)
+		template <typename T>
+		inline T abs(T x) { return (x > 0) ? x : -x; } // absolute value of x
+
+		template <typename T>
+		inline T pow2(T x) { return x * x; } // x squared
+		template <typename T>
+		inline T pow3(T x) { return x * x * x; } // x cubed
+		template <typename T>
+		inline T pow4(T x) { return pow2(pow2(x)); } // x to the 4th power, as (x^2)^2
+
+		template <typename fp_t>
+		inline fp_t degrees_of_radians(fp_t radians) { return (180.0 / PI) * radians; } // radians -> degrees
+		template <typename fp_t>
+		inline fp_t radians_of_degrees(fp_t degrees) { return (PI / 180.0) * degrees; } // degrees -> radians
+
+		// in miscfp.cc
+		//-----------------------------------------------------
+		double signum(double x);
+		double hypot3(double x, double y, double z);
+		double arctan_xy(double x, double y);
+
+		double modulo_reduce(double x, double xmod, double xmin, double xmax);
+
+		template <typename fp_t>
+		void zero_C_array(int N, fp_t array[]);
+
+		// in error_exit.cc
+		// ------------------------------------------------------
+		int error_exit(int msg_level, const char *format, ...);
+
+		// statistics (mean, norms, extrema) over the data points passed to data()
+		// ... member functions not defined inline here are in norm.cc
+		template <typename fp_t>
+		class norm
+		{
+		public:
+			// get norms etc
+			fp_t mean() const;
+			fp_t two_norm() const; // sqrt(sum x_i^2)
+			fp_t rms_norm() const; // sqrt(average of x_i^2)
+			fp_t infinity_norm() const { return max_abs_value_; } // same value as max_abs_value()
+
+			fp_t max_abs_value() const { return max_abs_value_; }
+			fp_t min_abs_value() const { return min_abs_value_; }
+
+			fp_t max_value() const { return max_value_; }
+			fp_t min_value() const { return min_value_; }
+
+			// specify data point
+			void data(fp_t x);
+
+			// have any data points been specified?
+			bool is_empty() const { return N_ == 0; }
+			bool is_nonempty() const { return N_ > 0; }
+
+			// reset ==> just like newly-constructed object
+			void reset();
+
+			// constructor, destructor
+			// ... compiler-generated no-op destructor is ok
+			norm();
+
+		private:
+			// we forbid copying and passing by value
+			// by declaring the copy constructor and assignment operator
+			// private, but never defining them
+			norm(const norm &rhs);
+			norm &operator=(const norm &rhs);
+
+		private:
+			long N_;			 // # of data points
+			fp_t sum_;			 // sum(data)
+			fp_t sum2_;			 // sum(data^2)
+			fp_t max_abs_value_; // max |data|
+			fp_t min_abs_value_; // min |data|
+			fp_t max_value_;	 // max data
+			fp_t min_value_;	 // min data
+		};
+
+		// fuzzy floating-point comparisons built on EQ(); members not defined inline here are in fuzzy.cc
+		template <typename fp_t>
+		class fuzzy
+		{
+		public:
+			// comparison tolerance (may be modified by user code if needed)
+			static fp_t get_tolerance() { return tolerance_; }
+			static void set_tolerance(fp_t new_tolerance)
+			{
+				tolerance_ = new_tolerance;
+			}
+
+			// fuzzy comparisons: each relation checks EQ() first, so fuzzily-equal values are never LT/GT and always LE/GE
+			static bool EQ(fp_t x, fp_t y);
+			static bool NE(fp_t x, fp_t y) { return !EQ(x, y); }
+			static bool LT(fp_t x, fp_t y) { return EQ(x, y) ? false : (x < y); }
+			static bool LE(fp_t x, fp_t y) { return EQ(x, y) ? true : (x < y); }
+			static bool GT(fp_t x, fp_t y) { return EQ(x, y) ? false : (x > y); }
+			static bool GE(fp_t x, fp_t y) { return EQ(x, y) ? true : (x > y); }
+
+			static bool is_integer(fp_t x); // is x fuzzily an integer?
+			static int floor(fp_t x);		// round x fuzzily down to integer
+			static int ceiling(fp_t x);		// round x fuzzily up to integer
+
+		private:
+			// comparison tolerance
+			// ... must be explicitly initialized when instantiating
+			//     for a new <fp_t> type, see "fuzzy.cc" for details/examples
+			static fp_t tolerance_;
+		};
+
+		// floating-point to int rounding helpers (all static); definitions are in round.cc
+		template <typename fp_t>
+		class round
+		{
+		public:
+			static int to_integer(fp_t x); // round to nearest integer
+
+			static int floor(fp_t x);	// round down to integer
+			static int ceiling(fp_t x); // round up to integer
+		};
+
+	} // namespace jtutil
+} // namespace AHFinderDirect
+
+#endif /* AHFINDERDIRECT__UTIL_HH */
--- a/AMSS_NCKU_source/AHF_Direct/util_String.h
+++ b/AMSS_NCKU_source/AHF_Direct/util_String.h
@@ -1,45 +1,45 @@
-#ifndef _UTIL_STRING_H_
-#define _UTIL_STRING_H_ 1
-
-#include <stdarg.h>
-#include <stddef.h>
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-    const char *Util_StrSep(const char **stringp,
-                            const char *delim);
-
-    int Util_SplitString(char **before,
-                         char **after,
-                         const char *string,
-                         const char *sep);
-
-    int Util_SplitFilename(char **dir,
-                           char **file,
-                           const char *string);
-
-    char *Util_Strdup(const char *s);
-
-    size_t Util_Strlcpy(char *dst, const char *src, size_t dst_size);
-    size_t Util_Strlcat(char *dst, const char *src, size_t dst_size);
-
-    int Util_StrCmpi(const char *string1,
-                     const char *string2);
-    int Util_StrMemCmpi(const char *string1,
-                        const char *string2,
-                        size_t len2);
-
-    int Util_vsnprintf(char *str, size_t count, const char *fmt, va_list args);
-    int Util_snprintf(char *str, size_t count, const char *fmt, ...);
-
-    int Util_asprintf(char **buffer, const char *fmt, ...);
-    int Util_asnprintf(char **buffer, size_t size, const char *fmt, ...);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _UTIL_STRING_H_ */
+#ifndef _UTIL_STRING_H_
+#define _UTIL_STRING_H_ 1
+
+#include <stdarg.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+    const char *Util_StrSep(const char **stringp,
+                            const char *delim);
+
+    int Util_SplitString(char **before,
+                         char **after,
+                         const char *string,
+                         const char *sep);
+
+    int Util_SplitFilename(char **dir,
+                           char **file,
+                           const char *string);
+
+    char *Util_Strdup(const char *s);
+
+    size_t Util_Strlcpy(char *dst, const char *src, size_t dst_size);
+    size_t Util_Strlcat(char *dst, const char *src, size_t dst_size);
+
+    int Util_StrCmpi(const char *string1,
+                     const char *string2);
+    int Util_StrMemCmpi(const char *string1,
+                        const char *string2,
+                        size_t len2);
+
+    int Util_vsnprintf(char *str, size_t count, const char *fmt, va_list args);
+    int Util_snprintf(char *str, size_t count, const char *fmt, ...);
+
+    int Util_asprintf(char **buffer, const char *fmt, ...);
+    int Util_asnprintf(char **buffer, size_t size, const char *fmt, ...);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _UTIL_STRING_H_ */
--- a/AMSS_NCKU_source/AHF_Direct/util_Table.h
+++ b/AMSS_NCKU_source/AHF_Direct/util_Table.h
@@ -1,496 +1,496 @@
-#ifndef _UTIL_TABLE_H_
-#define _UTIL_TABLE_H_  1
-
-#include "cctk_Types.h"
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-/******************************************************************************/
-/***** Macros for Flags Word **************************************************/
-/******************************************************************************/
-
-/*
- * The hexadecimal forms are more convenient for thinking about
- * bitwise-oring, but alas Fortran 77 doesn't seem to support
- * hexadecimal constants, so we give the actual values in decimal.
- */
-
-/*@@
-  @defines      UTIL_TABLE_FLAGS_DEFAULT
-  @desc         flags-word macro: no flags set (default)
-  @@*/
-#define UTIL_TABLE_FLAGS_DEFAULT                0
-
-/*@@
-  @defines      UTIL_TABLE_FLAGS_CASE_INSENSITIVE
-  @desc         flags-word macro: key comparisons are case-insensitive
-  @@*/
-#define UTIL_TABLE_FLAGS_CASE_INSENSITIVE       1       /* 0x1 */
-
-/*@@
-  @defines      UTIL_TABLE_FLAGS_USER_DEFINED_BASE
-  @desc         flags-word macro: user-defined flags word bit masks
-                should use only this and higher bit positions (i.e.
-                all bit positions below this one are reserved for
-                current or future Cactus use)
-  @@*/
-#define UTIL_TABLE_FLAGS_USER_DEFINED_BASE      65536   /* 0x10000 */
-
-/******************************************************************************/
-/***** Error Codes ************************************************************/
-/******************************************************************************/
-
-/*
- * error codes specific to the table routines (between -100 and -199)
- */
-
-/*@@
-  @defines      UTIL_ERROR_TABLE_BAD_FLAGS
-  @desc         error return code: flags word is invalid
-  @@*/
-#define UTIL_ERROR_TABLE_BAD_FLAGS              (-100)
-
-/*@@
-  @defines      UTIL_ERROR_TABLE_BAD_KEY
-  @desc         error return code: key contains '/' character
-                                   or is otherwise invalid
-  @@*/
-#define UTIL_ERROR_TABLE_BAD_KEY                (-101)
-
-/*@@
-  @defines      UTIL_ERROR_TABLE_STRING_TRUNCATED
-  @desc         error return code: string was truncated to fit in buffer
-  @@*/
-#define UTIL_ERROR_TABLE_STRING_TRUNCATED       (-102)
-
-/*@@
-  @defines      UTIL_ERROR_TABLE_NO_SUCH_KEY
-  @desc         error return code: no such key in table
-  @@*/
-#define UTIL_ERROR_TABLE_NO_SUCH_KEY            (-103)
-
-/*@@
-  @defines      UTIL_ERROR_TABLE_WRONG_DATA_TYPE
-  @desc         error return code: value associated with this key
-                has the wrong data type for this function
-  @@*/
-#define UTIL_ERROR_TABLE_WRONG_DATA_TYPE        (-104)
-
-/*@@
-  @defines      UTIL_ERROR_TABLE_VALUE_IS_EMPTY
-  @desc         error return code: value associated with this key
-                is an empty (0-element) array
-  @@*/
-#define UTIL_ERROR_TABLE_VALUE_IS_EMPTY         (-105)
-
-/*@@
-  @defines      UTIL_ERROR_TABLE_ITERATOR_IS_NULL
-  @desc         error return code: table iterator is in "null-pointer" state
-  @@*/
-#define UTIL_ERROR_TABLE_ITERATOR_IS_NULL       (-106)
-
-/*@@
-  @defines      UTIL_ERROR_TABLE_NO_MIXED_TYPE_ARRAY
-  @desc         error return code: different array values have different
-                datatypes
-  @@*/
-#define UTIL_ERROR_TABLE_NO_MIXED_TYPE_ARRAY    (-107)
-
-
-/******************************************************************************/
-/***** Main Table API *********************************************************/
-/******************************************************************************/
-
-/* create/destroy */
-int Util_TableCreate(int flags);
-int Util_TableClone(int handle);
-int Util_TableDestroy(int handle);
-
-/* query */
-int Util_TableQueryFlags(int handle);
-int Util_TableQueryNKeys(int handle);
-int Util_TableQueryMaxKeyLength(int handle);
-int Util_TableQueryValueInfo(int handle,
-                             CCTK_INT *type_code, CCTK_INT *N_elements,
-                             const char *key);
-
-/* misc stuff */
-int Util_TableDeleteKey(int handle, const char *key);
-
-/* convenience routines to create and/or set from a "parameter-file" string */
-int Util_TableCreateFromString(const char string[]);
-int Util_TableSetFromString(int handle, const char string[]);
-
-/* set/get a C-style null-terminated character string */
-int Util_TableSetString(int handle,
-                        const char *string,
-                        const char *key);
-int Util_TableGetString(int handle,
-                        int buffer_length, char buffer[],
-                        const char *key);
-
-/* set/get generic types described by CCTK_VARIABLE_* type codes */
-int Util_TableSetGeneric(int handle,
-                         int type_code, const void *value_ptr,
-                         const char *key);
-int Util_TableSetGenericArray(int handle,
-                              int type_code, int N_elements, const void *array,
-                              const char *key);
-int Util_TableGetGeneric(int handle,
-                         int type_code, void *value_ptr,
-                         const char *key);
-int Util_TableGetGenericArray(int handle,
-                              int type_code, int N_elements, void *array,
-                              const char *key);
-
-/**************************************/
-
-/*
- * set routines
- */
-
-/* pointers */
-int Util_TableSetPointer(int handle, CCTK_POINTER value, const char *key);
-int Util_TableSetPointerToConst(int handle,
-                                CCTK_POINTER_TO_CONST value,
-                                const char *key);
-int Util_TableSetFPointer(int handle, CCTK_FPOINTER value, const char *key);
-/*
- * ... the following function (an alias for the previous one) is for
- *     backwards compatability only, and is deprecated as of 4.0beta13
- */
-int Util_TableSetFnPointer(int handle, CCTK_FPOINTER value, const char *key);
-
-/* a single character */
-int Util_TableSetChar(int handle, CCTK_CHAR value, const char *key);
-
-/* integers */
-int Util_TableSetByte(int handle, CCTK_BYTE value, const char *key);
-int Util_TableSetInt(int handle, CCTK_INT value, const char *key);
-#ifdef HAVE_CCTK_INT1
-int Util_TableSetInt1(int handle, CCTK_INT1 value, const char *key);
-#endif
-#ifdef HAVE_CCTK_INT2
-int Util_TableSetInt2(int handle, CCTK_INT2 value, const char *key);
-#endif
-#ifdef HAVE_CCTK_INT4
-int Util_TableSetInt4(int handle, CCTK_INT4 value, const char *key);
-#endif
-#ifdef HAVE_CCTK_INT8
-int Util_TableSetInt8(int handle, CCTK_INT8 value, const char *key);
-#endif
-
-/* real numbers */
-int Util_TableSetReal(int handle, CCTK_REAL value, const char *key);
-#ifdef HAVE_CCTK_REAL4
-int Util_TableSetReal4(int handle, CCTK_REAL4 value, const char *key);
-#endif
-#ifdef HAVE_CCTK_REAL8
-int Util_TableSetReal8(int handle, CCTK_REAL8 value, const char *key);
-#endif
-#ifdef HAVE_CCTK_REAL16
-int Util_TableSetReal16(int handle, CCTK_REAL16 value, const char *key);
-#endif
-
-/* complex numbers */
-int Util_TableSetComplex(int handle, CCTK_COMPLEX value, const char *key);
-#ifdef HAVE_CCTK_REAL4
-int Util_TableSetComplex8(int handle, CCTK_COMPLEX8 value, const char *key);
-#endif
-#ifdef HAVE_CCTK_REAL8
-int Util_TableSetComplex16(int handle, CCTK_COMPLEX16 value, const char *key);
-#endif
-#ifdef HAVE_CCTK_REAL16
-int Util_TableSetComplex32(int handle, CCTK_COMPLEX32 value, const char *key);
-#endif
-
-/**************************************/
-
-/* arrays of pointers */
-int Util_TableSetPointerArray(int handle,
-                              int N_elements, const CCTK_POINTER array[],
-                              const char *key);
-int Util_TableSetPointerToConstArray(int handle,
-                                     int N_elements,
-                                     const CCTK_POINTER_TO_CONST array[],
-                                     const char *key);
-int Util_TableSetFPointerArray(int handle,
-                               int N_elements, const CCTK_FPOINTER array[],
-                               const char *key);
-/*
- * ... the following function (an alias for the previous one) is for
- *     backwards compatability only, and is deprecated as of 4.0beta13
- */
-int Util_TableSetFnPointerArray(int handle,
-                                int N_elements, const CCTK_FPOINTER array[],
-                                const char *key);
-
-/* arrays of characters (i.e. character strings with known length) */
-/* note null termination is *not* required or enforced */
-int Util_TableSetCharArray(int handle,
-                           int N_elements, const CCTK_CHAR array[],
-                           const char *key);
-
-/* arrays of integers */
-int Util_TableSetByteArray(int handle,
-                           int N_elements, const CCTK_BYTE array[],
-                           const char *key);
-int Util_TableSetIntArray(int handle,
-                          int N_elements, const CCTK_INT array[],
-                          const char *key);
-#ifdef HAVE_CCTK_INT1
-int Util_TableSetInt1Array(int handle,
-                           int N_elements, const CCTK_INT1 array[],
-                           const char *key);
-#endif
-#ifdef HAVE_CCTK_INT2
-int Util_TableSetInt2Array(int handle,
-                           int N_elements, const CCTK_INT2 array[],
-                           const char *key);
-#endif
-#ifdef HAVE_CCTK_INT4
-int Util_TableSetInt4Array(int handle,
-                           int N_elements, const CCTK_INT4 array[],
-                           const char *key);
-#endif
-#ifdef HAVE_CCTK_INT8
-int Util_TableSetInt8Array(int handle,
-                           int N_elements, const CCTK_INT8 array[],
-                           const char *key);
-#endif
-
-/* arrays of real numbers */
-int Util_TableSetRealArray(int handle,
-                           int N_elements, const CCTK_REAL array[],
-                           const char *key);
-#ifdef HAVE_CCTK_REAL4
-int Util_TableSetReal4Array(int handle,
-                            int N_elements, const CCTK_REAL4 array[],
-                            const char *key);
-#endif
-#ifdef HAVE_CCTK_REAL8
-int Util_TableSetReal8Array(int handle,
-                            int N_elements, const CCTK_REAL8 array[],
-                            const char *key);
-#endif
-#ifdef HAVE_CCTK_REAL16
-int Util_TableSetReal16Array(int handle,
-                             int N_elements, const CCTK_REAL16 array[],
-                             const char *key);
-#endif
-
-/* arrays of complex numbers */
-int Util_TableSetComplexArray(int handle,
-                              int N_elements, const CCTK_COMPLEX array[],
-                              const char *key);
-#ifdef HAVE_CCTK_REAL4
-int Util_TableSetComplex8Array(int handle,
-                               int N_elements, const CCTK_COMPLEX8 array[],
-                               const char *key);
-#endif
-#ifdef HAVE_CCTK_REAL8
-int Util_TableSetComplex16Array(int handle,
-                                int N_elements, const CCTK_COMPLEX16 array[],
-                                const char *key);
-#endif
-#ifdef HAVE_CCTK_REAL16
-int Util_TableSetComplex32Array(int handle,
-                                int N_elements, const CCTK_COMPLEX32 array[],
-                                const char *key);
-#endif
-
-/**************************************/
-
-/*
- * get routines
- */
-
-/* pointers */
-int Util_TableGetPointer(int handle, CCTK_POINTER *value, const char *key);
-int Util_TableGetPointerToConst(int handle,
-                                CCTK_POINTER_TO_CONST *value,
-                                const char *key);
-
-int Util_TableGetFPointer(int handle, CCTK_FPOINTER *value, const char *key);
-/*
- * ... the following function (an alias for the previous one) is for
- *     backwards compatability only, and is deprecated as of 4.0beta13
- */
-int Util_TableGetFnPointer(int handle, CCTK_FPOINTER *value, const char *key);
-
-/* a single character */
-int Util_TableGetChar(int handle, CCTK_CHAR *value, const char *key);
-
-/* integers */
-int Util_TableGetByte(int handle, CCTK_BYTE *value, const char *key);
-int Util_TableGetInt(int handle, CCTK_INT *value, const char *key);
-#ifdef HAVE_CCTK_INT1
-int Util_TableGetInt1(int handle, CCTK_INT1 *value, const char *key);
-#endif
-#ifdef HAVE_CCTK_INT2
-int Util_TableGetInt2(int handle, CCTK_INT2 *value, const char *key);
-#endif
-#ifdef HAVE_CCTK_INT4
-int Util_TableGetInt4(int handle, CCTK_INT4 *value, const char *key);
-#endif
-#ifdef HAVE_CCTK_INT8
-int Util_TableGetInt8(int handle, CCTK_INT8 *value, const char *key);
-#endif
-
-/* real numbers */
-int Util_TableGetReal(int handle, CCTK_REAL *value, const char *key);
-#ifdef HAVE_CCTK_REAL4
-int Util_TableGetReal4(int handle, CCTK_REAL4 *value, const char *key);
-#endif
-#ifdef HAVE_CCTK_REAL8
-int Util_TableGetReal8(int handle, CCTK_REAL8 *value, const char *key);
-#endif
-#ifdef HAVE_CCTK_REAL16
-int Util_TableGetReal16(int handle, CCTK_REAL16 *value, const char *key);
-#endif
-
-/* complex numbers */
-int Util_TableGetComplex(int handle, CCTK_COMPLEX *value, const char *key);
-#ifdef HAVE_CCTK_REAL4
-int Util_TableGetComplex8(int handle, CCTK_COMPLEX8 *value, const char *key);
-#endif
-#ifdef HAVE_CCTK_REAL8
-int Util_TableGetComplex16(int handle, CCTK_COMPLEX16 *value, const char *key);
-#endif
-#ifdef HAVE_CCTK_REAL16
-int Util_TableGetComplex32(int handle, CCTK_COMPLEX32 *value, const char *key);
-#endif
-
-/**************************************/
-
-/* arrays of pointers */
-int Util_TableGetPointerArray(int handle,
-                              int N_elements, CCTK_POINTER array[],
-                              const char *key);
-int Util_TableGetPointerToConstArray(int handle,
-                                     int N_elements,
-                                     CCTK_POINTER_TO_CONST array[],
-                                     const char *key);
-
-int Util_TableGetFPointerArray(int handle,
-                               int N_elements, CCTK_FPOINTER array[],
-                               const char *key);
-/*
- * ... the following function (an alias for the previous one) is for
- *     backwards compatability only, and is deprecated as of 4.0beta13
- */
-int Util_TableGetFnPointerArray(int handle,
-                                int N_elements, CCTK_FPOINTER array[],
-                                const char *key);
-
-/* arrays of characters (i.e. character strings of known length) */
-/* note null termination is *not* required or enforced */
-int Util_TableGetCharArray(int handle,
-                           int N_elements, CCTK_CHAR array[],
-                           const char *key);
-
-/* integers */
-int Util_TableGetByteArray(int handle,
-                           int N_elements, CCTK_BYTE array[],
-                           const char *key);
-int Util_TableGetIntArray(int handle,
-                          int N_elements, CCTK_INT array[],
-                          const char *key);
-#ifdef HAVE_CCTK_INT1
-int Util_TableGetInt1Array(int handle,
-                           int N_elements, CCTK_INT1 array[],
-                           const char *key);
-#endif
-#ifdef HAVE_CCTK_INT2
-int Util_TableGetInt2Array(int handle,
-                           int N_elements, CCTK_INT2 array[],
-                           const char *key);
-#endif
-#ifdef HAVE_CCTK_INT4
-int Util_TableGetInt4Array(int handle,
-                           int N_elements, CCTK_INT4 array[],
-                           const char *key);
-#endif
-#ifdef HAVE_CCTK_INT8
-int Util_TableGetInt8Array(int handle,
-                           int N_elements, CCTK_INT8 array[],
-                           const char *key);
-#endif
-
-/* real numbers */
-int Util_TableGetRealArray(int handle,
-                           int N_elements, CCTK_REAL array[],
-                           const char *key);
-#ifdef HAVE_CCTK_REAL4
-int Util_TableGetReal4Array(int handle,
-                            int N_elements, CCTK_REAL4 array[],
-                            const char *key);
-#endif
-#ifdef HAVE_CCTK_REAL8
-int Util_TableGetReal8Array(int handle,
-                            int N_elements, CCTK_REAL8 array[],
-                            const char *key);
-#endif
-#ifdef HAVE_CCTK_REAL16
-int Util_TableGetReal16Array(int handle,
-                             int N_elements, CCTK_REAL16 array[],
-                             const char *key);
-#endif
-
-/* complex numbers */
-int Util_TableGetComplexArray(int handle,
-                              int N_elements, CCTK_COMPLEX array[],
-                              const char *key);
-#ifdef HAVE_CCTK_REAL4
-int Util_TableGetComplex8Array(int handle,
-                               int N_elements, CCTK_COMPLEX8 array[],
-                               const char *key);
-#endif
-#ifdef HAVE_CCTK_REAL8
-int Util_TableGetComplex16Array(int handle,
-                                int N_elements, CCTK_COMPLEX16 array[],
-                                const char *key);
-#endif
-#ifdef HAVE_CCTK_REAL16
-int Util_TableGetComplex32Array(int handle,
-                                int N_elements, CCTK_COMPLEX32 array[],
-                                const char *key);
-#endif
-
-/******************************************************************************/
-/***** Table Iterator API *****************************************************/
-/******************************************************************************/
-
-/* create/destroy */
-int Util_TableItCreate(int handle);
-int Util_TableItClone(int ihandle);
-int Util_TableItDestroy(int ihandle);
-
-/* test for "null-pointer" state */
-int Util_TableItQueryIsNull(int ihandle);
-int Util_TableItQueryIsNonNull(int ihandle);
-
-/* query what the iterator points to */
-int Util_TableItQueryTableHandle(int ihandle);
-int Util_TableItQueryKeyValueInfo(int ihandle,
-                                  int key_buffer_length, char key_buffer[],
-                                  CCTK_INT *type_code, CCTK_INT *N_elements);
-
-/* change value of iterator */
-int Util_TableItAdvance(int ihandle);
-int Util_TableItResetToStart(int ihandle);
-int Util_TableItSetToNull(int ihandle);
-int Util_TableItSetToKey(int ihandle, const char *key);
-
-/******************************************************************************/
-/******************************************************************************/
-/******************************************************************************/
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  /* _UTIL_TABLE_H_ */
+#ifndef _UTIL_TABLE_H_
+#define _UTIL_TABLE_H_  1
+
+#include "cctk_Types.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/******************************************************************************/
+/***** Macros for Flags Word **************************************************/
+/******************************************************************************/
+
+/*
+ * The hexadecimal forms are more convenient for thinking about
+ * bitwise-oring, but alas Fortran 77 doesn't seem to support
+ * hexadecimal constants, so we give the actual values in decimal.
+ */
+
+/*@@
+  @defines      UTIL_TABLE_FLAGS_DEFAULT
+  @desc         flags-word macro: no flags set (default)
+  @@*/
+#define UTIL_TABLE_FLAGS_DEFAULT                0
+
+/*@@
+  @defines      UTIL_TABLE_FLAGS_CASE_INSENSITIVE
+  @desc         flags-word macro: key comparisons are case-insensitive
+  @@*/
+#define UTIL_TABLE_FLAGS_CASE_INSENSITIVE       1       /* 0x1 */
+
+/*@@
+  @defines      UTIL_TABLE_FLAGS_USER_DEFINED_BASE
+  @desc         flags-word macro: user-defined flags word bit masks
+                should use only this and higher bit positions (i.e.
+                all bit positions below this one are reserved for
+                current or future Cactus use)
+  @@*/
+#define UTIL_TABLE_FLAGS_USER_DEFINED_BASE      65536   /* 0x10000 */
+
+/******************************************************************************/
+/***** Error Codes ************************************************************/
+/******************************************************************************/
+
+/*
+ * error codes specific to the table routines (between -100 and -199)
+ */
+
+/*@@
+  @defines      UTIL_ERROR_TABLE_BAD_FLAGS
+  @desc         error return code: flags word is invalid
+  @@*/
+#define UTIL_ERROR_TABLE_BAD_FLAGS              (-100)
+
+/*@@
+  @defines      UTIL_ERROR_TABLE_BAD_KEY
+  @desc         error return code: key contains '/' character
+                                   or is otherwise invalid
+  @@*/
+#define UTIL_ERROR_TABLE_BAD_KEY                (-101)
+
+/*@@
+  @defines      UTIL_ERROR_TABLE_STRING_TRUNCATED
+  @desc         error return code: string was truncated to fit in buffer
+  @@*/
+#define UTIL_ERROR_TABLE_STRING_TRUNCATED       (-102)
+
+/*@@
+  @defines      UTIL_ERROR_TABLE_NO_SUCH_KEY
+  @desc         error return code: no such key in table
+  @@*/
+#define UTIL_ERROR_TABLE_NO_SUCH_KEY            (-103)
+
+/*@@
+  @defines      UTIL_ERROR_TABLE_WRONG_DATA_TYPE
+  @desc         error return code: value associated with this key
+                has the wrong data type for this function
+  @@*/
+#define UTIL_ERROR_TABLE_WRONG_DATA_TYPE        (-104)
+
+/*@@
+  @defines      UTIL_ERROR_TABLE_VALUE_IS_EMPTY
+  @desc         error return code: value associated with this key
+                is an empty (0-element) array
+  @@*/
+#define UTIL_ERROR_TABLE_VALUE_IS_EMPTY         (-105)
+
+/*@@
+  @defines      UTIL_ERROR_TABLE_ITERATOR_IS_NULL
+  @desc         error return code: table iterator is in "null-pointer" state
+  @@*/
+#define UTIL_ERROR_TABLE_ITERATOR_IS_NULL       (-106)
+
+/*@@
+  @defines      UTIL_ERROR_TABLE_NO_MIXED_TYPE_ARRAY
+  @desc         error return code: different array values have different
+                datatypes
+  @@*/
+#define UTIL_ERROR_TABLE_NO_MIXED_TYPE_ARRAY    (-107)
+
+
+/******************************************************************************/
+/***** Main Table API *********************************************************/
+/******************************************************************************/
+
+/* create/destroy */
+int Util_TableCreate(int flags);
+int Util_TableClone(int handle);
+int Util_TableDestroy(int handle);
+
+/* query */
+int Util_TableQueryFlags(int handle);
+int Util_TableQueryNKeys(int handle);
+int Util_TableQueryMaxKeyLength(int handle);
+int Util_TableQueryValueInfo(int handle,
+                             CCTK_INT *type_code, CCTK_INT *N_elements,
+                             const char *key);
+
+/* misc stuff */
+int Util_TableDeleteKey(int handle, const char *key);
+
+/* convenience routines to create and/or set from a "parameter-file" string */
+int Util_TableCreateFromString(const char string[]);
+int Util_TableSetFromString(int handle, const char string[]);
+
+/* set/get a C-style null-terminated character string */
+int Util_TableSetString(int handle,
+                        const char *string,
+                        const char *key);
+int Util_TableGetString(int handle,
+                        int buffer_length, char buffer[],
+                        const char *key);
+
+/* set/get generic types described by CCTK_VARIABLE_* type codes */
+int Util_TableSetGeneric(int handle,
+                         int type_code, const void *value_ptr,
+                         const char *key);
+int Util_TableSetGenericArray(int handle,
+                              int type_code, int N_elements, const void *array,
+                              const char *key);
+int Util_TableGetGeneric(int handle,
+                         int type_code, void *value_ptr,
+                         const char *key);
+int Util_TableGetGenericArray(int handle,
+                              int type_code, int N_elements, void *array,
+                              const char *key);
+
+/**************************************/
+
+/*
+ * set routines
+ */
+
+/* pointers */
+int Util_TableSetPointer(int handle, CCTK_POINTER value, const char *key);
+int Util_TableSetPointerToConst(int handle,
+                                CCTK_POINTER_TO_CONST value,
+                                const char *key);
+int Util_TableSetFPointer(int handle, CCTK_FPOINTER value, const char *key);
+/*
+ * ... the following function (an alias for the previous one) is for
+ *     backwards compatibility only, and is deprecated as of 4.0beta13
+ */
+int Util_TableSetFnPointer(int handle, CCTK_FPOINTER value, const char *key);
+
+/* a single character */
+int Util_TableSetChar(int handle, CCTK_CHAR value, const char *key);
+
+/* integers */
+int Util_TableSetByte(int handle, CCTK_BYTE value, const char *key);
+int Util_TableSetInt(int handle, CCTK_INT value, const char *key);
+#ifdef HAVE_CCTK_INT1
+int Util_TableSetInt1(int handle, CCTK_INT1 value, const char *key);
+#endif
+#ifdef HAVE_CCTK_INT2
+int Util_TableSetInt2(int handle, CCTK_INT2 value, const char *key);
+#endif
+#ifdef HAVE_CCTK_INT4
+int Util_TableSetInt4(int handle, CCTK_INT4 value, const char *key);
+#endif
+#ifdef HAVE_CCTK_INT8
+int Util_TableSetInt8(int handle, CCTK_INT8 value, const char *key);
+#endif
+
+/* real numbers */
+int Util_TableSetReal(int handle, CCTK_REAL value, const char *key);
+#ifdef HAVE_CCTK_REAL4
+int Util_TableSetReal4(int handle, CCTK_REAL4 value, const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL8
+int Util_TableSetReal8(int handle, CCTK_REAL8 value, const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL16
+int Util_TableSetReal16(int handle, CCTK_REAL16 value, const char *key);
+#endif
+
+/* complex numbers */
+int Util_TableSetComplex(int handle, CCTK_COMPLEX value, const char *key);
+#ifdef HAVE_CCTK_REAL4
+int Util_TableSetComplex8(int handle, CCTK_COMPLEX8 value, const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL8
+int Util_TableSetComplex16(int handle, CCTK_COMPLEX16 value, const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL16
+int Util_TableSetComplex32(int handle, CCTK_COMPLEX32 value, const char *key);
+#endif
+
+/**************************************/
+
+/* arrays of pointers */
+int Util_TableSetPointerArray(int handle,
+                              int N_elements, const CCTK_POINTER array[],
+                              const char *key);
+int Util_TableSetPointerToConstArray(int handle,
+                                     int N_elements,
+                                     const CCTK_POINTER_TO_CONST array[],
+                                     const char *key);
+int Util_TableSetFPointerArray(int handle,
+                               int N_elements, const CCTK_FPOINTER array[],
+                               const char *key);
+/*
+ * ... the following function (an alias for the previous one) is for
+ *     backwards compatibility only, and is deprecated as of 4.0beta13
+ */
+int Util_TableSetFnPointerArray(int handle,
+                                int N_elements, const CCTK_FPOINTER array[],
+                                const char *key);
+
+/* arrays of characters (i.e. character strings with known length) */
+/* note null termination is *not* required or enforced */
+int Util_TableSetCharArray(int handle,
+                           int N_elements, const CCTK_CHAR array[],
+                           const char *key);
+
+/* arrays of integers */
+int Util_TableSetByteArray(int handle,
+                           int N_elements, const CCTK_BYTE array[],
+                           const char *key);
+int Util_TableSetIntArray(int handle,
+                          int N_elements, const CCTK_INT array[],
+                          const char *key);
+#ifdef HAVE_CCTK_INT1
+int Util_TableSetInt1Array(int handle,
+                           int N_elements, const CCTK_INT1 array[],
+                           const char *key);
+#endif
+#ifdef HAVE_CCTK_INT2
+int Util_TableSetInt2Array(int handle,
+                           int N_elements, const CCTK_INT2 array[],
+                           const char *key);
+#endif
+#ifdef HAVE_CCTK_INT4
+int Util_TableSetInt4Array(int handle,
+                           int N_elements, const CCTK_INT4 array[],
+                           const char *key);
+#endif
+#ifdef HAVE_CCTK_INT8
+int Util_TableSetInt8Array(int handle,
+                           int N_elements, const CCTK_INT8 array[],
+                           const char *key);
+#endif
+
+/* arrays of real numbers */
+int Util_TableSetRealArray(int handle,
+                           int N_elements, const CCTK_REAL array[],
+                           const char *key);
+#ifdef HAVE_CCTK_REAL4
+int Util_TableSetReal4Array(int handle,
+                            int N_elements, const CCTK_REAL4 array[],
+                            const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL8
+int Util_TableSetReal8Array(int handle,
+                            int N_elements, const CCTK_REAL8 array[],
+                            const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL16
+int Util_TableSetReal16Array(int handle,
+                             int N_elements, const CCTK_REAL16 array[],
+                             const char *key);
+#endif
+
+/* arrays of complex numbers */
+int Util_TableSetComplexArray(int handle,
+                              int N_elements, const CCTK_COMPLEX array[],
+                              const char *key);
+#ifdef HAVE_CCTK_REAL4
+int Util_TableSetComplex8Array(int handle,
+                               int N_elements, const CCTK_COMPLEX8 array[],
+                               const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL8
+int Util_TableSetComplex16Array(int handle,
+                                int N_elements, const CCTK_COMPLEX16 array[],
+                                const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL16
+int Util_TableSetComplex32Array(int handle,
+                                int N_elements, const CCTK_COMPLEX32 array[],
+                                const char *key);
+#endif
+
+/**************************************/
+
+/*
+ * get routines
+ */
+
+/* pointers */
+int Util_TableGetPointer(int handle, CCTK_POINTER *value, const char *key);
+int Util_TableGetPointerToConst(int handle,
+                                CCTK_POINTER_TO_CONST *value,
+                                const char *key);
+
+int Util_TableGetFPointer(int handle, CCTK_FPOINTER *value, const char *key);
+/*
+ * ... the following function (an alias for the previous one) is for
+ *     backwards compatibility only, and is deprecated as of 4.0beta13
+ */
+int Util_TableGetFnPointer(int handle, CCTK_FPOINTER *value, const char *key);
+
+/* a single character */
+int Util_TableGetChar(int handle, CCTK_CHAR *value, const char *key);
+
+/* integers */
+int Util_TableGetByte(int handle, CCTK_BYTE *value, const char *key);
+int Util_TableGetInt(int handle, CCTK_INT *value, const char *key);
+#ifdef HAVE_CCTK_INT1
+int Util_TableGetInt1(int handle, CCTK_INT1 *value, const char *key);
+#endif
+#ifdef HAVE_CCTK_INT2
+int Util_TableGetInt2(int handle, CCTK_INT2 *value, const char *key);
+#endif
+#ifdef HAVE_CCTK_INT4
+int Util_TableGetInt4(int handle, CCTK_INT4 *value, const char *key);
+#endif
+#ifdef HAVE_CCTK_INT8
+int Util_TableGetInt8(int handle, CCTK_INT8 *value, const char *key);
+#endif
+
+/* real numbers */
+int Util_TableGetReal(int handle, CCTK_REAL *value, const char *key);
+#ifdef HAVE_CCTK_REAL4
+int Util_TableGetReal4(int handle, CCTK_REAL4 *value, const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL8
+int Util_TableGetReal8(int handle, CCTK_REAL8 *value, const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL16
+int Util_TableGetReal16(int handle, CCTK_REAL16 *value, const char *key);
+#endif
+
+/* complex numbers */
+int Util_TableGetComplex(int handle, CCTK_COMPLEX *value, const char *key);
+#ifdef HAVE_CCTK_REAL4
+int Util_TableGetComplex8(int handle, CCTK_COMPLEX8 *value, const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL8
+int Util_TableGetComplex16(int handle, CCTK_COMPLEX16 *value, const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL16
+int Util_TableGetComplex32(int handle, CCTK_COMPLEX32 *value, const char *key);
+#endif
+
+/**************************************/
+
+/* arrays of pointers */
+int Util_TableGetPointerArray(int handle,
+                              int N_elements, CCTK_POINTER array[],
+                              const char *key);
+int Util_TableGetPointerToConstArray(int handle,
+                                     int N_elements,
+                                     CCTK_POINTER_TO_CONST array[],
+                                     const char *key);
+
+int Util_TableGetFPointerArray(int handle,
+                               int N_elements, CCTK_FPOINTER array[],
+                               const char *key);
+/*
+ * ... the following function (an alias for the previous one) is for
+ *     backwards compatibility only, and is deprecated as of 4.0beta13
+ */
+int Util_TableGetFnPointerArray(int handle,
+                                int N_elements, CCTK_FPOINTER array[],
+                                const char *key);
+
+/* arrays of characters (i.e. character strings of known length) */
+/* note null termination is *not* required or enforced */
+int Util_TableGetCharArray(int handle,
+                           int N_elements, CCTK_CHAR array[],
+                           const char *key);
+
+/* arrays of integers */
+int Util_TableGetByteArray(int handle,
+                           int N_elements, CCTK_BYTE array[],
+                           const char *key);
+int Util_TableGetIntArray(int handle,
+                          int N_elements, CCTK_INT array[],
+                          const char *key);
+#ifdef HAVE_CCTK_INT1
+int Util_TableGetInt1Array(int handle,
+                           int N_elements, CCTK_INT1 array[],
+                           const char *key);
+#endif
+#ifdef HAVE_CCTK_INT2
+int Util_TableGetInt2Array(int handle,
+                           int N_elements, CCTK_INT2 array[],
+                           const char *key);
+#endif
+#ifdef HAVE_CCTK_INT4
+int Util_TableGetInt4Array(int handle,
+                           int N_elements, CCTK_INT4 array[],
+                           const char *key);
+#endif
+#ifdef HAVE_CCTK_INT8
+int Util_TableGetInt8Array(int handle,
+                           int N_elements, CCTK_INT8 array[],
+                           const char *key);
+#endif
+
+/* arrays of real numbers */
+int Util_TableGetRealArray(int handle,
+                           int N_elements, CCTK_REAL array[],
+                           const char *key);
+#ifdef HAVE_CCTK_REAL4
+int Util_TableGetReal4Array(int handle,
+                            int N_elements, CCTK_REAL4 array[],
+                            const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL8
+int Util_TableGetReal8Array(int handle,
+                            int N_elements, CCTK_REAL8 array[],
+                            const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL16
+int Util_TableGetReal16Array(int handle,
+                             int N_elements, CCTK_REAL16 array[],
+                             const char *key);
+#endif
+
+/* arrays of complex numbers */
+int Util_TableGetComplexArray(int handle,
+                              int N_elements, CCTK_COMPLEX array[],
+                              const char *key);
+#ifdef HAVE_CCTK_REAL4
+int Util_TableGetComplex8Array(int handle,
+                               int N_elements, CCTK_COMPLEX8 array[],
+                               const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL8
+int Util_TableGetComplex16Array(int handle,
+                                int N_elements, CCTK_COMPLEX16 array[],
+                                const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL16
+int Util_TableGetComplex32Array(int handle,
+                                int N_elements, CCTK_COMPLEX32 array[],
+                                const char *key);
+#endif
+
+/******************************************************************************/
+/***** Table Iterator API *****************************************************/
+/******************************************************************************/
+
+/* create/destroy */
+int Util_TableItCreate(int handle);
+int Util_TableItClone(int ihandle);
+int Util_TableItDestroy(int ihandle);
+
+/* test for "null-pointer" state */
+int Util_TableItQueryIsNull(int ihandle);
+int Util_TableItQueryIsNonNull(int ihandle);
+
+/* query what the iterator points to */
+int Util_TableItQueryTableHandle(int ihandle);
+int Util_TableItQueryKeyValueInfo(int ihandle,
+                                  int key_buffer_length, char key_buffer[],
+                                  CCTK_INT *type_code, CCTK_INT *N_elements);
+
+/* change value of iterator */
+int Util_TableItAdvance(int ihandle);
+int Util_TableItResetToStart(int ihandle);
+int Util_TableItSetToNull(int ihandle);
+int Util_TableItSetToKey(int ihandle, const char *key);
+
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* _UTIL_TABLE_H_ */
--- a/AMSS_NCKU_source/BSSN/adm_constraint.f90
+++ b/AMSS_NCKU_source/BSSN/adm_constraint.f90
@@ -1,382 +1,382 @@
-
-!-------------------------------------------------------------------------------!
-! computed constraint for ADM formalism                                         !
-!-------------------------------------------------------------------------------!
-  subroutine constraint_adm(ex, X, Y, Z,&
-               dxx,gxy,gxz,dyy,gyz,dzz, &
-               Kxx,Kxy,Kxz,Kyy,Kyz,Kzz, &
-               Lap,Sfx,Sfy,Sfz,rho,Sx,Sy,Sz,&
-               ham_Res, movx_Res, movy_Res, movz_Res, &
-               Symmetry)
-
-  implicit none
-!~~~~~~> Input parameters:
-
-  integer,intent(in ):: ex(1:3),symmetry
-  real*8, intent(in ):: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: dxx,gxy,gxz,dyy,gyz,dzz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Kxx,Kxy,Kxz,Kyy,Kyz,Kzz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Lap,Sfx,Sfy,Sfz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: rho,Sx,Sy,Sz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: ham_Res, movx_Res, movy_Res, movz_Res
-!~~~~~~> Other variables:
-!  inverse metric
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gupyy,gupyz,gupzz
-! first order derivative of metric, @_k g_ij
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gxxx,gxyx,gxzx
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gyyx,gyzx,gzzx
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gxxy,gxyy,gxzy
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gyyy,gyzy,gzzy
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gxxz,gxyz,gxzz
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gyyz,gyzz,gzzz
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz,trK,fx,fy,fz
-  real*8, dimension(ex(1),ex(2),ex(3)) :: Rxx,Rxy,Rxz,Ryy,Ryz,Rzz
-  real*8, dimension(ex(1),ex(2),ex(3)) :: Gamxxx, Gamxxy, Gamxxz
-  real*8, dimension(ex(1),ex(2),ex(3)) :: Gamxyy, Gamxyz, Gamxzz
-  real*8, dimension(ex(1),ex(2),ex(3)) :: Gamyxx, Gamyxy, Gamyxz
-  real*8, dimension(ex(1),ex(2),ex(3)) :: Gamyyy, Gamyyz, Gamyzz
-  real*8, dimension(ex(1),ex(2),ex(3)) :: Gamzxx, Gamzxy, Gamzxz
-  real*8, dimension(ex(1),ex(2),ex(3)) :: Gamzyy, Gamzyz, Gamzzz
-
-  integer, parameter :: NO_SYMM = 0, EQUATORIAL = 1, OCTANT = 2
-  real*8, parameter :: ZERO = 0.D0, HALF = 0.5d0, ONE = 1.d0, TWO = 2.d0, FOUR = 4.d0
-  real*8, parameter :: F2o3 = 2.d0/3.d0, F8 = 8.d0, F16 = 1.6d1, SIX = 6.d0
-  real*8, parameter :: SYM = 1.D0, ANTI= - 1.D0
-  real*8            :: PI
-
-  call adm_ricci_gamma(ex, X, Y, Z,                        &
-               dxx    ,   gxy    ,   gxz    ,   dyy    ,   gyz    ,   dzz,&
-               Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
-               Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
-               Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
-               Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
-               Symmetry)
-
-  PI = dacos(-ONE)
-
-  gxx = dxx + ONE
-  gyy = dyy + ONE
-  gzz = dzz + ONE
-! invert metric
-  gupzz =  gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
-           gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
-  gupxx =   ( gyy * gzz - gyz * gyz ) / gupzz
-  gupxy = - ( gxy * gzz - gyz * gxz ) / gupzz
-  gupxz =   ( gxy * gyz - gyy * gxz ) / gupzz
-  gupyy =   ( gxx * gzz - gxz * gxz ) / gupzz
-  gupyz = - ( gxx * gyz - gxy * gxz ) / gupzz
-  gupzz =   ( gxx * gyy - gxy * gxy ) / gupzz
-
-  trK =          gupxx * Kxx + gupyy * Kyy + gupzz * Kzz &
-        + TWO * (gupxy * Kxy + gupxz * Kxz + gupyz * Kyz)
-
-! ham_Res = trR + K^2 - K_ij * K^ij - 16 * PI * rho
-  ham_Res =   gupxx * Rxx + gupyy * Ryy + gupzz * Rzz + &
-        TWO* ( gupxy * Rxy + gupxz * Rxz + gupyz * Ryz )
-
-  ham_Res = ham_Res + trK * trK -(&
-       gupxx * ( &
-       gupxx * Kxx * Kxx + gupyy * Kxy * Kxy + gupzz * Kxz * Kxz + &
-       TWO * (gupxy * Kxx * Kxy + gupxz * Kxx * Kxz + gupyz * Kxy * Kxz) ) + &
-       gupyy * ( &
-       gupxx * Kxy * Kxy + gupyy * Kyy * Kyy + gupzz * Kyz * Kyz + &
-       TWO * (gupxy * Kxy * Kyy + gupxz * Kxy * Kyz + gupyz * Kyy * Kyz) ) + &
-       gupzz * ( &
-       gupxx * Kxz * Kxz + gupyy * Kyz * Kyz + gupzz * Kzz * Kzz + &
-       TWO * (gupxy * Kxz * Kyz + gupxz * Kxz * Kzz + gupyz * Kyz * Kzz) ) + &
-       TWO * ( &
-       gupxy * ( &
-       gupxx * Kxx * Kxy + gupyy * Kxy * Kyy + gupzz * Kxz * Kyz + &
-       gupxy * (Kxx * Kyy + Kxy * Kxy) + &
-       gupxz * (Kxx * Kyz + Kxz * Kxy) + &
-       gupyz * (Kxy * Kyz + Kxz * Kyy) ) + &
-       gupxz * ( &
-       gupxx * Kxx * Kxz + gupyy * Kxy * Kyz + gupzz * Kxz * Kzz + &
-       gupxy * (Kxx * Kyz + Kxy * Kxz) + &
-       gupxz * (Kxx * Kzz + Kxz * Kxz) + &
-       gupyz * (Kxy * Kzz + Kxz * Kyz) ) + &
-       gupyz * ( &
-       gupxx * Kxy * Kxz + gupyy * Kyy * Kyz + gupzz * Kyz * Kzz + &
-       gupxy * (Kxy * Kyz + Kyy * Kxz) + &
-       gupxz * (Kxy * Kzz + Kyz * Kxz) + &
-       gupyz * (Kyy * Kzz + Kyz * Kyz) ) ))- F16 * PI * rho
-
-! mov_Res_j = gupkj*D_k K_ij - d_j trK - 8 PI s_j where D respect to physical metric
-! store D_i K_jk
-  call fderivs(ex,Kxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0)
-  call fderivs(ex,Kxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,0)
-  call fderivs(ex,Kxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,0)
-  call fderivs(ex,Kyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0)
-  call fderivs(ex,Kyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,0)
-  call fderivs(ex,Kzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0)
-
-  gxxx = gxxx - (  Gamxxx * Kxx + Gamyxx * Kxy + Gamzxx * Kxz &
-                 + Gamxxx * Kxx + Gamyxx * Kxy + Gamzxx * Kxz)
-  gxyx = gxyx - (  Gamxxy * Kxx + Gamyxy * Kxy + Gamzxy * Kxz &
-                 + Gamxxx * Kxy + Gamyxx * Kyy + Gamzxx * Kyz)
-  gxzx = gxzx - (  Gamxxz * Kxx + Gamyxz * Kxy + Gamzxz * Kxz &
-                 + Gamxxx * Kxz + Gamyxx * Kyz + Gamzxx * Kzz)
-  gyyx = gyyx - (  Gamxxy * Kxy + Gamyxy * Kyy + Gamzxy * Kyz &
-                 + Gamxxy * Kxy + Gamyxy * Kyy + Gamzxy * Kyz)
-  gyzx = gyzx - (  Gamxxz * Kxy + Gamyxz * Kyy + Gamzxz * Kyz &
-                 + Gamxxy * Kxz + Gamyxy * Kyz + Gamzxy * Kzz)
-  gzzx = gzzx - (  Gamxxz * Kxz + Gamyxz * Kyz + Gamzxz * Kzz &
-                 + Gamxxz * Kxz + Gamyxz * Kyz + Gamzxz * Kzz)
-  gxxy = gxxy - (  Gamxxy * Kxx + Gamyxy * Kxy + Gamzxy * Kxz &
-                 + Gamxxy * Kxx + Gamyxy * Kxy + Gamzxy * Kxz)
-  gxyy = gxyy - (  Gamxyy * Kxx + Gamyyy * Kxy + Gamzyy * Kxz &
-                 + Gamxxy * Kxy + Gamyxy * Kyy + Gamzxy * Kyz)
-  gxzy = gxzy - (  Gamxyz * Kxx + Gamyyz * Kxy + Gamzyz * Kxz &
-                 + Gamxxy * Kxz + Gamyxy * Kyz + Gamzxy * Kzz)
-  gyyy = gyyy - (  Gamxyy * Kxy + Gamyyy * Kyy + Gamzyy * Kyz &
-                 + Gamxyy * Kxy + Gamyyy * Kyy + Gamzyy * Kyz)
-  gyzy = gyzy - (  Gamxyz * Kxy + Gamyyz * Kyy + Gamzyz * Kyz &
-                 + Gamxyy * Kxz + Gamyyy * Kyz + Gamzyy * Kzz)
-  gzzy = gzzy - (  Gamxyz * Kxz + Gamyyz * Kyz + Gamzyz * Kzz &
-                 + Gamxyz * Kxz + Gamyyz * Kyz + Gamzyz * Kzz)
-  gxxz = gxxz - (  Gamxxz * Kxx + Gamyxz * Kxy + Gamzxz * Kxz &
-                 + Gamxxz * Kxx + Gamyxz * Kxy + Gamzxz * Kxz)
-  gxyz = gxyz - (  Gamxyz * Kxx + Gamyyz * Kxy + Gamzyz * Kxz &
-                 + Gamxxz * Kxy + Gamyxz * Kyy + Gamzxz * Kyz)
-  gxzz = gxzz - (  Gamxzz * Kxx + Gamyzz * Kxy + Gamzzz * Kxz &
-                 + Gamxxz * Kxz + Gamyxz * Kyz + Gamzxz * Kzz)
-  gyyz = gyyz - (  Gamxyz * Kxy + Gamyyz * Kyy + Gamzyz * Kyz &
-                 + Gamxyz * Kxy + Gamyyz * Kyy + Gamzyz * Kyz)
-  gyzz = gyzz - (  Gamxzz * Kxy + Gamyzz * Kyy + Gamzzz * Kyz &
-                 + Gamxyz * Kxz + Gamyyz * Kyz + Gamzyz * Kzz)
-  gzzz = gzzz - (  Gamxzz * Kxz + Gamyzz * Kyz + Gamzzz * Kzz &
-                 + Gamxzz * Kxz + Gamyzz * Kyz + Gamzzz * Kzz)
-movx_Res = gupxx*gxxx + gupyy*gxyy + gupzz*gxzz &
-          +gupxy*gxyx + gupxz*gxzx + gupyz*gxzy &
-          +gupxy*gxxy + gupxz*gxxz + gupyz*gxyz
-movy_Res = gupxx*gxyx + gupyy*gyyy + gupzz*gyzz &
-          +gupxy*gyyx + gupxz*gyzx + gupyz*gyzy &
-          +gupxy*gxyy + gupxz*gxyz + gupyz*gyyz
-movz_Res = gupxx*gxzx + gupyy*gyzy + gupzz*gzzz &
-          +gupxy*gyzx + gupxz*gzzx + gupyz*gzzy &
-          +gupxy*gxzy + gupxz*gxzz + gupyz*gyzz
-
-  call fderivs(ex,trK,fx,fy,fz,X,Y,Z,SYM,SYM,SYM,Symmetry,0)
-
-movx_Res = movx_Res - fx - F8*PI*sx
-movy_Res = movy_Res - fy - F8*PI*sy
-movz_Res = movz_Res - fz - F8*PI*sz
-
-  return
-
-  end subroutine constraint_adm
-!-------------------------------------------------------------------------------!
-! computed constraint for ADM formalism for shell                              !
-!-------------------------------------------------------------------------------!
-  subroutine constraint_adm_ss(ex,crho,sigma,R, X, Y, Z,&
-               drhodx, drhody, drhodz,                                         &
-               dsigmadx,dsigmady,dsigmadz,                                     &
-               dRdx,dRdy,dRdz,                                                 &
-               drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz,                &
-               dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz,    &
-               dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz,                            &
-               dxx,gxy,gxz,dyy,gyz,dzz, &
-               Kxx,Kxy,Kxz,Kyy,Kyz,Kzz, &
-               Lap,Sfx,Sfy,Sfz,rho,Sx,Sy,Sz,&
-               Gamxxx, Gamxxy, Gamxxz,Gamxyy, Gamxyz, Gamxzz, &
-               Gamyxx, Gamyxy, Gamyxz,Gamyyy, Gamyyz, Gamyzz, &
-               Gamzxx, Gamzxy, Gamzxz,Gamzyy, Gamzyz, Gamzzz, &
-               Rxx,Rxy,Rxz,Ryy,Ryz,Rzz, &
-               ham_Res, movx_Res, movy_Res, movz_Res, &
-               Symmetry,Lev,sst)
-
-  implicit none
-!~~~~~~> Input parameters:
-
-  integer,intent(in ):: ex(1:3),symmetry,Lev,sst
-  double precision,intent(in),dimension(ex(1))::crho
-  double precision,intent(in),dimension(ex(2))::sigma
-  double precision,intent(in),dimension(ex(3))::R
-  real*8, intent(in ),dimension(ex(1),ex(2),ex(3)):: X,Y,Z
-  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::drhodx, drhody, drhodz
-  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::dsigmadx,dsigmady,dsigmadz
-  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::dRdx,dRdy,dRdz
-  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz
-  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz
-  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: dxx,gxy,gxz,dyy,gyz,dzz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Kxx,Kxy,Kxz,Kyy,Kyz,Kzz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Lap,Sfx,Sfy,Sfz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: rho,Sx,Sy,Sz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: Rxx,Rxy,Rxz,Ryy,Ryz,Rzz
-! second kind of Christofel symble Gamma^i_jk respect to physical metric
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: Gamxxx, Gamxxy, Gamxxz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: Gamxyy, Gamxyz, Gamxzz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: Gamyxx, Gamyxy, Gamyxz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: Gamyyy, Gamyyz, Gamyzz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: Gamzxx, Gamzxy, Gamzxz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: Gamzyy, Gamzyz, Gamzzz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: ham_Res, movx_Res, movy_Res, movz_Res
-!~~~~~~> Other variables:
-!  inverse metric
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gupyy,gupyz,gupzz
-! first order derivative of metric, @_k g_ij
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gxxx,gxyx,gxzx
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gyyx,gyzx,gzzx
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gxxy,gxyy,gxzy
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gyyy,gyzy,gzzy
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gxxz,gxyz,gxzz
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gyyz,gyzz,gzzz
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz,trK,fx,fy,fz
-
-  integer, parameter :: NO_SYMM = 0, EQUATORIAL = 1, OCTANT = 2
-  real*8, parameter :: ZERO = 0.D0, HALF = 0.5d0, ONE = 1.d0, TWO = 2.d0, FOUR = 4.d0
-  real*8, parameter :: F2o3 = 2.d0/3.d0, F8 = 8.d0, F16 = 1.6d1, SIX = 6.d0
-  real*8, parameter :: SYM = 1.D0, ANTI= - 1.D0
-  real*8            :: PI
-
-  call adm_ricci_gamma_ss(ex,crho,sigma,R,X, Y, Z,                      &
-               drhodx, drhody, drhodz,                                         &
-               dsigmadx,dsigmady,dsigmadz,                                     &
-               dRdx,dRdy,dRdz,                                                 &
-               drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz,                &
-               dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz,    &
-               dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz,                            &
-               dxx    ,   gxy    ,   gxz    ,   dyy    ,   gyz    ,   dzz,&
-               Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
-               Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
-               Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
-               Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
-               Symmetry,Lev,sst)
-
-  PI = dacos(-ONE)
-
-  gxx = dxx + ONE
-  gyy = dyy + ONE
-  gzz = dzz + ONE
-! invert metric
-  gupzz =  gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
-           gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
-  gupxx =   ( gyy * gzz - gyz * gyz ) / gupzz
-  gupxy = - ( gxy * gzz - gyz * gxz ) / gupzz
-  gupxz =   ( gxy * gyz - gyy * gxz ) / gupzz
-  gupyy =   ( gxx * gzz - gxz * gxz ) / gupzz
-  gupyz = - ( gxx * gyz - gxy * gxz ) / gupzz
-  gupzz =   ( gxx * gyy - gxy * gxy ) / gupzz
-
-  trK =          gupxx * Kxx + gupyy * Kyy + gupzz * Kzz &
-        + TWO * (gupxy * Kxy + gupxz * Kxz + gupyz * Kyz)
-
-! ham_Res = trR + K^2 - K_ij * K^ij - 16 * PI * rho
-  ham_Res =   gupxx * Rxx + gupyy * Ryy + gupzz * Rzz + &
-        TWO* ( gupxy * Rxy + gupxz * Rxz + gupyz * Ryz )
-
-  ham_Res = ham_Res + trK * trK -(&
-       gupxx * ( &
-       gupxx * Kxx * Kxx + gupyy * Kxy * Kxy + gupzz * Kxz * Kxz + &
-       TWO * (gupxy * Kxx * Kxy + gupxz * Kxx * Kxz + gupyz * Kxy * Kxz) ) + &
-       gupyy * ( &
-       gupxx * Kxy * Kxy + gupyy * Kyy * Kyy + gupzz * Kyz * Kyz + &
-       TWO * (gupxy * Kxy * Kyy + gupxz * Kxy * Kyz + gupyz * Kyy * Kyz) ) + &
-       gupzz * ( &
-       gupxx * Kxz * Kxz + gupyy * Kyz * Kyz + gupzz * Kzz * Kzz + &
-       TWO * (gupxy * Kxz * Kyz + gupxz * Kxz * Kzz + gupyz * Kyz * Kzz) ) + &
-       TWO * ( &
-       gupxy * ( &
-       gupxx * Kxx * Kxy + gupyy * Kxy * Kyy + gupzz * Kxz * Kyz + &
-       gupxy * (Kxx * Kyy + Kxy * Kxy) + &
-       gupxz * (Kxx * Kyz + Kxz * Kxy) + &
-       gupyz * (Kxy * Kyz + Kxz * Kyy) ) + &
-       gupxz * ( &
-       gupxx * Kxx * Kxz + gupyy * Kxy * Kyz + gupzz * Kxz * Kzz + &
-       gupxy * (Kxx * Kyz + Kxy * Kxz) + &
-       gupxz * (Kxx * Kzz + Kxz * Kxz) + &
-       gupyz * (Kxy * Kzz + Kxz * Kyz) ) + &
-       gupyz * ( &
-       gupxx * Kxy * Kxz + gupyy * Kyy * Kyz + gupzz * Kyz * Kzz + &
-       gupxy * (Kxy * Kyz + Kyy * Kxz) + &
-       gupxz * (Kxy * Kzz + Kyz * Kxz) + &
-       gupyz * (Kyy * Kzz + Kyz * Kyz) ) ))- F16 * PI * rho
-
-! mov_Res_j = gupkj*D_k K_ij - d_j trK - 8 PI s_j where D respect to physical metric
-! store D_i K_jk
-  call fderivs_shc(ex,Kxx,gxxx,gxxy,gxxz,crho,sigma,R, SYM, SYM,SYM,Symmetry,Lev,sst,          &
-                       drhodx, drhody, drhodz,                                                 &
-                       dsigmadx,dsigmady,dsigmadz,                                             &
-                       dRdx,dRdy,dRdz)
-  call fderivs_shc(ex,Kxy,gxyx,gxyy,gxyz,crho,sigma,R,ANTI,ANTI,SYM,Symmetry,Lev,sst,          &
-                       drhodx, drhody, drhodz,                                                 &
-                       dsigmadx,dsigmady,dsigmadz,                                             &
-                       dRdx,dRdy,dRdz)
-  call fderivs_shc(ex,Kxz,gxzx,gxzy,gxzz,crho,sigma,R,ANTI,SYM ,ANTI,Symmetry,Lev,sst,         &
-                       drhodx, drhody, drhodz,                                                 &
-                       dsigmadx,dsigmady,dsigmadz,                                             &
-                       dRdx,dRdy,dRdz)
-  call fderivs_shc(ex,Kyy,gyyx,gyyy,gyyz,crho,sigma,R, SYM, SYM,SYM,Symmetry,Lev,sst,          &
-                       drhodx, drhody, drhodz,                                                 &
-                       dsigmadx,dsigmady,dsigmadz,                                             &
-                       dRdx,dRdy,dRdz)
-  call fderivs_shc(ex,Kyz,gyzx,gyzy,gyzz,crho,sigma,R,SYM ,ANTI,ANTI,Symmetry,Lev,sst,         &
-                       drhodx, drhody, drhodz,                                                 &
-                       dsigmadx,dsigmady,dsigmadz,                                             &
-                       dRdx,dRdy,dRdz)
-  call fderivs_shc(ex,Kzz,gzzx,gzzy,gzzz,crho,sigma,R, SYM, SYM,SYM,Symmetry,Lev,sst,          &
-                       drhodx, drhody, drhodz,                                                 &
-                       dsigmadx,dsigmady,dsigmadz,                                             &
-                       dRdx,dRdy,dRdz)
-
-  gxxx = gxxx - (  Gamxxx * Kxx + Gamyxx * Kxy + Gamzxx * Kxz &
-                 + Gamxxx * Kxx + Gamyxx * Kxy + Gamzxx * Kxz)
-  gxyx = gxyx - (  Gamxxy * Kxx + Gamyxy * Kxy + Gamzxy * Kxz &
-                 + Gamxxx * Kxy + Gamyxx * Kyy + Gamzxx * Kyz)
-  gxzx = gxzx - (  Gamxxz * Kxx + Gamyxz * Kxy + Gamzxz * Kxz &
-                 + Gamxxx * Kxz + Gamyxx * Kyz + Gamzxx * Kzz)
-  gyyx = gyyx - (  Gamxxy * Kxy + Gamyxy * Kyy + Gamzxy * Kyz &
-                 + Gamxxy * Kxy + Gamyxy * Kyy + Gamzxy * Kyz)
-  gyzx = gyzx - (  Gamxxz * Kxy + Gamyxz * Kyy + Gamzxz * Kyz &
-                 + Gamxxy * Kxz + Gamyxy * Kyz + Gamzxy * Kzz)
-  gzzx = gzzx - (  Gamxxz * Kxz + Gamyxz * Kyz + Gamzxz * Kzz &
-                 + Gamxxz * Kxz + Gamyxz * Kyz + Gamzxz * Kzz)
-  gxxy = gxxy - (  Gamxxy * Kxx + Gamyxy * Kxy + Gamzxy * Kxz &
-                 + Gamxxy * Kxx + Gamyxy * Kxy + Gamzxy * Kxz)
-  gxyy = gxyy - (  Gamxyy * Kxx + Gamyyy * Kxy + Gamzyy * Kxz &
-                 + Gamxxy * Kxy + Gamyxy * Kyy + Gamzxy * Kyz)
-  gxzy = gxzy - (  Gamxyz * Kxx + Gamyyz * Kxy + Gamzyz * Kxz &
-                 + Gamxxy * Kxz + Gamyxy * Kyz + Gamzxy * Kzz)
-  gyyy = gyyy - (  Gamxyy * Kxy + Gamyyy * Kyy + Gamzyy * Kyz &
-                 + Gamxyy * Kxy + Gamyyy * Kyy + Gamzyy * Kyz)
-  gyzy = gyzy - (  Gamxyz * Kxy + Gamyyz * Kyy + Gamzyz * Kyz &
-                 + Gamxyy * Kxz + Gamyyy * Kyz + Gamzyy * Kzz)
-  gzzy = gzzy - (  Gamxyz * Kxz + Gamyyz * Kyz + Gamzyz * Kzz &
-                 + Gamxyz * Kxz + Gamyyz * Kyz + Gamzyz * Kzz)
-  gxxz = gxxz - (  Gamxxz * Kxx + Gamyxz * Kxy + Gamzxz * Kxz &
-                 + Gamxxz * Kxx + Gamyxz * Kxy + Gamzxz * Kxz)
-  gxyz = gxyz - (  Gamxyz * Kxx + Gamyyz * Kxy + Gamzyz * Kxz &
-                 + Gamxxz * Kxy + Gamyxz * Kyy + Gamzxz * Kyz)
-  gxzz = gxzz - (  Gamxzz * Kxx + Gamyzz * Kxy + Gamzzz * Kxz &
-                 + Gamxxz * Kxz + Gamyxz * Kyz + Gamzxz * Kzz)
-  gyyz = gyyz - (  Gamxyz * Kxy + Gamyyz * Kyy + Gamzyz * Kyz &
-                 + Gamxyz * Kxy + Gamyyz * Kyy + Gamzyz * Kyz)
-  gyzz = gyzz - (  Gamxzz * Kxy + Gamyzz * Kyy + Gamzzz * Kyz &
-                 + Gamxyz * Kxz + Gamyyz * Kyz + Gamzyz * Kzz)
-  gzzz = gzzz - (  Gamxzz * Kxz + Gamyzz * Kyz + Gamzzz * Kzz &
-                 + Gamxzz * Kxz + Gamyzz * Kyz + Gamzzz * Kzz)
-movx_Res = gupxx*gxxx + gupyy*gxyy + gupzz*gxzz &
-          +gupxy*gxyx + gupxz*gxzx + gupyz*gxzy &
-          +gupxy*gxxy + gupxz*gxxz + gupyz*gxyz
-movy_Res = gupxx*gxyx + gupyy*gyyy + gupzz*gyzz &
-          +gupxy*gyyx + gupxz*gyzx + gupyz*gyzy &
-          +gupxy*gxyy + gupxz*gxyz + gupyz*gyyz
-movz_Res = gupxx*gxzx + gupyy*gyzy + gupzz*gzzz &
-          +gupxy*gyzx + gupxz*gzzx + gupyz*gzzy &
-          +gupxy*gxzy + gupxz*gxzz + gupyz*gyzz
-
-  call fderivs_shc(ex,trK,fx,fy,fz,crho,sigma,R, SYM, SYM,SYM,Symmetry,Lev,sst,                &
-                       drhodx, drhody, drhodz,                                                 &
-                       dsigmadx,dsigmady,dsigmadz,                                             &
-                       dRdx,dRdy,dRdz)
-
-movx_Res = movx_Res - fx - F8*PI*sx
-movy_Res = movy_Res - fy - F8*PI*sy
-movz_Res = movz_Res - fz - F8*PI*sz
-
-  return
-
-  end subroutine constraint_adm_ss
+
+!-------------------------------------------------------------------------------!
+! computed constraint for ADM formalism                                         !
+!-------------------------------------------------------------------------------!
+  subroutine constraint_adm(ex, X, Y, Z,&
+               dxx,gxy,gxz,dyy,gyz,dzz, &
+               Kxx,Kxy,Kxz,Kyy,Kyz,Kzz, &
+               Lap,Sfx,Sfy,Sfz,rho,Sx,Sy,Sz,&
+               ham_Res, movx_Res, movy_Res, movz_Res, &
+               Symmetry)
+
+  implicit none
+!~~~~~~> Input parameters:
+
+  integer,intent(in ):: ex(1:3),symmetry
+  real*8, intent(in ):: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: dxx,gxy,gxz,dyy,gyz,dzz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Kxx,Kxy,Kxz,Kyy,Kyz,Kzz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Lap,Sfx,Sfy,Sfz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: rho,Sx,Sy,Sz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: ham_Res, movx_Res, movy_Res, movz_Res
+!~~~~~~> Other variables:
+!  inverse metric
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gupyy,gupyz,gupzz
+! work arrays (named like @_k g_ij): g<ij><k> holds @_k K_ij, later overwritten with D_k K_ij
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gxxx,gxyx,gxzx
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gyyx,gyzx,gzzx
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gxxy,gxyy,gxzy
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gyyy,gyzy,gzzy
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gxxz,gxyz,gxzz
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gyyz,gyzz,gzzz
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz,trK,fx,fy,fz
+  real*8, dimension(ex(1),ex(2),ex(3)) :: Rxx,Rxy,Rxz,Ryy,Ryz,Rzz
+  real*8, dimension(ex(1),ex(2),ex(3)) :: Gamxxx, Gamxxy, Gamxxz
+  real*8, dimension(ex(1),ex(2),ex(3)) :: Gamxyy, Gamxyz, Gamxzz
+  real*8, dimension(ex(1),ex(2),ex(3)) :: Gamyxx, Gamyxy, Gamyxz
+  real*8, dimension(ex(1),ex(2),ex(3)) :: Gamyyy, Gamyyz, Gamyzz
+  real*8, dimension(ex(1),ex(2),ex(3)) :: Gamzxx, Gamzxy, Gamzxz
+  real*8, dimension(ex(1),ex(2),ex(3)) :: Gamzyy, Gamzyz, Gamzzz
+
+  integer, parameter :: NO_SYMM = 0, EQUATORIAL = 1, OCTANT = 2
+  real*8, parameter :: ZERO = 0.D0, HALF = 0.5d0, ONE = 1.d0, TWO = 2.d0, FOUR = 4.d0
+  real*8, parameter :: F2o3 = 2.d0/3.d0, F8 = 8.d0, F16 = 1.6d1, SIX = 6.d0
+  real*8, parameter :: SYM = 1.D0, ANTI= - 1.D0
+  real*8            :: PI
+
+  call adm_ricci_gamma(ex, X, Y, Z,                        &
+               dxx    ,   gxy    ,   gxz    ,   dyy    ,   gyz    ,   dzz,&
+               Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
+               Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
+               Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
+               Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
+               Symmetry)
+
+  PI = dacos(-ONE)
+
+  gxx = dxx + ONE
+  gyy = dyy + ONE
+  gzz = dzz + ONE
+! invert metric
+  gupzz =  gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
+           gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
+  gupxx =   ( gyy * gzz - gyz * gyz ) / gupzz
+  gupxy = - ( gxy * gzz - gyz * gxz ) / gupzz
+  gupxz =   ( gxy * gyz - gyy * gxz ) / gupzz
+  gupyy =   ( gxx * gzz - gxz * gxz ) / gupzz
+  gupyz = - ( gxx * gyz - gxy * gxz ) / gupzz
+  gupzz =   ( gxx * gyy - gxy * gxy ) / gupzz
+
+  trK =          gupxx * Kxx + gupyy * Kyy + gupzz * Kzz &
+        + TWO * (gupxy * Kxy + gupxz * Kxz + gupyz * Kyz)
+
+! ham_Res = trR + K^2 - K_ij * K^ij - 16 * PI * rho
+  ham_Res =   gupxx * Rxx + gupyy * Ryy + gupzz * Rzz + &
+        TWO* ( gupxy * Rxy + gupxz * Rxz + gupyz * Ryz )
+
+  ham_Res = ham_Res + trK * trK -(&
+       gupxx * ( &
+       gupxx * Kxx * Kxx + gupyy * Kxy * Kxy + gupzz * Kxz * Kxz + &
+       TWO * (gupxy * Kxx * Kxy + gupxz * Kxx * Kxz + gupyz * Kxy * Kxz) ) + &
+       gupyy * ( &
+       gupxx * Kxy * Kxy + gupyy * Kyy * Kyy + gupzz * Kyz * Kyz + &
+       TWO * (gupxy * Kxy * Kyy + gupxz * Kxy * Kyz + gupyz * Kyy * Kyz) ) + &
+       gupzz * ( &
+       gupxx * Kxz * Kxz + gupyy * Kyz * Kyz + gupzz * Kzz * Kzz + &
+       TWO * (gupxy * Kxz * Kyz + gupxz * Kxz * Kzz + gupyz * Kyz * Kzz) ) + &
+       TWO * ( &
+       gupxy * ( &
+       gupxx * Kxx * Kxy + gupyy * Kxy * Kyy + gupzz * Kxz * Kyz + &
+       gupxy * (Kxx * Kyy + Kxy * Kxy) + &
+       gupxz * (Kxx * Kyz + Kxz * Kxy) + &
+       gupyz * (Kxy * Kyz + Kxz * Kyy) ) + &
+       gupxz * ( &
+       gupxx * Kxx * Kxz + gupyy * Kxy * Kyz + gupzz * Kxz * Kzz + &
+       gupxy * (Kxx * Kyz + Kxy * Kxz) + &
+       gupxz * (Kxx * Kzz + Kxz * Kxz) + &
+       gupyz * (Kxy * Kzz + Kxz * Kyz) ) + &
+       gupyz * ( &
+       gupxx * Kxy * Kxz + gupyy * Kyy * Kyz + gupzz * Kyz * Kzz + &
+       gupxy * (Kxy * Kyz + Kyy * Kxz) + &
+       gupxz * (Kxy * Kzz + Kyz * Kxz) + &
+       gupyz * (Kyy * Kzz + Kyz * Kyz) ) ))- F16 * PI * rho
+
+! mov_Res_i = gup^jk * D_k K_ij - d_i trK - 8 PI S_i, where D is taken with respect to the physical metric
+! g<ij><k> first receives @_k K_ij, then the Gamma terms below turn it into D_k K_ij
+  call fderivs(ex,Kxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0)
+  call fderivs(ex,Kxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,0)
+  call fderivs(ex,Kxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,0)
+  call fderivs(ex,Kyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0)
+  call fderivs(ex,Kyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,0)
+  call fderivs(ex,Kzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0)
+
+  gxxx = gxxx - (  Gamxxx * Kxx + Gamyxx * Kxy + Gamzxx * Kxz &
+                 + Gamxxx * Kxx + Gamyxx * Kxy + Gamzxx * Kxz)
+  gxyx = gxyx - (  Gamxxy * Kxx + Gamyxy * Kxy + Gamzxy * Kxz &
+                 + Gamxxx * Kxy + Gamyxx * Kyy + Gamzxx * Kyz)
+  gxzx = gxzx - (  Gamxxz * Kxx + Gamyxz * Kxy + Gamzxz * Kxz &
+                 + Gamxxx * Kxz + Gamyxx * Kyz + Gamzxx * Kzz)
+  gyyx = gyyx - (  Gamxxy * Kxy + Gamyxy * Kyy + Gamzxy * Kyz &
+                 + Gamxxy * Kxy + Gamyxy * Kyy + Gamzxy * Kyz)
+  gyzx = gyzx - (  Gamxxz * Kxy + Gamyxz * Kyy + Gamzxz * Kyz &
+                 + Gamxxy * Kxz + Gamyxy * Kyz + Gamzxy * Kzz)
+  gzzx = gzzx - (  Gamxxz * Kxz + Gamyxz * Kyz + Gamzxz * Kzz &
+                 + Gamxxz * Kxz + Gamyxz * Kyz + Gamzxz * Kzz)
+  gxxy = gxxy - (  Gamxxy * Kxx + Gamyxy * Kxy + Gamzxy * Kxz &
+                 + Gamxxy * Kxx + Gamyxy * Kxy + Gamzxy * Kxz)
+  gxyy = gxyy - (  Gamxyy * Kxx + Gamyyy * Kxy + Gamzyy * Kxz &
+                 + Gamxxy * Kxy + Gamyxy * Kyy + Gamzxy * Kyz)
+  gxzy = gxzy - (  Gamxyz * Kxx + Gamyyz * Kxy + Gamzyz * Kxz &
+                 + Gamxxy * Kxz + Gamyxy * Kyz + Gamzxy * Kzz)
+  gyyy = gyyy - (  Gamxyy * Kxy + Gamyyy * Kyy + Gamzyy * Kyz &
+                 + Gamxyy * Kxy + Gamyyy * Kyy + Gamzyy * Kyz)
+  gyzy = gyzy - (  Gamxyz * Kxy + Gamyyz * Kyy + Gamzyz * Kyz &
+                 + Gamxyy * Kxz + Gamyyy * Kyz + Gamzyy * Kzz)
+  gzzy = gzzy - (  Gamxyz * Kxz + Gamyyz * Kyz + Gamzyz * Kzz &
+                 + Gamxyz * Kxz + Gamyyz * Kyz + Gamzyz * Kzz)
+  gxxz = gxxz - (  Gamxxz * Kxx + Gamyxz * Kxy + Gamzxz * Kxz &
+                 + Gamxxz * Kxx + Gamyxz * Kxy + Gamzxz * Kxz)
+  gxyz = gxyz - (  Gamxyz * Kxx + Gamyyz * Kxy + Gamzyz * Kxz &
+                 + Gamxxz * Kxy + Gamyxz * Kyy + Gamzxz * Kyz)
+  gxzz = gxzz - (  Gamxzz * Kxx + Gamyzz * Kxy + Gamzzz * Kxz &
+                 + Gamxxz * Kxz + Gamyxz * Kyz + Gamzxz * Kzz)
+  gyyz = gyyz - (  Gamxyz * Kxy + Gamyyz * Kyy + Gamzyz * Kyz &
+                 + Gamxyz * Kxy + Gamyyz * Kyy + Gamzyz * Kyz)
+  gyzz = gyzz - (  Gamxzz * Kxy + Gamyzz * Kyy + Gamzzz * Kyz &
+                 + Gamxyz * Kxz + Gamyyz * Kyz + Gamzyz * Kzz)
+  gzzz = gzzz - (  Gamxzz * Kxz + Gamyzz * Kyz + Gamzzz * Kzz &
+                 + Gamxzz * Kxz + Gamyzz * Kyz + Gamzzz * Kzz)
+movx_Res = gupxx*gxxx + gupyy*gxyy + gupzz*gxzz &
+          +gupxy*gxyx + gupxz*gxzx + gupyz*gxzy &
+          +gupxy*gxxy + gupxz*gxxz + gupyz*gxyz
+movy_Res = gupxx*gxyx + gupyy*gyyy + gupzz*gyzz &
+          +gupxy*gyyx + gupxz*gyzx + gupyz*gyzy &
+          +gupxy*gxyy + gupxz*gxyz + gupyz*gyyz
+movz_Res = gupxx*gxzx + gupyy*gyzy + gupzz*gzzz &
+          +gupxy*gyzx + gupxz*gzzx + gupyz*gzzy &
+          +gupxy*gxzy + gupxz*gxzz + gupyz*gyzz
+
+  call fderivs(ex,trK,fx,fy,fz,X,Y,Z,SYM,SYM,SYM,Symmetry,0)
+
+movx_Res = movx_Res - fx - F8*PI*sx
+movy_Res = movy_Res - fy - F8*PI*sy
+movz_Res = movz_Res - fz - F8*PI*sz
+
+  return
+
+  end subroutine constraint_adm
+!-------------------------------------------------------------------------------!
+! computed constraint for ADM formalism for shell                              !
+!-------------------------------------------------------------------------------!
+  subroutine constraint_adm_ss(ex,crho,sigma,R, X, Y, Z,&
+               drhodx, drhody, drhodz,                                         &
+               dsigmadx,dsigmady,dsigmadz,                                     &
+               dRdx,dRdy,dRdz,                                                 &
+               drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz,                &
+               dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz,    &
+               dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz,                            &
+               dxx,gxy,gxz,dyy,gyz,dzz, &
+               Kxx,Kxy,Kxz,Kyy,Kyz,Kzz, &
+               Lap,Sfx,Sfy,Sfz,rho,Sx,Sy,Sz,&
+               Gamxxx, Gamxxy, Gamxxz,Gamxyy, Gamxyz, Gamxzz, &
+               Gamyxx, Gamyxy, Gamyxz,Gamyyy, Gamyyz, Gamyzz, &
+               Gamzxx, Gamzxy, Gamzxz,Gamzyy, Gamzyz, Gamzzz, &
+               Rxx,Rxy,Rxz,Ryy,Ryz,Rzz, &
+               ham_Res, movx_Res, movy_Res, movz_Res, &
+               Symmetry,Lev,sst)
+
+  implicit none
+!~~~~~~> Input parameters:
+
+  integer,intent(in ):: ex(1:3),symmetry,Lev,sst
+  double precision,intent(in),dimension(ex(1))::crho
+  double precision,intent(in),dimension(ex(2))::sigma
+  double precision,intent(in),dimension(ex(3))::R
+  real*8, intent(in ),dimension(ex(1),ex(2),ex(3)):: X,Y,Z
+  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::drhodx, drhody, drhodz
+  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::dsigmadx,dsigmady,dsigmadz
+  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::dRdx,dRdy,dRdz
+  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz
+  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz
+  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: dxx,gxy,gxz,dyy,gyz,dzz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Kxx,Kxy,Kxz,Kyy,Kyz,Kzz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Lap,Sfx,Sfy,Sfz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: rho,Sx,Sy,Sz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: Rxx,Rxy,Rxz,Ryy,Ryz,Rzz
+! Christoffel symbols of the second kind, Gamma^i_jk, with respect to the physical metric
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: Gamxxx, Gamxxy, Gamxxz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: Gamxyy, Gamxyz, Gamxzz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: Gamyxx, Gamyxy, Gamyxz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: Gamyyy, Gamyyz, Gamyzz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: Gamzxx, Gamzxy, Gamzxz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: Gamzyy, Gamzyz, Gamzzz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: ham_Res, movx_Res, movy_Res, movz_Res
+!~~~~~~> Other variables:
+!  inverse metric
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gupyy,gupyz,gupzz
+! work arrays (named like @_k g_ij): g<ij><k> holds @_k K_ij, later overwritten with D_k K_ij
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gxxx,gxyx,gxzx
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gyyx,gyzx,gzzx
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gxxy,gxyy,gxzy
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gyyy,gyzy,gzzy
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gxxz,gxyz,gxzz
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gyyz,gyzz,gzzz
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz,trK,fx,fy,fz
+
+  integer, parameter :: NO_SYMM = 0, EQUATORIAL = 1, OCTANT = 2
+  real*8, parameter :: ZERO = 0.D0, HALF = 0.5d0, ONE = 1.d0, TWO = 2.d0, FOUR = 4.d0
+  real*8, parameter :: F2o3 = 2.d0/3.d0, F8 = 8.d0, F16 = 1.6d1, SIX = 6.d0
+  real*8, parameter :: SYM = 1.D0, ANTI= - 1.D0
+  real*8            :: PI
+
+  call adm_ricci_gamma_ss(ex,crho,sigma,R,X, Y, Z,                      &
+               drhodx, drhody, drhodz,                                         &
+               dsigmadx,dsigmady,dsigmadz,                                     &
+               dRdx,dRdy,dRdz,                                                 &
+               drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz,                &
+               dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz,    &
+               dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz,                            &
+               dxx    ,   gxy    ,   gxz    ,   dyy    ,   gyz    ,   dzz,&
+               Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
+               Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
+               Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
+               Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
+               Symmetry,Lev,sst)
+
+  PI = dacos(-ONE)
+
+  gxx = dxx + ONE
+  gyy = dyy + ONE
+  gzz = dzz + ONE
+! invert metric
+  gupzz =  gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
+           gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
+  gupxx =   ( gyy * gzz - gyz * gyz ) / gupzz
+  gupxy = - ( gxy * gzz - gyz * gxz ) / gupzz
+  gupxz =   ( gxy * gyz - gyy * gxz ) / gupzz
+  gupyy =   ( gxx * gzz - gxz * gxz ) / gupzz
+  gupyz = - ( gxx * gyz - gxy * gxz ) / gupzz
+  gupzz =   ( gxx * gyy - gxy * gxy ) / gupzz
+
+  trK =          gupxx * Kxx + gupyy * Kyy + gupzz * Kzz &
+        + TWO * (gupxy * Kxy + gupxz * Kxz + gupyz * Kyz)
+
+! ham_Res = trR + K^2 - K_ij * K^ij - 16 * PI * rho
+  ham_Res =   gupxx * Rxx + gupyy * Ryy + gupzz * Rzz + &
+        TWO* ( gupxy * Rxy + gupxz * Rxz + gupyz * Ryz )
+
+  ham_Res = ham_Res + trK * trK -(&
+       gupxx * ( &
+       gupxx * Kxx * Kxx + gupyy * Kxy * Kxy + gupzz * Kxz * Kxz + &
+       TWO * (gupxy * Kxx * Kxy + gupxz * Kxx * Kxz + gupyz * Kxy * Kxz) ) + &
+       gupyy * ( &
+       gupxx * Kxy * Kxy + gupyy * Kyy * Kyy + gupzz * Kyz * Kyz + &
+       TWO * (gupxy * Kxy * Kyy + gupxz * Kxy * Kyz + gupyz * Kyy * Kyz) ) + &
+       gupzz * ( &
+       gupxx * Kxz * Kxz + gupyy * Kyz * Kyz + gupzz * Kzz * Kzz + &
+       TWO * (gupxy * Kxz * Kyz + gupxz * Kxz * Kzz + gupyz * Kyz * Kzz) ) + &
+       TWO * ( &
+       gupxy * ( &
+       gupxx * Kxx * Kxy + gupyy * Kxy * Kyy + gupzz * Kxz * Kyz + &
+       gupxy * (Kxx * Kyy + Kxy * Kxy) + &
+       gupxz * (Kxx * Kyz + Kxz * Kxy) + &
+       gupyz * (Kxy * Kyz + Kxz * Kyy) ) + &
+       gupxz * ( &
+       gupxx * Kxx * Kxz + gupyy * Kxy * Kyz + gupzz * Kxz * Kzz + &
+       gupxy * (Kxx * Kyz + Kxy * Kxz) + &
+       gupxz * (Kxx * Kzz + Kxz * Kxz) + &
+       gupyz * (Kxy * Kzz + Kxz * Kyz) ) + &
+       gupyz * ( &
+       gupxx * Kxy * Kxz + gupyy * Kyy * Kyz + gupzz * Kyz * Kzz + &
+       gupxy * (Kxy * Kyz + Kyy * Kxz) + &
+       gupxz * (Kxy * Kzz + Kyz * Kxz) + &
+       gupyz * (Kyy * Kzz + Kyz * Kyz) ) ))- F16 * PI * rho
+
+! mov_Res_i = gup^jk * D_k K_ij - d_i trK - 8 PI S_i, where D is taken with respect to the physical metric
+! g<ij><k> first receives @_k K_ij, then the Gamma terms below turn it into D_k K_ij
+  call fderivs_shc(ex,Kxx,gxxx,gxxy,gxxz,crho,sigma,R, SYM, SYM,SYM,Symmetry,Lev,sst,          &
+                       drhodx, drhody, drhodz,                                                 &
+                       dsigmadx,dsigmady,dsigmadz,                                             &
+                       dRdx,dRdy,dRdz)
+  call fderivs_shc(ex,Kxy,gxyx,gxyy,gxyz,crho,sigma,R,ANTI,ANTI,SYM,Symmetry,Lev,sst,          &
+                       drhodx, drhody, drhodz,                                                 &
+                       dsigmadx,dsigmady,dsigmadz,                                             &
+                       dRdx,dRdy,dRdz)
+  call fderivs_shc(ex,Kxz,gxzx,gxzy,gxzz,crho,sigma,R,ANTI,SYM ,ANTI,Symmetry,Lev,sst,         &
+                       drhodx, drhody, drhodz,                                                 &
+                       dsigmadx,dsigmady,dsigmadz,                                             &
+                       dRdx,dRdy,dRdz)
+  call fderivs_shc(ex,Kyy,gyyx,gyyy,gyyz,crho,sigma,R, SYM, SYM,SYM,Symmetry,Lev,sst,          &
+                       drhodx, drhody, drhodz,                                                 &
+                       dsigmadx,dsigmady,dsigmadz,                                             &
+                       dRdx,dRdy,dRdz)
+  call fderivs_shc(ex,Kyz,gyzx,gyzy,gyzz,crho,sigma,R,SYM ,ANTI,ANTI,Symmetry,Lev,sst,         &
+                       drhodx, drhody, drhodz,                                                 &
+                       dsigmadx,dsigmady,dsigmadz,                                             &
+                       dRdx,dRdy,dRdz)
+  call fderivs_shc(ex,Kzz,gzzx,gzzy,gzzz,crho,sigma,R, SYM, SYM,SYM,Symmetry,Lev,sst,          &
+                       drhodx, drhody, drhodz,                                                 &
+                       dsigmadx,dsigmady,dsigmadz,                                             &
+                       dRdx,dRdy,dRdz)
+
+  gxxx = gxxx - (  Gamxxx * Kxx + Gamyxx * Kxy + Gamzxx * Kxz &
+                 + Gamxxx * Kxx + Gamyxx * Kxy + Gamzxx * Kxz)
+  gxyx = gxyx - (  Gamxxy * Kxx + Gamyxy * Kxy + Gamzxy * Kxz &
+                 + Gamxxx * Kxy + Gamyxx * Kyy + Gamzxx * Kyz)
+  gxzx = gxzx - (  Gamxxz * Kxx + Gamyxz * Kxy + Gamzxz * Kxz &
+                 + Gamxxx * Kxz + Gamyxx * Kyz + Gamzxx * Kzz)
+  gyyx = gyyx - (  Gamxxy * Kxy + Gamyxy * Kyy + Gamzxy * Kyz &
+                 + Gamxxy * Kxy + Gamyxy * Kyy + Gamzxy * Kyz)
+  gyzx = gyzx - (  Gamxxz * Kxy + Gamyxz * Kyy + Gamzxz * Kyz &
+                 + Gamxxy * Kxz + Gamyxy * Kyz + Gamzxy * Kzz)
+  gzzx = gzzx - (  Gamxxz * Kxz + Gamyxz * Kyz + Gamzxz * Kzz &
+                 + Gamxxz * Kxz + Gamyxz * Kyz + Gamzxz * Kzz)
+  gxxy = gxxy - (  Gamxxy * Kxx + Gamyxy * Kxy + Gamzxy * Kxz &
+                 + Gamxxy * Kxx + Gamyxy * Kxy + Gamzxy * Kxz)
+  gxyy = gxyy - (  Gamxyy * Kxx + Gamyyy * Kxy + Gamzyy * Kxz &
+                 + Gamxxy * Kxy + Gamyxy * Kyy + Gamzxy * Kyz)
+  gxzy = gxzy - (  Gamxyz * Kxx + Gamyyz * Kxy + Gamzyz * Kxz &
+                 + Gamxxy * Kxz + Gamyxy * Kyz + Gamzxy * Kzz)
+  gyyy = gyyy - (  Gamxyy * Kxy + Gamyyy * Kyy + Gamzyy * Kyz &
+                 + Gamxyy * Kxy + Gamyyy * Kyy + Gamzyy * Kyz)
+  gyzy = gyzy - (  Gamxyz * Kxy + Gamyyz * Kyy + Gamzyz * Kyz &
+                 + Gamxyy * Kxz + Gamyyy * Kyz + Gamzyy * Kzz)
+  gzzy = gzzy - (  Gamxyz * Kxz + Gamyyz * Kyz + Gamzyz * Kzz &
+                 + Gamxyz * Kxz + Gamyyz * Kyz + Gamzyz * Kzz)
+  gxxz = gxxz - (  Gamxxz * Kxx + Gamyxz * Kxy + Gamzxz * Kxz &
+                 + Gamxxz * Kxx + Gamyxz * Kxy + Gamzxz * Kxz)
+  gxyz = gxyz - (  Gamxyz * Kxx + Gamyyz * Kxy + Gamzyz * Kxz &
+                 + Gamxxz * Kxy + Gamyxz * Kyy + Gamzxz * Kyz)
+  gxzz = gxzz - (  Gamxzz * Kxx + Gamyzz * Kxy + Gamzzz * Kxz &
+                 + Gamxxz * Kxz + Gamyxz * Kyz + Gamzxz * Kzz)
+  gyyz = gyyz - (  Gamxyz * Kxy + Gamyyz * Kyy + Gamzyz * Kyz &
+                 + Gamxyz * Kxy + Gamyyz * Kyy + Gamzyz * Kyz)
+  gyzz = gyzz - (  Gamxzz * Kxy + Gamyzz * Kyy + Gamzzz * Kyz &
+                 + Gamxyz * Kxz + Gamyyz * Kyz + Gamzyz * Kzz)
+  gzzz = gzzz - (  Gamxzz * Kxz + Gamyzz * Kyz + Gamzzz * Kzz &
+                 + Gamxzz * Kxz + Gamyzz * Kyz + Gamzzz * Kzz)
+movx_Res = gupxx*gxxx + gupyy*gxyy + gupzz*gxzz &
+          +gupxy*gxyx + gupxz*gxzx + gupyz*gxzy &
+          +gupxy*gxxy + gupxz*gxxz + gupyz*gxyz
+movy_Res = gupxx*gxyx + gupyy*gyyy + gupzz*gyzz &
+          +gupxy*gyyx + gupxz*gyzx + gupyz*gyzy &
+          +gupxy*gxyy + gupxz*gxyz + gupyz*gyyz
+movz_Res = gupxx*gxzx + gupyy*gyzy + gupzz*gzzz &
+          +gupxy*gyzx + gupxz*gzzx + gupyz*gzzy &
+          +gupxy*gxzy + gupxz*gxzz + gupyz*gyzz
+
+  call fderivs_shc(ex,trK,fx,fy,fz,crho,sigma,R, SYM, SYM,SYM,Symmetry,Lev,sst,                &
+                       drhodx, drhody, drhodz,                                                 &
+                       dsigmadx,dsigmady,dsigmadz,                                             &
+                       dRdx,dRdy,dRdz)
+
+movx_Res = movx_Res - fx - F8*PI*sx
+movy_Res = movy_Res - fy - F8*PI*sy
+movz_Res = movz_Res - fz - F8*PI*sz
+
+  return
+
+  end subroutine constraint_adm_ss
--- a/AMSS_NCKU_source/BSSN/bssn2adm.f90
+++ b/AMSS_NCKU_source/BSSN/bssn2adm.f90
@@ -1,40 +1,40 @@
-
-!-------------------------------------------------------------------------------!
-! convert bssn variables to ADM variables                                       !
-!-------------------------------------------------------------------------------!
-  subroutine bssn2adm(ex,chi,trK, &
-               gxx,gxy,gxz,gyy,gyz,gzz, &
-               Axx,Axy,Axz,Ayy,Ayz,Azz, &
-               adm_gxx,adm_gxy,adm_gxz,adm_gyy,adm_gyz,adm_gzz, &
-               Kxx,Kxy,Kxz,Kyy,Kyz,Kzz)
-
-  implicit none
-!~~~~~~> Input parameters:
-
-  integer,intent(in ):: ex(1:3)
-  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::chi,trK
-  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::gxx,gxy,gxz,gyy,gyz,gzz
-  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::Axx,Axy,Axz,Ayy,Ayz,Azz
-
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: adm_gxx,adm_gxy,adm_gxz,adm_gyy,adm_gyz,adm_gzz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: Kxx,Kxy,Kxz,Kyy,Kyz,Kzz
-
-  real*8, parameter :: F1o3=1.d0/3.d0
-
-  adm_gxx = gxx/chi
-  adm_gxy = gxy/chi
-  adm_gxz = gxz/chi
-  adm_gyy = gyy/chi
-  adm_gyz = gyz/chi
-  adm_gzz = gzz/chi
-
-  Kxx = Axx/chi+F1o3*trK*adm_gxx
-  Kxy = Axy/chi+F1o3*trK*adm_gxy
-  Kxz = Axz/chi+F1o3*trK*adm_gxz
-  Kyy = Ayy/chi+F1o3*trK*adm_gyy
-  Kyz = Ayz/chi+F1o3*trK*adm_gyz
-  Kzz = Azz/chi+F1o3*trK*adm_gzz
-
-  return
-
-  end subroutine bssn2adm
+
+!-------------------------------------------------------------------------------!
+! convert bssn variables to ADM variables                                       !
+!-------------------------------------------------------------------------------!
+  subroutine bssn2adm(ex,chi,trK, &
+               gxx,gxy,gxz,gyy,gyz,gzz, &
+               Axx,Axy,Axz,Ayy,Ayz,Azz, &
+               adm_gxx,adm_gxy,adm_gxz,adm_gyy,adm_gyz,adm_gzz, &
+               Kxx,Kxy,Kxz,Kyy,Kyz,Kzz)
+
+  implicit none
+!~~~~~~> Input parameters:
+
+  integer,intent(in ):: ex(1:3)
+  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::chi,trK
+  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::gxx,gxy,gxz,gyy,gyz,gzz
+  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::Axx,Axy,Axz,Ayy,Ayz,Azz
+
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: adm_gxx,adm_gxy,adm_gxz,adm_gyy,adm_gyz,adm_gzz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: Kxx,Kxy,Kxz,Kyy,Kyz,Kzz
+
+  real*8, parameter :: F1o3=1.d0/3.d0
+
+  adm_gxx = gxx/chi
+  adm_gxy = gxy/chi
+  adm_gxz = gxz/chi
+  adm_gyy = gyy/chi
+  adm_gyz = gyz/chi
+  adm_gzz = gzz/chi
+
+  Kxx = Axx/chi+F1o3*trK*adm_gxx
+  Kxy = Axy/chi+F1o3*trK*adm_gxy
+  Kxz = Axz/chi+F1o3*trK*adm_gxz
+  Kyy = Ayy/chi+F1o3*trK*adm_gyy
+  Kyz = Ayz/chi+F1o3*trK*adm_gyz
+  Kzz = Azz/chi+F1o3*trK*adm_gzz
+
+  return
+
+  end subroutine bssn2adm
--- a/AMSS_NCKU_source/BSSN/bssnEM_class.C
+++ b/AMSS_NCKU_source/BSSN/bssnEM_class.C
--- a/AMSS_NCKU_source/BSSN/bssnEM_class.h
+++ b/AMSS_NCKU_source/BSSN/bssnEM_class.h
@@ -1,69 +1,69 @@
-
-#ifndef BSSNEM_CLASS_H
-#define BSSNEM_CLASS_H
-
-#ifdef newc
-#include <iostream>
-#include <iomanip>
-#include <fstream>
-#include <cstdlib>
-#include <string>
-#include <cmath>
-using namespace std;
-#else
-#include <iostream.h>
-#include <iomanip.h>
-#include <fstream.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#endif
-
-#include <mpi.h>
-
-#include "cgh.h"
-#include "ShellPatch.h"
-#include "misc.h"
-#include "var.h"
-#include "MyList.h"
-#include "monitor.h"
-#include "surface_integral.h"
-
-#include "macrodef.h"
-
-#ifdef USE_GPU
-#include "bssn_gpu_class.h"
-#else
-#include "bssn_class.h"
-#endif
-
-class bssnEM_class : public bssn_class
-{
-public:
-     bssnEM_class(double Couranti, double StartTimei, double TotalTimei, double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei,
-                  int Symmetryi, int checkruni, char *checkfilenamei, double numepssi, double numepsbi, double numepshi,
-                  int a_levi, int maxli, int decni, double maxrexi, double drexi);
-     ~bssnEM_class();
-
-     void Initialize();
-     void Read_Ansorg();
-     void Setup_Initial_Data();
-     void Step(int lev, int YN);
-     void Compute_Phi2(int lev);
-     void AnalysisStuff_EM(int lev, double dT_lev);
-     void Interp_Constraint();
-
-protected:
-     var *Exo, *Eyo, *Ezo, *Bxo, *Byo, *Bzo, *Kpsio, *Kphio;
-     var *Ex0, *Ey0, *Ez0, *Bx0, *By0, *Bz0, *Kpsi0, *Kphi0;
-     var *Ex, *Ey, *Ez, *Bx, *By, *Bz, *Kpsi, *Kphi;
-     var *Ex1, *Ey1, *Ez1, *Bx1, *By1, *Bz1, *Kpsi1, *Kphi1;
-     var *Ex_rhs, *Ey_rhs, *Ez_rhs, *Bx_rhs, *By_rhs, *Bz_rhs, *Kpsi_rhs, *Kphi_rhs;
-     var *Jx, *Jy, *Jz, *qchar;
-     var *Rphi2, *Iphi2;
-     var *Rphi1, *Iphi1;
-
-     monitor *Phi2Monitor;
-     monitor *Phi1Monitor;
-};
-#endif /* BSSNEM_CLASS_H */
+
+#ifndef BSSNEM_CLASS_H
+#define BSSNEM_CLASS_H
+
+#ifdef newc
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <cstdlib>
+#include <string>
+#include <cmath>
+using namespace std;
+#else
+#include <iostream.h>
+#include <iomanip.h>
+#include <fstream.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#endif
+
+#include <mpi.h>
+
+#include "cgh.h"
+#include "ShellPatch.h"
+#include "misc.h"
+#include "var.h"
+#include "MyList.h"
+#include "monitor.h"
+#include "surface_integral.h"
+
+#include "macrodef.h"
+
+#ifdef USE_GPU
+#include "bssn_gpu_class.h"
+#else
+#include "bssn_class.h"
+#endif
+
+class bssnEM_class : public bssn_class
+{
+public:
+     bssnEM_class(double Couranti, double StartTimei, double TotalTimei, double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei,
+                  int Symmetryi, int checkruni, char *checkfilenamei, double numepssi, double numepsbi, double numepshi,
+                  int a_levi, int maxli, int decni, double maxrexi, double drexi);
+     ~bssnEM_class();
+
+     void Initialize();
+     void Read_Ansorg();
+     void Setup_Initial_Data();
+     void Step(int lev, int YN);
+     void Compute_Phi2(int lev);
+     void AnalysisStuff_EM(int lev, double dT_lev);
+     void Interp_Constraint();
+
+protected:
+     var *Exo, *Eyo, *Ezo, *Bxo, *Byo, *Bzo, *Kpsio, *Kphio;
+     var *Ex0, *Ey0, *Ez0, *Bx0, *By0, *Bz0, *Kpsi0, *Kphi0;
+     var *Ex, *Ey, *Ez, *Bx, *By, *Bz, *Kpsi, *Kphi;
+     var *Ex1, *Ey1, *Ez1, *Bx1, *By1, *Bz1, *Kpsi1, *Kphi1;
+     var *Ex_rhs, *Ey_rhs, *Ez_rhs, *Bx_rhs, *By_rhs, *Bz_rhs, *Kpsi_rhs, *Kphi_rhs;
+     var *Jx, *Jy, *Jz, *qchar;
+     var *Rphi2, *Iphi2;
+     var *Rphi1, *Iphi1;
+
+     monitor *Phi2Monitor;
+     monitor *Phi1Monitor;
+};
+#endif /* BSSNEM_CLASS_H */
--- a/AMSS_NCKU_source/BSSN/bssn_class.C
+++ b/AMSS_NCKU_source/BSSN/bssn_class.C
--- a/AMSS_NCKU_source/BSSN/bssn_class.h
+++ b/AMSS_NCKU_source/BSSN/bssn_class.h
@@ -1,203 +1,206 @@
-
-#ifndef BSSN_CLASS_H
-#define BSSN_CLASS_H
-
-#ifdef newc
-#include <iostream>
-#include <iomanip>
-#include <fstream>
-#include <cstdlib>
-#include <string>
-#include <cmath>
-using namespace std;
-#else
-#include <iostream.h>
-#include <iomanip.h>
-#include <fstream.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#endif
-
-#include <mpi.h>
-
-#include "macrodef.h"
-#include "cgh.h"
-#include "ShellPatch.h"
-#include "misc.h"
-#include "var.h"
-#include "MyList.h"
-#include "monitor.h"
-#include "surface_integral.h"
-#include "checkpoint.h"
-
-extern void setpbh(int iBHN, double **iPBH, double *iMass, int rBHN);
-
-class bssn_class
-{
-public:
-       int ngfs;
-       int nprocs, myrank;
-       cgh *GH;
-       ShellPatch *SH;
-       double PhysTime;
-
-       int checkrun;
-       char checkfilename[50];
-       int Steps;
-       double StartTime, TotalTime;
-       double AnasTime, DumpTime, d2DumpTime, CheckTime;
-       double LastAnas, LastConsOut;
-       double Courant;
-       double numepss, numepsb, numepsh;
-       int Symmetry;
-       int maxl, decn;
-       double maxrex, drex;
-       int trfls, a_lev;
-
-       double dT;
-       double chitiny;
-
-       double **Porg0, **Porgbr, **Porg, **Porg1, **Porg_rhs;
-       int BH_num, BH_num_input;
-       double *Mass, *Pmom, *Spin;
-       double ADMMass;
-
-       var *phio, *trKo;
-       var *gxxo, *gxyo, *gxzo, *gyyo, *gyzo, *gzzo;
-       var *Axxo, *Axyo, *Axzo, *Ayyo, *Ayzo, *Azzo;
-       var *Gmxo, *Gmyo, *Gmzo;
-       var *Lapo, *Sfxo, *Sfyo, *Sfzo;
-       var *dtSfxo, *dtSfyo, *dtSfzo;
-
-       var *phi0, *trK0;
-       var *gxx0, *gxy0, *gxz0, *gyy0, *gyz0, *gzz0;
-       var *Axx0, *Axy0, *Axz0, *Ayy0, *Ayz0, *Azz0;
-       var *Gmx0, *Gmy0, *Gmz0;
-       var *Lap0, *Sfx0, *Sfy0, *Sfz0;
-       var *dtSfx0, *dtSfy0, *dtSfz0;
-
-       var *phi, *trK;
-       var *gxx, *gxy, *gxz, *gyy, *gyz, *gzz;
-       var *Axx, *Axy, *Axz, *Ayy, *Ayz, *Azz;
-       var *Gmx, *Gmy, *Gmz;
-       var *Lap, *Sfx, *Sfy, *Sfz;
-       var *dtSfx, *dtSfy, *dtSfz;
-
-       var *phi1, *trK1;
-       var *gxx1, *gxy1, *gxz1, *gyy1, *gyz1, *gzz1;
-       var *Axx1, *Axy1, *Axz1, *Ayy1, *Ayz1, *Azz1;
-       var *Gmx1, *Gmy1, *Gmz1;
-       var *Lap1, *Sfx1, *Sfy1, *Sfz1;
-       var *dtSfx1, *dtSfy1, *dtSfz1;
-
-       var *phi_rhs, *trK_rhs;
-       var *gxx_rhs, *gxy_rhs, *gxz_rhs, *gyy_rhs, *gyz_rhs, *gzz_rhs;
-       var *Axx_rhs, *Axy_rhs, *Axz_rhs, *Ayy_rhs, *Ayz_rhs, *Azz_rhs;
-       var *Gmx_rhs, *Gmy_rhs, *Gmz_rhs;
-       var *Lap_rhs, *Sfx_rhs, *Sfy_rhs, *Sfz_rhs;
-       var *dtSfx_rhs, *dtSfy_rhs, *dtSfz_rhs;
-
-       var *rho, *Sx, *Sy, *Sz, *Sxx, *Sxy, *Sxz, *Syy, *Syz, *Szz;
-
-       var *Gamxxx, *Gamxxy, *Gamxxz, *Gamxyy, *Gamxyz, *Gamxzz;
-       var *Gamyxx, *Gamyxy, *Gamyxz, *Gamyyy, *Gamyyz, *Gamyzz;
-       var *Gamzxx, *Gamzxy, *Gamzxz, *Gamzyy, *Gamzyz, *Gamzzz;
-
-       var *Rxx, *Rxy, *Rxz, *Ryy, *Ryz, *Rzz;
-
-       var *Rpsi4, *Ipsi4;
-       var *t1Rpsi4, *t1Ipsi4, *t2Rpsi4, *t2Ipsi4;
-
-       var *Cons_Ham, *Cons_Px, *Cons_Py, *Cons_Pz, *Cons_Gx, *Cons_Gy, *Cons_Gz;
-
-#ifdef Point_Psi4
-       var *phix, *phiy, *phiz;
-       var *trKx, *trKy, *trKz;
-       var *Axxx, *Axxy, *Axxz;
-       var *Axyx, *Axyy, *Axyz;
-       var *Axzx, *Axzy, *Axzz;
-       var *Ayyx, *Ayyy, *Ayyz;
-       var *Ayzx, *Ayzy, *Ayzz;
-       var *Azzx, *Azzy, *Azzz;
-#endif
-       // FIXME: uc = StateList, up = OldStateList, upp = SynchList_cor; so never touch these three data
-       MyList<var> *StateList, *SynchList_pre, *SynchList_cor, *RHSList;
-       MyList<var> *OldStateList, *DumpList;
-       MyList<var> *ConstraintList;
-
-       Parallel::SyncCache *sync_cache_pre;  // per-level cache for predictor sync
-       Parallel::SyncCache *sync_cache_cor;  // per-level cache for corrector sync
-       Parallel::SyncCache *sync_cache_rp_coarse;  // RestrictProlong sync on PatL[lev-1]
-       Parallel::SyncCache *sync_cache_rp_fine;    // RestrictProlong sync on PatL[lev]
-
-       monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor;
-       monitor *ConVMonitor;
-       surface_integral *Waveshell;
-       checkpoint *CheckPoint;
-
-public:
-       bssn_class(double Couranti, double StartTimei, double TotalTimei, double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei,
-                  int Symmetryi, int checkruni, char *checkfilenamei, double numepssi, double numepsbi, double numepshi,
-                  int a_levi, int maxli, int decni, double maxrexi, double drexi);
-       ~bssn_class();
-
-       void Evolve(int Steps);
-       void RecursiveStep(int lev);
-#if (PSTR == 3)
-       void RecursiveStep(int lev, int num);
-#endif
-#if (PSTR == 1 || PSTR == 2 || PSTR == 3)
-       void ParallelStep();
-       void SHStep();
-#endif
-       void RestrictProlong(int lev, int YN, bool BB, MyList<var> *SL, MyList<var> *OL, MyList<var> *corL);
-       void RestrictProlong_aux(int lev, int YN, bool BB, MyList<var> *SL, MyList<var> *OL, MyList<var> *corL);
-       void RestrictProlong(int lev, int YN, bool BB);
-       void ProlongRestrict(int lev, int YN, bool BB);
-       void Setup_Black_Hole_position();
-       void compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, var *fory, var *forz, int lev);
-       bool read_Pablo_file(int *ext, double *datain, char *filename);
-       void write_Pablo_file(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax,
-                             char *filename);
-       void AnalysisStuff(int lev, double dT_lev);
-       void Setup_KerrSchild();
-       void Enforce_algcon(int lev, int fg);
-
-       void testRestrict();
-       void testOutBd();
-       
-       bool check_Stdin_Abort(); 
-
-       virtual void Setup_Initial_Data_Cao();
-       virtual void Setup_Initial_Data_Lousto();
-       virtual void Initialize();
-       virtual void Read_Ansorg();
-       virtual void Read_Pablo() {};
-       virtual void Compute_Psi4(int lev);
-       virtual void Step(int lev, int YN);
-       virtual void Interp_Constraint(bool infg);
-       virtual void Constraint_Out();
-       virtual void Compute_Constraint();
-
-#ifdef With_AHF
-protected:
-       MyList<var> *AHList, *AHDList, *GaugeList;
-       int AHfindevery;
-       double AHdumptime;
-       int *lastahdumpid, HN_num; // number of possible horizons
-       int *findeveryl;
-       double *xc, *yc, *zc, *xr, *yr, *zr;
-       bool *trigger;
-       double *dTT;
-       int *dumpid;
-
-public:
-       void AH_Prepare_derivatives();
-       bool AH_Interp_Points(MyList<var> *VarList,
-                             int NN, double **XX,
-                             double *Shellf, int Symmetryi);
-       void AH_Step_Find(int lev, double dT_lev);
-#endif
-};
-#endif /* BSSN_CLASS_H */
+
+#ifndef BSSN_CLASS_H
+#define BSSN_CLASS_H
+
+#ifdef newc
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <cstdlib>
+#include <string>
+#include <cmath>
+using namespace std;
+#else
+#include <iostream.h>
+#include <iomanip.h>
+#include <fstream.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#endif
+
+#include <mpi.h>
+
+#include "macrodef.h"
+#include "cgh.h"
+#include "ShellPatch.h"
+#include "misc.h"
+#include "var.h"
+#include "MyList.h"
+#include "monitor.h"
+#include "surface_integral.h"
+#include "checkpoint.h"
+
+extern void setpbh(int iBHN, double **iPBH, double *iMass, int rBHN);
+
+class bssn_class
+{
+public:
+       int ngfs;
+       int nprocs, myrank;
+       cgh *GH;
+       ShellPatch *SH;
+       double PhysTime;
+
+       int checkrun;
+       char checkfilename[50];
+       int Steps;
+       double StartTime, TotalTime;
+       double AnasTime, DumpTime, d2DumpTime, CheckTime;
+       double LastAnas, LastConsOut;
+       int *ConstraintRefreshLevels;
+       double Courant;
+       double numepss, numepsb, numepsh;
+       int Symmetry;
+       int maxl, decn;
+       double maxrex, drex;
+       int trfls, a_lev;
+
+       double dT;
+       double chitiny;
+
+       double **Porg0, **Porgbr, **Porg, **Porg1, **Porg_rhs;
+       int BH_num, BH_num_input;
+       double *Mass, *Pmom, *Spin;
+       double ADMMass;
+
+       var *phio, *trKo;
+       var *gxxo, *gxyo, *gxzo, *gyyo, *gyzo, *gzzo;
+       var *Axxo, *Axyo, *Axzo, *Ayyo, *Ayzo, *Azzo;
+       var *Gmxo, *Gmyo, *Gmzo;
+       var *Lapo, *Sfxo, *Sfyo, *Sfzo;
+       var *dtSfxo, *dtSfyo, *dtSfzo;
+
+       var *phi0, *trK0;
+       var *gxx0, *gxy0, *gxz0, *gyy0, *gyz0, *gzz0;
+       var *Axx0, *Axy0, *Axz0, *Ayy0, *Ayz0, *Azz0;
+       var *Gmx0, *Gmy0, *Gmz0;
+       var *Lap0, *Sfx0, *Sfy0, *Sfz0;
+       var *dtSfx0, *dtSfy0, *dtSfz0;
+
+       var *phi, *trK;
+       var *gxx, *gxy, *gxz, *gyy, *gyz, *gzz;
+       var *Axx, *Axy, *Axz, *Ayy, *Ayz, *Azz;
+       var *Gmx, *Gmy, *Gmz;
+       var *Lap, *Sfx, *Sfy, *Sfz;
+       var *dtSfx, *dtSfy, *dtSfz;
+
+       var *phi1, *trK1;
+       var *gxx1, *gxy1, *gxz1, *gyy1, *gyz1, *gzz1;
+       var *Axx1, *Axy1, *Axz1, *Ayy1, *Ayz1, *Azz1;
+       var *Gmx1, *Gmy1, *Gmz1;
+       var *Lap1, *Sfx1, *Sfy1, *Sfz1;
+       var *dtSfx1, *dtSfy1, *dtSfz1;
+
+       var *phi_rhs, *trK_rhs;
+       var *gxx_rhs, *gxy_rhs, *gxz_rhs, *gyy_rhs, *gyz_rhs, *gzz_rhs;
+       var *Axx_rhs, *Axy_rhs, *Axz_rhs, *Ayy_rhs, *Ayz_rhs, *Azz_rhs;
+       var *Gmx_rhs, *Gmy_rhs, *Gmz_rhs;
+       var *Lap_rhs, *Sfx_rhs, *Sfy_rhs, *Sfz_rhs;
+       var *dtSfx_rhs, *dtSfy_rhs, *dtSfz_rhs;
+
+       var *rho, *Sx, *Sy, *Sz, *Sxx, *Sxy, *Sxz, *Syy, *Syz, *Szz;
+
+       var *Gamxxx, *Gamxxy, *Gamxxz, *Gamxyy, *Gamxyz, *Gamxzz;
+       var *Gamyxx, *Gamyxy, *Gamyxz, *Gamyyy, *Gamyyz, *Gamyzz;
+       var *Gamzxx, *Gamzxy, *Gamzxz, *Gamzyy, *Gamzyz, *Gamzzz;
+
+       var *Rxx, *Rxy, *Rxz, *Ryy, *Ryz, *Rzz;
+
+       var *Rpsi4, *Ipsi4;
+       var *t1Rpsi4, *t1Ipsi4, *t2Rpsi4, *t2Ipsi4;
+
+       var *Cons_Ham, *Cons_Px, *Cons_Py, *Cons_Pz, *Cons_Gx, *Cons_Gy, *Cons_Gz;
+
+#ifdef Point_Psi4
+       var *phix, *phiy, *phiz;
+       var *trKx, *trKy, *trKz;
+       var *Axxx, *Axxy, *Axxz;
+       var *Axyx, *Axyy, *Axyz;
+       var *Axzx, *Axzy, *Axzz;
+       var *Ayyx, *Ayyy, *Ayyz;
+       var *Ayzx, *Ayzy, *Ayzz;
+       var *Azzx, *Azzy, *Azzz;
+#endif
+       // FIXME: uc = StateList, up = OldStateList, upp = SynchList_cor; so never touch these three data
+       MyList<var> *StateList, *SynchList_pre, *SynchList_cor, *RHSList;
+       MyList<var> *OldStateList, *DumpList;
+       MyList<var> *ConstraintList;
+
+       Parallel::SyncCache *sync_cache_pre;  // per-level cache for predictor sync
+       Parallel::SyncCache *sync_cache_cor;  // per-level cache for corrector sync
+       Parallel::SyncCache *sync_cache_rp_coarse;  // RestrictProlong sync on PatL[lev-1]
+       Parallel::SyncCache *sync_cache_rp_fine;    // RestrictProlong sync on PatL[lev]
+       Parallel::SyncCache *sync_cache_restrict;   // cached Restrict in RestrictProlong
+       Parallel::SyncCache *sync_cache_outbd;      // cached OutBdLow2Hi in RestrictProlong
+
+       monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor;
+       monitor *ConVMonitor, *TimingMonitor;
+       surface_integral *Waveshell;
+       checkpoint *CheckPoint;
+
+public:
+       bssn_class(double Couranti, double StartTimei, double TotalTimei, double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei,
+                  int Symmetryi, int checkruni, char *checkfilenamei, double numepssi, double numepsbi, double numepshi,
+                  int a_levi, int maxli, int decni, double maxrexi, double drexi);
+       ~bssn_class();
+
+       void Evolve(int Steps);
+       void RecursiveStep(int lev);
+#if (PSTR == 3)
+       void RecursiveStep(int lev, int num);
+#endif
+#if (PSTR == 1 || PSTR == 2 || PSTR == 3)
+       void ParallelStep();
+       void SHStep();
+#endif
+       void RestrictProlong(int lev, int YN, bool BB, MyList<var> *SL, MyList<var> *OL, MyList<var> *corL);
+       void RestrictProlong_aux(int lev, int YN, bool BB, MyList<var> *SL, MyList<var> *OL, MyList<var> *corL);
+       void RestrictProlong(int lev, int YN, bool BB);
+       void ProlongRestrict(int lev, int YN, bool BB);
+       void Setup_Black_Hole_position();
+       void compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, var *fory, var *forz, int lev);
+       bool read_Pablo_file(int *ext, double *datain, char *filename);
+       void write_Pablo_file(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax,
+                             char *filename);
+       void AnalysisStuff(int lev, double dT_lev);
+       void Setup_KerrSchild();
+       void Enforce_algcon(int lev, int fg);
+
+       void testRestrict();
+       void testOutBd();
+       
+       bool check_Stdin_Abort(); 
+
+       virtual void Setup_Initial_Data_Cao();
+       virtual void Setup_Initial_Data_Lousto();
+       virtual void Initialize();
+       virtual void Read_Ansorg();
+       virtual void Read_Pablo() {};
+       virtual void Compute_Psi4(int lev);
+       virtual void Step(int lev, int YN);
+       virtual void Interp_Constraint(bool infg);
+       virtual void Constraint_Out();
+       virtual void Compute_Constraint();
+
+#ifdef With_AHF
+protected:
+       MyList<var> *AHList, *AHDList, *GaugeList;
+       int AHfindevery;
+       double AHdumptime;
+       int *lastahdumpid, HN_num; // number of possible horizons
+       int *findeveryl;
+       double *xc, *yc, *zc, *xr, *yr, *zr;
+       bool *trigger;
+       double *dTT;
+       int *dumpid;
+
+public:
+       void AH_Prepare_derivatives();
+       bool AH_Interp_Points(MyList<var> *VarList,
+                             int NN, double **XX,
+                             double *Shellf, int Symmetryi);
+       void AH_Step_Find(int lev, double dT_lev);
+#endif
+};
+#endif /* BSSN_CLASS_H */
--- a/AMSS_NCKU_source/BSSN/bssn_constraint.f90
+++ b/AMSS_NCKU_source/BSSN/bssn_constraint.f90
--- a/AMSS_NCKU_source/BSSN/bssn_rhs.f90
+++ b/AMSS_NCKU_source/BSSN/bssn_rhs.f90
--- a/AMSS_NCKU_source/BSSN/bssn_rhs.h
+++ b/AMSS_NCKU_source/BSSN/bssn_rhs.h
@@ -1,231 +1,244 @@
-
-#ifndef BSSN_H
-#define BSSN_H
-
-#ifdef fortran1
-#define f_compute_rhs_bssn compute_rhs_bssn
-#define f_compute_rhs_bssn_ss compute_rhs_bssn_ss
-#define f_compute_rhs_bssn_escalar compute_rhs_bssn_escalar
-#define f_compute_rhs_bssn_escalar_ss compute_rhs_bssn_escalar_ss
-#define f_compute_rhs_Z4c compute_rhs_z4c
-#define f_compute_rhs_Z4cnot compute_rhs_z4cnot
-#define f_compute_rhs_Z4c_ss compute_rhs_z4c_ss
-#define f_compute_constraint_fr compute_constraint_fr
-#endif
-#ifdef fortran2
-#define f_compute_rhs_bssn COMPUTE_RHS_BSSN
-#define f_compute_rhs_bssn_ss COMPUTE_RHS_BSSN_SS
-#define f_compute_rhs_bssn_escalar COMPUTE_RHS_BSSN_ESCALAR
-#define f_compute_rhs_bssn_escalar_ss COMPUTE_RHS_BSSN_ESCALAR_SS
-#define f_compute_rhs_Z4c COMPUTE_RHS_Z4C
-#define f_compute_rhs_Z4cnot COMPUTE_RHS_Z4CNOT
-#define f_compute_rhs_Z4c_ss COMPUTE_RHS_Z4C_SS
-#define f_compute_constraint_fr COMPUTE_CONSTRAINT_FR
-#endif
-#ifdef fortran3
-#define f_compute_rhs_bssn compute_rhs_bssn_
-#define f_compute_rhs_bssn_ss compute_rhs_bssn_ss_
-#define f_compute_rhs_bssn_escalar compute_rhs_bssn_escalar_
-#define f_compute_rhs_bssn_escalar_ss compute_rhs_bssn_escalar_ss_
-#define f_compute_rhs_Z4c compute_rhs_z4c_
-#define f_compute_rhs_Z4cnot compute_rhs_z4cnot_
-#define f_compute_rhs_Z4c_ss compute_rhs_z4c_ss_
-#define f_compute_constraint_fr compute_constraint_fr_
-#endif
-extern "C"
-{
-        int f_compute_rhs_bssn(int *, double &, double *, double *, double *,                                                      // ex,T,X,Y,Z
-                               double *, double *,                                                                                 // chi, trK
-                               double *, double *, double *, double *, double *, double *,                                         // gij
-                               double *, double *, double *, double *, double *, double *,                                         // Aij
-                               double *, double *, double *,                                                                       // Gam
-                               double *, double *, double *, double *, double *, double *, double *,                               // Gauge
-                               double *, double *,                                                                                 // chi, trK
-                               double *, double *, double *, double *, double *, double *,                                         // gij
-                               double *, double *, double *, double *, double *, double *,                                         // Aij
-                               double *, double *, double *,                                                                       // Gam
-                               double *, double *, double *, double *, double *, double *, double *,                               // Gauge
-                               double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, // stress-energy
-                               double *, double *, double *, double *, double *, double *,                                         // Christoffel
-                               double *, double *, double *, double *, double *, double *,                                         // Christoffel
-                               double *, double *, double *, double *, double *, double *,                                         // Christoffel
-                               double *, double *, double *, double *, double *, double *,                                         // Ricci
-                               double *, double *, double *, double *, double *, double *, double *,                               // constraint violation
-                               int &, int &, double &, int &);
-}
-
-extern "C"
-{
-        int f_compute_rhs_bssn_ss(int *, double &, double *, double *, double *,                                                      // ex,T,rho,sigma,R
-                                  double *, double *, double *,                                                                       // X,Y,Z
-                                  double *, double *, double *,                                                                       // drhodx,drhody,drhodz
-                                  double *, double *, double *,                                                                       // dsigmadx,dsigmady,dsigmadz
-                                  double *, double *, double *,                                                                       // dRdx,dRdy,dRdz
-                                  double *, double *, double *, double *, double *, double *,                                         // drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz
-                                  double *, double *, double *, double *, double *, double *,                                         // dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz
-                                  double *, double *, double *, double *, double *, double *,                                         // dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz
-                                  double *, double *,                                                                                 // chi, trK
-                                  double *, double *, double *, double *, double *, double *,                                         // gij
-                                  double *, double *, double *, double *, double *, double *,                                         // Aij
-                                  double *, double *, double *,                                                                       // Gam
-                                  double *, double *, double *, double *, double *, double *, double *,                               // Gauge
-                                  double *, double *,                                                                                 // chi, trK
-                                  double *, double *, double *, double *, double *, double *,                                         // gij
-                                  double *, double *, double *, double *, double *, double *,                                         // Aij
-                                  double *, double *, double *,                                                                       // Gam
-                                  double *, double *, double *, double *, double *, double *, double *,                               // Gauge
-                                  double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, // stress-energy
-                                  double *, double *, double *, double *, double *, double *,                                         // Christoffel
-                                  double *, double *, double *, double *, double *, double *,                                         // Christoffel
-                                  double *, double *, double *, double *, double *, double *,                                         // Christoffel
-                                  double *, double *, double *, double *, double *, double *,                                         // Ricci
-                                  double *, double *, double *, double *, double *, double *, double *,                               // constraint violation
-                                  int &, int &, double &, int &, int &);
-}
-
-extern "C"
-{
-        int f_compute_rhs_bssn_escalar(int *, double &, double *, double *, double *,                                                      // ex,T,X,Y,Z
-                                       double *, double *,                                                                                 // chi, trK
-                                       double *, double *, double *, double *, double *, double *,                                         // gij
-                                       double *, double *, double *, double *, double *, double *,                                         // Aij
-                                       double *, double *, double *,                                                                       // Gam
-                                       double *, double *, double *, double *, double *, double *, double *,                               // Gauge
-                                       double *, double *,                                                                                 // Sphi, Spi
-                                       double *, double *,                                                                                 // chi, trK
-                                       double *, double *, double *, double *, double *, double *,                                         // gij
-                                       double *, double *, double *, double *, double *, double *,                                         // Aij
-                                       double *, double *, double *,                                                                       // Gam
-                                       double *, double *, double *, double *, double *, double *, double *,                               // Gauge
-                                       double *, double *,                                                                                 // Sphi, Spi
-                                       double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, // stress-energy
-                                       double *, double *, double *, double *, double *, double *,                                         // Christoffel
-                                       double *, double *, double *, double *, double *, double *,                                         // Christoffel
-                                       double *, double *, double *, double *, double *, double *,                                         // Christoffel
-                                       double *, double *, double *, double *, double *, double *,                                         // Ricci
-                                       double *, double *, double *, double *, double *, double *, double *,                               // constraint violation
-                                       int &, int &, double &, int &);
-}
-
-extern "C"
-{
-        int f_compute_rhs_bssn_escalar_ss(int *, double &, double *, double *, double *,                                                      // ex,T,rho,sigma,R
-                                          double *, double *, double *,                                                                       // X,Y,Z
-                                          double *, double *, double *,                                                                       // drhodx,drhody,drhodz
-                                          double *, double *, double *,                                                                       // dsigmadx,dsigmady,dsigmadz
-                                          double *, double *, double *,                                                                       // dRdx,dRdy,dRdz
-                                          double *, double *, double *, double *, double *, double *,                                         // drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz
-                                          double *, double *, double *, double *, double *, double *,                                         // dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz
-                                          double *, double *, double *, double *, double *, double *,                                         // dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz
-                                          double *, double *,                                                                                 // chi, trK
-                                          double *, double *, double *, double *, double *, double *,                                         // gij
-                                          double *, double *, double *, double *, double *, double *,                                         // Aij
-                                          double *, double *, double *,                                                                       // Gam
-                                          double *, double *, double *, double *, double *, double *, double *,                               // Gauge
-                                          double *, double *,                                                                                 // Sphi,Spi
-                                          double *, double *,                                                                                 // chi, trK
-                                          double *, double *, double *, double *, double *, double *,                                         // gij
-                                          double *, double *, double *, double *, double *, double *,                                         // Aij
-                                          double *, double *, double *,                                                                       // Gam
-                                          double *, double *, double *, double *, double *, double *, double *,                               // Gauge
-                                          double *, double *,                                                                                 // Sphi,Spi
-                                          double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, // stress-energy
-                                          double *, double *, double *, double *, double *, double *,                                         // Christoffel
-                                          double *, double *, double *, double *, double *, double *,                                         // Christoffel
-                                          double *, double *, double *, double *, double *, double *,                                         // Christoffel
-                                          double *, double *, double *, double *, double *, double *,                                         // Ricci
-                                          double *, double *, double *, double *, double *, double *, double *,                               // constraint violation
-                                          int &, int &, double &, int &, int &);
-}
-
-extern "C"
-{
-        int f_compute_rhs_Z4c(int *, double &, double *, double *, double *,                        // ex,T,X,Y,Z
-                              double *, double *,                                                   // chi, trK
-                              double *, double *, double *, double *, double *, double *,           // gij
-                              double *, double *, double *, double *, double *, double *,           // Aij
-                              double *, double *, double *,                                         // Gam
-                              double *, double *, double *, double *, double *, double *, double *, // Gauge
-                              double *,                                                             // Z4
-                              double *, double *,                                                   // chi, trK
-                              double *, double *, double *, double *, double *, double *,           // gij
-                              double *, double *, double *, double *, double *, double *,           // Aij
-                              double *, double *, double *,                                         // Gam
-                              double *, double *, double *, double *, double *, double *, double *, // Gauge
-                              double *,                                                             // Z4
-                              double *, double *, double *, double *, double *, double *, double *, double *, double *, double *,
-                              double *, double *, double *, double *, double *, double *,
-                              double *, double *, double *, double *, double *, double *,
-                              double *, double *, double *, double *, double *, double *,
-                              double *, double *, double *, double *, double *, double *,
-                              double *, double *, double *, double *, double *, double *, double *,
-                              int &, int &, double &, int &);
-}
-
-extern "C"
-{
-        int f_compute_rhs_Z4c_ss(int *, double &, double *, double *, double *,                                                      // ex,T,rho,sigma,R
-                                 double *, double *, double *,                                                                       // X,Y,Z
-                                 double *, double *, double *,                                                                       // drhodx,drhody,drhodz
-                                 double *, double *, double *,                                                                       // dsigmadx,dsigmady,dsigmadz
-                                 double *, double *, double *,                                                                       // dRdx,dRdy,dRdz
-                                 double *, double *, double *, double *, double *, double *,                                         // drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz
-                                 double *, double *, double *, double *, double *, double *,                                         // dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz
-                                 double *, double *, double *, double *, double *, double *,                                         // dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz
-                                 double *, double *,                                                                                 // chi, trK
-                                 double *, double *, double *, double *, double *, double *,                                         // gij
-                                 double *, double *, double *, double *, double *, double *,                                         // Aij
-                                 double *, double *, double *,                                                                       // Gam
-                                 double *, double *, double *, double *, double *, double *, double *,                               // Gauge
-                                 double *,                                                                                           // TZ
-                                 double *, double *,                                                                                 // chi, trK
-                                 double *, double *, double *, double *, double *, double *,                                         // gij
-                                 double *, double *, double *, double *, double *, double *,                                         // Aij
-                                 double *, double *, double *,                                                                       // Gam
-                                 double *, double *, double *, double *, double *, double *, double *,                               // Gauge
-                                 double *,                                                                                           // TZ
-                                 double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, // stress-energy
-                                 double *, double *, double *, double *, double *, double *,                                         // Christoffel
-                                 double *, double *, double *, double *, double *, double *,                                         // Christoffel
-                                 double *, double *, double *, double *, double *, double *,                                         // Christoffel
-                                 double *, double *, double *, double *, double *, double *,                                         // Ricci
-                                 double *, double *, double *, double *, double *, double *, double *,                               // constraint violation
-                                 int &, int &, double &, int &, int &);
-}
-
-extern "C"
-{
-        int f_compute_rhs_Z4cnot(int *, double &, double *, double *, double *,                        // ex,T,X,Y,Z
-                                 double *, double *,                                                   // chi, trK
-                                 double *, double *, double *, double *, double *, double *,           // gij
-                                 double *, double *, double *, double *, double *, double *,           // Aij
-                                 double *, double *, double *,                                         // Gam
-                                 double *, double *, double *, double *, double *, double *, double *, // Gauge
-                                 double *,                                                             // Z4
-                                 double *, double *,                                                   // chi, trK
-                                 double *, double *, double *, double *, double *, double *,           // gij
-                                 double *, double *, double *, double *, double *, double *,           // Aij
-                                 double *, double *, double *,                                         // Gam
-                                 double *, double *, double *, double *, double *, double *, double *, // Gauge
-                                 double *,                                                             // Z4
-                                 double *, double *, double *, double *, double *, double *, double *, double *, double *, double *,
-                                 double *, double *, double *, double *, double *, double *,
-                                 double *, double *, double *, double *, double *, double *,
-                                 double *, double *, double *, double *, double *, double *,
-                                 double *, double *, double *, double *, double *, double *,
-                                 double *, double *, double *, double *, double *, double *, double *,
-                                 int &, int &, double &, int &, double &);
-}
-
-extern "C"
-{
-        void f_compute_constraint_fr(int *, double *, double *, double *,                        // ex,X,Y,Z
-                                     double *, double *, double *, double *,                     // chi, trK,rho,Sphi
-                                     double *, double *, double *, double *, double *, double *, // gij
-                                     double *, double *, double *, double *, double *, double *, // Aij
-                                     double *, double *, double *, double *, double *, double *, // Rij
-                                     double *, double *, double *, double *, double *, double *, // Sij
-                                     double *);
-} // FR_cons
-
-#endif /* BSSN_H */
+
+#ifndef BSSN_H
+#define BSSN_H
+
+#ifdef fortran1 // map each f_* name to a lower-case symbol with no trailing underscore
+#define f_compute_rhs_bssn compute_rhs_bssn
+#define f_compute_rhs_bssn_ss compute_rhs_bssn_ss
+#define f_compute_rhs_bssn_escalar compute_rhs_bssn_escalar
+#define f_compute_rhs_bssn_escalar_ss compute_rhs_bssn_escalar_ss
+#define f_compute_rhs_Z4c compute_rhs_z4c
+#define f_compute_rhs_Z4cnot compute_rhs_z4cnot
+#define f_compute_rhs_Z4c_ss compute_rhs_z4c_ss
+#define f_compute_constraint_fr compute_constraint_fr
+#endif
+#ifdef fortran2 // map each f_* name to an upper-case symbol with no trailing underscore
+#define f_compute_rhs_bssn COMPUTE_RHS_BSSN
+#define f_compute_rhs_bssn_ss COMPUTE_RHS_BSSN_SS
+#define f_compute_rhs_bssn_escalar COMPUTE_RHS_BSSN_ESCALAR
+#define f_compute_rhs_bssn_escalar_ss COMPUTE_RHS_BSSN_ESCALAR_SS
+#define f_compute_rhs_Z4c COMPUTE_RHS_Z4C
+#define f_compute_rhs_Z4cnot COMPUTE_RHS_Z4CNOT
+#define f_compute_rhs_Z4c_ss COMPUTE_RHS_Z4C_SS
+#define f_compute_constraint_fr COMPUTE_CONSTRAINT_FR
+#endif
+#ifdef fortran3 // map each f_* name to a lower-case symbol with one trailing underscore
+#define f_compute_rhs_bssn compute_rhs_bssn_
+#define f_compute_rhs_bssn_ss compute_rhs_bssn_ss_
+#define f_compute_rhs_bssn_escalar compute_rhs_bssn_escalar_
+#define f_compute_rhs_bssn_escalar_ss compute_rhs_bssn_escalar_ss_
+#define f_compute_rhs_Z4c compute_rhs_z4c_
+#define f_compute_rhs_Z4cnot compute_rhs_z4cnot_
+#define f_compute_rhs_Z4c_ss compute_rhs_z4c_ss_
+#define f_compute_constraint_fr compute_constraint_fr_
+#endif // NOTE(review): if none of fortran1/2/3 is defined, the f_* names are left unmapped
+
+#ifdef __cplusplus // C linkage is requested only when this header is compiled as C++
+extern "C"
+{
+#endif // NOTE(review): these four names have no #define in the fortran1/2/3 blocks above -- confirm where they are implemented
+        void f_bssn_rhs_kernel_timing_reset();                  // presumably clears the accumulated kernel timers -- TODO confirm
+        int f_bssn_rhs_kernel_timing_bucket_count();            // presumably the number of timing buckets -- TODO confirm
+        const double *f_bssn_rhs_kernel_timing_local_seconds(); // read-only double array; length and ownership are not visible here -- TODO confirm
+        const char *f_bssn_rhs_kernel_timing_label(int);        // read-only string selected by the int argument; valid range not visible here -- TODO confirm
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+extern "C" // C linkage: f_compute_rhs_bssn expands to the symbol chosen by the fortran1/2/3 macros
+{
+        int f_compute_rhs_bssn(int *, double &, double *, double *, double *,                                                      // ex,T,X,Y,Z
+                               double *, double *,                                                                                 // chi, trK
+                               double *, double *, double *, double *, double *, double *,                                         // gij
+                               double *, double *, double *, double *, double *, double *,                                         // Aij
+                               double *, double *, double *,                                                                       // Gam
+                               double *, double *, double *, double *, double *, double *, double *,                               // Gauge
+                               double *, double *,                                                                                 // chi, trK (second set; presumably the RHS outputs -- TODO confirm against bssn_rhs.f90)
+                               double *, double *, double *, double *, double *, double *,                                         // gij (second set)
+                               double *, double *, double *, double *, double *, double *,                                         // Aij (second set)
+                               double *, double *, double *,                                                                       // Gam (second set)
+                               double *, double *, double *, double *, double *, double *, double *,                               // Gauge (second set)
+                               double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, // stress-energy
+                               double *, double *, double *, double *, double *, double *,                                         // Christoffel
+                               double *, double *, double *, double *, double *, double *,                                         // Christoffel
+                               double *, double *, double *, double *, double *, double *,                                         // Christoffel
+                               double *, double *, double *, double *, double *, double *,                                         // Ricci
+                               double *, double *, double *, double *, double *, double *, double *,                               // constraint violation
+                               int &, int &, double &, int &); // four trailing scalars passed by reference; their meaning is not visible in this header
+} // extern "C"
+
+extern "C" // C linkage: f_compute_rhs_bssn_ss expands to the symbol chosen by the fortran1/2/3 macros
+{
+        int f_compute_rhs_bssn_ss(int *, double &, double *, double *, double *,                                                      // ex,T,rho,sigma,R
+                                  double *, double *, double *,                                                                       // X,Y,Z
+                                  double *, double *, double *,                                                                       // drhodx,drhody,drhodz
+                                  double *, double *, double *,                                                                       // dsigmadx,dsigmady,dsigmadz
+                                  double *, double *, double *,                                                                       // dRdx,dRdy,dRdz
+                                  double *, double *, double *, double *, double *, double *,                                         // drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz
+                                  double *, double *, double *, double *, double *, double *,                                         // dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz
+                                  double *, double *, double *, double *, double *, double *,                                         // dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz
+                                  double *, double *,                                                                                 // chi, trK
+                                  double *, double *, double *, double *, double *, double *,                                         // gij
+                                  double *, double *, double *, double *, double *, double *,                                         // Aij
+                                  double *, double *, double *,                                                                       // Gam
+                                  double *, double *, double *, double *, double *, double *, double *,                               // Gauge
+                                  double *, double *,                                                                                 // chi, trK (second set; presumably the RHS outputs -- TODO confirm against bssn_rhs.f90)
+                                  double *, double *, double *, double *, double *, double *,                                         // gij (second set)
+                                  double *, double *, double *, double *, double *, double *,                                         // Aij (second set)
+                                  double *, double *, double *,                                                                       // Gam (second set)
+                                  double *, double *, double *, double *, double *, double *, double *,                               // Gauge (second set)
+                                  double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, // stress-energy
+                                  double *, double *, double *, double *, double *, double *,                                         // Christoffel
+                                  double *, double *, double *, double *, double *, double *,                                         // Christoffel
+                                  double *, double *, double *, double *, double *, double *,                                         // Christoffel
+                                  double *, double *, double *, double *, double *, double *,                                         // Ricci
+                                  double *, double *, double *, double *, double *, double *, double *,                               // constraint violation
+                                  int &, int &, double &, int &, int &); // five trailing scalars by reference (one more int & than f_compute_rhs_bssn); meaning not visible here
+} // extern "C"
+
+extern "C"
+{
+        int f_compute_rhs_bssn_escalar(int *, double &, double *, double *, double *,                                                      // ex,T,X,Y,Z
+                                       double *, double *,                                                                                 // chi, trK
+                                       double *, double *, double *, double *, double *, double *,                                         // gij
+                                       double *, double *, double *, double *, double *, double *,                                         // Aij
+                                       double *, double *, double *,                                                                       // Gam
+                                       double *, double *, double *, double *, double *, double *, double *,                               // Gauge
+                                       double *, double *,                                                                                 // Sphi, Spi
+                                       double *, double *,                                                                                 // chi, trK (second set of the same list; presumably the RHS outputs -- TODO confirm)
+                                       double *, double *, double *, double *, double *, double *,                                         // gij
+                                       double *, double *, double *, double *, double *, double *,                                         // Aij
+                                       double *, double *, double *,                                                                       // Gam
+                                       double *, double *, double *, double *, double *, double *, double *,                               // Gauge
+                                       double *, double *,                                                                                 // Sphi, Spi
+                                       double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, // stress-energy
+                                       double *, double *, double *, double *, double *, double *,                                         // Christoffel
+                                       double *, double *, double *, double *, double *, double *,                                         // Christoffel
+                                       double *, double *, double *, double *, double *, double *,                                         // Christoffel
+                                       double *, double *, double *, double *, double *, double *,                                         // Ricci
+                                       double *, double *, double *, double *, double *, double *, double *,                               // constraint violation
+                                       int &, int &, double &, int &); // four scalars passed by reference; not labelled in this header
+}
+
+extern "C"
+{
+        int f_compute_rhs_bssn_escalar_ss(int *, double &, double *, double *, double *,                                                      // ex,T,rho,sigma,R
+                                          double *, double *, double *,                                                                       // X,Y,Z
+                                          double *, double *, double *,                                                                       // drhodx,drhody,drhodz
+                                          double *, double *, double *,                                                                       // dsigmadx,dsigmady,dsigmadz
+                                          double *, double *, double *,                                                                       // dRdx,dRdy,dRdz
+                                          double *, double *, double *, double *, double *, double *,                                         // drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz
+                                          double *, double *, double *, double *, double *, double *,                                         // dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz
+                                          double *, double *, double *, double *, double *, double *,                                         // dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz
+                                          double *, double *,                                                                                 // chi, trK
+                                          double *, double *, double *, double *, double *, double *,                                         // gij
+                                          double *, double *, double *, double *, double *, double *,                                         // Aij
+                                          double *, double *, double *,                                                                       // Gam
+                                          double *, double *, double *, double *, double *, double *, double *,                               // Gauge
+                                          double *, double *,                                                                                 // Sphi,Spi
+                                          double *, double *,                                                                                 // chi, trK (second set of the same list; presumably the RHS outputs -- TODO confirm)
+                                          double *, double *, double *, double *, double *, double *,                                         // gij
+                                          double *, double *, double *, double *, double *, double *,                                         // Aij
+                                          double *, double *, double *,                                                                       // Gam
+                                          double *, double *, double *, double *, double *, double *, double *,                               // Gauge
+                                          double *, double *,                                                                                 // Sphi,Spi
+                                          double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, // stress-energy
+                                          double *, double *, double *, double *, double *, double *,                                         // Christoffel
+                                          double *, double *, double *, double *, double *, double *,                                         // Christoffel
+                                          double *, double *, double *, double *, double *, double *,                                         // Christoffel
+                                          double *, double *, double *, double *, double *, double *,                                         // Ricci
+                                          double *, double *, double *, double *, double *, double *, double *,                               // constraint violation
+                                          int &, int &, double &, int &, int &); // five scalars passed by reference; not labelled in this header
+}
+
+extern "C"
+{
+        int f_compute_rhs_Z4c(int *, double &, double *, double *, double *,                        // ex,T,X,Y,Z
+                              double *, double *,                                                   // chi, trK
+                              double *, double *, double *, double *, double *, double *,           // gij
+                              double *, double *, double *, double *, double *, double *,           // Aij
+                              double *, double *, double *,                                         // Gam
+                              double *, double *, double *, double *, double *, double *, double *, // Gauge
+                              double *,                                                             // Z4
+                              double *, double *,                                                   // chi, trK (second set of the same list; presumably the RHS outputs -- TODO confirm)
+                              double *, double *, double *, double *, double *, double *,           // gij
+                              double *, double *, double *, double *, double *, double *,           // Aij
+                              double *, double *, double *,                                         // Gam
+                              double *, double *, double *, double *, double *, double *, double *, // Gauge
+                              double *,                                                             // Z4
+                              double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, // stress-energy (this and the next five labels mirror f_compute_rhs_Z4c_ss -- TODO confirm)
+                              double *, double *, double *, double *, double *, double *, // Christoffel
+                              double *, double *, double *, double *, double *, double *, // Christoffel
+                              double *, double *, double *, double *, double *, double *, // Christoffel
+                              double *, double *, double *, double *, double *, double *, // Ricci
+                              double *, double *, double *, double *, double *, double *, double *, // constraint violation
+                              int &, int &, double &, int &); // four scalars passed by reference; not labelled in this header
+}
+
+extern "C"
+{
+        int f_compute_rhs_Z4c_ss(int *, double &, double *, double *, double *,                                                      // ex,T,rho,sigma,R
+                                 double *, double *, double *,                                                                       // X,Y,Z
+                                 double *, double *, double *,                                                                       // drhodx,drhody,drhodz
+                                 double *, double *, double *,                                                                       // dsigmadx,dsigmady,dsigmadz
+                                 double *, double *, double *,                                                                       // dRdx,dRdy,dRdz
+                                 double *, double *, double *, double *, double *, double *,                                         // drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz
+                                 double *, double *, double *, double *, double *, double *,                                         // dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz
+                                 double *, double *, double *, double *, double *, double *,                                         // dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz
+                                 double *, double *,                                                                                 // chi, trK
+                                 double *, double *, double *, double *, double *, double *,                                         // gij
+                                 double *, double *, double *, double *, double *, double *,                                         // Aij
+                                 double *, double *, double *,                                                                       // Gam
+                                 double *, double *, double *, double *, double *, double *, double *,                               // Gauge
+                                 double *,                                                                                           // TZ
+                                 double *, double *,                                                                                 // chi, trK (second set of the same list; presumably the RHS outputs -- TODO confirm)
+                                 double *, double *, double *, double *, double *, double *,                                         // gij
+                                 double *, double *, double *, double *, double *, double *,                                         // Aij
+                                 double *, double *, double *,                                                                       // Gam
+                                 double *, double *, double *, double *, double *, double *, double *,                               // Gauge
+                                 double *,                                                                                           // TZ
+                                 double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, // stress-energy
+                                 double *, double *, double *, double *, double *, double *,                                         // Christoffel
+                                 double *, double *, double *, double *, double *, double *,                                         // Christoffel
+                                 double *, double *, double *, double *, double *, double *,                                         // Christoffel
+                                 double *, double *, double *, double *, double *, double *,                                         // Ricci
+                                 double *, double *, double *, double *, double *, double *, double *,                               // constraint violation
+                                 int &, int &, double &, int &, int &); // five scalars passed by reference; not labelled in this header
+}
+
+extern "C"
+{
+        int f_compute_rhs_Z4cnot(int *, double &, double *, double *, double *,                        // ex,T,X,Y,Z
+                                 double *, double *,                                                   // chi, trK
+                                 double *, double *, double *, double *, double *, double *,           // gij
+                                 double *, double *, double *, double *, double *, double *,           // Aij
+                                 double *, double *, double *,                                         // Gam
+                                 double *, double *, double *, double *, double *, double *, double *, // Gauge
+                                 double *,                                                             // Z4
+                                 double *, double *,                                                   // chi, trK (second set of the same list; presumably the RHS outputs -- TODO confirm)
+                                 double *, double *, double *, double *, double *, double *,           // gij
+                                 double *, double *, double *, double *, double *, double *,           // Aij
+                                 double *, double *, double *,                                         // Gam
+                                 double *, double *, double *, double *, double *, double *, double *, // Gauge
+                                 double *,                                                             // Z4
+                                 double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, // stress-energy (this and the next five labels mirror f_compute_rhs_Z4c_ss -- TODO confirm)
+                                 double *, double *, double *, double *, double *, double *, // Christoffel
+                                 double *, double *, double *, double *, double *, double *, // Christoffel
+                                 double *, double *, double *, double *, double *, double *, // Christoffel
+                                 double *, double *, double *, double *, double *, double *, // Ricci
+                                 double *, double *, double *, double *, double *, double *, double *, // constraint violation
+                                 int &, int &, double &, int &, double &); // five scalars passed by reference (the last is a double); not labelled in this header
+}
+
+extern "C"
+{
+        void f_compute_constraint_fr(int *, double *, double *, double *,                        // ex,X,Y,Z
+                                     double *, double *, double *, double *,                     // chi, trK,rho,Sphi
+                                     double *, double *, double *, double *, double *, double *, // gij
+                                     double *, double *, double *, double *, double *, double *, // Aij
+                                     double *, double *, double *, double *, double *, double *, // Rij
+                                     double *, double *, double *, double *, double *, double *, // Sij
+                                     double *); // presumably the output constraint array (cf. "FR_cons" below) -- TODO confirm
+} // FR_cons
+
+#endif /* BSSN_H */
--- a/AMSS_NCKU_source/BSSN/bssn_rhs_c.C
+++ b/AMSS_NCKU_source/BSSN/bssn_rhs_c.C
--- a/AMSS_NCKU_source/BSSN/bssn_rhs_ss.f90
+++ b/AMSS_NCKU_source/BSSN/bssn_rhs_ss.f90
--- a/AMSS_NCKU_source/BSSN/empart.f90
+++ b/AMSS_NCKU_source/BSSN/empart.f90
--- a/AMSS_NCKU_source/BSSN/empart.h
+++ b/AMSS_NCKU_source/BSSN/empart.h
@@ -1,45 +1,45 @@
-
-#ifndef EMPART_H
-#define EMPART_H
-
-#ifdef fortran1
-#define f_compute_rhs_empart compute_rhs_empart
-#define f_compute_rhs_empart_ss compute_rhs_empart_ss
-#endif
-#ifdef fortran2
-#define f_compute_rhs_empart COMPUTE_RHS_EMPART
-#define f_compute_rhs_empart_ss COMPUTE_RHS_EMPART_SS
-#endif
-#ifdef fortran3
-#define f_compute_rhs_empart compute_rhs_empart_
-#define f_compute_rhs_empart_ss compute_rhs_empart_ss_
-#endif
-
-extern "C"
-{
-    int f_compute_rhs_empart(int *, double *, double *, double *,
-                             double *, double *, double *, double *, double *, double *, double *,
-                             double *, double *, double *, double *, double *,
-                             double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, double *,
-                             double *, double *, double *, double *, double *, double *, double *, double *,
-                             double *, double *, double *, double *, double *, double *, double *, double *, double *, double *,
-                             int &, int &, double &);
-}
-
-extern "C"
-{
-    int f_compute_rhs_empart_ss(int *, double *, double *, double *, double *, double *, double *,
-                                double *, double *, double *,
-                                double *, double *, double *,
-                                double *, double *, double *,
-                                double *, double *, double *, double *, double *, double *,
-                                double *, double *, double *, double *, double *, double *,
-                                double *, double *, double *, double *, double *, double *,
-                                double *, double *, double *, double *, double *, double *, double *,
-                                double *, double *, double *, double *, double *,
-                                double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, double *,
-                                double *, double *, double *, double *, double *, double *, double *, double *,
-                                double *, double *, double *, double *, double *, double *, double *, double *, double *, double *,
-                                int &, int &, double &, int &);
-}
-#endif /* EMPART_H */
+
+#ifndef EMPART_H
+#define EMPART_H
+
+#ifdef fortran1 // map the f_ names to lowercase symbols without a trailing underscore
+#define f_compute_rhs_empart compute_rhs_empart
+#define f_compute_rhs_empart_ss compute_rhs_empart_ss
+#endif
+#ifdef fortran2 // map the f_ names to uppercase symbols
+#define f_compute_rhs_empart COMPUTE_RHS_EMPART
+#define f_compute_rhs_empart_ss COMPUTE_RHS_EMPART_SS
+#endif
+#ifdef fortran3 // map the f_ names to lowercase symbols with a trailing underscore
+#define f_compute_rhs_empart compute_rhs_empart_
+#define f_compute_rhs_empart_ss compute_rhs_empart_ss_
+#endif
+
+extern "C" // C-linkage declaration; the name is rewritten by the fortran1/2/3 macros above when one of them is defined
+{
+    int f_compute_rhs_empart(int *, double *, double *, double *,
+                             double *, double *, double *, double *, double *, double *, double *,
+                             double *, double *, double *, double *, double *,
+                             double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, double *,
+                             double *, double *, double *, double *, double *, double *, double *, double *,
+                             double *, double *, double *, double *, double *, double *, double *, double *, double *, double *,
+                             int &, int &, double &); // three scalars passed by reference; argument meanings are not labelled in this header
+}
+
+extern "C" // C-linkage declaration; the name is rewritten by the fortran1/2/3 macros above when one of them is defined
+{
+    int f_compute_rhs_empart_ss(int *, double *, double *, double *, double *, double *, double *,
+                                double *, double *, double *,
+                                double *, double *, double *,
+                                double *, double *, double *,
+                                double *, double *, double *, double *, double *, double *,
+                                double *, double *, double *, double *, double *, double *,
+                                double *, double *, double *, double *, double *, double *,
+                                double *, double *, double *, double *, double *, double *, double *,
+                                double *, double *, double *, double *, double *,
+                                double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, double *,
+                                double *, double *, double *, double *, double *, double *, double *, double *,
+                                double *, double *, double *, double *, double *, double *, double *, double *, double *, double *,
+                                int &, int &, double &, int &); // four scalars passed by reference; argument meanings are not labelled in this header
+}
+#endif /* EMPART_H */
--- a/AMSS_NCKU_source/BSSN/enforce_algebra.f90
+++ b/AMSS_NCKU_source/BSSN/enforce_algebra.f90
@@ -1,230 +1,230 @@
-
-!-----------------------------------------------------------------------------
-!
-! remove the trace of Aij
-! trace-free Aij and enforce the determinant of bssn metric to one
-!-----------------------------------------------------------------------------
-
-  subroutine enforce_ag(ex,  dxx,  gxy,  gxz,  dyy,  gyz,  dzz, &
-                             Axx,  Axy,  Axz,  Ayy,  Ayz,  Azz)
-  implicit none
-
-!~~~~~~> Input parameters:
-
-  integer,                              intent(in)    :: ex(1:3)
-  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: dxx,dyy,dzz
-  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: gxy,gxz,gyz
-  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Axx,Axy,Axz
-  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz
-
-!~~~~~~~> Local variable:
-
-  integer :: i,j,k
-  real*8 :: lgxx,lgyy,lgzz,ldetg
-  real*8 :: lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz
-  real*8 :: ltrA,lscale
-  real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0
-
-!~~~~~~>
-
-  do k=1,ex(3)
-  do j=1,ex(2)
-  do i=1,ex(1)
-
-    lgxx = dxx(i,j,k) + ONE
-    lgyy = dyy(i,j,k) + ONE
-    lgzz = dzz(i,j,k) + ONE
-
-    ldetg =  lgxx * lgyy * lgzz &
-           + gxy(i,j,k) * gyz(i,j,k) * gxz(i,j,k) &
-           + gxz(i,j,k) * gxy(i,j,k) * gyz(i,j,k) &
-           - gxz(i,j,k) * lgyy * gxz(i,j,k) &
-           - gxy(i,j,k) * gxy(i,j,k) * lgzz &
-           - lgxx * gyz(i,j,k) * gyz(i,j,k)
-
-    lgupxx =   ( lgyy * lgzz - gyz(i,j,k) * gyz(i,j,k) ) / ldetg
-    lgupxy = - ( gxy(i,j,k) * lgzz - gyz(i,j,k) * gxz(i,j,k) ) / ldetg
-    lgupxz =   ( gxy(i,j,k) * gyz(i,j,k) - lgyy * gxz(i,j,k) ) / ldetg
-    lgupyy =   ( lgxx * lgzz - gxz(i,j,k) * gxz(i,j,k) ) / ldetg
-    lgupyz = - ( lgxx * gyz(i,j,k) - gxy(i,j,k) * gxz(i,j,k) ) / ldetg
-    lgupzz =   ( lgxx * lgyy - gxy(i,j,k) * gxy(i,j,k) ) / ldetg
-
-    ltrA =         lgupxx * Axx(i,j,k) + lgupyy * Ayy(i,j,k) &
-                 + lgupzz * Azz(i,j,k) &
-         + TWO * (lgupxy * Axy(i,j,k) + lgupxz * Axz(i,j,k) &
-                 + lgupyz * Ayz(i,j,k))
-
-    Axx(i,j,k) = Axx(i,j,k) - F1o3 * lgxx * ltrA
-    Axy(i,j,k) = Axy(i,j,k) - F1o3 * gxy(i,j,k) * ltrA
-    Axz(i,j,k) = Axz(i,j,k) - F1o3 * gxz(i,j,k) * ltrA
-    Ayy(i,j,k) = Ayy(i,j,k) - F1o3 * lgyy * ltrA
-    Ayz(i,j,k) = Ayz(i,j,k) - F1o3 * gyz(i,j,k) * ltrA
-    Azz(i,j,k) = Azz(i,j,k) - F1o3 * lgzz * ltrA
-
-    lscale = ONE / ( ldetg ** F1o3 )
-
-    dxx(i,j,k) = lgxx * lscale - ONE
-    gxy(i,j,k) = gxy(i,j,k) * lscale
-    gxz(i,j,k) = gxz(i,j,k) * lscale
-    dyy(i,j,k) = lgyy * lscale - ONE
-    gyz(i,j,k) = gyz(i,j,k) * lscale
-    dzz(i,j,k) = lgzz * lscale - ONE
-
-  enddo
-  enddo
-  enddo
-
-  return
-
-  end subroutine enforce_ag
-#if 1 
-!----------------------------------------------------------------------------------  
-! swap the turn of a and g
-!----------------------------------------------------------------------------------
-  subroutine enforce_ga(ex,  dxx,  gxy,  gxz,  dyy,  gyz,  dzz, &
-                             Axx,  Axy,  Axz,  Ayy,  Ayz,  Azz)
-  implicit none
-
-!~~~~~~> Input parameters:
-
-  integer,                              intent(in)    :: ex(1:3)
-  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: dxx,dyy,dzz
-  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: gxy,gxz,gyz
-  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Axx,Axy,Axz
-  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz
-
-!~~~~~~~> Local variable:
-
-  integer :: i,j,k
-  real*8 :: lgxx,lgyy,lgzz,lscale
-  real*8 :: lgxy,lgxz,lgyz
-  real*8 :: lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz
-  real*8 :: ltrA
-  real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0
-
-!~~~~~~>
-
-  do k=1,ex(3)
-  do j=1,ex(2)
-  do i=1,ex(1)
-
-! for g: normalize determinant first
-    lgxx = dxx(i,j,k) + ONE
-    lgyy = dyy(i,j,k) + ONE
-    lgzz = dzz(i,j,k) + ONE
-    lgxy = gxy(i,j,k)
-    lgxz = gxz(i,j,k)
-    lgyz = gyz(i,j,k)
-
-    lscale =  lgxx * lgyy * lgzz + lgxy * lgyz * lgxz &
-            + lgxz * lgxy * lgyz - lgxz * lgyy * lgxz &
-            - lgxy * lgxy * lgzz - lgxx * lgyz * lgyz
-
-    lscale = ONE / ( lscale ** F1o3 )
-
-    lgxx = lgxx * lscale
-    lgxy = lgxy * lscale
-    lgxz = lgxz * lscale
-    lgyy = lgyy * lscale
-    lgyz = lgyz * lscale
-    lgzz = lgzz * lscale
-
-    dxx(i,j,k) = lgxx - ONE
-    gxy(i,j,k) = lgxy
-    gxz(i,j,k) = lgxz
-    dyy(i,j,k) = lgyy - ONE
-    gyz(i,j,k) = lgyz
-    dzz(i,j,k) = lgzz - ONE
-
-! for A: trace-free using normalized metric (det=1, no division needed)
-    lgupxx =   ( lgyy * lgzz - lgyz * lgyz )
-    lgupxy = - ( lgxy * lgzz - lgyz * lgxz )
-    lgupxz =   ( lgxy * lgyz - lgyy * lgxz )
-    lgupyy =   ( lgxx * lgzz - lgxz * lgxz )
-    lgupyz = - ( lgxx * lgyz - lgxy * lgxz )
-    lgupzz =   ( lgxx * lgyy - lgxy * lgxy )
-
-    ltrA =         lgupxx * Axx(i,j,k) + lgupyy * Ayy(i,j,k) &
-                 + lgupzz * Azz(i,j,k) &
-         + TWO * (lgupxy * Axy(i,j,k) + lgupxz * Axz(i,j,k) &
-                 + lgupyz * Ayz(i,j,k))
-
-    Axx(i,j,k) = Axx(i,j,k) - F1o3 * lgxx * ltrA
-    Axy(i,j,k) = Axy(i,j,k) - F1o3 * lgxy * ltrA
-    Axz(i,j,k) = Axz(i,j,k) - F1o3 * lgxz * ltrA
-    Ayy(i,j,k) = Ayy(i,j,k) - F1o3 * lgyy * ltrA
-    Ayz(i,j,k) = Ayz(i,j,k) - F1o3 * lgyz * ltrA
-    Azz(i,j,k) = Azz(i,j,k) - F1o3 * lgzz * ltrA
-
-  enddo
-  enddo
-  enddo
-
-  return
-
-  end subroutine enforce_ga
-#else
-!----------------------------------------------------------------------------------  
-! duplicate bam
-!----------------------------------------------------------------------------------
-  subroutine enforce_ga(ex,  dxx,  gxy,  gxz,  dyy,  gyz,  dzz, &
-                             Axx,  Axy,  Axz,  Ayy,  Ayz,  Azz)
-  implicit none
-
-!~~~~~~> Input parameters:
-
-  integer,                              intent(in)    :: ex(1:3)
-  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: dxx,dyy,dzz
-  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: gxy,gxz,gyz
-  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Axx,Axy,Axz
-  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz
-
-!~~~~~~~> Local variable:
-  
-  real*8, dimension(ex(1),ex(2),ex(3)) :: trA
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz 
-  real*8, dimension(ex(1),ex(2),ex(3)) :: aux,detginv
-  real*8, parameter :: oot = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0
-
-!~~~~~~>
-
-  gxx = dxx + ONE
-  gyy = dyy + ONE
-  gzz = dzz + ONE
-! for g
-aux = (2.d0*gxy*gxz*gyz + gxx*gyy*gzz &
-    - gzz*gxy**2 - gyy*gxz**2 - gxx*gyz**2)**(-oot)
-
-  gxx = gxx * aux
-  gxy = gxy * aux
-  gxz = gxz * aux
-  gyy = gyy * aux
-  gyz = gyz * aux
-  gzz = gzz * aux
-
-  dxx = gxx - ONE
-  dyy = gyy - ONE
-  dzz = gzz - ONE
-! for A  
-
-detginv = 1/(2.d0*gxy*gxz*gyz + gxx*gyy*gzz &
-    - gzz*gxy**2 - gyy*gxz**2 - gxx*gyz**2)
-
-trA = detginv*(-2.d0*Ayz*gxx*gyz + Axx*gyy*gzz + &
-    gxx*(Azz*gyy + Ayy*gzz) + 2.d0*(gxz*(Ayz*gxy - Axz*gyy + &
-    Axy*gyz) + gxy*(Axz*gyz - Axy*gzz)) - Azz*gxy**2 - Ayy*gxz**2 - &
-    Axx*gyz**2)
-
-aux = -(oot*trA)
-
-  Axx = Axx + aux * gxx
-  Axy = Axy + aux * gxy
-  Axz = Axz + aux * gxz
-  Ayy = Ayy + aux * gyy
-  Ayz = Ayz + aux * gyz
-  Azz = Azz + aux * gzz
-
-  return
-
-  end subroutine enforce_ga
-#endif
+
+!-----------------------------------------------------------------------------
+! enforce_ag: pointwise algebraic constraint enforcement, A first, then g.
+! Removes the trace of Aij (using the inverse of the incoming metric), then
+! rescales the metric by det**(-1/3) so that its determinant becomes one.
+!-----------------------------------------------------------------------------
+
+  subroutine enforce_ag(ex,  dxx,  gxy,  gxz,  dyy,  gyz,  dzz, &
+                             Axx,  Axy,  Axz,  Ayy,  Ayz,  Azz)
+  implicit none
+
+!~~~~~~> Arguments (ex: array extents; all field arrays are modified in place):
+! dxx,dyy,dzz hold the diagonal metric components minus one
+  integer,                              intent(in)    :: ex(1:3)
+  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: dxx,dyy,dzz
+  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: gxy,gxz,gyz
+  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Axx,Axy,Axz
+  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz
+
+!~~~~~~~> Local variables (per-point scalars):
+
+  integer :: i,j,k
+  real*8 :: lgxx,lgyy,lgzz,ldetg
+  real*8 :: lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz
+  real*8 :: ltrA,lscale
+  real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0
+
+!~~~~~~>
+
+  do k=1,ex(3)
+  do j=1,ex(2)
+  do i=1,ex(1)
+! rebuild the full diagonal metric components from the stored deviations
+    lgxx = dxx(i,j,k) + ONE
+    lgyy = dyy(i,j,k) + ONE
+    lgzz = dzz(i,j,k) + ONE
+! determinant of the symmetric 3x3 metric
+    ldetg =  lgxx * lgyy * lgzz &
+           + gxy(i,j,k) * gyz(i,j,k) * gxz(i,j,k) &
+           + gxz(i,j,k) * gxy(i,j,k) * gyz(i,j,k) &
+           - gxz(i,j,k) * lgyy * gxz(i,j,k) &
+           - gxy(i,j,k) * gxy(i,j,k) * lgzz &
+           - lgxx * gyz(i,j,k) * gyz(i,j,k)
+! inverse metric: cofactors divided by the determinant
+    lgupxx =   ( lgyy * lgzz - gyz(i,j,k) * gyz(i,j,k) ) / ldetg
+    lgupxy = - ( gxy(i,j,k) * lgzz - gyz(i,j,k) * gxz(i,j,k) ) / ldetg
+    lgupxz =   ( gxy(i,j,k) * gyz(i,j,k) - lgyy * gxz(i,j,k) ) / ldetg
+    lgupyy =   ( lgxx * lgzz - gxz(i,j,k) * gxz(i,j,k) ) / ldetg
+    lgupyz = - ( lgxx * gyz(i,j,k) - gxy(i,j,k) * gxz(i,j,k) ) / ldetg
+    lgupzz =   ( lgxx * lgyy - gxy(i,j,k) * gxy(i,j,k) ) / ldetg
+! trace of Aij with the inverse metric (off-diagonal terms counted twice)
+    ltrA =         lgupxx * Axx(i,j,k) + lgupyy * Ayy(i,j,k) &
+                 + lgupzz * Azz(i,j,k) &
+         + TWO * (lgupxy * Axy(i,j,k) + lgupxz * Axz(i,j,k) &
+                 + lgupyz * Ayz(i,j,k))
+! subtract one third of (metric * trace) from Aij, using the unscaled metric
+    Axx(i,j,k) = Axx(i,j,k) - F1o3 * lgxx * ltrA
+    Axy(i,j,k) = Axy(i,j,k) - F1o3 * gxy(i,j,k) * ltrA
+    Axz(i,j,k) = Axz(i,j,k) - F1o3 * gxz(i,j,k) * ltrA
+    Ayy(i,j,k) = Ayy(i,j,k) - F1o3 * lgyy * ltrA
+    Ayz(i,j,k) = Ayz(i,j,k) - F1o3 * gyz(i,j,k) * ltrA
+    Azz(i,j,k) = Azz(i,j,k) - F1o3 * lgzz * ltrA
+! scale factor det**(-1/3); ldetg is not checked for being positive
+    lscale = ONE / ( ldetg ** F1o3 )
+! write back the rescaled metric, storing the diagonal again as (g - 1)
+    dxx(i,j,k) = lgxx * lscale - ONE
+    gxy(i,j,k) = gxy(i,j,k) * lscale
+    gxz(i,j,k) = gxz(i,j,k) * lscale
+    dyy(i,j,k) = lgyy * lscale - ONE
+    gyz(i,j,k) = gyz(i,j,k) * lscale
+    dzz(i,j,k) = lgzz * lscale - ONE
+
+  enddo
+  enddo
+  enddo
+
+  return
+
+  end subroutine enforce_ag
+#if 1 
+!----------------------------------------------------------------------------------  
+! enforce_ga: same two steps in the opposite order (normalize g first, then remove the trace of A)
+!----------------------------------------------------------------------------------
+  subroutine enforce_ga(ex,  dxx,  gxy,  gxz,  dyy,  gyz,  dzz, &
+                             Axx,  Axy,  Axz,  Ayy,  Ayz,  Azz)
+  implicit none
+
+!~~~~~~> Input parameters:
+
+  integer,                              intent(in)    :: ex(1:3)
+  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: dxx,dyy,dzz
+  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: gxy,gxz,gyz
+  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Axx,Axy,Axz
+  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz
+
+!~~~~~~~> Local variable:
+
+  integer :: i,j,k
+  real*8 :: lgxx,lgyy,lgzz,lscale
+  real*8 :: lgxy,lgxz,lgyz
+  real*8 :: lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz
+  real*8 :: ltrA
+  real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0
+
+!~~~~~~>
+
+  do k=1,ex(3)
+  do j=1,ex(2)
+  do i=1,ex(1)
+
+! for g: normalize determinant first
+    lgxx = dxx(i,j,k) + ONE
+    lgyy = dyy(i,j,k) + ONE
+    lgzz = dzz(i,j,k) + ONE
+    lgxy = gxy(i,j,k)
+    lgxz = gxz(i,j,k)
+    lgyz = gyz(i,j,k)
+
+    lscale =  lgxx * lgyy * lgzz + lgxy * lgyz * lgxz &
+            + lgxz * lgxy * lgyz - lgxz * lgyy * lgxz &
+            - lgxy * lgxy * lgzz - lgxx * lgyz * lgyz
+
+    lscale = ONE / ( lscale ** F1o3 )
+
+    lgxx = lgxx * lscale
+    lgxy = lgxy * lscale
+    lgxz = lgxz * lscale
+    lgyy = lgyy * lscale
+    lgyz = lgyz * lscale
+    lgzz = lgzz * lscale
+
+    dxx(i,j,k) = lgxx - ONE
+    gxy(i,j,k) = lgxy
+    gxz(i,j,k) = lgxz
+    dyy(i,j,k) = lgyy - ONE
+    gyz(i,j,k) = lgyz
+    dzz(i,j,k) = lgzz - ONE
+
+! for A: trace-free using normalized metric (det=1, no division needed)
+    lgupxx =   ( lgyy * lgzz - lgyz * lgyz )
+    lgupxy = - ( lgxy * lgzz - lgyz * lgxz )
+    lgupxz =   ( lgxy * lgyz - lgyy * lgxz )
+    lgupyy =   ( lgxx * lgzz - lgxz * lgxz )
+    lgupyz = - ( lgxx * lgyz - lgxy * lgxz )
+    lgupzz =   ( lgxx * lgyy - lgxy * lgxy )
+
+    ltrA =         lgupxx * Axx(i,j,k) + lgupyy * Ayy(i,j,k) &
+                 + lgupzz * Azz(i,j,k) &
+         + TWO * (lgupxy * Axy(i,j,k) + lgupxz * Axz(i,j,k) &
+                 + lgupyz * Ayz(i,j,k))
+
+    Axx(i,j,k) = Axx(i,j,k) - F1o3 * lgxx * ltrA
+    Axy(i,j,k) = Axy(i,j,k) - F1o3 * lgxy * ltrA
+    Axz(i,j,k) = Axz(i,j,k) - F1o3 * lgxz * ltrA
+    Ayy(i,j,k) = Ayy(i,j,k) - F1o3 * lgyy * ltrA
+    Ayz(i,j,k) = Ayz(i,j,k) - F1o3 * lgyz * ltrA
+    Azz(i,j,k) = Azz(i,j,k) - F1o3 * lgzz * ltrA
+
+  enddo
+  enddo
+  enddo
+
+  return
+
+  end subroutine enforce_ga
+#else
+!----------------------------------------------------------------------------------
+! enforce_ga, whole-array variant (original note: "duplicate bam"); compiled only when the #if above is 0
+!----------------------------------------------------------------------------------
+  subroutine enforce_ga(ex,  dxx,  gxy,  gxz,  dyy,  gyz,  dzz, &
+                             Axx,  Axy,  Axz,  Ayy,  Ayz,  Azz)
+  implicit none
+! Same argument list as the loop version; the work is done with whole-array expressions.
+!~~~~~~> Input parameters:
+
+  integer,                              intent(in)    :: ex(1:3)
+  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: dxx,dyy,dzz
+  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: gxy,gxz,gyz
+  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Axx,Axy,Axz
+  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz
+
+!~~~~~~~> Local variable:
+! seven full-size work arrays; the parameter TWO is declared but not referenced
+  real*8, dimension(ex(1),ex(2),ex(3)) :: trA
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz 
+  real*8, dimension(ex(1),ex(2),ex(3)) :: aux,detginv
+  real*8, parameter :: oot = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0
+
+!~~~~~~>
+
+  gxx = dxx + ONE
+  gyy = dyy + ONE
+  gzz = dzz + ONE
+! for g: aux = det(g)**(-1/3); scaling all six components by it normalizes the determinant
+aux = (2.d0*gxy*gxz*gyz + gxx*gyy*gzz &
+    - gzz*gxy**2 - gyy*gxz**2 - gxx*gyz**2)**(-oot)
+
+  gxx = gxx * aux
+  gxy = gxy * aux
+  gxz = gxz * aux
+  gyy = gyy * aux
+  gyz = gyz * aux
+  gzz = gzz * aux
+
+  dxx = gxx - ONE
+  dyy = gyy - ONE
+  dzz = gzz - ONE
+! for A: recompute 1/det from the rescaled metric, then subtract the trace part
+
+detginv = 1/(2.d0*gxy*gxz*gyz + gxx*gyy*gzz &
+    - gzz*gxy**2 - gyy*gxz**2 - gxx*gyz**2)
+! trA = detginv * (cofactors of g contracted with A), i.e. the trace of A
+trA = detginv*(-2.d0*Ayz*gxx*gyz + Axx*gyy*gzz + &
+    gxx*(Azz*gyy + Ayy*gzz) + 2.d0*(gxz*(Ayz*gxy - Axz*gyy + &
+    Axy*gyz) + gxy*(Axz*gyz - Axy*gzz)) - Azz*gxy**2 - Ayy*gxz**2 - &
+    Axx*gyz**2)
+
+aux = -(oot*trA)
+
+  Axx = Axx + aux * gxx
+  Axy = Axy + aux * gxy
+  Axz = Axz + aux * gxz
+  Ayy = Ayy + aux * gyy
+  Ayz = Ayz + aux * gyz
+  Azz = Azz + aux * gzz
+
+  return
+
+  end subroutine enforce_ga
+#endif
--- a/AMSS_NCKU_source/BSSN/enforce_algebra.h
+++ b/AMSS_NCKU_source/BSSN/enforce_algebra.h
@@ -1,30 +1,30 @@
-
-#ifndef ENFORCE_ALGEBRA_H
-#define ENFORCE_ALGEBRA_H
-
-#ifdef fortran1
-#define f_enforce_ag enforce_ag
-#define f_enforce_ga enforce_ga
-#endif
-#ifdef fortran2
-#define f_enforce_ag ENFORCE_AG
-#define f_enforce_ga ENFORCE_GA
-#endif
-#ifdef fortran3
-#define f_enforce_ag enforce_ag_
-#define f_enforce_ga enforce_ga_
-#endif
-
-extern "C"
-{
-	void f_enforce_ag(int *,
-					  double *, double *, double *, double *, double *, double *,
-					  double *, double *, double *, double *, double *, double *);
-}
-extern "C"
-{
-	void f_enforce_ga(int *,
-					  double *, double *, double *, double *, double *, double *,
-					  double *, double *, double *, double *, double *, double *);
-}
-#endif /* ENFORCE_ALGEBRA_H */
+/* C++ prototypes for the Fortran routines enforce_ag and enforce_ga. */
+#ifndef ENFORCE_ALGEBRA_H
+#define ENFORCE_ALGEBRA_H
+/* fortran1, fortran2, fortran3 select the symbol spelling: plain, upper case, trailing underscore. */
+#ifdef fortran1
+#define f_enforce_ag enforce_ag
+#define f_enforce_ga enforce_ga
+#endif
+#ifdef fortran2
+#define f_enforce_ag ENFORCE_AG
+#define f_enforce_ga ENFORCE_GA
+#endif
+#ifdef fortran3
+#define f_enforce_ag enforce_ag_
+#define f_enforce_ga enforce_ga_
+#endif
+/* Both routines take one int pointer followed by twelve double pointers. */
+extern "C"
+{
+	void f_enforce_ag(int *,
+					  double *, double *, double *, double *, double *, double *,
+					  double *, double *, double *, double *, double *, double *);
+}
+extern "C"
+{
+	void f_enforce_ga(int *,
+					  double *, double *, double *, double *, double *, double *,
+					  double *, double *, double *, double *, double *, double *);
+}
+#endif /* ENFORCE_ALGEBRA_H */
--- a/AMSS_NCKU_source/BSSN/fadmquantites_bssn.f90
+++ b/AMSS_NCKU_source/BSSN/fadmquantites_bssn.f90
@@ -1,245 +1,245 @@
-
-!-----------------------------------------------------------------------------
-! ADM quantites for surface intergral
-!-----------------------------------------------------------------------------
-  subroutine admmass_bssn(ex, X, Y, Z,                            &
-               chi    ,   trK, &
-               dxx    ,   gxy    ,   gxz    ,   dyy    ,   gyz    ,   dzz    , &
-               Axx    ,   Axy    ,   Axz    ,   Ayy    ,   Ayz    ,   Azz    , &
-               Gamx   ,  Gamy   ,  Gamz   ,  &
-               massx,massy,massz, symmetry)
-
-  implicit none
- !~~~~~~= Input parameters:
- 
-  integer,intent(in) :: ex(1:3),symmetry
-  real*8, intent(in ):: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: dxx,gxy,gxz,dyy,gyz,dzz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Axx,Axy,Axz,Ayy,Ayz,Azz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: chi,trK
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Gamx,Gamy,Gamz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: massx,massy,massz
-! local variables
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz
-!  inverse metric
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gupyy,gupyz,gupzz
-!  partial derivative of chi, chi_i
-  real*8, dimension(ex(1),ex(2),ex(3)) :: chix,chiy,chiz
-  real*8, dimension(ex(1),ex(2),ex(3)) :: f
-  real*8 :: PI, F1o2pi
-  real*8, parameter :: ONE = 1.d0, F1o8 = 1.d0/8.d0
-  real*8, parameter :: SYM = 1.D0, ANTI= - 1.D0
-  real*8            :: dX, dY, dZ
-
-  dX = X(2) - X(1)
-  dY = Y(2) - Y(1)
-  dZ = Z(2) - Z(1)
-
-       PI = dacos( - ONE )
-  F1o2pi = ONE / ( 2.d0 * PI )
-
-  gxx = dxx + ONE
-  gyy = dyy + ONE
-  gzz = dzz + ONE
-
-  gupzz =  gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
-           gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
-  gupxx =   ( gyy * gzz - gyz * gyz ) / gupzz
-  gupxy = - ( gxy * gzz - gyz * gxz ) / gupzz
-  gupxz =   ( gxy * gyz - gyy * gxz ) / gupzz
-  gupyy =   ( gxx * gzz - gxz * gxz ) / gupzz
-  gupyz = - ( gxx * gyz - gxy * gxz ) / gupzz
-  gupzz =   ( gxx * gyy - gxy * gxy ) / gupzz
-
-  call fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,Symmetry,0)
-
-  f=1/4.d0/(chi+ONE)**1.25d0
-! mass_i = (Gami/8 + gupij*phi_j/(4*chi^1.25))/(2*Pi)
-  massx = (F1o8*Gamx + f*(gupxx*chix+gupxy*chiy+gupxz*chiz))*F1o2pi
-  massy = (F1o8*Gamy + f*(gupxy*chix+gupyy*chiy+gupyz*chiz))*F1o2pi
-  massz = (F1o8*Gamz + f*(gupxz*chix+gupyz*chiy+gupzz*chiz))*F1o2pi
-
-  return
-
-  end subroutine admmass_bssn
-!-----------------------------------------------------------------------------------------------
-! P^i = int r^j p_ji
-!-----------------------------------------------------------------------------------------------
-  subroutine admmomentum_bssn(ex,                            &
-               chi, trK, &
-               dxx    ,   gxy    ,   gxz    ,   dyy    ,   gyz    ,   dzz    , &
-               Axx    ,   Axy    ,   Axz    ,   Ayy    ,   Ayz    ,   Azz    , &
-               Gamx   ,  Gamy   ,  Gamz   ,  &
-               pxx,pxy,pxz,pyy,pyz,pzz)
-
-  implicit none
- !~~~~~~= Input parameters:
- 
-  integer,intent(in) :: ex(1:3)
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: dxx,gxy,gxz,dyy,gyz,dzz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Axx,Axy,Axz,Ayy,Ayz,Azz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: chi,trK
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Gamx,Gamy,Gamz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: pxx,pxy,pxz,pyy,pyz,pzz
-! local variables
-  real*8, dimension(ex(1),ex(2),ex(3)) :: Kxx,Kxy,Kxz,Kyy,Kyz,Kzz
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz,chim4
-  real*8 :: PI, F1o8pi
-  real*8, parameter :: ONE = 1.d0, F1o3 = 1.d0/3.d0
-
-       PI = acos( - ONE )
-  F1o8pi = ONE / ( 8.d0 * PI )
-
-  gxx = dxx + ONE
-  gyy = dyy + ONE
-  gzz = dzz + ONE
-
-  chim4=1.d0/(chi+ONE)**4
-  Kxx = chim4*(Axx+F1o3*gxx*trK)
-  Kxy = chim4*(Axy+F1o3*gxy*trK)
-  Kxz = chim4*(Axz+F1o3*gxz*trK)
-  Kyy = chim4*(Ayy+F1o3*gyy*trK)
-  Kyz = chim4*(Ayz+F1o3*gyz*trK)
-  Kzz = chim4*(Azz+F1o3*gzz*trK)
-
-  pxx = (Kxx-trK)*F1o8pi
-  pxy = (Kxy    )*F1o8pi
-  pxz = (Kxz    )*F1o8pi
-  pyy = (Kyy-trK)*F1o8pi
-  pyz = (Kyz    )*F1o8pi
-  pzz = (Kzz-trK)*F1o8pi
-
-  return
-
-  end subroutine admmomentum_bssn
-!-----------------------------------------------------------------------------------------------
-! S^i = int r^j s_ji
-!-----------------------------------------------------------------------------------------------
-  subroutine admangularmomentum_bssn(ex,X,Y,Z,&
-               pxx,pxy,pxz,pyy,pyz,pzz, &
-               sxx,sxy,sxz,syx,syy,syz,szx,szy,szz)
-
-  implicit none
- !~~~~~~= Input parameters:
- 
-  integer,intent(in) :: ex(1:3)
-  real*8, intent(in ):: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in) :: pxx,pxy,pxz,pyy,pyz,pzz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: sxx,sxy,sxz,syx,syy,syz,szx,szy,szz
-!local variable
-  real*8, dimension(ex(1),ex(2),ex(3))::XX,YY,ZZ
-  integer::i,j,k
-
-  do j = 1,ex(2)
-  do k = 1,ex(3)
-     XX(:,j,k) = X
-  enddo
-  enddo
-
-  do i = 1,ex(1)
-  do k = 1,ex(3)
-     YY(i,:,k) = Y
-  enddo
-  enddo
-
-  do i = 1,ex(1)
-  do j = 1,ex(2)
-     ZZ(i,j,:) = Z
-  enddo
-  enddo
-
-  sxx = YY*pxy - ZZ*pxz
-  sxy = YY*pyy - ZZ*pyz
-  sxz = YY*pyz - ZZ*pzz
-  syx = ZZ*pxy - YY*pxz
-  syy = ZZ*pyy - YY*pyz
-  syz = ZZ*pyz - YY*pzz
-  szx = XX*pxy - YY*pxx
-  szy = XX*pyy - YY*pxy
-  szz = XX*pyz - YY*pxz
-
-  return
-
-  end subroutine admangularmomentum_bssn
-
-! for shell
-  subroutine admmass_bssn_ss(ex,crho,sigma,R, X, Y, Z,                         &
-               drhodx, drhody, drhodz,                                         &
-               dsigmadx,dsigmady,dsigmadz,                                     &
-               dRdx,dRdy,dRdz,                                                 &
-               drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz,                &
-               dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz,    &
-               dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz,                            &
-               chi    ,   trK, &
-               dxx    ,   gxy    ,   gxz    ,   dyy    ,   gyz    ,   dzz    , &
-               Axx    ,   Axy    ,   Axz    ,   Ayy    ,   Ayz    ,   Azz    , &
-               Gamx   ,  Gamy   ,  Gamz   ,  &
-               massx,massy,massz, symmetry,sst)
-
-  implicit none
- !~~~~~~= Input parameters:
- 
-  integer,intent(in) :: ex(1:3),symmetry,sst
-  double precision,intent(in),dimension(ex(1))::crho
-  double precision,intent(in),dimension(ex(2))::sigma
-  double precision,intent(in),dimension(ex(3))::R
-  real*8, intent(in ):: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
-  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::drhodx, drhody, drhodz
-  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::dsigmadx,dsigmady,dsigmadz
-  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::dRdx,dRdy,dRdz
-  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz
-  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz
-  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: dxx,gxy,gxz,dyy,gyz,dzz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Axx,Axy,Axz,Ayy,Ayz,Azz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: chi,trK
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Gamx,Gamy,Gamz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: massx,massy,massz
-! local variables
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz
-!  inverse metric
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gupyy,gupyz,gupzz
-!  partial derivative of chi, chi_i
-  real*8, dimension(ex(1),ex(2),ex(3)) :: chix,chiy,chiz
-  real*8, dimension(ex(1),ex(2),ex(3)) :: f
-  real*8 :: PI, F1o2pi
-  real*8, parameter :: ONE = 1.d0, F1o8 = 1.d0/8.d0
-  real*8, parameter :: SYM = 1.D0, ANTI= - 1.D0
-  real*8            :: dX, dY, dZ
-
-  dX = X(2) - X(1)
-  dY = Y(2) - Y(1)
-  dZ = Z(2) - Z(1)
-
-       PI = dacos( - ONE )
-  F1o2pi = ONE / ( 2.d0 * PI )
-
-  gxx = dxx + ONE
-  gyy = dyy + ONE
-  gzz = dzz + ONE
-
-  gupzz =  gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
-           gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
-  gupxx =   ( gyy * gzz - gyz * gyz ) / gupzz
-  gupxy = - ( gxy * gzz - gyz * gxz ) / gupzz
-  gupxz =   ( gxy * gyz - gyy * gxz ) / gupzz
-  gupyy =   ( gxx * gzz - gxz * gxz ) / gupzz
-  gupyz = - ( gxx * gyz - gxy * gxz ) / gupzz
-  gupzz =   ( gxx * gyy - gxy * gxy ) / gupzz
-
-  call fderivs_shc(ex,chi,chix,chiy,chiz,crho,sigma,R, SYM, SYM,SYM,Symmetry,0,sst,          &
-                       drhodx, drhody, drhodz,                                               &
-                       dsigmadx,dsigmady,dsigmadz,                                           &
-                       dRdx,dRdy,dRdz)
-
-  f=1/4.d0/(chi+ONE)**1.25d0
-! mass_i = (Gami/8 + gupij*phi_j/(4*chi^1.25))/(2*Pi)
-  massx = (F1o8*Gamx + f*(gupxx*chix+gupxy*chiy+gupxz*chiz))*F1o2pi
-  massy = (F1o8*Gamy + f*(gupxy*chix+gupyy*chiy+gupyz*chiz))*F1o2pi
-  massz = (F1o8*Gamz + f*(gupxz*chix+gupyz*chiy+gupzz*chiz))*F1o2pi
-
-  return
-
-  end subroutine admmass_bssn_ss
+
+!-----------------------------------------------------------------------------
+! ADM quantities for surface integrals; admmass_bssn fills massx,massy,massz point-wise
+!-----------------------------------------------------------------------------
+  subroutine admmass_bssn(ex, X, Y, Z,                            &
+               chi    ,   trK, &
+               dxx    ,   gxy    ,   gxz    ,   dyy    ,   gyz    ,   dzz    , &
+               Axx    ,   Axy    ,   Axz    ,   Ayy    ,   Ayz    ,   Azz    , &
+               Gamx   ,  Gamy   ,  Gamz   ,  &
+               massx,massy,massz, symmetry)
+! trK and Axx..Azz are accepted but not read in this routine.
+  implicit none
+ !~~~~~~= Input parameters:
+ 
+  integer,intent(in) :: ex(1:3),symmetry
+  real*8, intent(in ):: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: dxx,gxy,gxz,dyy,gyz,dzz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Axx,Axy,Axz,Ayy,Ayz,Azz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: chi,trK
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Gamx,Gamy,Gamz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: massx,massy,massz
+! local variables
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz
+!  inverse metric
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gupyy,gupyz,gupzz
+!  partial derivative of chi, chi_i
+  real*8, dimension(ex(1),ex(2),ex(3)) :: chix,chiy,chiz
+  real*8, dimension(ex(1),ex(2),ex(3)) :: f
+  real*8 :: PI, F1o2pi
+  real*8, parameter :: ONE = 1.d0, F1o8 = 1.d0/8.d0
+  real*8, parameter :: SYM = 1.D0, ANTI= - 1.D0
+  real*8            :: dX, dY, dZ
+! grid spacings; computed here but not used further below
+  dX = X(2) - X(1)
+  dY = Y(2) - Y(1)
+  dZ = Z(2) - Z(1)
+
+       PI = dacos( - ONE )
+  F1o2pi = ONE / ( 2.d0 * PI )
+
+  gxx = dxx + ONE
+  gyy = dyy + ONE
+  gzz = dzz + ONE
+! gupzz temporarily holds the metric determinant; it is overwritten last with the zz inverse component
+  gupzz =  gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
+           gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
+  gupxx =   ( gyy * gzz - gyz * gyz ) / gupzz
+  gupxy = - ( gxy * gzz - gyz * gxz ) / gupzz
+  gupxz =   ( gxy * gyz - gyy * gxz ) / gupzz
+  gupyy =   ( gxx * gzz - gxz * gxz ) / gupzz
+  gupyz = - ( gxx * gyz - gxy * gxz ) / gupzz
+  gupzz =   ( gxx * gyy - gxy * gxy ) / gupzz
+! fderivs is defined elsewhere; presumably it returns the x,y,z derivatives of chi - confirm
+  call fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,Symmetry,0)
+
+  f=1/4.d0/(chi+ONE)**1.25d0
+! mass_i = (Gam_i/8 + gup_ij*chi_j/(4*(chi+1)^1.25))/(2*Pi)
+  massx = (F1o8*Gamx + f*(gupxx*chix+gupxy*chiy+gupxz*chiz))*F1o2pi
+  massy = (F1o8*Gamy + f*(gupxy*chix+gupyy*chiy+gupyz*chiz))*F1o2pi
+  massz = (F1o8*Gamz + f*(gupxz*chix+gupyz*chiy+gupzz*chiz))*F1o2pi
+
+  return
+
+  end subroutine admmass_bssn
+!-----------------------------------------------------------------------------------------------
+! P^i = int r^j p_ji ; this routine fills the six p_ij arrays point-wise
+!-----------------------------------------------------------------------------------------------
+  subroutine admmomentum_bssn(ex,                            &
+               chi, trK, &
+               dxx    ,   gxy    ,   gxz    ,   dyy    ,   gyz    ,   dzz    , &
+               Axx    ,   Axy    ,   Axz    ,   Ayy    ,   Ayz    ,   Azz    , &
+               Gamx   ,  Gamy   ,  Gamz   ,  &
+               pxx,pxy,pxz,pyy,pyz,pzz)
+! Gamx,Gamy,Gamz are accepted but not read in this routine.
+  implicit none
+ !~~~~~~= Input parameters:
+ 
+  integer,intent(in) :: ex(1:3)
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: dxx,gxy,gxz,dyy,gyz,dzz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Axx,Axy,Axz,Ayy,Ayz,Azz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: chi,trK
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Gamx,Gamy,Gamz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: pxx,pxy,pxz,pyy,pyz,pzz
+! local variables
+  real*8, dimension(ex(1),ex(2),ex(3)) :: Kxx,Kxy,Kxz,Kyy,Kyz,Kzz
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz,chim4
+  real*8 :: PI, F1o8pi
+  real*8, parameter :: ONE = 1.d0, F1o3 = 1.d0/3.d0
+
+       PI = acos( - ONE )
+  F1o8pi = ONE / ( 8.d0 * PI )
+
+  gxx = dxx + ONE
+  gyy = dyy + ONE
+  gzz = dzz + ONE
+! K_ij = (A_ij + g_ij*trK/3) / (chi+1)^4, with g_ij the metric rebuilt from dxx.. above
+  chim4=1.d0/(chi+ONE)**4
+  Kxx = chim4*(Axx+F1o3*gxx*trK)
+  Kxy = chim4*(Axy+F1o3*gxy*trK)
+  Kxz = chim4*(Axz+F1o3*gxz*trK)
+  Kyy = chim4*(Ayy+F1o3*gyy*trK)
+  Kyz = chim4*(Ayz+F1o3*gyz*trK)
+  Kzz = chim4*(Azz+F1o3*gzz*trK)
+! p_ij = (K_ij - delta_ij*trK) / (8*Pi): trK is subtracted on the diagonal only
+  pxx = (Kxx-trK)*F1o8pi
+  pxy = (Kxy    )*F1o8pi
+  pxz = (Kxz    )*F1o8pi
+  pyy = (Kyy-trK)*F1o8pi
+  pyz = (Kyz    )*F1o8pi
+  pzz = (Kzz-trK)*F1o8pi
+
+  return
+
+  end subroutine admmomentum_bssn
+!-----------------------------------------------------------------------------------------------
+! S^i = int r^j s_ji ; this routine fills the nine s_ij arrays point-wise
+!-----------------------------------------------------------------------------------------------
+  subroutine admangularmomentum_bssn(ex,X,Y,Z,&
+               pxx,pxy,pxz,pyy,pyz,pzz, &
+               sxx,sxy,sxz,syx,syy,syz,szx,szy,szz)
+! Each output is a difference of two coordinate-times-p products (see the formulas at the end).
+  implicit none
+ !~~~~~~= Input parameters:
+ 
+  integer,intent(in) :: ex(1:3)
+  real*8, intent(in ):: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in) :: pxx,pxy,pxz,pyy,pyz,pzz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: sxx,sxy,sxz,syx,syy,syz,szx,szy,szz
+!local variable
+  real*8, dimension(ex(1),ex(2),ex(3))::XX,YY,ZZ
+  integer::i,j,k
+! expand the 1-D coordinate vectors X,Y,Z into full 3-D arrays XX,YY,ZZ
+  do j = 1,ex(2)
+  do k = 1,ex(3)
+     XX(:,j,k) = X
+  enddo
+  enddo
+
+  do i = 1,ex(1)
+  do k = 1,ex(3)
+     YY(i,:,k) = Y
+  enddo
+  enddo
+
+  do i = 1,ex(1)
+  do j = 1,ex(2)
+     ZZ(i,j,:) = Z
+  enddo
+  enddo
+! NOTE(review): sz* follows x*p_y - y*p_x, but sx* and sy* do not follow the cyclic analogue (sy* has YY, no XX) - confirm
+  sxx = YY*pxy - ZZ*pxz
+  sxy = YY*pyy - ZZ*pyz
+  sxz = YY*pyz - ZZ*pzz
+  syx = ZZ*pxy - YY*pxz
+  syy = ZZ*pyy - YY*pyz
+  syz = ZZ*pyz - YY*pzz
+  szx = XX*pxy - YY*pxx
+  szy = XX*pyy - YY*pxy
+  szz = XX*pyz - YY*pxz
+
+  return
+
+  end subroutine admangularmomentum_bssn
+
+! for shell: same integrand as admmass_bssn, but the chi derivatives come from fderivs_shc
+  subroutine admmass_bssn_ss(ex,crho,sigma,R, X, Y, Z,                         &
+               drhodx, drhody, drhodz,                                         &
+               dsigmadx,dsigmady,dsigmadz,                                     &
+               dRdx,dRdy,dRdz,                                                 &
+               drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz,                &
+               dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz,    &
+               dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz,                            &
+               chi    ,   trK, &
+               dxx    ,   gxy    ,   gxz    ,   dyy    ,   gyz    ,   dzz    , &
+               Axx    ,   Axy    ,   Axz    ,   Ayy    ,   Ayz    ,   Azz    , &
+               Gamx   ,  Gamy   ,  Gamz   ,  &
+               massx,massy,massz, symmetry,sst)
+! The second-derivative arrays (drhodxx.., dsigmadxx.., dRdxx..), trK and Axx..Azz are not read here.
+  implicit none
+ !~~~~~~= Input parameters:
+ 
+  integer,intent(in) :: ex(1:3),symmetry,sst
+  double precision,intent(in),dimension(ex(1))::crho
+  double precision,intent(in),dimension(ex(2))::sigma
+  double precision,intent(in),dimension(ex(3))::R
+  real*8, intent(in ):: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
+  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::drhodx, drhody, drhodz
+  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::dsigmadx,dsigmady,dsigmadz
+  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::dRdx,dRdy,dRdz
+  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz
+  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz
+  double precision,intent(in),dimension(ex(1),ex(2),ex(3))::dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: dxx,gxy,gxz,dyy,gyz,dzz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Axx,Axy,Axz,Ayy,Ayz,Azz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: chi,trK
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Gamx,Gamy,Gamz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: massx,massy,massz
+! local variables
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz
+!  inverse metric
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gupyy,gupyz,gupzz
+!  partial derivative of chi, chi_i
+  real*8, dimension(ex(1),ex(2),ex(3)) :: chix,chiy,chiz
+  real*8, dimension(ex(1),ex(2),ex(3)) :: f
+  real*8 :: PI, F1o2pi
+  real*8, parameter :: ONE = 1.d0, F1o8 = 1.d0/8.d0
+  real*8, parameter :: SYM = 1.D0, ANTI= - 1.D0
+  real*8            :: dX, dY, dZ
+! grid spacings; computed here but not used further below
+  dX = X(2) - X(1)
+  dY = Y(2) - Y(1)
+  dZ = Z(2) - Z(1)
+
+       PI = dacos( - ONE )
+  F1o2pi = ONE / ( 2.d0 * PI )
+
+  gxx = dxx + ONE
+  gyy = dyy + ONE
+  gzz = dzz + ONE
+! gupzz temporarily holds the metric determinant; it is overwritten last with the zz inverse component
+  gupzz =  gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
+           gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
+  gupxx =   ( gyy * gzz - gyz * gyz ) / gupzz
+  gupxy = - ( gxy * gzz - gyz * gxz ) / gupzz
+  gupxz =   ( gxy * gyz - gyy * gxz ) / gupzz
+  gupyy =   ( gxx * gzz - gxz * gxz ) / gupzz
+  gupyz = - ( gxx * gyz - gxy * gxz ) / gupzz
+  gupzz =   ( gxx * gyy - gxy * gxy ) / gupzz
+! fderivs_shc is defined elsewhere; presumably it returns the x,y,z derivatives of chi - confirm
+  call fderivs_shc(ex,chi,chix,chiy,chiz,crho,sigma,R, SYM, SYM,SYM,Symmetry,0,sst,          &
+                       drhodx, drhody, drhodz,                                               &
+                       dsigmadx,dsigmady,dsigmadz,                                           &
+                       dRdx,dRdy,dRdz)
+
+  f=1/4.d0/(chi+ONE)**1.25d0
+! mass_i = (Gam_i/8 + gup_ij*chi_j/(4*(chi+1)^1.25))/(2*Pi)
+  massx = (F1o8*Gamx + f*(gupxx*chix+gupxy*chiy+gupxz*chiz))*F1o2pi
+  massy = (F1o8*Gamy + f*(gupxy*chix+gupyy*chiy+gupyz*chiz))*F1o2pi
+  massz = (F1o8*Gamz + f*(gupxz*chix+gupyz*chiy+gupzz*chiz))*F1o2pi
+
+  return
+
+  end subroutine admmass_bssn_ss
--- a/AMSS_NCKU_source/BSSN/fadmquantites_bssn.h
+++ b/AMSS_NCKU_source/BSSN/fadmquantites_bssn.h
@@ -1,60 +1,60 @@
-
-#ifndef FADMQUANTITES_H
-#define FADMQUANTITES_H
-
-#ifdef fortran1
-#define f_admmass_bssn admmass_bssn
-#define f_admmass_bssn_ss admmass_bssn_ss
-#define f_admmomentum_bssn admmomentum_bssn
-#endif
-#ifdef fortran2
-#define f_admmass_bssn ADMMASS_BSSN
-#define f_admmass_bssn_ss ADMMASS_BSSN_SS
-#define f_admmomentum_bssn ADMMOMENTUM_BSSN
-#endif
-#ifdef fortran3
-#define f_admmass_bssn admmass_bssn_
-#define f_admmass_bssn_ss admmass_bssn_ss_
-#define f_admmomentum_bssn admmomentum_bssn_
-#endif
-
-extern "C"
-{
-	void f_admmass_bssn(int *, double *, double *, double *,
-						double *, double *,
-						double *, double *, double *, double *, double *, double *,
-						double *, double *, double *, double *, double *, double *,
-						double *, double *, double *,
-						double *, double *, double *,
-						int &);
-}
-
-extern "C"
-{
-	void f_admmass_bssn_ss(int *, double *, double *, double *,
-						   double *, double *, double *,
-						   double *, double *, double *,
-						   double *, double *, double *,
-						   double *, double *, double *,
-						   double *, double *, double *, double *, double *, double *,
-						   double *, double *, double *, double *, double *, double *,
-						   double *, double *, double *, double *, double *, double *,
-						   double *, double *,
-						   double *, double *, double *, double *, double *, double *,
-						   double *, double *, double *, double *, double *, double *,
-						   double *, double *, double *,
-						   double *, double *, double *,
-						   int &, int &);
-}
-
-extern "C"
-{
-	void f_admmomentum_bssn(int *, double *, double *, double *,
-							double *, double *,
-							double *, double *, double *, double *, double *, double *,
-							double *, double *, double *, double *, double *, double *,
-							double *, double *, double *,
-							double *, double *, double *,
-							double *, double *, double *, double *, double *, double *);
-}
-#endif /* FADMQUANTITES_H */
+/* C++ prototypes for the Fortran routines admmass_bssn, admmass_bssn_ss and admmomentum_bssn. */
+#ifndef FADMQUANTITES_H
+#define FADMQUANTITES_H
+/* fortran1, fortran2, fortran3 select the symbol spelling: plain, upper case, trailing underscore. */
+#ifdef fortran1
+#define f_admmass_bssn admmass_bssn
+#define f_admmass_bssn_ss admmass_bssn_ss
+#define f_admmomentum_bssn admmomentum_bssn
+#endif
+#ifdef fortran2
+#define f_admmass_bssn ADMMASS_BSSN
+#define f_admmass_bssn_ss ADMMASS_BSSN_SS
+#define f_admmomentum_bssn ADMMOMENTUM_BSSN
+#endif
+#ifdef fortran3
+#define f_admmass_bssn admmass_bssn_
+#define f_admmass_bssn_ss admmass_bssn_ss_
+#define f_admmomentum_bssn admmomentum_bssn_
+#endif
+/* No macro or prototype is provided here for admangularmomentum_bssn. */
+extern "C"
+{
+	void f_admmass_bssn(int *, double *, double *, double *,
+						double *, double *,
+						double *, double *, double *, double *, double *, double *,
+						double *, double *, double *, double *, double *, double *,
+						double *, double *, double *,
+						double *, double *, double *,
+						int &);
+}
+
+extern "C"
+{
+	void f_admmass_bssn_ss(int *, double *, double *, double *,
+						   double *, double *, double *,
+						   double *, double *, double *,
+						   double *, double *, double *,
+						   double *, double *, double *,
+						   double *, double *, double *, double *, double *, double *,
+						   double *, double *, double *, double *, double *, double *,
+						   double *, double *, double *, double *, double *, double *,
+						   double *, double *,
+						   double *, double *, double *, double *, double *, double *,
+						   double *, double *, double *, double *, double *, double *,
+						   double *, double *, double *,
+						   double *, double *, double *,
+						   int &, int &);
+}
+/* NOTE(review): 30 parameters below, but subroutine admmomentum_bssn in fadmquantites_bssn.f90 declares 24 dummy arguments - confirm. */
+extern "C"
+{
+	void f_admmomentum_bssn(int *, double *, double *, double *,
+							double *, double *,
+							double *, double *, double *, double *, double *, double *,
+							double *, double *, double *, double *, double *, double *,
+							double *, double *, double *,
+							double *, double *, double *,
+							double *, double *, double *, double *, double *, double *);
+}
+#endif /* FADMQUANTITES_H */
--- a/AMSS_NCKU_source/BSSN/fourdcurvature.f90
+++ b/AMSS_NCKU_source/BSSN/fourdcurvature.f90
@@ -1,91 +1,91 @@
-
-
-#include "macrodef.fh"
-
-!-----------------------------------------------------------------------------
-!
-! compute 4 dimensional Ricci scalar
-! this routine is valid for both box and shell
-!
-!-----------------------------------------------------------------------------
-
-  subroutine get4ricciscalar(ex, X, Y, Z, &
-               chi, trK, rho, &
-               dxx,gxy,gxz,dyy,gyz,dzz, &
-               Axx,Axy,Axz,Ayy,Ayz,Azz, &
-               Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
-               Sxx,Sxy,Sxz,Syy,Syz,Szz,&
-               RR)
-
-  implicit none
-
-!~~~~~~> Input parameters:
-
-  integer,intent(in ):: ex(1:3)
-  real*8, intent(in ):: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: dxx,gxy,gxz,dyy,gyz,dzz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Axx,Axy,Axz,Ayy,Ayz,Azz
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: chi,trK,rho
-! physical Ricci tensor  
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in) :: Rxx,Rxy,Rxz,Ryy,Ryz,Rzz
-! matter 
-  real*8, dimension(ex(1),ex(2),ex(3)),intent(in) :: Sxx,Sxy,Sxz,Syy,Syz,Szz
-  real*8, dimension(ex(1),ex(2),ex(3)), intent(out):: RR
-
-!~~~~~~> Other variables:
-
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz,chipn1
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz
-  real*8, dimension(ex(1),ex(2),ex(3)) :: gupyy,gupyz,gupzz
-  real*8, parameter :: ONE = 1.d0, TWO = 2.d0, THR = 3.d0, F8 = 8.d0, F2o3 = 2.d0/3.d0
-  real*8 :: PI
-
-  PI = dacos(-ONE)
-
-  gxx = dxx + ONE
-  gyy = dyy + ONE
-  gzz = dzz + ONE
-  chipn1= chi + ONE
-
-! invert tilted metric
-  gupzz =  gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
-           gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
-  gupxx =   ( gyy * gzz - gyz * gyz ) / gupzz
-  gupxy = - ( gxy * gzz - gyz * gxz ) / gupzz
-  gupxz =   ( gxy * gyz - gyy * gxz ) / gupzz
-  gupyy =   ( gxx * gzz - gxz * gxz ) / gupzz
-  gupyz = - ( gxx * gyz - gxy * gxz ) / gupzz
-  gupzz =   ( gxx * gyy - gxy * gxy ) / gupzz
-
-  RR =(gupxx * ( &
-       gupxx * Axx * Axx + gupyy * Axy * Axy + gupzz * Axz * Axz + &
-       TWO * (gupxy * Axx * Axy + gupxz * Axx * Axz + gupyz * Axy * Axz) ) + &
-       gupyy * ( &
-       gupxx * Axy * Axy + gupyy * Ayy * Ayy + gupzz * Ayz * Ayz + &
-       TWO * (gupxy * Axy * Ayy + gupxz * Axy * Ayz + gupyz * Ayy * Ayz) ) + &
-       gupzz * ( &
-       gupxx * Axz * Axz + gupyy * Ayz * Ayz + gupzz * Azz * Azz + &
-       TWO * (gupxy * Axz * Ayz + gupxz * Axz * Azz + gupyz * Ayz * Azz) ) + &
-       TWO * ( &
-       gupxy * ( &
-       gupxx * Axx * Axy + gupyy * Axy * Ayy + gupzz * Axz * Ayz + &
-       gupxy * (Axx * Ayy + Axy * Axy) + &
-       gupxz * (Axx * Ayz + Axz * Axy) + &
-       gupyz * (Axy * Ayz + Axz * Ayy) ) + &
-       gupxz * ( &
-       gupxx * Axx * Axz + gupyy * Axy * Ayz + gupzz * Axz * Azz + &
-       gupxy * (Axx * Ayz + Axy * Axz) + &
-       gupxz * (Axx * Azz + Axz * Axz) + &
-       gupyz * (Axy * Azz + Axz * Ayz) ) + &
-       gupyz * ( &
-       gupxx * Axy * Axz + gupyy * Ayy * Ayz + gupzz * Ayz * Azz + &
-       gupxy * (Axy * Ayz + Ayy * Axz) + &
-       gupxz * (Axy * Azz + Ayz * Axz) + &
-       gupyz * (Ayy * Azz + Ayz * Ayz) ) )) - F2o3*trK*trK &
-       -(gupxx*Rxx+gupyy*Ryy+gupzz*Rzz+TWO*(gupxy*Rxy+gupxz*Rxz+gupyz*Ryz))*chipn1 &
-       -F8*PI*(THR*rho- &
-       (gupxx*Sxx+gupyy*Syy+gupzz*Szz+TWO*(gupxy*Sxy+gupxz*Sxz+gupyz*Syz))*chipn1)
-
-  return
-
-  end subroutine get4ricciscalar  
+
+
+#include "macrodef.fh"
+
+!-----------------------------------------------------------------------------
+! get4ricciscalar: compute the 4 dimensional Ricci scalar RR at every grid point
+! from chi, trK, rho, the metric (dxx..dzz), Aij, the Ricci tensor Rij and Sij.
+! this routine is valid for both box and shell; X, Y, Z are accepted as
+! arguments but the body below never reads them.
+!-----------------------------------------------------------------------------
+
+  subroutine get4ricciscalar(ex, X, Y, Z, &
+               chi, trK, rho, &
+               dxx,gxy,gxz,dyy,gyz,dzz, &
+               Axx,Axy,Axz,Ayy,Ayz,Azz, &
+               Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
+               Sxx,Sxy,Sxz,Syy,Syz,Szz,&
+               RR)
+
+  implicit none
+
+!~~~~~~> Input parameters (ex gives the extents of every 3D array):
+
+  integer,intent(in ):: ex(1:3)
+  real*8, intent(in ):: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: dxx,gxy,gxz,dyy,gyz,dzz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: Axx,Axy,Axz,Ayy,Ayz,Azz
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in ) :: chi,trK,rho
+! physical Ricci tensor (contracted with the inverse metric and scaled by chi+1 below)
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in) :: Rxx,Rxy,Rxz,Ryy,Ryz,Rzz
+! matter terms Sij (contracted the same way as Rij below)
+  real*8, dimension(ex(1),ex(2),ex(3)),intent(in) :: Sxx,Sxy,Sxz,Syy,Syz,Szz
+  real*8, dimension(ex(1),ex(2),ex(3)), intent(out):: RR
+
+!~~~~~~> Other variables (full-size work arrays, one value per grid point):
+
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz,chipn1
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gupyy,gupyz,gupzz
+  real*8, parameter :: ONE = 1.d0, TWO = 2.d0, THR = 3.d0, F8 = 8.d0, F2o3 = 2.d0/3.d0
+  real*8 :: PI
+
+  PI = dacos(-ONE)
+! dxx, dyy, dzz and chi are passed without the constant ONE; add it back here
+  gxx = dxx + ONE
+  gyy = dyy + ONE
+  gzz = dzz + ONE
+  chipn1= chi + ONE
+
+! invert the metric (gxx..gzz); gupzz first holds the determinant and is overwritten last
+  gupzz =  gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
+           gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
+  gupxx =   ( gyy * gzz - gyz * gyz ) / gupzz
+  gupxy = - ( gxy * gzz - gyz * gxz ) / gupzz
+  gupxz =   ( gxy * gyz - gyy * gxz ) / gupzz
+  gupyy =   ( gxx * gzz - gxz * gxz ) / gupzz
+  gupyz = - ( gxx * gyz - gxy * gxz ) / gupzz
+  gupzz =   ( gxx * gyy - gxy * gxy ) / gupzz
+! RR = Aij*Akl*gup^ik*gup^jl - (2/3)*trK**2 - chipn1*gup^ij*Rij - 8*PI*(3*rho - chipn1*gup^ij*Sij)
+  RR =(gupxx * ( &
+       gupxx * Axx * Axx + gupyy * Axy * Axy + gupzz * Axz * Axz + &
+       TWO * (gupxy * Axx * Axy + gupxz * Axx * Axz + gupyz * Axy * Axz) ) + &
+       gupyy * ( &
+       gupxx * Axy * Axy + gupyy * Ayy * Ayy + gupzz * Ayz * Ayz + &
+       TWO * (gupxy * Axy * Ayy + gupxz * Axy * Ayz + gupyz * Ayy * Ayz) ) + &
+       gupzz * ( &
+       gupxx * Axz * Axz + gupyy * Ayz * Ayz + gupzz * Azz * Azz + &
+       TWO * (gupxy * Axz * Ayz + gupxz * Axz * Azz + gupyz * Ayz * Azz) ) + &
+       TWO * ( &
+       gupxy * ( &
+       gupxx * Axx * Axy + gupyy * Axy * Ayy + gupzz * Axz * Ayz + &
+       gupxy * (Axx * Ayy + Axy * Axy) + &
+       gupxz * (Axx * Ayz + Axz * Axy) + &
+       gupyz * (Axy * Ayz + Axz * Ayy) ) + &
+       gupxz * ( &
+       gupxx * Axx * Axz + gupyy * Axy * Ayz + gupzz * Axz * Azz + &
+       gupxy * (Axx * Ayz + Axy * Axz) + &
+       gupxz * (Axx * Azz + Axz * Axz) + &
+       gupyz * (Axy * Azz + Axz * Ayz) ) + &
+       gupyz * ( &
+       gupxx * Axy * Axz + gupyy * Ayy * Ayz + gupzz * Ayz * Azz + &
+       gupxy * (Axy * Ayz + Ayy * Axz) + &
+       gupxz * (Axy * Azz + Ayz * Axz) + &
+       gupyz * (Ayy * Azz + Ayz * Ayz) ) )) - F2o3*trK*trK &
+       -(gupxx*Rxx+gupyy*Ryy+gupzz*Rzz+TWO*(gupxy*Rxy+gupxz*Rxz+gupyz*Ryz))*chipn1 &
+       -F8*PI*(THR*rho- &
+       (gupxx*Sxx+gupyy*Syy+gupzz*Szz+TWO*(gupxy*Sxy+gupxz*Sxz+gupyz*Syz))*chipn1)
+
+  return
+
+  end subroutine get4ricciscalar  
--- a/AMSS_NCKU_source/BSSN/lopsided_c.C
+++ b/AMSS_NCKU_source/BSSN/lopsided_c.C
@@ -0,0 +1,255 @@
+#include "tool.h"
+/*
+ * lopsided: accumulates into f_rhs the terms Sfx*df/dx + Sfy*df/dy + Sfz*df/dz,
+ * using off-centered or centered stencils picked from the sign of Sf* and edge distance.
+ * Needs a C symmetry_bd (or a C binding); Fortran: call symmetry_bd(3,ex,f,fh,SoA)
+ * Conventions:
+ *   nghost = 3
+ *   ex[3]  = {ex1,ex2,ex3}
+ *   f      = original grid (ex1*ex2*ex3)
+ *   fh     = extended grid ((ex1+3)*(ex2+3)*(ex3+3)), i.e. Fortran (-2:ex1, ...)
+ *   SoA[3] = input parameters (forwarded to symmetry_bd)
+ */
+void lopsided(const int ex[3],
+              const double *X, const double *Y, const double *Z,
+              const double *f, double *f_rhs,
+              const double *Sfx, const double *Sfy, const double *Sfz,
+              int Symmetry, const double SoA[3])
+{
+    const double ZEO = 0.0, ONE = 1.0, F3 = 3.0;
+    const double TWO = 2.0, F6 = 6.0, F18 = 18.0;
+    const double F12 = 12.0, F10 = 10.0, EIT = 8.0;
+
+    const int NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2;
+    (void)OCTANT; // defined but never used, same as in the Fortran source; harmless
+
+    const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
+
+    // Corresponds to Fortran: dX = X(2)-X(1)  (Fortran is 1-based)
+    // C: X[1]-X[0]
+    const double dX = X[1] - X[0];
+    const double dY = Y[1] - Y[0];
+    const double dZ = Z[1] - Z[0];
+
+    const double d12dx = ONE / F12 / dX;
+    const double d12dy = ONE / F12 / dY;
+    const double d12dz = ONE / F12 / dZ;
+
+    // The Fortran computes d2dx/d2dy/d2dz but never uses them in this subroutine (computed here too for parity)
+    const double d2dx  = ONE / TWO / dX;
+    const double d2dy  = ONE / TWO / dY;
+    const double d2dz  = ONE / TWO / dZ;
+    (void)d2dx; (void)d2dy; (void)d2dz;
+
+    // Fortran:
+    // imax = ex(1); jmax = ex(2); kmax = ex(3)
+    const int imaxF = ex1; // note: imaxF/jmaxF/kmaxF are not read below; the loop tests use ex1/ex2/ex3
+    const int jmaxF = ex2;
+    const int kmaxF = ex3;
+
+    // Fortran:
+    // imin=jmin=kmin=1; set to -2 when the symmetry condition holds
+    int iminF = 1, jminF = 1, kminF = 1;
+    if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
+    if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
+    if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
+
+    // Allocate fh: size (ex1+3)*(ex2+3)*(ex3+3)
+    const size_t nx = (size_t)ex1 + 3;
+    const size_t ny = (size_t)ex2 + 3;
+    const size_t nz = (size_t)ex3 + 3;
+    const size_t fh_size = nx * ny * nz;
+
+    double *fh = (double*)malloc(fh_size * sizeof(double));
+    if (!fh) return; // out of memory: return silently with f_rhs untouched (could be changed to abort / report an error)
+
+    // Fortran: call symmetry_bd(3,ex,f,fh,SoA)
+    symmetry_bd(3, ex, f, fh, SoA);
+
+    /*
+     * Fortran main loop:
+     * do k=1,ex(3)-1
+     * do j=1,ex(2)-1
+     * do i=1,ex(1)-1
+     *
+     * Converted to C 0-based indices:
+     * k0 = 0..ex3-2, j0 = 0..ex2-2, i0 = 0..ex1-2
+     *
+     * The i/j/k used when accessing fh are still the Fortran index values:
+     * iF=i0+1, jF=j0+1, kF=k0+1
+     */
+    for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
+        const int kF = k0 + 1;
+        for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
+            const int jF = j0 + 1;
+            for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
+                const int iF = i0 + 1;
+
+                const size_t p = idx_ex(i0, j0, k0, ex);
+
+                // ---------------- x direction ----------------
+                const double sfx = Sfx[p];
+                if (sfx > ZEO) {
+                    // Fortran: if(i+3 <= imax)
+                    // iF+3 <= ex1  <=> i0+4 <= ex1 <=> i0 <= ex1-4
+                    if (i0 <= ex1 - 4) {
+                        f_rhs[p] += sfx * d12dx *
+                            (-F3  * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+                             -F10 * fh[idx_fh_F(iF    , jF, kF, ex)]
+                             +F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
+                             -F6  * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+                             +      fh[idx_fh_F(iF + 3, jF, kF, ex)]);
+                    }
+                    // elseif(i+2 <= imax)  <=> i0 <= ex1-3
+                    else if (i0 <= ex1 - 3) {
+                        f_rhs[p] += sfx * d12dx *
+                            ( fh[idx_fh_F(iF - 2, jF, kF, ex)]
+                             -EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+                             +EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
+                             -      fh[idx_fh_F(iF + 2, jF, kF, ex)]);
+                    }
+                    // elseif(i+1 <= imax)  <=> i0 <= ex1-2 (always true inside this loop)
+                    else if (i0 <= ex1 - 2) {
+                        f_rhs[p] -= sfx * d12dx *
+                            (-F3  * fh[idx_fh_F(iF + 1, jF, kF, ex)]
+                             -F10 * fh[idx_fh_F(iF    , jF, kF, ex)]
+                             +F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+                             -F6  * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+                             +      fh[idx_fh_F(iF - 3, jF, kF, ex)]);
+                    }
+                } else if (sfx < ZEO) {
+                    // Fortran: if(i-3 >= imin)
+                    // (iF-3) >= iminF  <=> (i0-2) >= iminF
+                    if ((i0 - 2) >= iminF) {
+                        f_rhs[p] -= sfx * d12dx *
+                            (-F3  * fh[idx_fh_F(iF + 1, jF, kF, ex)]
+                             -F10 * fh[idx_fh_F(iF    , jF, kF, ex)]
+                             +F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+                             -F6  * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+                             +      fh[idx_fh_F(iF - 3, jF, kF, ex)]);
+                    }
+                    // elseif(i-2 >= imin) <=> (i0-1) >= iminF
+                    else if ((i0 - 1) >= iminF) {
+                        f_rhs[p] += sfx * d12dx *
+                            ( fh[idx_fh_F(iF - 2, jF, kF, ex)]
+                             -EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+                             +EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
+                             -      fh[idx_fh_F(iF + 2, jF, kF, ex)]);
+                    }
+                    // elseif(i-1 >= imin) <=> i0 >= iminF
+                    else if (i0 >= iminF) {
+                        f_rhs[p] += sfx * d12dx *
+                            (-F3  * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+                             -F10 * fh[idx_fh_F(iF    , jF, kF, ex)]
+                             +F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
+                             -F6  * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+                             +      fh[idx_fh_F(iF + 3, jF, kF, ex)]);
+                    }
+                }
+
+                // ---------------- y direction ----------------
+                const double sfy = Sfy[p];
+                if (sfy > ZEO) {
+                    // jF+3 <= ex2 <=> j0+4 <= ex2 <=> j0 <= ex2-4
+                    if (j0 <= ex2 - 4) {
+                        f_rhs[p] += sfy * d12dy *
+                            (-F3  * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+                             -F10 * fh[idx_fh_F(iF, jF    , kF, ex)]
+                             +F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
+                             -F6  * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+                             +      fh[idx_fh_F(iF, jF + 3, kF, ex)]);
+                    } else if (j0 <= ex2 - 3) { // jF+2 <= ex2: centered stencil
+                        f_rhs[p] += sfy * d12dy *
+                            ( fh[idx_fh_F(iF, jF - 2, kF, ex)]
+                             -EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+                             +EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
+                             -      fh[idx_fh_F(iF, jF + 2, kF, ex)]);
+                    } else if (j0 <= ex2 - 2) { // jF+1 <= ex2 (always true inside this loop)
+                        f_rhs[p] -= sfy * d12dy *
+                            (-F3  * fh[idx_fh_F(iF, jF + 1, kF, ex)]
+                             -F10 * fh[idx_fh_F(iF, jF    , kF, ex)]
+                             +F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+                             -F6  * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+                             +      fh[idx_fh_F(iF, jF - 3, kF, ex)]);
+                    }
+                } else if (sfy < ZEO) {
+                    if ((j0 - 2) >= jminF) { // jF-3 >= jminF
+                        f_rhs[p] -= sfy * d12dy *
+                            (-F3  * fh[idx_fh_F(iF, jF + 1, kF, ex)]
+                             -F10 * fh[idx_fh_F(iF, jF    , kF, ex)]
+                             +F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+                             -F6  * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+                             +      fh[idx_fh_F(iF, jF - 3, kF, ex)]);
+                    } else if ((j0 - 1) >= jminF) { // jF-2 >= jminF: centered stencil
+                        f_rhs[p] += sfy * d12dy *
+                            ( fh[idx_fh_F(iF, jF - 2, kF, ex)]
+                             -EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+                             +EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
+                             -      fh[idx_fh_F(iF, jF + 2, kF, ex)]);
+                    } else if (j0 >= jminF) { // jF-1 >= jminF
+                        f_rhs[p] += sfy * d12dy *
+                            (-F3  * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+                             -F10 * fh[idx_fh_F(iF, jF    , kF, ex)]
+                             +F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
+                             -F6  * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+                             +      fh[idx_fh_F(iF, jF + 3, kF, ex)]);
+                    }
+                }
+
+                // ---------------- z direction ----------------
+                const double sfz = Sfz[p];
+                if (sfz > ZEO) {
+                    if (k0 <= ex3 - 4) { // kF+3 <= ex3
+                        f_rhs[p] += sfz * d12dz *
+                            (-F3  * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+                             -F10 * fh[idx_fh_F(iF, jF, kF    , ex)]
+                             +F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
+                             -F6  * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+                             +      fh[idx_fh_F(iF, jF, kF + 3, ex)]);
+                    } else if (k0 <= ex3 - 3) { // kF+2 <= ex3: centered stencil
+                        f_rhs[p] += sfz * d12dz *
+                            ( fh[idx_fh_F(iF, jF, kF - 2, ex)]
+                             -EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+                             +EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
+                             -      fh[idx_fh_F(iF, jF, kF + 2, ex)]);
+                    } else if (k0 <= ex3 - 2) { // kF+1 <= ex3 (always true inside this loop)
+                        f_rhs[p] -= sfz * d12dz *
+                            (-F3  * fh[idx_fh_F(iF, jF, kF + 1, ex)]
+                             -F10 * fh[idx_fh_F(iF, jF, kF    , ex)]
+                             +F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+                             -F6  * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+                             +      fh[idx_fh_F(iF, jF, kF - 3, ex)]);
+                    }
+                } else if (sfz < ZEO) {
+                    if ((k0 - 2) >= kminF) { // kF-3 >= kminF
+                        f_rhs[p] -= sfz * d12dz *
+                            (-F3  * fh[idx_fh_F(iF, jF, kF + 1, ex)]
+                             -F10 * fh[idx_fh_F(iF, jF, kF    , ex)]
+                             +F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+                             -F6  * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+                             +      fh[idx_fh_F(iF, jF, kF - 3, ex)]);
+                    } else if ((k0 - 1) >= kminF) { // kF-2 >= kminF: centered stencil
+                        f_rhs[p] += sfz * d12dz *
+                            ( fh[idx_fh_F(iF, jF, kF - 2, ex)]
+                             -EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+                             +EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
+                             -      fh[idx_fh_F(iF, jF, kF + 2, ex)]);
+                    } else if (k0 >= kminF) { // kF-1 >= kminF
+                        f_rhs[p] += sfz * d12dz *
+                            (-F3  * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+                             -F10 * fh[idx_fh_F(iF, jF, kF    , ex)]
+                             +F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
+                             -F6  * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+                             +      fh[idx_fh_F(iF, jF, kF + 3, ex)]);
+                    }
+                }
+            }
+        }
+    }
+    free(fh);
+}
+
+
+
+
+
--- a/AMSS_NCKU_source/BSSN/lopsided_kodis_c.C
+++ b/AMSS_NCKU_source/BSSN/lopsided_kodis_c.C
@@ -0,0 +1,248 @@
+#include "tool.h"
+
+/*
+ * lopsided_kodis: adds the advection terms (lopsided stencils) and, when eps > 0,
+ * KO (Kreiss-Oliger) dissipation to f_rhs; both passes read one shared symmetry_bd buffer fh.
+ */
+void lopsided_kodis(const int ex[3],
+                    const double *X, const double *Y, const double *Z,
+                    const double *f, double *f_rhs,
+                    const double *Sfx, const double *Sfy, const double *Sfz,
+                    int Symmetry, const double SoA[3], double eps)
+{
+    const double ZEO = 0.0, ONE = 1.0, F3 = 3.0;
+    const double F6 = 6.0, F18 = 18.0;
+    const double F12 = 12.0, F10 = 10.0, EIT = 8.0;
+    const double SIX = 6.0, FIT = 15.0, TWT = 20.0;
+    const double cof = 64.0; // 2^6
+
+    const int NO_SYMM = 0, EQ_SYMM = 1;
+
+    const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
+
+    const double dX = X[1] - X[0]; // grid spacing taken from the first two coordinates
+    const double dY = Y[1] - Y[0];
+    const double dZ = Z[1] - Z[0];
+
+    const double d12dx = ONE / F12 / dX; // 1/(12*dX), common factor of the advection stencils
+    const double d12dy = ONE / F12 / dY;
+    const double d12dz = ONE / F12 / dZ;
+
+    const int imaxF = ex1; // Fortran-style (1-based) upper bounds, used by the dissipation range
+    const int jmaxF = ex2;
+    const int kmaxF = ex3;
+    // Fortran-style lower bounds: 1, or -2 when symmetry is on and the first coordinate is within one spacing of 0
+    int iminF = 1, jminF = 1, kminF = 1;
+    if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
+    if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
+    if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
+
+    // fh for Fortran-style domain (-2:ex1,-2:ex2,-2:ex3)
+    const size_t nx = (size_t)ex1 + 3;
+    const size_t ny = (size_t)ex2 + 3;
+    const size_t nz = (size_t)ex3 + 3;
+    const size_t fh_size = nx * ny * nz;
+
+    double *fh = (double*)malloc(fh_size * sizeof(double));
+    if (!fh) return; // allocation failed: return silently, f_rhs is left unmodified
+
+    symmetry_bd(3, ex, f, fh, SoA); // fills fh from f (helper declared outside this file)
+
+    // Advection (same stencil logic as lopsided_c.C)
+    for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
+        const int kF = k0 + 1;
+        for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
+            const int jF = j0 + 1;
+            for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
+                const int iF = i0 + 1; // iF/jF/kF are the Fortran (1-based) indices used to address fh
+                const size_t p = idx_ex(i0, j0, k0, ex);
+                // ---- x direction: stencil chosen by the sign of Sfx and the distance to the index bounds ----
+                const double sfx = Sfx[p];
+                if (sfx > ZEO) {
+                    if (i0 <= ex1 - 4) { // iF+3 <= ex1
+                        f_rhs[p] += sfx * d12dx *
+                            (-F3  * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+                             -F10 * fh[idx_fh_F(iF    , jF, kF, ex)]
+                             +F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
+                             -F6  * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+                             +      fh[idx_fh_F(iF + 3, jF, kF, ex)]);
+                    } else if (i0 <= ex1 - 3) { // iF+2 <= ex1: centered stencil
+                        f_rhs[p] += sfx * d12dx *
+                            ( fh[idx_fh_F(iF - 2, jF, kF, ex)]
+                             -EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+                             +EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
+                             -      fh[idx_fh_F(iF + 2, jF, kF, ex)]);
+                    } else if (i0 <= ex1 - 2) { // iF+1 <= ex1 (always true inside this loop)
+                        f_rhs[p] -= sfx * d12dx *
+                            (-F3  * fh[idx_fh_F(iF + 1, jF, kF, ex)]
+                             -F10 * fh[idx_fh_F(iF    , jF, kF, ex)]
+                             +F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+                             -F6  * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+                             +      fh[idx_fh_F(iF - 3, jF, kF, ex)]);
+                    }
+                } else if (sfx < ZEO) {
+                    if ((i0 - 2) >= iminF) { // iF-3 >= iminF
+                        f_rhs[p] -= sfx * d12dx *
+                            (-F3  * fh[idx_fh_F(iF + 1, jF, kF, ex)]
+                             -F10 * fh[idx_fh_F(iF    , jF, kF, ex)]
+                             +F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+                             -F6  * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+                             +      fh[idx_fh_F(iF - 3, jF, kF, ex)]);
+                    } else if ((i0 - 1) >= iminF) { // iF-2 >= iminF: centered stencil
+                        f_rhs[p] += sfx * d12dx *
+                            ( fh[idx_fh_F(iF - 2, jF, kF, ex)]
+                             -EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+                             +EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
+                             -      fh[idx_fh_F(iF + 2, jF, kF, ex)]);
+                    } else if (i0 >= iminF) { // iF-1 >= iminF
+                        f_rhs[p] += sfx * d12dx *
+                            (-F3  * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+                             -F10 * fh[idx_fh_F(iF    , jF, kF, ex)]
+                             +F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
+                             -F6  * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+                             +      fh[idx_fh_F(iF + 3, jF, kF, ex)]);
+                    }
+                }
+                // ---- y direction: same branch structure as x, using Sfy, j indices and d12dy ----
+                const double sfy = Sfy[p];
+                if (sfy > ZEO) {
+                    if (j0 <= ex2 - 4) {
+                        f_rhs[p] += sfy * d12dy *
+                            (-F3  * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+                             -F10 * fh[idx_fh_F(iF, jF    , kF, ex)]
+                             +F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
+                             -F6  * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+                             +      fh[idx_fh_F(iF, jF + 3, kF, ex)]);
+                    } else if (j0 <= ex2 - 3) {
+                        f_rhs[p] += sfy * d12dy *
+                            ( fh[idx_fh_F(iF, jF - 2, kF, ex)]
+                             -EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+                             +EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
+                             -      fh[idx_fh_F(iF, jF + 2, kF, ex)]);
+                    } else if (j0 <= ex2 - 2) {
+                        f_rhs[p] -= sfy * d12dy *
+                            (-F3  * fh[idx_fh_F(iF, jF + 1, kF, ex)]
+                             -F10 * fh[idx_fh_F(iF, jF    , kF, ex)]
+                             +F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+                             -F6  * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+                             +      fh[idx_fh_F(iF, jF - 3, kF, ex)]);
+                    }
+                } else if (sfy < ZEO) {
+                    if ((j0 - 2) >= jminF) {
+                        f_rhs[p] -= sfy * d12dy *
+                            (-F3  * fh[idx_fh_F(iF, jF + 1, kF, ex)]
+                             -F10 * fh[idx_fh_F(iF, jF    , kF, ex)]
+                             +F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+                             -F6  * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+                             +      fh[idx_fh_F(iF, jF - 3, kF, ex)]);
+                    } else if ((j0 - 1) >= jminF) {
+                        f_rhs[p] += sfy * d12dy *
+                            ( fh[idx_fh_F(iF, jF - 2, kF, ex)]
+                             -EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+                             +EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
+                             -      fh[idx_fh_F(iF, jF + 2, kF, ex)]);
+                    } else if (j0 >= jminF) {
+                        f_rhs[p] += sfy * d12dy *
+                            (-F3  * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+                             -F10 * fh[idx_fh_F(iF, jF    , kF, ex)]
+                             +F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
+                             -F6  * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+                             +      fh[idx_fh_F(iF, jF + 3, kF, ex)]);
+                    }
+                }
+                // ---- z direction: same branch structure as x, using Sfz, k indices and d12dz ----
+                const double sfz = Sfz[p];
+                if (sfz > ZEO) {
+                    if (k0 <= ex3 - 4) {
+                        f_rhs[p] += sfz * d12dz *
+                            (-F3  * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+                             -F10 * fh[idx_fh_F(iF, jF, kF    , ex)]
+                             +F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
+                             -F6  * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+                             +      fh[idx_fh_F(iF, jF, kF + 3, ex)]);
+                    } else if (k0 <= ex3 - 3) {
+                        f_rhs[p] += sfz * d12dz *
+                            ( fh[idx_fh_F(iF, jF, kF - 2, ex)]
+                             -EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+                             +EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
+                             -      fh[idx_fh_F(iF, jF, kF + 2, ex)]);
+                    } else if (k0 <= ex3 - 2) {
+                        f_rhs[p] -= sfz * d12dz *
+                            (-F3  * fh[idx_fh_F(iF, jF, kF + 1, ex)]
+                             -F10 * fh[idx_fh_F(iF, jF, kF    , ex)]
+                             +F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+                             -F6  * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+                             +      fh[idx_fh_F(iF, jF, kF - 3, ex)]);
+                    }
+                } else if (sfz < ZEO) {
+                    if ((k0 - 2) >= kminF) {
+                        f_rhs[p] -= sfz * d12dz *
+                            (-F3  * fh[idx_fh_F(iF, jF, kF + 1, ex)]
+                             -F10 * fh[idx_fh_F(iF, jF, kF    , ex)]
+                             +F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+                             -F6  * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+                             +      fh[idx_fh_F(iF, jF, kF - 3, ex)]);
+                    } else if ((k0 - 1) >= kminF) {
+                        f_rhs[p] += sfz * d12dz *
+                            ( fh[idx_fh_F(iF, jF, kF - 2, ex)]
+                             -EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+                             +EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
+                             -      fh[idx_fh_F(iF, jF, kF + 2, ex)]);
+                    } else if (k0 >= kminF) {
+                        f_rhs[p] += sfz * d12dz *
+                            (-F3  * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+                             -F10 * fh[idx_fh_F(iF, jF, kF    , ex)]
+                             +F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
+                             -F6  * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+                             +      fh[idx_fh_F(iF, jF, kF + 3, ex)]);
+                    }
+                }
+            }
+        }
+    }
+
+    // KO dissipation (same domain restriction as kodiss_c.C)
+    if (eps > ZEO) {
+        const int i0_lo = (iminF + 2 > 0) ? iminF + 2 : 0; // iF-3 >= iminF <=> i0 >= iminF+2, clamped at 0
+        const int j0_lo = (jminF + 2 > 0) ? jminF + 2 : 0;
+        const int k0_lo = (kminF + 2 > 0) ? kminF + 2 : 0;
+        const int i0_hi = imaxF - 4; // inclusive: iF+3 <= imaxF <=> i0 <= imaxF-4
+        const int j0_hi = jmaxF - 4;
+        const int k0_hi = kmaxF - 4;
+
+        if (!(i0_lo > i0_hi || j0_lo > j0_hi || k0_lo > k0_hi)) { // skip when any index range is empty
+            for (int k0 = k0_lo; k0 <= k0_hi; ++k0) {
+                const int kF = k0 + 1;
+                for (int j0 = j0_lo; j0 <= j0_hi; ++j0) {
+                    const int jF = j0 + 1;
+                    for (int i0 = i0_lo; i0 <= i0_hi; ++i0) {
+                        const int iF = i0 + 1;
+                        const size_t p = idx_ex(i0, j0, k0, ex);
+                        // Per direction: 7-point stencil (1,-6,15,-20,15,-6,1) on fh divided by the grid spacing
+                        const double Dx_term =
+                            ((fh[idx_fh_F(iF - 3, jF, kF, ex)] + fh[idx_fh_F(iF + 3, jF, kF, ex)]) -
+                             SIX * (fh[idx_fh_F(iF - 2, jF, kF, ex)] + fh[idx_fh_F(iF + 2, jF, kF, ex)]) +
+                             FIT * (fh[idx_fh_F(iF - 1, jF, kF, ex)] + fh[idx_fh_F(iF + 1, jF, kF, ex)]) -
+                             TWT *  fh[idx_fh_F(iF,     jF, kF, ex)]) / dX;
+
+                        const double Dy_term =
+                            ((fh[idx_fh_F(iF, jF - 3, kF, ex)] + fh[idx_fh_F(iF, jF + 3, kF, ex)]) -
+                             SIX * (fh[idx_fh_F(iF, jF - 2, kF, ex)] + fh[idx_fh_F(iF, jF + 2, kF, ex)]) +
+                             FIT * (fh[idx_fh_F(iF, jF - 1, kF, ex)] + fh[idx_fh_F(iF, jF + 1, kF, ex)]) -
+                             TWT *  fh[idx_fh_F(iF, jF,     kF, ex)]) / dY;
+
+                        const double Dz_term =
+                            ((fh[idx_fh_F(iF, jF, kF - 3, ex)] + fh[idx_fh_F(iF, jF, kF + 3, ex)]) -
+                             SIX * (fh[idx_fh_F(iF, jF, kF - 2, ex)] + fh[idx_fh_F(iF, jF, kF + 2, ex)]) +
+                             FIT * (fh[idx_fh_F(iF, jF, kF - 1, ex)] + fh[idx_fh_F(iF, jF, kF + 1, ex)]) -
+                             TWT *  fh[idx_fh_F(iF, jF, kF,     ex)]) / dZ;
+                        // The three directional terms are summed and scaled by eps/64 before being added to f_rhs
+                        f_rhs[p] += (eps / cof) * (Dx_term + Dy_term + Dz_term);
+                    }
+                }
+            }
+        }
+    }
+
+    free(fh);
+}
--- a/AMSS_NCKU_source/BSSN/lopsidediff.f90
+++ b/AMSS_NCKU_source/BSSN/lopsidediff.f90
--- a/AMSS_NCKU_source/BSSN/prolongrestrict.f90
+++ b/AMSS_NCKU_source/BSSN/prolongrestrict.f90
--- a/AMSS_NCKU_source/BSSN/prolongrestrict.h
+++ b/AMSS_NCKU_source/BSSN/prolongrestrict.h
@@ -1,55 +1,55 @@
-
-#ifndef PROLONGRESTRICT_H
-#define PROLONGRESTRICT_H
-
-#ifdef fortran1
-#define f_prolong3 prolong3
-#define f_prolongmix3 prolongmix3
-#define f_prolongcopy3 prolongcopy3
-#define f_restrict3 restrict3
-#endif
-
-#ifdef fortran2
-#define f_prolong3 PROLONG3
-#define f_prolongmix3 PROLONGMIX3
-#define f_prolongcopy3 PROLONGCOPY3
-#define f_restrict3 RESTRICT3
-#endif
-
-#ifdef fortran3
-#define f_prolong3 prolong3_
-#define f_prolongmix3 prolongmix3_
-#define f_prolongcopy3 prolongcopy3_
-#define f_restrict3 restrict3_
-#endif
-
-extern "C"
-{
-	int f_prolong3(int &, double *, double *, int *, double *,
-				   double *, double *, int *, double *,
-				   double *, double *, double *, int &);
-}
-
-extern "C"
-{
-	void f_restrict3(int &, double *, double *, int *, double *,
-					 double *, double *, int *, double *,
-					 double *, double *, double *, int &);
-}
-
-extern "C"
-{
-	int f_prolongmix3(int &, double *, double *, int *, double *,
-					  double *, double *, int *, double *,
-					  double *, double *, double *, int &,
-					  double *, double *);
-}
-
-extern "C"
-{
-	int f_prolongcopy3(int &, double *, double *, int *, double *,
-					   double *, double *, int *, double *,
-					   double *, double *, double *, int &);
-}
-
-#endif /* PROLONGRESTRICT_H */
+
+#ifndef PROLONGRESTRICT_H
+#define PROLONGRESTRICT_H
+
+#ifdef fortran1
+#define f_prolong3 prolong3
+#define f_prolongmix3 prolongmix3
+#define f_prolongcopy3 prolongcopy3
+#define f_restrict3 restrict3
+#endif
+
+#ifdef fortran2
+#define f_prolong3 PROLONG3
+#define f_prolongmix3 PROLONGMIX3
+#define f_prolongcopy3 PROLONGCOPY3
+#define f_restrict3 RESTRICT3
+#endif
+
+#ifdef fortran3
+#define f_prolong3 prolong3_
+#define f_prolongmix3 prolongmix3_
+#define f_prolongcopy3 prolongcopy3_
+#define f_restrict3 restrict3_
+#endif
+
+// C-linkage prototypes for the routines named by the f_* macros above.
+// All four are grouped in one extern "C" block; each declaration keeps its
+// original argument list and return type.
+// NOTE(review): the implementations are not visible from this header
+// (presumably prolongrestrict.f90); confirm argument meanings there.
+extern "C"
+{
+	// 13 arguments, int return.
+	int f_prolong3(int &, double *, double *, int *, double *,
+	               double *, double *, int *, double *,
+	               double *, double *, double *, int &);
+
+	// Same argument list as f_prolong3, but declared void.
+	void f_restrict3(int &, double *, double *, int *, double *,
+	                 double *, double *, int *, double *,
+	                 double *, double *, double *, int &);
+
+	// f_prolong3's argument list followed by two extra double * arguments.
+	int f_prolongmix3(int &, double *, double *, int *, double *,
+	                  double *, double *, int *, double *,
+	                  double *, double *, double *, int &,
+	                  double *, double *);
+
+	// Same argument list and return type as f_prolong3.
+	int f_prolongcopy3(int &, double *, double *, int *, double *,
+	                   double *, double *, int *, double *,
+	                   double *, double *, double *, int &);
+} // extern "C"
+
+#endif /* PROLONGRESTRICT_H */
--- a/AMSS_NCKU_source/BSSN/prolongrestrict_cell.f90
+++ b/AMSS_NCKU_source/BSSN/prolongrestrict_cell.f90
--- a/AMSS_NCKU_source/BSSN/prolongrestrict_vertex.f90
+++ b/AMSS_NCKU_source/BSSN/prolongrestrict_vertex.f90
--- a/AMSS_NCKU_source/BSSN/sommerfeld_rout.f90
+++ b/AMSS_NCKU_source/BSSN/sommerfeld_rout.f90
--- a/AMSS_NCKU_source/BSSN/sommerfeld_rout.h
+++ b/AMSS_NCKU_source/BSSN/sommerfeld_rout.h
@@ -1,53 +1,53 @@
-
-#ifndef SOMMERFELD_ROUT_H
-#define SOMMERFELD_ROUT_H
-
-#ifdef fortran1
-#define f_sommerfeld_rout sommerfeld_rout
-#define f_sommerfeld_routbam sommerfeld_routbam
-#define f_sommerfeld_routbam_ss sommerfeld_routbam_ss
-#define f_falloff_ss falloff_ss
-#endif
-#ifdef fortran2
-#define f_sommerfeld_rout SOMMERFELD_ROUT
-#define f_sommerfeld_rout SOMMERFELD_ROUTBAM
-#define f_sommerfeld_rout_ss SOMMERFELD_ROUTBAM_SS
-#define f_falloff_ss FALLOFF_SS
-#endif
-#ifdef fortran3
-#define f_sommerfeld_rout sommerfeld_rout_
-#define f_sommerfeld_routbam sommerfeld_routbam_
-#define f_sommerfeld_routbam_ss sommerfeld_routbam_ss_
-#define f_falloff_ss falloff_ss_
-#endif
-
-extern "C"
-{
-	void f_sommerfeld_rout(int *, double *, double *, double *,
-						   double &, double &, double &, double &, double &, double &, double &, double *,
-						   double *, double *, double *, double *,
-						   int &, int &);
-}
-
-extern "C"
-{
-	void f_sommerfeld_routbam(int *, double *, double *, double *,
-							  double &, double &, double &, double &, double &, double &, double *,
-							  double *, double &, double *, int &);
-}
-
-extern "C"
-{
-	void f_sommerfeld_routbam_ss(int *, double *, double *, double *,
-								 double &, double &, double &, double &, double &, double &, double *,
-								 double *, double &, double *, int &);
-}
-
-extern "C"
-{
-	void f_falloff_ss(int *, double *, double *, double *,
-					  double &, double &, double &, double &, double &, double &, double *,
-					  int &, double *, int &);
-}
-
-#endif /* SOMMERFELD_ROUT_H */
+
+#ifndef SOMMERFELD_ROUT_H
+#define SOMMERFELD_ROUT_H
+
+#ifdef fortran1
+#define f_sommerfeld_rout sommerfeld_rout
+#define f_sommerfeld_routbam sommerfeld_routbam
+#define f_sommerfeld_routbam_ss sommerfeld_routbam_ss
+#define f_falloff_ss falloff_ss
+#endif
+#ifdef fortran2 /* upper-case symbol names, no trailing underscore */
+#define f_sommerfeld_rout SOMMERFELD_ROUT
+#define f_sommerfeld_routbam SOMMERFELD_ROUTBAM
+#define f_sommerfeld_routbam_ss SOMMERFELD_ROUTBAM_SS
+#define f_falloff_ss FALLOFF_SS
+#endif
+#ifdef fortran3
+#define f_sommerfeld_rout sommerfeld_rout_
+#define f_sommerfeld_routbam sommerfeld_routbam_
+#define f_sommerfeld_routbam_ss sommerfeld_routbam_ss_
+#define f_falloff_ss falloff_ss_
+#endif
+
+// C-linkage prototypes for the routines named by the f_* macros above.
+// All four are grouped in one extern "C" block; each declaration keeps its
+// original argument list and return type (void throughout).
+// NOTE(review): the implementations are not visible from this header
+// (presumably sommerfeld_rout.f90); confirm argument meanings there.
+extern "C"
+{
+	// 18 arguments: seven double & in the second row, two trailing int &.
+	void f_sommerfeld_rout(int *, double *, double *, double *,
+	                       double &, double &, double &, double &, double &, double &, double &, double *,
+	                       double *, double *, double *, double *,
+	                       int &, int &);
+
+	// 15 arguments: six double & in the second row.
+	void f_sommerfeld_routbam(int *, double *, double *, double *,
+	                          double &, double &, double &, double &, double &, double &, double *,
+	                          double *, double &, double *, int &);
+
+	// Same argument list as f_sommerfeld_routbam.
+	void f_sommerfeld_routbam_ss(int *, double *, double *, double *,
+	                             double &, double &, double &, double &, double &, double &, double *,
+	                             double *, double &, double *, int &);
+
+	// 14 arguments: same first two rows as f_sommerfeld_routbam, then int &, double *, int &.
+	void f_falloff_ss(int *, double *, double *, double *,
+	                  double &, double &, double &, double &, double &, double &, double *,
+	                  int &, double *, int &);
+} // extern "C"
+
+#endif /* SOMMERFELD_ROUT_H */
--- a/AMSS_NCKU_source/BSSN/transpbh.C
+++ b/AMSS_NCKU_source/BSSN/transpbh.C
@@ -1,74 +1,74 @@
-// $Id: transpbh.C,v 1.2 2013/04/19 03:49:25 zjcao Exp $
-#ifdef newc
-#include <iostream>
-#include <iomanip>
-#include <fstream>
-#include <cstdlib>
-#include <cstdio>
-#include <string>
-#include <cmath>
-using namespace std;
-#else
-#include <iostream.h>
-#include <iomanip.h>
-#include <fstream.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#endif
-
-#include "macrodef.h"
-
-// transmit black hole's position from bssn class
-
-int BHN;
-double Mass[3];
-double PBH[9];
-
-void setpbh(int iBHN, double **iPBH, double *iMass, int rBHN)
-{
-  BHN = Mymax(iBHN, rBHN);
-  for (int i = 0; i < iBHN; i++)
-  {
-    for (int j = 0; j < 3; j++)
-      PBH[3 * i + j] = iPBH[i][j];
-    Mass[i] = iMass[i];
-  }
-  if (BHN < rBHN)
-  {
-    if (rBHN > 2)
-      cout << "error in transpbh.C: something wrong." << endl;
-    else
-    {
-      for (int j = 0; j < 3; j++)
-        PBH[3 + j] = -iPBH[0][j];
-
-      Mass[1] = Mass[0];
-    }
-  }
-}
-extern "C"
-{
-
-#ifdef fortran1
-  void getpbh
-#endif
-#ifdef fortran2
-      void GETPBH
-#endif
-#ifdef fortran3
-      void
-      getpbh_
-#endif
-      (int &oBHN, double *oPBH, double *oMass)
-  {
-    oBHN = BHN;
-    for (int i = 0; i < BHN; i++)
-      oMass[i] = Mass[i];
-    for (int i = 0; i < 3 * BHN; i++)
-      oPBH[i] = PBH[i];
-
-    //  printf("have set BH_num = %d\n",oBHN);
-  }
-}
+// $Id: transpbh.C,v 1.2 2013/04/19 03:49:25 zjcao Exp $
+#ifdef newc
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <cstdlib>
+#include <cstdio>
+#include <string>
+#include <cmath>
+using namespace std;
+#else
+#include <iostream.h>
+#include <iomanip.h>
+#include <fstream.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#endif
+
+#include "macrodef.h"
+
+// Hands the black-hole positions and masses stored by setpbh() over to callers of getpbh().
+
+int BHN;
+double Mass[3];
+double PBH[9];
+
+void setpbh(int iBHN, double **iPBH, double *iMass, int rBHN)
+{ // Cache iBHN hole positions iPBH[i][0..2] and masses iMass[i] in the file-scope PBH/Mass; BHN = max(iBHN, rBHN).
+  BHN = Mymax(iBHN, rBHN);
+  for (int i = 0; i < iBHN; i++) // NOTE(review): no bound check against the 3-hole capacity of Mass[3]/PBH[9]
+  {
+    for (int j = 0; j < 3; j++)
+      PBH[3 * i + j] = iPBH[i][j];
+    Mass[i] = iMass[i];
+  }
+  if (iBHN < rBHN) // fewer holes supplied than requested (BHN itself can never be below rBHN)
+  {
+    if (rBHN > 2 || iBHN < 1) // only a single supplied hole can be mirrored into a pair
+      cout << "error in transpbh.C: something wrong." << endl;
+    else
+    {
+      for (int j = 0; j < 3; j++)
+        PBH[3 + j] = -iPBH[0][j]; // second hole = first hole reflected through the origin
+
+      Mass[1] = Mass[0];
+    }
+  }
+}
+extern "C"
+{
+  // Getter with C linkage: copies the hole count, positions and masses cached by setpbh into the caller's buffers.
+#ifdef fortran1
+  void getpbh
+#endif
+#ifdef fortran2
+  void GETPBH
+#endif
+#ifdef fortran3
+  void
+  getpbh_
+#endif
+  (int &oBHN, double *oPBH, double *oMass)
+  {
+    oBHN = BHN;
+    for (int hole = 0; hole < BHN; ++hole)
+    {
+      oMass[hole] = Mass[hole]; // one mass per hole
+      for (int axis = 0; axis < 3; ++axis)
+        oPBH[3 * hole + axis] = PBH[3 * hole + axis]; // three packed coordinates per hole
+    }
+  }
+}
--- a/AMSS_NCKU_source/BSSN_GPU/bssn_gpu.cu
+++ b/AMSS_NCKU_source/BSSN_GPU/bssn_gpu.cu
--- a/AMSS_NCKU_source/BSSN_GPU/bssn_gpu.h
+++ b/AMSS_NCKU_source/BSSN_GPU/bssn_gpu.h
@@ -1,73 +1,73 @@
-
-#ifndef BSSN_GPU_H_
-#define BSSN_GPU_H_
-#include "bssn_macro.h"
-#include "macrodef.fh"
-
-#define DEVICE_ID 0
-// #define DEVICE_ID_BY_MPI_RANK
-#define GRID_DIM 256
-#define BLOCK_DIM 128
-
-#define _FH2_(i, j, k) fh[(i) + (j) * _1D_SIZE[2] + (k) * _2D_SIZE[2]]
-#define _FH3_(i, j, k) fh[(i) + (j) * _1D_SIZE[3] + (k) * _2D_SIZE[3]]
-#define pow2(x) ((x) * (x))
-#define TimeBetween(a, b) ((b.tv_sec - a.tv_sec) + (b.tv_usec - a.tv_usec) / 1000000.0f)
-#define M_ metac.
-#define Mh_ meta->
-#define Ms_ metassc.
-#define Msh_ metass->
-
-// #define TIMING
-
-#define RHS_SS_PARA int calledby, int mpi_rank, int *ex, double &T, double *crho, double *sigma, double *R, double *X, double *Y, double *Z, double *drhodx, double *drhody, double *drhodz, double *dsigmadx, double *dsigmady, double *dsigmadz, double *dRdx, double *dRdy, double *dRdz, double *drhodxx, double *drhodxy, double *drhodxz, double *drhodyy, double *drhodyz, double *drhodzz, double *dsigmadxx, double *dsigmadxy, double *dsigmadxz, double *dsigmadyy, double *dsigmadyz, double *dsigmadzz, double *dRdxx, double *dRdxy, double *dRdxz, double *dRdyy, double *dRdyz, double *dRdzz, double *chi, double *trK, double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz, double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz, double *Gamx, double *Gamy, double *Gamz, double *Lap, double *betax, double *betay, double *betaz, double *dtSfx, double *dtSfy, double *dtSfz, double *chi_rhs, double *trK_rhs, double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs, double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs, double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs, double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs, double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs, double *rho, double *Sx, double *Sy, double *Sz, double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz, double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz, double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz, double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz, double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz, double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res, double *Gmx_Res, double *Gmy_Res, double *Gmz_Res, int &Symmetry, int &Lev, double &eps, int 
&sst, int &co
-
-/**  main function */
-int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,
-            double *X, double *Y, double *Z,
-
-            double *chi, double *trK,
-
-            double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
-
-            double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
-
-            double *Gamx, double *Gamy, double *Gamz,
-
-            double *Lap, double *betax, double *betay, double *betaz,
-
-            double *dtSfx, double *dtSfy, double *dtSfz,
-
-            double *chi_rhs, double *trK_rhs,
-
-            double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
-
-            double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
-
-            double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
-
-            double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
-
-            double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
-
-            double *rho, double *Sx, double *Sy, double *Sz, double *Sxx,
-            double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
-
-            double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
-
-            double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
-
-            double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
-
-            double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
-
-            double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res,
-            double *Gmx_Res, double *Gmy_Res, double *Gmz_Res,
-            int &Symmetry, int &Lev, double &eps, int &co);
-
-int gpu_rhs_ss(RHS_SS_PARA);
-
-/** Init GPU side data in GPUMeta. */
-// void init_fluid_meta_gpu(GPUMeta *gpu_meta);
-
-#endif
+
+#ifndef BSSN_GPU_H_
+#define BSSN_GPU_H_
+#include "bssn_macro.h"
+#include "macrodef.fh"
+
+#define DEVICE_ID 0
+// #define DEVICE_ID_BY_MPI_RANK
+#define GRID_DIM 256
+#define BLOCK_DIM 128
+
+#define _FH2_(i, j, k) fh[(i) + (j) * _1D_SIZE[2] + (k) * _2D_SIZE[2]]
+#define _FH3_(i, j, k) fh[(i) + (j) * _1D_SIZE[3] + (k) * _2D_SIZE[3]]
+#define pow2(x) ((x) * (x))
+#define TimeBetween(a, b) (((b).tv_sec - (a).tv_sec) + ((b).tv_usec - (a).tv_usec) / 1000000.0) /* seconds from a to b; arguments parenthesized, arithmetic in double */
+#define M_ metac.
+#define Mh_ meta->
+#define Ms_ metassc.
+#define Msh_ metass->
+
+// #define TIMING
+
+#define RHS_SS_PARA int calledby, int mpi_rank, int *ex, double &T, double *crho, double *sigma, double *R, double *X, double *Y, double *Z, double *drhodx, double *drhody, double *drhodz, double *dsigmadx, double *dsigmady, double *dsigmadz, double *dRdx, double *dRdy, double *dRdz, double *drhodxx, double *drhodxy, double *drhodxz, double *drhodyy, double *drhodyz, double *drhodzz, double *dsigmadxx, double *dsigmadxy, double *dsigmadxz, double *dsigmadyy, double *dsigmadyz, double *dsigmadzz, double *dRdxx, double *dRdxy, double *dRdxz, double *dRdyy, double *dRdyz, double *dRdzz, double *chi, double *trK, double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz, double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz, double *Gamx, double *Gamy, double *Gamz, double *Lap, double *betax, double *betay, double *betaz, double *dtSfx, double *dtSfy, double *dtSfz, double *chi_rhs, double *trK_rhs, double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs, double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs, double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs, double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs, double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs, double *rho, double *Sx, double *Sy, double *Sz, double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz, double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz, double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz, double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz, double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz, double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res, double *Gmx_Res, double *Gmy_Res, double *Gmz_Res, int &Symmetry, int &Lev, double &eps, int 
&sst, int &co
+
+/**  main function */
+int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,
+            double *X, double *Y, double *Z,
+
+            double *chi, double *trK,
+
+            double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
+
+            double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
+
+            double *Gamx, double *Gamy, double *Gamz,
+
+            double *Lap, double *betax, double *betay, double *betaz,
+
+            double *dtSfx, double *dtSfy, double *dtSfz,
+
+            double *chi_rhs, double *trK_rhs,
+
+            double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
+
+            double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
+
+            double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
+
+            double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
+
+            double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
+
+            double *rho, double *Sx, double *Sy, double *Sz, double *Sxx,
+            double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
+
+            double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
+
+            double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
+
+            double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
+
+            double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
+
+            double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res,
+            double *Gmx_Res, double *Gmy_Res, double *Gmz_Res,
+            int &Symmetry, int &Lev, double &eps, int &co);
+
+int gpu_rhs_ss(RHS_SS_PARA);
+
+/** Init GPU side data in GPUMeta. */
+// void init_fluid_meta_gpu(GPUMeta *gpu_meta);
+
+#endif
--- a/AMSS_NCKU_source/BSSN_GPU/bssn_gpu_class.C
+++ b/AMSS_NCKU_source/BSSN_GPU/bssn_gpu_class.C
--- a/AMSS_NCKU_source/BSSN_GPU/bssn_gpu_class.h
+++ b/AMSS_NCKU_source/BSSN_GPU/bssn_gpu_class.h
@@ -1,210 +1,210 @@
-
-#ifndef BSSN_GPU_CLASS_H
-#define BSSN_GPU_CLASS_H
-
-#ifdef newc
-#include <iostream>
-#include <iomanip>
-#include <fstream>
-#include <cstdlib>
-#include <string>
-#include <cmath>
-using namespace std;
-#else
-#include <iostream.h>
-#include <iomanip.h>
-#include <fstream.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#endif
-
-#include <mpi.h>
-
-#include "macrodef.h"
-#include "cgh.h"
-#include "ShellPatch.h"
-#include "misc.h"
-#include "var.h"
-#include "MyList.h"
-#include "monitor.h"
-#include "surface_integral.h"
-#include "checkpoint.h"
-
-// added by yangquan
-#include "bssn_macro.h"
-
-extern void setpbh(int iBHN, double **iPBH, double *iMass, int rBHN);
-
-class bssn_class
-{
-public:
-       // added by yangquan
-       //----------------------
-       int gpu_num_mynode;
-       int cpu_core_num_mynode;
-       int mpi_process_num_mynode;
-       int my_sequence_mynode;
-       int mynode_id;
-       int use_gpu;
-
-       virtual void Step_GPU(int lev, int YN);
-       virtual void Get_runtime_envirment();
-       // virtual void Step_OPENMP(int lev,int YN);
-       //----------------------
-
-       int ngfs;
-       int nprocs, myrank;
-       cgh *GH;
-       ShellPatch *SH;
-       double PhysTime;
-
-       int checkrun;
-       char checkfilename[50];
-       int Steps;
-       double StartTime, TotalTime;
-       double AnasTime, DumpTime, d2DumpTime, CheckTime;
-       double LastAnas, LastConsOut;
-       double Courant;
-       double numepss, numepsb, numepsh;
-       int Symmetry;
-       int maxl, decn;
-       double maxrex, drex;
-       int trfls, a_lev;
-
-       double dT;
-       double chitiny;
-
-       double **Porg0, **Porgbr, **Porg, **Porg1, **Porg_rhs;
-       int BH_num, BH_num_input;
-       double *Mass, *Pmom, *Spin;
-       double ADMMass;
-
-       var *phio, *trKo;
-       var *gxxo, *gxyo, *gxzo, *gyyo, *gyzo, *gzzo;
-       var *Axxo, *Axyo, *Axzo, *Ayyo, *Ayzo, *Azzo;
-       var *Gmxo, *Gmyo, *Gmzo;
-       var *Lapo, *Sfxo, *Sfyo, *Sfzo;
-       var *dtSfxo, *dtSfyo, *dtSfzo;
-
-       var *phi0, *trK0;
-       var *gxx0, *gxy0, *gxz0, *gyy0, *gyz0, *gzz0;
-       var *Axx0, *Axy0, *Axz0, *Ayy0, *Ayz0, *Azz0;
-       var *Gmx0, *Gmy0, *Gmz0;
-       var *Lap0, *Sfx0, *Sfy0, *Sfz0;
-       var *dtSfx0, *dtSfy0, *dtSfz0;
-
-       var *phi, *trK;
-       var *gxx, *gxy, *gxz, *gyy, *gyz, *gzz;
-       var *Axx, *Axy, *Axz, *Ayy, *Ayz, *Azz;
-       var *Gmx, *Gmy, *Gmz;
-       var *Lap, *Sfx, *Sfy, *Sfz;
-       var *dtSfx, *dtSfy, *dtSfz;
-
-       var *phi1, *trK1;
-       var *gxx1, *gxy1, *gxz1, *gyy1, *gyz1, *gzz1;
-       var *Axx1, *Axy1, *Axz1, *Ayy1, *Ayz1, *Azz1;
-       var *Gmx1, *Gmy1, *Gmz1;
-       var *Lap1, *Sfx1, *Sfy1, *Sfz1;
-       var *dtSfx1, *dtSfy1, *dtSfz1;
-
-       var *phi_rhs, *trK_rhs;
-       var *gxx_rhs, *gxy_rhs, *gxz_rhs, *gyy_rhs, *gyz_rhs, *gzz_rhs;
-       var *Axx_rhs, *Axy_rhs, *Axz_rhs, *Ayy_rhs, *Ayz_rhs, *Azz_rhs;
-       var *Gmx_rhs, *Gmy_rhs, *Gmz_rhs;
-       var *Lap_rhs, *Sfx_rhs, *Sfy_rhs, *Sfz_rhs;
-       var *dtSfx_rhs, *dtSfy_rhs, *dtSfz_rhs;
-
-       var *rho, *Sx, *Sy, *Sz, *Sxx, *Sxy, *Sxz, *Syy, *Syz, *Szz;
-
-       var *Gamxxx, *Gamxxy, *Gamxxz, *Gamxyy, *Gamxyz, *Gamxzz;
-       var *Gamyxx, *Gamyxy, *Gamyxz, *Gamyyy, *Gamyyz, *Gamyzz;
-       var *Gamzxx, *Gamzxy, *Gamzxz, *Gamzyy, *Gamzyz, *Gamzzz;
-
-       var *Rxx, *Rxy, *Rxz, *Ryy, *Ryz, *Rzz;
-
-       var *Rpsi4, *Ipsi4;
-       var *t1Rpsi4, *t1Ipsi4, *t2Rpsi4, *t2Ipsi4;
-
-       var *Cons_Ham, *Cons_Px, *Cons_Py, *Cons_Pz, *Cons_Gx, *Cons_Gy, *Cons_Gz;
-
-#ifdef Point_Psi4
-       var *phix, *phiy, *phiz;
-       var *trKx, *trKy, *trKz;
-       var *Axxx, *Axxy, *Axxz;
-       var *Axyx, *Axyy, *Axyz;
-       var *Axzx, *Axzy, *Axzz;
-       var *Ayyx, *Ayyy, *Ayyz;
-       var *Ayzx, *Ayzy, *Ayzz;
-       var *Azzx, *Azzy, *Azzz;
-#endif
-       // FIXME: uc = StateList, up = OldStateList, upp = SynchList_cor; so never touch these three data
-       MyList<var> *StateList, *SynchList_pre, *SynchList_cor, *RHSList;
-       MyList<var> *OldStateList, *DumpList;
-       MyList<var> *ConstraintList;
-
-       monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor;
-       monitor *ConVMonitor;
-       surface_integral *Waveshell;
-       checkpoint *CheckPoint;
-
-public:
-       bssn_class(double Couranti, double StartTimei, double TotalTimei, double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei,
-                  int Symmetryi, int checkruni, char *checkfilenamei, double numepssi, double numepsbi, double numepshi,
-                  int a_levi, int maxli, int decni, double maxrexi, double drexi);
-       ~bssn_class();
-
-       void Evolve(int Steps);
-       void RecursiveStep(int lev);
-#if (PSTR == 1)
-       void ParallelStep();
-       void SHStep();
-#endif
-       void RestrictProlong(int lev, int YN, bool BB, MyList<var> *SL, MyList<var> *OL, MyList<var> *corL);
-       void RestrictProlong_aux(int lev, int YN, bool BB, MyList<var> *SL, MyList<var> *OL, MyList<var> *corL);
-       void RestrictProlong(int lev, int YN, bool BB);
-       void ProlongRestrict(int lev, int YN, bool BB);
-       void Setup_Black_Hole_position();
-       void compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, var *fory, var *forz, int lev);
-       bool read_Pablo_file(int *ext, double *datain, char *filename);
-       void write_Pablo_file(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax,
-                             char *filename);
-       void AnalysisStuff(int lev, double dT_lev);
-       void Setup_KerrSchild();
-       void Enforce_algcon(int lev, int fg);
-
-       void testRestrict();
-       void testOutBd();
-
-       virtual void Setup_Initial_Data_Lousto();
-       virtual void Setup_Initial_Data_Cao();
-       virtual void Initialize();
-       virtual void Read_Ansorg();
-       virtual void Read_Pablo() {};
-       virtual void Compute_Psi4(int lev);
-       virtual void Step(int lev, int YN);
-       virtual void Interp_Constraint(bool infg);
-       virtual void Constraint_Out();
-       virtual void Compute_Constraint();
-
-#ifdef With_AHF
-protected:
-       MyList<var> *AHList, *AHDList, *GaugeList;
-       int AHfindevery;
-       double AHdumptime;
-       int *lastahdumpid, HN_num; // number of possible horizons
-       int *findeveryl;
-       double *xc, *yc, *zc, *xr, *yr, *zr;
-       bool *trigger;
-       double *dTT;
-       int *dumpid;
-
-public:
-       void AH_Prepare_derivatives();
-       bool AH_Interp_Points(MyList<var> *VarList,
-                             int NN, double **XX,
-                             double *Shellf, int Symmetryi);
-       void AH_Step_Find(int lev, double dT_lev);
-#endif
-};
-#endif /* BSSN_GPU_CLASS_H */
+
+#ifndef BSSN_GPU_CLASS_H
+#define BSSN_GPU_CLASS_H
+
+#ifdef newc
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <cstdlib>
+#include <string>
+#include <cmath>
+using namespace std;
+#else
+#include <iostream.h>
+#include <iomanip.h>
+#include <fstream.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#endif
+
+#include <mpi.h>
+
+#include "macrodef.h"
+#include "cgh.h"
+#include "ShellPatch.h"
+#include "misc.h"
+#include "var.h"
+#include "MyList.h"
+#include "monitor.h"
+#include "surface_integral.h"
+#include "checkpoint.h"
+
+// added by yangquan
+#include "bssn_macro.h"
+
+extern void setpbh(int iBHN, double **iPBH, double *iMass, int rBHN);
+
+class bssn_class
+{
+public:
+       // added by yangquan
+       //----------------------
+       int gpu_num_mynode;
+       int cpu_core_num_mynode;
+       int mpi_process_num_mynode;
+       int my_sequence_mynode;
+       int mynode_id;
+       int use_gpu;
+
+       virtual void Step_GPU(int lev, int YN);
+       virtual void Get_runtime_envirment();
+       // virtual void Step_OPENMP(int lev,int YN);
+       //----------------------
+
+       int ngfs;
+       int nprocs, myrank;
+       cgh *GH;
+       ShellPatch *SH;
+       double PhysTime;
+
+       int checkrun;
+       char checkfilename[50];
+       int Steps;
+       double StartTime, TotalTime;
+       double AnasTime, DumpTime, d2DumpTime, CheckTime;
+       double LastAnas, LastConsOut;
+       double Courant;
+       double numepss, numepsb, numepsh;
+       int Symmetry;
+       int maxl, decn;
+       double maxrex, drex;
+       int trfls, a_lev;
+
+       double dT;
+       double chitiny;
+
+       double **Porg0, **Porgbr, **Porg, **Porg1, **Porg_rhs;
+       int BH_num, BH_num_input;
+       double *Mass, *Pmom, *Spin;
+       double ADMMass;
+
+       var *phio, *trKo;
+       var *gxxo, *gxyo, *gxzo, *gyyo, *gyzo, *gzzo;
+       var *Axxo, *Axyo, *Axzo, *Ayyo, *Ayzo, *Azzo;
+       var *Gmxo, *Gmyo, *Gmzo;
+       var *Lapo, *Sfxo, *Sfyo, *Sfzo;
+       var *dtSfxo, *dtSfyo, *dtSfzo;
+
+       var *phi0, *trK0;
+       var *gxx0, *gxy0, *gxz0, *gyy0, *gyz0, *gzz0;
+       var *Axx0, *Axy0, *Axz0, *Ayy0, *Ayz0, *Azz0;
+       var *Gmx0, *Gmy0, *Gmz0;
+       var *Lap0, *Sfx0, *Sfy0, *Sfz0;
+       var *dtSfx0, *dtSfy0, *dtSfz0;
+
+       var *phi, *trK;
+       var *gxx, *gxy, *gxz, *gyy, *gyz, *gzz;
+       var *Axx, *Axy, *Axz, *Ayy, *Ayz, *Azz;
+       var *Gmx, *Gmy, *Gmz;
+       var *Lap, *Sfx, *Sfy, *Sfz;
+       var *dtSfx, *dtSfy, *dtSfz;
+
+       var *phi1, *trK1;
+       var *gxx1, *gxy1, *gxz1, *gyy1, *gyz1, *gzz1;
+       var *Axx1, *Axy1, *Axz1, *Ayy1, *Ayz1, *Azz1;
+       var *Gmx1, *Gmy1, *Gmz1;
+       var *Lap1, *Sfx1, *Sfy1, *Sfz1;
+       var *dtSfx1, *dtSfy1, *dtSfz1;
+
+       var *phi_rhs, *trK_rhs;
+       var *gxx_rhs, *gxy_rhs, *gxz_rhs, *gyy_rhs, *gyz_rhs, *gzz_rhs;
+       var *Axx_rhs, *Axy_rhs, *Axz_rhs, *Ayy_rhs, *Ayz_rhs, *Azz_rhs;
+       var *Gmx_rhs, *Gmy_rhs, *Gmz_rhs;
+       var *Lap_rhs, *Sfx_rhs, *Sfy_rhs, *Sfz_rhs;
+       var *dtSfx_rhs, *dtSfy_rhs, *dtSfz_rhs;
+
+       var *rho, *Sx, *Sy, *Sz, *Sxx, *Sxy, *Sxz, *Syy, *Syz, *Szz;
+
+       var *Gamxxx, *Gamxxy, *Gamxxz, *Gamxyy, *Gamxyz, *Gamxzz;
+       var *Gamyxx, *Gamyxy, *Gamyxz, *Gamyyy, *Gamyyz, *Gamyzz;
+       var *Gamzxx, *Gamzxy, *Gamzxz, *Gamzyy, *Gamzyz, *Gamzzz;
+
+       var *Rxx, *Rxy, *Rxz, *Ryy, *Ryz, *Rzz;
+
+       var *Rpsi4, *Ipsi4;
+       var *t1Rpsi4, *t1Ipsi4, *t2Rpsi4, *t2Ipsi4;
+
+       var *Cons_Ham, *Cons_Px, *Cons_Py, *Cons_Pz, *Cons_Gx, *Cons_Gy, *Cons_Gz;
+
+#ifdef Point_Psi4
+       var *phix, *phiy, *phiz;
+       var *trKx, *trKy, *trKz;
+       var *Axxx, *Axxy, *Axxz;
+       var *Axyx, *Axyy, *Axyz;
+       var *Axzx, *Axzy, *Axzz;
+       var *Ayyx, *Ayyy, *Ayyz;
+       var *Ayzx, *Ayzy, *Ayzz;
+       var *Azzx, *Azzy, *Azzz;
+#endif
+       // FIXME: uc = StateList, up = OldStateList, upp = SynchList_cor; so never touch these three data
+       MyList<var> *StateList, *SynchList_pre, *SynchList_cor, *RHSList;
+       MyList<var> *OldStateList, *DumpList;
+       MyList<var> *ConstraintList;
+
+       monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor;
+       monitor *ConVMonitor;
+       surface_integral *Waveshell;
+       checkpoint *CheckPoint;
+
+public:
+       bssn_class(double Couranti, double StartTimei, double TotalTimei, double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei,
+                  int Symmetryi, int checkruni, char *checkfilenamei, double numepssi, double numepsbi, double numepshi,
+                  int a_levi, int maxli, int decni, double maxrexi, double drexi);
+       virtual ~bssn_class(); // virtual: the class has virtual members, so deleting a derived object through a bssn_class* must run the derived destructor
+
+       void Evolve(int Steps);
+       void RecursiveStep(int lev);
+#if (PSTR == 1)
+       void ParallelStep();
+       void SHStep();
+#endif
+       void RestrictProlong(int lev, int YN, bool BB, MyList<var> *SL, MyList<var> *OL, MyList<var> *corL);
+       void RestrictProlong_aux(int lev, int YN, bool BB, MyList<var> *SL, MyList<var> *OL, MyList<var> *corL);
+       void RestrictProlong(int lev, int YN, bool BB);
+       void ProlongRestrict(int lev, int YN, bool BB);
+       void Setup_Black_Hole_position();
+       void compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, var *fory, var *forz, int lev);
+       bool read_Pablo_file(int *ext, double *datain, char *filename);
+       void write_Pablo_file(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax,
+                             char *filename);
+       void AnalysisStuff(int lev, double dT_lev);
+       void Setup_KerrSchild();
+       void Enforce_algcon(int lev, int fg);
+
+       void testRestrict();
+       void testOutBd();
+
+       virtual void Setup_Initial_Data_Lousto();
+       virtual void Setup_Initial_Data_Cao();
+       virtual void Initialize();
+       virtual void Read_Ansorg();
+       virtual void Read_Pablo() {};
+       virtual void Compute_Psi4(int lev);
+       virtual void Step(int lev, int YN);
+       virtual void Interp_Constraint(bool infg);
+       virtual void Constraint_Out();
+       virtual void Compute_Constraint();
+
+#ifdef With_AHF
+protected:
+       MyList<var> *AHList, *AHDList, *GaugeList;
+       int AHfindevery;
+       double AHdumptime;
+       int *lastahdumpid, HN_num; // number of possible horizons
+       int *findeveryl;
+       double *xc, *yc, *zc, *xr, *yr, *zr;
+       bool *trigger;
+       double *dTT;
+       int *dumpid;
+
+public:
+       void AH_Prepare_derivatives();
+       bool AH_Interp_Points(MyList<var> *VarList,
+                             int NN, double **XX,
+                             double *Shellf, int Symmetryi);
+       void AH_Step_Find(int lev, double dT_lev);
+#endif
+};
+#endif /* BSSN_GPU_CLASS_H */
--- a/AMSS_NCKU_source/BSSN_GPU/bssn_gpu_rhs_ss.cu
+++ b/AMSS_NCKU_source/BSSN_GPU/bssn_gpu_rhs_ss.cu
--- a/AMSS_NCKU_source/BSSN_GPU/bssn_macro.C
+++ b/AMSS_NCKU_source/BSSN_GPU/bssn_macro.C
@@ -1,124 +1,124 @@
-#include "bssn_macro.h"
-#include <iostream>
-#include <fstream>
-#include <cstring>
-using namespace std;
-
-int compare_two_file(char *fname1, char *fname2, int data_num)
-{
-	// read file
-	fstream file1(fname1, ios_base::in);
-	fstream file2(fname2, ios_base::in);
-	double *d1, *d2;
-	d1 = (double *)malloc(sizeof(double) * data_num);
-	d2 = (double *)malloc(sizeof(double) * data_num);
-
-	for (int i = 0; i < data_num; ++i)
-	{
-		file1.read((char *)(d1 + i), sizeof(double));
-		file2.read((char *)(d2 + i), sizeof(double));
-	}
-
-	// compare data
-	bool is_match = true;
-	for (int i = 0; i < data_num; ++i)
-	{
-		if (d1[i] != d2[i])
-		{
-			is_match = false;
-			cout << "miss match at position " << i << endl;
-			break;
-		}
-	}
-	if (is_match)
-		cout << "Result is right." << endl;
-
-	free(d1);
-	free(d2);
-	file1.close();
-	file2.close();
-	return 0;
-}
-void printMatrix(int ftag1, int ftag2, double *d1, double *d2, int ord)
-{
-	char fname1[32];
-	char fname2[32];
-	// char ftag1[32]; char ftag2[32];
-	// sprintf(ftag1,"%d",ftag1);
-	strcpy(fname1, "matrix_f.show");
-	// strcat(fname1,ftag1);
-
-	// sprintf(ftag2,"%d",ftag2);
-	strcpy(fname2, "matrix_g.show");
-	// strcat(fname2,ftag2);
-
-	ofstream fout0, fout1, fout2;
-	fout1.open(fname1);
-	fout2.open(fname2);
-
-	for (int k = 0; k < 65; k++)
-	{
-		fout1 << "---------square " << k << " ----------" << endl;
-		fout2 << "---------square " << k << " ----------" << endl;
-		for (int j = 0; j < 67 + ord * 2; j++)
-		{
-			for (int i = 0; i < 67 + ord * 2; i++)
-			{
-				fout1 << d1[i + j * (67 + ord * 2) + k * ((67 + ord * 2) * (67 + ord * 2))] << ' ';
-				fout2 << d2[i + j * (67 + ord * 2) + k * ((67 + ord * 2) * (67 + ord * 2))] << ' ';
-				// fout1<<test_output_g[i+j*(cg->shape[0]) + k*(_2d_size)] <<' ';
-				// fout2<<test_fh_f    [i+j*(cg->shape[0]) + k*(_2d_size)] <<' ';
-			}
-			fout1 << endl;
-			fout2 << endl;
-		}
-	}
-}
-
-int compare_result(int ftag1, double *d2, int data_num)
-{
-	// read file
-	char fname1[32];
-	char ftag[32];
-	// itoa(filetag,ftag,10);
-	sprintf(ftag, "%d", ftag1);
-	strcpy(fname1, "matrix_f.out");
-	strcat(fname1, ftag);
-
-	fstream file1(fname1, ios_base::in);
-	double *d1;
-	d1 = (double *)malloc(sizeof(double) * data_num);
-
-	for (int i = 0; i < data_num; ++i)
-	{
-		file1.read((char *)(d1 + i), sizeof(double));
-	}
-
-	// compare data
-	bool is_match = true;
-	double delta;
-	for (int i = 0; i < data_num; ++i)
-	{
-		delta = d1[i] - d2[i];
-		if (delta < 0)
-			delta = -delta;
-		if (delta > 1e-14)
-		{
-			is_match = false;
-			cout << fname1 << "::miss match at position " << i << endl;
-			break;
-		}
-		// if(i<100 && i>50)
-		//	cout<<d1[i]<<" "<<d2[i]<<endl;
-	}
-	if (is_match)
-		cout << ftag1 << "::matched." << endl;
-
-	if (ftag1 == 0)
-	{
-		printMatrix(1, 2, d1, d2, 3);
-	}
-	free(d1);
-	file1.close();
-	return 0;
-}
+#include "bssn_macro.h"
+#include <iostream>
+#include <fstream>
+#include <cstring>
+using namespace std;
+// Compare the first data_num doubles of two raw binary files for exact equality and report the outcome on stdout.
+int compare_two_file(char *fname1, char *fname2, int data_num) // fname1/fname2: file paths; data_num: doubles to read; always returns 0
+{
+	// Binary mode: the payload is raw doubles, so no text-mode translation may be applied while reading.
+	fstream file1(fname1, ios_base::in | ios_base::binary);
+	fstream file2(fname2, ios_base::in | ios_base::binary);
+	double *d1, *d2;
+	d1 = (double *)malloc(sizeof(double) * data_num);
+	d2 = (double *)malloc(sizeof(double) * data_num);
+
+	for (int i = 0; i < data_num; ++i)
+	{
+		file1.read((char *)(d1 + i), sizeof(double));
+		file2.read((char *)(d2 + i), sizeof(double));
+	}
+
+	// A failed open or short read sets failbit and leaves the buffers unfilled; that must never count as a match.
+	bool is_match = !file1.fail() && !file2.fail();
+	for (int i = 0; is_match && i < data_num; ++i)
+	{
+		if (d1[i] != d2[i])
+		{
+			is_match = false;
+			cout << "miss match at position " << i << endl;
+			break;
+		}
+	}
+	if (is_match)
+		cout << "Result is right." << endl;
+	else if (file1.fail() || file2.fail()) cout << "read failed, nothing compared." << endl;
+	free(d1);
+	free(d2);
+	file1.close();
+	file2.close();
+	return 0;
+}
+void printMatrix(int ftag1, int ftag2, double *d1, double *d2, int ord) // debug dump of two arrays as text files
+{
+	char fname1[32];
+	char fname2[32];
+	// Writes d1 to "matrix_f.show" and d2 to "matrix_g.show": 65 slabs, each (67 + 2*ord) x (67 + 2*ord) values.
+	// ftag1 and ftag2 are currently unused; the code that appended them to the file names is disabled.
+	strcpy(fname1, "matrix_f.show");
+	// The output names are fixed, so each call overwrites the files written by the previous call.
+
+	// NOTE(review): d1 and d2 must each hold at least 65 * (67 + 2*ord)^2 doubles; nothing here checks that.
+	strcpy(fname2, "matrix_g.show");
+	// fout0 below is declared but never opened or written.
+
+	ofstream fout0, fout1, fout2;
+	fout1.open(fname1);
+	fout2.open(fname2);
+
+	for (int k = 0; k < 65; k++)
+	{
+		fout1 << "---------square " << k << " ----------" << endl;
+		fout2 << "---------square " << k << " ----------" << endl;
+		for (int j = 0; j < 67 + ord * 2; j++)
+		{
+			for (int i = 0; i < 67 + ord * 2; i++)
+			{
+				fout1 << d1[i + j * (67 + ord * 2) + k * ((67 + ord * 2) * (67 + ord * 2))] << ' ';
+				fout2 << d2[i + j * (67 + ord * 2) + k * ((67 + ord * 2) * (67 + ord * 2))] << ' ';
+				// Flat index = i + j*side + k*side*side with side = 67 + 2*ord, so i varies fastest.
+				// Every value is followed by one space; each j-row is terminated by endl below.
+			}
+			fout1 << endl;
+			fout2 << endl;
+		}
+	}
+}
+// Check d2 against the reference doubles stored in the binary file "matrix_f.out<ftag1>" using an absolute tolerance of 1e-14.
+int compare_result(int ftag1, double *d2, int data_num) // ftag1: file-name suffix; d2: data_num values to check; always returns 0
+{
+	// Reference file name = "matrix_f.out" + decimal ftag1 (12 + at most 11 characters for a 32-bit int, fits in 32).
+	char fname1[32];
+	char ftag[32];
+	// "%d" of a 32-bit int needs at most 12 bytes including the terminator, so ftag cannot overflow.
+	sprintf(ftag, "%d", ftag1);
+	strcpy(fname1, "matrix_f.out");
+	strcat(fname1, ftag);
+
+	fstream file1(fname1, ios_base::in | ios_base::binary); // raw doubles: no text-mode translation
+	double *d1;
+	d1 = (double *)malloc(sizeof(double) * data_num);
+
+	for (int i = 0; i < data_num; ++i)
+	{
+		file1.read((char *)(d1 + i), sizeof(double));
+	}
+
+	// A failed open or short read sets failbit and leaves d1 unfilled; that must not be reported as a match.
+	bool is_match = !file1.fail();
+	double delta;
+	for (int i = 0; is_match && i < data_num; ++i)
+	{
+		delta = d1[i] - d2[i];
+		if (delta < 0)
+			delta = -delta;
+		if (!(delta <= 1e-14)) // negated form so that a NaN difference is flagged instead of passing silently
+		{
+			is_match = false;
+			cout << fname1 << "::miss match at position " << i << endl;
+			break;
+		}
+		// delta is |d1[i] - d2[i]|; the scan stops at the first index that exceeds the tolerance.
+		// Only that first offending index is reported.
+	}
+	if (is_match)
+		cout << ftag1 << "::matched." << endl;
+	else if (file1.fail()) cout << fname1 << "::read failed." << endl;
+	if (ftag1 == 0 && data_num >= 65 * (67 + 3 * 2) * (67 + 3 * 2)) // printMatrix with ord = 3 reads that many elements
+	{
+		printMatrix(1, 2, d1, d2, 3);
+	}
+	free(d1);
+	file1.close();
+	return 0;
+}
--- a/AMSS_NCKU_source/BSSN_GPU/bssn_macro.h
+++ b/AMSS_NCKU_source/BSSN_GPU/bssn_macro.h
@@ -1,94 +1,94 @@
-#ifndef BSSN_STEP_H
-#define BSSN_STEP_H
-//1---------------------FLAGS---------------------
-
-#define USE_GPU
-#define MAX_GPU_PROCESS_NUM 1
-#define COUNT_CPU_RHS_TIME
-
-
-//2---------------------TIMER---------------------
-//2.1 TIMER_INIT
-//2.2 TIMER_TIC_WITHOUT_OUTPUT
-//2.3 TIMER_TIC(tag,order,label)
-//2.4 TIMER_TIC_TAIL_OF_FUNC(tag,label)
-
-#define TIME_COUNT_EACH_RANK 0
-
-#define TIMER_INIT \
-double clock_prev,clock_curr,step_begin_clock;\
-if(1 == 1){\
-	clock_curr =MPI_Wtime();\
-	step_begin_clock = MPI_Wtime();\
-}else{\
-if(myrank == 0){\
-	clock_curr= MPI_Wtime();\
-	step_begin_clock = MPI_Wtime();\
-}\
-}
-      
-#define TIMER_TIC(tag,order,label) \
-if(TIME_COUNT_EACH_RANK == 1){\
-	  clock_prev= clock_curr;\
-      clock_curr = MPI_Wtime();\
-      cout<<#tag <<order <<":MPI Rank: "<<myrank<<" "<<#label <<" "<<(clock_curr-clock_prev)<<endl;\
-}else{\
-  if(myrank==0){\
-      clock_prev= clock_curr;\
-      clock_curr = MPI_Wtime();\
-      cout<<#tag <<order <<" "<<#label " "<<(clock_curr-clock_prev)<<endl;\
-  }\
-}
-
-#define TIMER_TIC_EACH_PROC(tag,order,label) \
-	  clock_prev= clock_curr;\
-      clock_curr = MPI_Wtime();\
-      cout<<#tag <<order <<":MPI Rank: "<<myrank<<" "<<#label <<" "<<(clock_curr-clock_prev)<<endl;\
-}
-
-#define TIMER_TIC_WITHOUT_OUTPUT \
-if(TIME_COUNT_EACH_RANK == 1){\
-      clock_curr = MPI_Wtime();\
-}else{\
-  if(myrank==0){\
-      clock_curr = MPI_Wtime();\
-  }\
-}
-
-#define TIMER_TIC_TAIL_OF_FUNC(tag,label) \
-if(TIME_COUNT_EACH_RANK == 1){\
-	 cout<<#tag <<"MPI Rank: "<<myrank<<" "<<#label <<" "<<(MPI_Wtime()-step_begin_clock)<<" seconds!"<<endl;\
-}else{\
-     if(myrank==0)\
-     {\
-      cout<<#tag <<#label <<" "<<(MPI_Wtime()-step_begin_clock)<<" seconds!"<<endl;\
-     }\
-}
-
-//3---------------------GPU---------------------
-#define CALLED_BY_STEP 0
-#define CALLED_BY_CONSTRAINT 1
-
-#define RHS_PARA_CALLED_FIRST_TIME cg->shape,TRK4,cg->X[0],cg->X[1],cg->X[2],cg->fgfs[phi0->sgfn],cg->fgfs[trK0->sgfn],cg->fgfs[gxx0->sgfn],cg->fgfs[gxy0->sgfn],cg->fgfs[gxz0->sgfn],cg->fgfs[gyy0->sgfn],cg->fgfs[gyz0->sgfn],cg->fgfs[gzz0->sgfn],cg->fgfs[Axx0->sgfn],cg->fgfs[Axy0->sgfn],cg->fgfs[Axz0->sgfn],cg->fgfs[Ayy0->sgfn],cg->fgfs[Ayz0->sgfn],cg->fgfs[Azz0->sgfn],cg->fgfs[Gmx0->sgfn],cg->fgfs[Gmy0->sgfn],cg->fgfs[Gmz0->sgfn],cg->fgfs[Lap0->sgfn],cg->fgfs[Sfx0->sgfn],cg->fgfs[Sfy0->sgfn],cg->fgfs[Sfz0->sgfn],cg->fgfs[dtSfx0->sgfn],cg->fgfs[dtSfy0->sgfn],cg->fgfs[dtSfz0->sgfn],cg->fgfs[phi_rhs->sgfn],cg->fgfs[trK_rhs->sgfn],cg->fgfs[gxx_rhs->sgfn],cg->fgfs[gxy_rhs->sgfn],cg->fgfs[gxz_rhs->sgfn],cg->fgfs[gyy_rhs->sgfn],cg->fgfs[gyz_rhs->sgfn],cg->fgfs[gzz_rhs->sgfn],cg->fgfs[Axx_rhs->sgfn],cg->fgfs[Axy_rhs->sgfn],cg->fgfs[Axz_rhs->sgfn],cg->fgfs[Ayy_rhs->sgfn],cg->fgfs[Ayz_rhs->sgfn],cg->fgfs[Azz_rhs->sgfn],cg->fgfs[Gmx_rhs->sgfn],cg->fgfs[Gmy_rhs->sgfn],cg->fgfs[Gmz_rhs->sgfn],cg->fgfs[Lap_rhs->sgfn],cg->fgfs[Sfx_rhs->sgfn],cg->fgfs[Sfy_rhs->sgfn],cg->fgfs[Sfz_rhs->sgfn],cg->fgfs[dtSfx_rhs->sgfn],cg->fgfs[dtSfy_rhs->sgfn],cg->fgfs[dtSfz_rhs->sgfn],cg->fgfs[rho->sgfn],cg->fgfs[Sx->sgfn],cg->fgfs[Sy->sgfn],cg->fgfs[Sz->sgfn],cg->fgfs[Sxx->sgfn],cg->fgfs[Sxy->sgfn],cg->fgfs[Sxz->sgfn],cg->fgfs[Syy->sgfn],cg->fgfs[Syz->sgfn],cg->fgfs[Szz->sgfn],cg->fgfs[Gamxxx->sgfn],cg->fgfs[Gamxxy->sgfn],cg->fgfs[Gamxxz->sgfn],cg->fgfs[Gamxyy->sgfn],cg->fgfs[Gamxyz->sgfn],cg->fgfs[Gamxzz->sgfn],cg->fgfs[Gamyxx->sgfn],cg->fgfs[Gamyxy->sgfn],cg->fgfs[Gamyxz->sgfn],cg->fgfs[Gamyyy->sgfn],cg->fgfs[Gamyyz->sgfn],cg->fgfs[Gamyzz->sgfn],cg->fgfs[Gamzxx->sgfn],cg->fgfs[Gamzxy->sgfn],cg->fgfs[Gamzxz->sgfn],cg->fgfs[Gamzyy->sgfn],cg->fgfs[Gamzyz->sgfn],cg->fgfs[Gamzzz->sgfn],cg->fgfs[Rxx->sgfn],cg->fgfs[Rxy->sgfn],cg->fgfs[Rxz->sgfn],cg->fgfs[Ryy->sgfn],cg->fgfs[Ryz->sgfn],cg->fgfs[Rzz->sgfn],cg->fgfs[Cons_Ham->sgfn],cg->fgfs[Cons_Px->sgfn],cg->fgfs[Cons_Py->sgfn],cg->fgfs[Cons_Pz->sgfn],cg
->fgfs[Cons_Gx->sgfn],cg->fgfs[Cons_Gy->sgfn],cg->fgfs[Cons_Gz->sgfn],Symmetry,lev,ndeps,pre
-
-#define RHS_PARA_CALLED_THEN cg->shape,TRK4,cg->X[0],cg->X[1],cg->X[2],cg->fgfs[phi->sgfn],cg->fgfs[trK->sgfn],cg->fgfs[gxx->sgfn],cg->fgfs[gxy->sgfn],cg->fgfs[gxz->sgfn],cg->fgfs[gyy->sgfn],cg->fgfs[gyz->sgfn],cg->fgfs[gzz->sgfn],cg->fgfs[Axx->sgfn],cg->fgfs[Axy->sgfn],cg->fgfs[Axz->sgfn],cg->fgfs[Ayy->sgfn],cg->fgfs[Ayz->sgfn],cg->fgfs[Azz->sgfn],cg->fgfs[Gmx->sgfn],cg->fgfs[Gmy->sgfn],cg->fgfs[Gmz->sgfn],cg->fgfs[Lap->sgfn],cg->fgfs[Sfx->sgfn],cg->fgfs[Sfy->sgfn],cg->fgfs[Sfz->sgfn],cg->fgfs[dtSfx->sgfn],cg->fgfs[dtSfy->sgfn],cg->fgfs[dtSfz->sgfn],cg->fgfs[phi1->sgfn],cg->fgfs[trK1->sgfn],cg->fgfs[gxx1->sgfn],cg->fgfs[gxy1->sgfn],cg->fgfs[gxz1->sgfn],cg->fgfs[gyy1->sgfn],cg->fgfs[gyz1->sgfn],cg->fgfs[gzz1->sgfn],cg->fgfs[Axx1->sgfn],cg->fgfs[Axy1->sgfn],cg->fgfs[Axz1->sgfn],cg->fgfs[Ayy1->sgfn],cg->fgfs[Ayz1->sgfn],cg->fgfs[Azz1->sgfn],cg->fgfs[Gmx1->sgfn],cg->fgfs[Gmy1->sgfn],cg->fgfs[Gmz1->sgfn],cg->fgfs[Lap1->sgfn],cg->fgfs[Sfx1->sgfn],cg->fgfs[Sfy1->sgfn],cg->fgfs[Sfz1->sgfn],cg->fgfs[dtSfx1->sgfn],cg->fgfs[dtSfy1->sgfn],cg->fgfs[dtSfz1->sgfn],cg->fgfs[rho->sgfn],cg->fgfs[Sx->sgfn],cg->fgfs[Sy->sgfn],cg->fgfs[Sz->sgfn],cg->fgfs[Sxx->sgfn],cg->fgfs[Sxy->sgfn],cg->fgfs[Sxz->sgfn],cg->fgfs[Syy->sgfn],cg->fgfs[Syz->sgfn],cg->fgfs[Szz->sgfn],cg->fgfs[Gamxxx->sgfn],cg->fgfs[Gamxxy->sgfn],cg->fgfs[Gamxxz->sgfn],cg->fgfs[Gamxyy->sgfn],cg->fgfs[Gamxyz->sgfn],cg->fgfs[Gamxzz->sgfn],cg->fgfs[Gamyxx->sgfn],cg->fgfs[Gamyxy->sgfn],cg->fgfs[Gamyxz->sgfn],cg->fgfs[Gamyyy->sgfn],cg->fgfs[Gamyyz->sgfn],cg->fgfs[Gamyzz->sgfn],cg->fgfs[Gamzxx->sgfn],cg->fgfs[Gamzxy->sgfn],cg->fgfs[Gamzxz->sgfn],cg->fgfs[Gamzyy->sgfn],cg->fgfs[Gamzyz->sgfn],cg->fgfs[Gamzzz->sgfn],cg->fgfs[Rxx->sgfn],cg->fgfs[Rxy->sgfn],cg->fgfs[Rxz->sgfn],cg->fgfs[Ryy->sgfn],cg->fgfs[Ryz->sgfn],cg->fgfs[Rzz->sgfn],cg->fgfs[Cons_Ham->sgfn],cg->fgfs[Cons_Px->sgfn],cg->fgfs[Cons_Py->sgfn],cg->fgfs[Cons_Pz->sgfn],cg->fgfs[Cons_Gx->sgfn],cg->fgfs[Cons_Gy->sgfn],cg->fgfs[Cons_Gz->sgfn],Symmetry,lev,ndeps,cor
-
-#define RHS_PARA_CALLED_Constraint_Out cg->shape,TRK4,cg->X[0],cg->X[1],cg->X[2],cg->fgfs[phi0->sgfn],cg->fgfs[trK0->sgfn],cg->fgfs[gxx0->sgfn],cg->fgfs[gxy0->sgfn],cg->fgfs[gxz0->sgfn],cg->fgfs[gyy0->sgfn],cg->fgfs[gyz0->sgfn],cg->fgfs[gzz0->sgfn],cg->fgfs[Axx0->sgfn],cg->fgfs[Axy0->sgfn],cg->fgfs[Axz0->sgfn],cg->fgfs[Ayy0->sgfn],cg->fgfs[Ayz0->sgfn],cg->fgfs[Azz0->sgfn],cg->fgfs[Gmx0->sgfn],cg->fgfs[Gmy0->sgfn],cg->fgfs[Gmz0->sgfn],cg->fgfs[Lap0->sgfn],cg->fgfs[Sfx0->sgfn],cg->fgfs[Sfy0->sgfn],cg->fgfs[Sfz0->sgfn],cg->fgfs[dtSfx0->sgfn],cg->fgfs[dtSfy0->sgfn],cg->fgfs[dtSfz0->sgfn],cg->fgfs[phi_rhs->sgfn],cg->fgfs[trK_rhs->sgfn],cg->fgfs[gxx_rhs->sgfn],cg->fgfs[gxy_rhs->sgfn],cg->fgfs[gxz_rhs->sgfn],cg->fgfs[gyy_rhs->sgfn],cg->fgfs[gyz_rhs->sgfn],cg->fgfs[gzz_rhs->sgfn],cg->fgfs[Axx_rhs->sgfn],cg->fgfs[Axy_rhs->sgfn],cg->fgfs[Axz_rhs->sgfn],cg->fgfs[Ayy_rhs->sgfn],cg->fgfs[Ayz_rhs->sgfn],cg->fgfs[Azz_rhs->sgfn],cg->fgfs[Gmx_rhs->sgfn],cg->fgfs[Gmy_rhs->sgfn],cg->fgfs[Gmz_rhs->sgfn],cg->fgfs[Lap_rhs->sgfn],cg->fgfs[Sfx_rhs->sgfn],cg->fgfs[Sfy_rhs->sgfn],cg->fgfs[Sfz_rhs->sgfn],cg->fgfs[dtSfx_rhs->sgfn],cg->fgfs[dtSfy_rhs->sgfn],cg->fgfs[dtSfz_rhs->sgfn],cg->fgfs[rho->sgfn],cg->fgfs[Sx->sgfn],cg->fgfs[Sy->sgfn],cg->fgfs[Sz->sgfn],cg->fgfs[Sxx->sgfn],cg->fgfs[Sxy->sgfn],cg->fgfs[Sxz->sgfn],cg->fgfs[Syy->sgfn],cg->fgfs[Syz->sgfn],cg->fgfs[Szz->sgfn],cg->fgfs[Gamxxx->sgfn],cg->fgfs[Gamxxy->sgfn],cg->fgfs[Gamxxz->sgfn],cg->fgfs[Gamxyy->sgfn],cg->fgfs[Gamxyz->sgfn],cg->fgfs[Gamxzz->sgfn],cg->fgfs[Gamyxx->sgfn],cg->fgfs[Gamyxy->sgfn],cg->fgfs[Gamyxz->sgfn],cg->fgfs[Gamyyy->sgfn],cg->fgfs[Gamyyz->sgfn],cg->fgfs[Gamyzz->sgfn],cg->fgfs[Gamzxx->sgfn],cg->fgfs[Gamzxy->sgfn],cg->fgfs[Gamzxz->sgfn],cg->fgfs[Gamzyy->sgfn],cg->fgfs[Gamzyz->sgfn],cg->fgfs[Gamzzz->sgfn],cg->fgfs[Rxx->sgfn],cg->fgfs[Rxy->sgfn],cg->fgfs[Rxz->sgfn],cg->fgfs[Ryy->sgfn],cg->fgfs[Ryz->sgfn],cg->fgfs[Rzz->sgfn],cg->fgfs[Cons_Ham->sgfn],cg->fgfs[Cons_Px->sgfn],cg->fgfs[Cons_Py->sgfn],cg->fgfs[Cons_Pz->sgfn
],cg->fgfs[Cons_Gx->sgfn],cg->fgfs[Cons_Gy->sgfn],cg->fgfs[Cons_Gz->sgfn],Symmetry,lev,ndeps,pre
-
-
-#define RHS_PARA_CALLED_Interp_Constraint cg->shape,TRK4,cg->X[0],cg->X[1],cg->X[2],cg->fgfs[phi0->sgfn],cg->fgfs[trK0->sgfn],cg->fgfs[gxx0->sgfn],cg->fgfs[gxy0->sgfn],cg->fgfs[gxz0->sgfn],cg->fgfs[gyy0->sgfn],cg->fgfs[gyz0->sgfn],cg->fgfs[gzz0->sgfn],cg->fgfs[Axx0->sgfn],cg->fgfs[Axy0->sgfn],cg->fgfs[Axz0->sgfn],cg->fgfs[Ayy0->sgfn],cg->fgfs[Ayz0->sgfn],cg->fgfs[Azz0->sgfn],cg->fgfs[Gmx0->sgfn],cg->fgfs[Gmy0->sgfn],cg->fgfs[Gmz0->sgfn],cg->fgfs[Lap0->sgfn],cg->fgfs[Sfx0->sgfn],cg->fgfs[Sfy0->sgfn],cg->fgfs[Sfz0->sgfn],cg->fgfs[dtSfx0->sgfn],cg->fgfs[dtSfy0->sgfn],cg->fgfs[dtSfz0->sgfn],cg->fgfs[phi_rhs->sgfn],cg->fgfs[trK_rhs->sgfn],cg->fgfs[gxx_rhs->sgfn],cg->fgfs[gxy_rhs->sgfn],cg->fgfs[gxz_rhs->sgfn],cg->fgfs[gyy_rhs->sgfn],cg->fgfs[gyz_rhs->sgfn],cg->fgfs[gzz_rhs->sgfn],cg->fgfs[Axx_rhs->sgfn],cg->fgfs[Axy_rhs->sgfn],cg->fgfs[Axz_rhs->sgfn],cg->fgfs[Ayy_rhs->sgfn],cg->fgfs[Ayz_rhs->sgfn],cg->fgfs[Azz_rhs->sgfn],cg->fgfs[Gmx_rhs->sgfn],cg->fgfs[Gmy_rhs->sgfn],cg->fgfs[Gmz_rhs->sgfn],cg->fgfs[Lap_rhs->sgfn],cg->fgfs[Sfx_rhs->sgfn],cg->fgfs[Sfy_rhs->sgfn],cg->fgfs[Sfz_rhs->sgfn],cg->fgfs[dtSfx_rhs->sgfn],cg->fgfs[dtSfy_rhs->sgfn],cg->fgfs[dtSfz_rhs->sgfn],cg->fgfs[rho->sgfn],cg->fgfs[Sx->sgfn],cg->fgfs[Sy->sgfn],cg->fgfs[Sz->sgfn],cg->fgfs[Sxx->sgfn],cg->fgfs[Sxy->sgfn],cg->fgfs[Sxz->sgfn],cg->fgfs[Syy->sgfn],cg->fgfs[Syz->sgfn],cg->fgfs[Szz->sgfn],cg->fgfs[Gamxxx->sgfn],cg->fgfs[Gamxxy->sgfn],cg->fgfs[Gamxxz->sgfn],cg->fgfs[Gamxyy->sgfn],cg->fgfs[Gamxyz->sgfn],cg->fgfs[Gamxzz->sgfn],cg->fgfs[Gamyxx->sgfn],cg->fgfs[Gamyxy->sgfn],cg->fgfs[Gamyxz->sgfn],cg->fgfs[Gamyyy->sgfn],cg->fgfs[Gamyyz->sgfn],cg->fgfs[Gamyzz->sgfn],cg->fgfs[Gamzxx->sgfn],cg->fgfs[Gamzxy->sgfn],cg->fgfs[Gamzxz->sgfn],cg->fgfs[Gamzyy->sgfn],cg->fgfs[Gamzyz->sgfn],cg->fgfs[Gamzzz->sgfn],cg->fgfs[Rxx->sgfn],cg->fgfs[Rxy->sgfn],cg->fgfs[Rxz->sgfn],cg->fgfs[Ryy->sgfn],cg->fgfs[Ryz->sgfn],cg->fgfs[Rzz->sgfn],cg->fgfs[Cons_Ham->sgfn],cg->fgfs[Cons_Px->sgfn],cg->fgfs[Cons_Py->sgfn],cg->fgfs[Cons_Pz->s
gfn],cg->fgfs[Cons_Gx->sgfn],cg->fgfs[Cons_Gy->sgfn],cg->fgfs[Cons_Gz->sgfn],Symmetry,lev,ndeps,pre
-
-#define RHS_SS_PARA_CALLED_FIRST_TIME cg->shape,TRK4,cg->X[0],cg->X[1],cg->X[2],cg->fgfs[fngfs+ShellPatch::gx],cg->fgfs[fngfs+ShellPatch::gy],cg->fgfs[fngfs+ShellPatch::gz],cg->fgfs[fngfs+ShellPatch::drhodx],cg->fgfs[fngfs+ShellPatch::drhody],cg->fgfs[fngfs+ShellPatch::drhodz],cg->fgfs[fngfs+ShellPatch::dsigmadx],cg->fgfs[fngfs+ShellPatch::dsigmady],cg->fgfs[fngfs+ShellPatch::dsigmadz],cg->fgfs[fngfs+ShellPatch::dRdx],cg->fgfs[fngfs+ShellPatch::dRdy],cg->fgfs[fngfs+ShellPatch::dRdz],cg->fgfs[fngfs+ShellPatch::drhodxx],cg->fgfs[fngfs+ShellPatch::drhodxy],cg->fgfs[fngfs+ShellPatch::drhodxz],cg->fgfs[fngfs+ShellPatch::drhodyy],cg->fgfs[fngfs+ShellPatch::drhodyz],cg->fgfs[fngfs+ShellPatch::drhodzz],cg->fgfs[fngfs+ShellPatch::dsigmadxx],cg->fgfs[fngfs+ShellPatch::dsigmadxy],cg->fgfs[fngfs+ShellPatch::dsigmadxz],cg->fgfs[fngfs+ShellPatch::dsigmadyy],cg->fgfs[fngfs+ShellPatch::dsigmadyz],cg->fgfs[fngfs+ShellPatch::dsigmadzz],cg->fgfs[fngfs+ShellPatch::dRdxx],cg->fgfs[fngfs+ShellPatch::dRdxy],cg->fgfs[fngfs+ShellPatch::dRdxz],cg->fgfs[fngfs+ShellPatch::dRdyy],cg->fgfs[fngfs+ShellPatch::dRdyz],cg->fgfs[fngfs+ShellPatch::dRdzz],cg->fgfs[phi0->sgfn],cg->fgfs[trK0->sgfn],cg->fgfs[gxx0->sgfn],cg->fgfs[gxy0->sgfn],cg->fgfs[gxz0->sgfn],cg->fgfs[gyy0->sgfn],cg->fgfs[gyz0->sgfn],cg->fgfs[gzz0->sgfn],cg->fgfs[Axx0->sgfn],cg->fgfs[Axy0->sgfn],cg->fgfs[Axz0->sgfn],cg->fgfs[Ayy0->sgfn],cg->fgfs[Ayz0->sgfn],cg->fgfs[Azz0->sgfn],cg->fgfs[Gmx0->sgfn],cg->fgfs[Gmy0->sgfn],cg->fgfs[Gmz0->sgfn],cg->fgfs[Lap0->sgfn],cg->fgfs[Sfx0->sgfn],cg->fgfs[Sfy0->sgfn],cg->fgfs[Sfz0->sgfn],cg->fgfs[dtSfx0->sgfn],cg->fgfs[dtSfy0->sgfn],cg->fgfs[dtSfz0->sgfn],cg->fgfs[phi_rhs->sgfn],cg->fgfs[trK_rhs->sgfn],cg->fgfs[gxx_rhs->sgfn],cg->fgfs[gxy_rhs->sgfn],cg->fgfs[gxz_rhs->sgfn],cg->fgfs[gyy_rhs->sgfn],cg->fgfs[gyz_rhs->sgfn],cg->fgfs[gzz_rhs->sgfn],cg->fgfs[Axx_rhs->sgfn],cg->fgfs[Axy_rhs->sgfn],cg->fgfs[Axz_rhs->sgfn],cg->fgfs[Ayy_rhs->sgfn],cg->fgfs[Ayz_rhs->sgfn],cg->fgfs[Azz_rhs->sgfn],cg->fgfs[Gmx_rhs-
>sgfn],cg->fgfs[Gmy_rhs->sgfn],cg->fgfs[Gmz_rhs->sgfn],cg->fgfs[Lap_rhs->sgfn],cg->fgfs[Sfx_rhs->sgfn],cg->fgfs[Sfy_rhs->sgfn],cg->fgfs[Sfz_rhs->sgfn],cg->fgfs[dtSfx_rhs->sgfn],cg->fgfs[dtSfy_rhs->sgfn],cg->fgfs[dtSfz_rhs->sgfn],cg->fgfs[rho->sgfn],cg->fgfs[Sx->sgfn],cg->fgfs[Sy->sgfn],cg->fgfs[Sz->sgfn],cg->fgfs[Sxx->sgfn],cg->fgfs[Sxy->sgfn],cg->fgfs[Sxz->sgfn],cg->fgfs[Syy->sgfn],cg->fgfs[Syz->sgfn],cg->fgfs[Szz->sgfn],cg->fgfs[Gamxxx->sgfn],cg->fgfs[Gamxxy->sgfn],cg->fgfs[Gamxxz->sgfn],cg->fgfs[Gamxyy->sgfn],cg->fgfs[Gamxyz->sgfn],cg->fgfs[Gamxzz->sgfn],cg->fgfs[Gamyxx->sgfn],cg->fgfs[Gamyxy->sgfn],cg->fgfs[Gamyxz->sgfn],cg->fgfs[Gamyyy->sgfn],cg->fgfs[Gamyyz->sgfn],cg->fgfs[Gamyzz->sgfn],cg->fgfs[Gamzxx->sgfn],cg->fgfs[Gamzxy->sgfn],cg->fgfs[Gamzxz->sgfn],cg->fgfs[Gamzyy->sgfn],cg->fgfs[Gamzyz->sgfn],cg->fgfs[Gamzzz->sgfn],cg->fgfs[Rxx->sgfn],cg->fgfs[Rxy->sgfn],cg->fgfs[Rxz->sgfn],cg->fgfs[Ryy->sgfn],cg->fgfs[Ryz->sgfn],cg->fgfs[Rzz->sgfn],cg->fgfs[Cons_Ham->sgfn],cg->fgfs[Cons_Px->sgfn],cg->fgfs[Cons_Py->sgfn],cg->fgfs[Cons_Pz->sgfn],cg->fgfs[Cons_Gx->sgfn],cg->fgfs[Cons_Gy->sgfn],cg->fgfs[Cons_Gz->sgfn],Symmetry,lev,numepsh,sPp->data->sst,pre
-
-#define RHS_SS_PARA_CALLED_THEN cg->shape,TRK4,cg->X[0],cg->X[1],cg->X[2],cg->fgfs[fngfs+ShellPatch::gx],cg->fgfs[fngfs+ShellPatch::gy],cg->fgfs[fngfs+ShellPatch::gz],cg->fgfs[fngfs+ShellPatch::drhodx],cg->fgfs[fngfs+ShellPatch::drhody],cg->fgfs[fngfs+ShellPatch::drhodz],cg->fgfs[fngfs+ShellPatch::dsigmadx],cg->fgfs[fngfs+ShellPatch::dsigmady],cg->fgfs[fngfs+ShellPatch::dsigmadz],cg->fgfs[fngfs+ShellPatch::dRdx],cg->fgfs[fngfs+ShellPatch::dRdy],cg->fgfs[fngfs+ShellPatch::dRdz],cg->fgfs[fngfs+ShellPatch::drhodxx],cg->fgfs[fngfs+ShellPatch::drhodxy],cg->fgfs[fngfs+ShellPatch::drhodxz],cg->fgfs[fngfs+ShellPatch::drhodyy],cg->fgfs[fngfs+ShellPatch::drhodyz],cg->fgfs[fngfs+ShellPatch::drhodzz],cg->fgfs[fngfs+ShellPatch::dsigmadxx],cg->fgfs[fngfs+ShellPatch::dsigmadxy],cg->fgfs[fngfs+ShellPatch::dsigmadxz],cg->fgfs[fngfs+ShellPatch::dsigmadyy],cg->fgfs[fngfs+ShellPatch::dsigmadyz],cg->fgfs[fngfs+ShellPatch::dsigmadzz],cg->fgfs[fngfs+ShellPatch::dRdxx],cg->fgfs[fngfs+ShellPatch::dRdxy],cg->fgfs[fngfs+ShellPatch::dRdxz],cg->fgfs[fngfs+ShellPatch::dRdyy],cg->fgfs[fngfs+ShellPatch::dRdyz],cg->fgfs[fngfs+ShellPatch::dRdzz],cg->fgfs[phi->sgfn],cg->fgfs[trK->sgfn],cg->fgfs[gxx->sgfn],cg->fgfs[gxy->sgfn],cg->fgfs[gxz->sgfn],cg->fgfs[gyy->sgfn],cg->fgfs[gyz->sgfn],cg->fgfs[gzz->sgfn],cg->fgfs[Axx->sgfn],cg->fgfs[Axy->sgfn],cg->fgfs[Axz->sgfn],cg->fgfs[Ayy->sgfn],cg->fgfs[Ayz->sgfn],cg->fgfs[Azz->sgfn],cg->fgfs[Gmx->sgfn],cg->fgfs[Gmy->sgfn],cg->fgfs[Gmz->sgfn],cg->fgfs[Lap->sgfn],cg->fgfs[Sfx->sgfn],cg->fgfs[Sfy->sgfn],cg->fgfs[Sfz->sgfn],cg->fgfs[dtSfx->sgfn],cg->fgfs[dtSfy->sgfn],cg->fgfs[dtSfz->sgfn],cg->fgfs[phi1->sgfn],cg->fgfs[trK1->sgfn],cg->fgfs[gxx1->sgfn],cg->fgfs[gxy1->sgfn],cg->fgfs[gxz1->sgfn],cg->fgfs[gyy1->sgfn],cg->fgfs[gyz1->sgfn],cg->fgfs[gzz1->sgfn],cg->fgfs[Axx1->sgfn],cg->fgfs[Axy1->sgfn],cg->fgfs[Axz1->sgfn],cg->fgfs[Ayy1->sgfn],cg->fgfs[Ayz1->sgfn],cg->fgfs[Azz1->sgfn],cg->fgfs[Gmx1->sgfn],cg->fgfs[Gmy1->sgfn],cg->fgfs[Gmz1->sgfn],cg->fgfs[Lap1->sgfn],cg->f
gfs[Sfx1->sgfn],cg->fgfs[Sfy1->sgfn],cg->fgfs[Sfz1->sgfn],cg->fgfs[dtSfx1->sgfn],cg->fgfs[dtSfy1->sgfn],cg->fgfs[dtSfz1->sgfn],cg->fgfs[rho->sgfn],cg->fgfs[Sx->sgfn],cg->fgfs[Sy->sgfn],cg->fgfs[Sz->sgfn],cg->fgfs[Sxx->sgfn],cg->fgfs[Sxy->sgfn],cg->fgfs[Sxz->sgfn],cg->fgfs[Syy->sgfn],cg->fgfs[Syz->sgfn],cg->fgfs[Szz->sgfn],cg->fgfs[Gamxxx->sgfn],cg->fgfs[Gamxxy->sgfn],cg->fgfs[Gamxxz->sgfn],cg->fgfs[Gamxyy->sgfn],cg->fgfs[Gamxyz->sgfn],cg->fgfs[Gamxzz->sgfn],cg->fgfs[Gamyxx->sgfn],cg->fgfs[Gamyxy->sgfn],cg->fgfs[Gamyxz->sgfn],cg->fgfs[Gamyyy->sgfn],cg->fgfs[Gamyyz->sgfn],cg->fgfs[Gamyzz->sgfn],cg->fgfs[Gamzxx->sgfn],cg->fgfs[Gamzxy->sgfn],cg->fgfs[Gamzxz->sgfn],cg->fgfs[Gamzyy->sgfn],cg->fgfs[Gamzyz->sgfn],cg->fgfs[Gamzzz->sgfn],cg->fgfs[Rxx->sgfn],cg->fgfs[Rxy->sgfn],cg->fgfs[Rxz->sgfn],cg->fgfs[Ryy->sgfn],cg->fgfs[Ryz->sgfn],cg->fgfs[Rzz->sgfn],cg->fgfs[Cons_Ham->sgfn],cg->fgfs[Cons_Px->sgfn],cg->fgfs[Cons_Py->sgfn],cg->fgfs[Cons_Pz->sgfn],cg->fgfs[Cons_Gx->sgfn],cg->fgfs[Cons_Gy->sgfn],cg->fgfs[Cons_Gz->sgfn],Symmetry,lev,numepsh,sPp->data->sst,cor
-
-
-#define RHS_PARA_CALLED_Constraint_Out_SS cg->shape,TRK4,cg->X[0],cg->X[1],cg->X[2],cg->fgfs[fngfs+ShellPatch::gx],cg->fgfs[fngfs+ShellPatch::gy],cg->fgfs[fngfs+ShellPatch::gz],cg->fgfs[fngfs+ShellPatch::drhodx],cg->fgfs[fngfs+ShellPatch::drhody],cg->fgfs[fngfs+ShellPatch::drhodz],cg->fgfs[fngfs+ShellPatch::dsigmadx],cg->fgfs[fngfs+ShellPatch::dsigmady],cg->fgfs[fngfs+ShellPatch::dsigmadz],cg->fgfs[fngfs+ShellPatch::dRdx],cg->fgfs[fngfs+ShellPatch::dRdy],cg->fgfs[fngfs+ShellPatch::dRdz],cg->fgfs[fngfs+ShellPatch::drhodxx],cg->fgfs[fngfs+ShellPatch::drhodxy],cg->fgfs[fngfs+ShellPatch::drhodxz],cg->fgfs[fngfs+ShellPatch::drhodyy],cg->fgfs[fngfs+ShellPatch::drhodyz],cg->fgfs[fngfs+ShellPatch::drhodzz],cg->fgfs[fngfs+ShellPatch::dsigmadxx],cg->fgfs[fngfs+ShellPatch::dsigmadxy],cg->fgfs[fngfs+ShellPatch::dsigmadxz],cg->fgfs[fngfs+ShellPatch::dsigmadyy],cg->fgfs[fngfs+ShellPatch::dsigmadyz],cg->fgfs[fngfs+ShellPatch::dsigmadzz],cg->fgfs[fngfs+ShellPatch::dRdxx],cg->fgfs[fngfs+ShellPatch::dRdxy],cg->fgfs[fngfs+ShellPatch::dRdxz],cg->fgfs[fngfs+ShellPatch::dRdyy],cg->fgfs[fngfs+ShellPatch::dRdyz],cg->fgfs[fngfs+ShellPatch::dRdzz],cg->fgfs[phi0->sgfn],cg->fgfs[trK0->sgfn],cg->fgfs[gxx0->sgfn],cg->fgfs[gxy0->sgfn],cg->fgfs[gxz0->sgfn],cg->fgfs[gyy0->sgfn],cg->fgfs[gyz0->sgfn],cg->fgfs[gzz0->sgfn],cg->fgfs[Axx0->sgfn],cg->fgfs[Axy0->sgfn],cg->fgfs[Axz0->sgfn],cg->fgfs[Ayy0->sgfn],cg->fgfs[Ayz0->sgfn],cg->fgfs[Azz0->sgfn],cg->fgfs[Gmx0->sgfn],cg->fgfs[Gmy0->sgfn],cg->fgfs[Gmz0->sgfn],cg->fgfs[Lap0->sgfn],cg->fgfs[Sfx0->sgfn],cg->fgfs[Sfy0->sgfn],cg->fgfs[Sfz0->sgfn],cg->fgfs[dtSfx0->sgfn],cg->fgfs[dtSfy0->sgfn],cg->fgfs[dtSfz0->sgfn],cg->fgfs[phi_rhs->sgfn],cg->fgfs[trK_rhs->sgfn],cg->fgfs[gxx_rhs->sgfn],cg->fgfs[gxy_rhs->sgfn],cg->fgfs[gxz_rhs->sgfn],cg->fgfs[gyy_rhs->sgfn],cg->fgfs[gyz_rhs->sgfn],cg->fgfs[gzz_rhs->sgfn],cg->fgfs[Axx_rhs->sgfn],cg->fgfs[Axy_rhs->sgfn],cg->fgfs[Axz_rhs->sgfn],cg->fgfs[Ayy_rhs->sgfn],cg->fgfs[Ayz_rhs->sgfn],cg->fgfs[Azz_rhs->sgfn],cg->fgfs[Gmx_
rhs->sgfn],cg->fgfs[Gmy_rhs->sgfn],cg->fgfs[Gmz_rhs->sgfn],cg->fgfs[Lap_rhs->sgfn],cg->fgfs[Sfx_rhs->sgfn],cg->fgfs[Sfy_rhs->sgfn],cg->fgfs[Sfz_rhs->sgfn],cg->fgfs[dtSfx_rhs->sgfn],cg->fgfs[dtSfy_rhs->sgfn],cg->fgfs[dtSfz_rhs->sgfn],cg->fgfs[rho->sgfn],cg->fgfs[Sx->sgfn],cg->fgfs[Sy->sgfn],cg->fgfs[Sz->sgfn],cg->fgfs[Sxx->sgfn],cg->fgfs[Sxy->sgfn],cg->fgfs[Sxz->sgfn],cg->fgfs[Syy->sgfn],cg->fgfs[Syz->sgfn],cg->fgfs[Szz->sgfn],cg->fgfs[Gamxxx->sgfn],cg->fgfs[Gamxxy->sgfn],cg->fgfs[Gamxxz->sgfn],cg->fgfs[Gamxyy->sgfn],cg->fgfs[Gamxyz->sgfn],cg->fgfs[Gamxzz->sgfn],cg->fgfs[Gamyxx->sgfn],cg->fgfs[Gamyxy->sgfn],cg->fgfs[Gamyxz->sgfn],cg->fgfs[Gamyyy->sgfn],cg->fgfs[Gamyyz->sgfn],cg->fgfs[Gamyzz->sgfn],cg->fgfs[Gamzxx->sgfn],cg->fgfs[Gamzxy->sgfn],cg->fgfs[Gamzxz->sgfn],cg->fgfs[Gamzyy->sgfn],cg->fgfs[Gamzyz->sgfn],cg->fgfs[Gamzzz->sgfn],cg->fgfs[Rxx->sgfn],cg->fgfs[Rxy->sgfn],cg->fgfs[Rxz->sgfn],cg->fgfs[Ryy->sgfn],cg->fgfs[Ryz->sgfn],cg->fgfs[Rzz->sgfn],cg->fgfs[Cons_Ham->sgfn],cg->fgfs[Cons_Px->sgfn],cg->fgfs[Cons_Py->sgfn],cg->fgfs[Cons_Pz->sgfn],cg->fgfs[Cons_Gx->sgfn],cg->fgfs[Cons_Gy->sgfn],cg->fgfs[Cons_Gz->sgfn],Symmetry,lev,numepsh,sPp->data->sst,pre
-
-#define RHS_PARA_CALLED_Intrp_Constraint_Out_SS cg->shape,TRK4,cg->X[0],cg->X[1],cg->X[2],cg->fgfs[fngfs+ShellPatch::gx],cg->fgfs[fngfs+ShellPatch::gy],cg->fgfs[fngfs+ShellPatch::gz],cg->fgfs[fngfs+ShellPatch::drhodx],cg->fgfs[fngfs+ShellPatch::drhody],cg->fgfs[fngfs+ShellPatch::drhodz],cg->fgfs[fngfs+ShellPatch::dsigmadx],cg->fgfs[fngfs+ShellPatch::dsigmady],cg->fgfs[fngfs+ShellPatch::dsigmadz],cg->fgfs[fngfs+ShellPatch::dRdx],cg->fgfs[fngfs+ShellPatch::dRdy],cg->fgfs[fngfs+ShellPatch::dRdz],cg->fgfs[fngfs+ShellPatch::drhodxx],cg->fgfs[fngfs+ShellPatch::drhodxy],cg->fgfs[fngfs+ShellPatch::drhodxz],cg->fgfs[fngfs+ShellPatch::drhodyy],cg->fgfs[fngfs+ShellPatch::drhodyz],cg->fgfs[fngfs+ShellPatch::drhodzz],cg->fgfs[fngfs+ShellPatch::dsigmadxx],cg->fgfs[fngfs+ShellPatch::dsigmadxy],cg->fgfs[fngfs+ShellPatch::dsigmadxz],cg->fgfs[fngfs+ShellPatch::dsigmadyy],cg->fgfs[fngfs+ShellPatch::dsigmadyz],cg->fgfs[fngfs+ShellPatch::dsigmadzz],cg->fgfs[fngfs+ShellPatch::dRdxx],cg->fgfs[fngfs+ShellPatch::dRdxy],cg->fgfs[fngfs+ShellPatch::dRdxz],cg->fgfs[fngfs+ShellPatch::dRdyy],cg->fgfs[fngfs+ShellPatch::dRdyz],cg->fgfs[fngfs+ShellPatch::dRdzz],cg->fgfs[phi0->sgfn],cg->fgfs[trK0->sgfn],cg->fgfs[gxx0->sgfn],cg->fgfs[gxy0->sgfn],cg->fgfs[gxz0->sgfn],cg->fgfs[gyy0->sgfn],cg->fgfs[gyz0->sgfn],cg->fgfs[gzz0->sgfn],cg->fgfs[Axx0->sgfn],cg->fgfs[Axy0->sgfn],cg->fgfs[Axz0->sgfn],cg->fgfs[Ayy0->sgfn],cg->fgfs[Ayz0->sgfn],cg->fgfs[Azz0->sgfn],cg->fgfs[Gmx0->sgfn],cg->fgfs[Gmy0->sgfn],cg->fgfs[Gmz0->sgfn],cg->fgfs[Lap0->sgfn],cg->fgfs[Sfx0->sgfn],cg->fgfs[Sfy0->sgfn],cg->fgfs[Sfz0->sgfn],cg->fgfs[dtSfx0->sgfn],cg->fgfs[dtSfy0->sgfn],cg->fgfs[dtSfz0->sgfn],cg->fgfs[phi_rhs->sgfn],cg->fgfs[trK_rhs->sgfn],cg->fgfs[gxx_rhs->sgfn],cg->fgfs[gxy_rhs->sgfn],cg->fgfs[gxz_rhs->sgfn],cg->fgfs[gyy_rhs->sgfn],cg->fgfs[gyz_rhs->sgfn],cg->fgfs[gzz_rhs->sgfn],cg->fgfs[Axx_rhs->sgfn],cg->fgfs[Axy_rhs->sgfn],cg->fgfs[Axz_rhs->sgfn],cg->fgfs[Ayy_rhs->sgfn],cg->fgfs[Ayz_rhs->sgfn],cg->fgfs[Azz_rhs->sgfn],cg->fgf
s[Gmx_rhs->sgfn],cg->fgfs[Gmy_rhs->sgfn],cg->fgfs[Gmz_rhs->sgfn],cg->fgfs[Lap_rhs->sgfn],cg->fgfs[Sfx_rhs->sgfn],cg->fgfs[Sfy_rhs->sgfn],cg->fgfs[Sfz_rhs->sgfn],cg->fgfs[dtSfx_rhs->sgfn],cg->fgfs[dtSfy_rhs->sgfn],cg->fgfs[dtSfz_rhs->sgfn],cg->fgfs[rho->sgfn],cg->fgfs[Sx->sgfn],cg->fgfs[Sy->sgfn],cg->fgfs[Sz->sgfn],cg->fgfs[Sxx->sgfn],cg->fgfs[Sxy->sgfn],cg->fgfs[Sxz->sgfn],cg->fgfs[Syy->sgfn],cg->fgfs[Syz->sgfn],cg->fgfs[Szz->sgfn],cg->fgfs[Gamxxx->sgfn],cg->fgfs[Gamxxy->sgfn],cg->fgfs[Gamxxz->sgfn],cg->fgfs[Gamxyy->sgfn],cg->fgfs[Gamxyz->sgfn],cg->fgfs[Gamxzz->sgfn],cg->fgfs[Gamyxx->sgfn],cg->fgfs[Gamyxy->sgfn],cg->fgfs[Gamyxz->sgfn],cg->fgfs[Gamyyy->sgfn],cg->fgfs[Gamyyz->sgfn],cg->fgfs[Gamyzz->sgfn],cg->fgfs[Gamzxx->sgfn],cg->fgfs[Gamzxy->sgfn],cg->fgfs[Gamzxz->sgfn],cg->fgfs[Gamzyy->sgfn],cg->fgfs[Gamzyz->sgfn],cg->fgfs[Gamzzz->sgfn],cg->fgfs[Rxx->sgfn],cg->fgfs[Rxy->sgfn],cg->fgfs[Rxz->sgfn],cg->fgfs[Ryy->sgfn],cg->fgfs[Ryz->sgfn],cg->fgfs[Rzz->sgfn],cg->fgfs[Cons_Ham->sgfn],cg->fgfs[Cons_Px->sgfn],cg->fgfs[Cons_Py->sgfn],cg->fgfs[Cons_Pz->sgfn],cg->fgfs[Cons_Gx->sgfn],cg->fgfs[Cons_Gy->sgfn],cg->fgfs[Cons_Gz->sgfn],Symmetry,lev,numepsh,sPp->data->sst,pre
-//4------------tool------------------------------
-int compare_result(int ftag1,double * d2,int data_num);
-
-
-
-#endif
+#ifndef BSSN_STEP_H
+#define BSSN_STEP_H
+//1---------------------FLAGS---------------------
+
+#define USE_GPU
+#define MAX_GPU_PROCESS_NUM 1
+#define COUNT_CPU_RHS_TIME
+
+
+//2---------------------TIMER---------------------
+//2.1 TIMER_INIT
+//2.2 TIMER_TIC_WITHOUT_OUTPUT
+//2.3 TIMER_TIC(tag,order,label)
+//2.4 TIMER_TIC_TAIL_OF_FUNC(tag,label)
+
+#define TIME_COUNT_EACH_RANK 0
+// TIMER_INIT: declares clock_prev, clock_curr and step_begin_clock in the expanding scope and stamps the last two with MPI_Wtime(). Because `1 == 1` is always true this runs on every rank and the myrank == 0 else-branch is dead code; clock_prev is left uninitialized.
+#define TIMER_INIT \
+double clock_prev,clock_curr,step_begin_clock;\
+if(1 == 1){\
+	clock_curr =MPI_Wtime();\
+	step_begin_clock = MPI_Wtime();\
+}else{\
+if(myrank == 0){\
+	clock_curr= MPI_Wtime();\
+	step_begin_clock = MPI_Wtime();\
+}\
+}
+// TIMER_TIC(tag,order,label): copies clock_curr into clock_prev, re-reads MPI_Wtime() and prints the interval after the stringized tag, the value of order and the stringized label. With TIME_COUNT_EACH_RANK == 1 every rank prints (with its rank); otherwise only rank 0 updates its clocks and prints. In the rank-0 branch `#label " "` has no `<<`: the two adjacent string literals are concatenated. Expects clock_prev, clock_curr, myrank and cout at the expansion site.
+#define TIMER_TIC(tag,order,label) \
+if(TIME_COUNT_EACH_RANK == 1){\
+	  clock_prev= clock_curr;\
+      clock_curr = MPI_Wtime();\
+      cout<<#tag <<order <<":MPI Rank: "<<myrank<<" "<<#label <<" "<<(clock_curr-clock_prev)<<endl;\
+}else{\
+  if(myrank==0){\
+      clock_prev= clock_curr;\
+      clock_curr = MPI_Wtime();\
+      cout<<#tag <<order <<" "<<#label " "<<(clock_curr-clock_prev)<<endl;\
+  }\
+}
+// TIMER_TIC_EACH_PROC(tag,order,label): unconditional variant of TIMER_TIC - every rank updates clock_prev/clock_curr and prints its own interval with its rank. The body is enclosed in braces so the expansion is a single balanced compound statement.
+#define TIMER_TIC_EACH_PROC(tag,order,label) {\
+	  clock_prev= clock_curr;\
+      clock_curr = MPI_Wtime();\
+      cout<<#tag <<order <<":MPI Rank: "<<myrank<<" "<<#label <<" "<<(clock_curr-clock_prev)<<endl;\
+}
+// TIMER_TIC_WITHOUT_OUTPUT: re-reads clock_curr from MPI_Wtime() without printing and without touching clock_prev; on every rank when TIME_COUNT_EACH_RANK == 1, otherwise on rank 0 only.
+#define TIMER_TIC_WITHOUT_OUTPUT \
+if(TIME_COUNT_EACH_RANK == 1){\
+      clock_curr = MPI_Wtime();\
+}else{\
+  if(myrank==0){\
+      clock_curr = MPI_Wtime();\
+  }\
+}
+// TIMER_TIC_TAIL_OF_FUNC(tag,label): prints the time elapsed since step_begin_clock (the value stamped by TIMER_INIT) followed by " seconds!"; every rank prints with its rank when TIME_COUNT_EACH_RANK == 1, otherwise only rank 0 prints. No clock variable is modified.
+#define TIMER_TIC_TAIL_OF_FUNC(tag,label) \
+if(TIME_COUNT_EACH_RANK == 1){\
+	 cout<<#tag <<"MPI Rank: "<<myrank<<" "<<#label <<" "<<(MPI_Wtime()-step_begin_clock)<<" seconds!"<<endl;\
+}else{\
+     if(myrank==0)\
+     {\
+      cout<<#tag <<#label <<" "<<(MPI_Wtime()-step_begin_clock)<<" seconds!"<<endl;\
+     }\
+}
+
+//3---------------------GPU---------------------
+#define CALLED_BY_STEP 0 // NOTE(review): presumably tags a call coming from the time-step path - confirm at the use sites
+#define CALLED_BY_CONSTRAINT 1 // NOTE(review): presumably tags a call coming from the constraint computation - confirm at the use sites
+// RHS_PARA_CALLED_FIRST_TIME: bare comma-separated argument list (no parentheses) - cg->shape, TRK4, cg->X[0..2], then cg->fgfs[...] entries for the "0"-suffixed fields, the _rhs targets, rho/S*, Gam*, R* and Cons_*, ending in Symmetry,lev,ndeps,pre. It must stay on one logical line: a line break without a trailing backslash would end the #define early.
+#define RHS_PARA_CALLED_FIRST_TIME cg->shape,TRK4,cg->X[0],cg->X[1],cg->X[2],cg->fgfs[phi0->sgfn],cg->fgfs[trK0->sgfn],cg->fgfs[gxx0->sgfn],cg->fgfs[gxy0->sgfn],cg->fgfs[gxz0->sgfn],cg->fgfs[gyy0->sgfn],cg->fgfs[gyz0->sgfn],cg->fgfs[gzz0->sgfn],cg->fgfs[Axx0->sgfn],cg->fgfs[Axy0->sgfn],cg->fgfs[Axz0->sgfn],cg->fgfs[Ayy0->sgfn],cg->fgfs[Ayz0->sgfn],cg->fgfs[Azz0->sgfn],cg->fgfs[Gmx0->sgfn],cg->fgfs[Gmy0->sgfn],cg->fgfs[Gmz0->sgfn],cg->fgfs[Lap0->sgfn],cg->fgfs[Sfx0->sgfn],cg->fgfs[Sfy0->sgfn],cg->fgfs[Sfz0->sgfn],cg->fgfs[dtSfx0->sgfn],cg->fgfs[dtSfy0->sgfn],cg->fgfs[dtSfz0->sgfn],cg->fgfs[phi_rhs->sgfn],cg->fgfs[trK_rhs->sgfn],cg->fgfs[gxx_rhs->sgfn],cg->fgfs[gxy_rhs->sgfn],cg->fgfs[gxz_rhs->sgfn],cg->fgfs[gyy_rhs->sgfn],cg->fgfs[gyz_rhs->sgfn],cg->fgfs[gzz_rhs->sgfn],cg->fgfs[Axx_rhs->sgfn],cg->fgfs[Axy_rhs->sgfn],cg->fgfs[Axz_rhs->sgfn],cg->fgfs[Ayy_rhs->sgfn],cg->fgfs[Ayz_rhs->sgfn],cg->fgfs[Azz_rhs->sgfn],cg->fgfs[Gmx_rhs->sgfn],cg->fgfs[Gmy_rhs->sgfn],cg->fgfs[Gmz_rhs->sgfn],cg->fgfs[Lap_rhs->sgfn],cg->fgfs[Sfx_rhs->sgfn],cg->fgfs[Sfy_rhs->sgfn],cg->fgfs[Sfz_rhs->sgfn],cg->fgfs[dtSfx_rhs->sgfn],cg->fgfs[dtSfy_rhs->sgfn],cg->fgfs[dtSfz_rhs->sgfn],cg->fgfs[rho->sgfn],cg->fgfs[Sx->sgfn],cg->fgfs[Sy->sgfn],cg->fgfs[Sz->sgfn],cg->fgfs[Sxx->sgfn],cg->fgfs[Sxy->sgfn],cg->fgfs[Sxz->sgfn],cg->fgfs[Syy->sgfn],cg->fgfs[Syz->sgfn],cg->fgfs[Szz->sgfn],cg->fgfs[Gamxxx->sgfn],cg->fgfs[Gamxxy->sgfn],cg->fgfs[Gamxxz->sgfn],cg->fgfs[Gamxyy->sgfn],cg->fgfs[Gamxyz->sgfn],cg->fgfs[Gamxzz->sgfn],cg->fgfs[Gamyxx->sgfn],cg->fgfs[Gamyxy->sgfn],cg->fgfs[Gamyxz->sgfn],cg->fgfs[Gamyyy->sgfn],cg->fgfs[Gamyyz->sgfn],cg->fgfs[Gamyzz->sgfn],cg->fgfs[Gamzxx->sgfn],cg->fgfs[Gamzxy->sgfn],cg->fgfs[Gamzxz->sgfn],cg->fgfs[Gamzyy->sgfn],cg->fgfs[Gamzyz->sgfn],cg->fgfs[Gamzzz->sgfn],cg->fgfs[Rxx->sgfn],cg->fgfs[Rxy->sgfn],cg->fgfs[Rxz->sgfn],cg->fgfs[Ryy->sgfn],cg->fgfs[Ryz->sgfn],cg->fgfs[Rzz->sgfn],cg->fgfs[Cons_Ham->sgfn],cg->fgfs[Cons_Px->sgfn],cg->fgfs[Cons_Py->sgfn],cg->fgfs[Cons_Pz->sgfn],cg->fgfs[Cons_Gx->sgfn],cg->fgfs[Cons_Gy->sgfn],cg->fgfs[Cons_Gz->sgfn],Symmetry,lev,ndeps,pre
+
+#define RHS_PARA_CALLED_THEN cg->shape,TRK4,cg->X[0],cg->X[1],cg->X[2],cg->fgfs[phi->sgfn],cg->fgfs[trK->sgfn],cg->fgfs[gxx->sgfn],cg->fgfs[gxy->sgfn],cg->fgfs[gxz->sgfn],cg->fgfs[gyy->sgfn],cg->fgfs[gyz->sgfn],cg->fgfs[gzz->sgfn],cg->fgfs[Axx->sgfn],cg->fgfs[Axy->sgfn],cg->fgfs[Axz->sgfn],cg->fgfs[Ayy->sgfn],cg->fgfs[Ayz->sgfn],cg->fgfs[Azz->sgfn],cg->fgfs[Gmx->sgfn],cg->fgfs[Gmy->sgfn],cg->fgfs[Gmz->sgfn],cg->fgfs[Lap->sgfn],cg->fgfs[Sfx->sgfn],cg->fgfs[Sfy->sgfn],cg->fgfs[Sfz->sgfn],cg->fgfs[dtSfx->sgfn],cg->fgfs[dtSfy->sgfn],cg->fgfs[dtSfz->sgfn],cg->fgfs[phi1->sgfn],cg->fgfs[trK1->sgfn],cg->fgfs[gxx1->sgfn],cg->fgfs[gxy1->sgfn],cg->fgfs[gxz1->sgfn],cg->fgfs[gyy1->sgfn],cg->fgfs[gyz1->sgfn],cg->fgfs[gzz1->sgfn],cg->fgfs[Axx1->sgfn],cg->fgfs[Axy1->sgfn],cg->fgfs[Axz1->sgfn],cg->fgfs[Ayy1->sgfn],cg->fgfs[Ayz1->sgfn],cg->fgfs[Azz1->sgfn],cg->fgfs[Gmx1->sgfn],cg->fgfs[Gmy1->sgfn],cg->fgfs[Gmz1->sgfn],cg->fgfs[Lap1->sgfn],cg->fgfs[Sfx1->sgfn],cg->fgfs[Sfy1->sgfn],cg->fgfs[Sfz1->sgfn],cg->fgfs[dtSfx1->sgfn],cg->fgfs[dtSfy1->sgfn],cg->fgfs[dtSfz1->sgfn],cg->fgfs[rho->sgfn],cg->fgfs[Sx->sgfn],cg->fgfs[Sy->sgfn],cg->fgfs[Sz->sgfn],cg->fgfs[Sxx->sgfn],cg->fgfs[Sxy->sgfn],cg->fgfs[Sxz->sgfn],cg->fgfs[Syy->sgfn],cg->fgfs[Syz->sgfn],cg->fgfs[Szz->sgfn],cg->fgfs[Gamxxx->sgfn],cg->fgfs[Gamxxy->sgfn],cg->fgfs[Gamxxz->sgfn],cg->fgfs[Gamxyy->sgfn],cg->fgfs[Gamxyz->sgfn],cg->fgfs[Gamxzz->sgfn],cg->fgfs[Gamyxx->sgfn],cg->fgfs[Gamyxy->sgfn],cg->fgfs[Gamyxz->sgfn],cg->fgfs[Gamyyy->sgfn],cg->fgfs[Gamyyz->sgfn],cg->fgfs[Gamyzz->sgfn],cg->fgfs[Gamzxx->sgfn],cg->fgfs[Gamzxy->sgfn],cg->fgfs[Gamzxz->sgfn],cg->fgfs[Gamzyy->sgfn],cg->fgfs[Gamzyz->sgfn],cg->fgfs[Gamzzz->sgfn],cg->fgfs[Rxx->sgfn],cg->fgfs[Rxy->sgfn],cg->fgfs[Rxz->sgfn],cg->fgfs[Ryy->sgfn],cg->fgfs[Ryz->sgfn],cg->fgfs[Rzz->sgfn],cg->fgfs[Cons_Ham->sgfn],cg->fgfs[Cons_Px->sgfn],cg->fgfs[Cons_Py->sgfn],cg->fgfs[Cons_Pz->sgfn],cg->fgfs[Cons_Gx->sgfn],cg->fgfs[Cons_Gy->sgfn],cg->fgfs[Cons_Gz->sgfn],Symmetry,lev,ndeps,cor
+
+#define RHS_PARA_CALLED_Constraint_Out RHS_PARA_CALLED_FIRST_TIME // token-for-token the same argument list as RHS_PARA_CALLED_FIRST_TIME; aliased so the two copies cannot drift apart
+
+
+#define RHS_PARA_CALLED_Interp_Constraint RHS_PARA_CALLED_FIRST_TIME // token-for-token the same argument list as RHS_PARA_CALLED_FIRST_TIME; aliased so the two copies cannot drift apart
+
+#define RHS_SS_PARA_CALLED_FIRST_TIME cg->shape,TRK4,cg->X[0],cg->X[1],cg->X[2],cg->fgfs[fngfs+ShellPatch::gx],cg->fgfs[fngfs+ShellPatch::gy],cg->fgfs[fngfs+ShellPatch::gz],cg->fgfs[fngfs+ShellPatch::drhodx],cg->fgfs[fngfs+ShellPatch::drhody],cg->fgfs[fngfs+ShellPatch::drhodz],cg->fgfs[fngfs+ShellPatch::dsigmadx],cg->fgfs[fngfs+ShellPatch::dsigmady],cg->fgfs[fngfs+ShellPatch::dsigmadz],cg->fgfs[fngfs+ShellPatch::dRdx],cg->fgfs[fngfs+ShellPatch::dRdy],cg->fgfs[fngfs+ShellPatch::dRdz],cg->fgfs[fngfs+ShellPatch::drhodxx],cg->fgfs[fngfs+ShellPatch::drhodxy],cg->fgfs[fngfs+ShellPatch::drhodxz],cg->fgfs[fngfs+ShellPatch::drhodyy],cg->fgfs[fngfs+ShellPatch::drhodyz],cg->fgfs[fngfs+ShellPatch::drhodzz],cg->fgfs[fngfs+ShellPatch::dsigmadxx],cg->fgfs[fngfs+ShellPatch::dsigmadxy],cg->fgfs[fngfs+ShellPatch::dsigmadxz],cg->fgfs[fngfs+ShellPatch::dsigmadyy],cg->fgfs[fngfs+ShellPatch::dsigmadyz],cg->fgfs[fngfs+ShellPatch::dsigmadzz],cg->fgfs[fngfs+ShellPatch::dRdxx],cg->fgfs[fngfs+ShellPatch::dRdxy],cg->fgfs[fngfs+ShellPatch::dRdxz],cg->fgfs[fngfs+ShellPatch::dRdyy],cg->fgfs[fngfs+ShellPatch::dRdyz],cg->fgfs[fngfs+ShellPatch::dRdzz],cg->fgfs[phi0->sgfn],cg->fgfs[trK0->sgfn],cg->fgfs[gxx0->sgfn],cg->fgfs[gxy0->sgfn],cg->fgfs[gxz0->sgfn],cg->fgfs[gyy0->sgfn],cg->fgfs[gyz0->sgfn],cg->fgfs[gzz0->sgfn],cg->fgfs[Axx0->sgfn],cg->fgfs[Axy0->sgfn],cg->fgfs[Axz0->sgfn],cg->fgfs[Ayy0->sgfn],cg->fgfs[Ayz0->sgfn],cg->fgfs[Azz0->sgfn],cg->fgfs[Gmx0->sgfn],cg->fgfs[Gmy0->sgfn],cg->fgfs[Gmz0->sgfn],cg->fgfs[Lap0->sgfn],cg->fgfs[Sfx0->sgfn],cg->fgfs[Sfy0->sgfn],cg->fgfs[Sfz0->sgfn],cg->fgfs[dtSfx0->sgfn],cg->fgfs[dtSfy0->sgfn],cg->fgfs[dtSfz0->sgfn],cg->fgfs[phi_rhs->sgfn],cg->fgfs[trK_rhs->sgfn],cg->fgfs[gxx_rhs->sgfn],cg->fgfs[gxy_rhs->sgfn],cg->fgfs[gxz_rhs->sgfn],cg->fgfs[gyy_rhs->sgfn],cg->fgfs[gyz_rhs->sgfn],cg->fgfs[gzz_rhs->sgfn],cg->fgfs[Axx_rhs->sgfn],cg->fgfs[Axy_rhs->sgfn],cg->fgfs[Axz_rhs->sgfn],cg->fgfs[Ayy_rhs->sgfn],cg->fgfs[Ayz_rhs->sgfn],cg->fgfs[Azz_rhs->sgfn],cg->fgfs[Gmx_rhs-
>sgfn],cg->fgfs[Gmy_rhs->sgfn],cg->fgfs[Gmz_rhs->sgfn],cg->fgfs[Lap_rhs->sgfn],cg->fgfs[Sfx_rhs->sgfn],cg->fgfs[Sfy_rhs->sgfn],cg->fgfs[Sfz_rhs->sgfn],cg->fgfs[dtSfx_rhs->sgfn],cg->fgfs[dtSfy_rhs->sgfn],cg->fgfs[dtSfz_rhs->sgfn],cg->fgfs[rho->sgfn],cg->fgfs[Sx->sgfn],cg->fgfs[Sy->sgfn],cg->fgfs[Sz->sgfn],cg->fgfs[Sxx->sgfn],cg->fgfs[Sxy->sgfn],cg->fgfs[Sxz->sgfn],cg->fgfs[Syy->sgfn],cg->fgfs[Syz->sgfn],cg->fgfs[Szz->sgfn],cg->fgfs[Gamxxx->sgfn],cg->fgfs[Gamxxy->sgfn],cg->fgfs[Gamxxz->sgfn],cg->fgfs[Gamxyy->sgfn],cg->fgfs[Gamxyz->sgfn],cg->fgfs[Gamxzz->sgfn],cg->fgfs[Gamyxx->sgfn],cg->fgfs[Gamyxy->sgfn],cg->fgfs[Gamyxz->sgfn],cg->fgfs[Gamyyy->sgfn],cg->fgfs[Gamyyz->sgfn],cg->fgfs[Gamyzz->sgfn],cg->fgfs[Gamzxx->sgfn],cg->fgfs[Gamzxy->sgfn],cg->fgfs[Gamzxz->sgfn],cg->fgfs[Gamzyy->sgfn],cg->fgfs[Gamzyz->sgfn],cg->fgfs[Gamzzz->sgfn],cg->fgfs[Rxx->sgfn],cg->fgfs[Rxy->sgfn],cg->fgfs[Rxz->sgfn],cg->fgfs[Ryy->sgfn],cg->fgfs[Ryz->sgfn],cg->fgfs[Rzz->sgfn],cg->fgfs[Cons_Ham->sgfn],cg->fgfs[Cons_Px->sgfn],cg->fgfs[Cons_Py->sgfn],cg->fgfs[Cons_Pz->sgfn],cg->fgfs[Cons_Gx->sgfn],cg->fgfs[Cons_Gy->sgfn],cg->fgfs[Cons_Gz->sgfn],Symmetry,lev,numepsh,sPp->data->sst,pre
+
+#define RHS_SS_PARA_CALLED_THEN cg->shape,TRK4,cg->X[0],cg->X[1],cg->X[2],cg->fgfs[fngfs+ShellPatch::gx],cg->fgfs[fngfs+ShellPatch::gy],cg->fgfs[fngfs+ShellPatch::gz],cg->fgfs[fngfs+ShellPatch::drhodx],cg->fgfs[fngfs+ShellPatch::drhody],cg->fgfs[fngfs+ShellPatch::drhodz],cg->fgfs[fngfs+ShellPatch::dsigmadx],cg->fgfs[fngfs+ShellPatch::dsigmady],cg->fgfs[fngfs+ShellPatch::dsigmadz],cg->fgfs[fngfs+ShellPatch::dRdx],cg->fgfs[fngfs+ShellPatch::dRdy],cg->fgfs[fngfs+ShellPatch::dRdz],cg->fgfs[fngfs+ShellPatch::drhodxx],cg->fgfs[fngfs+ShellPatch::drhodxy],cg->fgfs[fngfs+ShellPatch::drhodxz],cg->fgfs[fngfs+ShellPatch::drhodyy],cg->fgfs[fngfs+ShellPatch::drhodyz],cg->fgfs[fngfs+ShellPatch::drhodzz],cg->fgfs[fngfs+ShellPatch::dsigmadxx],cg->fgfs[fngfs+ShellPatch::dsigmadxy],cg->fgfs[fngfs+ShellPatch::dsigmadxz],cg->fgfs[fngfs+ShellPatch::dsigmadyy],cg->fgfs[fngfs+ShellPatch::dsigmadyz],cg->fgfs[fngfs+ShellPatch::dsigmadzz],cg->fgfs[fngfs+ShellPatch::dRdxx],cg->fgfs[fngfs+ShellPatch::dRdxy],cg->fgfs[fngfs+ShellPatch::dRdxz],cg->fgfs[fngfs+ShellPatch::dRdyy],cg->fgfs[fngfs+ShellPatch::dRdyz],cg->fgfs[fngfs+ShellPatch::dRdzz],cg->fgfs[phi->sgfn],cg->fgfs[trK->sgfn],cg->fgfs[gxx->sgfn],cg->fgfs[gxy->sgfn],cg->fgfs[gxz->sgfn],cg->fgfs[gyy->sgfn],cg->fgfs[gyz->sgfn],cg->fgfs[gzz->sgfn],cg->fgfs[Axx->sgfn],cg->fgfs[Axy->sgfn],cg->fgfs[Axz->sgfn],cg->fgfs[Ayy->sgfn],cg->fgfs[Ayz->sgfn],cg->fgfs[Azz->sgfn],cg->fgfs[Gmx->sgfn],cg->fgfs[Gmy->sgfn],cg->fgfs[Gmz->sgfn],cg->fgfs[Lap->sgfn],cg->fgfs[Sfx->sgfn],cg->fgfs[Sfy->sgfn],cg->fgfs[Sfz->sgfn],cg->fgfs[dtSfx->sgfn],cg->fgfs[dtSfy->sgfn],cg->fgfs[dtSfz->sgfn],cg->fgfs[phi1->sgfn],cg->fgfs[trK1->sgfn],cg->fgfs[gxx1->sgfn],cg->fgfs[gxy1->sgfn],cg->fgfs[gxz1->sgfn],cg->fgfs[gyy1->sgfn],cg->fgfs[gyz1->sgfn],cg->fgfs[gzz1->sgfn],cg->fgfs[Axx1->sgfn],cg->fgfs[Axy1->sgfn],cg->fgfs[Axz1->sgfn],cg->fgfs[Ayy1->sgfn],cg->fgfs[Ayz1->sgfn],cg->fgfs[Azz1->sgfn],cg->fgfs[Gmx1->sgfn],cg->fgfs[Gmy1->sgfn],cg->fgfs[Gmz1->sgfn],cg->fgfs[Lap1->sgfn],cg->f
gfs[Sfx1->sgfn],cg->fgfs[Sfy1->sgfn],cg->fgfs[Sfz1->sgfn],cg->fgfs[dtSfx1->sgfn],cg->fgfs[dtSfy1->sgfn],cg->fgfs[dtSfz1->sgfn],cg->fgfs[rho->sgfn],cg->fgfs[Sx->sgfn],cg->fgfs[Sy->sgfn],cg->fgfs[Sz->sgfn],cg->fgfs[Sxx->sgfn],cg->fgfs[Sxy->sgfn],cg->fgfs[Sxz->sgfn],cg->fgfs[Syy->sgfn],cg->fgfs[Syz->sgfn],cg->fgfs[Szz->sgfn],cg->fgfs[Gamxxx->sgfn],cg->fgfs[Gamxxy->sgfn],cg->fgfs[Gamxxz->sgfn],cg->fgfs[Gamxyy->sgfn],cg->fgfs[Gamxyz->sgfn],cg->fgfs[Gamxzz->sgfn],cg->fgfs[Gamyxx->sgfn],cg->fgfs[Gamyxy->sgfn],cg->fgfs[Gamyxz->sgfn],cg->fgfs[Gamyyy->sgfn],cg->fgfs[Gamyyz->sgfn],cg->fgfs[Gamyzz->sgfn],cg->fgfs[Gamzxx->sgfn],cg->fgfs[Gamzxy->sgfn],cg->fgfs[Gamzxz->sgfn],cg->fgfs[Gamzyy->sgfn],cg->fgfs[Gamzyz->sgfn],cg->fgfs[Gamzzz->sgfn],cg->fgfs[Rxx->sgfn],cg->fgfs[Rxy->sgfn],cg->fgfs[Rxz->sgfn],cg->fgfs[Ryy->sgfn],cg->fgfs[Ryz->sgfn],cg->fgfs[Rzz->sgfn],cg->fgfs[Cons_Ham->sgfn],cg->fgfs[Cons_Px->sgfn],cg->fgfs[Cons_Py->sgfn],cg->fgfs[Cons_Pz->sgfn],cg->fgfs[Cons_Gx->sgfn],cg->fgfs[Cons_Gy->sgfn],cg->fgfs[Cons_Gz->sgfn],Symmetry,lev,numepsh,sPp->data->sst,cor
+
+
+#define RHS_PARA_CALLED_Constraint_Out_SS RHS_SS_PARA_CALLED_FIRST_TIME // token-for-token the same argument list as RHS_SS_PARA_CALLED_FIRST_TIME; aliased so the two copies cannot drift apart
+
+#define RHS_PARA_CALLED_Intrp_Constraint_Out_SS RHS_SS_PARA_CALLED_FIRST_TIME // token-for-token the same argument list as RHS_SS_PARA_CALLED_FIRST_TIME; aliased so the two copies cannot drift apart
+//4------------tool------------------------------
+int compare_result(int ftag1,double * d2,int data_num);
+
+
+
+#endif
--- a/AMSS_NCKU_source/BSSN_GPU/bssn_step_gpu.C
+++ b/AMSS_NCKU_source/BSSN_GPU/bssn_step_gpu.C
--- a/AMSS_NCKU_source/BSSN_GPU/gpu_mem.h
+++ b/AMSS_NCKU_source/BSSN_GPU/gpu_mem.h
@@ -1,146 +1,146 @@
-#ifndef GPU_MEM_H_
-#define GPU_MEM_H_
-#include "macrodef.fh"
-struct Meta
-{
-	//---------------in/out-------------------
-	// int * ex;
-	// int* Symmetry,Lev,co; //not array	//in
-	// double *  T;				//not array	//in
-	double *X, *Y, *Z;									 // in
-	double *chi, *dxx, *dyy, *dzz;						 // inout
-	double *trK;										 // in
-	double *gxy, *gxz, *gyz;							 // in
-	double *Axx, *Axy, *Axz, *Ayy, *Ayz, *Azz;			 // in
-	double *Gamx, *Gamy, *Gamz;							 // in
-	double *Lap, *betax, *betay, *betaz;				 // inout
-	double *dtSfx, *dtSfy, *dtSfz;						 // in
-	double *chi_rhs, *trK_rhs;							 // out
-	double *gxx_rhs, *gxy_rhs, *gxz_rhs;				 // out
-	double *gyy_rhs, *gyz_rhs, *gzz_rhs;				 // out
-	double *Axx_rhs, *Axy_rhs, *Axz_rhs;				 // out
-	double *Ayy_rhs, *Ayz_rhs, *Azz_rhs;				 // out
-	double *Gamx_rhs, *Gamy_rhs, *Gamz_rhs;				 // out
-	double *Lap_rhs, *betax_rhs, *betay_rhs, *betaz_rhs; // out
-	double *dtSfx_rhs, *dtSfy_rhs, *dtSfz_rhs;			 // out
-	double *rho, *Sx, *Sy, *Sz;							 // in
-	double *Sxx, *Sxy, *Sxz, *Syy, *Syz, *Szz;			 // in
-
-	// when out, physical second kind of connection  //out
-	double *Gamxxx, *Gamxxy, *Gamxxz;
-	double *Gamxyy, *Gamxyz, *Gamxzz;
-	double *Gamyxx, *Gamyxy, *Gamyxz;
-	double *Gamyyy, *Gamyyz, *Gamyzz;
-	double *Gamzxx, *Gamzxy, *Gamzxz;
-	double *Gamzyy, *Gamzyz, *Gamzzz;
-
-	// when out, physical Ricci tensor
-	double *Rxx, *Rxy, *Rxz, *Ryy, *Ryz, *Rzz; // out
-	// double * eps;						//in
-	double *ham_Res, *movx_Res, *movy_Res, *movz_Res; // inout
-	double *Gmx_Res, *Gmy_Res, *Gmz_Res;			  // inout
-
-	//---------------local-------------------
-
-	double *gxx, *gyy, *gzz, *chix, *chiy, *chiz, *gxxx, *gxyx, *gxzx, *gyyx, *gyzx, *gzzx, *gxxy, *gxyy, *gxzy, *gyyy, *gyzy, *gzzy, *gxxz, *gxyz, *gxzz, *gyyz, *gyzz, *gzzz, *Lapx, *Lapy, *Lapz, *betaxx, *betaxy, *betaxz, *betayx, *betayy, *betayz, *betazx, *betazy, *betazz, *Gamxx, *Gamxy, *Gamxz, *Gamyx, *Gamyy, *Gamyz, *Gamzx, *Gamzy, *Gamzz, *Kx, *Ky, *Kz, *div_beta, *S, *f, *fxx, *fxy, *fxz, *fyy, *fyz, *fzz, *Gamxa, *Gamya, *Gamza, *alpn1, *chin1, *gupxx, *gupxy, *gupxz, *gupyy, *gupyz, *gupzz;
-
-	//---------------subroutine----------------
-	double *fh;
-	double *fh2;
-
-	/*double *SSS;
-	double *AAS;
-	double *ASA;
-	double *SAA;
-	double *ASS;
-	double *SAS;
-	double *SSA;*/
-//---------------GAUGE--------------
-#if (GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7)
-	double *reta;
-#endif
-};
-
-//------init constant memory---------
-
-// 1-----for compute_rhs_bssn---------
-__constant__ Meta metac;
-__constant__ int ex_c[3];
-__constant__ double T_c;
-__constant__ int Symmetry_c;
-__constant__ int Lev_c;
-__constant__ int co_c;
-__constant__ double eps_c;
-// local
-__constant__ double dX; // dX,dY,dZ
-__constant__ double dY;
-__constant__ double dZ;
-__constant__ double ZEO = 1.0;
-__constant__ double ONE = 1.0;
-__constant__ double TWO = 2.0;
-__constant__ double FOUR = 4.0;
-__constant__ double EIGHT = 8.0;
-__constant__ double HALF = 0.5;
-__constant__ double THR = 3.0;
-__constant__ double SYM = 1.0;
-__constant__ double ANTI = -1.0;
-__constant__ double FF = 0.75;
-__constant__ double eta = 2.0;
-__constant__ double F1o3;
-__constant__ double F2o3;
-__constant__ double F3o2 = 1.5;
-__constant__ double F1o6;
-__constant__ double F8 = 8.0;
-__constant__ double F16 = 16.0;
-__constant__ double PI;
-/*__constant__ double SSS[3] = {1,1,1};
-__constant__ double AAS[3] = {-1,-1,1};
-__constant__ double ASA[3] = {-1,1,-1};
-__constant__ double SAA[3] = {1,-1,-1};
-__constant__ double ASS[3] = {-1,1,1};
-__constant__ double SAS[3] = {1,-1,1};
-__constant__ double SSA[3] = {1,1,-1};*/
-
-// 2--------for fderivs------------
-__constant__ int ijk_min[3];
-__constant__ int ijk_min2[3];
-__constant__ int ijk_min3[3];
-__constant__ int ijk_max[3];
-__constant__ double d12dxyz[3];
-__constant__ double d2dxyz[3];
-
-// 3--------for fdderivs------------
-__constant__ double Sdxdx;
-__constant__ double Sdydy;
-__constant__ double Sdzdz;
-__constant__ double Fdxdx;
-__constant__ double Fdydy;
-__constant__ double Fdzdz;
-__constant__ double Sdxdy;
-__constant__ double Sdxdz;
-__constant__ double Sdydz;
-__constant__ double Fdxdy;
-__constant__ double Fdxdz;
-__constant__ double Fdydz;
-
-// my own
-__constant__ int STEP_SIZE;
-/*__constant__ int MATRIX_SIZE;
-__constant__ int MATRIX_SIZE_FH;
-__constant__ int SQUARE_SIZE;
-__constant__ int SQUARE_SIZE_FH;
-__constant__ int LINE_SIZE_FH;*/
-
-__constant__ int _1D_SIZE[4]; // start from 0 !!
-__constant__ int _2D_SIZE[4]; ////start from 0 !!
-__constant__ int _3D_SIZE[4]; ////start from 0 !!
-
-#if (GAUGE == 6 || GAUGE == 7)
-__constant__ int BHN;
-__constant__ double Porg[9];
-__constant__ double Mass[3];
-__constant__ double /*r1,r2*/, M, A, /*w1,w2 (== 12)*/, C1, C2;
-#endif
-
-/**/
-#endif
+#ifndef GPU_MEM_H_
+#define GPU_MEM_H_
+#include "macrodef.fh"
+struct Meta // Table of double* members, one pointer per named field; instantiated below as the __constant__ object metac
+{
+	//---------------in/out (the trailing in / inout / out tags give each group's data direction)-------------------
+	// int * ex;
+	// int* Symmetry,Lev,co; //not array	//in
+	// double *  T;				//not array	//in
+	double *X, *Y, *Z;									 // in
+	double *chi, *dxx, *dyy, *dzz;						 // inout
+	double *trK;										 // in
+	double *gxy, *gxz, *gyz;							 // in
+	double *Axx, *Axy, *Axz, *Ayy, *Ayz, *Azz;			 // in
+	double *Gamx, *Gamy, *Gamz;							 // in
+	double *Lap, *betax, *betay, *betaz;				 // inout
+	double *dtSfx, *dtSfy, *dtSfz;						 // in
+	double *chi_rhs, *trK_rhs;							 // out
+	double *gxx_rhs, *gxy_rhs, *gxz_rhs;				 // out
+	double *gyy_rhs, *gyz_rhs, *gzz_rhs;				 // out
+	double *Axx_rhs, *Axy_rhs, *Axz_rhs;				 // out
+	double *Ayy_rhs, *Ayz_rhs, *Azz_rhs;				 // out
+	double *Gamx_rhs, *Gamy_rhs, *Gamz_rhs;				 // out
+	double *Lap_rhs, *betax_rhs, *betay_rhs, *betaz_rhs; // out
+	double *dtSfx_rhs, *dtSfy_rhs, *dtSfz_rhs;			 // out
+	double *rho, *Sx, *Sy, *Sz;							 // in
+	double *Sxx, *Sxy, *Sxz, *Syy, *Syz, *Szz;			 // in
+
+	// on output: physical connection coefficients of the second kind  //out
+	double *Gamxxx, *Gamxxy, *Gamxxz;
+	double *Gamxyy, *Gamxyz, *Gamxzz;
+	double *Gamyxx, *Gamyxy, *Gamyxz;
+	double *Gamyyy, *Gamyyz, *Gamyzz;
+	double *Gamzxx, *Gamzxy, *Gamzxz;
+	double *Gamzyy, *Gamzyz, *Gamzzz;
+
+	// on output: physical Ricci tensor
+	double *Rxx, *Rxy, *Rxz, *Ryy, *Ryz, *Rzz; // out
+	// double * eps;						//in
+	double *ham_Res, *movx_Res, *movy_Res, *movz_Res; // inout
+	double *Gmx_Res, *Gmy_Res, *Gmz_Res;			  // inout
+
+	//---------------local (no in/out tag; NOTE(review): presumably intermediate fields used only inside the RHS computation - confirm)-------------------
+
+	double *gxx, *gyy, *gzz, *chix, *chiy, *chiz, *gxxx, *gxyx, *gxzx, *gyyx, *gyzx, *gzzx, *gxxy, *gxyy, *gxzy, *gyyy, *gyzy, *gzzy, *gxxz, *gxyz, *gxzz, *gyyz, *gyzz, *gzzz, *Lapx, *Lapy, *Lapz, *betaxx, *betaxy, *betaxz, *betayx, *betayy, *betayz, *betazx, *betazy, *betazz, *Gamxx, *Gamxy, *Gamxz, *Gamyx, *Gamyy, *Gamyz, *Gamzx, *Gamzy, *Gamzz, *Kx, *Ky, *Kz, *div_beta, *S, *f, *fxx, *fxy, *fxz, *fyy, *fyz, *fzz, *Gamxa, *Gamya, *Gamza, *alpn1, *chin1, *gupxx, *gupxy, *gupxz, *gupyy, *gupyz, *gupzz;
+
+	//---------------subroutine (NOTE(review): fh / fh2 are presumably work buffers for the derivative routines - confirm)----------------
+	double *fh;
+	double *fh2;
+
+	/*double *SSS;
+	double *AAS;
+	double *ASA;
+	double *SAA;
+	double *ASS;
+	double *SAS;
+	double *SSA;*/
+//---------------GAUGE: the reta member exists only when GAUGE is 2 through 7--------------
+#if (GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7)
+	double *reta;
+#endif
+};
+
+//------init constant memory---------
+
+// 1-----for compute_rhs_bssn---------
+__constant__ Meta metac; // field-pointer table (struct Meta); NOTE(review): presumably filled from the host before launch - confirm
+__constant__ int ex_c[3]; // NOTE(review): ex_c .. co_c look like the scalar counterparts of the commented-out ex/T/Symmetry/Lev/co members of Meta - confirm
+__constant__ double T_c;
+__constant__ int Symmetry_c;
+__constant__ int Lev_c;
+__constant__ int co_c;
+__constant__ double eps_c;
+// local
+__constant__ double dX; // dX,dY,dZ
+__constant__ double dY;
+__constant__ double dZ;
+__constant__ double ZEO = 0.0; // zero; was initialised to 1.0, which made it a duplicate of ONE
+__constant__ double ONE = 1.0;
+__constant__ double TWO = 2.0;
+__constant__ double FOUR = 4.0;
+__constant__ double EIGHT = 8.0;
+__constant__ double HALF = 0.5;
+__constant__ double THR = 3.0;
+__constant__ double SYM = 1.0;
+__constant__ double ANTI = -1.0;
+__constant__ double FF = 0.75;
+__constant__ double eta = 2.0;
+__constant__ double F1o3; // F1o3, F2o3, F1o6 and PI have no initialiser here; NOTE(review): presumably set at runtime - confirm
+__constant__ double F2o3;
+__constant__ double F3o2 = 1.5;
+__constant__ double F1o6;
+__constant__ double F8 = 8.0;
+__constant__ double F16 = 16.0;
+__constant__ double PI;
+/*__constant__ double SSS[3] = {1,1,1};
+__constant__ double AAS[3] = {-1,-1,1};
+__constant__ double ASA[3] = {-1,1,-1};
+__constant__ double SAA[3] = {1,-1,-1};
+__constant__ double ASS[3] = {-1,1,1};
+__constant__ double SAS[3] = {1,-1,1};
+__constant__ double SSA[3] = {1,1,-1};*/
+
+// 2--------for fderivs------------
+__constant__ int ijk_min[3]; // NOTE(review): the ijk_* arrays look like per-axis index bounds for the stencils - confirm where they are filled
+__constant__ int ijk_min2[3];
+__constant__ int ijk_min3[3];
+__constant__ int ijk_max[3];
+__constant__ double d12dxyz[3]; // NOTE(review): presumably per-axis finite-difference factors derived from dX,dY,dZ - confirm
+__constant__ double d2dxyz[3];
+
+// 3--------for fdderivs------------
+__constant__ double Sdxdx; // NOTE(review): the S*/F* scalars are presumably two families of second-derivative factors - confirm against fdderivs
+__constant__ double Sdydy;
+__constant__ double Sdzdz;
+__constant__ double Fdxdx;
+__constant__ double Fdydy;
+__constant__ double Fdzdz;
+__constant__ double Sdxdy;
+__constant__ double Sdxdz;
+__constant__ double Sdydz;
+__constant__ double Fdxdy;
+__constant__ double Fdxdz;
+__constant__ double Fdydz;
+
+// my own
+__constant__ int STEP_SIZE;
+/*__constant__ int MATRIX_SIZE;
+__constant__ int MATRIX_SIZE_FH;
+__constant__ int SQUARE_SIZE;
+__constant__ int SQUARE_SIZE_FH;
+__constant__ int LINE_SIZE_FH;*/
+
+__constant__ int _1D_SIZE[4]; // start from 0 !!
+__constant__ int _2D_SIZE[4]; ////start from 0 !!
+__constant__ int _3D_SIZE[4]; ////start from 0 !!
+
+#if (GAUGE == 6 || GAUGE == 7) // constants compiled only for these two gauge choices
+__constant__ int BHN;
+__constant__ double Porg[9];
+__constant__ double Mass[3];
+__constant__ double /*r1,r2,*/ M, A, /*w1,w2 (== 12),*/ C1, C2; // separators moved inside the comments: the old "double , M, A, , C1, C2" did not parse
+#endif
+
+/**/
+#endif
--- a/AMSS_NCKU_source/BSSN_GPU/gpu_rhsSS_mem.h
+++ b/AMSS_NCKU_source/BSSN_GPU/gpu_rhsSS_mem.h
@@ -1,198 +1,198 @@
-#ifndef GPU_MEM_H_
-#define GPU_MEM_H_
-#include "macrodef.fh"
-
-#ifdef WithShell
-struct Metass
-{
-	double *crho,* sigma,* R,*
-			drhodx,* drhody,* drhodz,* 
-			dsigmadx,* dsigmady,* dsigmadz,* 
-			dRdx,* dRdy,* dRdz,* 
-			drhodxx,* drhodxy,* drhodxz,* 
-			drhodyy,* drhodyz,* drhodzz,* 
-			dsigmadxx,* dsigmadxy,* dsigmadxz,* 
-			dsigmadyy,* dsigmadyz,* dsigmadzz,* 
-			dRdxx,* dRdxy,* dRdxz,* 
-			dRdyy,* dRdyz,* dRdzz;
-	//local
-	double *gx,*gy,*gz,*gxx,*gxy,*gxz,*gyy,*gyz,*gzz;
-};
-
-__constant__ Metass metassc;
-Metass * metass;
-
-#endif //WithShell
-
-struct Meta
-{
-	//SS
-	
-	//---------------in/out-------------------
-	//int * ex;
-	//int* Symmetry,Lev,co; //not array	//in
-	//double *  T;				//not array	//in
-	double * X,*Y,*Z;						//in
-	double * chi,*dxx,*dyy,*dzz;			//inout
-	double * trK		;				//in
-	double * gxy,*gxz,*gyz;				//in
-	double * Axx,*Axy,*Axz,*Ayy,*Ayz,*Azz;	//in
-	double * Gamx,*Gamy,*Gamz			;	//in
-	double * Lap, *betax, *betay, *betaz;	//inout
-	double * dtSfx,  *dtSfy,  *dtSfz	;	//in
-	double * chi_rhs,*trK_rhs		;	//out
-	double * gxx_rhs,*gxy_rhs,*gxz_rhs;	//out
-	double * gyy_rhs,*gyz_rhs,*gzz_rhs;	//out
-	double * Axx_rhs,*Axy_rhs,*Axz_rhs;	//out
-	double * Ayy_rhs,*Ayz_rhs,*Azz_rhs;	//out
-	double * Gamx_rhs,*Gamy_rhs,*Gamz_rhs;//out
-	double * Lap_rhs, *betax_rhs, *betay_rhs, *betaz_rhs;//out
-	double * dtSfx_rhs,*dtSfy_rhs,*dtSfz_rhs;//out
-	double * rho,*Sx,*Sy,*Sz			;	//in
-	double * Sxx,*Sxy,*Sxz,*Syy,*Syz,*Szz;	//in
-	
-	// when out, physical second kind of connection  //out
-	double * Gamxxx, *Gamxxy, *Gamxxz;
-	double * Gamxyy, *Gamxyz, *Gamxzz;
-	double * Gamyxx, *Gamyxy, *Gamyxz;
-	double * Gamyyy, *Gamyyz, *Gamyzz;
-	double * Gamzxx, *Gamzxy,* Gamzxz;
-	double * Gamzyy, *Gamzyz, *Gamzzz;
-	
-	//when out, physical Ricci tensor  
-	double * Rxx,*Rxy,*Rxz,*Ryy,*Ryz,*Rzz;	//out
-	//double * eps;						//in
-	double * ham_Res, *movx_Res, *movy_Res, *movz_Res;	//inout
-	double * Gmx_Res, *Gmy_Res, *Gmz_Res;				//inout
-	
-	
-	//---------------local-------------------
-	
-	double * gxx,*gyy,*gzz
-  , *chix,*chiy,*chiz
-  , *gxxx,*gxyx,*gxzx,*gyyx,*gyzx,*gzzx
-  , *gxxy,*gxyy,*gxzy,*gyyy,*gyzy,*gzzy
-  , *gxxz,*gxyz,*gxzz,*gyyz,*gyzz,*gzzz
-  , *Lapx,*Lapy,*Lapz
-  , *betaxx,*betaxy,*betaxz
-  , *betayx,*betayy,*betayz
-  , *betazx,*betazy,*betazz
-  , *Gamxx,*Gamxy,*Gamxz
-  , *Gamyx,*Gamyy,*Gamyz
-  , *Gamzx,*Gamzy,*Gamzz
-  , *Kx,*Ky,*Kz,*div_beta,*S
-  , *f,*fxx,*fxy,*fxz,*fyy,*fyz,*fzz
-  , *Gamxa,*Gamya,*Gamza,*alpn1,*chin1
-  , *gupxx,*gupxy,*gupxz
-  , *gupyy,*gupyz,*gupzz;
-  
-  //---------------subroutine----------------
-  	double * fh;
-  	double * fh2;
-  	
-	/*double *SSS;
-	double *AAS;
-	double *ASA;
-	double *SAA;
-	double *ASS;
-	double *SAS;
-	double *SSA;*/
-	
-	//---------------GAUGE--------------
-#if (GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7)
-  double * reta;
-#endif
-  
-};
-
-//------init constant memory---------
-
-//1-----for compute_rhs_bssn---------
-__constant__ Meta metac;
-
-__constant__ int ex_c[3];
-__constant__ double T_c;
-__constant__ int Symmetry_c;
-__constant__ int Lev_c;
-__constant__ int co_c;
-__constant__ double eps_c;
-__constant__ int sst_c;
-//local
-__constant__ double dX; //dX,dY,dZ
-__constant__ double dY;
-__constant__ double dZ;
-__constant__ double ZEO = 1.0;
-__constant__ double ONE = 1.0;
-__constant__ double TWO = 2.0;
-__constant__ double FOUR = 4.0;
-__constant__ double EIGHT = 8.0;
-__constant__ double HALF = 0.5;
-__constant__ double THR = 3.0;
-__constant__ double SYM = 1.0;
-__constant__ double ANTI = -1.0; 
-__constant__ double FF = 0.75;
-__constant__ double eta = 2.0;
-__constant__ double F1o3;
-__constant__ double F2o3;
-__constant__ double F3o2 = 1.5;
-__constant__ double F1o6;
-__constant__ double F8 = 8.0;
-__constant__ double F16 = 16.0;
-__constant__ double PI;
-/*__constant__ double SSS[3] = {1,1,1};
-__constant__ double AAS[3] = {-1,-1,1};
-__constant__ double ASA[3] = {-1,1,-1};
-__constant__ double SAA[3] = {1,-1,-1};
-__constant__ double ASS[3] = {-1,1,1};
-__constant__ double SAS[3] = {1,-1,1};
-__constant__ double SSA[3] = {1,1,-1};*/
-
-//2--------for fderivs------------
-__constant__ int ijk_min[3];
-__constant__ int ijk_min2[3];
-__constant__ int ijk_min3[3];
-__constant__ int ijk_max[3];
-__constant__ int ijk_max3[3];
-__constant__ double d12dxyz[3];
-__constant__ double d2dxyz[3];
-
-//3--------for fdderivs------------
-__constant__ double Sdxdx;
-__constant__ double Sdydy;
-__constant__ double Sdzdz;
-__constant__ double Fdxdx;
-__constant__ double Fdydy;
-__constant__ double Fdzdz;
-__constant__ double Sdxdy;
-__constant__ double Sdxdz;
-__constant__ double Sdydz;
-__constant__ double Fdxdy;
-__constant__ double Fdxdz;
-__constant__ double Fdydz;
-
- 
-//my own
-__constant__ int STEP_SIZE;
-/*__constant__ int MATRIX_SIZE;
-__constant__ int MATRIX_SIZE_FH;
-__constant__ int SQUARE_SIZE;
-__constant__ int SQUARE_SIZE_FH;
-__constant__ int LINE_SIZE_FH;*/
-
-__constant__ int _1D_SIZE[4];   //start from 0 !!
-__constant__ int _2D_SIZE[4];	 ////start from 0 !!
-__constant__ int _3D_SIZE[4];	 ////start from 0 !!
-
-int h_1D_SIZE[4];   //start from 0 !!
-int h_2D_SIZE[4];	 ////start from 0 !!
-int h_3D_SIZE[4];	 ////start from 0 !!
-Meta * meta;
-
-#if (GAUGE == 6 || GAUGE == 7)
-__constant__  int BHN;
-__constant__  double Porg[9];
-__constant__  double Mass[3];
-__constant__  double /*r1,r2*/,M,A,/*w1,w2 (== 12)*/,C1,C2;
-#endif
-/**/
-#endif
+#ifndef GPU_MEM_H_
+#define GPU_MEM_H_
+#include "macrodef.fh"
+
+#ifdef WithShell
+struct Metass // Plain bundle of double* fields, compiled only under WithShell; an instance is declared in constant memory as `metassc` right after this struct.
+{
+	double *crho,* sigma,* R,*
+			drhodx,* drhody,* drhodz,* 
+			dsigmadx,* dsigmady,* dsigmadz,* 
+			dRdx,* dRdy,* dRdz,* 
+			drhodxx,* drhodxy,* drhodxz,* 
+			drhodyy,* drhodyz,* drhodzz,* 
+			dsigmadxx,* dsigmadxy,* dsigmadxz,* 
+			dsigmadyy,* dsigmadyz,* dsigmadzz,* 
+			dRdxx,* dRdxy,* dRdxz,* 
+			dRdyy,* dRdyz,* dRdzz;
+	// The 30 pointers above: names suggest rho/sigma/R plus their first and second x,y,z derivatives (confirm against the kernels). Below: 9 pointers the original marks "local" (presumably scratch arrays).
+	double *gx,*gy,*gz,*gxx,*gxy,*gxz,*gyy,*gyz,*gzz;
+};
+
+__constant__ Metass metassc;
+Metass * metass;
+
+#endif //WithShell
+
+struct Meta // Bundle of double* field pointers; a copy is declared in constant memory as `metac` and a host-side `Meta *meta` is declared later in this header.
+{
+	// SS -- NOTE(review): presumably marks the shell ("SS") variant, matching the file name gpu_rhsSS_mem.h; confirm.
+	
+	//---------------in/out (direction of each group is given by its trailing in / inout / out tag)-------------------
+	//int * ex;   (not a member; presumably replaced by the __constant__ symbol ex_c[3])
+	//int* Symmetry,Lev,co; //not array	//in   (not members; presumably replaced by Symmetry_c, Lev_c, co_c)
+	//double *  T;				//not array	//in   (not a member; presumably replaced by T_c)
+	double * X,*Y,*Z;						//in
+	double * chi,*dxx,*dyy,*dzz;			//inout
+	double * trK		;				//in
+	double * gxy,*gxz,*gyz;				//in
+	double * Axx,*Axy,*Axz,*Ayy,*Ayz,*Azz;	//in
+	double * Gamx,*Gamy,*Gamz			;	//in
+	double * Lap, *betax, *betay, *betaz;	//inout
+	double * dtSfx,  *dtSfy,  *dtSfz	;	//in
+	double * chi_rhs,*trK_rhs		;	//out
+	double * gxx_rhs,*gxy_rhs,*gxz_rhs;	//out
+	double * gyy_rhs,*gyz_rhs,*gzz_rhs;	//out
+	double * Axx_rhs,*Axy_rhs,*Axz_rhs;	//out
+	double * Ayy_rhs,*Ayz_rhs,*Azz_rhs;	//out
+	double * Gamx_rhs,*Gamy_rhs,*Gamz_rhs;//out
+	double * Lap_rhs, *betax_rhs, *betay_rhs, *betaz_rhs;//out
+	double * dtSfx_rhs,*dtSfy_rhs,*dtSfz_rhs;//out
+	double * rho,*Sx,*Sy,*Sz			;	//in
+	double * Sxx,*Sxy,*Sxz,*Syy,*Syz,*Szz;	//in
+	
+	// out: on return these hold the physical connection coefficients of the second kind (18 arrays, Gam{i}{jk})
+	double * Gamxxx, *Gamxxy, *Gamxxz;
+	double * Gamxyy, *Gamxyz, *Gamxzz;
+	double * Gamyxx, *Gamyxy, *Gamyxz;
+	double * Gamyyy, *Gamyyz, *Gamyzz;
+	double * Gamzxx, *Gamzxy,* Gamzxz;
+	double * Gamzyy, *Gamzyz, *Gamzzz;
+	
+	// out: on return these hold the physical Ricci tensor (6 arrays)
+	double * Rxx,*Rxy,*Rxz,*Ryy,*Ryz,*Rzz;	//out
+	//double * eps;						//in   (not a member; presumably replaced by the __constant__ symbol eps_c)
+	double * ham_Res, *movx_Res, *movy_Res, *movz_Res;	//inout
+	double * Gmx_Res, *Gmy_Res, *Gmz_Res;				//inout
+	
+	
+	//---------------local (one declaration listing the remaining work arrays; presumably scratch storage -- confirm)-------------------
+	
+	double * gxx,*gyy,*gzz
+  , *chix,*chiy,*chiz
+  , *gxxx,*gxyx,*gxzx,*gyyx,*gyzx,*gzzx
+  , *gxxy,*gxyy,*gxzy,*gyyy,*gyzy,*gzzy
+  , *gxxz,*gxyz,*gxzz,*gyyz,*gyzz,*gzzz
+  , *Lapx,*Lapy,*Lapz
+  , *betaxx,*betaxy,*betaxz
+  , *betayx,*betayy,*betayz
+  , *betazx,*betazy,*betazz
+  , *Gamxx,*Gamxy,*Gamxz
+  , *Gamyx,*Gamyy,*Gamyz
+  , *Gamzx,*Gamzy,*Gamzz
+  , *Kx,*Ky,*Kz,*div_beta,*S
+  , *f,*fxx,*fxy,*fxz,*fyy,*fyz,*fzz
+  , *Gamxa,*Gamya,*Gamza,*alpn1,*chin1
+  , *gupxx,*gupxy,*gupxz
+  , *gupyy,*gupyz,*gupzz;
+  
+  //---------------subroutine: two extra buffers, fh and fh2 (presumably work space for the derivative helpers -- confirm)----------------
+  	double * fh;
+  	double * fh2;
+  	
+	/*double *SSS;
+	double *AAS;
+	double *ASA;
+	double *SAA;
+	double *ASS;
+	double *SAS;
+	double *SSA;*/
+	
+	//---------------GAUGE: reta exists only when GAUGE is one of 2..7--------------
+#if (GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7)
+  double * reta;
+#endif
+  
+};
+
+//------constant-memory symbols (all declared with __constant__)---------
+
+//1-----for compute_rhs_bssn---------
+__constant__ Meta metac; // constant-memory copy of the Meta pointer bundle
+
+__constant__ int ex_c[3];
+__constant__ double T_c;
+__constant__ int Symmetry_c;
+__constant__ int Lev_c;
+__constant__ int co_c;
+__constant__ double eps_c;
+__constant__ int sst_c;
+//local
+__constant__ double dX; //dX,dY,dZ: no initializers (presumably grid spacings written from the host -- confirm)
+__constant__ double dY;
+__constant__ double dZ;
+__constant__ double ZEO = 0.0; // was 1.0, i.e. identical to ONE; the name denotes zero
+__constant__ double ONE = 1.0;
+__constant__ double TWO = 2.0;
+__constant__ double FOUR = 4.0;
+__constant__ double EIGHT = 8.0;
+__constant__ double HALF = 0.5;
+__constant__ double THR = 3.0;
+__constant__ double SYM = 1.0;
+__constant__ double ANTI = -1.0; 
+__constant__ double FF = 0.75;
+__constant__ double eta = 2.0;
+__constant__ double F1o3; // no initializer; name suggests 1/3 -- presumably set from the host (e.g. cudaMemcpyToSymbol) before use; confirm
+__constant__ double F2o3; // no initializer; name suggests 2/3 -- same caveat as F1o3
+__constant__ double F3o2 = 1.5;
+__constant__ double F1o6; // no initializer; name suggests 1/6 -- same caveat as F1o3
+__constant__ double F8 = 8.0;
+__constant__ double F16 = 16.0;
+__constant__ double PI; // no initializer -- same caveat as F1o3
+/*__constant__ double SSS[3] = {1,1,1};
+__constant__ double AAS[3] = {-1,-1,1};
+__constant__ double ASA[3] = {-1,1,-1};
+__constant__ double SAA[3] = {1,-1,-1};
+__constant__ double ASS[3] = {-1,1,1};
+__constant__ double SAS[3] = {1,-1,1};
+__constant__ double SSA[3] = {1,1,-1};*/
+
+//2. __constant__ symbols for fderivs: 3-component arrays, no initializers (presumably written from the host before launch; names suggest index bounds and step factors -- confirm)
+__constant__ int ijk_min[3];
+__constant__ int ijk_min2[3];
+__constant__ int ijk_min3[3];
+__constant__ int ijk_max[3];
+__constant__ int ijk_max3[3];
+__constant__ double d12dxyz[3];
+__constant__ double d2dxyz[3];
+
+//3. __constant__ symbols for fdderivs: twelve scalar doubles, no initializers (presumably second-derivative coefficients per axis pair -- confirm)
+__constant__ double Sdxdx;
+__constant__ double Sdydy;
+__constant__ double Sdzdz;
+__constant__ double Fdxdx;
+__constant__ double Fdydy;
+__constant__ double Fdzdz;
+__constant__ double Sdxdy;
+__constant__ double Sdxdz;
+__constant__ double Sdydz;
+__constant__ double Fdxdy;
+__constant__ double Fdxdz;
+__constant__ double Fdydz;
+
+ 
+// Additional size constants added by the GPU port author ("my own" in the original note)
+__constant__ int STEP_SIZE;
+/*__constant__ int MATRIX_SIZE;
+__constant__ int MATRIX_SIZE_FH;
+__constant__ int SQUARE_SIZE;
+__constant__ int SQUARE_SIZE_FH;
+__constant__ int LINE_SIZE_FH;*/
+
+__constant__ int _1D_SIZE[4];   // 4 entries; original note says "start from 0 !!" (presumably 0-based indexing -- confirm)
+__constant__ int _2D_SIZE[4];	 // 4 entries; same "start from 0" note as _1D_SIZE
+__constant__ int _3D_SIZE[4];	 // 4 entries; same "start from 0" note as _1D_SIZE
+
+int h_1D_SIZE[4];   // host-side (no __constant__) array with the same extent as _1D_SIZE; NOTE(review): defined, not just declared, in a header -- confirm it is included from a single translation unit
+int h_2D_SIZE[4];	 // host-side array with the same extent as _2D_SIZE
+int h_3D_SIZE[4];	 // host-side array with the same extent as _3D_SIZE
+Meta * meta; // host-side pointer to a Meta bundle; the constant-memory instance is `metac`
+
+#if (GAUGE == 6 || GAUGE == 7)
+__constant__  int BHN;        // NOTE(review): presumably the number of black holes -- confirm
+__constant__  double Porg[9]; // 9 doubles; NOTE(review): presumably 3 positions x 3 coordinates -- confirm
+__constant__  double Mass[3]; // 3 doubles; NOTE(review): presumably one mass per black hole -- confirm
+__constant__  double /*r1,r2,*/ M,A,/*w1,w2 (== 12),*/ C1,C2; // commas moved inside the comments: the old form expanded to "double ,M,A,,C1,C2;" which does not compile
+#endif
+/**/
+#endif
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
ianchb	17109fde9b	[TEST]UPSTREAM: Pick some source changes from `48080d0a97` * Sync new folder structure	2026-04-23 20:55:40 +08:00
ianchb	c185f99ee3	UPSTREAM: Pick source changes from `a5b2dd9e3c` Original message: fix bug	2026-04-23 20:18:44 +08:00
ianchb	4a13a9d37a	UPSTREAM: Pick some source changes from `57ec145e59`	2026-04-23 20:10:12 +08:00
CGH0S7	ac82ebd889	更新精度检查脚本加入图像比对检查	2026-04-15 00:49:46 +08:00
CGH0S7	9c31384b2f	Add optional BSSN kernel profiling switches	2026-04-13 16:51:06 +08:00
CGH0S7	e4e741caa1	Remove dead chi derivative setup in BSSN RHS	2026-04-13 15:55:43 +08:00
CGH0S7	65e0f95f40	Localize chi Ricci intermediates in RHS	2026-04-13 15:14:31 +08:00
CGH0S7	f9fbf97e64	Elide dead stores in BSSN RHS hot path	2026-04-13 15:10:22 +08:00
CGH0S7	968522995b	Add fine-grained step timing and trim BH RHS overhead	2026-04-13 14:50:55 +08:00
CGH0S7	f3988ac8ca	Merge wave and mass extraction interpolation	2026-04-13 13:17:36 +08:00
CGH0S7	e4c25eb21f	Cache wave extraction angular kernels	2026-04-13 12:40:20 +08:00
CGH0S7	4b10519876	Reuse mass integrand across detector radii	2026-04-13 11:55:41 +08:00
CGH0S7	3a58273501	Batch constraint norm reductions	2026-04-13 11:48:02 +08:00
CGH0S7	5c65cea2f0	Optimize constraint refresh after regrid	2026-04-13 11:39:50 +08:00
CGH0S7	8c1f4d8108	迁移C算子的循环融合和临时量消除	2026-03-03 16:20:15 +08:00
CGH0S7	d310ef918b	bssn_rhs(fortran): migrate C kernel loop-fusion optimizations	2026-03-03 16:20:15 +08:00
CGH0S7	b35e1b289f	设置开关关闭内存打印统计	2026-03-03 16:17:47 +08:00
CGH0S7	05851b2c59	关闭静态负载	2026-03-03 16:17:47 +08:00
ianchb	3b39583d67	fix(bssn_rhs)	2026-03-03 16:06:33 +08:00
gh0s7	688bdb6708	Merge pull request 'cjy-dystopia' (#3 ) from cjy-dystopia into main Reviewed-on: https://seele.tail3b303.ts.net:3000/64-BitBrainstorm_2026/AMSS-NCKU/pulls/3	2026-03-02 21:36:26 +08:00
CGH0S7	5070134857	perf(transfer_cached): 将 per-call new/delete 的 req_node/req_is_recv/completed 数组移入 SyncCache 复用避免 transfer_cached 每次调用分配释放 3 个临时数组，减少堆操作开销。 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-03-02 21:14:35 +08:00
CGH0S7	4012e9d068	perf(RestrictProlong): 用 Restrict_cached/OutBdLow2Hi_cached 替换非缓存版本，Sync_finish 改为渐进式解包 - RestrictProlong/RestrictProlong_aux 中的 Restrict() 和 OutBdLow2Hi() 替换为 _cached 版本，复用 gridseg 列表和 MPI 缓冲区，避免每次调用重新分配 - 新增 sync_cache_restrict/sync_cache_outbd 两组 per-level 缓存 - Sync_finish 从 MPI_Waitall 改为 MPI_Waitsome 渐进式解包，降低尾延迟 - AsyncSyncState 扩展 req_node/req_is_recv/pending_recv 字段支持渐进解包 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-03-02 20:48:38 +08:00
ianchb	b3c367f15b	prolong3 改为先算实际 stencil 窗口；只有窗口触及对称边界时才走全域 symmetry_bd，否则只复制必需窗口。restrict3 同样改成窗口判定，无触边时仅填 ii/jj/kk 必需窗口。	2026-03-02 17:38:56 +08:00
ianchb	e73911f292	perf(restrict3): shrink X-pass ii sweep to required overlap window - compute fi_min/fi_max from output i-range and derive ii_lo/ii_hi - replace full ii sweep (-1:extf(1)) with windowed sweep in Z/Y precompute passes - keep stencil math unchanged; add bounds sanity check for ii window	2026-03-02 17:37:13 +08:00
ianchb	7543d3e8c7	perf(MPatch): 用空间 bin 索引加速 Interp_Points 的 block 归属查找 - 为 Patch::Interp_Points 三个重载引入 BlockBinIndex（候选筛选 + 全扫回退） - 保持原 point-in-block 判定与后续插值/通信流程不变 - 将逐点线性扫块从 O(N_pointsN_blocks) 降为近似 O(N_pointsk) - 测试：bin 上限如果太大，会引入不必要的索引构建开销。将 bins 上限设为 16。 Co-authored-by: gpt-5.3-codex	2026-03-02 17:37:13 +08:00
ianchb	42c69fab24	refactor(Parallel): streamline MPI communication by consolidating request handling and memory management	2026-03-02 17:37:13 +08:00
CGH0S7	95220a05c8	optimize fdderivs core-region branch elimination for ghost_width=3	2026-03-02 17:33:26 +08:00
CGH0S7	466b084a58	fix prolong/restrict index bounds after cherry-pick `12e1f63`	2026-03-02 13:59:47 +08:00
jaunatisblue	61ccef9f97	prolong3: 减少Z-pass 冗余计算	2026-03-02 13:58:52 +08:00
CGH0S7	e11363e06e	Optimize fdderivs: skip redundant 2nd-order work in 4th-order overlap	2026-03-02 03:21:21 +08:00
jaunatisblue	f70e90f694	prolong3：提升cache命中率	2026-03-02 03:05:35 +08:00
jaunatisblue	75dd5353b0	修改prolong	2026-03-02 02:25:25 +08:00
jaunatisblue	23a82d063b	对prolong3做访存优化	2026-03-02 02:25:25 +08:00
gh0s7	524d1d1512	Merge pull request 'cjy-dystopia' (#2 ) from cjy-dystopia into main Reviewed-on: https://seele.tail3b303.ts.net:3000/64-BitBrainstorm_2026/AMSS-NCKU/pulls/2	2026-03-01 19:22:09 +08:00
CGH0S7	44efb2e08c	预赛最终版本v1.0.0: 确定PGO和原负载均衡方案在当前版本造成负优化已经回退	2026-03-01 18:04:25 +08:00
CGH0S7	16013081e0	Optimize symmetry_bd with stride-based fast paths	2026-03-01 15:50:56 +08:00
CGH0S7	03416a7b28	perf(polint): add uniform-grid fast path for barycentric n=6	2026-03-01 13:26:39 +08:00
CGH0S7	cca3c16c2b	perf(polint): add switchable barycentric ordn=6 path	2026-03-01 13:20:46 +08:00
CGH0S7	e5231849ee	perf(polin3): switch to lagrange-weight tensor contraction	2026-03-01 13:04:33 +08:00
CGH0S7	a766e49ff0	perf(polint): add ordn=6 specialized neville path	2026-03-01 12:39:53 +08:00
CGH0S7	1a518cd3f6	Optimize average2: use DO CONCURRENT loop form	2026-03-01 00:41:32 +08:00
CGH0S7	1dc622e516	Optimize average2: replace array expression with explicit loops	2026-03-01 00:33:01 +08:00
CGH0S7	3046a0ccde	Optimize prolong3: hoist bounds check out of inner loop	2026-03-01 00:17:30 +08:00
CGH0S7	d4ec69c98a	Optimize prolong3: replace parity branches with coefficient lookup	2026-02-28 23:59:57 +08:00
CGH0S7	2c0a3055d4	Optimize prolong3: precompute coarse index/parity maps	2026-02-28 23:53:30 +08:00
CGH0S7	1eba73acbe	先关闭绑核心，发现速度对比：不绑定核心+SCX>绑核心+SCX	2026-02-28 23:27:44 +08:00
CGH0S7	b91cfff301	Add switchable C RK4 kernel and build toggle	2026-02-28 21:12:19 +08:00
CGH0S7	e29ca2dca9	build: switch allocator option to oneTBB tbbmalloc	2026-02-28 17:16:00 +08:00
CGH0S7	6493101ca0	bssn_rhs_c: recompute contracted Gamma terms to remove temp arrays	2026-02-28 16:34:23 +08:00
CGH0S7	169986cde1	bssn_rhs_c: compute div_beta on-the-fly to remove temp array	2026-02-28 16:25:57 +08:00
CGH0S7	1fbc213888	bssn_rhs_c: remove gxx/gyy/gzz temporaries in favor of dxx/dyy/dzz+1	2026-02-28 15:50:52 +08:00
CGH0S7	6024708a48	derivs_c: split low/high stencil regions to reduce branch overhead	2026-02-28 15:42:31 +08:00
CGH0S7	bc457d981e	bssn_rhs_c: merge lopsided+kodis with shared symmetry buffer	2026-02-28 15:23:01 +08:00
CGH0S7	51dead090e	bssn_rhs_c: 融合最终RHS两循环为一循环，用局部变量传递fij中间值 (Modify 6) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-02-28 13:49:45 +08:00
CGH0S7	34d6922a66	fdderivs_c: 全量清零改为只清零边界面，减少无效内存写入 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-02-28 13:20:06 +08:00
CGH0S7	8010ad27ed	kodiss_c: 收紧循环范围消除边界无用迭代和分支判断 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-02-28 13:04:21 +08:00
CGH0S7	38e691f013	bssn_rhs_c: 融合Christoffel修正+trK_rhs两循环为一循环 (Modify 5) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-02-28 12:57:07 +08:00
CGH0S7	808387aa11	bssn_rhs_c: 融合fxx/Gamxa+Gamma_rhs_part2两循环为一循环 (Modify 4) fxx/fxy/fxz和Gamxa/ya/za保留在局部标量中直接复用于Gamma_rhs part2，减少数组读写 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-02-28 11:14:35 +08:00
CGH0S7	c2b676abf2	bssn_rhs_c: 融合A^{ij}升指标+Gamma_rhs_part1两循环为一循环 (Modify 3) A^{ij}六分量保留在局部标量中直接复用于Gamma_rhs计算，减少Rxx..Ryz数组的额外读取 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-02-28 11:02:27 +08:00
CGH0S7	2c60533501	bssn_rhs_c: 融合逆度规+Gamma约束+Christoffel三循环为一循环 (Modify 2) 逆度规计算结果保留在局部标量中直接复用，减少对gupxx..gupzz数组的重复读取，每步加速0.01秒 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-02-28 10:57:40 +08:00
CGH0S7	318b5254cc	根据组委会邮件要求更新检测脚本，增加对3D向量和三个分量分别检测RMS小于1.0%	2026-02-27 17:38:21 +08:00
CGH0S7	3cee05f262	Merge branch 'cjy-oneapi-opus-hotfix'	2026-02-27 15:13:40 +08:00
CGH0S7	e0b5e012df	引入 PGO 式两遍编译流程，将 Interp_Points 负载均衡优化合法化背景：上一个 commit 中同事实现的热点 block 拆分与 rank 重映射取得了显著加速效果，但其中硬编码了 heavy ranks (27/28/35/36) 和重映射表，属于针对特定测例的优化，违反竞赛规则第 6 条（不允许针对参数或测例的专门优化）。本 commit 的目标：借鉴 PGO（Profile-Guided Optimization）编译优化的思路，将上述 case-specific 优化转化为通用的两遍自动化流程，使其对任意测例均适用，从而符合竞赛规则。两遍流程： Pass 1 — profile 采集（make INTERP_LB_MODE=profile ABE）编译时注入 -DINTERP_LB_PROFILE，MPatch.C 中 Interp_Points 在首次调用时用 MPI_Wtime 计时 + MPI_Gather 汇总各 rank 耗时，识别超过均值 2.5 倍的热点 rank，写入 interp_lb_profile.bin。中间步骤 — 生成编译时头文件 python3 gen_interp_lb_header.py 读取 profile.bin，自动计算拆分策略和重映射表，生成 interp_lb_profile_data.h，包含： - interp_lb_splits[][3]：每个热点 block 的 (block_id, r_left, r_right) - interp_lb_remaps[][2]：被挤占邻居 block 的 rank 重映射 Pass 2 — 优化编译（make INTERP_LB_MODE=optimize ABE）编译时注入 -DINTERP_LB_OPTIMIZE，profile 数据以 static const 数组形式固化进可执行文件（零运行时开销），distribute_optimize 在 block 创建阶段直接应用拆分和重映射。具体改动： - makefile.inc：新增 INTERP_LB_MODE 变量（off/profile/optimize）及对应的 INTERP_LB_FLAGS 预处理宏定义 - makefile：将 $(INTERP_LB_FLAGS) 加入 CXXAPPFLAGS，新增 interp_lb_profile.o 编译目标 - gen_interp_lb_header.py：profile.bin → interp_lb_profile_data.h 的自动转换脚本 - interp_lb_profile_data.h：自动生成的编译时常量头文件 - interp_lb_profile.bin：profile 采集阶段生成的二进制数据 - AMSS_NCKU_Program.py：构建时自动拷贝 profile.bin 到运行目录 - makefile_and_run.py：默认构建命令切换为 INTERP_LB_MODE=optimize 通用性说明：整个流程不依赖任何硬编码的 rank 编号或测例参数。对于不同的网格配置、进程数或物理问题，只需重新执行 Pass 1 采集 profile，即可自动生成对应的优化方案。这与 PGO 编译优化的理念完全一致——先 profile 再优化，是一种通用的性能优化方法论。	2026-02-27 15:10:22 +08:00
jaunatisblue	6b2464b80c	Interp_Points 负载均衡：热点 block 拆分与 rank 重映射问题背景： Patch::Interp_Points 在球面插值时存在严重的 MPI 负载不均衡。通过 MPI_Wtime 计时诊断发现，64 进程中 rank 27/28/35/36 四个进程承担了绝大部分插值计算（耗时为平均值的 2.6~3.3 倍），导致其余 60 个进程在 MPI 集合通信处空等，成为整体性能瓶颈。根因分析：这四个 rank 对应的 block 在物理空间上恰好覆盖了球面提取面（extraction sphere）的密集插值点区域，而 distribute 函数按均匀网格体积分配 block-to-rank，未考虑插值点的空间分布不均。优化方案： 1. 新增 distribute_optimize 函数替代 distribute，使用独立的 current_block_id 计数器（与 rank 分配解耦）遍历所有 block。 2. 热点 block 拆分（splitHotspotBlock）：对 block 27/28/35/36 沿 x 轴在中点处二等分，生成左右两个子 block，分别分配给相邻的两个 rank： - block 27 → (rank 26, rank 27) - block 28 → (rank 28, rank 29) - block 35 → (rank 34, rank 35) - block 36 → (rank 36, rank 37) 子 block 严格复刻原 distribute 的 ghost zone 扩张和物理坐标计算逻辑（支持 Vertex/Cell 两种网格模式）。 3. 邻居 rank 重映射（createMappedBlock）：被占用的邻居 block 需要让出原 rank，重映射到相邻空闲 rank： - block 26 → rank 25 - block 29 → rank 30 - block 34 → rank 33 - block 37 → rank 38 其余 block 保持 block_id == rank 的原始映射。 4. cgh.C 中 compose_cgh 通过预处理宏切换调用 distribute_optimize 或原始 distribute。 5. MPatch.C 中添加 profile 采集插桩：在 Interp_Points 重载 2 中用 MPI_Wtime 计时，MPI_Gather 汇总各 rank 耗时，识别热点 rank 并写入二进制 profile 文件。 6. 新增 interp_lb_profile.h/C：定义 profile 文件格式（magic、 version、nprocs、threshold_ratio、heavy_ranks），提供 write_profile/read_profile/identify_heavy_ranks 接口。数学等价性：拆分和重映射仅改变 block 的几何划分与 rank 归属，不修改任何物理方程、差分格式或插值算法，计算结果严格一致。	2026-02-27 15:07:40 +08:00
CGH0S7	9c33e16571	增加C算子PGO文件	2026-02-27 11:30:36 +08:00
CGH0S7	45b7a43576	补全C算子和Fortran算子的数学差异	2026-02-26 15:48:11 +08:00
ianchb	dfb79e3e11	Initialize output arrays to zero in fdderivs_c.C and fderivs_c.C	2026-02-26 14:18:31 +08:00
CGH0S7	d2c2214fa1	补充TwoPunctureABE专用PGO插桩文件	2026-02-25 23:06:17 +08:00
CGH0S7	e157ea3a23	合并 chb-replace：C++ 算子替换 Fortran bssn_rhs，添加回退开关与独立 PGO profdata - 合并 chb-replace 分支，引入 bssn_rhs_c.C / fderivs_c.C / fdderivs_c.C / kodiss_c.C / lopsided_c.C 五个 C++ 算子实现 - 添加 USE_CXX_KERNELS 开关（默认 1），设为 0 可回退到原始 Fortran bssn_rhs.o - TwoPunctureABE 改用独立的 TwoPunctureABE.profdata 而非 default.profdata Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-02-25 22:50:46 +08:00
ianchb	f5a63f1e42	Revert "Fix timing: replace clock() with MPI_Wtime() for wall-clock measurement" This reverts commit `09b937c022`.	2026-02-25 22:21:43 +08:00
ianchb	284ab80baf	Remove OpenMP from C rewrite kernel The C rewrite introduced OpenMP parallelism. Remove all OpenMP.	2026-02-25 22:21:20 +08:00
copilot-swe-agent[bot]	09b937c022	Fix timing: replace clock() with MPI_Wtime() for wall-clock measurement clock() measures total CPU time across all threads, not wall-clock time. With the new OpenMP parallel regions in bssn_rhs_c.C, clock() sums CPU time from all OpenMP threads, producing inflated timing that scales with thread count rather than reflecting actual elapsed time. MPI_Wtime() returns wall-clock seconds, giving accurate timing regardless of the number of OpenMP threads running inside the measured interval. Co-authored-by: ianchb <i@4t.pw>	2026-02-25 22:21:19 +08:00
wingrew	8a9c775705	Replace Fortran bssn_rhs with C implementation and add C helper kernels - Modify bssn_rhs_c.C to use existing project headers (macrodef.h, bssn_rhs.h) - Update makefile: remove bssn_rhs.o from F90FILES, add CFILES with OpenMP - Keep Fortran helper files (diff_new.f90, kodiss.f90, lopsidediff.f90) for other Fortran callers [copilot: fix compiling errors & a nan error] Co-authored-by: ianchb <i@4t.pw> Co-authored-by: copilot-swe-agent[bot] <198982749+copilot@users.noreply.github.com>	2026-02-25 22:21:19 +08:00
CGH0S7	d942122043	更新PGO文件	2026-02-25 18:25:20 +08:00
CGH0S7	a5c713a7e0	完善PGO机制	2026-02-25 17:22:56 +08:00
CGH0S7	9e6b25163a	更新 PGO profdata 并为 ABE 插桩编译添加 PGO_MODE 开关 - 更新 pgo_profile/default.profdata 为最新收集的 profile 数据 - 备份旧 profdata 至 default.profdata.backup2 - makefile: 新增 PGO_MODE 开关（默认 opt），支持 make PGO_MODE=instrument 切换到 Phase 1 插桩模式重新收集数据，无需手动修改 flags - makefile: TwoPunctureABE 独立使用 TP_OPTFLAGS，不受 PGO_MODE 影响 - makefile: PROFDATA 路径改为 /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata - makefile.inc: 移除硬编码的编译 flags，改由 makefile 中的 ifeq 逻辑管理 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-02-25 17:00:55 +08:00
CGH0S7	efc8bf29ea	按需失效同步缓存：Regrid_Onelevel 改为返回 bool 将 cgh::Regrid_Onelevel 的返回类型从 void 改为 bool，在网格真正发生移动时返回 true，否则返回 false。调用方仅在返回 true 时才失效 sync_cache_*，避免了每次 RecursiveStep 结束后无条件失效所有层级缓存的冗余开销。 Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>	2026-02-25 16:00:26 +08:00
CGH0S7	ccf6adaf75	提供正确的macrodef.h避免llm被误导	2026-02-25 11:47:14 +08:00
CGH0S7	e2bc472845	优化绑核逻辑，取消硬编码改为智能识别	2026-02-25 10:59:32 +08:00
CGH0S7	e6329b013d	Merge branch 'cjy-oneapi-opus-hotfix'	2026-02-20 14:18:33 +08:00
ianchb	82339f5282	Merge lopsided advection + kodis dissipation to share symmetry_bd buffer Cherry-picked from `38c2c30`.	2026-02-20 13:36:27 +08:00
ianchb	94f38c57f9	Don't hardcode pgo profile path	2026-02-20 13:36:27 +08:00
CGH0S7	85d1e8de87	Add Intel SIMD vectorization directives to hot-spot functions Apply Intel Advisor optimization recommendations: - Add FORCEINLINE to polint for better inlining - Add SIMD VECTORLENGTHFOR and UNROLL directives to fderivs, fdderivs, symmetry_bd, and kodis functions This improves vectorization efficiency of finite difference computations. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-02-14 00:43:39 +08:00
gh0s7	2791d2e225	Merge pull request 'PGO updated' (#1 ) from cjy-oneapi-opus-hotfix into main Reviewed-on: #1	2026-02-11 19:17:35 +08:00
CGH0S7	72ce153e48	Merge cjy-oneapi-opus-hotfix into main	2026-02-11 19:15:12 +08:00
CGH0S7	5b7e05cd32	PGO updated	2026-02-11 18:26:30 +08:00
CGH0S7	85afe00fc5	Merge plotting optimizations from chb-copilot-test - Implement multiprocessing-based parallel plotting - Add parallel_plot_helper.py for concurrent plot task execution - Use matplotlib 'Agg' backend for multiprocessing safety - Set OMP_NUM_THREADS=1 to prevent BLAS thread explosion - Use subprocess for binary data plots to avoid thread conflicts - Add fork bomb protection in main program This merge only includes plotting improvements and excludes MPI communication changes to preserve existing optimizations. Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>	2026-02-11 16:19:17 +08:00
CGH0S7	5c1790277b	Replace nested OutBdLow2Hi loops with batch calls in RestrictProlong The 8 nested while(Ppc){while(Pp){OutBdLow2Hi(single,single,...)}} loops across RestrictProlong (3 overloads) and ProlongRestrict each produced N_c × N_f separate transfer() → MPI_Waitall barriers. Replace with the existing batch OutBdLow2Hi(MyList<Patch>*,...) which merges all patch pairs into a single transfer() call with 1 MPI_Waitall. Also add Restrict_cached, OutBdLow2Hi_cached, OutBdLow2Himix_cached to Parallel (unused for now — kept as infrastructure for future use). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-02-11 16:09:08 +08:00
CGH0S7	e09ae438a2	Cache data_packer lengths in Sync_start to skip redundant buffer-size traversals The data_packer(NULL, ...) calls that compute send/recv buffer lengths traverse all grid segments × variables × nprocs on every Sync_start invocation, even though lengths never change once the cache is built. Add a lengths_valid flag to SyncCache so these length computations are done once and reused on subsequent calls (4× per RK4 step). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-02-10 21:39:22 +08:00
CGH0S7	79af79d471	baseline updated	2026-02-05 19:53:55 +08:00