Refactor verification method and optimize numerical kernels with oneMKL BLAS

This commit transitions the verification approach from post-Newtonian theory comparison to regression testing against baseline simulations, and optimizes critical numerical kernels using Intel oneMKL BLAS routines. Verification Changes: - Replace PN theory-based RMS calculation with trajectory-based comparison - Compare optimized results against baseline (GW150914-origin) on XY plane - Compute RMS independently for BH1 and BH2, report maximum as final metric - Update documentation to reflect new regression test methodology Performance Optimizations: - Replace manual vector operations with oneMKL BLAS routines: * norm2() and scalarproduct() now use cblas_dnrm2/cblas_ddot (C++) * L2 norm calculations use DDOT for dot products (Fortran) * Interpolation weighted sums use DDOT (Fortran) - Disable OpenMP threading (switch to sequential MKL) for better performance Build Configuration: - Switch from lmkl_intel_thread to lmkl_sequential - Remove -qopenmp flags from compiler options - Maintain aggressive optimization flags (-O3, -xHost, -fp-model fast=2, -fma) Other Changes: - Update .gitignore for GW150914-origin, docs, and temporary files
2026-01-18 14:25:21 +08:00
parent 3a7bce3af2
commit 9deeda9831
5 changed files with 170 additions and 99 deletions
--- a/AMSS_NCKU_Verify_ASC26.py
+++ b/AMSS_NCKU_Verify_ASC26.py
@@ -1,13 +1,19 @@
 #!/usr/bin/env python3
 """
-AMSS-NCKU GW150914 Simulation Accuracy Verification Script
+AMSS-NCKU GW150914 Simulation Regression Test Script

 Verification Requirements:
-1. RMS error < 1% (Black hole trajectory vs. post-Newtonian theory)
+1. XY-plane trajectory RMS error < 1% (Optimized vs. baseline, max of BH1 and BH2)
 2. ADM constraint violation < 2 (Grid Level 0)

-Usage: python3 verify_accuracy.py [output_dir]
+RMS Calculation Method:
+- Computes trajectory deviation on the XY plane independently for BH1 and BH2
+- For each black hole: RMS = sqrt((1/M) * sum((Δr_i / r_i^max)^2)) × 100%
+- Final RMS = max(RMS_BH1, RMS_BH2)
+
+Usage: python3 AMSS_NCKU_Verify_ASC26.py [output_dir]
 Default: output_dir = GW150914/AMSS_NCKU_output
+Reference: GW150914-origin (baseline simulation)
 """

 import numpy as np
@@ -52,51 +58,77 @@ def load_constraint_data(filepath):
    return np.array(data)


-def calculate_rms_error(bh_data, M1=0.5538461539, M2=0.4461538472):
+def calculate_rms_error(bh_data_ref, bh_data_target):
    """
-    Calculate RMS error
-    Compare numerical orbit with post-Newtonian (PN) theory prediction
+    Calculate trajectory-based RMS error on the XY plane between baseline and optimized simulations.
+
+    This function computes the RMS error independently for BH1 and BH2 trajectories,
+    then returns the maximum of the two as the final RMS error metric.
+
+    For each black hole, the RMS is calculated as:
+        RMS = sqrt( (1/M) * sum( (Δr_i / r_i^max)^2 ) ) × 100%
+
+    where:
+        Δr_i = sqrt((x_ref,i - x_new,i)^2 + (y_ref,i - y_new,i)^2)
+        r_i^max = max(sqrt(x_ref,i^2 + y_ref,i^2), sqrt(x_new,i^2 + y_new,i^2))
+
+    Args:
+        bh_data_ref: Reference (baseline) trajectory data
+        bh_data_target: Target (optimized) trajectory data
+
+    Returns:
+        rms_value: Final RMS error as a percentage (max of BH1 and BH2)
+        error: Error message if any
    """
-    time = bh_data['time']
-    x1, y1, z1 = bh_data['x1'], bh_data['y1'], bh_data['z1']
-    x2, y2, z2 = bh_data['x2'], bh_data['y2'], bh_data['z2']
+    # Align data: truncate to the length of the shorter dataset
+    M = min(len(bh_data_ref['time']), len(bh_data_target['time']))

-    # Calculate separation distance
-    r_num = np.sqrt((x2-x1)**2 + (y2-y1)**2 + (z2-z1)**2)
+    if M < 10:
+        return None, "Insufficient data points for comparison"

-    # Mass parameters
-    M_total = M1 + M2
-    eta = M1 * M2 / M_total**2
+    # Extract XY coordinates for both black holes
+    x1_ref = bh_data_ref['x1'][:M]
+    y1_ref = bh_data_ref['y1'][:M]
+    x2_ref = bh_data_ref['x2'][:M]
+    y2_ref = bh_data_ref['y2'][:M]

-    # Use inspiral phase data (r > 2M)
-    inspiral_mask = r_num > 2.0
-    t_insp = time[inspiral_mask]
-    r_insp = r_num[inspiral_mask]
+    x1_new = bh_data_target['x1'][:M]
+    y1_new = bh_data_target['y1'][:M]
+    x2_new = bh_data_target['x2'][:M]
+    y2_new = bh_data_target['y2'][:M]

-    if len(t_insp) < 10:
-        return None, None, "Insufficient data points"
+    # Calculate RMS for BH1
+    delta_r1 = np.sqrt((x1_ref - x1_new)**2 + (y1_ref - y1_new)**2)
+    r1_ref = np.sqrt(x1_ref**2 + y1_ref**2)
+    r1_new = np.sqrt(x1_new**2 + y1_new**2)
+    r1_max = np.maximum(r1_ref, r1_new)

-    # Post-Newtonian theory prediction
-    r0 = r_insp[0]
-    t0 = t_insp[0]
-    t_coal_PN = (5.0/256.0) * (r0**4) / (eta * M_total**3)
+    # Calculate RMS for BH2
+    delta_r2 = np.sqrt((x2_ref - x2_new)**2 + (y2_ref - y2_new)**2)
+    r2_ref = np.sqrt(x2_ref**2 + y2_ref**2)
+    r2_new = np.sqrt(x2_new**2 + y2_new**2)
+    r2_max = np.maximum(r2_ref, r2_new)

-    # Calculate PN predicted separation distance
-    tau = 1.0 - (t_insp - t0) / t_coal_PN
-    tau = np.maximum(tau, 1e-10)  # Avoid negative values
-    r_PN = r0 * np.power(tau, 0.25)
+    # Avoid division by zero for BH1
+    valid_mask1 = r1_max > 1e-15
+    if np.sum(valid_mask1) < 10:
+        return None, "Insufficient valid data points for BH1"

-    # Use only valid data (t < 0.95 * t_coal)
-    valid_mask = (t_insp - t0) < t_coal_PN * 0.95
-    r_num_valid = r_insp[valid_mask]
-    r_PN_valid = r_PN[valid_mask]
+    terms1 = (delta_r1[valid_mask1] / r1_max[valid_mask1])**2
+    rms_bh1 = np.sqrt(np.mean(terms1)) * 100

-    # Calculate RMS error
-    residual = r_num_valid - r_PN_valid
-    rms_abs = np.sqrt(np.mean(residual**2))
-    rms_rel = rms_abs / np.mean(r_num_valid) * 100
+    # Avoid division by zero for BH2
+    valid_mask2 = r2_max > 1e-15
+    if np.sum(valid_mask2) < 10:
+        return None, "Insufficient valid data points for BH2"

-    return rms_abs, rms_rel, None
+    terms2 = (delta_r2[valid_mask2] / r2_max[valid_mask2])**2
+    rms_bh2 = np.sqrt(np.mean(terms2)) * 100
+
+    # Final RMS is the maximum of BH1 and BH2
+    rms_final = max(rms_bh1, rms_bh2)
+
+    return rms_final, None


 def analyze_constraint_violation(constraint_data, n_levels=9):
@@ -125,13 +157,13 @@ def analyze_constraint_violation(constraint_data, n_levels=9):
 def print_header():
    """Print report header"""
    print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
-    print(Color.BOLD + "   AMSS-NCKU GW150914 Simulation Accuracy Verification Report" + Color.RESET)
+    print(Color.BOLD + "   AMSS-NCKU GW150914 Simulation Regression Test Report" + Color.RESET)
    print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)


-def print_rms_results(rms_abs, rms_rel, error, threshold=1.0):
+def print_rms_results(rms_rel, error, threshold=1.0):
    """Print RMS error results"""
-    print(f"\n{Color.BOLD}1. RMS Error Analysis (Black Hole Trajectory){Color.RESET}")
+    print(f"\n{Color.BOLD}1. RMS Error Analysis (Baseline vs Optimized){Color.RESET}")
    print("-" * 45)

    if error:
@@ -139,8 +171,7 @@ def print_rms_results(rms_abs, rms_rel, error, threshold=1.0):
        return False

    passed = rms_rel < threshold
-    
-    print(f"   RMS absolute error: {rms_abs:.4e} M")
+
    print(f"   RMS relative error: {rms_rel:.4f}%")
    print(f"   Requirement:        < {threshold}%")
    print(f"   Status:             {get_status_text(passed)}")
@@ -190,20 +221,29 @@ def print_summary(rms_passed, constraint_passed):


 def main():
-    # Determine output directory
+    # Determine target (optimized) output directory
    if len(sys.argv) > 1:
-        output_dir = sys.argv[1]
+        target_dir = sys.argv[1]
    else:
        script_dir = os.path.dirname(os.path.abspath(__file__))
-        output_dir = os.path.join(script_dir, "GW150914/AMSS_NCKU_output")
+        target_dir = os.path.join(script_dir, "GW150914/AMSS_NCKU_output")
+
+    # Determine reference (baseline) directory
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    reference_dir = os.path.join(script_dir, "GW150914-origin/AMSS_NCKU_output")

    # Data file paths
-    bh_file = os.path.join(output_dir, "bssn_BH.dat")
-    constraint_file = os.path.join(output_dir, "bssn_constraint.dat")
+    bh_file_ref = os.path.join(reference_dir, "bssn_BH.dat")
+    bh_file_target = os.path.join(target_dir, "bssn_BH.dat")
+    constraint_file = os.path.join(target_dir, "bssn_constraint.dat")

    # Check if files exist
-    if not os.path.exists(bh_file):
-        print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Black hole trajectory file not found: {bh_file}")
+    if not os.path.exists(bh_file_ref):
+        print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Baseline trajectory file not found: {bh_file_ref}")
+        sys.exit(1)
+
+    if not os.path.exists(bh_file_target):
+        print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Target trajectory file not found: {bh_file_target}")
        sys.exit(1)

    if not os.path.exists(constraint_file):
@@ -212,15 +252,17 @@ def main():

    # Print header
    print_header()
-    print(f"\n{Color.BOLD}Target Directory:{Color.RESET} {Color.BLUE}{output_dir}{Color.RESET}")
+    print(f"\n{Color.BOLD}Reference (Baseline):{Color.RESET} {Color.BLUE}{reference_dir}{Color.RESET}")
+    print(f"{Color.BOLD}Target (Optimized):  {Color.RESET} {Color.BLUE}{target_dir}{Color.RESET}")

    # Load data
-    bh_data = load_bh_trajectory(bh_file)
+    bh_data_ref = load_bh_trajectory(bh_file_ref)
+    bh_data_target = load_bh_trajectory(bh_file_target)
    constraint_data = load_constraint_data(constraint_file)

    # Calculate RMS error
-    rms_abs, rms_rel, error = calculate_rms_error(bh_data)
-    rms_passed = print_rms_results(rms_abs, rms_rel, error)
+    rms_rel, error = calculate_rms_error(bh_data_ref, bh_data_target)
+    rms_passed = print_rms_results(rms_rel, error)

    # Analyze constraint violation
    constraint_results = analyze_constraint_violation(constraint_data)