diff --git a/.gitignore b/.gitignore index 1a4ec9b..790612c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ __pycache__ GW150914 +GW150914-origin \ No newline at end of file diff --git a/AMSS_NCKU_ABEtest.py b/AMSS_NCKU_ABEtest.py new file mode 100644 index 0000000..63947ba --- /dev/null +++ b/AMSS_NCKU_ABEtest.py @@ -0,0 +1,445 @@ + +################################################################## +## +## AMSS-NCKU ABE Test Program (Skip TwoPuncture if data exists) +## Modified from AMSS_NCKU_Program.py +## Author: Xiaoqu +## Modified: 2026/02/01 +## +################################################################## + + +################################################################## + +## Print program introduction + +import print_information + +print_information.print_program_introduction() + +################################################################## + +import AMSS_NCKU_Input as input_data + +################################################################## + +## Create directories to store program run data + +import os +import shutil +import sys +import time + +## Set the output directory according to the input file +File_directory = os.path.join(input_data.File_directory) + +## Check if output directory exists and if TwoPuncture data is available +skip_twopuncture = False +output_directory = os.path.join(File_directory, "AMSS_NCKU_output") +binary_results_directory = os.path.join(output_directory, input_data.Output_directory) + +if os.path.exists(File_directory): + print( " Output directory already exists." ) + print() + + # Check if TwoPuncture initial data files exist + if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture"): + twopuncture_output = os.path.join(output_directory, "TwoPunctureABE") + input_par = os.path.join(output_directory, "input.par") + + if os.path.exists(twopuncture_output) and os.path.exists(input_par): + print( " Found existing TwoPuncture initial data." ) + print( " Do you want to skip TwoPuncture phase and reuse existing data?" ) + print( " Input 'skip' to skip TwoPuncture and start ABE directly" ) + print( " Input 'regenerate' to regenerate everything from scratch" ) + print() + + while True: + try: + inputvalue = input() + if ( inputvalue == "skip" ): + print( " Skipping TwoPuncture phase, will reuse existing initial data." ) + print() + skip_twopuncture = True + break + elif ( inputvalue == "regenerate" ): + print( " Regenerating everything from scratch." ) + print() + skip_twopuncture = False + break + else: + print( " Please input 'skip' or 'regenerate'." ) + except ValueError: + print( " Please input 'skip' or 'regenerate'." ) + else: + print( " TwoPuncture initial data not found, will regenerate everything." ) + print() + + # If not skipping, remove and recreate directory + if not skip_twopuncture: + shutil.rmtree(File_directory, ignore_errors=True) + os.mkdir(File_directory) + os.mkdir(output_directory) + os.mkdir(binary_results_directory) + figure_directory = os.path.join(File_directory, "figure") + os.mkdir(figure_directory) + shutil.copy("AMSS_NCKU_Input.py", File_directory) + print( " Output directory has been regenerated." ) + print() +else: + # Create fresh directory structure + os.mkdir(File_directory) + shutil.copy("AMSS_NCKU_Input.py", File_directory) + os.mkdir(output_directory) + os.mkdir(binary_results_directory) + figure_directory = os.path.join(File_directory, "figure") + os.mkdir(figure_directory) + print( " Output directory has been generated." ) + print() + +# Ensure figure directory exists +figure_directory = os.path.join(File_directory, "figure") +if not os.path.exists(figure_directory): + os.mkdir(figure_directory) + +################################################################## + +## Output related parameter information + +import setup + +## Print and save input parameter information +setup.print_input_data( File_directory ) + +if not skip_twopuncture: + setup.generate_AMSSNCKU_input() + +setup.print_puncture_information() + + +################################################################## + +## Generate AMSS-NCKU program input files based on the configured parameters + +if not skip_twopuncture: + print() + print( " Generating the AMSS-NCKU input parfile for the ABE executable." ) + print() + + ## Generate cgh-related input files from the grid information + + import numerical_grid + + numerical_grid.append_AMSSNCKU_cgh_input() + + print() + print( " The input parfile for AMSS-NCKU C++ executable file ABE has been generated." ) + print( " However, the input relevant to TwoPuncture need to be appended later." ) + print() + + +################################################################## + +## Plot the initial grid configuration + +if not skip_twopuncture: + print() + print( " Schematically plot the numerical grid structure." ) + print() + + import numerical_grid + numerical_grid.plot_initial_grid() + + +################################################################## + +## Generate AMSS-NCKU macro files according to the numerical scheme and parameters + +if not skip_twopuncture: + print() + print( " Automatically generating the macro file for AMSS-NCKU C++ executable file ABE " ) + print( " (Based on the finite-difference numerical scheme) " ) + print() + + import generate_macrodef + + generate_macrodef.generate_macrodef_h() + print( " AMSS-NCKU macro file macrodef.h has been generated. " ) + + generate_macrodef.generate_macrodef_fh() + print( " AMSS-NCKU macro file macrodef.fh has been generated. " ) + + +################################################################## + +# Compile the AMSS-NCKU program according to user requirements +# NOTE: ABE compilation is always performed, even when skipping TwoPuncture + +print() +print( " Preparing to compile and run the AMSS-NCKU code as requested " ) +print( " Compiling the AMSS-NCKU code based on the generated macro files " ) +print() + +AMSS_NCKU_source_path = "AMSS_NCKU_source" +AMSS_NCKU_source_copy = os.path.join(File_directory, "AMSS_NCKU_source_copy") + +## If AMSS_NCKU source folder is missing, create it and prompt the user +if not os.path.exists(AMSS_NCKU_source_path): + os.makedirs(AMSS_NCKU_source_path) + print( " The AMSS-NCKU source files are incomplete; copy all source files into ./AMSS_NCKU_source. " ) + print( " Press Enter to continue. " ) + inputvalue = input() + +# Copy AMSS-NCKU source files to prepare for compilation +# If skipping TwoPuncture and source_copy already exists, remove it first +if skip_twopuncture and os.path.exists(AMSS_NCKU_source_copy): + shutil.rmtree(AMSS_NCKU_source_copy) + +shutil.copytree(AMSS_NCKU_source_path, AMSS_NCKU_source_copy) + +# Copy the generated macro files into the AMSS_NCKU source folder +if not skip_twopuncture: + macrodef_h_path = os.path.join(File_directory, "macrodef.h") + macrodef_fh_path = os.path.join(File_directory, "macrodef.fh") +else: + # When skipping TwoPuncture, use existing macro files from previous run + macrodef_h_path = os.path.join(File_directory, "macrodef.h") + macrodef_fh_path = os.path.join(File_directory, "macrodef.fh") + +shutil.copy2(macrodef_h_path, AMSS_NCKU_source_copy) +shutil.copy2(macrodef_fh_path, AMSS_NCKU_source_copy) + +# Compile related programs +import makefile_and_run + +## Change working directory to the target source copy +os.chdir(AMSS_NCKU_source_copy) + +## Build the main AMSS-NCKU executable (ABE or ABEGPU) +makefile_and_run.makefile_ABE() + +## If the initial-data method is Ansorg-TwoPuncture, build the TwoPunctureABE executable +## Only build TwoPunctureABE if not skipping TwoPuncture phase +if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ) and not skip_twopuncture: + makefile_and_run.makefile_TwoPunctureABE() + +## Change current working directory back up two levels +os.chdir('..') +os.chdir('..') + +print() + +################################################################## + +## Copy the AMSS-NCKU executable (ABE/ABEGPU) to the run directory + +if (input_data.GPU_Calculation == "no"): + ABE_file = os.path.join(AMSS_NCKU_source_copy, "ABE") +elif (input_data.GPU_Calculation == "yes"): + ABE_file = os.path.join(AMSS_NCKU_source_copy, "ABEGPU") + +if not os.path.exists( ABE_file ): + print() + print( " Lack of AMSS-NCKU executable file ABE/ABEGPU; recompile AMSS_NCKU_source manually. " ) + print( " When recompilation is finished, press Enter to continue. " ) + inputvalue = input() + +## Copy the executable ABE (or ABEGPU) into the run directory +shutil.copy2(ABE_file, output_directory) + +## If the initial-data method is TwoPuncture, copy the TwoPunctureABE executable to the run directory +## Only copy TwoPunctureABE if not skipping TwoPuncture phase +if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ) and not skip_twopuncture: + TwoPuncture_file = os.path.join(AMSS_NCKU_source_copy, "TwoPunctureABE") + + if not os.path.exists( TwoPuncture_file ): + print() + print( " Lack of AMSS-NCKU executable file TwoPunctureABE; recompile TwoPunctureABE in AMSS_NCKU_source. " ) + print( " When recompilation is finished, press Enter to continue. " ) + inputvalue = input() + + ## Copy the TwoPunctureABE executable into the run directory + shutil.copy2(TwoPuncture_file, output_directory) + +################################################################## + +## If the initial-data method is TwoPuncture, generate the TwoPuncture input files + +if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ) and not skip_twopuncture: + + print() + print( " Initial data is chosen as Ansorg-TwoPuncture" ) + print() + + print() + print( " Automatically generating the input parfile for the TwoPunctureABE executable " ) + print() + + import generate_TwoPuncture_input + + generate_TwoPuncture_input.generate_AMSSNCKU_TwoPuncture_input() + + print() + print( " The input parfile for the TwoPunctureABE executable has been generated. " ) + print() + + ## Generated AMSS-NCKU TwoPuncture input filename + AMSS_NCKU_TwoPuncture_inputfile = 'AMSS-NCKU-TwoPuncture.input' + AMSS_NCKU_TwoPuncture_inputfile_path = os.path.join( File_directory, AMSS_NCKU_TwoPuncture_inputfile ) + + ## Copy and rename the file + shutil.copy2( AMSS_NCKU_TwoPuncture_inputfile_path, os.path.join(output_directory, 'TwoPunctureinput.par') ) + + ## Run TwoPuncture to generate initial-data files + + start_time = time.time() # Record start time + + print() + print() + + ## Change to the output (run) directory + os.chdir(output_directory) + + ## Run the TwoPuncture executable + import makefile_and_run + makefile_and_run.run_TwoPunctureABE() + + ## Change current working directory back up two levels + os.chdir('..') + os.chdir('..') + +elif (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ) and skip_twopuncture: + print() + print( " Skipping TwoPuncture execution, using existing initial data." ) + print() + start_time = time.time() # Record start time for ABE only +else: + start_time = time.time() # Record start time + +################################################################## + +## Update puncture data based on TwoPuncture run results + +if not skip_twopuncture: + import renew_puncture_parameter + renew_puncture_parameter.append_AMSSNCKU_BSSN_input(File_directory, output_directory) + + ## Generated AMSS-NCKU input filename + AMSS_NCKU_inputfile = 'AMSS-NCKU.input' + AMSS_NCKU_inputfile_path = os.path.join(File_directory, AMSS_NCKU_inputfile) + + ## Copy and rename the file + shutil.copy2( AMSS_NCKU_inputfile_path, os.path.join(output_directory, 'input.par') ) + + print() + print( " Successfully copy all AMSS-NCKU input parfile to target dictionary. " ) + print() +else: + print() + print( " Using existing input.par file from previous run." ) + print() + +################################################################## + +## Launch the AMSS-NCKU program + +print() +print() + +## Change to the run directory +os.chdir( output_directory ) + +import makefile_and_run +makefile_and_run.run_ABE() + +## Change current working directory back up two levels +os.chdir('..') +os.chdir('..') + +end_time = time.time() +elapsed_time = end_time - start_time + +################################################################## + +## Copy some basic input and log files out to facilitate debugging + +## Path to the file that stores calculation settings +AMSS_NCKU_error_file_path = os.path.join(binary_results_directory, "setting.par") +## Copy and rename the file for easier inspection +shutil.copy( AMSS_NCKU_error_file_path, os.path.join(output_directory, "AMSSNCKU_setting_parameter") ) + +## Path to the error log file +AMSS_NCKU_error_file_path = os.path.join(binary_results_directory, "Error.log") +## Copy and rename the error log +shutil.copy( AMSS_NCKU_error_file_path, os.path.join(output_directory, "Error.log") ) + +## Primary program outputs +AMSS_NCKU_BH_data = os.path.join(binary_results_directory, "bssn_BH.dat" ) +AMSS_NCKU_ADM_data = os.path.join(binary_results_directory, "bssn_ADMQs.dat" ) +AMSS_NCKU_psi4_data = os.path.join(binary_results_directory, "bssn_psi4.dat" ) +AMSS_NCKU_constraint_data = os.path.join(binary_results_directory, "bssn_constraint.dat") +## copy and rename the file +shutil.copy( AMSS_NCKU_BH_data, os.path.join(output_directory, "bssn_BH.dat" ) ) +shutil.copy( AMSS_NCKU_ADM_data, os.path.join(output_directory, "bssn_ADMQs.dat" ) ) +shutil.copy( AMSS_NCKU_psi4_data, os.path.join(output_directory, "bssn_psi4.dat" ) ) +shutil.copy( AMSS_NCKU_constraint_data, os.path.join(output_directory, "bssn_constraint.dat") ) + +## Additional program outputs +if (input_data.Equation_Class == "BSSN-EM"): + AMSS_NCKU_phi1_data = os.path.join(binary_results_directory, "bssn_phi1.dat" ) + AMSS_NCKU_phi2_data = os.path.join(binary_results_directory, "bssn_phi2.dat" ) + shutil.copy( AMSS_NCKU_phi1_data, os.path.join(output_directory, "bssn_phi1.dat" ) ) + shutil.copy( AMSS_NCKU_phi2_data, os.path.join(output_directory, "bssn_phi2.dat" ) ) +elif (input_data.Equation_Class == "BSSN-EScalar"): + AMSS_NCKU_maxs_data = os.path.join(binary_results_directory, "bssn_maxs.dat" ) + shutil.copy( AMSS_NCKU_maxs_data, os.path.join(output_directory, "bssn_maxs.dat" ) ) + +################################################################## + +## Plot the AMSS-NCKU program results + +print() +print( " Plotting the txt and binary results data from the AMSS-NCKU simulation " ) +print() + + +import plot_xiaoqu +import plot_GW_strain_amplitude_xiaoqu + +## Plot black hole trajectory +plot_xiaoqu.generate_puncture_orbit_plot( binary_results_directory, figure_directory ) +plot_xiaoqu.generate_puncture_orbit_plot3D( binary_results_directory, figure_directory ) + +## Plot black hole separation vs. time +plot_xiaoqu.generate_puncture_distence_plot( binary_results_directory, figure_directory ) + +## Plot gravitational waveforms (psi4 and strain amplitude) +for i in range(input_data.Detector_Number): + plot_xiaoqu.generate_gravitational_wave_psi4_plot( binary_results_directory, figure_directory, i ) + plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot( binary_results_directory, figure_directory, i ) + +## Plot ADM mass evolution +for i in range(input_data.Detector_Number): + plot_xiaoqu.generate_ADMmass_plot( binary_results_directory, figure_directory, i ) + +## Plot Hamiltonian constraint violation over time +for i in range(input_data.grid_level): + plot_xiaoqu.generate_constraint_check_plot( binary_results_directory, figure_directory, i ) + +## Plot stored binary data +plot_xiaoqu.generate_binary_data_plot( binary_results_directory, figure_directory ) + +print() +print( f" This Program Cost = {elapsed_time} Seconds " ) +print() + + +################################################################## + +print() +print( " The AMSS-NCKU-Python simulation is successfully finished, thanks for using !!! " ) +print() + +################################################################## + + diff --git a/AMSS_NCKU_Input.py b/AMSS_NCKU_Input.py index 6bf3589..f288e2a 100755 --- a/AMSS_NCKU_Input.py +++ b/AMSS_NCKU_Input.py @@ -16,7 +16,7 @@ import numpy File_directory = "GW150914" ## output file directory Output_directory = "binary_output" ## binary data file directory ## The file directory name should not be too long -MPI_processes = 8 ## number of mpi processes used in the simulation +MPI_processes = 48 ## number of mpi processes used in the simulation GPU_Calculation = "no" ## Use GPU or not ## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface) diff --git a/AMSS_NCKU_Verify_ASC26.py b/AMSS_NCKU_Verify_ASC26.py new file mode 100644 index 0000000..4601c22 --- /dev/null +++ b/AMSS_NCKU_Verify_ASC26.py @@ -0,0 +1,280 @@ +#!/usr/bin/env python3 +""" +AMSS-NCKU GW150914 Simulation Regression Test Script + +Verification Requirements: +1. XY-plane trajectory RMS error < 1% (Optimized vs. baseline, max of BH1 and BH2) +2. ADM constraint violation < 2 (Grid Level 0) + +RMS Calculation Method: +- Computes trajectory deviation on the XY plane independently for BH1 and BH2 +- For each black hole: RMS = sqrt((1/M) * sum((Δr_i / r_i^max)^2)) × 100% +- Final RMS = max(RMS_BH1, RMS_BH2) + +Usage: python3 AMSS_NCKU_Verify_ASC26.py [output_dir] +Default: output_dir = GW150914/AMSS_NCKU_output +Reference: GW150914-origin (baseline simulation) +""" + +import numpy as np +import sys +import os + +# ANSI Color Codes +class Color: + GREEN = '\033[92m' + RED = '\033[91m' + YELLOW = '\033[93m' + BLUE = '\033[94m' + BOLD = '\033[1m' + RESET = '\033[0m' + +def get_status_text(passed): + if passed: + return f"{Color.GREEN}{Color.BOLD}PASS{Color.RESET}" + else: + return f"{Color.RED}{Color.BOLD}FAIL{Color.RESET}" + +def load_bh_trajectory(filepath): + """Load black hole trajectory data""" + data = np.loadtxt(filepath) + return { + 'time': data[:, 0], + 'x1': data[:, 1], 'y1': data[:, 2], 'z1': data[:, 3], + 'x2': data[:, 4], 'y2': data[:, 5], 'z2': data[:, 6] + } + + +def load_constraint_data(filepath): + """Load constraint violation data""" + data = [] + with open(filepath, 'r') as f: + for line in f: + if line.startswith('#'): + continue + parts = line.split() + if len(parts) >= 8: + data.append([float(x) for x in parts[:8]]) + return np.array(data) + + +def calculate_rms_error(bh_data_ref, bh_data_target): + """ + Calculate trajectory-based RMS error on the XY plane between baseline and optimized simulations. + + This function computes the RMS error independently for BH1 and BH2 trajectories, + then returns the maximum of the two as the final RMS error metric. + + For each black hole, the RMS is calculated as: + RMS = sqrt( (1/M) * sum( (Δr_i / r_i^max)^2 ) ) × 100% + + where: + Δr_i = sqrt((x_ref,i - x_new,i)^2 + (y_ref,i - y_new,i)^2) + r_i^max = max(sqrt(x_ref,i^2 + y_ref,i^2), sqrt(x_new,i^2 + y_new,i^2)) + + Args: + bh_data_ref: Reference (baseline) trajectory data + bh_data_target: Target (optimized) trajectory data + + Returns: + rms_value: Final RMS error as a percentage (max of BH1 and BH2) + error: Error message if any + """ + # Align data: truncate to the length of the shorter dataset + M = min(len(bh_data_ref['time']), len(bh_data_target['time'])) + + if M < 10: + return None, "Insufficient data points for comparison" + + # Extract XY coordinates for both black holes + x1_ref = bh_data_ref['x1'][:M] + y1_ref = bh_data_ref['y1'][:M] + x2_ref = bh_data_ref['x2'][:M] + y2_ref = bh_data_ref['y2'][:M] + + x1_new = bh_data_target['x1'][:M] + y1_new = bh_data_target['y1'][:M] + x2_new = bh_data_target['x2'][:M] + y2_new = bh_data_target['y2'][:M] + + # Calculate RMS for BH1 + delta_r1 = np.sqrt((x1_ref - x1_new)**2 + (y1_ref - y1_new)**2) + r1_ref = np.sqrt(x1_ref**2 + y1_ref**2) + r1_new = np.sqrt(x1_new**2 + y1_new**2) + r1_max = np.maximum(r1_ref, r1_new) + + # Calculate RMS for BH2 + delta_r2 = np.sqrt((x2_ref - x2_new)**2 + (y2_ref - y2_new)**2) + r2_ref = np.sqrt(x2_ref**2 + y2_ref**2) + r2_new = np.sqrt(x2_new**2 + y2_new**2) + r2_max = np.maximum(r2_ref, r2_new) + + # Avoid division by zero for BH1 + valid_mask1 = r1_max > 1e-15 + if np.sum(valid_mask1) < 10: + return None, "Insufficient valid data points for BH1" + + terms1 = (delta_r1[valid_mask1] / r1_max[valid_mask1])**2 + rms_bh1 = np.sqrt(np.mean(terms1)) * 100 + + # Avoid division by zero for BH2 + valid_mask2 = r2_max > 1e-15 + if np.sum(valid_mask2) < 10: + return None, "Insufficient valid data points for BH2" + + terms2 = (delta_r2[valid_mask2] / r2_max[valid_mask2])**2 + rms_bh2 = np.sqrt(np.mean(terms2)) * 100 + + # Final RMS is the maximum of BH1 and BH2 + rms_final = max(rms_bh1, rms_bh2) + + return rms_final, None + + +def analyze_constraint_violation(constraint_data, n_levels=9): + """ + Analyze ADM constraint violation + Return maximum constraint violation for Grid Level 0 + """ + # Extract Grid Level 0 data (first entry for each time step) + level0_data = constraint_data[::n_levels] + + # Calculate maximum absolute value for each constraint + results = { + 'Ham': np.max(np.abs(level0_data[:, 1])), + 'Px': np.max(np.abs(level0_data[:, 2])), + 'Py': np.max(np.abs(level0_data[:, 3])), + 'Pz': np.max(np.abs(level0_data[:, 4])), + 'Gx': np.max(np.abs(level0_data[:, 5])), + 'Gy': np.max(np.abs(level0_data[:, 6])), + 'Gz': np.max(np.abs(level0_data[:, 7])) + } + + results['max_violation'] = max(results.values()) + return results + + +def print_header(): + """Print report header""" + print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET) + print(Color.BOLD + " AMSS-NCKU GW150914 Simulation Regression Test Report" + Color.RESET) + print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET) + + +def print_rms_results(rms_rel, error, threshold=1.0): + """Print RMS error results""" + print(f"\n{Color.BOLD}1. RMS Error Analysis (Baseline vs Optimized){Color.RESET}") + print("-" * 45) + + if error: + print(f" {Color.RED}Error: {error}{Color.RESET}") + return False + + passed = rms_rel < threshold + + print(f" RMS relative error: {rms_rel:.4f}%") + print(f" Requirement: < {threshold}%") + print(f" Status: {get_status_text(passed)}") + + return passed + + +def print_constraint_results(results, threshold=2.0): + """Print constraint violation results""" + print(f"\n{Color.BOLD}2. ADM Constraint Violation Analysis (Grid Level 0){Color.RESET}") + print("-" * 45) + + names = ['Ham', 'Px', 'Py', 'Pz', 'Gx', 'Gy', 'Gz'] + for i, name in enumerate(names): + print(f" Max |{name:3}|: {results[name]:.6f}", end=" ") + if (i + 1) % 2 == 0: print() + if len(names) % 2 != 0: print() + + passed = results['max_violation'] < threshold + + print(f"\n Maximum violation: {results['max_violation']:.6f}") + print(f" Requirement: < {threshold}") + print(f" Status: {get_status_text(passed)}") + + return passed + + +def print_summary(rms_passed, constraint_passed): + """Print summary""" + print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET) + print(Color.BOLD + "Verification Summary" + Color.RESET) + print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET) + + all_passed = rms_passed and constraint_passed + + res_rms = get_status_text(rms_passed) + res_con = get_status_text(constraint_passed) + + print(f" [1] RMS trajectory check: {res_rms}") + print(f" [2] ADM constraint check: {res_con}") + + final_status = f"{Color.GREEN}{Color.BOLD}ALL CHECKS PASSED{Color.RESET}" if all_passed else f"{Color.RED}{Color.BOLD}SOME CHECKS FAILED{Color.RESET}" + print(f"\n Overall result: {final_status}") + print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET + "\n") + + return all_passed + + +def main(): + # Determine target (optimized) output directory + if len(sys.argv) > 1: + target_dir = sys.argv[1] + else: + script_dir = os.path.dirname(os.path.abspath(__file__)) + target_dir = os.path.join(script_dir, "GW150914/AMSS_NCKU_output") + + # Determine reference (baseline) directory + script_dir = os.path.dirname(os.path.abspath(__file__)) + reference_dir = os.path.join(script_dir, "GW150914-origin/AMSS_NCKU_output") + + # Data file paths + bh_file_ref = os.path.join(reference_dir, "bssn_BH.dat") + bh_file_target = os.path.join(target_dir, "bssn_BH.dat") + constraint_file = os.path.join(target_dir, "bssn_constraint.dat") + + # Check if files exist + if not os.path.exists(bh_file_ref): + print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Baseline trajectory file not found: {bh_file_ref}") + sys.exit(1) + + if not os.path.exists(bh_file_target): + print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Target trajectory file not found: {bh_file_target}") + sys.exit(1) + + if not os.path.exists(constraint_file): + print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Constraint data file not found: {constraint_file}") + sys.exit(1) + + # Print header + print_header() + print(f"\n{Color.BOLD}Reference (Baseline):{Color.RESET} {Color.BLUE}{reference_dir}{Color.RESET}") + print(f"{Color.BOLD}Target (Optimized): {Color.RESET} {Color.BLUE}{target_dir}{Color.RESET}") + + # Load data + bh_data_ref = load_bh_trajectory(bh_file_ref) + bh_data_target = load_bh_trajectory(bh_file_target) + constraint_data = load_constraint_data(constraint_file) + + # Calculate RMS error + rms_rel, error = calculate_rms_error(bh_data_ref, bh_data_target) + rms_passed = print_rms_results(rms_rel, error) + + # Analyze constraint violation + constraint_results = analyze_constraint_violation(constraint_data) + constraint_passed = print_constraint_results(constraint_results) + + # Print summary + all_passed = print_summary(rms_passed, constraint_passed) + + # Return exit code + sys.exit(0 if all_passed else 1) + + +if __name__ == "__main__": + main() + diff --git a/AMSS_NCKU_source/fmisc.f90 b/AMSS_NCKU_source/fmisc.f90 index 81c5a62..d63dd9e 100644 --- a/AMSS_NCKU_source/fmisc.f90 +++ b/AMSS_NCKU_source/fmisc.f90 @@ -1117,146 +1117,137 @@ end subroutine d2dump !------------------------------------------------------------------------------ ! Lagrangian polynomial interpolation !------------------------------------------------------------------------------ - - subroutine polint(xa,ya,x,y,dy,ordn) - + subroutine polint(xa, ya, x, y, dy, ordn) implicit none -!~~~~~~> Input Parameter: - integer,intent(in) :: ordn - real*8, dimension(ordn), intent(in) :: xa,ya + integer, intent(in) :: ordn + real*8, dimension(ordn), intent(in) :: xa, ya real*8, intent(in) :: x - real*8, intent(out) :: y,dy + real*8, intent(out) :: y, dy -!~~~~~~> Other parameter: + integer :: i, m, ns, n_m + real*8, dimension(ordn) :: c, d, ho + real*8 :: dif, dift, hp, h, den_val - integer :: m,n,ns - real*8, dimension(ordn) :: c,d,den,ho - real*8 :: dif,dift - -!~~~~~~> - - n=ordn - m=ordn - - c=ya - d=ya - ho=xa-x - - ns=1 - dif=abs(x-xa(1)) - do m=1,n - dift=abs(x-xa(m)) - if(dift < dif) then - ns=m - dif=dift - end if + ! Initialization + c = ya + d = ya + ho = xa - x + + ns = 1 + dif = abs(x - xa(1)) + + ! Find the index of the closest table entry + do i = 2, ordn + dift = abs(x - xa(i)) + if (dift < dif) then + ns = i + dif = dift + end if end do - y=ya(ns) - ns=ns-1 - do m=1,n-1 - den(1:n-m)=ho(1:n-m)-ho(1+m:n) - if (any(den(1:n-m) == 0.0))then - write(*,*) 'failure in polint for point',x - write(*,*) 'with input points: ',xa - stop - endif - den(1:n-m)=(c(2:n-m+1)-d(1:n-m))/den(1:n-m) - d(1:n-m)=ho(1+m:n)*den(1:n-m) - c(1:n-m)=ho(1:n-m)*den(1:n-m) - if (2*ns < n-m) then - dy=c(ns+1) + y = ya(ns) + ns = ns - 1 + + ! Main Neville's algorithm loop + do m = 1, ordn - 1 + n_m = ordn - m + do i = 1, n_m + hp = ho(i) + h = ho(i+m) + den_val = hp - h + + ! Check for division by zero locally + if (den_val == 0.0d0) then + write(*,*) 'failure in polint for point',x + write(*,*) 'with input points: ',xa + stop + end if + + ! Reuse den_val to avoid redundant divisions + den_val = (c(i+1) - d(i)) / den_val + + ! Update c and d in place + d(i) = h * den_val + c(i) = hp * den_val + end do + + ! Decide which path (up or down the tableau) to take + if (2 * ns < n_m) then + dy = c(ns + 1) else - dy=d(ns) - ns=ns-1 + dy = d(ns) + ns = ns - 1 end if - y=y+dy + y = y + dy end do return - end subroutine polint !------------------------------------------------------------------------------ ! ! interpolation in 2 dimensions, follow yx order ! !------------------------------------------------------------------------------ - subroutine polin2(x1a,x2a,ya,x1,x2,y,dy,ordn) +subroutine polin2(x1a,x2a,ya,x1,x2,y,dy,ordn) + implicit none + integer,intent(in) :: ordn + real*8, dimension(ordn), intent(in) :: x1a,x2a + real*8, dimension(ordn,ordn), intent(in) :: ya + real*8, intent(in) :: x1,x2 + real*8, intent(out) :: y,dy - implicit none + integer :: j + real*8, dimension(ordn) :: ymtmp + real*8 :: dy_temp ! Local variable to prevent overwriting result -!~~~~~~> Input parameters: - integer,intent(in) :: ordn - real*8, dimension(1:ordn), intent(in) :: x1a,x2a - real*8, dimension(1:ordn,1:ordn), intent(in) :: ya - real*8, intent(in) :: x1,x2 - real*8, intent(out) :: y,dy + ! Optimized sequence: Loop over columns (j) + ! ya(:,j) is a contiguous memory block in Fortran + do j=1,ordn + call polint(x1a, ya(:,j), x1, ymtmp(j), dy_temp, ordn) + end do -!~~~~~~> Other parameters: - - integer :: i,m - real*8, dimension(ordn) :: ymtmp - real*8, dimension(ordn) :: yntmp - - m=size(x1a) - - do i=1,m - - yntmp=ya(i,:) - call polint(x2a,yntmp,x2,ymtmp(i),dy,ordn) - - end do - - call polint(x1a,ymtmp,x1,y,dy,ordn) - - return + ! Final interpolation on the results + call polint(x2a, ymtmp, x2, y, dy, ordn) + return end subroutine polin2 !------------------------------------------------------------------------------ ! ! interpolation in 3 dimensions, follow zyx order ! !------------------------------------------------------------------------------ - subroutine polin3(x1a,x2a,x3a,ya,x1,x2,x3,y,dy,ordn) +subroutine polin3(x1a,x2a,x3a,ya,x1,x2,x3,y,dy,ordn) + implicit none + integer,intent(in) :: ordn + real*8, dimension(ordn), intent(in) :: x1a,x2a,x3a + real*8, dimension(ordn,ordn,ordn), intent(in) :: ya + real*8, intent(in) :: x1,x2,x3 + real*8, intent(out) :: y,dy - implicit none + integer :: j, k + real*8, dimension(ordn,ordn) :: yatmp + real*8, dimension(ordn) :: ymtmp + real*8 :: dy_temp -!~~~~~~> Input parameters: - integer,intent(in) :: ordn - real*8, dimension(1:ordn), intent(in) :: x1a,x2a,x3a - real*8, dimension(1:ordn,1:ordn,1:ordn), intent(in) :: ya - real*8, intent(in) :: x1,x2,x3 - real*8, intent(out) :: y,dy + ! Sequence change: Process the contiguous first dimension (x1) first. + ! We loop through the 'slow' planes (j, k) to extract 'fast' columns. + do k=1,ordn + do j=1,ordn + ! ya(:,j,k) is contiguous; much faster than ya(i,j,:) + call polint(x1a, ya(:,j,k), x1, yatmp(j,k), dy_temp, ordn) + end do + end do -!~~~~~~> Other parameters: + ! Now process the second dimension + do k=1,ordn + call polint(x2a, yatmp(:,k), x2, ymtmp(k), dy_temp, ordn) + end do - integer :: i,j,m,n - real*8, dimension(ordn,ordn) :: yatmp - real*8, dimension(ordn) :: ymtmp - real*8, dimension(ordn) :: yntmp - real*8, dimension(ordn) :: yqtmp - - m=size(x1a) - n=size(x2a) - - do i=1,m - do j=1,n - - yqtmp=ya(i,j,:) - call polint(x3a,yqtmp,x3,yatmp(i,j),dy,ordn) - - end do - - yntmp=yatmp(i,:) - call polint(x2a,yntmp,x2,ymtmp(i),dy,ordn) - - end do - - call polint(x1a,ymtmp,x1,y,dy,ordn) - - return + ! Final dimension + call polint(x3a, ymtmp, x3, y, dy, ordn) + return end subroutine polin3 !-------------------------------------------------------------------------------------- ! calculate L2norm @@ -2272,3 +2263,4 @@ subroutine find_maximum(ext,X,Y,Z,fun,val,pos,llb,uub) return end subroutine + diff --git a/AMSS_NCKU_source/makefile.inc b/AMSS_NCKU_source/makefile.inc index 9b7c970..5f245ba 100755 --- a/AMSS_NCKU_source/makefile.inc +++ b/AMSS_NCKU_source/makefile.inc @@ -1,21 +1,33 @@ - +## GCC version (commented out) ## filein = -I/usr/include -I/usr/lib/x86_64-linux-gnu/mpich/include -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/ +## filein = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/ +## LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran -filein = -I/usr/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/ +## Intel oneAPI version with oneMKL (Optimized for performance) +filein = -I/usr/include/ -I${MKLROOT}/include -## LDLIBS = -L/usr/lib/x86_64-linux-gnu -lmpich -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran +## Using sequential MKL (OpenMP disabled for better single-threaded performance) +## Added -lifcore for Intel Fortran runtime and -limf for Intel math library +LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl -CXXAPPFLAGS = -O3 -Wno-deprecated -Dfortran3 -Dnewc -#f90appflags = -O3 -fpp -f90appflags = -O3 -x f95-cpp-input -f90 = gfortran -f77 = gfortran -CXX = g++ -CC = gcc -CLINKER = mpic++ +## Aggressive optimization flags: +## -O3: Maximum optimization +## -xHost: Optimize for the host CPU architecture (Intel/AMD compatible) +## -fp-model fast=2: Aggressive floating-point optimizations +## -fma: Enable fused multiply-add instructions +## Note: OpenMP has been disabled (-qopenmp removed) due to performance issues +CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma \ + -Dfortran3 -Dnewc -I${MKLROOT}/include +f90appflags = -O3 -xHost -fp-model fast=2 -fma \ + -fpp -I${MKLROOT}/include +f90 = ifx +f77 = ifx +CXX = icpx +CC = icx +CLINKER = mpiicpx Cu = nvcc CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include #CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -arch compute_13 -code compute_13,sm_13 -Dfortran3 -Dnewc CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc +