Compare commits

..

3 Commits

Author SHA1 Message Date
CGH0S7
3914659ebb Optimize BSSN RHS and finite difference calculations
- Integrate Intel oneMKL VML for efficient Gauge calculation in bssn_rhs.f90
- Refactor fderivs in diff_new.f90 to separate bulk/boundary loops for better vectorization
- Add optimization report in docs/optimization_report.md
2026-01-19 10:49:14 +08:00
CGH0S7
039dce4d65 Add aggressive compiler optimizations and vectorization directives
Changes:
1. Enhanced compiler flags in makefile.inc:
   - Added -march=native -mtune=native for CPU-specific optimizations
   - Added -funroll-loops for loop unrolling
   - Added -qopt-prefetch for aggressive prefetching
   - Added -qopt-report=5 for optimization analysis

2. Added Intel vectorization directives to diff_new.f90:
   - Added !DIR$ SIMD and !DIR$ IVDEP to critical loops
   - Targets fderivs and related finite difference functions
   - Forces compiler to vectorize inner loops

Expected improvement: 15-30% speedup in computation loops
Mathematical equivalence: Preserved (compiler optimizations only)
Precision: IEEE 754 maintained
2026-01-19 10:17:31 +08:00
CGH0S7
c524228d23 Enable multi-threaded MKL for better resource utilization
- Changed from libmkl_sequential to libmkl_intel_thread
- Added automatic MKL thread count configuration (104 cores / MPI_processes)
- Updated runtime scripts to set MKL_NUM_THREADS environment variable
- Added comprehensive optimization documentation

Expected improvement: 5-15% from better MKL utilization
Note: Main performance bottleneck is in computation loops, not MKL functions
2026-01-19 09:31:29 +08:00
235 changed files with 193584 additions and 196655 deletions

6
.idea/vcs.xml generated
View File

@@ -1,6 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>

View File

@@ -16,7 +16,7 @@ import numpy
File_directory = "GW150914" ## output file directory
Output_directory = "binary_output" ## binary data file directory
## The file directory name should not be too long
MPI_processes = 64 ## number of mpi processes used in the simulation
MPI_processes = 48 ## number of mpi processes used in the simulation
GPU_Calculation = "no" ## Use GPU or not
## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)

View File

@@ -8,14 +8,6 @@
##
##################################################################
## Guard against re-execution by multiprocessing child processes.
## Without this, using 'spawn' or 'forkserver' context would cause every
## worker to re-run the entire script, spawning exponentially more
## workers (fork bomb).
if __name__ != '__main__':
import sys as _sys
_sys.exit(0)
##################################################################
@@ -126,6 +118,11 @@ setup.generate_AMSSNCKU_input()
#inputvalue = input() ## Wait for user input (press Enter) to proceed
#print()
setup.print_puncture_information()
##################################################################
## Generate AMSS-NCKU program input files based on the configured parameters
print( )
@@ -265,12 +262,6 @@ if not os.path.exists( ABE_file ):
## Copy the executable ABE (or ABEGPU) into the run directory
shutil.copy2(ABE_file, output_directory)
## Copy interp load balance profile if present (for optimize pass)
interp_lb_profile = os.path.join(AMSS_NCKU_source_copy, "interp_lb_profile.bin")
if os.path.exists(interp_lb_profile):
shutil.copy2(interp_lb_profile, output_directory)
print( " Copied interp_lb_profile.bin to run directory " )
###########################
## If the initial-data method is TwoPuncture, copy the TwoPunctureABE executable to the run directory
@@ -307,7 +298,7 @@ if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ):
import generate_TwoPuncture_input
generate_TwoPuncture_input.generate_AMSSNCKU_TwoPuncture_input(numerical_grid.puncture_data)
generate_TwoPuncture_input.generate_AMSSNCKU_TwoPuncture_input()
print( )
print( " The input parfile for the TwoPunctureABE executable has been generated. " )
@@ -349,7 +340,7 @@ if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ):
import renew_puncture_parameter
renew_puncture_parameter.append_AMSSNCKU_BSSN_input(File_directory, output_directory, numerical_grid.puncture_data)
renew_puncture_parameter.append_AMSSNCKU_BSSN_input(File_directory, output_directory)
## Generated AMSS-NCKU input filename
@@ -433,31 +424,26 @@ print(
import plot_xiaoqu
import plot_GW_strain_amplitude_xiaoqu
from parallel_plot_helper import run_plot_tasks_parallel
plot_tasks = []
## Plot black hole trajectory
plot_tasks.append( ( plot_xiaoqu.generate_puncture_orbit_plot, (binary_results_directory, figure_directory) ) )
plot_tasks.append( ( plot_xiaoqu.generate_puncture_orbit_plot3D, (binary_results_directory, figure_directory) ) )
plot_xiaoqu.generate_puncture_orbit_plot( binary_results_directory, figure_directory )
plot_xiaoqu.generate_puncture_orbit_plot3D( binary_results_directory, figure_directory )
## Plot black hole separation vs. time
plot_tasks.append( ( plot_xiaoqu.generate_puncture_distence_plot, (binary_results_directory, figure_directory) ) )
plot_xiaoqu.generate_puncture_distence_plot( binary_results_directory, figure_directory )
## Plot gravitational waveforms (psi4 and strain amplitude)
for i in range(input_data.Detector_Number):
plot_tasks.append( ( plot_xiaoqu.generate_gravitational_wave_psi4_plot, (binary_results_directory, figure_directory, i) ) )
plot_tasks.append( ( plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot, (binary_results_directory, figure_directory, i) ) )
plot_xiaoqu.generate_gravitational_wave_psi4_plot( binary_results_directory, figure_directory, i )
plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot( binary_results_directory, figure_directory, i )
## Plot ADM mass evolution
for i in range(input_data.Detector_Number):
plot_tasks.append( ( plot_xiaoqu.generate_ADMmass_plot, (binary_results_directory, figure_directory, i) ) )
plot_xiaoqu.generate_ADMmass_plot( binary_results_directory, figure_directory, i )
## Plot Hamiltonian constraint violation over time
for i in range(input_data.grid_level):
plot_tasks.append( ( plot_xiaoqu.generate_constraint_check_plot, (binary_results_directory, figure_directory, i) ) )
run_plot_tasks_parallel(plot_tasks)
plot_xiaoqu.generate_constraint_check_plot( binary_results_directory, figure_directory, i )
## Plot stored binary data
plot_xiaoqu.generate_binary_data_plot( binary_results_directory, figure_directory )

View File

@@ -1,19 +1,10 @@
#!/usr/bin/env python3
"""
AMSS-NCKU GW150914 Simulation Regression Test Script (Comprehensive Version)
AMSS-NCKU GW150914 Simulation Regression Test Script
Verification Requirements:
1. RMS errors < 1% for:
- 3D Vector Total RMS
- X Component RMS
- Y Component RMS
- Z Component RMS
1. XY-plane trajectory RMS error < 1% (Optimized vs. baseline, max of BH1 and BH2)
2. ADM constraint violation < 2 (Grid Level 0)
3. The following figure PDFs must match GW150914-origin exactly after rasterization:
- ADM_Constraint_Grid_Level_0.pdf
- BH_Trajectory_21_XY.pdf
- BH_Trajectory_XY.pdf
The script also reports the percentage of differing pixels for each figure.
RMS Calculation Method:
- Computes trajectory deviation on the XY plane independently for BH1 and BH2
@@ -28,10 +19,6 @@ Reference: GW150914-origin (baseline simulation)
import numpy as np
import sys
import os
import shutil
import subprocess
import tempfile
from PIL import Image
# ANSI Color Codes
class Color:
@@ -71,187 +58,78 @@ def load_constraint_data(filepath):
return np.array(data)
def resolve_figure_dir(path):
"""Resolve the sibling figure directory from an output or figure path."""
normalized = os.path.normpath(path)
if os.path.basename(normalized) == "figure":
return normalized
return os.path.join(os.path.dirname(normalized), "figure")
def render_pdf_to_images(pdf_path, dpi=150):
"""Render a PDF to RGB images using Ghostscript."""
gs_path = shutil.which("gs")
if gs_path is None:
raise RuntimeError("Ghostscript executable 'gs' was not found in PATH")
with tempfile.TemporaryDirectory(prefix="amss_verify_pdf_") as temp_dir:
output_pattern = os.path.join(temp_dir, "page-%03d.ppm")
cmd = [
gs_path,
"-q",
"-dSAFER",
"-dBATCH",
"-dNOPAUSE",
"-sDEVICE=ppmraw",
f"-r{dpi}",
f"-o{output_pattern}",
pdf_path
]
try:
subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True)
except subprocess.CalledProcessError as exc:
message = exc.stderr.strip() or str(exc)
raise RuntimeError(f"Failed to render PDF '{pdf_path}': {message}") from exc
ppm_files = sorted(
os.path.join(temp_dir, filename)
for filename in os.listdir(temp_dir)
if filename.endswith(".ppm")
)
if not ppm_files:
raise RuntimeError(f"No rendered pages were produced for '{pdf_path}'")
images = []
for ppm_file in ppm_files:
with Image.open(ppm_file) as img:
images.append(np.array(img.convert("RGB"), dtype=np.uint8))
return images
def compare_rendered_pages(ref_img, target_img):
"""Return (different_pixels, total_pixels) for two rendered RGB pages."""
ref_h, ref_w = ref_img.shape[:2]
tgt_h, tgt_w = target_img.shape[:2]
total_pixels = max(ref_h, tgt_h) * max(ref_w, tgt_w)
if ref_h == tgt_h and ref_w == tgt_w:
different_pixels = int(np.count_nonzero(np.any(ref_img != target_img, axis=2)))
return different_pixels, total_pixels
diff_mask = np.ones((max(ref_h, tgt_h), max(ref_w, tgt_w)), dtype=bool)
overlap_h = min(ref_h, tgt_h)
overlap_w = min(ref_w, tgt_w)
overlap_diff = np.any(ref_img[:overlap_h, :overlap_w] != target_img[:overlap_h, :overlap_w], axis=2)
diff_mask[:overlap_h, :overlap_w] = overlap_diff
different_pixels = int(np.count_nonzero(diff_mask))
return different_pixels, total_pixels
def compare_pdf_images(ref_pdf, target_pdf, dpi=150, threshold_percent=0.001):
"""Compare two PDFs by rasterizing them and counting differing pixels."""
ref_pages = render_pdf_to_images(ref_pdf, dpi=dpi)
target_pages = render_pdf_to_images(target_pdf, dpi=dpi)
total_pixels = 0
different_pixels = 0
max_pages = max(len(ref_pages), len(target_pages))
for page_idx in range(max_pages):
if page_idx < len(ref_pages) and page_idx < len(target_pages):
page_diff, page_total = compare_rendered_pages(ref_pages[page_idx], target_pages[page_idx])
else:
existing_page = ref_pages[page_idx] if page_idx < len(ref_pages) else target_pages[page_idx]
page_total = existing_page.shape[0] * existing_page.shape[1]
page_diff = page_total
total_pixels += page_total
different_pixels += page_diff
diff_percent = (different_pixels / total_pixels * 100.0) if total_pixels else 0.0
return {
"different_pixels": different_pixels,
"total_pixels": total_pixels,
"diff_percent": diff_percent,
"pages_ref": len(ref_pages),
"pages_target": len(target_pages),
"passed": diff_percent < threshold_percent
}
def compare_required_figures(reference_figure_dir, target_figure_dir):
"""Compare the required GW150914 figure PDFs."""
figure_names = [
"ADM_Constraint_Grid_Level_0.pdf",
"BH_Trajectory_21_XY.pdf",
"BH_Trajectory_XY.pdf"
]
results = []
for figure_name in figure_names:
ref_pdf = os.path.join(reference_figure_dir, figure_name)
target_pdf = os.path.join(target_figure_dir, figure_name)
if not os.path.exists(ref_pdf):
raise FileNotFoundError(f"Reference figure not found: {ref_pdf}")
if not os.path.exists(target_pdf):
raise FileNotFoundError(f"Target figure not found: {target_pdf}")
comparison = compare_pdf_images(ref_pdf, target_pdf)
comparison["name"] = figure_name
results.append(comparison)
return results
def calculate_all_rms_errors(bh_data_ref, bh_data_target):
def calculate_rms_error(bh_data_ref, bh_data_target):
"""
Calculate 3D Vector RMS and component-wise RMS (X, Y, Z) independently.
Uses r = sqrt(x^2 + y^2) as the denominator for all error normalizations.
Returns the maximum error between BH1 and BH2 for each category.
Calculate trajectory-based RMS error on the XY plane between baseline and optimized simulations.
This function computes the RMS error independently for BH1 and BH2 trajectories,
then returns the maximum of the two as the final RMS error metric.
For each black hole, the RMS is calculated as:
RMS = sqrt( (1/M) * sum( (Δr_i / r_i^max)^2 ) ) × 100%
where:
Δr_i = sqrt((x_ref,i - x_new,i)^2 + (y_ref,i - y_new,i)^2)
r_i^max = max(sqrt(x_ref,i^2 + y_ref,i^2), sqrt(x_new,i^2 + y_new,i^2))
Args:
bh_data_ref: Reference (baseline) trajectory data
bh_data_target: Target (optimized) trajectory data
Returns:
rms_value: Final RMS error as a percentage (max of BH1 and BH2)
error: Error message if any
"""
# Align data: truncate to the length of the shorter dataset
M = min(len(bh_data_ref['time']), len(bh_data_target['time']))
if M < 10:
return None, "Insufficient data points for comparison"
results = {}
# Extract XY coordinates for both black holes
x1_ref = bh_data_ref['x1'][:M]
y1_ref = bh_data_ref['y1'][:M]
x2_ref = bh_data_ref['x2'][:M]
y2_ref = bh_data_ref['y2'][:M]
for bh in ['1', '2']:
x_r, y_r, z_r = bh_data_ref[f'x{bh}'][:M], bh_data_ref[f'y{bh}'][:M], bh_data_ref[f'z{bh}'][:M]
x_n, y_n, z_n = bh_data_target[f'x{bh}'][:M], bh_data_target[f'y{bh}'][:M], bh_data_target[f'z{bh}'][:M]
x1_new = bh_data_target['x1'][:M]
y1_new = bh_data_target['y1'][:M]
x2_new = bh_data_target['x2'][:M]
y2_new = bh_data_target['y2'][:M]
# 核心修改:根据组委会的邮件指示,分母统一使用 r = sqrt(x^2 + y^2)
r_ref = np.sqrt(x_r**2 + y_r**2)
r_new = np.sqrt(x_n**2 + y_n**2)
denom_max = np.maximum(r_ref, r_new)
# Calculate RMS for BH1
delta_r1 = np.sqrt((x1_ref - x1_new)**2 + (y1_ref - y1_new)**2)
r1_ref = np.sqrt(x1_ref**2 + y1_ref**2)
r1_new = np.sqrt(x1_new**2 + y1_new**2)
r1_max = np.maximum(r1_ref, r1_new)
valid = denom_max > 1e-15
if np.sum(valid) < 10:
results[f'BH{bh}'] = { '3D_Vector': 0.0, 'X_Component': 0.0, 'Y_Component': 0.0, 'Z_Component': 0.0 }
continue
# Calculate RMS for BH2
delta_r2 = np.sqrt((x2_ref - x2_new)**2 + (y2_ref - y2_new)**2)
r2_ref = np.sqrt(x2_ref**2 + y2_ref**2)
r2_new = np.sqrt(x2_new**2 + y2_new**2)
r2_max = np.maximum(r2_ref, r2_new)
def calc_rms(delta):
# 将对应分量的偏差除以统一的轨道半径分母 denom_max
return np.sqrt(np.mean((delta[valid] / denom_max[valid])**2)) * 100
# Avoid division by zero for BH1
valid_mask1 = r1_max > 1e-15
if np.sum(valid_mask1) < 10:
return None, "Insufficient valid data points for BH1"
# 1. Total 3D Vector RMS
delta_vec = np.sqrt((x_r - x_n)**2 + (y_r - y_n)**2 + (z_r - z_n)**2)
rms_3d = calc_rms(delta_vec)
terms1 = (delta_r1[valid_mask1] / r1_max[valid_mask1])**2
rms_bh1 = np.sqrt(np.mean(terms1)) * 100
# 2. Component-wise RMS (分离计算各轴,但共用半径分母)
rms_x = calc_rms(np.abs(x_r - x_n))
rms_y = calc_rms(np.abs(y_r - y_n))
rms_z = calc_rms(np.abs(z_r - z_n))
# Avoid division by zero for BH2
valid_mask2 = r2_max > 1e-15
if np.sum(valid_mask2) < 10:
return None, "Insufficient valid data points for BH2"
results[f'BH{bh}'] = {
'3D_Vector': rms_3d,
'X_Component': rms_x,
'Y_Component': rms_y,
'Z_Component': rms_z
}
terms2 = (delta_r2[valid_mask2] / r2_max[valid_mask2])**2
rms_bh2 = np.sqrt(np.mean(terms2)) * 100
# 获取 BH1 BH2 中的最大误差
max_rms = {
'3D_Vector': max(results['BH1']['3D_Vector'], results['BH2']['3D_Vector']),
'X_Component': max(results['BH1']['X_Component'], results['BH2']['X_Component']),
'Y_Component': max(results['BH1']['Y_Component'], results['BH2']['Y_Component']),
'Z_Component': max(results['BH1']['Z_Component'], results['BH2']['Z_Component'])
}
# Final RMS is the maximum of BH1 and BH2
rms_final = max(rms_bh1, rms_bh2)
return rms_final, None
return max_rms, None
def analyze_constraint_violation(constraint_data, n_levels=9):
"""
@@ -277,32 +155,34 @@ def analyze_constraint_violation(constraint_data, n_levels=9):
def print_header():
"""Print report header"""
print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
print(Color.BOLD + " AMSS-NCKU GW150914 Comprehensive Regression Test" + Color.RESET)
print(Color.BOLD + " AMSS-NCKU GW150914 Simulation Regression Test Report" + Color.RESET)
print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
def print_rms_results(rms_dict, error, threshold=1.0):
print(f"\n{Color.BOLD}1. RMS Error Analysis (Maximums of BH1 & BH2){Color.RESET}")
print("-" * 65)
def print_rms_results(rms_rel, error, threshold=1.0):
"""Print RMS error results"""
print(f"\n{Color.BOLD}1. RMS Error Analysis (Baseline vs Optimized){Color.RESET}")
print("-" * 45)
if error:
print(f" {Color.RED}Error: {error}{Color.RESET}")
return False
all_passed = True
print(f" Requirement: < {threshold}%\n")
passed = rms_rel < threshold
for key, val in rms_dict.items():
passed = val < threshold
all_passed = all_passed and passed
status = get_status_text(passed)
print(f" {key:15}: {val:8.4f}% | Status: {status}")
print(f" RMS relative error: {rms_rel:.4f}%")
print(f" Requirement: < {threshold}%")
print(f" Status: {get_status_text(passed)}")
return passed
return all_passed
def print_constraint_results(results, threshold=2.0):
"""Print constraint violation results"""
print(f"\n{Color.BOLD}2. ADM Constraint Violation Analysis (Grid Level 0){Color.RESET}")
print("-" * 65)
print("-" * 45)
names = ['Ham', 'Px', 'Py', 'Pz', 'Gx', 'Gy', 'Gz']
for i, name in enumerate(names):
@@ -319,45 +199,19 @@ def print_constraint_results(results, threshold=2.0):
return passed
def print_figure_results(results, threshold_percent=0.001):
print(f"\n{Color.BOLD}3. Figure Pixel Comparison (PDF Rasterization){Color.RESET}")
print("-" * 65)
print(f" Requirement: < {threshold_percent:.3f}% differing pixels\n")
all_passed = True
for result in results:
passed = result["passed"]
all_passed = all_passed and passed
status = get_status_text(passed)
print(f" {result['name']:32}: {result['diff_percent']:10.6f}% | Status: {status}")
if result["pages_ref"] != result["pages_target"]:
print(f" {'':32} pages(ref/target): {result['pages_ref']}/{result['pages_target']}")
return all_passed
def print_figure_error(error_message):
print(f"\n{Color.BOLD}3. Figure Pixel Comparison (PDF Rasterization){Color.RESET}")
print("-" * 65)
print(f" {Color.RED}Error: {error_message}{Color.RESET}")
return False
def print_summary(rms_passed, constraint_passed, figure_passed):
def print_summary(rms_passed, constraint_passed):
"""Print summary"""
print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
print(Color.BOLD + "Verification Summary" + Color.RESET)
print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
all_passed = rms_passed and constraint_passed and figure_passed
all_passed = rms_passed and constraint_passed
res_rms = get_status_text(rms_passed)
res_con = get_status_text(constraint_passed)
res_fig = get_status_text(figure_passed)
print(f" [1] Comprehensive RMS check: {res_rms}")
print(f" [1] RMS trajectory check: {res_rms}")
print(f" [2] ADM constraint check: {res_con}")
print(f" [3] Figure pixel comparison: {res_fig}")
final_status = f"{Color.GREEN}{Color.BOLD}ALL CHECKS PASSED{Color.RESET}" if all_passed else f"{Color.RED}{Color.BOLD}SOME CHECKS FAILED{Color.RESET}"
print(f"\n Overall result: {final_status}")
@@ -365,58 +219,61 @@ def print_summary(rms_passed, constraint_passed, figure_passed):
return all_passed
def main():
# Determine target (optimized) output directory
if len(sys.argv) > 1:
target_dir = sys.argv[1]
else:
script_dir = os.path.dirname(os.path.abspath(__file__))
target_dir = os.path.join(script_dir, "GW150914/AMSS_NCKU_output")
# Determine reference (baseline) directory
script_dir = os.path.dirname(os.path.abspath(__file__))
reference_dir = os.path.join(script_dir, "GW150914-origin/AMSS_NCKU_output")
target_figure_dir = resolve_figure_dir(target_dir)
reference_figure_dir = os.path.join(script_dir, "GW150914-origin/figure")
# Data file paths
bh_file_ref = os.path.join(reference_dir, "bssn_BH.dat")
bh_file_target = os.path.join(target_dir, "bssn_BH.dat")
constraint_file = os.path.join(target_dir, "bssn_constraint.dat")
# Check if files exist
if not os.path.exists(bh_file_ref):
print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Baseline trajectory file not found: {bh_file_ref}")
sys.exit(1)
if not os.path.exists(bh_file_target):
print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Target trajectory file not found: {bh_file_target}")
sys.exit(1)
if not os.path.exists(constraint_file):
print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Constraint data file not found: {constraint_file}")
sys.exit(1)
# Print header
print_header()
print(f"\n{Color.BOLD}Reference (Baseline):{Color.RESET} {Color.BLUE}{reference_dir}{Color.RESET}")
print(f"{Color.BOLD}Target (Optimized): {Color.RESET} {Color.BLUE}{target_dir}{Color.RESET}")
print(f"{Color.BOLD}Reference Figures: {Color.RESET} {Color.BLUE}{reference_figure_dir}{Color.RESET}")
print(f"{Color.BOLD}Target Figures: {Color.RESET} {Color.BLUE}{target_figure_dir}{Color.RESET}")
# Load data
bh_data_ref = load_bh_trajectory(bh_file_ref)
bh_data_target = load_bh_trajectory(bh_file_target)
constraint_data = load_constraint_data(constraint_file)
# Output modified RMS results
rms_dict, error = calculate_all_rms_errors(bh_data_ref, bh_data_target)
rms_passed = print_rms_results(rms_dict, error)
# Calculate RMS error
rms_rel, error = calculate_rms_error(bh_data_ref, bh_data_target)
rms_passed = print_rms_results(rms_rel, error)
# Output constraint results
# Analyze constraint violation
constraint_results = analyze_constraint_violation(constraint_data)
constraint_passed = print_constraint_results(constraint_results)
try:
figure_results = compare_required_figures(reference_figure_dir, target_figure_dir)
figure_passed = print_figure_results(figure_results)
except (FileNotFoundError, RuntimeError) as exc:
figure_passed = print_figure_error(str(exc))
# Print summary
all_passed = print_summary(rms_passed, constraint_passed)
all_passed = print_summary(rms_passed, constraint_passed, figure_passed)
# Return exit code
sys.exit(0 if all_passed else 1)
if __name__ == "__main__":
main()

710
AMSS_NCKU_source/ABE.optrpt Normal file
View File

@@ -0,0 +1,710 @@
Begin optimization report for: __cxx_global_var_init
=================================================================
Begin optimization report for: std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, int, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>::map()
=================================================================
Begin optimization report for: std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, int, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>::~map()
=================================================================
Begin optimization report for: __cxx_global_var_init.1
=================================================================
Begin optimization report for: std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, double, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>::map()
=================================================================
Begin optimization report for: std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, double, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>::~map()
=================================================================
Begin optimization report for: __cxx_global_var_init.2
=================================================================
Begin optimization report for: std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::map()
=================================================================
Begin optimization report for: std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::~map()
=================================================================
Begin optimization report for: main
LOOP BEGIN at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_tree.h (1947, 7)
LOOP END
LOOP BEGIN at ABE.C (125, 13)
LOOP END
=================================================================
Begin optimization report for: std::allocator<char>::allocator()
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::basic_string<std::allocator<char>>(char const*, std::allocator<char> const&)
=================================================================
Begin optimization report for: std::__new_allocator<char>::~__new_allocator()
=================================================================
Begin optimization report for: std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::insert(std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>&&)
=================================================================
Begin optimization report for: std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>::pair<char const (&) [9], std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>&, true>(char const (&) [9], std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>&)
=================================================================
Begin optimization report for: std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>::~pair()
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::~basic_string()
=================================================================
Begin optimization report for: std::_Rb_tree_iterator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>::_Rb_tree_iterator()
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::basic_string()
=================================================================
Begin optimization report for: std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::find(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&)
LOOP BEGIN at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_tree.h (1947, 7)
LOOP END
=================================================================
Begin optimization report for: std::operator!=(std::_Rb_tree_iterator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>> const&, std::_Rb_tree_iterator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>> const&)
=================================================================
Begin optimization report for: std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::end()
=================================================================
Begin optimization report for: std::_Rb_tree_iterator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>::operator->() const
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::operator=(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&)
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::c_str() const
=================================================================
Begin optimization report for: std::basic_ostream<char, std::char_traits<char>>& std::operator<<<std::char_traits<char>>(std::basic_ostream<char, std::char_traits<char>>&, char const*)
=================================================================
Begin optimization report for: std::ostream::operator<<(std::ostream& (*)(std::ostream&))
=================================================================
Begin optimization report for: std::basic_ostream<char, std::char_traits<char>>& std::endl<char, std::char_traits<char>>(std::basic_ostream<char, std::char_traits<char>>&)
=================================================================
Begin optimization report for: std::basic_ifstream<char, std::char_traits<char>>::basic_ifstream(char const*, std::_Ios_Openmode)
=================================================================
Begin optimization report for: std::basic_ios<char, std::char_traits<char>>::good() const
=================================================================
Begin optimization report for: std::istream::getline(char*, long)
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::operator=(char const*)
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::basic_string(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&)
=================================================================
Begin optimization report for: bool std::operator==<char, std::char_traits<char>, std::allocator<char>>(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&, char const*)
=================================================================
Begin optimization report for: atoi
=================================================================
Begin optimization report for: atof
=================================================================
Begin optimization report for: std::basic_ifstream<char, std::char_traits<char>>::close()
=================================================================
Begin optimization report for: std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>::pair<char const (&) [11], std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>&, true>(char const (&) [11], std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>&)
=================================================================
Begin optimization report for: std::basic_ifstream<char, std::char_traits<char>>::~basic_ifstream()
=================================================================
Begin optimization report for: std::basic_ofstream<char, std::char_traits<char>>::basic_ofstream()
=================================================================
Begin optimization report for: std::basic_ofstream<char, std::char_traits<char>>::open(char const*, std::_Ios_Openmode)
=================================================================
Begin optimization report for: std::basic_ofstream<char, std::char_traits<char>>::close()
=================================================================
Begin optimization report for: std::basic_ofstream<char, std::char_traits<char>>::~basic_ofstream()
=================================================================
Begin optimization report for: std::ostream::operator<<(double)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>::_Rb_tree()
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>::_Rb_tree_impl<std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, true>::_Rb_tree_impl()
=================================================================
Begin optimization report for: std::allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>::allocator()
=================================================================
Begin optimization report for: std::_Rb_tree_key_compare<std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>::_Rb_tree_key_compare()
=================================================================
Begin optimization report for: std::_Rb_tree_header::_Rb_tree_header()
=================================================================
Begin optimization report for: std::__new_allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>::__new_allocator()
=================================================================
Begin optimization report for: std::_Rb_tree_header::_M_reset()
=================================================================
Begin optimization report for: __clang_call_terminate
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>::~_Rb_tree()
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>::_M_erase(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>*)
LOOP BEGIN at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_tree.h (1930, 7)
remark #15521: Loop was not vectorized: loop control variable was not identified. Explicitly compute the iteration count before executing the loop or try using canonical loop form from OpenMP specification
remark #25478: While loop unrolled by 2
LOOP END
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>::_M_begin()
=================================================================
Begin optimization report for: std::__new_allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>::~__new_allocator()
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>::_S_right(std::_Rb_tree_node_base*)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>::_S_left(std::_Rb_tree_node_base*)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>::_M_drop_node(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>*)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>::_M_destroy_node(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>*)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>::_M_put_node(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>*)
=================================================================
Begin optimization report for: void std::allocator_traits<std::allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>>::destroy<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>(std::allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>&, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>*)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>::_M_get_Node_allocator()
=================================================================
Begin optimization report for: std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>::_M_valptr()
=================================================================
Begin optimization report for: void std::__new_allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>::destroy<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>(std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>*)
=================================================================
Begin optimization report for: std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>::~pair()
=================================================================
Begin optimization report for: __gnu_cxx::__aligned_membuf<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>::_M_ptr()
=================================================================
Begin optimization report for: __gnu_cxx::__aligned_membuf<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>::_M_addr()
=================================================================
Begin optimization report for: std::allocator_traits<std::allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>>::deallocate(std::allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>&, std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>*, unsigned long)
=================================================================
Begin optimization report for: std::__new_allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>::deallocate(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>*, unsigned long)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, int>>>::_M_mbegin() const
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>::_Rb_tree()
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>::_Rb_tree_impl<std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, true>::_Rb_tree_impl()
=================================================================
Begin optimization report for: std::allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>::allocator()
=================================================================
Begin optimization report for: std::__new_allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>::__new_allocator()
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>::~_Rb_tree()
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>::_M_erase(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>*)
LOOP BEGIN at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_tree.h (1930, 7)
remark #15521: Loop was not vectorized: loop control variable was not identified. Explicitly compute the iteration count before executing the loop or try using canonical loop form from OpenMP specification
remark #25478: While loop unrolled by 2
LOOP END
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>::_M_begin()
=================================================================
Begin optimization report for: std::__new_allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>::~__new_allocator()
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>::_S_right(std::_Rb_tree_node_base*)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>::_S_left(std::_Rb_tree_node_base*)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>::_M_drop_node(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>*)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>::_M_destroy_node(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>*)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>::_M_put_node(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>*)
=================================================================
Begin optimization report for: void std::allocator_traits<std::allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>>::destroy<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>(std::allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>&, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>*)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>::_M_get_Node_allocator()
=================================================================
Begin optimization report for: std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>::_M_valptr()
=================================================================
Begin optimization report for: void std::__new_allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>::destroy<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>(std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>*)
=================================================================
Begin optimization report for: std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>::~pair()
=================================================================
Begin optimization report for: __gnu_cxx::__aligned_membuf<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>::_M_ptr()
=================================================================
Begin optimization report for: __gnu_cxx::__aligned_membuf<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>::_M_addr()
=================================================================
Begin optimization report for: std::allocator_traits<std::allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>>::deallocate(std::allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>&, std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>*, unsigned long)
=================================================================
Begin optimization report for: std::__new_allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>::deallocate(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>*, unsigned long)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, double>>>::_M_mbegin() const
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_Rb_tree()
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_Rb_tree_impl<std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, true>::_Rb_tree_impl()
=================================================================
Begin optimization report for: std::allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::allocator()
=================================================================
Begin optimization report for: std::__new_allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::__new_allocator()
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::~_Rb_tree()
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_M_erase(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>*)
LOOP BEGIN at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_tree.h (1930, 7)
remark #15521: Loop was not vectorized: loop control variable was not identified. Explicitly compute the iteration count before executing the loop or try using canonical loop form from OpenMP specification
LOOP END
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_M_begin()
=================================================================
Begin optimization report for: std::__new_allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::~__new_allocator()
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_S_right(std::_Rb_tree_node_base*)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_S_left(std::_Rb_tree_node_base*)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_M_drop_node(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>*)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_M_destroy_node(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>*)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_M_put_node(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>*)
=================================================================
Begin optimization report for: void std::allocator_traits<std::allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>>::destroy<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>(std::allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>&, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>*)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_M_get_Node_allocator()
=================================================================
Begin optimization report for: std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>::_M_valptr()
=================================================================
Begin optimization report for: void std::__new_allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::destroy<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>(std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>*)
=================================================================
Begin optimization report for: __gnu_cxx::__aligned_membuf<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>::_M_ptr()
=================================================================
Begin optimization report for: __gnu_cxx::__aligned_membuf<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>::_M_addr()
=================================================================
Begin optimization report for: std::allocator_traits<std::allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>>::deallocate(std::allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>&, std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>*, unsigned long)
=================================================================
Begin optimization report for: std::__new_allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::deallocate(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>*, unsigned long)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_M_mbegin() const
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_data() const
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_local_data()
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_Alloc_hider::_Alloc_hider(char*, std::allocator<char>&&)
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_init_local_buf()
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_set_length(unsigned long)
=================================================================
Begin optimization report for: std::__ptr_traits_ptr_to<char*, char, false>::pointer_to(char&)
=================================================================
Begin optimization report for: std::__new_allocator<char>::__new_allocator(std::__new_allocator<char> const&)
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_length(unsigned long)
=================================================================
Begin optimization report for: std::char_traits<char>::assign(char&, char const&)
=================================================================
Begin optimization report for: std::__new_allocator<char>::__new_allocator()
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_dispose()
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_Alloc_hider::~_Alloc_hider()
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_is_local() const
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_destroy(unsigned long)
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_local_data() const
=================================================================
Begin optimization report for: std::__ptr_traits_ptr_to<char const*, char const, false>::pointer_to(char const&)
=================================================================
Begin optimization report for: std::allocator_traits<std::allocator<char>>::deallocate(std::allocator<char>&, char*, unsigned long)
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_get_allocator()
=================================================================
Begin optimization report for: std::__new_allocator<char>::deallocate(char*, unsigned long)
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_Alloc_hider::_Alloc_hider(char*, std::allocator<char> const&)
=================================================================
Begin optimization report for: std::char_traits<char>::length(char const*)
=================================================================
Begin optimization report for: void std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_construct<char const*>(char const*, char const*, std::forward_iterator_tag)
=================================================================
Begin optimization report for: std::iterator_traits<char const*>::difference_type std::distance<char const*>(char const*, char const*)
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_data(char*)
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_capacity(unsigned long)
=================================================================
Begin optimization report for: void std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_construct<char const*>(char const*, char const*, std::forward_iterator_tag)::_Guard::_Guard(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>*)
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_S_copy_chars(char*, char const*, char const*)
=================================================================
Begin optimization report for: void std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_construct<char const*>(char const*, char const*, std::forward_iterator_tag)::_Guard::~_Guard()
=================================================================
Begin optimization report for: std::iterator_traits<char const*>::difference_type std::__distance<char const*>(char const*, char const*, std::random_access_iterator_tag)
=================================================================
Begin optimization report for: std::iterator_traits<char const*>::iterator_category std::__iterator_category<char const*>(char const* const&)
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_S_copy(char*, char const*, unsigned long)
=================================================================
Begin optimization report for: std::char_traits<char>::copy(char*, char const*, unsigned long)
=================================================================
Begin optimization report for: __gnu_cxx::__alloc_traits<std::allocator<char>, char>::_S_select_on_copy(std::allocator<char> const&)
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_get_allocator() const
=================================================================
Begin optimization report for: void std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_construct<char*>(char*, char*, std::forward_iterator_tag)
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::length() const
=================================================================
Begin optimization report for: std::allocator_traits<std::allocator<char>>::select_on_container_copy_construction(std::allocator<char> const&)
=================================================================
Begin optimization report for: std::allocator<char>::allocator(std::allocator<char> const&)
=================================================================
Begin optimization report for: std::iterator_traits<char*>::difference_type std::distance<char*>(char*, char*)
=================================================================
Begin optimization report for: void std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_construct<char*>(char*, char*, std::forward_iterator_tag)::_Guard::_Guard(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>*)
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_S_copy_chars(char*, char*, char*)
=================================================================
Begin optimization report for: void std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_construct<char*>(char*, char*, std::forward_iterator_tag)::_Guard::~_Guard()
=================================================================
Begin optimization report for: std::iterator_traits<char*>::difference_type std::__distance<char*>(char*, char*, std::random_access_iterator_tag)
=================================================================
Begin optimization report for: std::iterator_traits<char*>::iterator_category std::__iterator_category<char*>(char* const&)
=================================================================
Begin optimization report for: std::pair<std::_Rb_tree_iterator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, bool> std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_M_insert_unique<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>(std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>&&)
LOOP BEGIN at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_tree.h (2113, 7)
LOOP END
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_M_get_insert_unique_pos(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&)
LOOP BEGIN at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_tree.h (2113, 7)
LOOP END
=================================================================
Begin optimization report for: std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>::operator()(std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>&) const
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_Alloc_node::_Alloc_node(std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>&)
=================================================================
Begin optimization report for: std::_Rb_tree_iterator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>> std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_M_insert_<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_Alloc_node>(std::_Rb_tree_node_base*, std::_Rb_tree_node_base*, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>&&, std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_Alloc_node&)
=================================================================
Begin optimization report for: std::pair<std::_Rb_tree_iterator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, bool>::pair<std::_Rb_tree_iterator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, bool, true>(std::_Rb_tree_iterator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>&&, bool&&)
=================================================================
Begin optimization report for: std::_Rb_tree_iterator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>::_Rb_tree_iterator(std::_Rb_tree_node_base*)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_M_end()
=================================================================
Begin optimization report for: std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>::operator()(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&) const
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_S_key(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>> const*)
=================================================================
Begin optimization report for: std::operator==(std::_Rb_tree_iterator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>> const&, std::_Rb_tree_iterator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>> const&)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::begin()
=================================================================
Begin optimization report for: std::pair<std::_Rb_tree_node_base*, std::_Rb_tree_node_base*>::pair<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>*&, std::_Rb_tree_node_base*&, true>(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>*&, std::_Rb_tree_node_base*&)
=================================================================
Begin optimization report for: std::_Rb_tree_iterator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>::operator--()
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_S_key(std::_Rb_tree_node_base const*)
=================================================================
Begin optimization report for: std::pair<std::_Rb_tree_node_base*, std::_Rb_tree_node_base*>::pair<std::_Rb_tree_node_base*, std::_Rb_tree_node_base*, true>(std::_Rb_tree_node_base* const&, std::_Rb_tree_node_base* const&)
=================================================================
Begin optimization report for: bool std::operator<<char, std::char_traits<char>, std::allocator<char>>(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&)
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::compare(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&) const
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::size() const
=================================================================
Begin optimization report for: unsigned long const& std::min<unsigned long>(unsigned long const&, unsigned long const&)
=================================================================
Begin optimization report for: std::char_traits<char>::compare(char const*, char const*, unsigned long)
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::data() const
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_S_compare(unsigned long, unsigned long)
=================================================================
Begin optimization report for: std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>::operator()(std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>> const&) const
=================================================================
Begin optimization report for: std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>::_M_valptr() const
=================================================================
Begin optimization report for: __gnu_cxx::__aligned_membuf<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>::_M_ptr() const
=================================================================
Begin optimization report for: __gnu_cxx::__aligned_membuf<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>::_M_addr() const
=================================================================
Begin optimization report for: std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>* std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_Alloc_node::operator()<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>(std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>&&) const
=================================================================
Begin optimization report for: std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>* std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_M_create_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>(std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>&&)
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_M_get_node()
=================================================================
Begin optimization report for: void std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_M_construct_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>*, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>&&)
=================================================================
Begin optimization report for: std::allocator_traits<std::allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>>::allocate(std::allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>&, unsigned long)
=================================================================
Begin optimization report for: std::__new_allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::allocate(unsigned long, void const*)
=================================================================
Begin optimization report for: std::__new_allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_M_max_size() const
=================================================================
Begin optimization report for: void std::allocator_traits<std::allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>>::construct<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>(std::allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>&, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>*, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>&&)
=================================================================
Begin optimization report for: void std::__new_allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::construct<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>(std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>*, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>&&)
=================================================================
Begin optimization report for: std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>::pair(std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>&&)
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::basic_string(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>&&)
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_use_local_data()
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::find(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&)
LOOP BEGIN at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_tree.h (1947, 7)
LOOP END
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::_M_lower_bound(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>*, std::_Rb_tree_node_base*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&)
LOOP BEGIN at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_tree.h (1947, 7)
LOOP END
=================================================================
Begin optimization report for: std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>>::end()
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::assign(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&)
=================================================================
Begin optimization report for: std::basic_ios<char, std::char_traits<char>>::setstate(std::_Ios_Iostate)
=================================================================
Begin optimization report for: std::operator|(std::_Ios_Iostate, std::_Ios_Iostate)
=================================================================
Begin optimization report for: std::basic_ios<char, std::char_traits<char>>::rdstate() const
=================================================================
Begin optimization report for: std::basic_ostream<char, std::char_traits<char>>& std::flush<char, std::char_traits<char>>(std::basic_ostream<char, std::char_traits<char>>&)
=================================================================
Begin optimization report for: std::basic_ios<char, std::char_traits<char>>::widen(char) const
=================================================================
Begin optimization report for: std::ctype<char> const& std::__check_facet<std::ctype<char>>(std::ctype<char> const*)
=================================================================
Begin optimization report for: std::ctype<char>::widen(char) const
=================================================================
Begin optimization report for: std::basic_ios<char, std::char_traits<char>>::basic_ios()
=================================================================
Begin optimization report for: std::basic_istream<char, std::char_traits<char>>::basic_istream()
=================================================================
Begin optimization report for: std::basic_ifstream<char, std::char_traits<char>>::open(char const*, std::_Ios_Openmode)
=================================================================
Begin optimization report for: std::basic_filebuf<char, std::char_traits<char>>::~basic_filebuf()
=================================================================
Begin optimization report for: std::basic_istream<char, std::char_traits<char>>::~basic_istream()
=================================================================
Begin optimization report for: std::operator|(std::_Ios_Openmode, std::_Ios_Openmode)
=================================================================
Begin optimization report for: std::basic_streambuf<char, std::char_traits<char>>::~basic_streambuf()
=================================================================
Begin optimization report for: std::basic_ifstream<char, std::char_traits<char>>::~basic_ifstream()
=================================================================
Begin optimization report for: virtual thunk to std::basic_ifstream<char, std::char_traits<char>>::~basic_ifstream()
=================================================================
Begin optimization report for: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::assign(char const*)
=================================================================
Begin optimization report for: std::basic_ostream<char, std::char_traits<char>>::basic_ostream()
=================================================================
Begin optimization report for: std::basic_ostream<char, std::char_traits<char>>::~basic_ostream()
=================================================================
Begin optimization report for: std::basic_ofstream<char, std::char_traits<char>>::~basic_ofstream()
=================================================================
Begin optimization report for: virtual thunk to std::basic_ofstream<char, std::char_traits<char>>::~basic_ofstream()
=================================================================
Begin optimization report for: _GLOBAL__sub_I_ABE.C
=================================================================

File diff suppressed because it is too large Load Diff

View File

@@ -1,255 +0,0 @@
#include "tool.h"
/*
* 你需要提供 symmetry_bd 的 C 版本(或 Fortran 绑到 C 的接口)。
* Fortran: call symmetry_bd(3,ex,f,fh,SoA)
*
* 约定:
* nghost = 3
* ex[3] = {ex1,ex2,ex3}
* f = 原始网格 (ex1*ex2*ex3)
* fh = 扩展网格 ((ex1+3)*(ex2+3)*(ex3+3)),对应 Fortran 的 (-2:ex1, ...)
* SoA[3] = 输入参数
*/
void lopsided(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, double *f_rhs,
const double *Sfx, const double *Sfy, const double *Sfz,
int Symmetry, const double SoA[3])
{
const double ZEO = 0.0, ONE = 1.0, F3 = 3.0;
const double TWO = 2.0, F6 = 6.0, F18 = 18.0;
const double F12 = 12.0, F10 = 10.0, EIT = 8.0;
const int NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2;
(void)OCTANT; // 这里和 Fortran 一样只是定义了不用也没关系
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
// 对应 Fortran: dX = X(2)-X(1) Fortran 1-based
// C: X[1]-X[0]
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
// Fortran 里算了 d2dx/d2dy/d2dz 但本 subroutine 里没用到(保持一致也算出来)
const double d2dx = ONE / TWO / dX;
const double d2dy = ONE / TWO / dY;
const double d2dz = ONE / TWO / dZ;
(void)d2dx; (void)d2dy; (void)d2dz;
// Fortran:
// imax = ex(1); jmax = ex(2); kmax = ex(3)
const int imaxF = ex1;
const int jmaxF = ex2;
const int kmaxF = ex3;
// Fortran:
// imin=jmin=kmin=1; 若满足对称条件则设为 -2
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
// 分配 fh大小 (ex1+3)*(ex2+3)*(ex3+3)
const size_t nx = (size_t)ex1 + 3;
const size_t ny = (size_t)ex2 + 3;
const size_t nz = (size_t)ex3 + 3;
const size_t fh_size = nx * ny * nz;
double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return; // 内存不足:直接返回(你也可以改成 abort/报错)
// Fortran: call symmetry_bd(3,ex,f,fh,SoA)
symmetry_bd(3, ex, f, fh, SoA);
/*
* Fortran 主循环:
* do k=1,ex(3)-1
* do j=1,ex(2)-1
* do i=1,ex(1)-1
*
* 转成 C 0-based
* k0 = 0..ex3-2, j0 = 0..ex2-2, i0 = 0..ex1-2
*
* 并且 Fortran 里的 i/j/k 在 fh 访问时,仍然是 Fortran 索引值:
* iF=i0+1, jF=j0+1, kF=k0+1
*/
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
// ---------------- x direction ----------------
const double sfx = Sfx[p];
if (sfx > ZEO) {
// Fortran: if(i+3 <= imax)
// iF+3 <= ex1 <=> i0+4 <= ex1 <=> i0 <= ex1-4
if (i0 <= ex1 - 4) {
f_rhs[p] += sfx * d12dx *
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
}
// elseif(i+2 <= imax) <=> i0 <= ex1-3
else if (i0 <= ex1 - 3) {
f_rhs[p] += sfx * d12dx *
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
}
// elseif(i+1 <= imax) <=> i0 <= ex1-2循环里总成立
else if (i0 <= ex1 - 2) {
f_rhs[p] -= sfx * d12dx *
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
}
} else if (sfx < ZEO) {
// Fortran: if(i-3 >= imin)
// (iF-3) >= iminF <=> (i0-2) >= iminF
if ((i0 - 2) >= iminF) {
f_rhs[p] -= sfx * d12dx *
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
}
// elseif(i-2 >= imin) <=> (i0-1) >= iminF
else if ((i0 - 1) >= iminF) {
f_rhs[p] += sfx * d12dx *
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
}
// elseif(i-1 >= imin) <=> i0 >= iminF
else if (i0 >= iminF) {
f_rhs[p] += sfx * d12dx *
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
}
}
// ---------------- y direction ----------------
const double sfy = Sfy[p];
if (sfy > ZEO) {
// jF+3 <= ex2 <=> j0+4 <= ex2 <=> j0 <= ex2-4
if (j0 <= ex2 - 4) {
f_rhs[p] += sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
} else if (j0 <= ex2 - 3) {
f_rhs[p] += sfy * d12dy *
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
} else if (j0 <= ex2 - 2) {
f_rhs[p] -= sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
}
} else if (sfy < ZEO) {
if ((j0 - 2) >= jminF) {
f_rhs[p] -= sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
} else if ((j0 - 1) >= jminF) {
f_rhs[p] += sfy * d12dy *
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
} else if (j0 >= jminF) {
f_rhs[p] += sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
}
}
// ---------------- z direction ----------------
const double sfz = Sfz[p];
if (sfz > ZEO) {
if (k0 <= ex3 - 4) {
f_rhs[p] += sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
} else if (k0 <= ex3 - 3) {
f_rhs[p] += sfz * d12dz *
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
} else if (k0 <= ex3 - 2) {
f_rhs[p] -= sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
}
} else if (sfz < ZEO) {
if ((k0 - 2) >= kminF) {
f_rhs[p] -= sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
} else if ((k0 - 1) >= kminF) {
f_rhs[p] += sfz * d12dz *
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
} else if (k0 >= kminF) {
f_rhs[p] += sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
}
}
}
}
}
free(fh);
}

View File

@@ -1,248 +0,0 @@
#include "tool.h"
/*
* Combined advection (lopsided) + KO dissipation (kodis).
* Uses one shared symmetry_bd buffer per call.
*/
void lopsided_kodis(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, double *f_rhs,
const double *Sfx, const double *Sfy, const double *Sfz,
int Symmetry, const double SoA[3], double eps)
{
const double ZEO = 0.0, ONE = 1.0, F3 = 3.0;
const double F6 = 6.0, F18 = 18.0;
const double F12 = 12.0, F10 = 10.0, EIT = 8.0;
const double SIX = 6.0, FIT = 15.0, TWT = 20.0;
const double cof = 64.0; // 2^6
const int NO_SYMM = 0, EQ_SYMM = 1;
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
const int imaxF = ex1;
const int jmaxF = ex2;
const int kmaxF = ex3;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
// fh for Fortran-style domain (-2:ex1,-2:ex2,-2:ex3)
const size_t nx = (size_t)ex1 + 3;
const size_t ny = (size_t)ex2 + 3;
const size_t nz = (size_t)ex3 + 3;
const size_t fh_size = nx * ny * nz;
double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
symmetry_bd(3, ex, f, fh, SoA);
// Advection (same stencil logic as lopsided_c.C)
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
const double sfx = Sfx[p];
if (sfx > ZEO) {
if (i0 <= ex1 - 4) {
f_rhs[p] += sfx * d12dx *
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
} else if (i0 <= ex1 - 3) {
f_rhs[p] += sfx * d12dx *
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
} else if (i0 <= ex1 - 2) {
f_rhs[p] -= sfx * d12dx *
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
}
} else if (sfx < ZEO) {
if ((i0 - 2) >= iminF) {
f_rhs[p] -= sfx * d12dx *
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
} else if ((i0 - 1) >= iminF) {
f_rhs[p] += sfx * d12dx *
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
} else if (i0 >= iminF) {
f_rhs[p] += sfx * d12dx *
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
}
}
const double sfy = Sfy[p];
if (sfy > ZEO) {
if (j0 <= ex2 - 4) {
f_rhs[p] += sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
} else if (j0 <= ex2 - 3) {
f_rhs[p] += sfy * d12dy *
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
} else if (j0 <= ex2 - 2) {
f_rhs[p] -= sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
}
} else if (sfy < ZEO) {
if ((j0 - 2) >= jminF) {
f_rhs[p] -= sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
} else if ((j0 - 1) >= jminF) {
f_rhs[p] += sfy * d12dy *
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
} else if (j0 >= jminF) {
f_rhs[p] += sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
}
}
const double sfz = Sfz[p];
if (sfz > ZEO) {
if (k0 <= ex3 - 4) {
f_rhs[p] += sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
} else if (k0 <= ex3 - 3) {
f_rhs[p] += sfz * d12dz *
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
} else if (k0 <= ex3 - 2) {
f_rhs[p] -= sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
}
} else if (sfz < ZEO) {
if ((k0 - 2) >= kminF) {
f_rhs[p] -= sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
} else if ((k0 - 1) >= kminF) {
f_rhs[p] += sfz * d12dz *
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
} else if (k0 >= kminF) {
f_rhs[p] += sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
}
}
}
}
}
// KO dissipation (same domain restriction as kodiss_c.C)
if (eps > ZEO) {
const int i0_lo = (iminF + 2 > 0) ? iminF + 2 : 0;
const int j0_lo = (jminF + 2 > 0) ? jminF + 2 : 0;
const int k0_lo = (kminF + 2 > 0) ? kminF + 2 : 0;
const int i0_hi = imaxF - 4; // inclusive
const int j0_hi = jmaxF - 4;
const int k0_hi = kmaxF - 4;
if (!(i0_lo > i0_hi || j0_lo > j0_hi || k0_lo > k0_hi)) {
for (int k0 = k0_lo; k0 <= k0_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j0_lo; j0 <= j0_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i0_lo; i0 <= i0_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
const double Dx_term =
((fh[idx_fh_F(iF - 3, jF, kF, ex)] + fh[idx_fh_F(iF + 3, jF, kF, ex)]) -
SIX * (fh[idx_fh_F(iF - 2, jF, kF, ex)] + fh[idx_fh_F(iF + 2, jF, kF, ex)]) +
FIT * (fh[idx_fh_F(iF - 1, jF, kF, ex)] + fh[idx_fh_F(iF + 1, jF, kF, ex)]) -
TWT * fh[idx_fh_F(iF, jF, kF, ex)]) / dX;
const double Dy_term =
((fh[idx_fh_F(iF, jF - 3, kF, ex)] + fh[idx_fh_F(iF, jF + 3, kF, ex)]) -
SIX * (fh[idx_fh_F(iF, jF - 2, kF, ex)] + fh[idx_fh_F(iF, jF + 2, kF, ex)]) +
FIT * (fh[idx_fh_F(iF, jF - 1, kF, ex)] + fh[idx_fh_F(iF, jF + 1, kF, ex)]) -
TWT * fh[idx_fh_F(iF, jF, kF, ex)]) / dY;
const double Dz_term =
((fh[idx_fh_F(iF, jF, kF - 3, ex)] + fh[idx_fh_F(iF, jF, kF + 3, ex)]) -
SIX * (fh[idx_fh_F(iF, jF, kF - 2, ex)] + fh[idx_fh_F(iF, jF, kF + 2, ex)]) +
FIT * (fh[idx_fh_F(iF, jF, kF - 1, ex)] + fh[idx_fh_F(iF, jF, kF + 1, ex)]) -
TWT * fh[idx_fh_F(iF, jF, kF, ex)]) / dZ;
f_rhs[p] += (eps / cof) * (Dx_term + Dy_term + Dz_term);
}
}
}
}
}
free(fh);
}

View File

@@ -1,332 +0,0 @@
#include "tool.h"
void fdderivs(const int ex[3],
const double *f,
double *fxx, double *fxy, double *fxz,
double *fyy, double *fyz, double *fzz,
const double *X, const double *Y, const double *Z,
double SYM1, double SYM2, double SYM3,
int Symmetry, int onoff)
{
(void)onoff;
const int NO_SYMM = 0, EQ_SYMM = 1;
const double ZEO = 0.0, ONE = 1.0, TWO = 2.0;
const double F1o4 = 2.5e-1; // 1/4
const double F8 = 8.0;
const double F16 = 16.0;
const double F30 = 30.0;
const double F1o12 = ONE / 12.0;
const double F1o144 = ONE / 144.0;
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
const int imaxF = ex1;
const int jmaxF = ex2;
const int kmaxF = ex3;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
const double SoA[3] = { SYM1, SYM2, SYM3 };
/* fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2 */
const size_t nx = (size_t)ex1 + 2;
const size_t ny = (size_t)ex2 + 2;
const size_t nz = (size_t)ex3 + 2;
const size_t fh_size = nx * ny * nz;
static double *fh = NULL;
static size_t cap = 0;
if (fh_size > cap) {
free(fh);
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
cap = fh_size;
}
// double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
symmetry_bd(2, ex, f, fh, SoA);
/* 系数:按 Fortran 原式 */
const double Sdxdx = ONE / (dX * dX);
const double Sdydy = ONE / (dY * dY);
const double Sdzdz = ONE / (dZ * dZ);
const double Fdxdx = F1o12 / (dX * dX);
const double Fdydy = F1o12 / (dY * dY);
const double Fdzdz = F1o12 / (dZ * dZ);
const double Sdxdy = F1o4 / (dX * dY);
const double Sdxdz = F1o4 / (dX * dZ);
const double Sdydz = F1o4 / (dY * dZ);
const double Fdxdy = F1o144 / (dX * dY);
const double Fdxdz = F1o144 / (dX * dZ);
const double Fdydz = F1o144 / (dY * dZ);
/* 只清零不被主循环覆盖的边界面 */
{
/* 高边界k0=ex3-1 */
for (int j0 = 0; j0 < ex2; ++j0)
for (int i0 = 0; i0 < ex1; ++i0) {
const size_t p = idx_ex(i0, j0, ex3 - 1, ex);
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
}
/* 高边界j0=ex2-1 */
for (int k0 = 0; k0 < ex3 - 1; ++k0)
for (int i0 = 0; i0 < ex1; ++i0) {
const size_t p = idx_ex(i0, ex2 - 1, k0, ex);
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
}
/* 高边界i0=ex1-1 */
for (int k0 = 0; k0 < ex3 - 1; ++k0)
for (int j0 = 0; j0 < ex2 - 1; ++j0) {
const size_t p = idx_ex(ex1 - 1, j0, k0, ex);
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
}
/* 低边界:当二阶模板也不可用时,对应 i0/j0/k0=0 面 */
if (kminF == 1) {
for (int j0 = 0; j0 < ex2; ++j0)
for (int i0 = 0; i0 < ex1; ++i0) {
const size_t p = idx_ex(i0, j0, 0, ex);
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
}
}
if (jminF == 1) {
for (int k0 = 0; k0 < ex3; ++k0)
for (int i0 = 0; i0 < ex1; ++i0) {
const size_t p = idx_ex(i0, 0, k0, ex);
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
}
}
if (iminF == 1) {
for (int k0 = 0; k0 < ex3; ++k0)
for (int j0 = 0; j0 < ex2; ++j0) {
const size_t p = idx_ex(0, j0, k0, ex);
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
}
}
}
/*
* 两段式:
* 1) 二阶可用区域先计算二阶模板
* 2) 高阶可用区域再覆盖四阶模板
*/
const int i2_lo = (iminF > 0) ? iminF : 0;
const int j2_lo = (jminF > 0) ? jminF : 0;
const int k2_lo = (kminF > 0) ? kminF : 0;
const int i2_hi = ex1 - 2;
const int j2_hi = ex2 - 2;
const int k2_hi = ex3 - 2;
const int i4_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
const int j4_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
const int k4_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
const int i4_hi = ex1 - 3;
const int j4_hi = ex2 - 3;
const int k4_hi = ex3 - 3;
/*
* Strategy A:
* Avoid redundant work in overlap of 2nd/4th-order regions.
* Only compute 2nd-order on shell points that are NOT overwritten by
* the 4th-order pass.
*/
const int has4 = (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi);
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
if (has4 &&
i0 >= i4_lo && i0 <= i4_hi &&
j0 >= j4_lo && j0 <= j4_hi &&
k0 >= k4_lo && k0 <= k4_hi) {
continue;
}
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
fxx[p] = Sdxdx * (
fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
);
fyy[p] = Sdydy * (
fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
);
fzz[p] = Sdzdz * (
fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
);
fxy[p] = Sdxdy * (
fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)] -
fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)] -
fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
);
fxz[p] = Sdxdz * (
fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
);
fyz[p] = Sdydz * (
fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)] +
fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
);
}
}
}
}
if (has4) {
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
fxx[p] = Fdxdx * (
-fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
);
fyy[p] = Fdydy * (
-fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
);
fzz[p] = Fdzdz * (
-fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
);
{
const double t_jm2 =
( fh[idx_fh_F_ord2(iF - 2, jF - 2, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 2, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 2, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF - 2, kF, ex)] );
const double t_jm1 =
( fh[idx_fh_F_ord2(iF - 2, jF - 1, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF - 1, kF, ex)] );
const double t_jp1 =
( fh[idx_fh_F_ord2(iF - 2, jF + 1, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF + 1, kF, ex)] );
const double t_jp2 =
( fh[idx_fh_F_ord2(iF - 2, jF + 2, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 2, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 2, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF + 2, kF, ex)] );
fxy[p] = Fdxdy * ( t_jm2 - F8 * t_jm1 + F8 * t_jp1 - t_jp2 );
}
{
const double t_km2 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 2, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 2, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 2, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 2, ex)] );
const double t_km1 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 1, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 1, ex)] );
const double t_kp1 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 1, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 1, ex)] );
const double t_kp2 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 2, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 2, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 2, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 2, ex)] );
fxz[p] = Fdxdz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
}
{
const double t_km2 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 2, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 2, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 2, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 2, ex)] );
const double t_km1 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 1, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 1, ex)] );
const double t_kp1 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 1, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 1, ex)] );
const double t_kp2 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 2, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 2, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 2, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 2, ex)] );
fyz[p] = Fdydz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
}
}
}
}
}
// free(fh);
}

View File

@@ -1,167 +0,0 @@
#include "tool.h"
/*
* C 版 fderivs
*
* Fortran:
* subroutine fderivs(ex,f,fx,fy,fz,X,Y,Z,SYM1,SYM2,SYM3,symmetry,onoff)
*
* 约定:
* f, fx, fy, fz: ex1*ex2*ex3按 idx_ex 布局
* X: ex1, Y: ex2, Z: ex3
*/
void fderivs(const int ex[3],
const double *f,
double *fx, double *fy, double *fz,
const double *X, const double *Y, const double *Z,
double SYM1, double SYM2, double SYM3,
int Symmetry, int onoff)
{
(void)onoff; // Fortran 里没用到
const double ZEO = 0.0, ONE = 1.0;
const double TWO = 2.0, EIT = 8.0;
const double F12 = 12.0;
const int NO_SYMM = 0, EQ_SYMM = 1; // OCTANT=2 在本子程序里不直接用
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
// dX = X(2)-X(1) -> C: X[1]-X[0]
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
// Fortran 1-based bounds
const int imaxF = ex1;
const int jmaxF = ex2;
const int kmaxF = ex3;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
// SoA(1:3) = SYM1,SYM2,SYM3
const double SoA[3] = { SYM1, SYM2, SYM3 };
// fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2
const size_t nx = (size_t)ex1 + 2;
const size_t ny = (size_t)ex2 + 2;
const size_t nz = (size_t)ex3 + 2;
const size_t fh_size = nx * ny * nz;
static double *fh = NULL;
static size_t cap = 0;
if (fh_size > cap) {
free(fh);
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
cap = fh_size;
}
// double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
// call symmetry_bd(2,ex,f,fh,SoA)
symmetry_bd(2, ex, f, fh, SoA);
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
const double d2dx = ONE / TWO / dX;
const double d2dy = ONE / TWO / dY;
const double d2dz = ONE / TWO / dZ;
// fx = fy = fz = 0
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
for (size_t p = 0; p < all; ++p) {
fx[p] = ZEO;
fy[p] = ZEO;
fz[p] = ZEO;
}
/*
* 两段式:
* 1) 先在二阶可用区域计算二阶模板
* 2) 再在高阶可用区域覆盖为四阶模板
*
* 与原 if/elseif 逻辑等价,但减少逐点分支判断。
*/
const int i2_lo = (iminF > 0) ? iminF : 0;
const int j2_lo = (jminF > 0) ? jminF : 0;
const int k2_lo = (kminF > 0) ? kminF : 0;
const int i2_hi = ex1 - 2;
const int j2_hi = ex2 - 2;
const int k2_hi = ex3 - 2;
const int i4_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
const int j4_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
const int k4_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
const int i4_hi = ex1 - 3;
const int j4_hi = ex2 - 3;
const int k4_hi = ex3 - 3;
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
fx[p] = d2dx * (
-fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
);
fy[p] = d2dy * (
-fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
);
fz[p] = d2dz * (
-fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
);
}
}
}
}
if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) {
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
fx[p] = d12dx * (
fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] -
EIT * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
EIT * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)]
);
fy[p] = d12dy * (
fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] -
EIT * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
EIT * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)]
);
fz[p] = d12dz * (
fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] -
EIT * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
EIT * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)] -
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)]
);
}
}
}
}
// free(fh);
}

View File

@@ -1,117 +0,0 @@
#include "tool.h"
/*
* C 版 kodis
*
* Fortran signature:
* subroutine kodis(ex,X,Y,Z,f,f_rhs,SoA,Symmetry,eps)
*
* 约定:
* X: ex1, Y: ex2, Z: ex3
* f, f_rhs: ex1*ex2*ex3 按 idx_ex 布局
* SoA[3]
* eps: double
*/
void kodis(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, double *f_rhs,
const double SoA[3],
int Symmetry, double eps)
{
const double ONE = 1.0, SIX = 6.0, FIT = 15.0, TWT = 20.0;
const double cof = 64.0; // 2^6
const int NO_SYMM = 0, OCTANT = 2;
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
// Fortran: dX = X(2)-X(1) -> C: X[1]-X[0]
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
(void)ONE; // ONE 在原 Fortran 里只是参数,这里不一定用得上
// Fortran: imax=ex(1) 等是 1-based 上界
const int imaxF = ex1;
const int jmaxF = ex2;
const int kmaxF = ex3;
// Fortran: imin=jmin=kmin=1某些对称情况变 -2
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
if (Symmetry == OCTANT && fabs(X[0]) < dX) iminF = -2;
if (Symmetry == OCTANT && fabs(Y[0]) < dY) jminF = -2;
// 分配 fh大小 (ex1+3)*(ex2+3)*(ex3+3),对应 ord=3
const size_t nx = (size_t)ex1 + 3;
const size_t ny = (size_t)ex2 + 3;
const size_t nz = (size_t)ex3 + 3;
const size_t fh_size = nx * ny * nz;
double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
// Fortran: call symmetry_bd(3,ex,f,fh,SoA)
symmetry_bd(3, ex, f, fh, SoA);
/*
* Fortran loops:
* do k=1,ex3
* do j=1,ex2
* do i=1,ex1
*
* C: k0=0..ex3-1, j0=0..ex2-1, i0=0..ex1-1
* 并定义 Fortran index: iF=i0+1, ...
*/
// 收紧循环范围:只遍历满足 iF±3/jF±3/kF±3 条件的内部点
// iF-3 >= iminF => iF >= iminF+3 => i0 >= iminF+2 (因为 iF=i0+1)
// iF+3 <= imaxF => iF <= imaxF-3 => i0 <= imaxF-4
const int i0_lo = (iminF + 2 > 0) ? iminF + 2 : 0;
const int j0_lo = (jminF + 2 > 0) ? jminF + 2 : 0;
const int k0_lo = (kminF + 2 > 0) ? kminF + 2 : 0;
const int i0_hi = imaxF - 4; // inclusive
const int j0_hi = jmaxF - 4;
const int k0_hi = kmaxF - 4;
if (i0_lo > i0_hi || j0_lo > j0_hi || k0_lo > k0_hi) {
free(fh);
return;
}
for (int k0 = k0_lo; k0 <= k0_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j0_lo; j0 <= j0_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i0_lo; i0 <= i0_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
// 三个方向各一份同型的 7 点组合(实际上是对称的 6th-order dissipation/filter 核)
const double Dx_term =
( (fh[idx_fh_F(iF - 3, jF, kF, ex)] + fh[idx_fh_F(iF + 3, jF, kF, ex)]) -
SIX * (fh[idx_fh_F(iF - 2, jF, kF, ex)] + fh[idx_fh_F(iF + 2, jF, kF, ex)]) +
FIT * (fh[idx_fh_F(iF - 1, jF, kF, ex)] + fh[idx_fh_F(iF + 1, jF, kF, ex)]) -
TWT * fh[idx_fh_F(iF , jF, kF, ex)] ) / dX;
const double Dy_term =
( (fh[idx_fh_F(iF, jF - 3, kF, ex)] + fh[idx_fh_F(iF, jF + 3, kF, ex)]) -
SIX * (fh[idx_fh_F(iF, jF - 2, kF, ex)] + fh[idx_fh_F(iF, jF + 2, kF, ex)]) +
FIT * (fh[idx_fh_F(iF, jF - 1, kF, ex)] + fh[idx_fh_F(iF, jF + 1, kF, ex)]) -
TWT * fh[idx_fh_F(iF, jF , kF, ex)] ) / dY;
const double Dz_term =
( (fh[idx_fh_F(iF, jF, kF - 3, ex)] + fh[idx_fh_F(iF, jF, kF + 3, ex)]) -
SIX * (fh[idx_fh_F(iF, jF, kF - 2, ex)] + fh[idx_fh_F(iF, jF, kF + 2, ex)]) +
FIT * (fh[idx_fh_F(iF, jF, kF - 1, ex)] + fh[idx_fh_F(iF, jF, kF + 1, ex)]) -
TWT * fh[idx_fh_F(iF, jF, kF , ex)] ) / dZ;
// Fortran:
// f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof*(Dx_term + Dy_term + Dz_term)
f_rhs[p] += (eps / cof) * (Dx_term + Dy_term + Dz_term);
}
}
}
free(fh);
}

View File

@@ -7,178 +7,12 @@
#include <string>
#include <cmath>
#include <new>
#include <vector>
using namespace std;
#include "misc.h"
#include "MPatch.h"
#include "Parallel.h"
#include "fmisc.h"
#ifdef INTERP_LB_PROFILE
#include "interp_lb_profile.h"
#endif
namespace
{
struct InterpBlockView
{
Block *bp;
double llb[dim];
double uub[dim];
};
struct BlockBinIndex
{
int bins[dim];
double lo[dim];
double inv[dim];
vector<InterpBlockView> views;
vector<vector<int>> bin_to_blocks;
bool valid;
BlockBinIndex() : valid(false)
{
for (int i = 0; i < dim; i++)
{
bins[i] = 1;
lo[i] = 0.0;
inv[i] = 0.0;
}
}
};
inline int clamp_int(int v, int lo, int hi)
{
return (v < lo) ? lo : ((v > hi) ? hi : v);
}
inline int coord_to_bin(double x, double lo, double inv, int nb)
{
if (nb <= 1 || inv <= 0.0)
return 0;
int b = int(floor((x - lo) * inv));
return clamp_int(b, 0, nb - 1);
}
inline int bin_loc(const BlockBinIndex &index, int b0, int b1, int b2)
{
return b0 + index.bins[0] * (b1 + index.bins[1] * b2);
}
inline bool point_in_block_view(const InterpBlockView &view, const double *pox, const double *DH)
{
for (int i = 0; i < dim; i++)
{
if (pox[i] - view.llb[i] < -DH[i] / 2 || pox[i] - view.uub[i] > DH[i] / 2)
return false;
}
return true;
}
void build_block_bin_index(Patch *patch, const double *DH, BlockBinIndex &index)
{
index = BlockBinIndex();
MyList<Block> *Bp = patch->blb;
while (Bp)
{
Block *BP = Bp->data;
InterpBlockView view;
view.bp = BP;
for (int i = 0; i < dim; i++)
{
#ifdef Vertex
#ifdef Cell
#error Both Cell and Vertex are defined
#endif
view.llb[i] = (feq(BP->bbox[i], patch->bbox[i], DH[i] / 2)) ? BP->bbox[i] + patch->lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
view.uub[i] = (feq(BP->bbox[dim + i], patch->bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - patch->uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
#else
#ifdef Cell
view.llb[i] = (feq(BP->bbox[i], patch->bbox[i], DH[i] / 2)) ? BP->bbox[i] + patch->lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
view.uub[i] = (feq(BP->bbox[dim + i], patch->bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - patch->uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
#else
#error Not define Vertex nor Cell
#endif
#endif
}
index.views.push_back(view);
if (Bp == patch->ble)
break;
Bp = Bp->next;
}
const int nblocks = int(index.views.size());
if (nblocks <= 0)
return;
int bins_1d = int(ceil(pow(double(nblocks), 1.0 / 3.0)));
bins_1d = clamp_int(bins_1d, 1, 32);
for (int i = 0; i < dim; i++)
{
index.bins[i] = bins_1d;
index.lo[i] = patch->bbox[i] + patch->lli[i] * DH[i];
const double hi = patch->bbox[dim + i] - patch->uui[i] * DH[i];
if (hi > index.lo[i] && bins_1d > 1)
index.inv[i] = bins_1d / (hi - index.lo[i]);
else
index.inv[i] = 0.0;
}
index.bin_to_blocks.resize(index.bins[0] * index.bins[1] * index.bins[2]);
for (int bi = 0; bi < nblocks; bi++)
{
const InterpBlockView &view = index.views[bi];
int bmin[dim], bmax[dim];
for (int d = 0; d < dim; d++)
{
const double low = view.llb[d] - DH[d] / 2;
const double up = view.uub[d] + DH[d] / 2;
bmin[d] = coord_to_bin(low, index.lo[d], index.inv[d], index.bins[d]);
bmax[d] = coord_to_bin(up, index.lo[d], index.inv[d], index.bins[d]);
if (bmax[d] < bmin[d])
{
int t = bmin[d];
bmin[d] = bmax[d];
bmax[d] = t;
}
}
for (int bz = bmin[2]; bz <= bmax[2]; bz++)
for (int by = bmin[1]; by <= bmax[1]; by++)
for (int bx = bmin[0]; bx <= bmax[0]; bx++)
index.bin_to_blocks[bin_loc(index, bx, by, bz)].push_back(bi);
}
index.valid = true;
}
int find_block_index_for_point(const BlockBinIndex &index, const double *pox, const double *DH)
{
if (!index.valid)
return -1;
const int bx = coord_to_bin(pox[0], index.lo[0], index.inv[0], index.bins[0]);
const int by = coord_to_bin(pox[1], index.lo[1], index.inv[1], index.bins[1]);
const int bz = coord_to_bin(pox[2], index.lo[2], index.inv[2], index.bins[2]);
const vector<int> &cand = index.bin_to_blocks[bin_loc(index, bx, by, bz)];
for (size_t ci = 0; ci < cand.size(); ci++)
{
const int bi = cand[ci];
if (point_in_block_view(index.views[bi], pox, DH))
return bi;
}
// Fallback to full scan for numerical edge cases around bin boundaries.
for (size_t bi = 0; bi < index.views.size(); bi++)
if (point_in_block_view(index.views[bi], pox, DH))
return int(bi);
return -1;
}
} // namespace
Patch::Patch(int DIM, int *shapei, double *bboxi, int levi, bool buflog, int Symmetry) : lev(levi)
{
@@ -507,9 +341,8 @@ void Patch::Interp_Points(MyList<var> *VarList,
double *Shellf, int Symmetry)
{
// NOTE: we do not Synchnize variables here, make sure of that before calling this routine
int myrank, nprocs;
int myrank;
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
int ordn = 2 * ghost_width;
MyList<var> *varl;
@@ -521,20 +354,24 @@ void Patch::Interp_Points(MyList<var> *VarList,
varl = varl->next;
}
memset(Shellf, 0, sizeof(double) * NN * num_var);
double *shellf;
shellf = new double[NN * num_var];
memset(shellf, 0, sizeof(double) * NN * num_var);
// owner_rank[j] records which MPI rank owns point j
// All ranks traverse the same block list so they all agree on ownership
int *owner_rank;
owner_rank = new int[NN];
for (int j = 0; j < NN; j++)
owner_rank[j] = -1;
// we use weight to monitor code, later some day we can move it for optimization
int *weight;
weight = new int[NN];
memset(weight, 0, sizeof(int) * NN);
double *DH, *llb, *uub;
DH = new double[dim];
double DH[dim];
for (int i = 0; i < dim; i++)
{
DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
}
llb = new double[dim];
uub = new double[dim];
for (int j = 0; j < NN; j++) // run along points
{
@@ -557,11 +394,45 @@ void Patch::Interp_Points(MyList<var> *VarList,
}
}
const int block_i = find_block_index_for_point(block_index, pox, DH);
if (block_i >= 0)
MyList<Block> *Bp = blb;
bool notfind = true;
while (notfind && Bp) // run along Blocks
{
Block *BP = block_index.views[block_i].bp;
owner_rank[j] = BP->rank;
Block *BP = Bp->data;
bool flag = true;
for (int i = 0; i < dim; i++)
{
// NOTE: our dividing structure is (exclude ghost)
// -1 0
// 1 2
// so (0,1) does not belong to any part for vertex structure
// here we put (0,0.5) to left part and (0.5,1) to right part
// BUT for cell structure the bbox is (-1.5,0.5) and (0.5,2.5), there is no missing region at all
#ifdef Vertex
#ifdef Cell
#error Both Cell and Vertex are defined
#endif
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
#else
#ifdef Cell
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
#else
#error Not define Vertex nor Cell
#endif
#endif
if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2)
{
flag = false;
break;
}
}
if (flag)
{
notfind = false;
if (myrank == BP->rank)
{
//---> interpolation
@@ -569,338 +440,119 @@ void Patch::Interp_Points(MyList<var> *VarList,
int k = 0;
while (varl) // run along variables
{
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
// shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn],
// pox,ordn,varl->data->SoA,Symmetry);
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[j * num_var + k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next;
k++;
}
weight[j] = 1;
}
}
if (Bp == ble)
break;
Bp = Bp->next;
}
}
// Replace MPI_Allreduce with per-owner MPI_Bcast:
// Group consecutive points by owner rank and broadcast each group.
// Since each point's data is non-zero only on the owner rank,
// Bcast from owner is equivalent to Allreduce(MPI_SUM) but much cheaper.
MPI_Allreduce(shellf, Shellf, NN * num_var, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
int *Weight;
Weight = new int[NN];
MPI_Allreduce(weight, Weight, NN, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
// misc::tillherecheck("print me");
for (int i = 0; i < NN; i++)
{
int j = 0;
while (j < NN)
{
int cur_owner = owner_rank[j];
if (cur_owner < 0)
if (Weight[i] > 1)
{
if (myrank == 0)
cout << "WARNING: Patch::Interp_Points meets multiple weight" << endl;
for (int j = 0; j < num_var; j++)
Shellf[j + i * num_var] = Shellf[j + i * num_var] / Weight[i];
}
else if (Weight[i] == 0 && myrank == 0)
{
cout << "ERROR: Patch::Interp_Points fails to find point (";
for (int d = 0; d < dim; d++)
for (int j = 0; j < dim; j++)
{
cout << XX[d][j];
if (d < dim - 1)
cout << XX[j][i];
if (j < dim - 1)
cout << ",";
else
cout << ")";
}
cout << " on Patch (";
for (int d = 0; d < dim; d++)
for (int j = 0; j < dim; j++)
{
cout << bbox[d] << "+" << lli[d] * DH[d];
if (d < dim - 1)
cout << bbox[j] << "+" << lli[j] * getdX(j);
if (j < dim - 1)
cout << ",";
else
cout << ")--";
}
cout << "(";
for (int d = 0; d < dim; d++)
for (int j = 0; j < dim; j++)
{
cout << bbox[dim + d] << "-" << uui[d] * DH[d];
if (d < dim - 1)
cout << bbox[dim + j] << "-" << uui[j] * getdX(j);
if (j < dim - 1)
cout << ",";
else
cout << ")" << endl;
}
MPI_Abort(MPI_COMM_WORLD, 1);
}
j++;
continue;
}
// Find contiguous run of points with the same owner
int jstart = j;
while (j < NN && owner_rank[j] == cur_owner)
j++;
int count = (j - jstart) * num_var;
MPI_Bcast(Shellf + jstart * num_var, count, MPI_DOUBLE, cur_owner, MPI_COMM_WORLD);
}
}
delete[] owner_rank;
}
void Patch::Interp_Points(MyList<var> *VarList,
int NN, double **XX,
double *Shellf, int Symmetry,
int Nmin_consumer, int Nmax_consumer)
{
// Targeted point-to-point overload: each owner sends each point only to
// the one rank that needs it for integration (consumer), reducing
// communication volume by ~nprocs times compared to the Bcast version.
#ifdef INTERP_LB_PROFILE
double t_interp_start = MPI_Wtime();
#endif
int myrank, nprocs;
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
int ordn = 2 * ghost_width;
MyList<var> *varl;
int num_var = 0;
varl = VarList;
while (varl)
#if 0
checkBlock();
#else
cout << "splited domains:" << endl;
{
num_var++;
varl = varl->next;
}
memset(Shellf, 0, sizeof(double) * NN * num_var);
// owner_rank[j] records which MPI rank owns point j
int *owner_rank;
owner_rank = new int[NN];
for (int j = 0; j < NN; j++)
owner_rank[j] = -1;
double DH[dim];
for (int i = 0; i < dim; i++)
DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
// --- Interpolation phase (identical to original) ---
for (int j = 0; j < NN; j++)
MyList<Block> *Bp = blb;
while (Bp)
{
double pox[dim];
Block *BP = Bp->data;
for (int i = 0; i < dim; i++)
{
pox[i] = XX[i][j];
if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i]))
{
cout << "Patch::Interp_Points: point (";
for (int k = 0; k < dim; k++)
{
cout << XX[k][j];
if (k < dim - 1)
cout << ",";
else
cout << ") is out of current Patch." << endl;
}
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
const int block_i = find_block_index_for_point(block_index, pox, DH);
if (block_i >= 0)
{
Block *BP = block_index.views[block_i].bp;
owner_rank[j] = BP->rank;
if (myrank == BP->rank)
{
varl = VarList;
int k = 0;
while (varl)
{
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next;
k++;
}
}
}
}
#ifdef INTERP_LB_PROFILE
double t_interp_end = MPI_Wtime();
double t_interp_local = t_interp_end - t_interp_start;
#ifdef Vertex
#ifdef Cell
#error Both Cell and Vertex are defined
#endif
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
#else
#ifdef Cell
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
#else
#error Not define Vertex nor Cell
#endif
#endif
// --- Error check for unfound points ---
for (int j = 0; j < NN; j++)
{
if (owner_rank[j] < 0 && myrank == 0)
{
cout << "ERROR: Patch::Interp_Points fails to find point (";
for (int d = 0; d < dim; d++)
{
cout << XX[d][j];
if (d < dim - 1)
cout << ",";
else
cout << ")";
}
cout << " on Patch (";
for (int d = 0; d < dim; d++)
{
cout << bbox[d] << "+" << lli[d] * DH[d];
if (d < dim - 1)
cout << ",";
else
cout << ")--";
}
cout << "(";
for (int d = 0; d < dim; d++)
for (int j = 0; j < dim; j++)
{
cout << bbox[dim + d] << "-" << uui[d] * DH[d];
if (d < dim - 1)
cout << llb[j] << ":" << uub[j];
if (j < dim - 1)
cout << ",";
else
cout << ")" << endl;
}
if (Bp == ble)
break;
Bp = Bp->next;
}
}
#endif
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
// --- Targeted point-to-point communication phase ---
// Compute consumer_rank[j] using the same deterministic formula as surface_integral
int *consumer_rank = new int[NN];
{
int mp = NN / nprocs;
int Lp = NN - nprocs * mp;
for (int j = 0; j < NN; j++)
{
if (j < Lp * (mp + 1))
consumer_rank[j] = j / (mp + 1);
else
consumer_rank[j] = Lp + (j - Lp * (mp + 1)) / mp;
}
}
// Count sends and recvs per rank
int *send_count = new int[nprocs];
int *recv_count = new int[nprocs];
memset(send_count, 0, sizeof(int) * nprocs);
memset(recv_count, 0, sizeof(int) * nprocs);
for (int j = 0; j < NN; j++)
{
int own = owner_rank[j];
int con = consumer_rank[j];
if (own == con)
continue; // local — no communication needed
if (own == myrank)
send_count[con]++;
if (con == myrank)
recv_count[own]++;
}
// Build send buffers: for each destination rank, pack (index, data) pairs
// Each entry: 1 int (point index j) + num_var doubles
int total_send = 0, total_recv = 0;
int *send_offset = new int[nprocs];
int *recv_offset = new int[nprocs];
for (int r = 0; r < nprocs; r++)
{
send_offset[r] = total_send;
total_send += send_count[r];
recv_offset[r] = total_recv;
total_recv += recv_count[r];
}
// Pack send buffers: each message contains (j, data[0..num_var-1]) per point
int stride = 1 + num_var; // 1 double for index + num_var doubles for data
double *sendbuf = new double[total_send * stride];
double *recvbuf = new double[total_recv * stride];
// Temporary counters for packing
int *pack_pos = new int[nprocs];
memset(pack_pos, 0, sizeof(int) * nprocs);
for (int j = 0; j < NN; j++)
{
int own = owner_rank[j];
int con = consumer_rank[j];
if (own != myrank || con == myrank)
continue;
int pos = (send_offset[con] + pack_pos[con]) * stride;
sendbuf[pos] = (double)j; // point index
for (int v = 0; v < num_var; v++)
sendbuf[pos + 1 + v] = Shellf[j * num_var + v];
pack_pos[con]++;
}
// Post non-blocking recvs and sends
int n_req = 0;
for (int r = 0; r < nprocs; r++)
{
if (recv_count[r] > 0) n_req++;
if (send_count[r] > 0) n_req++;
}
MPI_Request *reqs = new MPI_Request[n_req];
int req_idx = 0;
for (int r = 0; r < nprocs; r++)
{
if (recv_count[r] > 0)
{
MPI_Irecv(recvbuf + recv_offset[r] * stride,
recv_count[r] * stride, MPI_DOUBLE,
r, 0, MPI_COMM_WORLD, &reqs[req_idx++]);
}
}
for (int r = 0; r < nprocs; r++)
{
if (send_count[r] > 0)
{
MPI_Isend(sendbuf + send_offset[r] * stride,
send_count[r] * stride, MPI_DOUBLE,
r, 0, MPI_COMM_WORLD, &reqs[req_idx++]);
}
}
if (n_req > 0)
MPI_Waitall(n_req, reqs, MPI_STATUSES_IGNORE);
// Unpack recv buffers into Shellf
for (int i = 0; i < total_recv; i++)
{
int pos = i * stride;
int j = (int)recvbuf[pos];
for (int v = 0; v < num_var; v++)
Shellf[j * num_var + v] = recvbuf[pos + 1 + v];
}
delete[] reqs;
delete[] sendbuf;
delete[] recvbuf;
delete[] pack_pos;
delete[] send_offset;
delete[] recv_offset;
delete[] send_count;
delete[] recv_count;
delete[] consumer_rank;
delete[] owner_rank;
#ifdef INTERP_LB_PROFILE
{
static bool profile_written = false;
if (!profile_written) {
double *all_times = nullptr;
if (myrank == 0) all_times = new double[nprocs];
MPI_Gather(&t_interp_local, 1, MPI_DOUBLE,
all_times, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
if (myrank == 0) {
int heavy[64];
int nh = InterpLBProfile::identify_heavy_ranks(
all_times, nprocs, 2.5, heavy, 64);
InterpLBProfile::write_profile(
"interp_lb_profile.bin", nprocs,
all_times, heavy, nh, 2.5);
printf("[InterpLB] Profile written: %d heavy ranks\n", nh);
for (int i = 0; i < nh; i++)
printf(" Heavy rank %d: %.6f s\n", heavy[i], all_times[heavy[i]]);
delete[] all_times;
}
profile_written = true;
}
}
#endif
delete[] shellf;
delete[] weight;
delete[] Weight;
delete[] DH;
delete[] llb;
delete[] uub;
}
void Patch::Interp_Points(MyList<var> *VarList,
int NN, double **XX,
@@ -921,24 +573,24 @@ void Patch::Interp_Points(MyList<var> *VarList,
varl = varl->next;
}
memset(Shellf, 0, sizeof(double) * NN * num_var);
double *shellf;
shellf = new double[NN * num_var];
memset(shellf, 0, sizeof(double) * NN * num_var);
// owner_rank[j] stores the global rank that owns point j
int *owner_rank;
owner_rank = new int[NN];
for (int j = 0; j < NN; j++)
owner_rank[j] = -1;
// we use weight to monitor code, later some day we can move it for optimization
int *weight;
weight = new int[NN];
memset(weight, 0, sizeof(int) * NN);
// Build global-to-local rank translation for Comm_here
MPI_Group world_group, local_group;
MPI_Comm_group(MPI_COMM_WORLD, &world_group);
MPI_Comm_group(Comm_here, &local_group);
double *DH, *llb, *uub;
DH = new double[dim];
double DH[dim];
for (int i = 0; i < dim; i++)
{
DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
}
llb = new double[dim];
uub = new double[dim];
for (int j = 0; j < NN; j++) // run along points
{
@@ -961,11 +613,45 @@ void Patch::Interp_Points(MyList<var> *VarList,
}
}
const int block_i = find_block_index_for_point(block_index, pox, DH);
if (block_i >= 0)
MyList<Block> *Bp = blb;
bool notfind = true;
while (notfind && Bp) // run along Blocks
{
Block *BP = block_index.views[block_i].bp;
owner_rank[j] = BP->rank;
Block *BP = Bp->data;
bool flag = true;
for (int i = 0; i < dim; i++)
{
// NOTE: our dividing structure is (exclude ghost)
// -1 0
// 1 2
// so (0,1) does not belong to any part for vertex structure
// here we put (0,0.5) to left part and (0.5,1) to right part
// BUT for cell structure the bbox is (-1.5,0.5) and (0.5,2.5), there is no missing region at all
#ifdef Vertex
#ifdef Cell
#error Both Cell and Vertex are defined
#endif
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
#else
#ifdef Cell
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
#else
#error Not define Vertex nor Cell
#endif
#endif
if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2)
{
flag = false;
break;
}
}
if (flag)
{
notfind = false;
if (myrank == BP->rank)
{
//---> interpolation
@@ -973,44 +659,113 @@ void Patch::Interp_Points(MyList<var> *VarList,
int k = 0;
while (varl) // run along variables
{
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
// shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn],
// pox,ordn,varl->data->SoA,Symmetry);
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[j * num_var + k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next;
k++;
}
weight[j] = 1;
}
}
}
// Collect unique global owner ranks and translate to local ranks in Comm_here
// Then broadcast each owner's points via MPI_Bcast on Comm_here
{
int j = 0;
while (j < NN)
{
int cur_owner_global = owner_rank[j];
if (cur_owner_global < 0)
{
// Point not found — skip (error check disabled for sub-communicator levels)
j++;
continue;
}
// Translate global rank to local rank in Comm_here
int cur_owner_local;
MPI_Group_translate_ranks(world_group, 1, &cur_owner_global, local_group, &cur_owner_local);
// Find contiguous run of points with the same owner
int jstart = j;
while (j < NN && owner_rank[j] == cur_owner_global)
j++;
int count = (j - jstart) * num_var;
MPI_Bcast(Shellf + jstart * num_var, count, MPI_DOUBLE, cur_owner_local, Comm_here);
if (Bp == ble)
break;
Bp = Bp->next;
}
}
MPI_Group_free(&world_group);
MPI_Group_free(&local_group);
delete[] owner_rank;
MPI_Allreduce(shellf, Shellf, NN * num_var, MPI_DOUBLE, MPI_SUM, Comm_here);
int *Weight;
Weight = new int[NN];
MPI_Allreduce(weight, Weight, NN, MPI_INT, MPI_SUM, Comm_here);
// misc::tillherecheck("print me");
// if(lmyrank == 0) cout<<"myrank = "<<myrank<<"print me"<<endl;
for (int i = 0; i < NN; i++)
{
if (Weight[i] > 1)
{
if (lmyrank == 0)
cout << "WARNING: Patch::Interp_Points meets multiple weight" << endl;
for (int j = 0; j < num_var; j++)
Shellf[j + i * num_var] = Shellf[j + i * num_var] / Weight[i];
}
#if 0 // for not involved levels, this may fail
else if(Weight[i] == 0 && lmyrank == 0)
{
cout<<"ERROR: Patch::Interp_Points fails to find point (";
for(int j=0;j<dim;j++)
{
cout<<XX[j][i];
if(j<dim-1) cout<<",";
else cout<<")";
}
cout<<" on Patch (";
for(int j=0;j<dim;j++)
{
cout<<bbox[j]<<"+"<<lli[j]*getdX(j);
if(j<dim-1) cout<<",";
else cout<<")--";
}
cout<<"(";
for(int j=0;j<dim;j++)
{
cout<<bbox[dim+j]<<"-"<<uui[j]*getdX(j);
if(j<dim-1) cout<<",";
else cout<<")"<<endl;
}
#if 0
checkBlock();
#else
cout<<"splited domains:"<<endl;
{
MyList<Block> *Bp=blb;
while(Bp)
{
Block *BP=Bp->data;
for(int i=0;i<dim;i++)
{
#ifdef Vertex
#ifdef Cell
#error Both Cell and Vertex are defined
#endif
llb[i] = (feq(BP->bbox[i] ,bbox[i] ,DH[i]/2)) ? BP->bbox[i]+lli[i]*DH[i] : BP->bbox[i] +(ghost_width-0.5)*DH[i];
uub[i] = (feq(BP->bbox[dim+i],bbox[dim+i],DH[i]/2)) ? BP->bbox[dim+i]-uui[i]*DH[i] : BP->bbox[dim+i]-(ghost_width-0.5)*DH[i];
#else
#ifdef Cell
llb[i] = (feq(BP->bbox[i] ,bbox[i] ,DH[i]/2)) ? BP->bbox[i]+lli[i]*DH[i] : BP->bbox[i] +ghost_width*DH[i];
uub[i] = (feq(BP->bbox[dim+i],bbox[dim+i],DH[i]/2)) ? BP->bbox[dim+i]-uui[i]*DH[i] : BP->bbox[dim+i]-ghost_width*DH[i];
#else
#error Not define Vertex nor Cell
#endif
#endif
}
cout<<"(";
for(int j=0;j<dim;j++)
{
cout<<llb[j]<<":"<<uub[j];
if(j<dim-1) cout<<",";
else cout<<")"<<endl;
}
if(Bp == ble) break;
Bp=Bp->next;
}
}
#endif
MPI_Abort(MPI_COMM_WORLD,1);
}
#endif
}
delete[] shellf;
delete[] weight;
delete[] Weight;
delete[] DH;
delete[] llb;
delete[] uub;
}
void Patch::checkBlock()
{

View File

@@ -39,10 +39,6 @@ public:
bool Find_Point(double *XX);
void Interp_Points(MyList<var> *VarList,
int NN, double **XX,
double *Shellf, int Symmetry,
int Nmin_consumer, int Nmax_consumer);
void Interp_Points(MyList<var> *VarList,
int NN, double **XX,
double *Shellf, int Symmetry, MPI_Comm Comm_here);

View File

@@ -32,16 +32,6 @@ namespace Parallel
int partition2(int *nxy, int split_size, int *min_width, int cpusize, int *shape); // special for 2 diemnsions
int partition3(int *nxyz, int split_size, int *min_width, int cpusize, int *shape);
MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks
MyList<Block> *distribute_optimize(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0);
Block* splitHotspotBlock(MyList<Block>* &BlL, int _dim,
int ib0_orig, int ib3_orig,
int jb1_orig, int jb4_orig,
int kb2_orig, int kb5_orig,
Patch* PP, int r_left, int r_right,
int ingfsi, int fngfsi, bool periodic,
Block* &split_first_block, Block* &split_last_block);
Block* createMappedBlock(MyList<Block>* &BlL, int _dim, int* shape, double* bbox,
int block_id, int ingfsi, int fngfsi, int lev);
void KillBlocks(MyList<Patch> *PatchLIST);
void setfunction(MyList<Block> *BlL, var *vn, double func(double x, double y, double z));
@@ -91,49 +81,6 @@ namespace Parallel
int Symmetry);
void Sync(Patch *Pat, MyList<var> *VarList, int Symmetry);
void Sync(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry);
void Sync_merged(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry);
struct SyncCache {
bool valid;
int cpusize;
MyList<gridseg> **combined_src;
MyList<gridseg> **combined_dst;
int *send_lengths;
int *recv_lengths;
double **send_bufs;
double **recv_bufs;
int *send_buf_caps;
int *recv_buf_caps;
MPI_Request *reqs;
MPI_Status *stats;
int max_reqs;
bool lengths_valid;
int *tc_req_node;
int *tc_req_is_recv;
int *tc_completed;
SyncCache();
void invalidate();
void destroy();
};
void Sync_cached(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, SyncCache &cache);
void transfer_cached(MyList<gridseg> **src, MyList<gridseg> **dst,
MyList<var> *VarList1, MyList<var> *VarList2,
int Symmetry, SyncCache &cache);
struct AsyncSyncState {
int req_no;
bool active;
int *req_node;
int *req_is_recv;
int pending_recv;
AsyncSyncState() : req_no(0), active(false), req_node(0), req_is_recv(0), pending_recv(0) {}
};
void Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry,
SyncCache &cache, AsyncSyncState &state);
void Sync_finish(SyncCache &cache, AsyncSyncState &state,
MyList<var> *VarList, int Symmetry);
void OutBdLow2Hi(Patch *Patc, Patch *Patf,
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
int Symmetry);
@@ -146,15 +93,6 @@ namespace Parallel
void OutBdLow2Himix(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
int Symmetry);
void Restrict_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
MyList<var> *VarList1, MyList<var> *VarList2,
int Symmetry, SyncCache &cache);
void OutBdLow2Hi_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
MyList<var> *VarList1, MyList<var> *VarList2,
int Symmetry, SyncCache &cache);
void OutBdLow2Himix_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
MyList<var> *VarList1, MyList<var> *VarList2,
int Symmetry, SyncCache &cache);
void Prolong(Patch *Patc, Patch *Patf,
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
int Symmetry);
@@ -183,7 +121,6 @@ namespace Parallel
MyList<Parallel::gridseg> **out_src, MyList<Parallel::gridseg> **out_dst);
void PeriodicBD(Patch *Pat, MyList<var> *VarList, int Symmetry);
double L2Norm(Patch *Pat, var *vf);
void L2Norm7(Patch *Pat, var **vf, double *norms);
void checkgsl(MyList<Parallel::gridseg> *pp, bool first_only);
void checkvarl(MyList<var> *pp, bool first_only);
MyList<Parallel::gridseg> *divide_gsl(MyList<Parallel::gridseg> *p, Patch *Pat);
@@ -219,7 +156,6 @@ namespace Parallel
void checkpatchlist(MyList<Patch> *PatL, bool buflog);
double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here);
void L2Norm7(Patch *Pat, var **vf, double *norms, MPI_Comm Comm_here);
bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
int NN, double **XX,
double *Shellf, int Symmetry, MPI_Comm Comm_here);

View File

@@ -1,212 +0,0 @@
#include "rungekutta4_rout.h"
#include <cstdio>
#include <cstdlib>
#include <cstddef>
#include <complex>
#include <immintrin.h>
namespace {
inline void rk4_stage0(std::size_t n,
const double *__restrict f0,
const double *__restrict frhs,
double *__restrict f1,
double c) {
std::size_t i = 0;
#if defined(__AVX512F__)
const __m512d vc = _mm512_set1_pd(c);
for (; i + 7 < n; i += 8) {
const __m512d v0 = _mm512_loadu_pd(f0 + i);
const __m512d vr = _mm512_loadu_pd(frhs + i);
_mm512_storeu_pd(f1 + i, _mm512_fmadd_pd(vc, vr, v0));
}
#elif defined(__AVX2__)
const __m256d vc = _mm256_set1_pd(c);
for (; i + 3 < n; i += 4) {
const __m256d v0 = _mm256_loadu_pd(f0 + i);
const __m256d vr = _mm256_loadu_pd(frhs + i);
_mm256_storeu_pd(f1 + i, _mm256_fmadd_pd(vc, vr, v0));
}
#endif
#pragma ivdep
for (; i < n; ++i) {
f1[i] = f0[i] + c * frhs[i];
}
}
inline void rk4_rhs_accum(std::size_t n,
const double *__restrict f1,
double *__restrict frhs) {
std::size_t i = 0;
#if defined(__AVX512F__)
const __m512d v2 = _mm512_set1_pd(2.0);
for (; i + 7 < n; i += 8) {
const __m512d v1 = _mm512_loadu_pd(f1 + i);
const __m512d vrhs = _mm512_loadu_pd(frhs + i);
_mm512_storeu_pd(frhs + i, _mm512_fmadd_pd(v2, v1, vrhs));
}
#elif defined(__AVX2__)
const __m256d v2 = _mm256_set1_pd(2.0);
for (; i + 3 < n; i += 4) {
const __m256d v1 = _mm256_loadu_pd(f1 + i);
const __m256d vrhs = _mm256_loadu_pd(frhs + i);
_mm256_storeu_pd(frhs + i, _mm256_fmadd_pd(v2, v1, vrhs));
}
#endif
#pragma ivdep
for (; i < n; ++i) {
frhs[i] = frhs[i] + 2.0 * f1[i];
}
}
inline void rk4_f1_from_f0_f1(std::size_t n,
const double *__restrict f0,
double *__restrict f1,
double c) {
std::size_t i = 0;
#if defined(__AVX512F__)
const __m512d vc = _mm512_set1_pd(c);
for (; i + 7 < n; i += 8) {
const __m512d v0 = _mm512_loadu_pd(f0 + i);
const __m512d v1 = _mm512_loadu_pd(f1 + i);
_mm512_storeu_pd(f1 + i, _mm512_fmadd_pd(vc, v1, v0));
}
#elif defined(__AVX2__)
const __m256d vc = _mm256_set1_pd(c);
for (; i + 3 < n; i += 4) {
const __m256d v0 = _mm256_loadu_pd(f0 + i);
const __m256d v1 = _mm256_loadu_pd(f1 + i);
_mm256_storeu_pd(f1 + i, _mm256_fmadd_pd(vc, v1, v0));
}
#endif
#pragma ivdep
for (; i < n; ++i) {
f1[i] = f0[i] + c * f1[i];
}
}
inline void rk4_stage3(std::size_t n,
const double *__restrict f0,
double *__restrict f1,
const double *__restrict frhs,
double c) {
std::size_t i = 0;
#if defined(__AVX512F__)
const __m512d vc = _mm512_set1_pd(c);
for (; i + 7 < n; i += 8) {
const __m512d v0 = _mm512_loadu_pd(f0 + i);
const __m512d v1 = _mm512_loadu_pd(f1 + i);
const __m512d vr = _mm512_loadu_pd(frhs + i);
_mm512_storeu_pd(f1 + i, _mm512_fmadd_pd(vc, _mm512_add_pd(v1, vr), v0));
}
#elif defined(__AVX2__)
const __m256d vc = _mm256_set1_pd(c);
for (; i + 3 < n; i += 4) {
const __m256d v0 = _mm256_loadu_pd(f0 + i);
const __m256d v1 = _mm256_loadu_pd(f1 + i);
const __m256d vr = _mm256_loadu_pd(frhs + i);
_mm256_storeu_pd(f1 + i, _mm256_fmadd_pd(vc, _mm256_add_pd(v1, vr), v0));
}
#endif
#pragma ivdep
for (; i < n; ++i) {
f1[i] = f0[i] + c * (f1[i] + frhs[i]);
}
}
} // namespace
extern "C" {
void f_rungekutta4_scalar(double &dT, double &f0, double &f1, double &f_rhs, int &RK4) {
constexpr double F1o6 = 1.0 / 6.0;
constexpr double HLF = 0.5;
constexpr double TWO = 2.0;
switch (RK4) {
case 0:
f1 = f0 + HLF * dT * f_rhs;
break;
case 1:
f_rhs = f_rhs + TWO * f1;
f1 = f0 + HLF * dT * f1;
break;
case 2:
f_rhs = f_rhs + TWO * f1;
f1 = f0 + dT * f1;
break;
case 3:
f1 = f0 + F1o6 * dT * (f1 + f_rhs);
break;
default:
std::fprintf(stderr, "rungekutta4_scalar_c: invalid RK4 stage %d\n", RK4);
std::abort();
}
}
void rungekutta4_cplxscalar_(double &dT,
std::complex<double> &f0,
std::complex<double> &f1,
std::complex<double> &f_rhs,
int &RK4) {
constexpr double F1o6 = 1.0 / 6.0;
constexpr double HLF = 0.5;
constexpr double TWO = 2.0;
switch (RK4) {
case 0:
f1 = f0 + HLF * dT * f_rhs;
break;
case 1:
f_rhs = f_rhs + TWO * f1;
f1 = f0 + HLF * dT * f1;
break;
case 2:
f_rhs = f_rhs + TWO * f1;
f1 = f0 + dT * f1;
break;
case 3:
f1 = f0 + F1o6 * dT * (f1 + f_rhs);
break;
default:
std::fprintf(stderr, "rungekutta4_cplxscalar_c: invalid RK4 stage %d\n", RK4);
std::abort();
}
}
int f_rungekutta4_rout(int *ex, double &dT,
double *f0, double *f1, double *f_rhs,
int &RK4) {
const std::size_t n = static_cast<std::size_t>(ex[0]) *
static_cast<std::size_t>(ex[1]) *
static_cast<std::size_t>(ex[2]);
const double *const __restrict f0r = f0;
double *const __restrict f1r = f1;
double *const __restrict frhs = f_rhs;
if (__builtin_expect(static_cast<unsigned>(RK4) > 3u, 0)) {
std::fprintf(stderr, "rungekutta4_rout_c: invalid RK4 stage %d\n", RK4);
std::abort();
}
switch (RK4) {
case 0:
rk4_stage0(n, f0r, frhs, f1r, 0.5 * dT);
break;
case 1:
rk4_rhs_accum(n, f1r, frhs);
rk4_f1_from_f0_f1(n, f0r, f1r, 0.5 * dT);
break;
case 2:
rk4_rhs_accum(n, f1r, frhs);
rk4_f1_from_f0_f1(n, f0r, f1r, dT);
break;
default:
rk4_stage3(n, f0r, f1r, frhs, (1.0 / 6.0) * dT);
break;
}
return 0;
}
} // extern "C"

View File

@@ -3472,43 +3472,6 @@ double ShellPatch::L2Norm(var *vf)
return tvf;
}
void ShellPatch::L2Norm7(var **vf, double *norms)
{
double tvf[7], dtvf[7];
int BDW = overghost;
for (int i = 0; i < 7; i++)
dtvf[i] = 0;
MyList<ss_patch> *sPp = PatL;
while (sPp)
{
MyList<Block> *Bp = sPp->data->blb;
while (Bp)
{
Block *cg = Bp->data;
if (myrank == cg->rank)
{
f_l2normhelper7(cg->shape, cg->X[0], cg->X[1], cg->X[2],
sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2],
sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5],
cg->fgfs[vf[0]->sgfn], cg->fgfs[vf[1]->sgfn], cg->fgfs[vf[2]->sgfn],
cg->fgfs[vf[3]->sgfn], cg->fgfs[vf[4]->sgfn], cg->fgfs[vf[5]->sgfn],
cg->fgfs[vf[6]->sgfn], tvf, BDW);
for (int i = 0; i < 7; i++)
dtvf[i] += tvf[i];
}
if (Bp == sPp->data->ble)
break;
Bp = Bp->next;
}
sPp = sPp->next;
}
MPI_Allreduce(dtvf, tvf, 7, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
for (int i = 0; i < 7; i++)
norms[i] = sqrt(tvf[i]);
}
// find maximum of abstract value, XX store position for maximum, Shellf store maximum themselvs
void ShellPatch::Find_Maximum(MyList<var> *VarList, double *XX,

View File

@@ -198,7 +198,6 @@ public:
void write_Pablo_file_ss(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax,
char *filename, int sst);
double L2Norm(var *vf);
void L2Norm7(var **vf, double *norms);
void Find_Maximum(MyList<var> *VarList, double *XX, double *Shellf);
};

View File

@@ -1,8 +1,7 @@
#ifndef TWO_PUNCTURES_H
#define TWO_PUNCTURES_H
#include <omp.h>
#define StencilSize 19
#define N_PlaneRelax 1
#define NRELAX 200
@@ -43,18 +42,6 @@ private:
int ntotal;
// ===== Precomputed spectral derivative matrices =====
double *D1_A, *D2_A;
double *D1_B, *D2_B;
double *DF1_phi, *DF2_phi;
// ===== Pre-allocated workspace for LineRelax (per-thread) =====
int max_threads;
double **ws_diag_be, **ws_e_be, **ws_f_be, **ws_b_be, **ws_x_be;
double **ws_l_be, **ws_u_be, **ws_d_be, **ws_y_be;
double **ws_diag_al, **ws_e_al, **ws_f_al, **ws_b_al, **ws_x_al;
double **ws_l_al, **ws_u_al, **ws_d_al, **ws_y_al;
struct parameters
{
int nvar, n1, n2, n3;
@@ -71,28 +58,6 @@ public:
int Newtonmaxit);
~TwoPunctures();
// 02/07: New/modified methods
void allocate_workspace();
void free_workspace();
void precompute_derivative_matrices();
void build_cheb_deriv_matrices(int n, double *D1, double *D2);
void build_fourier_deriv_matrices(int N, double *DF1, double *DF2);
void Derivatives_AB3_MatMul(int nvar, int n1, int n2, int n3, derivs v);
void ThomasAlgorithm_ws(int N, double *b, double *a, double *c, double *x, double *q,
double *l, double *u_ws, double *d, double *y);
void LineRelax_be_omp(double *dv,
int const i, int const k, int const nvar,
int const n1, int const n2, int const n3,
double const *rhs, int const *ncols, int **cols,
double **JFD, int tid);
void LineRelax_al_omp(double *dv,
int const j, int const k, int const nvar,
int const n1, int const n2, int const n3,
double const *rhs, int const *ncols,
int **cols, double **JFD, int tid);
void relax_omp(double *dv, int const nvar, int const n1, int const n2, int const n3,
double const *rhs, int const *ncols, int **cols, double **JFD);
void Solve();
void set_initial_guess(derivs v);
int index(int i, int j, int k, int l, int a, int b, int c, int d);
@@ -151,11 +116,23 @@ public:
double BY_KKofxyz(double x, double y, double z);
void SetMatrix_JFD(int nvar, int n1, int n2, int n3, derivs u, int *ncols, int **cols, double **Matrix);
void J_times_dv(int nvar, int n1, int n2, int n3, derivs dv, double *Jdv, derivs u);
void relax(double *dv, int const nvar, int const n1, int const n2, int const n3,
double const *rhs, int const *ncols, int **cols, double **JFD);
void LineRelax_be(double *dv,
int const i, int const k, int const nvar,
int const n1, int const n2, int const n3,
double const *rhs, int const *ncols, int **cols,
double **JFD);
void JFD_times_dv(int i, int j, int k, int nvar, int n1, int n2,
int n3, derivs dv, derivs u, double *values);
void LinEquations(double A, double B, double X, double R,
double x, double r, double phi,
double y, double z, derivs dU, derivs U, double *values);
void LineRelax_al(double *dv,
int const j, int const k, int const nvar,
int const n1, int const n2, int const n3,
double const *rhs, int const *ncols,
int **cols, double **JFD);
void ThomasAlgorithm(int N, double *b, double *a, double *c, double *x, double *q);
void Save(char *fname);
// provided by Vasileios Paschalidis (vpaschal@illinois.edu)

View File

@@ -48,7 +48,6 @@ public:
double StartTime, TotalTime;
double AnasTime, DumpTime, d2DumpTime, CheckTime;
double LastAnas, LastConsOut;
int *ConstraintRefreshLevels;
double Courant;
double numepss, numepsb, numepsh;
int Symmetry;
@@ -127,15 +126,8 @@ public:
MyList<var> *OldStateList, *DumpList;
MyList<var> *ConstraintList;
Parallel::SyncCache *sync_cache_pre; // per-level cache for predictor sync
Parallel::SyncCache *sync_cache_cor; // per-level cache for corrector sync
Parallel::SyncCache *sync_cache_rp_coarse; // RestrictProlong sync on PatL[lev-1]
Parallel::SyncCache *sync_cache_rp_fine; // RestrictProlong sync on PatL[lev]
Parallel::SyncCache *sync_cache_restrict; // cached Restrict in RestrictProlong
Parallel::SyncCache *sync_cache_outbd; // cached OutBdLow2Hi in RestrictProlong
monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor;
monitor *ConVMonitor, *TimingMonitor;
monitor *ConVMonitor;
surface_integral *Waveshell;
checkpoint *CheckPoint;

View File

@@ -32,19 +32,6 @@
#define f_compute_rhs_Z4c_ss compute_rhs_z4c_ss_
#define f_compute_constraint_fr compute_constraint_fr_
#endif
#ifdef __cplusplus
extern "C"
{
#endif
void f_bssn_rhs_kernel_timing_reset();
int f_bssn_rhs_kernel_timing_bucket_count();
const double *f_bssn_rhs_kernel_timing_local_seconds();
const char *f_bssn_rhs_kernel_timing_label(int);
#ifdef __cplusplus
}
#endif
extern "C"
{
int f_compute_rhs_bssn(int *, double &, double *, double *, double *, // ex,T,X,Y,Z

View File

@@ -130,11 +130,7 @@ void cgh::compose_cgh(int nprocs)
for (int lev = 0; lev < levels; lev++)
{
checkPatchList(PatL[lev], false);
#ifdef INTERP_LB_OPTIMIZE
Parallel::distribute_optimize(PatL[lev], nprocs, ingfs, fngfs, false);
#else
Parallel::distribute(PatL[lev], nprocs, ingfs, fngfs, false);
#endif
#if (RPB == 1)
// we need distributed box of PatL[lev] and PatL[lev-1]
if (lev > 0)
@@ -1305,13 +1301,13 @@ bool cgh::Interp_One_Point(MyList<var> *VarList,
}
bool cgh::Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0,
void cgh::Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0,
MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList, bool BB,
monitor *ErrorMonitor)
{
if (lev < movls)
return false;
return;
#if (0)
// #if (PSTR == 1 || PSTR == 2)
@@ -1400,7 +1396,7 @@ bool cgh::Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, do
for (bhi = 0; bhi < BH_num; bhi++)
delete[] tmpPorg[bhi];
delete[] tmpPorg;
return false;
return;
}
// x direction
rr = (Porg0[bhi][0] - handle[lev][grd][0]) / dX;
@@ -1504,7 +1500,6 @@ bool cgh::Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, do
for (int bhi = 0; bhi < BH_num; bhi++)
delete[] tmpPorg[bhi];
delete[] tmpPorg;
return tot_flag;
}

View File

@@ -74,7 +74,7 @@ public:
MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList,
int Symmetry, bool BB);
bool Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0,
void Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0,
MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList, bool BB,
monitor *ErrorMonitor);

View File

@@ -69,10 +69,12 @@
fy = ZEO
fz = ZEO
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
!DIR$ UNROLL PARTIAL(4)
! Intel compiler directives for aggressive vectorization
!DIR$ SIMD
!DIR$ IVDEP
do k=1,ex(3)-1
do j=1,ex(2)-1
!DIR$ SIMD
do i=1,ex(1)-1
! x direction
if(i+1 <= imax .and. i-1 >= imin)then
@@ -373,8 +375,6 @@
fxz = ZEO
fyz = ZEO
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
!DIR$ UNROLL PARTIAL(4)
do k=1,ex(3)-1
do j=1,ex(2)-1
do i=1,ex(1)-1
@@ -962,6 +962,7 @@
real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)) :: fh
real*8, dimension(3) :: SoA
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
integer :: ib_s, ib_e, jb_s, jb_e, kb_s, kb_e
real*8 :: d12dx,d12dy,d12dz,d2dx,d2dy,d2dz
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
real*8, parameter :: ZEO=0.d0,ONE=1.d0, F60=6.d1
@@ -1001,105 +1002,66 @@
fy = ZEO
fz = ZEO
ib_s = max(1, imin + 2)
ib_e = min(ex(1)-1, imax - 2)
jb_s = max(1, jmin + 2)
jb_e = min(ex(2)-1, jmax - 2)
kb_s = max(1, kmin + 2)
kb_e = min(ex(3)-1, kmax - 2)
! Intel compiler directives for aggressive vectorization
!DIR$ SIMD
!DIR$ IVDEP
do k=1,ex(3)-1
do j=1,ex(2)-1
do i=1,ex(1)-1
#if 0
! x direction
if(i+2 <= imax .and. i-2 >= imin)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
fx(i,j,k)=d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
elseif(i+1 <= imax .and. i-1 >= imin)then
!
! - f(i-1) + f(i+1)
! fx(i) = --------------------------------
! 2 dx
fx(i,j,k)=d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
! Check if (j, k) are within the safe 4th-order range
if (k >= kb_s .and. k <= kb_e .and. j >= jb_s .and. j <= jb_e) then
! set imax and imin 0
endif
! y direction
if(j+2 <= jmax .and. j-2 >= jmin)then
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
elseif(j+1 <= jmax .and. j-1 >= jmin)then
fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
! set jmax and jmin 0
endif
! z direction
if(k+2 <= kmax .and. k-2 >= kmin)then
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
elseif(k+1 <= kmax .and. k-1 >= kmin)then
fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
! set kmax and kmin 0
endif
#elif 0
! x direction
if(i+2 <= imax .and. i-2 >= imin)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
fx(i,j,k)=d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
elseif(i+3 <= imax .and. i-1 >= imin)then
fx(i,j,k)=d12dx*(-3.d0*fh(i-1,j,k)-1.d1*fh(i,j,k)+1.8d1*fh(i+1,j,k)-6.d0*fh(i+2,j,k)+fh(i+3,j,k))
elseif(i+1 <= imax .and. i-3 >= imin)then
fx(i,j,k)=d12dx*( 3.d0*fh(i+1,j,k)+1.d1*fh(i,j,k)-1.8d1*fh(i-1,j,k)+6.d0*fh(i-2,j,k)-fh(i-3,j,k))
! set imax and imin 0
endif
! y direction
if(j+2 <= jmax .and. j-2 >= jmin)then
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
elseif(j+3 <= jmax .and. j-1 >= jmin)then
fy(i,j,k)=d12dy*(-3.d0*fh(i,j-1,k)-1.d1*fh(i,j,k)+1.8d1*fh(i,j+1,k)-6.d0*fh(i,j+2,k)+fh(i,j+3,k))
elseif(j+1 <= jmax .and. j-3 >= jmin)then
fy(i,j,k)=d12dy*( 3.d0*fh(i,j+1,k)+1.d1*fh(i,j,k)-1.8d1*fh(i,j-1,k)+6.d0*fh(i,j-2,k)-fh(i,j-3,k))
! set jmax and jmin 0
endif
! z direction
if(k+2 <= kmax .and. k-2 >= kmin)then
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
elseif(k+3 <= kmax .and. k-1 >= kmin)then
fz(i,j,k)=d12dz*(-3.d0*fh(i,j,k-1)-1.d1*fh(i,j,k)+1.8d1*fh(i,j,k+1)-6.d0*fh(i,j,k+2)+fh(i,j,k+3))
elseif(k+1 <= kmax .and. k-3 >= kmin)then
fz(i,j,k)=d12dz*( 3.d0*fh(i,j,k+1)+1.d1*fh(i,j,k)-1.8d1*fh(i,j,k-1)+6.d0*fh(i,j,k-2)-fh(i,j,k-3))
! set kmax and kmin 0
endif
#else
! for bam comparison
if(i+2 <= imax .and. i-2 >= imin .and. &
j+2 <= jmax .and. j-2 >= jmin .and. &
k+2 <= kmax .and. k-2 >= kmin) then
fx(i,j,k)=d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
elseif(i+1 <= imax .and. i-1 >= imin .and. &
! 1. Left Boundary Peel
do i=1, min(ex(1)-1, ib_s-1)
if(i+1 <= imax .and. i-1 >= imin .and. &
j+1 <= jmax .and. j-1 >= jmin .and. &
k+1 <= kmax .and. k-1 >= kmin) then
fx(i,j,k)=d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
endif
#endif
enddo
! 2. BULK CORE (No branches, full SIMD)
!DIR$ SIMD
do i=ib_s, ib_e
fx(i,j,k)=d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
enddo
! 3. Right Boundary Peel
do i=max(1, ib_e+1), ex(1)-1
if(i+1 <= imax .and. i-1 >= imin .and. &
j+1 <= jmax .and. j-1 >= jmin .and. &
k+1 <= kmax .and. k-1 >= kmin) then
fx(i,j,k)=d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
endif
enddo
else
! We are in a boundary slab (j or k is near edge)
! Fallback to standard loop with checks (max 2nd order possible here)
do i=1,ex(1)-1
if(i+1 <= imax .and. i-1 >= imin .and. &
j+1 <= jmax .and. j-1 >= jmin .and. &
k+1 <= kmax .and. k-1 >= kmin) then
fx(i,j,k)=d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
endif
enddo
endif
enddo
enddo
@@ -2015,8 +1977,12 @@
fy = ZEO
fz = ZEO
! Intel compiler directives for aggressive vectorization
!DIR$ SIMD
!DIR$ IVDEP
do k=1,ex(3)-1
do j=1,ex(2)-1
!DIR$ SIMD
do i=1,ex(1)-1
! x direction
if(i+3 <= imax .and. i-3 >= imin)then
@@ -3084,8 +3050,12 @@
fy = ZEO
fz = ZEO
! Intel compiler directives for aggressive vectorization
!DIR$ SIMD
!DIR$ IVDEP
do k=1,ex(3)-1
do j=1,ex(2)-1
!DIR$ SIMD
do i=1,ex(1)-1
! x direction
if(i+4 <= imax .and. i-4 >= imin)then

File diff suppressed because it is too large Load Diff

View File

@@ -1513,7 +1513,6 @@
real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)) :: fh
real*8, dimension(3) :: SoA
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
integer :: i_core_min,i_core_max,j_core_min,j_core_max,k_core_min,k_core_max
real*8 :: Sdxdx,Sdydy,Sdzdz,Fdxdx,Fdydy,Fdzdz
real*8 :: Sdxdy,Sdxdz,Sdydz,Fdxdy,Fdxdz,Fdydz
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
@@ -1566,47 +1565,9 @@
fxz = ZEO
fyz = ZEO
i_core_min = max(1, imin+2)
i_core_max = min(ex(1), imax-2)
j_core_min = max(1, jmin+2)
j_core_max = min(ex(2), jmax-2)
k_core_min = max(1, kmin+2)
k_core_max = min(ex(3), kmax-2)
if(i_core_min <= i_core_max .and. j_core_min <= j_core_max .and. k_core_min <= k_core_max)then
do k=k_core_min,k_core_max
do j=j_core_min,j_core_max
do i=i_core_min,i_core_max
! interior points always use 4th-order stencils without branch checks
fxx(i,j,k) = Fdxdx*(-fh(i-2,j,k)+F16*fh(i-1,j,k)-F30*fh(i,j,k) &
-fh(i+2,j,k)+F16*fh(i+1,j,k) )
fyy(i,j,k) = Fdydy*(-fh(i,j-2,k)+F16*fh(i,j-1,k)-F30*fh(i,j,k) &
-fh(i,j+2,k)+F16*fh(i,j+1,k) )
fzz(i,j,k) = Fdzdz*(-fh(i,j,k-2)+F16*fh(i,j,k-1)-F30*fh(i,j,k) &
-fh(i,j,k+2)+F16*fh(i,j,k+1) )
fxy(i,j,k) = Fdxdy*( (fh(i-2,j-2,k)-F8*fh(i-1,j-2,k)+F8*fh(i+1,j-2,k)-fh(i+2,j-2,k)) &
-F8 *(fh(i-2,j-1,k)-F8*fh(i-1,j-1,k)+F8*fh(i+1,j-1,k)-fh(i+2,j-1,k)) &
+F8 *(fh(i-2,j+1,k)-F8*fh(i-1,j+1,k)+F8*fh(i+1,j+1,k)-fh(i+2,j+1,k)) &
- (fh(i-2,j+2,k)-F8*fh(i-1,j+2,k)+F8*fh(i+1,j+2,k)-fh(i+2,j+2,k)))
fxz(i,j,k) = Fdxdz*( (fh(i-2,j,k-2)-F8*fh(i-1,j,k-2)+F8*fh(i+1,j,k-2)-fh(i+2,j,k-2)) &
-F8 *(fh(i-2,j,k-1)-F8*fh(i-1,j,k-1)+F8*fh(i+1,j,k-1)-fh(i+2,j,k-1)) &
+F8 *(fh(i-2,j,k+1)-F8*fh(i-1,j,k+1)+F8*fh(i+1,j,k+1)-fh(i+2,j,k+1)) &
- (fh(i-2,j,k+2)-F8*fh(i-1,j,k+2)+F8*fh(i+1,j,k+2)-fh(i+2,j,k+2)))
fyz(i,j,k) = Fdydz*( (fh(i,j-2,k-2)-F8*fh(i,j-1,k-2)+F8*fh(i,j+1,k-2)-fh(i,j+2,k-2)) &
-F8 *(fh(i,j-2,k-1)-F8*fh(i,j-1,k-1)+F8*fh(i,j+1,k-1)-fh(i,j+2,k-1)) &
+F8 *(fh(i,j-2,k+1)-F8*fh(i,j-1,k+1)+F8*fh(i,j+1,k+1)-fh(i,j+2,k+1)) &
- (fh(i,j-2,k+2)-F8*fh(i,j-1,k+2)+F8*fh(i,j+1,k+2)-fh(i,j+2,k+2)))
enddo
enddo
enddo
endif
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
if(i>=i_core_min .and. i<=i_core_max .and. &
j>=j_core_min .and. j<=j_core_max .and. &
k>=k_core_min .and. k<=k_core_max) cycle
!~~~~~~ fxx
if(i+2 <= imax .and. i-2 >= imin)then
!

Some files were not shown because too many files have changed in this diff Show More