Compare commits

..

2 Commits

Author SHA1 Message Date
f7ada421cf skip redundant MPI ghost cell syncs for stages 0, 1 & 2
BSSN 每个 RK4 时间步执行 4 次 MPI ghost zone 同步:
Stage 0(预测)结束后:Parallel::Sync(SynchList_pre)
Stage 1(校正 1)结束后:Parallel::Sync(SynchList_cor)
Stage 2(校正 2)结束后:Parallel::Sync(SynchList_cor)
Stage 3(校正 3)结束后:Parallel::Sync(SynchList_cor) ← 必要(为下一步提供 ghost)

bssnEM_class.C、Z4c_class.C 结构相同,一起修改了
2026-02-26 16:16:33 +08:00
fb9f153662 Initialize output arrays to zero in fdderivs_c.C and fderivs_c.C 2026-02-26 11:48:28 +08:00
233 changed files with 190852 additions and 194805 deletions

6
.idea/vcs.xml generated
View File

@@ -1,6 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>

View File

@@ -126,6 +126,11 @@ setup.generate_AMSSNCKU_input()
#inputvalue = input() ## Wait for user input (press Enter) to proceed #inputvalue = input() ## Wait for user input (press Enter) to proceed
#print() #print()
setup.print_puncture_information()
##################################################################
## Generate AMSS-NCKU program input files based on the configured parameters ## Generate AMSS-NCKU program input files based on the configured parameters
print( ) print( )
@@ -265,12 +270,6 @@ if not os.path.exists( ABE_file ):
## Copy the executable ABE (or ABEGPU) into the run directory ## Copy the executable ABE (or ABEGPU) into the run directory
shutil.copy2(ABE_file, output_directory) shutil.copy2(ABE_file, output_directory)
## Copy interp load balance profile if present (for optimize pass)
interp_lb_profile = os.path.join(AMSS_NCKU_source_copy, "interp_lb_profile.bin")
if os.path.exists(interp_lb_profile):
shutil.copy2(interp_lb_profile, output_directory)
print( " Copied interp_lb_profile.bin to run directory " )
########################### ###########################
## If the initial-data method is TwoPuncture, copy the TwoPunctureABE executable to the run directory ## If the initial-data method is TwoPuncture, copy the TwoPunctureABE executable to the run directory
@@ -307,7 +306,7 @@ if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ):
import generate_TwoPuncture_input import generate_TwoPuncture_input
generate_TwoPuncture_input.generate_AMSSNCKU_TwoPuncture_input(numerical_grid.puncture_data) generate_TwoPuncture_input.generate_AMSSNCKU_TwoPuncture_input()
print( ) print( )
print( " The input parfile for the TwoPunctureABE executable has been generated. " ) print( " The input parfile for the TwoPunctureABE executable has been generated. " )
@@ -349,7 +348,7 @@ if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ):
import renew_puncture_parameter import renew_puncture_parameter
renew_puncture_parameter.append_AMSSNCKU_BSSN_input(File_directory, output_directory, numerical_grid.puncture_data) renew_puncture_parameter.append_AMSSNCKU_BSSN_input(File_directory, output_directory)
## Generated AMSS-NCKU input filename ## Generated AMSS-NCKU input filename

View File

@@ -1,19 +1,10 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
AMSS-NCKU GW150914 Simulation Regression Test Script (Comprehensive Version) AMSS-NCKU GW150914 Simulation Regression Test Script
Verification Requirements: Verification Requirements:
1. RMS errors < 1% for: 1. XY-plane trajectory RMS error < 1% (Optimized vs. baseline, max of BH1 and BH2)
- 3D Vector Total RMS
- X Component RMS
- Y Component RMS
- Z Component RMS
2. ADM constraint violation < 2 (Grid Level 0) 2. ADM constraint violation < 2 (Grid Level 0)
3. The following figure PDFs must match GW150914-origin exactly after rasterization:
- ADM_Constraint_Grid_Level_0.pdf
- BH_Trajectory_21_XY.pdf
- BH_Trajectory_XY.pdf
The script also reports the percentage of differing pixels for each figure.
RMS Calculation Method: RMS Calculation Method:
- Computes trajectory deviation on the XY plane independently for BH1 and BH2 - Computes trajectory deviation on the XY plane independently for BH1 and BH2
@@ -28,10 +19,6 @@ Reference: GW150914-origin (baseline simulation)
import numpy as np import numpy as np
import sys import sys
import os import os
import shutil
import subprocess
import tempfile
from PIL import Image
# ANSI Color Codes # ANSI Color Codes
class Color: class Color:
@@ -71,187 +58,78 @@ def load_constraint_data(filepath):
return np.array(data) return np.array(data)
def resolve_figure_dir(path): def calculate_rms_error(bh_data_ref, bh_data_target):
"""Resolve the sibling figure directory from an output or figure path."""
normalized = os.path.normpath(path)
if os.path.basename(normalized) == "figure":
return normalized
return os.path.join(os.path.dirname(normalized), "figure")
def render_pdf_to_images(pdf_path, dpi=150):
"""Render a PDF to RGB images using Ghostscript."""
gs_path = shutil.which("gs")
if gs_path is None:
raise RuntimeError("Ghostscript executable 'gs' was not found in PATH")
with tempfile.TemporaryDirectory(prefix="amss_verify_pdf_") as temp_dir:
output_pattern = os.path.join(temp_dir, "page-%03d.ppm")
cmd = [
gs_path,
"-q",
"-dSAFER",
"-dBATCH",
"-dNOPAUSE",
"-sDEVICE=ppmraw",
f"-r{dpi}",
f"-o{output_pattern}",
pdf_path
]
try:
subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True)
except subprocess.CalledProcessError as exc:
message = exc.stderr.strip() or str(exc)
raise RuntimeError(f"Failed to render PDF '{pdf_path}': {message}") from exc
ppm_files = sorted(
os.path.join(temp_dir, filename)
for filename in os.listdir(temp_dir)
if filename.endswith(".ppm")
)
if not ppm_files:
raise RuntimeError(f"No rendered pages were produced for '{pdf_path}'")
images = []
for ppm_file in ppm_files:
with Image.open(ppm_file) as img:
images.append(np.array(img.convert("RGB"), dtype=np.uint8))
return images
def compare_rendered_pages(ref_img, target_img):
"""Return (different_pixels, total_pixels) for two rendered RGB pages."""
ref_h, ref_w = ref_img.shape[:2]
tgt_h, tgt_w = target_img.shape[:2]
total_pixels = max(ref_h, tgt_h) * max(ref_w, tgt_w)
if ref_h == tgt_h and ref_w == tgt_w:
different_pixels = int(np.count_nonzero(np.any(ref_img != target_img, axis=2)))
return different_pixels, total_pixels
diff_mask = np.ones((max(ref_h, tgt_h), max(ref_w, tgt_w)), dtype=bool)
overlap_h = min(ref_h, tgt_h)
overlap_w = min(ref_w, tgt_w)
overlap_diff = np.any(ref_img[:overlap_h, :overlap_w] != target_img[:overlap_h, :overlap_w], axis=2)
diff_mask[:overlap_h, :overlap_w] = overlap_diff
different_pixels = int(np.count_nonzero(diff_mask))
return different_pixels, total_pixels
def compare_pdf_images(ref_pdf, target_pdf, dpi=150, threshold_percent=0.001):
"""Compare two PDFs by rasterizing them and counting differing pixels."""
ref_pages = render_pdf_to_images(ref_pdf, dpi=dpi)
target_pages = render_pdf_to_images(target_pdf, dpi=dpi)
total_pixels = 0
different_pixels = 0
max_pages = max(len(ref_pages), len(target_pages))
for page_idx in range(max_pages):
if page_idx < len(ref_pages) and page_idx < len(target_pages):
page_diff, page_total = compare_rendered_pages(ref_pages[page_idx], target_pages[page_idx])
else:
existing_page = ref_pages[page_idx] if page_idx < len(ref_pages) else target_pages[page_idx]
page_total = existing_page.shape[0] * existing_page.shape[1]
page_diff = page_total
total_pixels += page_total
different_pixels += page_diff
diff_percent = (different_pixels / total_pixels * 100.0) if total_pixels else 0.0
return {
"different_pixels": different_pixels,
"total_pixels": total_pixels,
"diff_percent": diff_percent,
"pages_ref": len(ref_pages),
"pages_target": len(target_pages),
"passed": diff_percent < threshold_percent
}
def compare_required_figures(reference_figure_dir, target_figure_dir):
"""Compare the required GW150914 figure PDFs."""
figure_names = [
"ADM_Constraint_Grid_Level_0.pdf",
"BH_Trajectory_21_XY.pdf",
"BH_Trajectory_XY.pdf"
]
results = []
for figure_name in figure_names:
ref_pdf = os.path.join(reference_figure_dir, figure_name)
target_pdf = os.path.join(target_figure_dir, figure_name)
if not os.path.exists(ref_pdf):
raise FileNotFoundError(f"Reference figure not found: {ref_pdf}")
if not os.path.exists(target_pdf):
raise FileNotFoundError(f"Target figure not found: {target_pdf}")
comparison = compare_pdf_images(ref_pdf, target_pdf)
comparison["name"] = figure_name
results.append(comparison)
return results
def calculate_all_rms_errors(bh_data_ref, bh_data_target):
""" """
Calculate 3D Vector RMS and component-wise RMS (X, Y, Z) independently. Calculate trajectory-based RMS error on the XY plane between baseline and optimized simulations.
Uses r = sqrt(x^2 + y^2) as the denominator for all error normalizations.
Returns the maximum error between BH1 and BH2 for each category. This function computes the RMS error independently for BH1 and BH2 trajectories,
then returns the maximum of the two as the final RMS error metric.
For each black hole, the RMS is calculated as:
RMS = sqrt( (1/M) * sum( (Δr_i / r_i^max)^2 ) ) × 100%
where:
Δr_i = sqrt((x_ref,i - x_new,i)^2 + (y_ref,i - y_new,i)^2)
r_i^max = max(sqrt(x_ref,i^2 + y_ref,i^2), sqrt(x_new,i^2 + y_new,i^2))
Args:
bh_data_ref: Reference (baseline) trajectory data
bh_data_target: Target (optimized) trajectory data
Returns:
rms_value: Final RMS error as a percentage (max of BH1 and BH2)
error: Error message if any
""" """
# Align data: truncate to the length of the shorter dataset
M = min(len(bh_data_ref['time']), len(bh_data_target['time'])) M = min(len(bh_data_ref['time']), len(bh_data_target['time']))
if M < 10: if M < 10:
return None, "Insufficient data points for comparison" return None, "Insufficient data points for comparison"
results = {} # Extract XY coordinates for both black holes
x1_ref = bh_data_ref['x1'][:M]
y1_ref = bh_data_ref['y1'][:M]
x2_ref = bh_data_ref['x2'][:M]
y2_ref = bh_data_ref['y2'][:M]
for bh in ['1', '2']: x1_new = bh_data_target['x1'][:M]
x_r, y_r, z_r = bh_data_ref[f'x{bh}'][:M], bh_data_ref[f'y{bh}'][:M], bh_data_ref[f'z{bh}'][:M] y1_new = bh_data_target['y1'][:M]
x_n, y_n, z_n = bh_data_target[f'x{bh}'][:M], bh_data_target[f'y{bh}'][:M], bh_data_target[f'z{bh}'][:M] x2_new = bh_data_target['x2'][:M]
y2_new = bh_data_target['y2'][:M]
# 核心修改:根据组委会的邮件指示,分母统一使用 r = sqrt(x^2 + y^2) # Calculate RMS for BH1
r_ref = np.sqrt(x_r**2 + y_r**2) delta_r1 = np.sqrt((x1_ref - x1_new)**2 + (y1_ref - y1_new)**2)
r_new = np.sqrt(x_n**2 + y_n**2) r1_ref = np.sqrt(x1_ref**2 + y1_ref**2)
denom_max = np.maximum(r_ref, r_new) r1_new = np.sqrt(x1_new**2 + y1_new**2)
r1_max = np.maximum(r1_ref, r1_new)
valid = denom_max > 1e-15 # Calculate RMS for BH2
if np.sum(valid) < 10: delta_r2 = np.sqrt((x2_ref - x2_new)**2 + (y2_ref - y2_new)**2)
results[f'BH{bh}'] = { '3D_Vector': 0.0, 'X_Component': 0.0, 'Y_Component': 0.0, 'Z_Component': 0.0 } r2_ref = np.sqrt(x2_ref**2 + y2_ref**2)
continue r2_new = np.sqrt(x2_new**2 + y2_new**2)
r2_max = np.maximum(r2_ref, r2_new)
def calc_rms(delta): # Avoid division by zero for BH1
# 将对应分量的偏差除以统一的轨道半径分母 denom_max valid_mask1 = r1_max > 1e-15
return np.sqrt(np.mean((delta[valid] / denom_max[valid])**2)) * 100 if np.sum(valid_mask1) < 10:
return None, "Insufficient valid data points for BH1"
# 1. Total 3D Vector RMS terms1 = (delta_r1[valid_mask1] / r1_max[valid_mask1])**2
delta_vec = np.sqrt((x_r - x_n)**2 + (y_r - y_n)**2 + (z_r - z_n)**2) rms_bh1 = np.sqrt(np.mean(terms1)) * 100
rms_3d = calc_rms(delta_vec)
# 2. Component-wise RMS (分离计算各轴,但共用半径分母) # Avoid division by zero for BH2
rms_x = calc_rms(np.abs(x_r - x_n)) valid_mask2 = r2_max > 1e-15
rms_y = calc_rms(np.abs(y_r - y_n)) if np.sum(valid_mask2) < 10:
rms_z = calc_rms(np.abs(z_r - z_n)) return None, "Insufficient valid data points for BH2"
results[f'BH{bh}'] = { terms2 = (delta_r2[valid_mask2] / r2_max[valid_mask2])**2
'3D_Vector': rms_3d, rms_bh2 = np.sqrt(np.mean(terms2)) * 100
'X_Component': rms_x,
'Y_Component': rms_y,
'Z_Component': rms_z
}
# 获取 BH1 BH2 中的最大误差 # Final RMS is the maximum of BH1 and BH2
max_rms = { rms_final = max(rms_bh1, rms_bh2)
'3D_Vector': max(results['BH1']['3D_Vector'], results['BH2']['3D_Vector']),
'X_Component': max(results['BH1']['X_Component'], results['BH2']['X_Component']), return rms_final, None
'Y_Component': max(results['BH1']['Y_Component'], results['BH2']['Y_Component']),
'Z_Component': max(results['BH1']['Z_Component'], results['BH2']['Z_Component'])
}
return max_rms, None
def analyze_constraint_violation(constraint_data, n_levels=9): def analyze_constraint_violation(constraint_data, n_levels=9):
""" """
@@ -277,32 +155,34 @@ def analyze_constraint_violation(constraint_data, n_levels=9):
def print_header(): def print_header():
"""Print report header"""
print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET) print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
print(Color.BOLD + " AMSS-NCKU GW150914 Comprehensive Regression Test" + Color.RESET) print(Color.BOLD + " AMSS-NCKU GW150914 Simulation Regression Test Report" + Color.RESET)
print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET) print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
def print_rms_results(rms_dict, error, threshold=1.0):
print(f"\n{Color.BOLD}1. RMS Error Analysis (Maximums of BH1 & BH2){Color.RESET}") def print_rms_results(rms_rel, error, threshold=1.0):
print("-" * 65) """Print RMS error results"""
print(f"\n{Color.BOLD}1. RMS Error Analysis (Baseline vs Optimized){Color.RESET}")
print("-" * 45)
if error: if error:
print(f" {Color.RED}Error: {error}{Color.RESET}") print(f" {Color.RED}Error: {error}{Color.RESET}")
return False return False
all_passed = True passed = rms_rel < threshold
print(f" Requirement: < {threshold}%\n")
for key, val in rms_dict.items(): print(f" RMS relative error: {rms_rel:.4f}%")
passed = val < threshold print(f" Requirement: < {threshold}%")
all_passed = all_passed and passed print(f" Status: {get_status_text(passed)}")
status = get_status_text(passed)
print(f" {key:15}: {val:8.4f}% | Status: {status}") return passed
return all_passed
def print_constraint_results(results, threshold=2.0): def print_constraint_results(results, threshold=2.0):
"""Print constraint violation results"""
print(f"\n{Color.BOLD}2. ADM Constraint Violation Analysis (Grid Level 0){Color.RESET}") print(f"\n{Color.BOLD}2. ADM Constraint Violation Analysis (Grid Level 0){Color.RESET}")
print("-" * 65) print("-" * 45)
names = ['Ham', 'Px', 'Py', 'Pz', 'Gx', 'Gy', 'Gz'] names = ['Ham', 'Px', 'Py', 'Pz', 'Gx', 'Gy', 'Gz']
for i, name in enumerate(names): for i, name in enumerate(names):
@@ -319,45 +199,19 @@ def print_constraint_results(results, threshold=2.0):
return passed return passed
def print_figure_results(results, threshold_percent=0.001): def print_summary(rms_passed, constraint_passed):
print(f"\n{Color.BOLD}3. Figure Pixel Comparison (PDF Rasterization){Color.RESET}") """Print summary"""
print("-" * 65)
print(f" Requirement: < {threshold_percent:.3f}% differing pixels\n")
all_passed = True
for result in results:
passed = result["passed"]
all_passed = all_passed and passed
status = get_status_text(passed)
print(f" {result['name']:32}: {result['diff_percent']:10.6f}% | Status: {status}")
if result["pages_ref"] != result["pages_target"]:
print(f" {'':32} pages(ref/target): {result['pages_ref']}/{result['pages_target']}")
return all_passed
def print_figure_error(error_message):
print(f"\n{Color.BOLD}3. Figure Pixel Comparison (PDF Rasterization){Color.RESET}")
print("-" * 65)
print(f" {Color.RED}Error: {error_message}{Color.RESET}")
return False
def print_summary(rms_passed, constraint_passed, figure_passed):
print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET) print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
print(Color.BOLD + "Verification Summary" + Color.RESET) print(Color.BOLD + "Verification Summary" + Color.RESET)
print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET) print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
all_passed = rms_passed and constraint_passed and figure_passed all_passed = rms_passed and constraint_passed
res_rms = get_status_text(rms_passed) res_rms = get_status_text(rms_passed)
res_con = get_status_text(constraint_passed) res_con = get_status_text(constraint_passed)
res_fig = get_status_text(figure_passed)
print(f" [1] Comprehensive RMS check: {res_rms}") print(f" [1] RMS trajectory check: {res_rms}")
print(f" [2] ADM constraint check: {res_con}") print(f" [2] ADM constraint check: {res_con}")
print(f" [3] Figure pixel comparison: {res_fig}")
final_status = f"{Color.GREEN}{Color.BOLD}ALL CHECKS PASSED{Color.RESET}" if all_passed else f"{Color.RED}{Color.BOLD}SOME CHECKS FAILED{Color.RESET}" final_status = f"{Color.GREEN}{Color.BOLD}ALL CHECKS PASSED{Color.RESET}" if all_passed else f"{Color.RED}{Color.BOLD}SOME CHECKS FAILED{Color.RESET}"
print(f"\n Overall result: {final_status}") print(f"\n Overall result: {final_status}")
@@ -365,58 +219,61 @@ def print_summary(rms_passed, constraint_passed, figure_passed):
return all_passed return all_passed
def main(): def main():
# Determine target (optimized) output directory
if len(sys.argv) > 1: if len(sys.argv) > 1:
target_dir = sys.argv[1] target_dir = sys.argv[1]
else: else:
script_dir = os.path.dirname(os.path.abspath(__file__)) script_dir = os.path.dirname(os.path.abspath(__file__))
target_dir = os.path.join(script_dir, "GW150914/AMSS_NCKU_output") target_dir = os.path.join(script_dir, "GW150914/AMSS_NCKU_output")
# Determine reference (baseline) directory
script_dir = os.path.dirname(os.path.abspath(__file__)) script_dir = os.path.dirname(os.path.abspath(__file__))
reference_dir = os.path.join(script_dir, "GW150914-origin/AMSS_NCKU_output") reference_dir = os.path.join(script_dir, "GW150914-origin/AMSS_NCKU_output")
target_figure_dir = resolve_figure_dir(target_dir)
reference_figure_dir = os.path.join(script_dir, "GW150914-origin/figure")
# Data file paths
bh_file_ref = os.path.join(reference_dir, "bssn_BH.dat") bh_file_ref = os.path.join(reference_dir, "bssn_BH.dat")
bh_file_target = os.path.join(target_dir, "bssn_BH.dat") bh_file_target = os.path.join(target_dir, "bssn_BH.dat")
constraint_file = os.path.join(target_dir, "bssn_constraint.dat") constraint_file = os.path.join(target_dir, "bssn_constraint.dat")
# Check if files exist
if not os.path.exists(bh_file_ref): if not os.path.exists(bh_file_ref):
print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Baseline trajectory file not found: {bh_file_ref}") print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Baseline trajectory file not found: {bh_file_ref}")
sys.exit(1) sys.exit(1)
if not os.path.exists(bh_file_target): if not os.path.exists(bh_file_target):
print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Target trajectory file not found: {bh_file_target}") print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Target trajectory file not found: {bh_file_target}")
sys.exit(1) sys.exit(1)
if not os.path.exists(constraint_file): if not os.path.exists(constraint_file):
print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Constraint data file not found: {constraint_file}") print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Constraint data file not found: {constraint_file}")
sys.exit(1) sys.exit(1)
# Print header
print_header() print_header()
print(f"\n{Color.BOLD}Reference (Baseline):{Color.RESET} {Color.BLUE}{reference_dir}{Color.RESET}") print(f"\n{Color.BOLD}Reference (Baseline):{Color.RESET} {Color.BLUE}{reference_dir}{Color.RESET}")
print(f"{Color.BOLD}Target (Optimized): {Color.RESET} {Color.BLUE}{target_dir}{Color.RESET}") print(f"{Color.BOLD}Target (Optimized): {Color.RESET} {Color.BLUE}{target_dir}{Color.RESET}")
print(f"{Color.BOLD}Reference Figures: {Color.RESET} {Color.BLUE}{reference_figure_dir}{Color.RESET}")
print(f"{Color.BOLD}Target Figures: {Color.RESET} {Color.BLUE}{target_figure_dir}{Color.RESET}")
# Load data
bh_data_ref = load_bh_trajectory(bh_file_ref) bh_data_ref = load_bh_trajectory(bh_file_ref)
bh_data_target = load_bh_trajectory(bh_file_target) bh_data_target = load_bh_trajectory(bh_file_target)
constraint_data = load_constraint_data(constraint_file) constraint_data = load_constraint_data(constraint_file)
# Output modified RMS results # Calculate RMS error
rms_dict, error = calculate_all_rms_errors(bh_data_ref, bh_data_target) rms_rel, error = calculate_rms_error(bh_data_ref, bh_data_target)
rms_passed = print_rms_results(rms_dict, error) rms_passed = print_rms_results(rms_rel, error)
# Output constraint results # Analyze constraint violation
constraint_results = analyze_constraint_violation(constraint_data) constraint_results = analyze_constraint_violation(constraint_data)
constraint_passed = print_constraint_results(constraint_results) constraint_passed = print_constraint_results(constraint_results)
try: # Print summary
figure_results = compare_required_figures(reference_figure_dir, target_figure_dir) all_passed = print_summary(rms_passed, constraint_passed)
figure_passed = print_figure_results(figure_results)
except (FileNotFoundError, RuntimeError) as exc:
figure_passed = print_figure_error(str(exc))
all_passed = print_summary(rms_passed, constraint_passed, figure_passed) # Return exit code
sys.exit(0 if all_passed else 1) sys.exit(0 if all_passed else 1)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@@ -1,248 +0,0 @@
#include "tool.h"
/*
* Combined advection (lopsided) + KO dissipation (kodis).
* Uses one shared symmetry_bd buffer per call.
*/
void lopsided_kodis(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, double *f_rhs,
const double *Sfx, const double *Sfy, const double *Sfz,
int Symmetry, const double SoA[3], double eps)
{
const double ZEO = 0.0, ONE = 1.0, F3 = 3.0;
const double F6 = 6.0, F18 = 18.0;
const double F12 = 12.0, F10 = 10.0, EIT = 8.0;
const double SIX = 6.0, FIT = 15.0, TWT = 20.0;
const double cof = 64.0; // 2^6
const int NO_SYMM = 0, EQ_SYMM = 1;
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
const int imaxF = ex1;
const int jmaxF = ex2;
const int kmaxF = ex3;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
// fh for Fortran-style domain (-2:ex1,-2:ex2,-2:ex3)
const size_t nx = (size_t)ex1 + 3;
const size_t ny = (size_t)ex2 + 3;
const size_t nz = (size_t)ex3 + 3;
const size_t fh_size = nx * ny * nz;
double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
symmetry_bd(3, ex, f, fh, SoA);
// Advection (same stencil logic as lopsided_c.C)
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
const double sfx = Sfx[p];
if (sfx > ZEO) {
if (i0 <= ex1 - 4) {
f_rhs[p] += sfx * d12dx *
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
} else if (i0 <= ex1 - 3) {
f_rhs[p] += sfx * d12dx *
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
} else if (i0 <= ex1 - 2) {
f_rhs[p] -= sfx * d12dx *
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
}
} else if (sfx < ZEO) {
if ((i0 - 2) >= iminF) {
f_rhs[p] -= sfx * d12dx *
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
} else if ((i0 - 1) >= iminF) {
f_rhs[p] += sfx * d12dx *
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
} else if (i0 >= iminF) {
f_rhs[p] += sfx * d12dx *
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
}
}
const double sfy = Sfy[p];
if (sfy > ZEO) {
if (j0 <= ex2 - 4) {
f_rhs[p] += sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
} else if (j0 <= ex2 - 3) {
f_rhs[p] += sfy * d12dy *
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
} else if (j0 <= ex2 - 2) {
f_rhs[p] -= sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
}
} else if (sfy < ZEO) {
if ((j0 - 2) >= jminF) {
f_rhs[p] -= sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
} else if ((j0 - 1) >= jminF) {
f_rhs[p] += sfy * d12dy *
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
} else if (j0 >= jminF) {
f_rhs[p] += sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
}
}
const double sfz = Sfz[p];
if (sfz > ZEO) {
if (k0 <= ex3 - 4) {
f_rhs[p] += sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
} else if (k0 <= ex3 - 3) {
f_rhs[p] += sfz * d12dz *
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
} else if (k0 <= ex3 - 2) {
f_rhs[p] -= sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
}
} else if (sfz < ZEO) {
if ((k0 - 2) >= kminF) {
f_rhs[p] -= sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
} else if ((k0 - 1) >= kminF) {
f_rhs[p] += sfz * d12dz *
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
} else if (k0 >= kminF) {
f_rhs[p] += sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
}
}
}
}
}
// KO dissipation (same domain restriction as kodiss_c.C)
if (eps > ZEO) {
const int i0_lo = (iminF + 2 > 0) ? iminF + 2 : 0;
const int j0_lo = (jminF + 2 > 0) ? jminF + 2 : 0;
const int k0_lo = (kminF + 2 > 0) ? kminF + 2 : 0;
const int i0_hi = imaxF - 4; // inclusive
const int j0_hi = jmaxF - 4;
const int k0_hi = kmaxF - 4;
if (!(i0_lo > i0_hi || j0_lo > j0_hi || k0_lo > k0_hi)) {
for (int k0 = k0_lo; k0 <= k0_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j0_lo; j0 <= j0_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i0_lo; i0 <= i0_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
const double Dx_term =
((fh[idx_fh_F(iF - 3, jF, kF, ex)] + fh[idx_fh_F(iF + 3, jF, kF, ex)]) -
SIX * (fh[idx_fh_F(iF - 2, jF, kF, ex)] + fh[idx_fh_F(iF + 2, jF, kF, ex)]) +
FIT * (fh[idx_fh_F(iF - 1, jF, kF, ex)] + fh[idx_fh_F(iF + 1, jF, kF, ex)]) -
TWT * fh[idx_fh_F(iF, jF, kF, ex)]) / dX;
const double Dy_term =
((fh[idx_fh_F(iF, jF - 3, kF, ex)] + fh[idx_fh_F(iF, jF + 3, kF, ex)]) -
SIX * (fh[idx_fh_F(iF, jF - 2, kF, ex)] + fh[idx_fh_F(iF, jF + 2, kF, ex)]) +
FIT * (fh[idx_fh_F(iF, jF - 1, kF, ex)] + fh[idx_fh_F(iF, jF + 1, kF, ex)]) -
TWT * fh[idx_fh_F(iF, jF, kF, ex)]) / dY;
const double Dz_term =
((fh[idx_fh_F(iF, jF, kF - 3, ex)] + fh[idx_fh_F(iF, jF, kF + 3, ex)]) -
SIX * (fh[idx_fh_F(iF, jF, kF - 2, ex)] + fh[idx_fh_F(iF, jF, kF + 2, ex)]) +
FIT * (fh[idx_fh_F(iF, jF, kF - 1, ex)] + fh[idx_fh_F(iF, jF, kF + 1, ex)]) -
TWT * fh[idx_fh_F(iF, jF, kF, ex)]) / dZ;
f_rhs[p] += (eps / cof) * (Dx_term + Dy_term + Dz_term);
}
}
}
}
}
free(fh);
}

View File

@@ -7,178 +7,12 @@
#include <string> #include <string>
#include <cmath> #include <cmath>
#include <new> #include <new>
#include <vector>
using namespace std; using namespace std;
#include "misc.h" #include "misc.h"
#include "MPatch.h" #include "MPatch.h"
#include "Parallel.h" #include "Parallel.h"
#include "fmisc.h" #include "fmisc.h"
#ifdef INTERP_LB_PROFILE
#include "interp_lb_profile.h"
#endif
namespace
{
struct InterpBlockView
{
Block *bp;
double llb[dim];
double uub[dim];
};
struct BlockBinIndex
{
int bins[dim];
double lo[dim];
double inv[dim];
vector<InterpBlockView> views;
vector<vector<int>> bin_to_blocks;
bool valid;
BlockBinIndex() : valid(false)
{
for (int i = 0; i < dim; i++)
{
bins[i] = 1;
lo[i] = 0.0;
inv[i] = 0.0;
}
}
};
inline int clamp_int(int v, int lo, int hi)
{
return (v < lo) ? lo : ((v > hi) ? hi : v);
}
inline int coord_to_bin(double x, double lo, double inv, int nb)
{
if (nb <= 1 || inv <= 0.0)
return 0;
int b = int(floor((x - lo) * inv));
return clamp_int(b, 0, nb - 1);
}
inline int bin_loc(const BlockBinIndex &index, int b0, int b1, int b2)
{
return b0 + index.bins[0] * (b1 + index.bins[1] * b2);
}
inline bool point_in_block_view(const InterpBlockView &view, const double *pox, const double *DH)
{
for (int i = 0; i < dim; i++)
{
if (pox[i] - view.llb[i] < -DH[i] / 2 || pox[i] - view.uub[i] > DH[i] / 2)
return false;
}
return true;
}
void build_block_bin_index(Patch *patch, const double *DH, BlockBinIndex &index)
{
index = BlockBinIndex();
MyList<Block> *Bp = patch->blb;
while (Bp)
{
Block *BP = Bp->data;
InterpBlockView view;
view.bp = BP;
for (int i = 0; i < dim; i++)
{
#ifdef Vertex
#ifdef Cell
#error Both Cell and Vertex are defined
#endif
view.llb[i] = (feq(BP->bbox[i], patch->bbox[i], DH[i] / 2)) ? BP->bbox[i] + patch->lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
view.uub[i] = (feq(BP->bbox[dim + i], patch->bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - patch->uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
#else
#ifdef Cell
view.llb[i] = (feq(BP->bbox[i], patch->bbox[i], DH[i] / 2)) ? BP->bbox[i] + patch->lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
view.uub[i] = (feq(BP->bbox[dim + i], patch->bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - patch->uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
#else
#error Not define Vertex nor Cell
#endif
#endif
}
index.views.push_back(view);
if (Bp == patch->ble)
break;
Bp = Bp->next;
}
const int nblocks = int(index.views.size());
if (nblocks <= 0)
return;
int bins_1d = int(ceil(pow(double(nblocks), 1.0 / 3.0)));
bins_1d = clamp_int(bins_1d, 1, 32);
for (int i = 0; i < dim; i++)
{
index.bins[i] = bins_1d;
index.lo[i] = patch->bbox[i] + patch->lli[i] * DH[i];
const double hi = patch->bbox[dim + i] - patch->uui[i] * DH[i];
if (hi > index.lo[i] && bins_1d > 1)
index.inv[i] = bins_1d / (hi - index.lo[i]);
else
index.inv[i] = 0.0;
}
index.bin_to_blocks.resize(index.bins[0] * index.bins[1] * index.bins[2]);
for (int bi = 0; bi < nblocks; bi++)
{
const InterpBlockView &view = index.views[bi];
int bmin[dim], bmax[dim];
for (int d = 0; d < dim; d++)
{
const double low = view.llb[d] - DH[d] / 2;
const double up = view.uub[d] + DH[d] / 2;
bmin[d] = coord_to_bin(low, index.lo[d], index.inv[d], index.bins[d]);
bmax[d] = coord_to_bin(up, index.lo[d], index.inv[d], index.bins[d]);
if (bmax[d] < bmin[d])
{
int t = bmin[d];
bmin[d] = bmax[d];
bmax[d] = t;
}
}
for (int bz = bmin[2]; bz <= bmax[2]; bz++)
for (int by = bmin[1]; by <= bmax[1]; by++)
for (int bx = bmin[0]; bx <= bmax[0]; bx++)
index.bin_to_blocks[bin_loc(index, bx, by, bz)].push_back(bi);
}
index.valid = true;
}
int find_block_index_for_point(const BlockBinIndex &index, const double *pox, const double *DH)
{
if (!index.valid)
return -1;
const int bx = coord_to_bin(pox[0], index.lo[0], index.inv[0], index.bins[0]);
const int by = coord_to_bin(pox[1], index.lo[1], index.inv[1], index.bins[1]);
const int bz = coord_to_bin(pox[2], index.lo[2], index.inv[2], index.bins[2]);
const vector<int> &cand = index.bin_to_blocks[bin_loc(index, bx, by, bz)];
for (size_t ci = 0; ci < cand.size(); ci++)
{
const int bi = cand[ci];
if (point_in_block_view(index.views[bi], pox, DH))
return bi;
}
// Fallback to full scan for numerical edge cases around bin boundaries.
for (size_t bi = 0; bi < index.views.size(); bi++)
if (point_in_block_view(index.views[bi], pox, DH))
return int(bi);
return -1;
}
} // namespace
Patch::Patch(int DIM, int *shapei, double *bboxi, int levi, bool buflog, int Symmetry) : lev(levi) Patch::Patch(int DIM, int *shapei, double *bboxi, int levi, bool buflog, int Symmetry) : lev(levi)
{ {
@@ -530,11 +364,9 @@ void Patch::Interp_Points(MyList<var> *VarList,
for (int j = 0; j < NN; j++) for (int j = 0; j < NN; j++)
owner_rank[j] = -1; owner_rank[j] = -1;
double DH[dim]; double DH[dim], llb[dim], uub[dim];
for (int i = 0; i < dim; i++) for (int i = 0; i < dim; i++)
DH[i] = getdX(i); DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
for (int j = 0; j < NN; j++) // run along points for (int j = 0; j < NN; j++) // run along points
{ {
@@ -557,24 +389,57 @@ void Patch::Interp_Points(MyList<var> *VarList,
} }
} }
const int block_i = find_block_index_for_point(block_index, pox, DH); MyList<Block> *Bp = blb;
if (block_i >= 0) bool notfind = true;
while (notfind && Bp) // run along Blocks
{ {
Block *BP = block_index.views[block_i].bp; Block *BP = Bp->data;
owner_rank[j] = BP->rank;
if (myrank == BP->rank) bool flag = true;
for (int i = 0; i < dim; i++)
{ {
//---> interpolation #ifdef Vertex
varl = VarList; #ifdef Cell
int k = 0; #error Both Cell and Vertex are defined
while (varl) // run along variables #endif
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
#else
#ifdef Cell
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
#else
#error Not define Vertex nor Cell
#endif
#endif
if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2)
{ {
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k], flag = false;
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); break;
varl = varl->next;
k++;
} }
} }
if (flag)
{
notfind = false;
owner_rank[j] = BP->rank;
if (myrank == BP->rank)
{
//---> interpolation
varl = VarList;
int k = 0;
while (varl) // run along variables
{
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next;
k++;
}
}
}
if (Bp == ble)
break;
Bp = Bp->next;
} }
} }
@@ -642,9 +507,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
// Targeted point-to-point overload: each owner sends each point only to // Targeted point-to-point overload: each owner sends each point only to
// the one rank that needs it for integration (consumer), reducing // the one rank that needs it for integration (consumer), reducing
// communication volume by ~nprocs times compared to the Bcast version. // communication volume by ~nprocs times compared to the Bcast version.
#ifdef INTERP_LB_PROFILE
double t_interp_start = MPI_Wtime();
#endif
int myrank, nprocs; int myrank, nprocs;
MPI_Comm_rank(MPI_COMM_WORLD, &myrank); MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
@@ -667,11 +529,9 @@ void Patch::Interp_Points(MyList<var> *VarList,
for (int j = 0; j < NN; j++) for (int j = 0; j < NN; j++)
owner_rank[j] = -1; owner_rank[j] = -1;
double DH[dim]; double DH[dim], llb[dim], uub[dim];
for (int i = 0; i < dim; i++) for (int i = 0; i < dim; i++)
DH[i] = getdX(i); DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
// --- Interpolation phase (identical to original) --- // --- Interpolation phase (identical to original) ---
for (int j = 0; j < NN; j++) for (int j = 0; j < NN; j++)
@@ -695,31 +555,59 @@ void Patch::Interp_Points(MyList<var> *VarList,
} }
} }
const int block_i = find_block_index_for_point(block_index, pox, DH); MyList<Block> *Bp = blb;
if (block_i >= 0) bool notfind = true;
while (notfind && Bp)
{ {
Block *BP = block_index.views[block_i].bp; Block *BP = Bp->data;
owner_rank[j] = BP->rank;
if (myrank == BP->rank) bool flag = true;
for (int i = 0; i < dim; i++)
{ {
varl = VarList; #ifdef Vertex
int k = 0; #ifdef Cell
while (varl) #error Both Cell and Vertex are defined
#endif
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
#else
#ifdef Cell
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
#else
#error Not define Vertex nor Cell
#endif
#endif
if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2)
{ {
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k], flag = false;
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); break;
varl = varl->next;
k++;
} }
} }
if (flag)
{
notfind = false;
owner_rank[j] = BP->rank;
if (myrank == BP->rank)
{
varl = VarList;
int k = 0;
while (varl)
{
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next;
k++;
}
}
}
if (Bp == ble)
break;
Bp = Bp->next;
} }
} }
#ifdef INTERP_LB_PROFILE
double t_interp_end = MPI_Wtime();
double t_interp_local = t_interp_end - t_interp_start;
#endif
// --- Error check for unfound points --- // --- Error check for unfound points ---
for (int j = 0; j < NN; j++) for (int j = 0; j < NN; j++)
{ {
@@ -876,31 +764,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
delete[] recv_count; delete[] recv_count;
delete[] consumer_rank; delete[] consumer_rank;
delete[] owner_rank; delete[] owner_rank;
#ifdef INTERP_LB_PROFILE
{
static bool profile_written = false;
if (!profile_written) {
double *all_times = nullptr;
if (myrank == 0) all_times = new double[nprocs];
MPI_Gather(&t_interp_local, 1, MPI_DOUBLE,
all_times, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
if (myrank == 0) {
int heavy[64];
int nh = InterpLBProfile::identify_heavy_ranks(
all_times, nprocs, 2.5, heavy, 64);
InterpLBProfile::write_profile(
"interp_lb_profile.bin", nprocs,
all_times, heavy, nh, 2.5);
printf("[InterpLB] Profile written: %d heavy ranks\n", nh);
for (int i = 0; i < nh; i++)
printf(" Heavy rank %d: %.6f s\n", heavy[i], all_times[heavy[i]]);
delete[] all_times;
}
profile_written = true;
}
}
#endif
} }
void Patch::Interp_Points(MyList<var> *VarList, void Patch::Interp_Points(MyList<var> *VarList,
int NN, double **XX, int NN, double **XX,
@@ -934,11 +797,9 @@ void Patch::Interp_Points(MyList<var> *VarList,
MPI_Comm_group(MPI_COMM_WORLD, &world_group); MPI_Comm_group(MPI_COMM_WORLD, &world_group);
MPI_Comm_group(Comm_here, &local_group); MPI_Comm_group(Comm_here, &local_group);
double DH[dim]; double DH[dim], llb[dim], uub[dim];
for (int i = 0; i < dim; i++) for (int i = 0; i < dim; i++)
DH[i] = getdX(i); DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
for (int j = 0; j < NN; j++) // run along points for (int j = 0; j < NN; j++) // run along points
{ {
@@ -961,24 +822,57 @@ void Patch::Interp_Points(MyList<var> *VarList,
} }
} }
const int block_i = find_block_index_for_point(block_index, pox, DH); MyList<Block> *Bp = blb;
if (block_i >= 0) bool notfind = true;
while (notfind && Bp) // run along Blocks
{ {
Block *BP = block_index.views[block_i].bp; Block *BP = Bp->data;
owner_rank[j] = BP->rank;
if (myrank == BP->rank) bool flag = true;
for (int i = 0; i < dim; i++)
{ {
//---> interpolation #ifdef Vertex
varl = VarList; #ifdef Cell
int k = 0; #error Both Cell and Vertex are defined
while (varl) // run along variables #endif
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
#else
#ifdef Cell
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
#else
#error Not define Vertex nor Cell
#endif
#endif
if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2)
{ {
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k], flag = false;
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); break;
varl = varl->next;
k++;
} }
} }
if (flag)
{
notfind = false;
owner_rank[j] = BP->rank;
if (myrank == BP->rank)
{
//---> interpolation
varl = VarList;
int k = 0;
while (varl) // run along variables
{
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next;
k++;
}
}
}
if (Bp == ble)
break;
Bp = Bp->next;
} }
} }

View File

@@ -32,16 +32,6 @@ namespace Parallel
int partition2(int *nxy, int split_size, int *min_width, int cpusize, int *shape); // special for 2 diemnsions int partition2(int *nxy, int split_size, int *min_width, int cpusize, int *shape); // special for 2 diemnsions
int partition3(int *nxyz, int split_size, int *min_width, int cpusize, int *shape); int partition3(int *nxyz, int split_size, int *min_width, int cpusize, int *shape);
MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks
MyList<Block> *distribute_optimize(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0);
Block* splitHotspotBlock(MyList<Block>* &BlL, int _dim,
int ib0_orig, int ib3_orig,
int jb1_orig, int jb4_orig,
int kb2_orig, int kb5_orig,
Patch* PP, int r_left, int r_right,
int ingfsi, int fngfsi, bool periodic,
Block* &split_first_block, Block* &split_last_block);
Block* createMappedBlock(MyList<Block>* &BlL, int _dim, int* shape, double* bbox,
int block_id, int ingfsi, int fngfsi, int lev);
void KillBlocks(MyList<Patch> *PatchLIST); void KillBlocks(MyList<Patch> *PatchLIST);
void setfunction(MyList<Block> *BlL, var *vn, double func(double x, double y, double z)); void setfunction(MyList<Block> *BlL, var *vn, double func(double x, double y, double z));
@@ -108,9 +98,6 @@ namespace Parallel
MPI_Status *stats; MPI_Status *stats;
int max_reqs; int max_reqs;
bool lengths_valid; bool lengths_valid;
int *tc_req_node;
int *tc_req_is_recv;
int *tc_completed;
SyncCache(); SyncCache();
void invalidate(); void invalidate();
void destroy(); void destroy();
@@ -124,10 +111,7 @@ namespace Parallel
struct AsyncSyncState { struct AsyncSyncState {
int req_no; int req_no;
bool active; bool active;
int *req_node; AsyncSyncState() : req_no(0), active(false) {}
int *req_is_recv;
int pending_recv;
AsyncSyncState() : req_no(0), active(false), req_node(0), req_is_recv(0), pending_recv(0) {}
}; };
void Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, void Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry,
@@ -183,7 +167,6 @@ namespace Parallel
MyList<Parallel::gridseg> **out_src, MyList<Parallel::gridseg> **out_dst); MyList<Parallel::gridseg> **out_src, MyList<Parallel::gridseg> **out_dst);
void PeriodicBD(Patch *Pat, MyList<var> *VarList, int Symmetry); void PeriodicBD(Patch *Pat, MyList<var> *VarList, int Symmetry);
double L2Norm(Patch *Pat, var *vf); double L2Norm(Patch *Pat, var *vf);
void L2Norm7(Patch *Pat, var **vf, double *norms);
void checkgsl(MyList<Parallel::gridseg> *pp, bool first_only); void checkgsl(MyList<Parallel::gridseg> *pp, bool first_only);
void checkvarl(MyList<var> *pp, bool first_only); void checkvarl(MyList<var> *pp, bool first_only);
MyList<Parallel::gridseg> *divide_gsl(MyList<Parallel::gridseg> *p, Patch *Pat); MyList<Parallel::gridseg> *divide_gsl(MyList<Parallel::gridseg> *p, Patch *Pat);
@@ -219,7 +202,6 @@ namespace Parallel
void checkpatchlist(MyList<Patch> *PatL, bool buflog); void checkpatchlist(MyList<Patch> *PatL, bool buflog);
double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here); double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here);
void L2Norm7(Patch *Pat, var **vf, double *norms, MPI_Comm Comm_here);
bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList, bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
int NN, double **XX, int NN, double **XX,
double *Shellf, int Symmetry, MPI_Comm Comm_here); double *Shellf, int Symmetry, MPI_Comm Comm_here);

View File

@@ -1,212 +0,0 @@
#include "rungekutta4_rout.h"
#include <cstdio>
#include <cstdlib>
#include <cstddef>
#include <complex>
#include <immintrin.h>
namespace {
inline void rk4_stage0(std::size_t n,
const double *__restrict f0,
const double *__restrict frhs,
double *__restrict f1,
double c) {
std::size_t i = 0;
#if defined(__AVX512F__)
const __m512d vc = _mm512_set1_pd(c);
for (; i + 7 < n; i += 8) {
const __m512d v0 = _mm512_loadu_pd(f0 + i);
const __m512d vr = _mm512_loadu_pd(frhs + i);
_mm512_storeu_pd(f1 + i, _mm512_fmadd_pd(vc, vr, v0));
}
#elif defined(__AVX2__)
const __m256d vc = _mm256_set1_pd(c);
for (; i + 3 < n; i += 4) {
const __m256d v0 = _mm256_loadu_pd(f0 + i);
const __m256d vr = _mm256_loadu_pd(frhs + i);
_mm256_storeu_pd(f1 + i, _mm256_fmadd_pd(vc, vr, v0));
}
#endif
#pragma ivdep
for (; i < n; ++i) {
f1[i] = f0[i] + c * frhs[i];
}
}
inline void rk4_rhs_accum(std::size_t n,
const double *__restrict f1,
double *__restrict frhs) {
std::size_t i = 0;
#if defined(__AVX512F__)
const __m512d v2 = _mm512_set1_pd(2.0);
for (; i + 7 < n; i += 8) {
const __m512d v1 = _mm512_loadu_pd(f1 + i);
const __m512d vrhs = _mm512_loadu_pd(frhs + i);
_mm512_storeu_pd(frhs + i, _mm512_fmadd_pd(v2, v1, vrhs));
}
#elif defined(__AVX2__)
const __m256d v2 = _mm256_set1_pd(2.0);
for (; i + 3 < n; i += 4) {
const __m256d v1 = _mm256_loadu_pd(f1 + i);
const __m256d vrhs = _mm256_loadu_pd(frhs + i);
_mm256_storeu_pd(frhs + i, _mm256_fmadd_pd(v2, v1, vrhs));
}
#endif
#pragma ivdep
for (; i < n; ++i) {
frhs[i] = frhs[i] + 2.0 * f1[i];
}
}
inline void rk4_f1_from_f0_f1(std::size_t n,
const double *__restrict f0,
double *__restrict f1,
double c) {
std::size_t i = 0;
#if defined(__AVX512F__)
const __m512d vc = _mm512_set1_pd(c);
for (; i + 7 < n; i += 8) {
const __m512d v0 = _mm512_loadu_pd(f0 + i);
const __m512d v1 = _mm512_loadu_pd(f1 + i);
_mm512_storeu_pd(f1 + i, _mm512_fmadd_pd(vc, v1, v0));
}
#elif defined(__AVX2__)
const __m256d vc = _mm256_set1_pd(c);
for (; i + 3 < n; i += 4) {
const __m256d v0 = _mm256_loadu_pd(f0 + i);
const __m256d v1 = _mm256_loadu_pd(f1 + i);
_mm256_storeu_pd(f1 + i, _mm256_fmadd_pd(vc, v1, v0));
}
#endif
#pragma ivdep
for (; i < n; ++i) {
f1[i] = f0[i] + c * f1[i];
}
}
inline void rk4_stage3(std::size_t n,
const double *__restrict f0,
double *__restrict f1,
const double *__restrict frhs,
double c) {
std::size_t i = 0;
#if defined(__AVX512F__)
const __m512d vc = _mm512_set1_pd(c);
for (; i + 7 < n; i += 8) {
const __m512d v0 = _mm512_loadu_pd(f0 + i);
const __m512d v1 = _mm512_loadu_pd(f1 + i);
const __m512d vr = _mm512_loadu_pd(frhs + i);
_mm512_storeu_pd(f1 + i, _mm512_fmadd_pd(vc, _mm512_add_pd(v1, vr), v0));
}
#elif defined(__AVX2__)
const __m256d vc = _mm256_set1_pd(c);
for (; i + 3 < n; i += 4) {
const __m256d v0 = _mm256_loadu_pd(f0 + i);
const __m256d v1 = _mm256_loadu_pd(f1 + i);
const __m256d vr = _mm256_loadu_pd(frhs + i);
_mm256_storeu_pd(f1 + i, _mm256_fmadd_pd(vc, _mm256_add_pd(v1, vr), v0));
}
#endif
#pragma ivdep
for (; i < n; ++i) {
f1[i] = f0[i] + c * (f1[i] + frhs[i]);
}
}
} // namespace
extern "C" {
void f_rungekutta4_scalar(double &dT, double &f0, double &f1, double &f_rhs, int &RK4) {
constexpr double F1o6 = 1.0 / 6.0;
constexpr double HLF = 0.5;
constexpr double TWO = 2.0;
switch (RK4) {
case 0:
f1 = f0 + HLF * dT * f_rhs;
break;
case 1:
f_rhs = f_rhs + TWO * f1;
f1 = f0 + HLF * dT * f1;
break;
case 2:
f_rhs = f_rhs + TWO * f1;
f1 = f0 + dT * f1;
break;
case 3:
f1 = f0 + F1o6 * dT * (f1 + f_rhs);
break;
default:
std::fprintf(stderr, "rungekutta4_scalar_c: invalid RK4 stage %d\n", RK4);
std::abort();
}
}
void rungekutta4_cplxscalar_(double &dT,
std::complex<double> &f0,
std::complex<double> &f1,
std::complex<double> &f_rhs,
int &RK4) {
constexpr double F1o6 = 1.0 / 6.0;
constexpr double HLF = 0.5;
constexpr double TWO = 2.0;
switch (RK4) {
case 0:
f1 = f0 + HLF * dT * f_rhs;
break;
case 1:
f_rhs = f_rhs + TWO * f1;
f1 = f0 + HLF * dT * f1;
break;
case 2:
f_rhs = f_rhs + TWO * f1;
f1 = f0 + dT * f1;
break;
case 3:
f1 = f0 + F1o6 * dT * (f1 + f_rhs);
break;
default:
std::fprintf(stderr, "rungekutta4_cplxscalar_c: invalid RK4 stage %d\n", RK4);
std::abort();
}
}
int f_rungekutta4_rout(int *ex, double &dT,
double *f0, double *f1, double *f_rhs,
int &RK4) {
const std::size_t n = static_cast<std::size_t>(ex[0]) *
static_cast<std::size_t>(ex[1]) *
static_cast<std::size_t>(ex[2]);
const double *const __restrict f0r = f0;
double *const __restrict f1r = f1;
double *const __restrict frhs = f_rhs;
if (__builtin_expect(static_cast<unsigned>(RK4) > 3u, 0)) {
std::fprintf(stderr, "rungekutta4_rout_c: invalid RK4 stage %d\n", RK4);
std::abort();
}
switch (RK4) {
case 0:
rk4_stage0(n, f0r, frhs, f1r, 0.5 * dT);
break;
case 1:
rk4_rhs_accum(n, f1r, frhs);
rk4_f1_from_f0_f1(n, f0r, f1r, 0.5 * dT);
break;
case 2:
rk4_rhs_accum(n, f1r, frhs);
rk4_f1_from_f0_f1(n, f0r, f1r, dT);
break;
default:
rk4_stage3(n, f0r, f1r, frhs, (1.0 / 6.0) * dT);
break;
}
return 0;
}
} // extern "C"

View File

@@ -3472,43 +3472,6 @@ double ShellPatch::L2Norm(var *vf)
return tvf; return tvf;
} }
void ShellPatch::L2Norm7(var **vf, double *norms)
{
double tvf[7], dtvf[7];
int BDW = overghost;
for (int i = 0; i < 7; i++)
dtvf[i] = 0;
MyList<ss_patch> *sPp = PatL;
while (sPp)
{
MyList<Block> *Bp = sPp->data->blb;
while (Bp)
{
Block *cg = Bp->data;
if (myrank == cg->rank)
{
f_l2normhelper7(cg->shape, cg->X[0], cg->X[1], cg->X[2],
sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2],
sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5],
cg->fgfs[vf[0]->sgfn], cg->fgfs[vf[1]->sgfn], cg->fgfs[vf[2]->sgfn],
cg->fgfs[vf[3]->sgfn], cg->fgfs[vf[4]->sgfn], cg->fgfs[vf[5]->sgfn],
cg->fgfs[vf[6]->sgfn], tvf, BDW);
for (int i = 0; i < 7; i++)
dtvf[i] += tvf[i];
}
if (Bp == sPp->data->ble)
break;
Bp = Bp->next;
}
sPp = sPp->next;
}
MPI_Allreduce(dtvf, tvf, 7, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
for (int i = 0; i < 7; i++)
norms[i] = sqrt(tvf[i]);
}
// find maximum of abstract value, XX store position for maximum, Shellf store maximum themselvs // find maximum of abstract value, XX store position for maximum, Shellf store maximum themselvs
void ShellPatch::Find_Maximum(MyList<var> *VarList, double *XX, void ShellPatch::Find_Maximum(MyList<var> *VarList, double *XX,

View File

@@ -198,7 +198,6 @@ public:
void write_Pablo_file_ss(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax, void write_Pablo_file_ss(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax,
char *filename, int sst); char *filename, int sst);
double L2Norm(var *vf); double L2Norm(var *vf);
void L2Norm7(var **vf, double *norms);
void Find_Maximum(MyList<var> *VarList, double *XX, double *Shellf); void Find_Maximum(MyList<var> *VarList, double *XX, double *Shellf);
}; };

View File

@@ -485,25 +485,7 @@ void Z4c_class::Step(int lev, int YN)
} }
#endif #endif
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); // CA-RK4: skip post-prediction sync (redundant; ghost cells computable locally)
#ifdef WithShell
if (lev == 0)
{
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->Synch(SynchList_pre, Symmetry);
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
}
}
#endif
// for black hole position // for black hole position
if (BH_num > 0 && lev == GH->levels - 1) if (BH_num > 0 && lev == GH->levels - 1)
@@ -868,25 +850,28 @@ void Z4c_class::Step(int lev, int YN)
} }
#endif #endif
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); // CA-RK4: only sync after last corrector (iter_count == 3); stages 1 & 2 are redundant
if (iter_count == 3) {
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
#ifdef WithShell #ifdef WithShell
if (lev == 0) if (lev == 0)
{
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->Synch(SynchList_cor, Symmetry);
if (myrank == 0)
{ {
prev_clock = curr_clock; clock_t prev_clock, curr_clock;
curr_clock = clock(); if (myrank == 0)
cout << " Shell stuff synchronization used " curr_clock = clock();
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) SH->Synch(SynchList_cor, Symmetry);
<< " seconds! " << endl; if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
}
} }
}
#endif #endif
} // end CA-RK4 guard
// for black hole position // for black hole position
if (BH_num > 0 && lev == GH->levels - 1) if (BH_num > 0 && lev == GH->levels - 1)
{ {
@@ -1558,7 +1543,7 @@ void Z4c_class::Step(int lev, int YN)
} }
} }
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); // CA-RK4: skip post-prediction MPI ghost sync (redundant; ghost cells computable locally)
if (lev == 0) if (lev == 0)
{ {
@@ -2120,7 +2105,9 @@ void Z4c_class::Step(int lev, int YN)
} }
} }
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); // CA-RK4: only MPI sync after last corrector (iter_count == 3); stages 1 & 2 are redundant
if (iter_count == 3)
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
if (lev == 0) if (lev == 0)
{ {

View File

@@ -1221,25 +1221,7 @@ void bssnEM_class::Step(int lev, int YN)
} }
#endif #endif
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); // CA-RK4: skip post-prediction sync (redundant; ghost cells computable locally)
#ifdef WithShell
if (lev == 0)
{
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->Synch(SynchList_pre, Symmetry);
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
}
}
#endif
// for black hole position // for black hole position
if (BH_num > 0 && lev == GH->levels - 1) if (BH_num > 0 && lev == GH->levels - 1)
@@ -1683,25 +1665,28 @@ void bssnEM_class::Step(int lev, int YN)
} }
#endif #endif
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); // CA-RK4: only sync after last corrector (iter_count == 3); stages 1 & 2 are redundant
if (iter_count == 3) {
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
#ifdef WithShell #ifdef WithShell
if (lev == 0) if (lev == 0)
{
clock_t prev_clock, curr_clock;
if (myrank == 0)
curr_clock = clock();
SH->Synch(SynchList_cor, Symmetry);
if (myrank == 0)
{ {
prev_clock = curr_clock; clock_t prev_clock, curr_clock;
curr_clock = clock(); if (myrank == 0)
cout << " Shell stuff synchronization used " curr_clock = clock();
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) SH->Synch(SynchList_cor, Symmetry);
<< " seconds! " << endl; if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
}
} }
}
#endif #endif
} // end CA-RK4 guard
// for black hole position // for black hole position
if (BH_num > 0 && lev == GH->levels - 1) if (BH_num > 0 && lev == GH->levels - 1)
{ {

View File

@@ -48,7 +48,6 @@ public:
double StartTime, TotalTime; double StartTime, TotalTime;
double AnasTime, DumpTime, d2DumpTime, CheckTime; double AnasTime, DumpTime, d2DumpTime, CheckTime;
double LastAnas, LastConsOut; double LastAnas, LastConsOut;
int *ConstraintRefreshLevels;
double Courant; double Courant;
double numepss, numepsb, numepsh; double numepss, numepsb, numepsh;
int Symmetry; int Symmetry;
@@ -131,11 +130,9 @@ public:
Parallel::SyncCache *sync_cache_cor; // per-level cache for corrector sync Parallel::SyncCache *sync_cache_cor; // per-level cache for corrector sync
Parallel::SyncCache *sync_cache_rp_coarse; // RestrictProlong sync on PatL[lev-1] Parallel::SyncCache *sync_cache_rp_coarse; // RestrictProlong sync on PatL[lev-1]
Parallel::SyncCache *sync_cache_rp_fine; // RestrictProlong sync on PatL[lev] Parallel::SyncCache *sync_cache_rp_fine; // RestrictProlong sync on PatL[lev]
Parallel::SyncCache *sync_cache_restrict; // cached Restrict in RestrictProlong
Parallel::SyncCache *sync_cache_outbd; // cached OutBdLow2Hi in RestrictProlong
monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor; monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor;
monitor *ConVMonitor, *TimingMonitor; monitor *ConVMonitor;
surface_integral *Waveshell; surface_integral *Waveshell;
checkpoint *CheckPoint; checkpoint *CheckPoint;

View File

@@ -62,7 +62,6 @@
real*8, dimension(ex(1),ex(2),ex(3)),intent(inout) :: Gmx_Res, Gmy_Res, Gmz_Res real*8, dimension(ex(1),ex(2),ex(3)),intent(inout) :: Gmx_Res, Gmy_Res, Gmz_Res
! gont = 0: success; gont = 1: something wrong ! gont = 0: success; gont = 1: something wrong
integer::gont integer::gont
integer :: i,j,k
!~~~~~~> Other variables: !~~~~~~> Other variables:
@@ -86,13 +85,6 @@
real*8,dimension(3) ::SSS,AAS,ASA,SAA,ASS,SAS,SSA real*8,dimension(3) ::SSS,AAS,ASA,SAA,ASS,SAS,SSA
real*8 :: dX, dY, dZ, PI real*8 :: dX, dY, dZ, PI
real*8 :: divb_loc,det_loc
real*8 :: gupxx_loc,gupxy_loc,gupxz_loc,gupyy_loc,gupyz_loc,gupzz_loc
real*8 :: Rxx_loc,Rxy_loc,Rxz_loc,Ryy_loc,Ryz_loc,Rzz_loc
real*8 :: fxx_loc,fxy_loc,fxz_loc
real*8 :: Gamxa_loc,Gamya_loc,Gamza_loc
real*8 :: f_loc,chin_loc
real*8 :: l_fxx,l_fxy,l_fxz,l_fyy,l_fyz,l_fzz,S_loc
real*8, parameter :: ZEO = 0.d0,ONE = 1.D0, TWO = 2.D0, FOUR = 4.D0 real*8, parameter :: ZEO = 0.d0,ONE = 1.D0, TWO = 2.D0, FOUR = 4.D0
real*8, parameter :: EIGHT = 8.D0, HALF = 0.5D0, THR = 3.d0 real*8, parameter :: EIGHT = 8.D0, HALF = 0.5D0, THR = 3.d0
real*8, parameter :: SYM = 1.D0, ANTI= - 1.D0 real*8, parameter :: SYM = 1.D0, ANTI= - 1.D0
@@ -105,7 +97,7 @@
#endif #endif
#if (GAUGE == 6 || GAUGE == 7) #if (GAUGE == 6 || GAUGE == 7)
integer :: BHN integer :: BHN,i,j,k
real*8, dimension(9) :: Porg real*8, dimension(9) :: Porg
real*8, dimension(3) :: Mass real*8, dimension(3) :: Mass
real*8 :: r1,r2,M,A,w1,w2,C1,C2 real*8 :: r1,r2,M,A,w1,w2,C1,C2
@@ -153,24 +145,22 @@
dY = Y(2) - Y(1) dY = Y(2) - Y(1)
dZ = Z(2) - Z(1) dZ = Z(2) - Z(1)
do k=1,ex(3) alpn1 = Lap + ONE
do j=1,ex(2) chin1 = chi + ONE
do i=1,ex(1) gxx = dxx + ONE
alpn1(i,j,k) = Lap(i,j,k) + ONE gyy = dyy + ONE
chin1(i,j,k) = chi(i,j,k) + ONE gzz = dzz + ONE
gxx(i,j,k) = dxx(i,j,k) + ONE
gyy(i,j,k) = dyy(i,j,k) + ONE
gzz(i,j,k) = dzz(i,j,k) + ONE
enddo
enddo
enddo
call fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev) call fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev)
call fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev) call fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev)
call fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev) call fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev)
div_beta = betaxx + betayy + betazz
call fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev) call fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev)
chi_rhs = F2o3 *chin1*( alpn1 * trK - div_beta ) !rhs for chi
call fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev) call fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
call fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev) call fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev)
call fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev) call fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev)
@@ -178,179 +168,151 @@
call fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev) call fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev)
call fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev) call fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
do k=1,ex(3) gxx_rhs = - TWO * alpn1 * Axx - F2o3 * gxx * div_beta + &
do j=1,ex(2) TWO *( gxx * betaxx + gxy * betayx + gxz * betazx)
do i=1,ex(1)
divb_loc = betaxx(i,j,k) + betayy(i,j,k) + betazz(i,j,k)
div_beta(i,j,k) = divb_loc
chi_rhs(i,j,k) = F2o3 * chin1(i,j,k) * (alpn1(i,j,k) * trK(i,j,k) - divb_loc) gyy_rhs = - TWO * alpn1 * Ayy - F2o3 * gyy * div_beta + &
TWO *( gxy * betaxy + gyy * betayy + gyz * betazy)
gxx_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Axx(i,j,k) - F2o3 * gxx(i,j,k) * divb_loc + & gzz_rhs = - TWO * alpn1 * Azz - F2o3 * gzz * div_beta + &
TWO * ( gxx(i,j,k) * betaxx(i,j,k) + gxy(i,j,k) * betayx(i,j,k) + gxz(i,j,k) * betazx(i,j,k) ) TWO *( gxz * betaxz + gyz * betayz + gzz * betazz)
gyy_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Ayy(i,j,k) - F2o3 * gyy(i,j,k) * divb_loc + & gxy_rhs = - TWO * alpn1 * Axy + F1o3 * gxy * div_beta + &
TWO * ( gxy(i,j,k) * betaxy(i,j,k) + gyy(i,j,k) * betayy(i,j,k) + gyz(i,j,k) * betazy(i,j,k) ) gxx * betaxy + gxz * betazy + &
gyy * betayx + gyz * betazx &
- gxy * betazz
gzz_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Azz(i,j,k) - F2o3 * gzz(i,j,k) * divb_loc + & gyz_rhs = - TWO * alpn1 * Ayz + F1o3 * gyz * div_beta + &
TWO * ( gxz(i,j,k) * betaxz(i,j,k) + gyz(i,j,k) * betayz(i,j,k) + gzz(i,j,k) * betazz(i,j,k) ) gxy * betaxz + gyy * betayz + &
gxz * betaxy + gzz * betazy &
- gyz * betaxx
gxy_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Axy(i,j,k) + F1o3 * gxy(i,j,k) * divb_loc + & gxz_rhs = - TWO * alpn1 * Axz + F1o3 * gxz * div_beta + &
gxx(i,j,k) * betaxy(i,j,k) + gxz(i,j,k) * betazy(i,j,k) + gyy(i,j,k) * betayx(i,j,k) + & gxx * betaxz + gxy * betayz + &
gyz(i,j,k) * betazx(i,j,k) - gxy(i,j,k) * betazz(i,j,k) gyz * betayx + gzz * betazx &
- gxz * betayy !rhs for gij
gyz_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Ayz(i,j,k) + F1o3 * gyz(i,j,k) * divb_loc + & ! invert tilted metric
gxy(i,j,k) * betaxz(i,j,k) + gyy(i,j,k) * betayz(i,j,k) + gxz(i,j,k) * betaxy(i,j,k) + & gupzz = gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
gzz(i,j,k) * betazy(i,j,k) - gyz(i,j,k) * betaxx(i,j,k) gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
gupxx = ( gyy * gzz - gyz * gyz ) / gupzz
gupxy = - ( gxy * gzz - gyz * gxz ) / gupzz
gupxz = ( gxy * gyz - gyy * gxz ) / gupzz
gupyy = ( gxx * gzz - gxz * gxz ) / gupzz
gupyz = - ( gxx * gyz - gxy * gxz ) / gupzz
gupzz = ( gxx * gyy - gxy * gxy ) / gupzz
gxz_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Axz(i,j,k) + F1o3 * gxz(i,j,k) * divb_loc + & if(co == 0)then
gxx(i,j,k) * betaxz(i,j,k) + gxy(i,j,k) * betayz(i,j,k) + gyz(i,j,k) * betayx(i,j,k) + & ! Gam^i_Res = Gam^i + gup^ij_,j
gzz(i,j,k) * betazx(i,j,k) - gxz(i,j,k) * betayy(i,j,k) Gmx_Res = Gamx - (gupxx*(gupxx*gxxx+gupxy*gxyx+gupxz*gxzx)&
+gupxy*(gupxx*gxyx+gupxy*gyyx+gupxz*gyzx)&
+gupxz*(gupxx*gxzx+gupxy*gyzx+gupxz*gzzx)&
+gupxx*(gupxy*gxxy+gupyy*gxyy+gupyz*gxzy)&
+gupxy*(gupxy*gxyy+gupyy*gyyy+gupyz*gyzy)&
+gupxz*(gupxy*gxzy+gupyy*gyzy+gupyz*gzzy)&
+gupxx*(gupxz*gxxz+gupyz*gxyz+gupzz*gxzz)&
+gupxy*(gupxz*gxyz+gupyz*gyyz+gupzz*gyzz)&
+gupxz*(gupxz*gxzz+gupyz*gyzz+gupzz*gzzz))
Gmy_Res = Gamy - (gupxx*(gupxy*gxxx+gupyy*gxyx+gupyz*gxzx)&
+gupxy*(gupxy*gxyx+gupyy*gyyx+gupyz*gyzx)&
+gupxz*(gupxy*gxzx+gupyy*gyzx+gupyz*gzzx)&
+gupxy*(gupxy*gxxy+gupyy*gxyy+gupyz*gxzy)&
+gupyy*(gupxy*gxyy+gupyy*gyyy+gupyz*gyzy)&
+gupyz*(gupxy*gxzy+gupyy*gyzy+gupyz*gzzy)&
+gupxy*(gupxz*gxxz+gupyz*gxyz+gupzz*gxzz)&
+gupyy*(gupxz*gxyz+gupyz*gyyz+gupzz*gyzz)&
+gupyz*(gupxz*gxzz+gupyz*gyzz+gupzz*gzzz))
Gmz_Res = Gamz - (gupxx*(gupxz*gxxx+gupyz*gxyx+gupzz*gxzx)&
+gupxy*(gupxz*gxyx+gupyz*gyyx+gupzz*gyzx)&
+gupxz*(gupxz*gxzx+gupyz*gyzx+gupzz*gzzx)&
+gupxy*(gupxz*gxxy+gupyz*gxyy+gupzz*gxzy)&
+gupyy*(gupxz*gxyy+gupyz*gyyy+gupzz*gyzy)&
+gupyz*(gupxz*gxzy+gupyz*gyzy+gupzz*gzzy)&
+gupxz*(gupxz*gxxz+gupyz*gxyz+gupzz*gxzz)&
+gupyz*(gupxz*gxyz+gupyz*gyyz+gupzz*gyzz)&
+gupzz*(gupxz*gxzz+gupyz*gyzz+gupzz*gzzz))
endif
det_loc = gxx(i,j,k) * gyy(i,j,k) * gzz(i,j,k) + gxy(i,j,k) * gyz(i,j,k) * gxz(i,j,k) + & ! second kind of connection
gxz(i,j,k) * gxy(i,j,k) * gyz(i,j,k) - gxz(i,j,k) * gyy(i,j,k) * gxz(i,j,k) - & Gamxxx =HALF*( gupxx*gxxx + gupxy*(TWO*gxyx - gxxy ) + gupxz*(TWO*gxzx - gxxz ))
gxy(i,j,k) * gxy(i,j,k) * gzz(i,j,k) - gxx(i,j,k) * gyz(i,j,k) * gyz(i,j,k) Gamyxx =HALF*( gupxy*gxxx + gupyy*(TWO*gxyx - gxxy ) + gupyz*(TWO*gxzx - gxxz ))
gupxx_loc = ( gyy(i,j,k) * gzz(i,j,k) - gyz(i,j,k) * gyz(i,j,k) ) / det_loc Gamzxx =HALF*( gupxz*gxxx + gupyz*(TWO*gxyx - gxxy ) + gupzz*(TWO*gxzx - gxxz ))
gupxy_loc = - ( gxy(i,j,k) * gzz(i,j,k) - gyz(i,j,k) * gxz(i,j,k) ) / det_loc
gupxz_loc = ( gxy(i,j,k) * gyz(i,j,k) - gyy(i,j,k) * gxz(i,j,k) ) / det_loc
gupyy_loc = ( gxx(i,j,k) * gzz(i,j,k) - gxz(i,j,k) * gxz(i,j,k) ) / det_loc
gupyz_loc = - ( gxx(i,j,k) * gyz(i,j,k) - gxy(i,j,k) * gxz(i,j,k) ) / det_loc
gupzz_loc = ( gxx(i,j,k) * gyy(i,j,k) - gxy(i,j,k) * gxy(i,j,k) ) / det_loc
gupxx(i,j,k) = gupxx_loc
gupxy(i,j,k) = gupxy_loc
gupxz(i,j,k) = gupxz_loc
gupyy(i,j,k) = gupyy_loc
gupyz(i,j,k) = gupyz_loc
gupzz(i,j,k) = gupzz_loc
if(co == 0)then Gamxyy =HALF*( gupxx*(TWO*gxyy - gyyx ) + gupxy*gyyy + gupxz*(TWO*gyzy - gyyz ))
Gmx_Res(i,j,k) = Gamx(i,j,k) - ( & Gamyyy =HALF*( gupxy*(TWO*gxyy - gyyx ) + gupyy*gyyy + gupyz*(TWO*gyzy - gyyz ))
gupxx_loc*(gupxx_loc*gxxx(i,j,k)+gupxy_loc*gxyx(i,j,k)+gupxz_loc*gxzx(i,j,k)) + & Gamzyy =HALF*( gupxz*(TWO*gxyy - gyyx ) + gupyz*gyyy + gupzz*(TWO*gyzy - gyyz ))
gupxy_loc*(gupxx_loc*gxyx(i,j,k)+gupxy_loc*gyyx(i,j,k)+gupxz_loc*gyzx(i,j,k)) + &
gupxz_loc*(gupxx_loc*gxzx(i,j,k)+gupxy_loc*gyzx(i,j,k)+gupxz_loc*gzzx(i,j,k)) + &
gupxx_loc*(gupxy_loc*gxxy(i,j,k)+gupyy_loc*gxyy(i,j,k)+gupyz_loc*gxzy(i,j,k)) + &
gupxy_loc*(gupxy_loc*gxyy(i,j,k)+gupyy_loc*gyyy(i,j,k)+gupyz_loc*gyzy(i,j,k)) + &
gupxz_loc*(gupxy_loc*gxzy(i,j,k)+gupyy_loc*gyzy(i,j,k)+gupyz_loc*gzzy(i,j,k)) + &
gupxx_loc*(gupxz_loc*gxxz(i,j,k)+gupyz_loc*gxyz(i,j,k)+gupzz_loc*gxzz(i,j,k)) + &
gupxy_loc*(gupxz_loc*gxyz(i,j,k)+gupyz_loc*gyyz(i,j,k)+gupzz_loc*gyzz(i,j,k)) + &
gupxz_loc*(gupxz_loc*gxzz(i,j,k)+gupyz_loc*gyzz(i,j,k)+gupzz_loc*gzzz(i,j,k)))
Gmy_Res(i,j,k) = Gamy(i,j,k) - ( &
gupxx_loc*(gupxy_loc*gxxx(i,j,k)+gupyy_loc*gxyx(i,j,k)+gupyz_loc*gxzx(i,j,k)) + &
gupxy_loc*(gupxy_loc*gxyx(i,j,k)+gupyy_loc*gyyx(i,j,k)+gupyz_loc*gyzx(i,j,k)) + &
gupxz_loc*(gupxy_loc*gxzx(i,j,k)+gupyy_loc*gyzx(i,j,k)+gupyz_loc*gzzx(i,j,k)) + &
gupxy_loc*(gupxy_loc*gxxy(i,j,k)+gupyy_loc*gxyy(i,j,k)+gupyz_loc*gxzy(i,j,k)) + &
gupyy_loc*(gupxy_loc*gxyy(i,j,k)+gupyy_loc*gyyy(i,j,k)+gupyz_loc*gyzy(i,j,k)) + &
gupyz_loc*(gupxy_loc*gxzy(i,j,k)+gupyy_loc*gyzy(i,j,k)+gupyz_loc*gzzy(i,j,k)) + &
gupxy_loc*(gupxz_loc*gxxz(i,j,k)+gupyz_loc*gxyz(i,j,k)+gupzz_loc*gxzz(i,j,k)) + &
gupyy_loc*(gupxz_loc*gxyz(i,j,k)+gupyz_loc*gyyz(i,j,k)+gupzz_loc*gyzz(i,j,k)) + &
gupyz_loc*(gupxz_loc*gxzz(i,j,k)+gupyz_loc*gyzz(i,j,k)+gupzz_loc*gzzz(i,j,k)))
Gmz_Res(i,j,k) = Gamz(i,j,k) - ( &
gupxx_loc*(gupxz_loc*gxxx(i,j,k)+gupyz_loc*gxyx(i,j,k)+gupzz_loc*gxzx(i,j,k)) + &
gupxy_loc*(gupxz_loc*gxyx(i,j,k)+gupyz_loc*gyyx(i,j,k)+gupzz_loc*gyzx(i,j,k)) + &
gupxz_loc*(gupxz_loc*gxzx(i,j,k)+gupyz_loc*gyzx(i,j,k)+gupzz_loc*gzzx(i,j,k)) + &
gupxy_loc*(gupxz_loc*gxxy(i,j,k)+gupyz_loc*gxyy(i,j,k)+gupzz_loc*gxzy(i,j,k)) + &
gupyy_loc*(gupxz_loc*gxyy(i,j,k)+gupyz_loc*gyyy(i,j,k)+gupzz_loc*gyzy(i,j,k)) + &
gupyz_loc*(gupxz_loc*gxzy(i,j,k)+gupyz_loc*gyzy(i,j,k)+gupzz_loc*gzzy(i,j,k)) + &
gupxz_loc*(gupxz_loc*gxxz(i,j,k)+gupyz_loc*gxyz(i,j,k)+gupzz_loc*gxzz(i,j,k)) + &
gupyz_loc*(gupxz_loc*gxyz(i,j,k)+gupyz_loc*gyyz(i,j,k)+gupzz_loc*gyzz(i,j,k)) + &
gupzz_loc*(gupxz_loc*gxzz(i,j,k)+gupyz_loc*gyzz(i,j,k)+gupzz_loc*gzzz(i,j,k)))
endif
Gamxxx(i,j,k)=HALF*( gupxx_loc*gxxx(i,j,k) + gupxy_loc*(TWO*gxyx(i,j,k) - gxxy(i,j,k)) + gupxz_loc*(TWO*gxzx(i,j,k) - gxxz(i,j,k))) Gamxzz =HALF*( gupxx*(TWO*gxzz - gzzx ) + gupxy*(TWO*gyzz - gzzy ) + gupxz*gzzz)
Gamyxx(i,j,k)=HALF*( gupxy_loc*gxxx(i,j,k) + gupyy_loc*(TWO*gxyx(i,j,k) - gxxy(i,j,k)) + gupyz_loc*(TWO*gxzx(i,j,k) - gxxz(i,j,k))) Gamyzz =HALF*( gupxy*(TWO*gxzz - gzzx ) + gupyy*(TWO*gyzz - gzzy ) + gupyz*gzzz)
Gamzxx(i,j,k)=HALF*( gupxz_loc*gxxx(i,j,k) + gupyz_loc*(TWO*gxyx(i,j,k) - gxxy(i,j,k)) + gupzz_loc*(TWO*gxzx(i,j,k) - gxxz(i,j,k))) Gamzzz =HALF*( gupxz*(TWO*gxzz - gzzx ) + gupyz*(TWO*gyzz - gzzy ) + gupzz*gzzz)
Gamxyy(i,j,k)=HALF*( gupxx_loc*(TWO*gxyy(i,j,k) - gyyx(i,j,k)) + gupxy_loc*gyyy(i,j,k) + gupxz_loc*(TWO*gyzy(i,j,k) - gyyz(i,j,k))) Gamxxy =HALF*( gupxx*gxxy + gupxy*gyyx + gupxz*( gxzy + gyzx - gxyz ) )
Gamyyy(i,j,k)=HALF*( gupxy_loc*(TWO*gxyy(i,j,k) - gyyx(i,j,k)) + gupyy_loc*gyyy(i,j,k) + gupyz_loc*(TWO*gyzy(i,j,k) - gyyz(i,j,k))) Gamyxy =HALF*( gupxy*gxxy + gupyy*gyyx + gupyz*( gxzy + gyzx - gxyz ) )
Gamzyy(i,j,k)=HALF*( gupxz_loc*(TWO*gxyy(i,j,k) - gyyx(i,j,k)) + gupyz_loc*gyyy(i,j,k) + gupzz_loc*(TWO*gyzy(i,j,k) - gyyz(i,j,k))) Gamzxy =HALF*( gupxz*gxxy + gupyz*gyyx + gupzz*( gxzy + gyzx - gxyz ) )
Gamxzz(i,j,k)=HALF*( gupxx_loc*(TWO*gxzz(i,j,k) - gzzx(i,j,k)) + gupxy_loc*(TWO*gyzz(i,j,k) - gzzy(i,j,k)) + gupxz_loc*gzzz(i,j,k)) Gamxxz =HALF*( gupxx*gxxz + gupxy*( gxyz + gyzx - gxzy ) + gupxz*gzzx )
Gamyzz(i,j,k)=HALF*( gupxy_loc*(TWO*gxzz(i,j,k) - gzzx(i,j,k)) + gupyy_loc*(TWO*gyzz(i,j,k) - gzzy(i,j,k)) + gupyz_loc*gzzz(i,j,k)) Gamyxz =HALF*( gupxy*gxxz + gupyy*( gxyz + gyzx - gxzy ) + gupyz*gzzx )
Gamzzz(i,j,k)=HALF*( gupxz_loc*(TWO*gxzz(i,j,k) - gzzx(i,j,k)) + gupyz_loc*(TWO*gyzz(i,j,k) - gzzy(i,j,k)) + gupzz_loc*gzzz(i,j,k)) Gamzxz =HALF*( gupxz*gxxz + gupyz*( gxyz + gyzx - gxzy ) + gupzz*gzzx )
Gamxxy(i,j,k)=HALF*( gupxx_loc*gxxy(i,j,k) + gupxy_loc*gyyx(i,j,k) + gupxz_loc*(gxzy(i,j,k) + gyzx(i,j,k) - gxyz(i,j,k)) ) Gamxyz =HALF*( gupxx*( gxyz + gxzy - gyzx ) + gupxy*gyyz + gupxz*gzzy )
Gamyxy(i,j,k)=HALF*( gupxy_loc*gxxy(i,j,k) + gupyy_loc*gyyx(i,j,k) + gupyz_loc*(gxzy(i,j,k) + gyzx(i,j,k) - gxyz(i,j,k)) ) Gamyyz =HALF*( gupxy*( gxyz + gxzy - gyzx ) + gupyy*gyyz + gupyz*gzzy )
Gamzxy(i,j,k)=HALF*( gupxz_loc*gxxy(i,j,k) + gupyz_loc*gyyx(i,j,k) + gupzz_loc*(gxzy(i,j,k) + gyzx(i,j,k) - gxyz(i,j,k)) ) Gamzyz =HALF*( gupxz*( gxyz + gxzy - gyzx ) + gupyz*gyyz + gupzz*gzzy )
Gamxxz(i,j,k)=HALF*( gupxx_loc*gxxz(i,j,k) + gupxy_loc*(gxyz(i,j,k) + gyzx(i,j,k) - gxzy(i,j,k)) + gupxz_loc*gzzx(i,j,k) )
Gamyxz(i,j,k)=HALF*( gupxy_loc*gxxz(i,j,k) + gupyy_loc*(gxyz(i,j,k) + gyzx(i,j,k) - gxzy(i,j,k)) + gupyz_loc*gzzx(i,j,k) )
Gamzxz(i,j,k)=HALF*( gupxz_loc*gxxz(i,j,k) + gupyz_loc*(gxyz(i,j,k) + gyzx(i,j,k) - gxzy(i,j,k)) + gupzz_loc*gzzx(i,j,k) )
Gamxyz(i,j,k)=HALF*( gupxx_loc*(gxyz(i,j,k) + gxzy(i,j,k) - gyzx(i,j,k)) + gupxy_loc*gyyz(i,j,k) + gupxz_loc*gzzy(i,j,k) )
Gamyyz(i,j,k)=HALF*( gupxy_loc*(gxyz(i,j,k) + gxzy(i,j,k) - gyzx(i,j,k)) + gupyy_loc*gyyz(i,j,k) + gupyz_loc*gzzy(i,j,k) )
Gamzyz(i,j,k)=HALF*( gupxz_loc*(gxyz(i,j,k) + gxzy(i,j,k) - gyzx(i,j,k)) + gupyz_loc*gyyz(i,j,k) + gupzz_loc*gzzy(i,j,k) )
enddo
enddo
enddo
! Raise indices of \tilde A_{ij} and store in R_ij ! Raise indices of \tilde A_{ij} and store in R_ij
Rxx = gupxx * gupxx * Axx + gupxy * gupxy * Ayy + gupxz * gupxz * Azz + &
TWO*(gupxx * gupxy * Axy + gupxx * gupxz * Axz + gupxy * gupxz * Ayz)
Ryy = gupxy * gupxy * Axx + gupyy * gupyy * Ayy + gupyz * gupyz * Azz + &
TWO*(gupxy * gupyy * Axy + gupxy * gupyz * Axz + gupyy * gupyz * Ayz)
Rzz = gupxz * gupxz * Axx + gupyz * gupyz * Ayy + gupzz * gupzz * Azz + &
TWO*(gupxz * gupyz * Axy + gupxz * gupzz * Axz + gupyz * gupzz * Ayz)
Rxy = gupxx * gupxy * Axx + gupxy * gupyy * Ayy + gupxz * gupyz * Azz + &
(gupxx * gupyy + gupxy * gupxy)* Axy + &
(gupxx * gupyz + gupxz * gupxy)* Axz + &
(gupxy * gupyz + gupxz * gupyy)* Ayz
Rxz = gupxx * gupxz * Axx + gupxy * gupyz * Ayy + gupxz * gupzz * Azz + &
(gupxx * gupyz + gupxy * gupxz)* Axy + &
(gupxx * gupzz + gupxz * gupxz)* Axz + &
(gupxy * gupzz + gupxz * gupyz)* Ayz
Ryz = gupxy * gupxz * Axx + gupyy * gupyz * Ayy + gupyz * gupzz * Azz + &
(gupxy * gupyz + gupyy * gupxz)* Axy + &
(gupxy * gupzz + gupyz * gupxz)* Axz + &
(gupyy * gupzz + gupyz * gupyz)* Ayz
! Right hand side for Gam^i without shift terms... ! Right hand side for Gam^i without shift terms...
call fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev) call fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
call fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev) call fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev)
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
gupxx_loc = gupxx(i,j,k)
gupxy_loc = gupxy(i,j,k)
gupxz_loc = gupxz(i,j,k)
gupyy_loc = gupyy(i,j,k)
gupyz_loc = gupyz(i,j,k)
gupzz_loc = gupzz(i,j,k)
Rxx_loc = gupxx_loc * gupxx_loc * Axx(i,j,k) + gupxy_loc * gupxy_loc * Ayy(i,j,k) + gupxz_loc * gupxz_loc * Azz(i,j,k) + & Gamx_rhs = - TWO * ( Lapx * Rxx + Lapy * Rxy + Lapz * Rxz ) + &
TWO * (gupxx_loc * gupxy_loc * Axy(i,j,k) + gupxx_loc * gupxz_loc * Axz(i,j,k) + gupxy_loc * gupxz_loc * Ayz(i,j,k)) TWO * alpn1 * ( &
Ryy_loc = gupxy_loc * gupxy_loc * Axx(i,j,k) + gupyy_loc * gupyy_loc * Ayy(i,j,k) + gupyz_loc * gupyz_loc * Azz(i,j,k) + & -F3o2/chin1 * ( chix * Rxx + chiy * Rxy + chiz * Rxz ) - &
TWO * (gupxy_loc * gupyy_loc * Axy(i,j,k) + gupxy_loc * gupyz_loc * Axz(i,j,k) + gupyy_loc * gupyz_loc * Ayz(i,j,k)) gupxx * ( F2o3 * Kx + EIGHT * PI * Sx ) - &
Rzz_loc = gupxz_loc * gupxz_loc * Axx(i,j,k) + gupyz_loc * gupyz_loc * Ayy(i,j,k) + gupzz_loc * gupzz_loc * Azz(i,j,k) + & gupxy * ( F2o3 * Ky + EIGHT * PI * Sy ) - &
TWO * (gupxz_loc * gupyz_loc * Axy(i,j,k) + gupxz_loc * gupzz_loc * Axz(i,j,k) + gupyz_loc * gupzz_loc * Ayz(i,j,k)) gupxz * ( F2o3 * Kz + EIGHT * PI * Sz ) + &
Rxy_loc = gupxx_loc * gupxy_loc * Axx(i,j,k) + gupxy_loc * gupyy_loc * Ayy(i,j,k) + gupxz_loc * gupyz_loc * Azz(i,j,k) + & Gamxxx * Rxx + Gamxyy * Ryy + Gamxzz * Rzz + &
(gupxx_loc * gupyy_loc + gupxy_loc * gupxy_loc) * Axy(i,j,k) + & TWO * ( Gamxxy * Rxy + Gamxxz * Rxz + Gamxyz * Ryz ) )
(gupxx_loc * gupyz_loc + gupxz_loc * gupxy_loc) * Axz(i,j,k) + &
(gupxy_loc * gupyz_loc + gupxz_loc * gupyy_loc) * Ayz(i,j,k)
Rxz_loc = gupxx_loc * gupxz_loc * Axx(i,j,k) + gupxy_loc * gupyz_loc * Ayy(i,j,k) + gupxz_loc * gupzz_loc * Azz(i,j,k) + &
(gupxx_loc * gupyz_loc + gupxy_loc * gupxz_loc) * Axy(i,j,k) + &
(gupxx_loc * gupzz_loc + gupxz_loc * gupxz_loc) * Axz(i,j,k) + &
(gupxy_loc * gupzz_loc + gupxz_loc * gupyz_loc) * Ayz(i,j,k)
Ryz_loc = gupxy_loc * gupxz_loc * Axx(i,j,k) + gupyy_loc * gupyz_loc * Ayy(i,j,k) + gupyz_loc * gupzz_loc * Azz(i,j,k) + &
(gupxy_loc * gupyz_loc + gupyy_loc * gupxz_loc) * Axy(i,j,k) + &
(gupxy_loc * gupzz_loc + gupyz_loc * gupxz_loc) * Axz(i,j,k) + &
(gupyy_loc * gupzz_loc + gupyz_loc * gupyz_loc) * Ayz(i,j,k)
Rxx(i,j,k) = Rxx_loc
Ryy(i,j,k) = Ryy_loc
Rzz(i,j,k) = Rzz_loc
Rxy(i,j,k) = Rxy_loc
Rxz(i,j,k) = Rxz_loc
Ryz(i,j,k) = Ryz_loc
Gamx_rhs(i,j,k) = - TWO * (Lapx(i,j,k) * Rxx_loc + Lapy(i,j,k) * Rxy_loc + Lapz(i,j,k) * Rxz_loc) + & Gamy_rhs = - TWO * ( Lapx * Rxy + Lapy * Ryy + Lapz * Ryz ) + &
TWO * alpn1(i,j,k) * ( & TWO * alpn1 * ( &
-F3o2/chin1(i,j,k) * (chix(i,j,k) * Rxx_loc + chiy(i,j,k) * Rxy_loc + chiz(i,j,k) * Rxz_loc) - & -F3o2/chin1 * ( chix * Rxy + chiy * Ryy + chiz * Ryz ) - &
gupxx_loc * (F2o3 * Kx(i,j,k) + EIGHT * PI * Sx(i,j,k)) - & gupxy * ( F2o3 * Kx + EIGHT * PI * Sx ) - &
gupxy_loc * (F2o3 * Ky(i,j,k) + EIGHT * PI * Sy(i,j,k)) - & gupyy * ( F2o3 * Ky + EIGHT * PI * Sy ) - &
gupxz_loc * (F2o3 * Kz(i,j,k) + EIGHT * PI * Sz(i,j,k)) + & gupyz * ( F2o3 * Kz + EIGHT * PI * Sz ) + &
Gamxxx(i,j,k) * Rxx_loc + Gamxyy(i,j,k) * Ryy_loc + Gamxzz(i,j,k) * Rzz_loc + & Gamyxx * Rxx + Gamyyy * Ryy + Gamyzz * Rzz + &
TWO * (Gamxxy(i,j,k) * Rxy_loc + Gamxxz(i,j,k) * Rxz_loc + Gamxyz(i,j,k) * Ryz_loc)) TWO * ( Gamyxy * Rxy + Gamyxz * Rxz + Gamyyz * Ryz ) )
Gamy_rhs(i,j,k) = - TWO * (Lapx(i,j,k) * Rxy_loc + Lapy(i,j,k) * Ryy_loc + Lapz(i,j,k) * Ryz_loc) + & Gamz_rhs = - TWO * ( Lapx * Rxz + Lapy * Ryz + Lapz * Rzz ) + &
TWO * alpn1(i,j,k) * ( & TWO * alpn1 * ( &
-F3o2/chin1(i,j,k) * (chix(i,j,k) * Rxy_loc + chiy(i,j,k) * Ryy_loc + chiz(i,j,k) * Ryz_loc) - & -F3o2/chin1 * ( chix * Rxz + chiy * Ryz + chiz * Rzz ) - &
gupxy_loc * (F2o3 * Kx(i,j,k) + EIGHT * PI * Sx(i,j,k)) - & gupxz * ( F2o3 * Kx + EIGHT * PI * Sx ) - &
gupyy_loc * (F2o3 * Ky(i,j,k) + EIGHT * PI * Sy(i,j,k)) - & gupyz * ( F2o3 * Ky + EIGHT * PI * Sy ) - &
gupyz_loc * (F2o3 * Kz(i,j,k) + EIGHT * PI * Sz(i,j,k)) + & gupzz * ( F2o3 * Kz + EIGHT * PI * Sz ) + &
Gamyxx(i,j,k) * Rxx_loc + Gamyyy(i,j,k) * Ryy_loc + Gamyzz(i,j,k) * Rzz_loc + & Gamzxx * Rxx + Gamzyy * Ryy + Gamzzz * Rzz + &
TWO * (Gamyxy(i,j,k) * Rxy_loc + Gamyxz(i,j,k) * Rxz_loc + Gamyyz(i,j,k) * Ryz_loc)) TWO * ( Gamzxy * Rxy + Gamzxz * Rxz + Gamzyz * Ryz ) )
Gamz_rhs(i,j,k) = - TWO * (Lapx(i,j,k) * Rxz_loc + Lapy(i,j,k) * Ryz_loc + Lapz(i,j,k) * Rzz_loc) + &
TWO * alpn1(i,j,k) * ( &
-F3o2/chin1(i,j,k) * (chix(i,j,k) * Rxz_loc + chiy(i,j,k) * Ryz_loc + chiz(i,j,k) * Rzz_loc) - &
gupxz_loc * (F2o3 * Kx(i,j,k) + EIGHT * PI * Sx(i,j,k)) - &
gupyz_loc * (F2o3 * Ky(i,j,k) + EIGHT * PI * Sy(i,j,k)) - &
gupzz_loc * (F2o3 * Kz(i,j,k) + EIGHT * PI * Sz(i,j,k)) + &
Gamzxx(i,j,k) * Rxx_loc + Gamzyy(i,j,k) * Ryy_loc + Gamzzz(i,j,k) * Rzz_loc + &
TWO * (Gamzxy(i,j,k) * Rxy_loc + Gamzxz(i,j,k) * Rxz_loc + Gamzyz(i,j,k) * Ryz_loc))
enddo
enddo
enddo
call fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,& call fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,&
X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev) X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev)
@@ -359,54 +321,38 @@
call fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,& call fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,&
X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev) X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev)
fxx = gxxx + gxyy + gxzz
fxy = gxyx + gyyy + gyzz
fxz = gxzx + gyzy + gzzz
Gamxa = gupxx * Gamxxx + gupyy * Gamxyy + gupzz * Gamxzz + &
TWO*( gupxy * Gamxxy + gupxz * Gamxxz + gupyz * Gamxyz )
Gamya = gupxx * Gamyxx + gupyy * Gamyyy + gupzz * Gamyzz + &
TWO*( gupxy * Gamyxy + gupxz * Gamyxz + gupyz * Gamyyz )
Gamza = gupxx * Gamzxx + gupyy * Gamzyy + gupzz * Gamzzz + &
TWO*( gupxy * Gamzxy + gupxz * Gamzxz + gupyz * Gamzyz )
call fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev) call fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev)
call fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev) call fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev)
call fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev) call fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev)
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
divb_loc = div_beta(i,j,k)
fxx_loc = gxxx(i,j,k) + gxyy(i,j,k) + gxzz(i,j,k)
fxy_loc = gxyx(i,j,k) + gyyy(i,j,k) + gyzz(i,j,k)
fxz_loc = gxzx(i,j,k) + gyzy(i,j,k) + gzzz(i,j,k)
gupxx_loc = gupxx(i,j,k) Gamx_rhs = Gamx_rhs + F2o3 * Gamxa * div_beta - &
gupxy_loc = gupxy(i,j,k) Gamxa * betaxx - Gamya * betaxy - Gamza * betaxz + &
gupxz_loc = gupxz(i,j,k) F1o3 * (gupxx * fxx + gupxy * fxy + gupxz * fxz ) + &
gupyy_loc = gupyy(i,j,k) gupxx * gxxx + gupyy * gyyx + gupzz * gzzx + &
gupyz_loc = gupyz(i,j,k) TWO * (gupxy * gxyx + gupxz * gxzx + gupyz * gyzx )
gupzz_loc = gupzz(i,j,k)
Gamxa_loc = gupxx_loc * Gamxxx(i,j,k) + gupyy_loc * Gamxyy(i,j,k) + gupzz_loc * Gamxzz(i,j,k) + & Gamy_rhs = Gamy_rhs + F2o3 * Gamya * div_beta - &
TWO * (gupxy_loc * Gamxxy(i,j,k) + gupxz_loc * Gamxxz(i,j,k) + gupyz_loc * Gamxyz(i,j,k)) Gamxa * betayx - Gamya * betayy - Gamza * betayz + &
Gamya_loc = gupxx_loc * Gamyxx(i,j,k) + gupyy_loc * Gamyyy(i,j,k) + gupzz_loc * Gamyzz(i,j,k) + & F1o3 * (gupxy * fxx + gupyy * fxy + gupyz * fxz ) + &
TWO * (gupxy_loc * Gamyxy(i,j,k) + gupxz_loc * Gamyxz(i,j,k) + gupyz_loc * Gamyyz(i,j,k)) gupxx * gxxy + gupyy * gyyy + gupzz * gzzy + &
Gamza_loc = gupxx_loc * Gamzxx(i,j,k) + gupyy_loc * Gamzyy(i,j,k) + gupzz_loc * Gamzzz(i,j,k) + & TWO * (gupxy * gxyy + gupxz * gxzy + gupyz * gyzy )
TWO * (gupxy_loc * Gamzxy(i,j,k) + gupxz_loc * Gamzxz(i,j,k) + gupyz_loc * Gamzyz(i,j,k))
Gamxa(i,j,k) = Gamxa_loc
Gamya(i,j,k) = Gamya_loc
Gamza(i,j,k) = Gamza_loc
Gamx_rhs(i,j,k) = Gamx_rhs(i,j,k) + F2o3 * Gamxa_loc * divb_loc - & Gamz_rhs = Gamz_rhs + F2o3 * Gamza * div_beta - &
Gamxa_loc * betaxx(i,j,k) - Gamya_loc * betaxy(i,j,k) - Gamza_loc * betaxz(i,j,k) + & Gamxa * betazx - Gamya * betazy - Gamza * betazz + &
F1o3 * (gupxx_loc * fxx_loc + gupxy_loc * fxy_loc + gupxz_loc * fxz_loc) + & F1o3 * (gupxz * fxx + gupyz * fxy + gupzz * fxz ) + &
gupxx_loc * gxxx(i,j,k) + gupyy_loc * gyyx(i,j,k) + gupzz_loc * gzzx(i,j,k) + & gupxx * gxxz + gupyy * gyyz + gupzz * gzzz + &
TWO * (gupxy_loc * gxyx(i,j,k) + gupxz_loc * gxzx(i,j,k) + gupyz_loc * gyzx(i,j,k)) TWO * (gupxy * gxyz + gupxz * gxzz + gupyz * gyzz ) !rhs for Gam^i
Gamy_rhs(i,j,k) = Gamy_rhs(i,j,k) + F2o3 * Gamya_loc * divb_loc - &
Gamxa_loc * betayx(i,j,k) - Gamya_loc * betayy(i,j,k) - Gamza_loc * betayz(i,j,k) + &
F1o3 * (gupxy_loc * fxx_loc + gupyy_loc * fxy_loc + gupyz_loc * fxz_loc) + &
gupxx_loc * gxxy(i,j,k) + gupyy_loc * gyyy(i,j,k) + gupzz_loc * gzzy(i,j,k) + &
TWO * (gupxy_loc * gxyy(i,j,k) + gupxz_loc * gxzy(i,j,k) + gupyz_loc * gyzy(i,j,k))
Gamz_rhs(i,j,k) = Gamz_rhs(i,j,k) + F2o3 * Gamza_loc * divb_loc - &
Gamxa_loc * betazx(i,j,k) - Gamya_loc * betazy(i,j,k) - Gamza_loc * betazz(i,j,k) + &
F1o3 * (gupxz_loc * fxx_loc + gupyz_loc * fxy_loc + gupzz_loc * fxz_loc) + &
gupxx_loc * gxxz(i,j,k) + gupyy_loc * gyyz(i,j,k) + gupzz_loc * gzzz(i,j,k) + &
TWO * (gupxy_loc * gxyz(i,j,k) + gupxz_loc * gxzz(i,j,k) + gupyz_loc * gyzz(i,j,k))
enddo
enddo
enddo
!first kind of connection stored in gij,k !first kind of connection stored in gij,k
gxxx = gxx * Gamxxx + gxy * Gamyxx + gxz * Gamzxx gxxx = gxx * Gamxxx + gxy * Gamyxx + gxz * Gamzxx
@@ -658,187 +604,189 @@
!covariant second derivative of chi respect to tilted metric !covariant second derivative of chi respect to tilted metric
call fdderivs(ex,chi,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev) call fdderivs(ex,chi,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
do k=1,ex(3) fxx = fxx - Gamxxx * chix - Gamyxx * chiy - Gamzxx * chiz
do j=1,ex(2) fxy = fxy - Gamxxy * chix - Gamyxy * chiy - Gamzxy * chiz
do i=1,ex(1) fxz = fxz - Gamxxz * chix - Gamyxz * chiy - Gamzxz * chiz
fxx(i,j,k) = fxx(i,j,k) - Gamxxx(i,j,k) * chix(i,j,k) - Gamyxx(i,j,k) * chiy(i,j,k) - Gamzxx(i,j,k) * chiz(i,j,k) fyy = fyy - Gamxyy * chix - Gamyyy * chiy - Gamzyy * chiz
fxy(i,j,k) = fxy(i,j,k) - Gamxxy(i,j,k) * chix(i,j,k) - Gamyxy(i,j,k) * chiy(i,j,k) - Gamzxy(i,j,k) * chiz(i,j,k) fyz = fyz - Gamxyz * chix - Gamyyz * chiy - Gamzyz * chiz
fxz(i,j,k) = fxz(i,j,k) - Gamxxz(i,j,k) * chix(i,j,k) - Gamyxz(i,j,k) * chiy(i,j,k) - Gamzxz(i,j,k) * chiz(i,j,k) fzz = fzz - Gamxzz * chix - Gamyzz * chiy - Gamzzz * chiz
fyy(i,j,k) = fyy(i,j,k) - Gamxyy(i,j,k) * chix(i,j,k) - Gamyyy(i,j,k) * chiy(i,j,k) - Gamzyy(i,j,k) * chiz(i,j,k) ! Store D^l D_l chi - 3/(2*chi) D^l chi D_l chi in f
fyz(i,j,k) = fyz(i,j,k) - Gamxyz(i,j,k) * chix(i,j,k) - Gamyyz(i,j,k) * chiy(i,j,k) - Gamzyz(i,j,k) * chiz(i,j,k)
fzz(i,j,k) = fzz(i,j,k) - Gamxzz(i,j,k) * chix(i,j,k) - Gamyzz(i,j,k) * chiy(i,j,k) - Gamzzz(i,j,k) * chiz(i,j,k)
chin_loc = chin1(i,j,k) f = gupxx * ( fxx - F3o2/chin1 * chix * chix ) + &
f_loc = gupxx(i,j,k) * (fxx(i,j,k) - F3o2/chin_loc * chix(i,j,k) * chix(i,j,k)) + & gupyy * ( fyy - F3o2/chin1 * chiy * chiy ) + &
gupyy(i,j,k) * (fyy(i,j,k) - F3o2/chin_loc * chiy(i,j,k) * chiy(i,j,k)) + & gupzz * ( fzz - F3o2/chin1 * chiz * chiz ) + &
gupzz(i,j,k) * (fzz(i,j,k) - F3o2/chin_loc * chiz(i,j,k) * chiz(i,j,k)) + & TWO * gupxy * ( fxy - F3o2/chin1 * chix * chiy ) + &
TWO * gupxy(i,j,k) * (fxy(i,j,k) - F3o2/chin_loc * chix(i,j,k) * chiy(i,j,k)) + & TWO * gupxz * ( fxz - F3o2/chin1 * chix * chiz ) + &
TWO * gupxz(i,j,k) * (fxz(i,j,k) - F3o2/chin_loc * chix(i,j,k) * chiz(i,j,k)) + & TWO * gupyz * ( fyz - F3o2/chin1 * chiy * chiz )
TWO * gupyz(i,j,k) * (fyz(i,j,k) - F3o2/chin_loc * chiy(i,j,k) * chiz(i,j,k)) ! Add chi part to Ricci tensor:
f(i,j,k) = f_loc
Rxx(i,j,k) = Rxx(i,j,k) + (fxx(i,j,k) - chix(i,j,k)*chix(i,j,k)/chin_loc/TWO + gxx(i,j,k) * f_loc)/chin_loc/TWO Rxx = Rxx + (fxx - chix*chix/chin1/TWO + gxx * f)/chin1/TWO
Ryy(i,j,k) = Ryy(i,j,k) + (fyy(i,j,k) - chiy(i,j,k)*chiy(i,j,k)/chin_loc/TWO + gyy(i,j,k) * f_loc)/chin_loc/TWO Ryy = Ryy + (fyy - chiy*chiy/chin1/TWO + gyy * f)/chin1/TWO
Rzz(i,j,k) = Rzz(i,j,k) + (fzz(i,j,k) - chiz(i,j,k)*chiz(i,j,k)/chin_loc/TWO + gzz(i,j,k) * f_loc)/chin_loc/TWO Rzz = Rzz + (fzz - chiz*chiz/chin1/TWO + gzz * f)/chin1/TWO
Rxy(i,j,k) = Rxy(i,j,k) + (fxy(i,j,k) - chix(i,j,k)*chiy(i,j,k)/chin_loc/TWO + gxy(i,j,k) * f_loc)/chin_loc/TWO Rxy = Rxy + (fxy - chix*chiy/chin1/TWO + gxy * f)/chin1/TWO
Rxz(i,j,k) = Rxz(i,j,k) + (fxz(i,j,k) - chix(i,j,k)*chiz(i,j,k)/chin_loc/TWO + gxz(i,j,k) * f_loc)/chin_loc/TWO Rxz = Rxz + (fxz - chix*chiz/chin1/TWO + gxz * f)/chin1/TWO
Ryz(i,j,k) = Ryz(i,j,k) + (fyz(i,j,k) - chiy(i,j,k)*chiz(i,j,k)/chin_loc/TWO + gyz(i,j,k) * f_loc)/chin_loc/TWO Ryz = Ryz + (fyz - chiy*chiz/chin1/TWO + gyz * f)/chin1/TWO
enddo
enddo
enddo
! covariant second derivatives of the lapse respect to physical metric ! covariant second derivatives of the lapse respect to physical metric
call fdderivs(ex,Lap,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z, & call fdderivs(ex,Lap,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z, &
SYM,SYM,SYM,symmetry,Lev) SYM,SYM,SYM,symmetry,Lev)
do k=1,ex(3) gxxx = (gupxx * chix + gupxy * chiy + gupxz * chiz)/chin1
do j=1,ex(2) gxxy = (gupxy * chix + gupyy * chiy + gupyz * chiz)/chin1
do i=1,ex(1) gxxz = (gupxz * chix + gupyz * chiy + gupzz * chiz)/chin1
chin_loc = chin1(i,j,k) ! now get physical second kind of connection
gxxx(i,j,k) = (gupxx(i,j,k) * chix(i,j,k) + gupxy(i,j,k) * chiy(i,j,k) + gupxz(i,j,k) * chiz(i,j,k)) / chin_loc Gamxxx = Gamxxx - ( (chix + chix)/chin1 - gxx * gxxx )*HALF
gxxy(i,j,k) = (gupxy(i,j,k) * chix(i,j,k) + gupyy(i,j,k) * chiy(i,j,k) + gupyz(i,j,k) * chiz(i,j,k)) / chin_loc Gamyxx = Gamyxx - ( - gxx * gxxy )*HALF
gxxz(i,j,k) = (gupxz(i,j,k) * chix(i,j,k) + gupyz(i,j,k) * chiy(i,j,k) + gupzz(i,j,k) * chiz(i,j,k)) / chin_loc Gamzxx = Gamzxx - ( - gxx * gxxz )*HALF
Gamxyy = Gamxyy - ( - gyy * gxxx )*HALF
Gamyyy = Gamyyy - ( (chiy + chiy)/chin1 - gyy * gxxy )*HALF
Gamzyy = Gamzyy - ( - gyy * gxxz )*HALF
Gamxzz = Gamxzz - ( - gzz * gxxx )*HALF
Gamyzz = Gamyzz - ( - gzz * gxxy )*HALF
Gamzzz = Gamzzz - ( (chiz + chiz)/chin1 - gzz * gxxz )*HALF
Gamxxy = Gamxxy - ( chiy /chin1 - gxy * gxxx )*HALF
Gamyxy = Gamyxy - ( chix /chin1 - gxy * gxxy )*HALF
Gamzxy = Gamzxy - ( - gxy * gxxz )*HALF
Gamxxz = Gamxxz - ( chiz /chin1 - gxz * gxxx )*HALF
Gamyxz = Gamyxz - ( - gxz * gxxy )*HALF
Gamzxz = Gamzxz - ( chix /chin1 - gxz * gxxz )*HALF
Gamxyz = Gamxyz - ( - gyz * gxxx )*HALF
Gamyyz = Gamyyz - ( chiz /chin1 - gyz * gxxy )*HALF
Gamzyz = Gamzyz - ( chiy /chin1 - gyz * gxxz )*HALF
Gamxxx(i,j,k) = Gamxxx(i,j,k) - ( (chix(i,j,k) + chix(i,j,k))/chin_loc - gxx(i,j,k) * gxxx(i,j,k) )*HALF fxx = fxx - Gamxxx*Lapx - Gamyxx*Lapy - Gamzxx*Lapz
Gamyxx(i,j,k) = Gamyxx(i,j,k) - ( - gxx(i,j,k) * gxxy(i,j,k) )*HALF fyy = fyy - Gamxyy*Lapx - Gamyyy*Lapy - Gamzyy*Lapz
Gamzxx(i,j,k) = Gamzxx(i,j,k) - ( - gxx(i,j,k) * gxxz(i,j,k) )*HALF fzz = fzz - Gamxzz*Lapx - Gamyzz*Lapy - Gamzzz*Lapz
Gamxyy(i,j,k) = Gamxyy(i,j,k) - ( - gyy(i,j,k) * gxxx(i,j,k) )*HALF fxy = fxy - Gamxxy*Lapx - Gamyxy*Lapy - Gamzxy*Lapz
Gamyyy(i,j,k) = Gamyyy(i,j,k) - ( (chiy(i,j,k) + chiy(i,j,k))/chin_loc - gyy(i,j,k) * gxxy(i,j,k) )*HALF fxz = fxz - Gamxxz*Lapx - Gamyxz*Lapy - Gamzxz*Lapz
Gamzyy(i,j,k) = Gamzyy(i,j,k) - ( - gyy(i,j,k) * gxxz(i,j,k) )*HALF fyz = fyz - Gamxyz*Lapx - Gamyyz*Lapy - Gamzyz*Lapz
Gamxzz(i,j,k) = Gamxzz(i,j,k) - ( - gzz(i,j,k) * gxxx(i,j,k) )*HALF
Gamyzz(i,j,k) = Gamyzz(i,j,k) - ( - gzz(i,j,k) * gxxy(i,j,k) )*HALF
Gamzzz(i,j,k) = Gamzzz(i,j,k) - ( (chiz(i,j,k) + chiz(i,j,k))/chin_loc - gzz(i,j,k) * gxxz(i,j,k) )*HALF
Gamxxy(i,j,k) = Gamxxy(i,j,k) - ( chiy(i,j,k) /chin_loc - gxy(i,j,k) * gxxx(i,j,k) )*HALF
Gamyxy(i,j,k) = Gamyxy(i,j,k) - ( chix(i,j,k) /chin_loc - gxy(i,j,k) * gxxy(i,j,k) )*HALF
Gamzxy(i,j,k) = Gamzxy(i,j,k) - ( - gxy(i,j,k) * gxxz(i,j,k) )*HALF
Gamxxz(i,j,k) = Gamxxz(i,j,k) - ( chiz(i,j,k) /chin_loc - gxz(i,j,k) * gxxx(i,j,k) )*HALF
Gamyxz(i,j,k) = Gamyxz(i,j,k) - ( - gxz(i,j,k) * gxxy(i,j,k) )*HALF
Gamzxz(i,j,k) = Gamzxz(i,j,k) - ( chix(i,j,k) /chin_loc - gxz(i,j,k) * gxxz(i,j,k) )*HALF
Gamxyz(i,j,k) = Gamxyz(i,j,k) - ( - gyz(i,j,k) * gxxx(i,j,k) )*HALF
Gamyyz(i,j,k) = Gamyyz(i,j,k) - ( chiz(i,j,k) /chin_loc - gyz(i,j,k) * gxxy(i,j,k) )*HALF
Gamzyz(i,j,k) = Gamzyz(i,j,k) - ( chiy(i,j,k) /chin_loc - gyz(i,j,k) * gxxz(i,j,k) )*HALF
fxx(i,j,k) = fxx(i,j,k) - Gamxxx(i,j,k)*Lapx(i,j,k) - Gamyxx(i,j,k)*Lapy(i,j,k) - Gamzxx(i,j,k)*Lapz(i,j,k) ! store D^i D_i Lap in trK_rhs upto chi
fyy(i,j,k) = fyy(i,j,k) - Gamxyy(i,j,k)*Lapx(i,j,k) - Gamyyy(i,j,k)*Lapy(i,j,k) - Gamzyy(i,j,k)*Lapz(i,j,k) trK_rhs = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
fzz(i,j,k) = fzz(i,j,k) - Gamxzz(i,j,k)*Lapx(i,j,k) - Gamyzz(i,j,k)*Lapy(i,j,k) - Gamzzz(i,j,k)*Lapz(i,j,k) TWO* ( gupxy * fxy + gupxz * fxz + gupyz * fyz )
fxy(i,j,k) = fxy(i,j,k) - Gamxxy(i,j,k)*Lapx(i,j,k) - Gamyxy(i,j,k)*Lapy(i,j,k) - Gamzxy(i,j,k)*Lapz(i,j,k) #if 1
fxz(i,j,k) = fxz(i,j,k) - Gamxxz(i,j,k)*Lapx(i,j,k) - Gamyxz(i,j,k)*Lapy(i,j,k) - Gamzxz(i,j,k)*Lapz(i,j,k) !! follow bam code
fyz(i,j,k) = fyz(i,j,k) - Gamxyz(i,j,k)*Lapx(i,j,k) - Gamyyz(i,j,k)*Lapy(i,j,k) - Gamzyz(i,j,k)*Lapz(i,j,k) S = chin1 * ( gupxx * Sxx + gupyy * Syy + gupzz * Szz + &
TWO * ( gupxy * Sxy + gupxz * Sxz + gupyz * Syz ) )
f = F2o3 * trK * trK -(&
gupxx * ( &
gupxx * Axx * Axx + gupyy * Axy * Axy + gupzz * Axz * Axz + &
TWO * (gupxy * Axx * Axy + gupxz * Axx * Axz + gupyz * Axy * Axz) ) + &
gupyy * ( &
gupxx * Axy * Axy + gupyy * Ayy * Ayy + gupzz * Ayz * Ayz + &
TWO * (gupxy * Axy * Ayy + gupxz * Axy * Ayz + gupyz * Ayy * Ayz) ) + &
gupzz * ( &
gupxx * Axz * Axz + gupyy * Ayz * Ayz + gupzz * Azz * Azz + &
TWO * (gupxy * Axz * Ayz + gupxz * Axz * Azz + gupyz * Ayz * Azz) ) + &
TWO * ( &
gupxy * ( &
gupxx * Axx * Axy + gupyy * Axy * Ayy + gupzz * Axz * Ayz + &
gupxy * (Axx * Ayy + Axy * Axy) + &
gupxz * (Axx * Ayz + Axz * Axy) + &
gupyz * (Axy * Ayz + Axz * Ayy) ) + &
gupxz * ( &
gupxx * Axx * Axz + gupyy * Axy * Ayz + gupzz * Axz * Azz + &
gupxy * (Axx * Ayz + Axy * Axz) + &
gupxz * (Axx * Azz + Axz * Axz) + &
gupyz * (Axy * Azz + Axz * Ayz) ) + &
gupyz * ( &
gupxx * Axy * Axz + gupyy * Ayy * Ayz + gupzz * Ayz * Azz + &
gupxy * (Axy * Ayz + Ayy * Axz) + &
gupxz * (Axy * Azz + Ayz * Axz) + &
gupyz * (Ayy * Azz + Ayz * Ayz) ) )) -1.6d1*PI*rho + EIGHT * PI * S
f = - F1o3 *( gupxx * fxx + gupyy * fyy + gupzz * fzz + &
TWO* ( gupxy * fxy + gupxz * fxz + gupyz * fyz ) + alpn1/chin1*f)
trK_rhs(i,j,k) = gupxx(i,j,k) * fxx(i,j,k) + gupyy(i,j,k) * fyy(i,j,k) + gupzz(i,j,k) * fzz(i,j,k) + & fxx = alpn1 * (Rxx - EIGHT * PI * Sxx) - fxx
TWO * (gupxy(i,j,k) * fxy(i,j,k) + gupxz(i,j,k) * fxz(i,j,k) + gupyz(i,j,k) * fyz(i,j,k)) fxy = alpn1 * (Rxy - EIGHT * PI * Sxy) - fxy
enddo fxz = alpn1 * (Rxz - EIGHT * PI * Sxz) - fxz
enddo fyy = alpn1 * (Ryy - EIGHT * PI * Syy) - fyy
enddo fyz = alpn1 * (Ryz - EIGHT * PI * Syz) - fyz
do k=1,ex(3) fzz = alpn1 * (Rzz - EIGHT * PI * Szz) - fzz
do j=1,ex(2) #else
do i=1,ex(1) ! Add lapse and S_ij parts to Ricci tensor:
divb_loc = div_beta(i,j,k)
chin_loc = chin1(i,j,k)
S_loc = chin_loc * ( gupxx(i,j,k) * Sxx(i,j,k) + gupyy(i,j,k) * Syy(i,j,k) + gupzz(i,j,k) * Szz(i,j,k) + & fxx = alpn1 * (Rxx - EIGHT * PI * Sxx) - fxx
TWO * (gupxy(i,j,k) * Sxy(i,j,k) + gupxz(i,j,k) * Sxz(i,j,k) + gupyz(i,j,k) * Syz(i,j,k)) ) fxy = alpn1 * (Rxy - EIGHT * PI * Sxy) - fxy
S(i,j,k) = S_loc fxz = alpn1 * (Rxz - EIGHT * PI * Sxz) - fxz
fyy = alpn1 * (Ryy - EIGHT * PI * Syy) - fyy
fyz = alpn1 * (Ryz - EIGHT * PI * Syz) - fyz
fzz = alpn1 * (Rzz - EIGHT * PI * Szz) - fzz
f_loc = F2o3 * trK(i,j,k) * trK(i,j,k) - ( & ! Compute trace-free part (note: chi^-1 and chi cancel!):
gupxx(i,j,k) * ( gupxx(i,j,k) * Axx(i,j,k) * Axx(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + &
gupzz(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + &
TWO * (gupxy(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + gupxz(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + &
gupyz(i,j,k) * Axy(i,j,k) * Axz(i,j,k)) ) + &
gupyy(i,j,k) * ( gupxx(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayy(i,j,k) + &
gupzz(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + &
TWO * (gupxy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + gupxz(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + &
gupyz(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k)) ) + &
gupzz(i,j,k) * ( gupxx(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + &
gupzz(i,j,k) * Azz(i,j,k) * Azz(i,j,k) + &
TWO * (gupxy(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + gupxz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + &
gupyz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k)) ) + &
TWO * ( gupxy(i,j,k) * ( gupxx(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + &
gupzz(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + &
gupxy(i,j,k) * (Axx(i,j,k) * Ayy(i,j,k) + Axy(i,j,k) * Axy(i,j,k)) + &
gupxz(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Axy(i,j,k)) + &
gupyz(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Ayy(i,j,k)) ) + &
gupxz(i,j,k) * ( gupxx(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + &
gupzz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + &
gupxy(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axy(i,j,k) * Axz(i,j,k)) + &
gupxz(i,j,k) * (Axx(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Axz(i,j,k)) + &
gupyz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Ayz(i,j,k)) ) + &
gupyz(i,j,k) * ( gupxx(i,j,k) * Axy(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k) + &
gupzz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k) + &
gupxy(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Ayy(i,j,k) * Axz(i,j,k)) + &
gupxz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Axz(i,j,k)) + &
gupyz(i,j,k) * (Ayy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Ayz(i,j,k)) ) ) ) - &
F16 * PI * rho(i,j,k) + EIGHT * PI * S_loc
f_loc = -F1o3 * ( gupxx(i,j,k) * fxx(i,j,k) + gupyy(i,j,k) * fyy(i,j,k) + gupzz(i,j,k) * fzz(i,j,k) + & f = F1o3 *( gupxx * fxx + gupyy * fyy + gupzz * fzz + &
TWO * (gupxy(i,j,k) * fxy(i,j,k) + gupxz(i,j,k) * fxz(i,j,k) + gupyz(i,j,k) * fyz(i,j,k)) + & TWO* ( gupxy * fxy + gupxz * fxz + gupyz * fyz ) )
alpn1(i,j,k)/chin_loc * f_loc ) #endif
f(i,j,k) = f_loc
l_fxx = alpn1(i,j,k) * (Rxx(i,j,k) - EIGHT * PI * Sxx(i,j,k)) - fxx(i,j,k) Axx_rhs = fxx - gxx * f
l_fxy = alpn1(i,j,k) * (Rxy(i,j,k) - EIGHT * PI * Sxy(i,j,k)) - fxy(i,j,k) Ayy_rhs = fyy - gyy * f
l_fxz = alpn1(i,j,k) * (Rxz(i,j,k) - EIGHT * PI * Sxz(i,j,k)) - fxz(i,j,k) Azz_rhs = fzz - gzz * f
l_fyy = alpn1(i,j,k) * (Ryy(i,j,k) - EIGHT * PI * Syy(i,j,k)) - fyy(i,j,k) Axy_rhs = fxy - gxy * f
l_fyz = alpn1(i,j,k) * (Ryz(i,j,k) - EIGHT * PI * Syz(i,j,k)) - fyz(i,j,k) Axz_rhs = fxz - gxz * f
l_fzz = alpn1(i,j,k) * (Rzz(i,j,k) - EIGHT * PI * Szz(i,j,k)) - fzz(i,j,k) Ayz_rhs = fyz - gyz * f
Axx_rhs(i,j,k) = l_fxx - gxx(i,j,k) * f_loc ! Now: store A_il A^l_j into fij:
Ayy_rhs(i,j,k) = l_fyy - gyy(i,j,k) * f_loc
Azz_rhs(i,j,k) = l_fzz - gzz(i,j,k) * f_loc
Axy_rhs(i,j,k) = l_fxy - gxy(i,j,k) * f_loc
Axz_rhs(i,j,k) = l_fxz - gxz(i,j,k) * f_loc
Ayz_rhs(i,j,k) = l_fyz - gyz(i,j,k) * f_loc
fxx(i,j,k) = gupxx(i,j,k) * Axx(i,j,k) * Axx(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + & fxx = gupxx * Axx * Axx + gupyy * Axy * Axy + gupzz * Axz * Axz + &
gupzz(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + TWO * (gupxy(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + & TWO * (gupxy * Axx * Axy + gupxz * Axx * Axz + gupyz * Axy * Axz)
gupxz(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + gupyz(i,j,k) * Axy(i,j,k) * Axz(i,j,k)) fyy = gupxx * Axy * Axy + gupyy * Ayy * Ayy + gupzz * Ayz * Ayz + &
fyy(i,j,k) = gupxx(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayy(i,j,k) + & TWO * (gupxy * Axy * Ayy + gupxz * Axy * Ayz + gupyz * Ayy * Ayz)
gupzz(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + TWO * (gupxy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + & fzz = gupxx * Axz * Axz + gupyy * Ayz * Ayz + gupzz * Azz * Azz + &
gupxz(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + gupyz(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k)) TWO * (gupxy * Axz * Ayz + gupxz * Axz * Azz + gupyz * Ayz * Azz)
fzz(i,j,k) = gupxx(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + & fxy = gupxx * Axx * Axy + gupyy * Axy * Ayy + gupzz * Axz * Ayz + &
gupzz(i,j,k) * Azz(i,j,k) * Azz(i,j,k) + TWO * (gupxy(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + & gupxy *(Axx * Ayy + Axy * Axy) + &
gupxz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + gupyz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k)) gupxz *(Axx * Ayz + Axz * Axy) + &
fxy(i,j,k) = gupxx(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + & gupyz *(Axy * Ayz + Axz * Ayy)
gupzz(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + gupxy(i,j,k) * (Axx(i,j,k) * Ayy(i,j,k) + Axy(i,j,k) * Axy(i,j,k)) + & fxz = gupxx * Axx * Axz + gupyy * Axy * Ayz + gupzz * Axz * Azz + &
gupxz(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Axy(i,j,k)) + & gupxy *(Axx * Ayz + Axy * Axz) + &
gupyz(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Ayy(i,j,k)) gupxz *(Axx * Azz + Axz * Axz) + &
fxz(i,j,k) = gupxx(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + & gupyz *(Axy * Azz + Axz * Ayz)
gupzz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + gupxy(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axy(i,j,k) * Axz(i,j,k)) + & fyz = gupxx * Axy * Axz + gupyy * Ayy * Ayz + gupzz * Ayz * Azz + &
gupxz(i,j,k) * (Axx(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Axz(i,j,k)) + & gupxy *(Axy * Ayz + Ayy * Axz) + &
gupyz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Ayz(i,j,k)) gupxz *(Axy * Azz + Ayz * Axz) + &
fyz(i,j,k) = gupxx(i,j,k) * Axy(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k) + & gupyz *(Ayy * Azz + Ayz * Ayz)
gupzz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k) + gupxy(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Ayy(i,j,k) * Axz(i,j,k)) + &
gupxz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Axz(i,j,k)) + &
gupyz(i,j,k) * (Ayy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Ayz(i,j,k))
trK_rhs(i,j,k) = chin_loc * trK_rhs(i,j,k) f = chin1
! store D^i D_i Lap in trK_rhs
trK_rhs = f*trK_rhs
Axx_rhs(i,j,k) = chin_loc * Axx_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Axx(i,j,k) - TWO * fxx(i,j,k)) + & Axx_rhs = f * Axx_rhs+ alpn1 * (trK * Axx - TWO * fxx) + &
TWO * (Axx(i,j,k) * betaxx(i,j,k) + Axy(i,j,k) * betayx(i,j,k) + Axz(i,j,k) * betazx(i,j,k)) - & TWO * ( Axx * betaxx + Axy * betayx + Axz * betazx )- &
F2o3 * Axx(i,j,k) * divb_loc F2o3 * Axx * div_beta
Ayy_rhs(i,j,k) = chin_loc * Ayy_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Ayy(i,j,k) - TWO * fyy(i,j,k)) + &
TWO * (Axy(i,j,k) * betaxy(i,j,k) + Ayy(i,j,k) * betayy(i,j,k) + Ayz(i,j,k) * betazy(i,j,k)) - &
F2o3 * Ayy(i,j,k) * divb_loc
Azz_rhs(i,j,k) = chin_loc * Azz_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Azz(i,j,k) - TWO * fzz(i,j,k)) + &
TWO * (Axz(i,j,k) * betaxz(i,j,k) + Ayz(i,j,k) * betayz(i,j,k) + Azz(i,j,k) * betazz(i,j,k)) - &
F2o3 * Azz(i,j,k) * divb_loc
Axy_rhs(i,j,k) = chin_loc * Axy_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Axy(i,j,k) - TWO * fxy(i,j,k)) + &
Axx(i,j,k) * betaxy(i,j,k) + Axz(i,j,k) * betazy(i,j,k) + Ayy(i,j,k) * betayx(i,j,k) + &
Ayz(i,j,k) * betazx(i,j,k) + F1o3 * Axy(i,j,k) * divb_loc - Axy(i,j,k) * betazz(i,j,k)
Ayz_rhs(i,j,k) = chin_loc * Ayz_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Ayz(i,j,k) - TWO * fyz(i,j,k)) + &
Axy(i,j,k) * betaxz(i,j,k) + Ayy(i,j,k) * betayz(i,j,k) + Axz(i,j,k) * betaxy(i,j,k) + &
Azz(i,j,k) * betazy(i,j,k) + F1o3 * Ayz(i,j,k) * divb_loc - Ayz(i,j,k) * betaxx(i,j,k)
Axz_rhs(i,j,k) = chin_loc * Axz_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Axz(i,j,k) - TWO * fxz(i,j,k)) + &
Axx(i,j,k) * betaxz(i,j,k) + Axy(i,j,k) * betayz(i,j,k) + Ayz(i,j,k) * betayx(i,j,k) + &
Azz(i,j,k) * betazx(i,j,k) + F1o3 * Axz(i,j,k) * divb_loc - Axz(i,j,k) * betayy(i,j,k)
trK_rhs(i,j,k) = - trK_rhs(i,j,k) + alpn1(i,j,k) * ( F1o3 * trK(i,j,k) * trK(i,j,k) + & Ayy_rhs = f * Ayy_rhs+ alpn1 * (trK * Ayy - TWO * fyy) + &
gupxx(i,j,k) * fxx(i,j,k) + gupyy(i,j,k) * fyy(i,j,k) + gupzz(i,j,k) * fzz(i,j,k) + & TWO * ( Axy * betaxy + Ayy * betayy + Ayz * betazy )- &
TWO * (gupxy(i,j,k) * fxy(i,j,k) + gupxz(i,j,k) * fxz(i,j,k) + gupyz(i,j,k) * fyz(i,j,k)) + & F2o3 * Ayy * div_beta
FOUR * PI * (rho(i,j,k) + S_loc) )
enddo Azz_rhs = f * Azz_rhs+ alpn1 * (trK * Azz - TWO * fzz) + &
enddo TWO * ( Axz * betaxz + Ayz * betayz + Azz * betazz )- &
enddo F2o3 * Azz * div_beta
Axy_rhs = f * Axy_rhs+ alpn1 *( trK * Axy - TWO * fxy )+ &
Axx * betaxy + Axz * betazy + &
Ayy * betayx + Ayz * betazx + &
F1o3 * Axy * div_beta - Axy * betazz
Ayz_rhs = f * Ayz_rhs+ alpn1 *( trK * Ayz - TWO * fyz )+ &
Axy * betaxz + Ayy * betayz + &
Axz * betaxy + Azz * betazy + &
F1o3 * Ayz * div_beta - Ayz * betaxx
Axz_rhs = f * Axz_rhs+ alpn1 *( trK * Axz - TWO * fxz )+ &
Axx * betaxz + Axy * betayz + &
Ayz * betayx + Azz * betazx + &
F1o3 * Axz * div_beta - Axz * betayy !rhs for Aij
! Compute trace of S_ij
S = f * ( gupxx * Sxx + gupyy * Syy + gupzz * Szz + &
TWO * ( gupxy * Sxy + gupxz * Sxz + gupyz * Syz ) )
trK_rhs = - trK_rhs + alpn1 *( F1o3 * trK * trK + &
gupxx * fxx + gupyy * fyy + gupzz * fzz + &
TWO * ( gupxy * fxy + gupxz * fxz + gupyz * fyz ) + &
FOUR * PI * ( rho + S )) !rhs for trK
!!!! gauge variable part !!!! gauge variable part
@@ -1000,15 +948,15 @@
!!!!!!!!!advection term + Kreiss-Oliger dissipation (merged for cache efficiency) !!!!!!!!!advection term + Kreiss-Oliger dissipation (merged for cache efficiency)
! lopsided_kodis shares the symmetry_bd buffer between advection and ! lopsided_kodis shares the symmetry_bd buffer between advection and
! dissipation, eliminating redundant full-grid copies. For metric variables ! dissipation, eliminating redundant full-grid copies. For metric variables
! gxx/gyy/gzz (=dxx/dyy/dzz+1): stencil coefficients sum to zero, ! gxx/gyy/gzz (=dxx/dyy/dzz+1): kodis stencil coefficients sum to zero,
! so the constant offset has no effect on dissipation. ! so the constant offset has no effect on dissipation.
call lopsided_kodis(ex,X,Y,Z,dxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps) call lopsided_kodis(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps) call lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
call lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps) call lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps)
call lopsided_kodis(ex,X,Y,Z,dyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps) call lopsided_kodis(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps) call lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps)
call lopsided_kodis(ex,X,Y,Z,dzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps) call lopsided_kodis(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps) call lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps) call lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps)

View File

@@ -32,19 +32,6 @@
#define f_compute_rhs_Z4c_ss compute_rhs_z4c_ss_ #define f_compute_rhs_Z4c_ss compute_rhs_z4c_ss_
#define f_compute_constraint_fr compute_constraint_fr_ #define f_compute_constraint_fr compute_constraint_fr_
#endif #endif
#ifdef __cplusplus
extern "C"
{
#endif
void f_bssn_rhs_kernel_timing_reset();
int f_bssn_rhs_kernel_timing_bucket_count();
const double *f_bssn_rhs_kernel_timing_local_seconds();
const char *f_bssn_rhs_kernel_timing_label(int);
#ifdef __cplusplus
}
#endif
extern "C" extern "C"
{ {
int f_compute_rhs_bssn(int *, double &, double *, double *, double *, // ex,T,X,Y,Z int f_compute_rhs_bssn(int *, double &, double *, double *, double *, // ex,T,X,Y,Z

View File

@@ -130,11 +130,7 @@ void cgh::compose_cgh(int nprocs)
for (int lev = 0; lev < levels; lev++) for (int lev = 0; lev < levels; lev++)
{ {
checkPatchList(PatL[lev], false); checkPatchList(PatL[lev], false);
#ifdef INTERP_LB_OPTIMIZE
Parallel::distribute_optimize(PatL[lev], nprocs, ingfs, fngfs, false);
#else
Parallel::distribute(PatL[lev], nprocs, ingfs, fngfs, false); Parallel::distribute(PatL[lev], nprocs, ingfs, fngfs, false);
#endif
#if (RPB == 1) #if (RPB == 1)
// we need distributed box of PatL[lev] and PatL[lev-1] // we need distributed box of PatL[lev] and PatL[lev-1]
if (lev > 0) if (lev > 0)

View File

@@ -1513,7 +1513,6 @@
real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)) :: fh real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)) :: fh
real*8, dimension(3) :: SoA real*8, dimension(3) :: SoA
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
integer :: i_core_min,i_core_max,j_core_min,j_core_max,k_core_min,k_core_max
real*8 :: Sdxdx,Sdydy,Sdzdz,Fdxdx,Fdydy,Fdzdz real*8 :: Sdxdx,Sdydy,Sdzdz,Fdxdx,Fdydy,Fdzdz
real*8 :: Sdxdy,Sdxdz,Sdydz,Fdxdy,Fdxdz,Fdydz real*8 :: Sdxdy,Sdxdz,Sdydz,Fdxdy,Fdxdz,Fdydz
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2 integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
@@ -1566,47 +1565,9 @@
fxz = ZEO fxz = ZEO
fyz = ZEO fyz = ZEO
i_core_min = max(1, imin+2)
i_core_max = min(ex(1), imax-2)
j_core_min = max(1, jmin+2)
j_core_max = min(ex(2), jmax-2)
k_core_min = max(1, kmin+2)
k_core_max = min(ex(3), kmax-2)
if(i_core_min <= i_core_max .and. j_core_min <= j_core_max .and. k_core_min <= k_core_max)then
do k=k_core_min,k_core_max
do j=j_core_min,j_core_max
do i=i_core_min,i_core_max
! interior points always use 4th-order stencils without branch checks
fxx(i,j,k) = Fdxdx*(-fh(i-2,j,k)+F16*fh(i-1,j,k)-F30*fh(i,j,k) &
-fh(i+2,j,k)+F16*fh(i+1,j,k) )
fyy(i,j,k) = Fdydy*(-fh(i,j-2,k)+F16*fh(i,j-1,k)-F30*fh(i,j,k) &
-fh(i,j+2,k)+F16*fh(i,j+1,k) )
fzz(i,j,k) = Fdzdz*(-fh(i,j,k-2)+F16*fh(i,j,k-1)-F30*fh(i,j,k) &
-fh(i,j,k+2)+F16*fh(i,j,k+1) )
fxy(i,j,k) = Fdxdy*( (fh(i-2,j-2,k)-F8*fh(i-1,j-2,k)+F8*fh(i+1,j-2,k)-fh(i+2,j-2,k)) &
-F8 *(fh(i-2,j-1,k)-F8*fh(i-1,j-1,k)+F8*fh(i+1,j-1,k)-fh(i+2,j-1,k)) &
+F8 *(fh(i-2,j+1,k)-F8*fh(i-1,j+1,k)+F8*fh(i+1,j+1,k)-fh(i+2,j+1,k)) &
- (fh(i-2,j+2,k)-F8*fh(i-1,j+2,k)+F8*fh(i+1,j+2,k)-fh(i+2,j+2,k)))
fxz(i,j,k) = Fdxdz*( (fh(i-2,j,k-2)-F8*fh(i-1,j,k-2)+F8*fh(i+1,j,k-2)-fh(i+2,j,k-2)) &
-F8 *(fh(i-2,j,k-1)-F8*fh(i-1,j,k-1)+F8*fh(i+1,j,k-1)-fh(i+2,j,k-1)) &
+F8 *(fh(i-2,j,k+1)-F8*fh(i-1,j,k+1)+F8*fh(i+1,j,k+1)-fh(i+2,j,k+1)) &
- (fh(i-2,j,k+2)-F8*fh(i-1,j,k+2)+F8*fh(i+1,j,k+2)-fh(i+2,j,k+2)))
fyz(i,j,k) = Fdydz*( (fh(i,j-2,k-2)-F8*fh(i,j-1,k-2)+F8*fh(i,j+1,k-2)-fh(i,j+2,k-2)) &
-F8 *(fh(i,j-2,k-1)-F8*fh(i,j-1,k-1)+F8*fh(i,j+1,k-1)-fh(i,j+2,k-1)) &
+F8 *(fh(i,j-2,k+1)-F8*fh(i,j-1,k+1)+F8*fh(i,j+1,k+1)-fh(i,j+2,k+1)) &
- (fh(i,j-2,k+2)-F8*fh(i,j-1,k+2)+F8*fh(i,j+1,k+2)-fh(i,j+2,k+2)))
enddo
enddo
enddo
endif
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
if(i>=i_core_min .and. i<=i_core_max .and. &
j>=j_core_min .and. j<=j_core_max .and. &
k>=k_core_min .and. k<=k_core_max) cycle
!~~~~~~ fxx !~~~~~~ fxx
if(i+2 <= imax .and. i-2 >= imin)then if(i+2 <= imax .and. i-2 >= imin)then
! !

Some files were not shown because too many files have changed in this diff Show More