Compare commits
93 Commits
cjy-oneapi
...
lzd-cuda
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d9c7ea8085 | ||
|
c689cc8dc9
|
|||
|
60fee8f1c1
|
|||
|
843b116954
|
|||
|
c768e1220b
|
|||
|
02f149e2e3
|
|||
|
422e8ec4dc
|
|||
|
c4909b9843
|
|||
|
f521a97563
|
|||
|
53c55451b3
|
|||
|
768345954f
|
|||
|
9a6df6438b
|
|||
|
8e9463aa90
|
|||
|
7c6f15002e
|
|||
|
6410c62e3e
|
|||
|
11977eb82f
|
|||
|
cce8a44fc4
|
|||
|
c589097618
|
|||
|
b713e5a9be
|
|||
|
0396701572
|
|||
| bb20c9a876 | |||
|
8fe60ea703
|
|||
|
9ab7e7c7f9
|
|||
| f9119e8a2a | |||
| 726d743376 | |||
| af344bf1e5 | |||
| 7191fc0b96 | |||
| b3ec244cf9 | |||
| e952ee8e91 | |||
| c5d1268dd1 | |||
| 4bdfc90f22 | |||
| c49a4e00c9 | |||
| 1b3c0b80d2 | |||
| 636e35bfd8 | |||
| 7f2a391dd2 | |||
| 4fa12a2009 | |||
| 86a683de26 | |||
|
aaf7bf0a26
|
|||
| 8c1f4d8108 | |||
| d310ef918b | |||
| b35e1b289f | |||
| 05851b2c59 | |||
| 3b39583d67 | |||
|
9c44d1c885
|
|||
|
4b9de28feb
|
|||
|
4eb5dc4ddb
|
|||
| 688bdb6708 | |||
| 5070134857 | |||
| 4012e9d068 | |||
| b3c367f15b | |||
| e73911f292 | |||
| 7543d3e8c7 | |||
| 42c69fab24 | |||
| 95220a05c8 | |||
| 466b084a58 | |||
| 61ccef9f97 | |||
| e11363e06e | |||
| f70e90f694 | |||
|
|
75dd5353b0 | ||
|
|
23a82d063b | ||
| 524d1d1512 | |||
| 44efb2e08c | |||
| 16013081e0 | |||
| 03416a7b28 | |||
| cca3c16c2b | |||
| e5231849ee | |||
| a766e49ff0 | |||
| 1a518cd3f6 | |||
| 1dc622e516 | |||
| 3046a0ccde | |||
| d4ec69c98a | |||
| 2c0a3055d4 | |||
| 1eba73acbe | |||
| b91cfff301 | |||
| e29ca2dca9 | |||
| 6493101ca0 | |||
| 169986cde1 | |||
| 1fbc213888 | |||
| 6024708a48 | |||
| bc457d981e | |||
| 51dead090e | |||
| 34d6922a66 | |||
| 8010ad27ed | |||
| 38e691f013 | |||
| 808387aa11 | |||
| c2b676abf2 | |||
| 2c60533501 | |||
| 318b5254cc | |||
| 3cee05f262 | |||
| e6329b013d | |||
| 2791d2e225 | |||
| 72ce153e48 | |||
|
|
79af79d471 |
4
.gitignore
vendored
4
.gitignore
vendored
@@ -1,6 +1,6 @@
|
||||
__pycache__
|
||||
GW150914
|
||||
GW150914-origin
|
||||
GW150914*
|
||||
docs
|
||||
*.tmp
|
||||
|
||||
.codex
|
||||
@@ -16,9 +16,9 @@ import numpy
|
||||
File_directory = "GW150914" ## output file directory
|
||||
Output_directory = "binary_output" ## binary data file directory
|
||||
## The file directory name should not be too long
|
||||
MPI_processes = 64 ## number of mpi processes used in the simulation
|
||||
MPI_processes = 8 ## number of mpi processes used in the simulation
|
||||
|
||||
GPU_Calculation = "no" ## Use GPU or not
|
||||
GPU_Calculation = "yes" ## Use GPU or not
|
||||
## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)
|
||||
CPU_Part = 1.0
|
||||
GPU_Part = 0.0
|
||||
|
||||
@@ -258,7 +258,7 @@ print()
|
||||
if (input_data.GPU_Calculation == "no"):
|
||||
ABE_file = os.path.join(AMSS_NCKU_source_copy, "ABE")
|
||||
elif (input_data.GPU_Calculation == "yes"):
|
||||
ABE_file = os.path.join(AMSS_NCKU_source_copy, "ABEGPU")
|
||||
ABE_file = os.path.join(AMSS_NCKU_source_copy, "ABE_CUDA")
|
||||
|
||||
if not os.path.exists( ABE_file ):
|
||||
print( )
|
||||
|
||||
@@ -1,10 +1,19 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
AMSS-NCKU GW150914 Simulation Regression Test Script
|
||||
AMSS-NCKU GW150914 Simulation Regression Test Script (Comprehensive Version)
|
||||
|
||||
Verification Requirements:
|
||||
1. XY-plane trajectory RMS error < 1% (Optimized vs. baseline, max of BH1 and BH2)
|
||||
2. ADM constraint violation < 2 (Grid Level 0)
|
||||
Verification Requirements:
|
||||
1. RMS errors < 1% for:
|
||||
- 3D Vector Total RMS
|
||||
- X Component RMS
|
||||
- Y Component RMS
|
||||
- Z Component RMS
|
||||
2. ADM constraint violation < 2 (Grid Level 0)
|
||||
3. The following figure PDFs must match GW150914-origin exactly after rasterization:
|
||||
- ADM_Constraint_Grid_Level_0.pdf
|
||||
- BH_Trajectory_21_XY.pdf
|
||||
- BH_Trajectory_XY.pdf
|
||||
The script also reports the percentage of differing pixels for each figure.
|
||||
|
||||
RMS Calculation Method:
|
||||
- Computes trajectory deviation on the XY plane independently for BH1 and BH2
|
||||
@@ -16,9 +25,13 @@ Default: output_dir = GW150914/AMSS_NCKU_output
|
||||
Reference: GW150914-origin (baseline simulation)
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import sys
|
||||
import os
|
||||
import numpy as np
|
||||
import sys
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
from PIL import Image
|
||||
|
||||
# ANSI Color Codes
|
||||
class Color:
|
||||
@@ -45,91 +58,200 @@ def load_bh_trajectory(filepath):
|
||||
}
|
||||
|
||||
|
||||
def load_constraint_data(filepath):
|
||||
"""Load constraint violation data"""
|
||||
data = []
|
||||
def load_constraint_data(filepath):
|
||||
"""Load constraint violation data"""
|
||||
data = []
|
||||
with open(filepath, 'r') as f:
|
||||
for line in f:
|
||||
if line.startswith('#'):
|
||||
continue
|
||||
parts = line.split()
|
||||
if len(parts) >= 8:
|
||||
data.append([float(x) for x in parts[:8]])
|
||||
return np.array(data)
|
||||
data.append([float(x) for x in parts[:8]])
|
||||
return np.array(data)
|
||||
|
||||
|
||||
def resolve_figure_dir(path):
|
||||
"""Resolve the sibling figure directory from an output or figure path."""
|
||||
normalized = os.path.normpath(path)
|
||||
if os.path.basename(normalized) == "figure":
|
||||
return normalized
|
||||
return os.path.join(os.path.dirname(normalized), "figure")
|
||||
|
||||
|
||||
def render_pdf_to_images(pdf_path, dpi=150):
|
||||
"""Render a PDF to RGB images using Ghostscript."""
|
||||
gs_path = shutil.which("gs")
|
||||
if gs_path is None:
|
||||
raise RuntimeError("Ghostscript executable 'gs' was not found in PATH")
|
||||
|
||||
with tempfile.TemporaryDirectory(prefix="amss_verify_pdf_") as temp_dir:
|
||||
output_pattern = os.path.join(temp_dir, "page-%03d.ppm")
|
||||
cmd = [
|
||||
gs_path,
|
||||
"-q",
|
||||
"-dSAFER",
|
||||
"-dBATCH",
|
||||
"-dNOPAUSE",
|
||||
"-sDEVICE=ppmraw",
|
||||
f"-r{dpi}",
|
||||
f"-o{output_pattern}",
|
||||
pdf_path
|
||||
]
|
||||
|
||||
try:
|
||||
subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True)
|
||||
except subprocess.CalledProcessError as exc:
|
||||
message = exc.stderr.strip() or str(exc)
|
||||
raise RuntimeError(f"Failed to render PDF '{pdf_path}': {message}") from exc
|
||||
|
||||
ppm_files = sorted(
|
||||
os.path.join(temp_dir, filename)
|
||||
for filename in os.listdir(temp_dir)
|
||||
if filename.endswith(".ppm")
|
||||
)
|
||||
|
||||
if not ppm_files:
|
||||
raise RuntimeError(f"No rendered pages were produced for '{pdf_path}'")
|
||||
|
||||
images = []
|
||||
for ppm_file in ppm_files:
|
||||
with Image.open(ppm_file) as img:
|
||||
images.append(np.array(img.convert("RGB"), dtype=np.uint8))
|
||||
|
||||
return images
|
||||
|
||||
|
||||
def compare_rendered_pages(ref_img, target_img):
|
||||
"""Return (different_pixels, total_pixels) for two rendered RGB pages."""
|
||||
ref_h, ref_w = ref_img.shape[:2]
|
||||
tgt_h, tgt_w = target_img.shape[:2]
|
||||
total_pixels = max(ref_h, tgt_h) * max(ref_w, tgt_w)
|
||||
|
||||
if ref_h == tgt_h and ref_w == tgt_w:
|
||||
different_pixels = int(np.count_nonzero(np.any(ref_img != target_img, axis=2)))
|
||||
return different_pixels, total_pixels
|
||||
|
||||
diff_mask = np.ones((max(ref_h, tgt_h), max(ref_w, tgt_w)), dtype=bool)
|
||||
overlap_h = min(ref_h, tgt_h)
|
||||
overlap_w = min(ref_w, tgt_w)
|
||||
overlap_diff = np.any(ref_img[:overlap_h, :overlap_w] != target_img[:overlap_h, :overlap_w], axis=2)
|
||||
diff_mask[:overlap_h, :overlap_w] = overlap_diff
|
||||
different_pixels = int(np.count_nonzero(diff_mask))
|
||||
return different_pixels, total_pixels
|
||||
|
||||
|
||||
def compare_pdf_images(ref_pdf, target_pdf, dpi=150, threshold_percent=0.001):
|
||||
"""Compare two PDFs by rasterizing them and counting differing pixels."""
|
||||
ref_pages = render_pdf_to_images(ref_pdf, dpi=dpi)
|
||||
target_pages = render_pdf_to_images(target_pdf, dpi=dpi)
|
||||
|
||||
total_pixels = 0
|
||||
different_pixels = 0
|
||||
max_pages = max(len(ref_pages), len(target_pages))
|
||||
|
||||
for page_idx in range(max_pages):
|
||||
if page_idx < len(ref_pages) and page_idx < len(target_pages):
|
||||
page_diff, page_total = compare_rendered_pages(ref_pages[page_idx], target_pages[page_idx])
|
||||
else:
|
||||
existing_page = ref_pages[page_idx] if page_idx < len(ref_pages) else target_pages[page_idx]
|
||||
page_total = existing_page.shape[0] * existing_page.shape[1]
|
||||
page_diff = page_total
|
||||
|
||||
total_pixels += page_total
|
||||
different_pixels += page_diff
|
||||
|
||||
diff_percent = (different_pixels / total_pixels * 100.0) if total_pixels else 0.0
|
||||
return {
|
||||
"different_pixels": different_pixels,
|
||||
"total_pixels": total_pixels,
|
||||
"diff_percent": diff_percent,
|
||||
"pages_ref": len(ref_pages),
|
||||
"pages_target": len(target_pages),
|
||||
"passed": diff_percent < threshold_percent
|
||||
}
|
||||
|
||||
|
||||
def compare_required_figures(reference_figure_dir, target_figure_dir):
|
||||
"""Compare the required GW150914 figure PDFs."""
|
||||
figure_names = [
|
||||
"ADM_Constraint_Grid_Level_0.pdf",
|
||||
"BH_Trajectory_21_XY.pdf",
|
||||
"BH_Trajectory_XY.pdf"
|
||||
]
|
||||
|
||||
results = []
|
||||
for figure_name in figure_names:
|
||||
ref_pdf = os.path.join(reference_figure_dir, figure_name)
|
||||
target_pdf = os.path.join(target_figure_dir, figure_name)
|
||||
|
||||
if not os.path.exists(ref_pdf):
|
||||
raise FileNotFoundError(f"Reference figure not found: {ref_pdf}")
|
||||
if not os.path.exists(target_pdf):
|
||||
raise FileNotFoundError(f"Target figure not found: {target_pdf}")
|
||||
|
||||
comparison = compare_pdf_images(ref_pdf, target_pdf)
|
||||
comparison["name"] = figure_name
|
||||
results.append(comparison)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def calculate_rms_error(bh_data_ref, bh_data_target):
|
||||
def calculate_all_rms_errors(bh_data_ref, bh_data_target):
|
||||
"""
|
||||
Calculate trajectory-based RMS error on the XY plane between baseline and optimized simulations.
|
||||
|
||||
This function computes the RMS error independently for BH1 and BH2 trajectories,
|
||||
then returns the maximum of the two as the final RMS error metric.
|
||||
|
||||
For each black hole, the RMS is calculated as:
|
||||
RMS = sqrt( (1/M) * sum( (Δr_i / r_i^max)^2 ) ) × 100%
|
||||
|
||||
where:
|
||||
Δr_i = sqrt((x_ref,i - x_new,i)^2 + (y_ref,i - y_new,i)^2)
|
||||
r_i^max = max(sqrt(x_ref,i^2 + y_ref,i^2), sqrt(x_new,i^2 + y_new,i^2))
|
||||
|
||||
Args:
|
||||
bh_data_ref: Reference (baseline) trajectory data
|
||||
bh_data_target: Target (optimized) trajectory data
|
||||
|
||||
Returns:
|
||||
rms_value: Final RMS error as a percentage (max of BH1 and BH2)
|
||||
error: Error message if any
|
||||
Calculate 3D Vector RMS and component-wise RMS (X, Y, Z) independently.
|
||||
Uses r = sqrt(x^2 + y^2) as the denominator for all error normalizations.
|
||||
Returns the maximum error between BH1 and BH2 for each category.
|
||||
"""
|
||||
# Align data: truncate to the length of the shorter dataset
|
||||
M = min(len(bh_data_ref['time']), len(bh_data_target['time']))
|
||||
|
||||
if M < 10:
|
||||
return None, "Insufficient data points for comparison"
|
||||
|
||||
# Extract XY coordinates for both black holes
|
||||
x1_ref = bh_data_ref['x1'][:M]
|
||||
y1_ref = bh_data_ref['y1'][:M]
|
||||
x2_ref = bh_data_ref['x2'][:M]
|
||||
y2_ref = bh_data_ref['y2'][:M]
|
||||
results = {}
|
||||
|
||||
x1_new = bh_data_target['x1'][:M]
|
||||
y1_new = bh_data_target['y1'][:M]
|
||||
x2_new = bh_data_target['x2'][:M]
|
||||
y2_new = bh_data_target['y2'][:M]
|
||||
for bh in ['1', '2']:
|
||||
x_r, y_r, z_r = bh_data_ref[f'x{bh}'][:M], bh_data_ref[f'y{bh}'][:M], bh_data_ref[f'z{bh}'][:M]
|
||||
x_n, y_n, z_n = bh_data_target[f'x{bh}'][:M], bh_data_target[f'y{bh}'][:M], bh_data_target[f'z{bh}'][:M]
|
||||
|
||||
# Calculate RMS for BH1
|
||||
delta_r1 = np.sqrt((x1_ref - x1_new)**2 + (y1_ref - y1_new)**2)
|
||||
r1_ref = np.sqrt(x1_ref**2 + y1_ref**2)
|
||||
r1_new = np.sqrt(x1_new**2 + y1_new**2)
|
||||
r1_max = np.maximum(r1_ref, r1_new)
|
||||
# 核心修改:根据组委会的邮件指示,分母统一使用 r = sqrt(x^2 + y^2)
|
||||
r_ref = np.sqrt(x_r**2 + y_r**2)
|
||||
r_new = np.sqrt(x_n**2 + y_n**2)
|
||||
denom_max = np.maximum(r_ref, r_new)
|
||||
|
||||
# Calculate RMS for BH2
|
||||
delta_r2 = np.sqrt((x2_ref - x2_new)**2 + (y2_ref - y2_new)**2)
|
||||
r2_ref = np.sqrt(x2_ref**2 + y2_ref**2)
|
||||
r2_new = np.sqrt(x2_new**2 + y2_new**2)
|
||||
r2_max = np.maximum(r2_ref, r2_new)
|
||||
valid = denom_max > 1e-15
|
||||
if np.sum(valid) < 10:
|
||||
results[f'BH{bh}'] = { '3D_Vector': 0.0, 'X_Component': 0.0, 'Y_Component': 0.0, 'Z_Component': 0.0 }
|
||||
continue
|
||||
|
||||
# Avoid division by zero for BH1
|
||||
valid_mask1 = r1_max > 1e-15
|
||||
if np.sum(valid_mask1) < 10:
|
||||
return None, "Insufficient valid data points for BH1"
|
||||
def calc_rms(delta):
|
||||
# 将对应分量的偏差除以统一的轨道半径分母 denom_max
|
||||
return np.sqrt(np.mean((delta[valid] / denom_max[valid])**2)) * 100
|
||||
|
||||
terms1 = (delta_r1[valid_mask1] / r1_max[valid_mask1])**2
|
||||
rms_bh1 = np.sqrt(np.mean(terms1)) * 100
|
||||
# 1. Total 3D Vector RMS
|
||||
delta_vec = np.sqrt((x_r - x_n)**2 + (y_r - y_n)**2 + (z_r - z_n)**2)
|
||||
rms_3d = calc_rms(delta_vec)
|
||||
|
||||
# Avoid division by zero for BH2
|
||||
valid_mask2 = r2_max > 1e-15
|
||||
if np.sum(valid_mask2) < 10:
|
||||
return None, "Insufficient valid data points for BH2"
|
||||
# 2. Component-wise RMS (分离计算各轴,但共用半径分母)
|
||||
rms_x = calc_rms(np.abs(x_r - x_n))
|
||||
rms_y = calc_rms(np.abs(y_r - y_n))
|
||||
rms_z = calc_rms(np.abs(z_r - z_n))
|
||||
|
||||
terms2 = (delta_r2[valid_mask2] / r2_max[valid_mask2])**2
|
||||
rms_bh2 = np.sqrt(np.mean(terms2)) * 100
|
||||
results[f'BH{bh}'] = {
|
||||
'3D_Vector': rms_3d,
|
||||
'X_Component': rms_x,
|
||||
'Y_Component': rms_y,
|
||||
'Z_Component': rms_z
|
||||
}
|
||||
|
||||
# Final RMS is the maximum of BH1 and BH2
|
||||
rms_final = max(rms_bh1, rms_bh2)
|
||||
|
||||
return rms_final, None
|
||||
# 获取 BH1 和 BH2 中的最大误差
|
||||
max_rms = {
|
||||
'3D_Vector': max(results['BH1']['3D_Vector'], results['BH2']['3D_Vector']),
|
||||
'X_Component': max(results['BH1']['X_Component'], results['BH2']['X_Component']),
|
||||
'Y_Component': max(results['BH1']['Y_Component'], results['BH2']['Y_Component']),
|
||||
'Z_Component': max(results['BH1']['Z_Component'], results['BH2']['Z_Component'])
|
||||
}
|
||||
|
||||
return max_rms, None
|
||||
|
||||
def analyze_constraint_violation(constraint_data, n_levels=9):
|
||||
"""
|
||||
@@ -155,34 +277,32 @@ def analyze_constraint_violation(constraint_data, n_levels=9):
|
||||
|
||||
|
||||
def print_header():
|
||||
"""Print report header"""
|
||||
print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
|
||||
print(Color.BOLD + " AMSS-NCKU GW150914 Simulation Regression Test Report" + Color.RESET)
|
||||
print(Color.BOLD + " AMSS-NCKU GW150914 Comprehensive Regression Test" + Color.RESET)
|
||||
print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
|
||||
|
||||
|
||||
def print_rms_results(rms_rel, error, threshold=1.0):
|
||||
"""Print RMS error results"""
|
||||
print(f"\n{Color.BOLD}1. RMS Error Analysis (Baseline vs Optimized){Color.RESET}")
|
||||
print("-" * 45)
|
||||
def print_rms_results(rms_dict, error, threshold=1.0):
|
||||
print(f"\n{Color.BOLD}1. RMS Error Analysis (Maximums of BH1 & BH2){Color.RESET}")
|
||||
print("-" * 65)
|
||||
|
||||
if error:
|
||||
print(f" {Color.RED}Error: {error}{Color.RESET}")
|
||||
return False
|
||||
|
||||
passed = rms_rel < threshold
|
||||
all_passed = True
|
||||
print(f" Requirement: < {threshold}%\n")
|
||||
|
||||
print(f" RMS relative error: {rms_rel:.4f}%")
|
||||
print(f" Requirement: < {threshold}%")
|
||||
print(f" Status: {get_status_text(passed)}")
|
||||
for key, val in rms_dict.items():
|
||||
passed = val < threshold
|
||||
all_passed = all_passed and passed
|
||||
status = get_status_text(passed)
|
||||
print(f" {key:15}: {val:8.4f}% | Status: {status}")
|
||||
|
||||
return passed
|
||||
return all_passed
|
||||
|
||||
|
||||
def print_constraint_results(results, threshold=2.0):
|
||||
"""Print constraint violation results"""
|
||||
def print_constraint_results(results, threshold=2.0):
|
||||
print(f"\n{Color.BOLD}2. ADM Constraint Violation Analysis (Grid Level 0){Color.RESET}")
|
||||
print("-" * 45)
|
||||
print("-" * 65)
|
||||
|
||||
names = ['Ham', 'Px', 'Py', 'Pz', 'Gx', 'Gy', 'Gz']
|
||||
for i, name in enumerate(names):
|
||||
@@ -195,23 +315,49 @@ def print_constraint_results(results, threshold=2.0):
|
||||
print(f"\n Maximum violation: {results['max_violation']:.6f}")
|
||||
print(f" Requirement: < {threshold}")
|
||||
print(f" Status: {get_status_text(passed)}")
|
||||
|
||||
return passed
|
||||
|
||||
|
||||
def print_summary(rms_passed, constraint_passed):
|
||||
"""Print summary"""
|
||||
print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
|
||||
print(Color.BOLD + "Verification Summary" + Color.RESET)
|
||||
print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
|
||||
|
||||
all_passed = rms_passed and constraint_passed
|
||||
|
||||
res_rms = get_status_text(rms_passed)
|
||||
res_con = get_status_text(constraint_passed)
|
||||
|
||||
print(f" [1] RMS trajectory check: {res_rms}")
|
||||
print(f" [2] ADM constraint check: {res_con}")
|
||||
|
||||
return passed
|
||||
|
||||
|
||||
def print_figure_results(results, threshold_percent=0.001):
|
||||
print(f"\n{Color.BOLD}3. Figure Pixel Comparison (PDF Rasterization){Color.RESET}")
|
||||
print("-" * 65)
|
||||
print(f" Requirement: < {threshold_percent:.3f}% differing pixels\n")
|
||||
|
||||
all_passed = True
|
||||
for result in results:
|
||||
passed = result["passed"]
|
||||
all_passed = all_passed and passed
|
||||
status = get_status_text(passed)
|
||||
print(f" {result['name']:32}: {result['diff_percent']:10.6f}% | Status: {status}")
|
||||
|
||||
if result["pages_ref"] != result["pages_target"]:
|
||||
print(f" {'':32} pages(ref/target): {result['pages_ref']}/{result['pages_target']}")
|
||||
|
||||
return all_passed
|
||||
|
||||
|
||||
def print_figure_error(error_message):
|
||||
print(f"\n{Color.BOLD}3. Figure Pixel Comparison (PDF Rasterization){Color.RESET}")
|
||||
print("-" * 65)
|
||||
print(f" {Color.RED}Error: {error_message}{Color.RESET}")
|
||||
return False
|
||||
|
||||
|
||||
def print_summary(rms_passed, constraint_passed, figure_passed):
|
||||
print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
|
||||
print(Color.BOLD + "Verification Summary" + Color.RESET)
|
||||
print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
|
||||
|
||||
all_passed = rms_passed and constraint_passed and figure_passed
|
||||
|
||||
res_rms = get_status_text(rms_passed)
|
||||
res_con = get_status_text(constraint_passed)
|
||||
res_fig = get_status_text(figure_passed)
|
||||
|
||||
print(f" [1] Comprehensive RMS check: {res_rms}")
|
||||
print(f" [2] ADM constraint check: {res_con}")
|
||||
print(f" [3] Figure pixel comparison: {res_fig}")
|
||||
|
||||
final_status = f"{Color.GREEN}{Color.BOLD}ALL CHECKS PASSED{Color.RESET}" if all_passed else f"{Color.RED}{Color.BOLD}SOME CHECKS FAILED{Color.RESET}"
|
||||
print(f"\n Overall result: {final_status}")
|
||||
@@ -219,61 +365,58 @@ def print_summary(rms_passed, constraint_passed):
|
||||
|
||||
return all_passed
|
||||
|
||||
|
||||
def main():
|
||||
# Determine target (optimized) output directory
|
||||
if len(sys.argv) > 1:
|
||||
target_dir = sys.argv[1]
|
||||
else:
|
||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
target_dir = os.path.join(script_dir, "GW150914/AMSS_NCKU_output")
|
||||
|
||||
# Determine reference (baseline) directory
|
||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
reference_dir = os.path.join(script_dir, "GW150914-origin/AMSS_NCKU_output")
|
||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
reference_dir = os.path.join(script_dir, "GW150914-origin/AMSS_NCKU_output")
|
||||
target_figure_dir = resolve_figure_dir(target_dir)
|
||||
reference_figure_dir = os.path.join(script_dir, "GW150914-origin/figure")
|
||||
|
||||
bh_file_ref = os.path.join(reference_dir, "bssn_BH.dat")
|
||||
bh_file_target = os.path.join(target_dir, "bssn_BH.dat")
|
||||
constraint_file = os.path.join(target_dir, "bssn_constraint.dat")
|
||||
|
||||
# Data file paths
|
||||
bh_file_ref = os.path.join(reference_dir, "bssn_BH.dat")
|
||||
bh_file_target = os.path.join(target_dir, "bssn_BH.dat")
|
||||
constraint_file = os.path.join(target_dir, "bssn_constraint.dat")
|
||||
|
||||
# Check if files exist
|
||||
if not os.path.exists(bh_file_ref):
|
||||
print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Baseline trajectory file not found: {bh_file_ref}")
|
||||
sys.exit(1)
|
||||
|
||||
if not os.path.exists(bh_file_target):
|
||||
print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Target trajectory file not found: {bh_file_target}")
|
||||
sys.exit(1)
|
||||
|
||||
if not os.path.exists(constraint_file):
|
||||
print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Constraint data file not found: {constraint_file}")
|
||||
sys.exit(1)
|
||||
|
||||
# Print header
|
||||
print_header()
|
||||
print(f"\n{Color.BOLD}Reference (Baseline):{Color.RESET} {Color.BLUE}{reference_dir}{Color.RESET}")
|
||||
print(f"{Color.BOLD}Target (Optimized): {Color.RESET} {Color.BLUE}{target_dir}{Color.RESET}")
|
||||
print_header()
|
||||
print(f"\n{Color.BOLD}Reference (Baseline):{Color.RESET} {Color.BLUE}{reference_dir}{Color.RESET}")
|
||||
print(f"{Color.BOLD}Target (Optimized): {Color.RESET} {Color.BLUE}{target_dir}{Color.RESET}")
|
||||
print(f"{Color.BOLD}Reference Figures: {Color.RESET} {Color.BLUE}{reference_figure_dir}{Color.RESET}")
|
||||
print(f"{Color.BOLD}Target Figures: {Color.RESET} {Color.BLUE}{target_figure_dir}{Color.RESET}")
|
||||
|
||||
# Load data
|
||||
bh_data_ref = load_bh_trajectory(bh_file_ref)
|
||||
bh_data_target = load_bh_trajectory(bh_file_target)
|
||||
constraint_data = load_constraint_data(constraint_file)
|
||||
|
||||
# Calculate RMS error
|
||||
rms_rel, error = calculate_rms_error(bh_data_ref, bh_data_target)
|
||||
rms_passed = print_rms_results(rms_rel, error)
|
||||
|
||||
# Analyze constraint violation
|
||||
constraint_results = analyze_constraint_violation(constraint_data)
|
||||
constraint_passed = print_constraint_results(constraint_results)
|
||||
|
||||
# Print summary
|
||||
all_passed = print_summary(rms_passed, constraint_passed)
|
||||
|
||||
# Return exit code
|
||||
sys.exit(0 if all_passed else 1)
|
||||
# Output modified RMS results
|
||||
rms_dict, error = calculate_all_rms_errors(bh_data_ref, bh_data_target)
|
||||
rms_passed = print_rms_results(rms_dict, error)
|
||||
|
||||
# Output constraint results
|
||||
constraint_results = analyze_constraint_violation(constraint_data)
|
||||
constraint_passed = print_constraint_results(constraint_results)
|
||||
|
||||
try:
|
||||
figure_results = compare_required_figures(reference_figure_dir, target_figure_dir)
|
||||
figure_passed = print_figure_results(figure_results)
|
||||
except (FileNotFoundError, RuntimeError) as exc:
|
||||
figure_passed = print_figure_error(str(exc))
|
||||
|
||||
all_passed = print_summary(rms_passed, constraint_passed, figure_passed)
|
||||
sys.exit(0 if all_passed else 1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <string>
|
||||
#include <cmath>
|
||||
#include <new>
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
|
||||
#include "misc.h"
|
||||
@@ -17,6 +18,168 @@ using namespace std;
|
||||
#include "interp_lb_profile.h"
|
||||
#endif
|
||||
|
||||
namespace
|
||||
{
|
||||
struct InterpBlockView
|
||||
{
|
||||
Block *bp;
|
||||
double llb[dim];
|
||||
double uub[dim];
|
||||
};
|
||||
|
||||
struct BlockBinIndex
|
||||
{
|
||||
int bins[dim];
|
||||
double lo[dim];
|
||||
double inv[dim];
|
||||
vector<InterpBlockView> views;
|
||||
vector<vector<int>> bin_to_blocks;
|
||||
bool valid;
|
||||
|
||||
BlockBinIndex() : valid(false)
|
||||
{
|
||||
for (int i = 0; i < dim; i++)
|
||||
{
|
||||
bins[i] = 1;
|
||||
lo[i] = 0.0;
|
||||
inv[i] = 0.0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
inline int clamp_int(int v, int lo, int hi)
|
||||
{
|
||||
return (v < lo) ? lo : ((v > hi) ? hi : v);
|
||||
}
|
||||
|
||||
inline int coord_to_bin(double x, double lo, double inv, int nb)
|
||||
{
|
||||
if (nb <= 1 || inv <= 0.0)
|
||||
return 0;
|
||||
int b = int(floor((x - lo) * inv));
|
||||
return clamp_int(b, 0, nb - 1);
|
||||
}
|
||||
|
||||
inline int bin_loc(const BlockBinIndex &index, int b0, int b1, int b2)
|
||||
{
|
||||
return b0 + index.bins[0] * (b1 + index.bins[1] * b2);
|
||||
}
|
||||
|
||||
inline bool point_in_block_view(const InterpBlockView &view, const double *pox, const double *DH)
|
||||
{
|
||||
for (int i = 0; i < dim; i++)
|
||||
{
|
||||
if (pox[i] - view.llb[i] < -DH[i] / 2 || pox[i] - view.uub[i] > DH[i] / 2)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void build_block_bin_index(Patch *patch, const double *DH, BlockBinIndex &index)
|
||||
{
|
||||
index = BlockBinIndex();
|
||||
|
||||
MyList<Block> *Bp = patch->blb;
|
||||
while (Bp)
|
||||
{
|
||||
Block *BP = Bp->data;
|
||||
InterpBlockView view;
|
||||
view.bp = BP;
|
||||
for (int i = 0; i < dim; i++)
|
||||
{
|
||||
#ifdef Vertex
|
||||
#ifdef Cell
|
||||
#error Both Cell and Vertex are defined
|
||||
#endif
|
||||
view.llb[i] = (feq(BP->bbox[i], patch->bbox[i], DH[i] / 2)) ? BP->bbox[i] + patch->lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
|
||||
view.uub[i] = (feq(BP->bbox[dim + i], patch->bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - patch->uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
|
||||
#else
|
||||
#ifdef Cell
|
||||
view.llb[i] = (feq(BP->bbox[i], patch->bbox[i], DH[i] / 2)) ? BP->bbox[i] + patch->lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
|
||||
view.uub[i] = (feq(BP->bbox[dim + i], patch->bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - patch->uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
|
||||
#else
|
||||
#error Not define Vertex nor Cell
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
index.views.push_back(view);
|
||||
if (Bp == patch->ble)
|
||||
break;
|
||||
Bp = Bp->next;
|
||||
}
|
||||
|
||||
const int nblocks = int(index.views.size());
|
||||
if (nblocks <= 0)
|
||||
return;
|
||||
|
||||
int bins_1d = int(ceil(pow(double(nblocks), 1.0 / 3.0)));
|
||||
bins_1d = clamp_int(bins_1d, 1, 32);
|
||||
for (int i = 0; i < dim; i++)
|
||||
{
|
||||
index.bins[i] = bins_1d;
|
||||
index.lo[i] = patch->bbox[i] + patch->lli[i] * DH[i];
|
||||
const double hi = patch->bbox[dim + i] - patch->uui[i] * DH[i];
|
||||
if (hi > index.lo[i] && bins_1d > 1)
|
||||
index.inv[i] = bins_1d / (hi - index.lo[i]);
|
||||
else
|
||||
index.inv[i] = 0.0;
|
||||
}
|
||||
|
||||
index.bin_to_blocks.resize(index.bins[0] * index.bins[1] * index.bins[2]);
|
||||
|
||||
for (int bi = 0; bi < nblocks; bi++)
|
||||
{
|
||||
const InterpBlockView &view = index.views[bi];
|
||||
int bmin[dim], bmax[dim];
|
||||
for (int d = 0; d < dim; d++)
|
||||
{
|
||||
const double low = view.llb[d] - DH[d] / 2;
|
||||
const double up = view.uub[d] + DH[d] / 2;
|
||||
bmin[d] = coord_to_bin(low, index.lo[d], index.inv[d], index.bins[d]);
|
||||
bmax[d] = coord_to_bin(up, index.lo[d], index.inv[d], index.bins[d]);
|
||||
if (bmax[d] < bmin[d])
|
||||
{
|
||||
int t = bmin[d];
|
||||
bmin[d] = bmax[d];
|
||||
bmax[d] = t;
|
||||
}
|
||||
}
|
||||
|
||||
for (int bz = bmin[2]; bz <= bmax[2]; bz++)
|
||||
for (int by = bmin[1]; by <= bmax[1]; by++)
|
||||
for (int bx = bmin[0]; bx <= bmax[0]; bx++)
|
||||
index.bin_to_blocks[bin_loc(index, bx, by, bz)].push_back(bi);
|
||||
}
|
||||
|
||||
index.valid = true;
|
||||
}
|
||||
|
||||
int find_block_index_for_point(const BlockBinIndex &index, const double *pox, const double *DH)
|
||||
{
|
||||
if (!index.valid)
|
||||
return -1;
|
||||
|
||||
const int bx = coord_to_bin(pox[0], index.lo[0], index.inv[0], index.bins[0]);
|
||||
const int by = coord_to_bin(pox[1], index.lo[1], index.inv[1], index.bins[1]);
|
||||
const int bz = coord_to_bin(pox[2], index.lo[2], index.inv[2], index.bins[2]);
|
||||
const vector<int> &cand = index.bin_to_blocks[bin_loc(index, bx, by, bz)];
|
||||
|
||||
for (size_t ci = 0; ci < cand.size(); ci++)
|
||||
{
|
||||
const int bi = cand[ci];
|
||||
if (point_in_block_view(index.views[bi], pox, DH))
|
||||
return bi;
|
||||
}
|
||||
|
||||
// Fallback to full scan for numerical edge cases around bin boundaries.
|
||||
for (size_t bi = 0; bi < index.views.size(); bi++)
|
||||
if (point_in_block_view(index.views[bi], pox, DH))
|
||||
return int(bi);
|
||||
|
||||
return -1;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
Patch::Patch(int DIM, int *shapei, double *bboxi, int levi, bool buflog, int Symmetry) : lev(levi)
|
||||
{
|
||||
|
||||
@@ -367,9 +530,11 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
for (int j = 0; j < NN; j++)
|
||||
owner_rank[j] = -1;
|
||||
|
||||
double DH[dim], llb[dim], uub[dim];
|
||||
double DH[dim];
|
||||
for (int i = 0; i < dim; i++)
|
||||
DH[i] = getdX(i);
|
||||
BlockBinIndex block_index;
|
||||
build_block_bin_index(this, DH, block_index);
|
||||
|
||||
for (int j = 0; j < NN; j++) // run along points
|
||||
{
|
||||
@@ -392,57 +557,24 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
}
|
||||
}
|
||||
|
||||
MyList<Block> *Bp = blb;
|
||||
bool notfind = true;
|
||||
while (notfind && Bp) // run along Blocks
|
||||
const int block_i = find_block_index_for_point(block_index, pox, DH);
|
||||
if (block_i >= 0)
|
||||
{
|
||||
Block *BP = Bp->data;
|
||||
|
||||
bool flag = true;
|
||||
for (int i = 0; i < dim; i++)
|
||||
Block *BP = block_index.views[block_i].bp;
|
||||
owner_rank[j] = BP->rank;
|
||||
if (myrank == BP->rank)
|
||||
{
|
||||
#ifdef Vertex
|
||||
#ifdef Cell
|
||||
#error Both Cell and Vertex are defined
|
||||
#endif
|
||||
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
|
||||
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
|
||||
#else
|
||||
#ifdef Cell
|
||||
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
|
||||
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
|
||||
#else
|
||||
#error Not define Vertex nor Cell
|
||||
#endif
|
||||
#endif
|
||||
if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2)
|
||||
//---> interpolation
|
||||
varl = VarList;
|
||||
int k = 0;
|
||||
while (varl) // run along variables
|
||||
{
|
||||
flag = false;
|
||||
break;
|
||||
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
|
||||
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
|
||||
varl = varl->next;
|
||||
k++;
|
||||
}
|
||||
}
|
||||
|
||||
if (flag)
|
||||
{
|
||||
notfind = false;
|
||||
owner_rank[j] = BP->rank;
|
||||
if (myrank == BP->rank)
|
||||
{
|
||||
//---> interpolation
|
||||
varl = VarList;
|
||||
int k = 0;
|
||||
while (varl) // run along variables
|
||||
{
|
||||
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
|
||||
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
|
||||
varl = varl->next;
|
||||
k++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (Bp == ble)
|
||||
break;
|
||||
Bp = Bp->next;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -535,9 +667,11 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
for (int j = 0; j < NN; j++)
|
||||
owner_rank[j] = -1;
|
||||
|
||||
double DH[dim], llb[dim], uub[dim];
|
||||
double DH[dim];
|
||||
for (int i = 0; i < dim; i++)
|
||||
DH[i] = getdX(i);
|
||||
BlockBinIndex block_index;
|
||||
build_block_bin_index(this, DH, block_index);
|
||||
|
||||
// --- Interpolation phase (identical to original) ---
|
||||
for (int j = 0; j < NN; j++)
|
||||
@@ -561,56 +695,23 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
}
|
||||
}
|
||||
|
||||
MyList<Block> *Bp = blb;
|
||||
bool notfind = true;
|
||||
while (notfind && Bp)
|
||||
const int block_i = find_block_index_for_point(block_index, pox, DH);
|
||||
if (block_i >= 0)
|
||||
{
|
||||
Block *BP = Bp->data;
|
||||
|
||||
bool flag = true;
|
||||
for (int i = 0; i < dim; i++)
|
||||
Block *BP = block_index.views[block_i].bp;
|
||||
owner_rank[j] = BP->rank;
|
||||
if (myrank == BP->rank)
|
||||
{
|
||||
#ifdef Vertex
|
||||
#ifdef Cell
|
||||
#error Both Cell and Vertex are defined
|
||||
#endif
|
||||
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
|
||||
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
|
||||
#else
|
||||
#ifdef Cell
|
||||
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
|
||||
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
|
||||
#else
|
||||
#error Not define Vertex nor Cell
|
||||
#endif
|
||||
#endif
|
||||
if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2)
|
||||
varl = VarList;
|
||||
int k = 0;
|
||||
while (varl)
|
||||
{
|
||||
flag = false;
|
||||
break;
|
||||
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
|
||||
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
|
||||
varl = varl->next;
|
||||
k++;
|
||||
}
|
||||
}
|
||||
|
||||
if (flag)
|
||||
{
|
||||
notfind = false;
|
||||
owner_rank[j] = BP->rank;
|
||||
if (myrank == BP->rank)
|
||||
{
|
||||
varl = VarList;
|
||||
int k = 0;
|
||||
while (varl)
|
||||
{
|
||||
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
|
||||
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
|
||||
varl = varl->next;
|
||||
k++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (Bp == ble)
|
||||
break;
|
||||
Bp = Bp->next;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -833,9 +934,11 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
MPI_Comm_group(MPI_COMM_WORLD, &world_group);
|
||||
MPI_Comm_group(Comm_here, &local_group);
|
||||
|
||||
double DH[dim], llb[dim], uub[dim];
|
||||
double DH[dim];
|
||||
for (int i = 0; i < dim; i++)
|
||||
DH[i] = getdX(i);
|
||||
BlockBinIndex block_index;
|
||||
build_block_bin_index(this, DH, block_index);
|
||||
|
||||
for (int j = 0; j < NN; j++) // run along points
|
||||
{
|
||||
@@ -858,57 +961,24 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
}
|
||||
}
|
||||
|
||||
MyList<Block> *Bp = blb;
|
||||
bool notfind = true;
|
||||
while (notfind && Bp) // run along Blocks
|
||||
const int block_i = find_block_index_for_point(block_index, pox, DH);
|
||||
if (block_i >= 0)
|
||||
{
|
||||
Block *BP = Bp->data;
|
||||
|
||||
bool flag = true;
|
||||
for (int i = 0; i < dim; i++)
|
||||
Block *BP = block_index.views[block_i].bp;
|
||||
owner_rank[j] = BP->rank;
|
||||
if (myrank == BP->rank)
|
||||
{
|
||||
#ifdef Vertex
|
||||
#ifdef Cell
|
||||
#error Both Cell and Vertex are defined
|
||||
#endif
|
||||
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
|
||||
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
|
||||
#else
|
||||
#ifdef Cell
|
||||
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
|
||||
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
|
||||
#else
|
||||
#error Not define Vertex nor Cell
|
||||
#endif
|
||||
#endif
|
||||
if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2)
|
||||
//---> interpolation
|
||||
varl = VarList;
|
||||
int k = 0;
|
||||
while (varl) // run along variables
|
||||
{
|
||||
flag = false;
|
||||
break;
|
||||
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
|
||||
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
|
||||
varl = varl->next;
|
||||
k++;
|
||||
}
|
||||
}
|
||||
|
||||
if (flag)
|
||||
{
|
||||
notfind = false;
|
||||
owner_rank[j] = BP->rank;
|
||||
if (myrank == BP->rank)
|
||||
{
|
||||
//---> interpolation
|
||||
varl = VarList;
|
||||
int k = 0;
|
||||
while (varl) // run along variables
|
||||
{
|
||||
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
|
||||
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
|
||||
varl = varl->next;
|
||||
k++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (Bp == ble)
|
||||
break;
|
||||
Bp = Bp->next;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -100,28 +100,37 @@ namespace Parallel
|
||||
MyList<gridseg> **combined_dst;
|
||||
int *send_lengths;
|
||||
int *recv_lengths;
|
||||
double **send_bufs;
|
||||
double **recv_bufs;
|
||||
int *send_buf_caps;
|
||||
int *recv_buf_caps;
|
||||
MPI_Request *reqs;
|
||||
MPI_Status *stats;
|
||||
double **send_bufs;
|
||||
double **recv_bufs;
|
||||
int *send_buf_caps;
|
||||
int *recv_buf_caps;
|
||||
unsigned char *send_buf_pinned;
|
||||
unsigned char *recv_buf_pinned;
|
||||
MPI_Request *reqs;
|
||||
MPI_Status *stats;
|
||||
int max_reqs;
|
||||
bool lengths_valid;
|
||||
int *tc_req_node;
|
||||
int *tc_req_is_recv;
|
||||
int *tc_completed;
|
||||
SyncCache();
|
||||
void invalidate();
|
||||
void destroy();
|
||||
};
|
||||
|
||||
void Sync_cached(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, SyncCache &cache);
|
||||
void transfer_cached(MyList<gridseg> **src, MyList<gridseg> **dst,
|
||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
||||
int Symmetry, SyncCache &cache);
|
||||
void Sync_cached(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, SyncCache &cache);
|
||||
void Sync_ensure_cache(MyList<Patch> *PatL, int Symmetry, SyncCache &cache);
|
||||
void transfer_cached(MyList<gridseg> **src, MyList<gridseg> **dst,
|
||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
||||
int Symmetry, SyncCache &cache);
|
||||
|
||||
struct AsyncSyncState {
|
||||
int req_no;
|
||||
bool active;
|
||||
AsyncSyncState() : req_no(0), active(false) {}
|
||||
int *req_node;
|
||||
int *req_is_recv;
|
||||
int pending_recv;
|
||||
AsyncSyncState() : req_no(0), active(false), req_node(0), req_is_recv(0), pending_recv(0) {}
|
||||
};
|
||||
|
||||
void Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry,
|
||||
@@ -173,12 +182,13 @@ namespace Parallel
|
||||
MyList<Parallel::gridseg> *clone_gsl(MyList<Parallel::gridseg> *p, bool first_only);
|
||||
MyList<Parallel::gridseg> *build_bulk_gsl(Patch *Pat); // similar to build_owned_gsl0 but does not care rank issue
|
||||
MyList<Parallel::gridseg> *build_bulk_gsl(Block *bp, Patch *Pat);
|
||||
void build_PhysBD_gstl(Patch *Pat, MyList<Parallel::gridseg> *srci, MyList<Parallel::gridseg> *dsti,
|
||||
MyList<Parallel::gridseg> **out_src, MyList<Parallel::gridseg> **out_dst);
|
||||
void PeriodicBD(Patch *Pat, MyList<var> *VarList, int Symmetry);
|
||||
double L2Norm(Patch *Pat, var *vf);
|
||||
void checkgsl(MyList<Parallel::gridseg> *pp, bool first_only);
|
||||
void checkvarl(MyList<var> *pp, bool first_only);
|
||||
void build_PhysBD_gstl(Patch *Pat, MyList<Parallel::gridseg> *srci, MyList<Parallel::gridseg> *dsti,
|
||||
MyList<Parallel::gridseg> **out_src, MyList<Parallel::gridseg> **out_dst);
|
||||
void PeriodicBD(Patch *Pat, MyList<var> *VarList, int Symmetry);
|
||||
double L2Norm(Patch *Pat, var *vf);
|
||||
void L2Norm7(Patch *Pat, var **vf, double *norms);
|
||||
void checkgsl(MyList<Parallel::gridseg> *pp, bool first_only);
|
||||
void checkvarl(MyList<var> *pp, bool first_only);
|
||||
MyList<Parallel::gridseg> *divide_gsl(MyList<Parallel::gridseg> *p, Patch *Pat);
|
||||
MyList<Parallel::gridseg> *divide_gs(MyList<Parallel::gridseg> *p, Patch *Pat);
|
||||
void prepare_inter_time_level(Patch *Pat,
|
||||
@@ -210,11 +220,12 @@ namespace Parallel
|
||||
void aligncheck(double *bbox0, double *bboxl, int lev, double *DH0, int *shape);
|
||||
bool point_locat_gsl(double *pox, MyList<Parallel::gridseg> *gsl);
|
||||
void checkpatchlist(MyList<Patch> *PatL, bool buflog);
|
||||
|
||||
double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here);
|
||||
bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
|
||||
int NN, double **XX,
|
||||
double *Shellf, int Symmetry, MPI_Comm Comm_here);
|
||||
|
||||
double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here);
|
||||
void L2Norm7(Patch *Pat, var **vf, double *norms, MPI_Comm Comm_here);
|
||||
bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
|
||||
int NN, double **XX,
|
||||
double *Shellf, int Symmetry, MPI_Comm Comm_here);
|
||||
#if (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
||||
MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
|
||||
bool periodic, int start_rank, int end_rank, int nodes = 0);
|
||||
|
||||
@@ -3439,10 +3439,10 @@ void ShellPatch::write_Pablo_file_ss(int *ext, double xmin, double xmax, double
|
||||
delete[] Z;
|
||||
}
|
||||
|
||||
double ShellPatch::L2Norm(var *vf)
|
||||
{
|
||||
double tvf, dtvf = 0;
|
||||
int BDW = overghost;
|
||||
double ShellPatch::L2Norm(var *vf)
|
||||
{
|
||||
double tvf, dtvf = 0;
|
||||
int BDW = overghost;
|
||||
|
||||
MyList<ss_patch> *sPp = PatL;
|
||||
while (sPp)
|
||||
@@ -3469,13 +3469,50 @@ double ShellPatch::L2Norm(var *vf)
|
||||
MPI_Allreduce(&dtvf, &tvf, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
|
||||
tvf = sqrt(tvf);
|
||||
|
||||
return tvf;
|
||||
}
|
||||
|
||||
// find maximum of abstract value, XX store position for maximum, Shellf store maximum themselvs
|
||||
void ShellPatch::Find_Maximum(MyList<var> *VarList, double *XX,
|
||||
double *Shellf)
|
||||
|
||||
return tvf;
|
||||
}
|
||||
void ShellPatch::L2Norm7(var **vf, double *norms)
|
||||
{
|
||||
double tvf[7], dtvf[7];
|
||||
int BDW = overghost;
|
||||
for (int i = 0; i < 7; i++)
|
||||
dtvf[i] = 0;
|
||||
|
||||
MyList<ss_patch> *sPp = PatL;
|
||||
while (sPp)
|
||||
{
|
||||
MyList<Block> *Bp = sPp->data->blb;
|
||||
while (Bp)
|
||||
{
|
||||
Block *cg = Bp->data;
|
||||
if (myrank == cg->rank)
|
||||
{
|
||||
f_l2normhelper7(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2],
|
||||
sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5],
|
||||
cg->fgfs[vf[0]->sgfn], cg->fgfs[vf[1]->sgfn], cg->fgfs[vf[2]->sgfn],
|
||||
cg->fgfs[vf[3]->sgfn], cg->fgfs[vf[4]->sgfn], cg->fgfs[vf[5]->sgfn],
|
||||
cg->fgfs[vf[6]->sgfn], tvf, BDW);
|
||||
for (int i = 0; i < 7; i++)
|
||||
dtvf[i] += tvf[i];
|
||||
}
|
||||
if (Bp == sPp->data->ble)
|
||||
break;
|
||||
Bp = Bp->next;
|
||||
}
|
||||
sPp = sPp->next;
|
||||
}
|
||||
|
||||
MPI_Allreduce(dtvf, tvf, 7, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
|
||||
for (int i = 0; i < 7; i++)
|
||||
norms[i] = sqrt(tvf[i]);
|
||||
}
|
||||
|
||||
// find maximum of abstract value, XX store position for maximum, Shellf store maximum themselvs
|
||||
void ShellPatch::Find_Maximum(MyList<var> *VarList, double *XX,
|
||||
double *Shellf)
|
||||
{
|
||||
MyList<var> *varl;
|
||||
int num_var = 0;
|
||||
|
||||
@@ -195,10 +195,11 @@ public:
|
||||
bool Interp_One_Point(MyList<var> *VarList,
|
||||
double *XX, /*input global Cartesian coordinate*/
|
||||
double *Shellf, int Symmetry);
|
||||
void write_Pablo_file_ss(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax,
|
||||
char *filename, int sst);
|
||||
double L2Norm(var *vf);
|
||||
void Find_Maximum(MyList<var> *VarList, double *XX, double *Shellf);
|
||||
};
|
||||
void write_Pablo_file_ss(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax,
|
||||
char *filename, int sst);
|
||||
double L2Norm(var *vf);
|
||||
void L2Norm7(var **vf, double *norms);
|
||||
void Find_Maximum(MyList<var> *VarList, double *XX, double *Shellf);
|
||||
};
|
||||
|
||||
#endif /* SHELLPATCH_H */
|
||||
|
||||
@@ -26,12 +26,20 @@ using namespace std;
|
||||
#include "shellfunctions.h"
|
||||
#include "cpbc.h"
|
||||
#include "kodiss.h"
|
||||
#include "parameters.h"
|
||||
|
||||
#ifdef With_AHF
|
||||
#include "derivatives.h"
|
||||
#include "myglobal.h"
|
||||
#endif
|
||||
#include "parameters.h"
|
||||
|
||||
#ifndef USE_CUDA_Z4C
|
||||
#define USE_CUDA_Z4C 0
|
||||
#endif
|
||||
|
||||
#if USE_CUDA_Z4C && (ABEtype == 2)
|
||||
#include "z4c_rhs_cuda.h"
|
||||
#endif
|
||||
|
||||
#ifdef With_AHF
|
||||
#include "derivatives.h"
|
||||
#include "myglobal.h"
|
||||
#endif
|
||||
|
||||
//================================================================================================
|
||||
|
||||
@@ -167,12 +175,554 @@ Z4c_class::~Z4c_class()
|
||||
|
||||
#define MRBD 0 // 0: fix BD for meshrefinement level; 1: sommerfeld_bam for them; 2: sommerfeld_yo for them
|
||||
|
||||
#ifndef CPBC
|
||||
// for sommerfeld boundary
|
||||
|
||||
void Z4c_class::Step(int lev, int YN)
|
||||
{
|
||||
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
|
||||
#ifndef CPBC
|
||||
// for sommerfeld boundary
|
||||
|
||||
#if USE_CUDA_Z4C && (ABEtype == 2)
|
||||
#ifdef WithShell
|
||||
#error "USE_CUDA_Z4C resident path currently supports Cartesian non-shell Z4C only"
|
||||
#endif
|
||||
#if (MRBD == 2)
|
||||
#error "USE_CUDA_Z4C resident path does not support MRBD == 2"
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
static const int k_z4c_cuda_bh_state_indices[3] = {18, 19, 20};
|
||||
|
||||
bool fill_z4c_cuda_views(Block *cg, MyList<var> *vars,
|
||||
double **host_views,
|
||||
double *propspeeds = 0,
|
||||
double *soa_flat = 0)
|
||||
{
|
||||
int idx = 0;
|
||||
while (vars && idx < Z4C_CUDA_STATE_COUNT)
|
||||
{
|
||||
host_views[idx] = cg->fgfs[vars->data->sgfn];
|
||||
if (propspeeds)
|
||||
propspeeds[idx] = vars->data->propspeed;
|
||||
if (soa_flat)
|
||||
{
|
||||
soa_flat[3 * idx + 0] = vars->data->SoA[0];
|
||||
soa_flat[3 * idx + 1] = vars->data->SoA[1];
|
||||
soa_flat[3 * idx + 2] = vars->data->SoA[2];
|
||||
}
|
||||
vars = vars->next;
|
||||
++idx;
|
||||
}
|
||||
return idx == Z4C_CUDA_STATE_COUNT && vars == 0;
|
||||
}
|
||||
|
||||
void z4c_cuda_download_level_state(MyList<Patch> *PatL, MyList<var> *vars, int myrank, bool release_ctx)
|
||||
{
|
||||
MyList<Patch> *Pp = PatL;
|
||||
while (Pp)
|
||||
{
|
||||
MyList<Block> *BP = Pp->data->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *cg = BP->data;
|
||||
if (myrank == cg->rank && z4c_cuda_has_resident_state(cg))
|
||||
{
|
||||
double *state_out[Z4C_CUDA_STATE_COUNT];
|
||||
if (!fill_z4c_cuda_views(cg, vars, state_out))
|
||||
{
|
||||
cout << "CUDA Z4C state list mismatch on resident state download" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
if (z4c_cuda_download_resident_state(cg, cg->shape, state_out))
|
||||
{
|
||||
cout << "CUDA Z4C resident state download failed" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
if (release_ctx)
|
||||
z4c_cuda_release_step_ctx(cg);
|
||||
}
|
||||
if (BP == Pp->data->ble)
|
||||
break;
|
||||
BP = BP->next;
|
||||
}
|
||||
Pp = Pp->next;
|
||||
}
|
||||
}
|
||||
|
||||
bool z4c_cuda_patch_contains_point(Patch *patch, const double *point)
|
||||
{
|
||||
if (!patch)
|
||||
return false;
|
||||
for (int d = 0; d < dim; d++)
|
||||
{
|
||||
const double h = patch->getdX(d);
|
||||
const double lo = patch->bbox[d] + patch->lli[d] * h;
|
||||
const double hi = patch->bbox[dim + d] - patch->uui[d] * h;
|
||||
if (point[d] < lo || point[d] > hi)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool z4c_cuda_point_in_block(Patch *patch, Block *block,
|
||||
const double *point, const double *DH)
|
||||
{
|
||||
if (!patch || !block)
|
||||
return false;
|
||||
for (int d = 0; d < dim; d++)
|
||||
{
|
||||
double llb;
|
||||
double uub;
|
||||
#ifdef Vertex
|
||||
#ifdef Cell
|
||||
#error Both Cell and Vertex are defined
|
||||
#endif
|
||||
llb = (feq(block->bbox[d], patch->bbox[d], DH[d] / 2))
|
||||
? block->bbox[d] + patch->lli[d] * DH[d]
|
||||
: block->bbox[d] + (ghost_width - 0.5) * DH[d];
|
||||
uub = (feq(block->bbox[dim + d], patch->bbox[dim + d], DH[d] / 2))
|
||||
? block->bbox[dim + d] - patch->uui[d] * DH[d]
|
||||
: block->bbox[dim + d] - (ghost_width - 0.5) * DH[d];
|
||||
#else
|
||||
#ifdef Cell
|
||||
llb = (feq(block->bbox[d], patch->bbox[d], DH[d] / 2))
|
||||
? block->bbox[d] + patch->lli[d] * DH[d]
|
||||
: block->bbox[d] + ghost_width * DH[d];
|
||||
uub = (feq(block->bbox[dim + d], patch->bbox[dim + d], DH[d] / 2))
|
||||
? block->bbox[dim + d] - patch->uui[d] * DH[d]
|
||||
: block->bbox[dim + d] - ghost_width * DH[d];
|
||||
#else
|
||||
#error Not define Vertex nor Cell
|
||||
#endif
|
||||
#endif
|
||||
if (point[d] - llb < -DH[d] / 2 || point[d] - uub > DH[d] / 2)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
int z4c_cuda_interp_tile_start(const double *coords, int n, double x, double dx, int ordn)
|
||||
{
|
||||
if (!coords || n <= ordn)
|
||||
return 0;
|
||||
int cxi = int((x - coords[0]) / dx + 0.4) + 1;
|
||||
int start = cxi - ordn / 2;
|
||||
if (start < 0)
|
||||
start = 0;
|
||||
const int max_start = n - ordn;
|
||||
if (start > max_start)
|
||||
start = max_start;
|
||||
return start;
|
||||
}
|
||||
|
||||
bool z4c_cuda_interp_bh_point_resident(MyList<Patch> *PatL,
|
||||
int myrank,
|
||||
const double *point,
|
||||
var *forx, var *fory, var *forz,
|
||||
int Symmetry,
|
||||
double *shellf)
|
||||
{
|
||||
const int ordn = 2 * ghost_width;
|
||||
int owner_rank = -1;
|
||||
|
||||
shellf[0] = shellf[1] = shellf[2] = 0.0;
|
||||
|
||||
MyList<Patch> *PL = PatL;
|
||||
while (PL)
|
||||
{
|
||||
Patch *patch = PL->data;
|
||||
if (!z4c_cuda_patch_contains_point(patch, point))
|
||||
{
|
||||
PL = PL->next;
|
||||
continue;
|
||||
}
|
||||
|
||||
double DH[dim];
|
||||
for (int d = 0; d < dim; d++)
|
||||
DH[d] = patch->getdX(d);
|
||||
|
||||
MyList<Block> *BP = patch->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *block = BP->data;
|
||||
if (z4c_cuda_point_in_block(patch, block, point, DH))
|
||||
{
|
||||
owner_rank = block->rank;
|
||||
if (myrank == owner_rank)
|
||||
{
|
||||
int interp_ordn = ordn;
|
||||
int interp_sym = Symmetry;
|
||||
double x = point[0];
|
||||
double y = point[1];
|
||||
double z = point[2];
|
||||
|
||||
if (z4c_cuda_has_resident_state(block) &&
|
||||
block->shape[0] >= ordn && block->shape[1] >= ordn && block->shape[2] >= ordn)
|
||||
{
|
||||
const int sx = ordn;
|
||||
const int sy = ordn;
|
||||
const int sz = ordn;
|
||||
const int region_all = sx * sy * sz;
|
||||
const int i0 = z4c_cuda_interp_tile_start(block->X[0], block->shape[0], x, DH[0], ordn);
|
||||
const int j0 = z4c_cuda_interp_tile_start(block->X[1], block->shape[1], y, DH[1], ordn);
|
||||
const int k0 = z4c_cuda_interp_tile_start(block->X[2], block->shape[2], z, DH[2], ordn);
|
||||
double *packed_fields = new double[3 * region_all];
|
||||
var *vars[3] = {forx, fory, forz};
|
||||
for (int f = 0; f < 3; f++)
|
||||
{
|
||||
if (z4c_cuda_pack_state_region_to_host_buffer(block,
|
||||
k_z4c_cuda_bh_state_indices[f],
|
||||
packed_fields + f * region_all,
|
||||
block->shape,
|
||||
i0, j0, k0,
|
||||
sx, sy, sz) != 0)
|
||||
{
|
||||
delete[] packed_fields;
|
||||
cout << "CUDA Z4C BH tile download failed" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
int tile_shape[3] = {sx, sy, sz};
|
||||
f_global_interp(tile_shape,
|
||||
block->X[0] + i0,
|
||||
block->X[1] + j0,
|
||||
block->X[2] + k0,
|
||||
packed_fields + f * region_all,
|
||||
shellf[f],
|
||||
x, y, z,
|
||||
interp_ordn,
|
||||
vars[f]->SoA,
|
||||
interp_sym);
|
||||
}
|
||||
delete[] packed_fields;
|
||||
}
|
||||
else
|
||||
{
|
||||
f_global_interp(block->shape, block->X[0], block->X[1], block->X[2],
|
||||
block->fgfs[forx->sgfn], shellf[0],
|
||||
x, y, z, interp_ordn, forx->SoA, interp_sym);
|
||||
f_global_interp(block->shape, block->X[0], block->X[1], block->X[2],
|
||||
block->fgfs[fory->sgfn], shellf[1],
|
||||
x, y, z, interp_ordn, fory->SoA, interp_sym);
|
||||
f_global_interp(block->shape, block->X[0], block->X[1], block->X[2],
|
||||
block->fgfs[forz->sgfn], shellf[2],
|
||||
x, y, z, interp_ordn, forz->SoA, interp_sym);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (BP == patch->ble)
|
||||
break;
|
||||
BP = BP->next;
|
||||
}
|
||||
|
||||
if (owner_rank >= 0)
|
||||
break;
|
||||
PL = PL->next;
|
||||
}
|
||||
|
||||
if (owner_rank < 0)
|
||||
return false;
|
||||
|
||||
MPI_Bcast(shellf, 3, MPI_DOUBLE, owner_rank, MPI_COMM_WORLD);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool z4c_cuda_compute_porg_rhs_resident(cgh *GH,
|
||||
int ilev,
|
||||
int myrank,
|
||||
int BH_num,
|
||||
double **BH_PS,
|
||||
double **BH_RHS,
|
||||
var *forx, var *fory, var *forz,
|
||||
int Symmetry)
|
||||
{
|
||||
for (int n = 0; n < BH_num; n++)
|
||||
{
|
||||
double shellf[3] = {0.0, 0.0, 0.0};
|
||||
int lev = ilev;
|
||||
while (lev >= 0 &&
|
||||
!z4c_cuda_interp_bh_point_resident(GH->PatL[lev], myrank, BH_PS[n],
|
||||
forx, fory, forz, Symmetry, shellf))
|
||||
{
|
||||
--lev;
|
||||
}
|
||||
if (lev < 0)
|
||||
return false;
|
||||
BH_RHS[n][0] = -shellf[0];
|
||||
BH_RHS[n][1] = -shellf[1];
|
||||
BH_RHS[n][2] = -shellf[2];
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
#endif
|
||||
|
||||
void Z4c_class::Step(int lev, int YN)
|
||||
{
|
||||
#if USE_CUDA_Z4C && (ABEtype == 2)
|
||||
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
|
||||
#ifdef With_AHF
|
||||
AH_Step_Find(lev, dT_lev);
|
||||
#endif
|
||||
bool BB = fgt(PhysTime, StartTime, dT_lev / 2);
|
||||
double ndeps = numepss;
|
||||
if (lev < GH->movls)
|
||||
ndeps = numepsb;
|
||||
double TRK4 = PhysTime;
|
||||
int iter_count = 0;
|
||||
int pre = 0, cor = 1;
|
||||
int ERROR = 0;
|
||||
|
||||
MyList<Patch> *Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
MyList<Block> *BP = Pp->data->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *cg = BP->data;
|
||||
if (myrank == cg->rank)
|
||||
{
|
||||
double *state_in[Z4C_CUDA_STATE_COUNT];
|
||||
double *state_out[Z4C_CUDA_STATE_COUNT];
|
||||
double propspeed[Z4C_CUDA_STATE_COUNT];
|
||||
double soa_flat[3 * Z4C_CUDA_STATE_COUNT];
|
||||
if (!fill_z4c_cuda_views(cg, StateList, state_in, propspeed, soa_flat) ||
|
||||
!fill_z4c_cuda_views(cg, SynchList_pre, state_out))
|
||||
{
|
||||
cout << "CUDA Z4C state list mismatch on predictor step" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
int apply_bam_bc = 0;
|
||||
#if (MRBD == 0)
|
||||
#if (SommerType == 0)
|
||||
apply_bam_bc = (lev == 0) ? 1 : 0;
|
||||
#endif
|
||||
#elif (MRBD == 1)
|
||||
apply_bam_bc = 1;
|
||||
#endif
|
||||
int keep_resident_state = 1;
|
||||
int apply_enforce_ga = 0;
|
||||
#if (AGM == 0)
|
||||
apply_enforce_ga = 1;
|
||||
#endif
|
||||
if (z4c_cuda_rk4_substep(cg,
|
||||
cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
state_in, state_out,
|
||||
propspeed, soa_flat, Pp->data->bbox,
|
||||
dT_lev, TRK4, iter_count, apply_bam_bc,
|
||||
Symmetry, lev, ndeps, pre,
|
||||
keep_resident_state, apply_enforce_ga, chitiny))
|
||||
{
|
||||
cout << "CUDA Z4C predictor substep failed in domain: ("
|
||||
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
|
||||
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
|
||||
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
|
||||
ERROR = 1;
|
||||
}
|
||||
}
|
||||
if (BP == Pp->data->ble)
|
||||
break;
|
||||
BP = BP->next;
|
||||
}
|
||||
Pp = Pp->next;
|
||||
}
|
||||
|
||||
{
|
||||
int erh = ERROR;
|
||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||
}
|
||||
if (ERROR)
|
||||
{
|
||||
if (myrank == 0 && ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "CUDA Z4C failed in predictor at t = " << PhysTime
|
||||
<< ", lev = " << lev << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
|
||||
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
|
||||
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev);
|
||||
for (int ithBH = 0; ithBH < BH_num; ithBH++)
|
||||
{
|
||||
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg[ithBH][0], Porg_rhs[ithBH][0], iter_count);
|
||||
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg[ithBH][1], Porg_rhs[ithBH][1], iter_count);
|
||||
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg[ithBH][2], Porg_rhs[ithBH][2], iter_count);
|
||||
if (Symmetry > 0)
|
||||
Porg[ithBH][2] = fabs(Porg[ithBH][2]);
|
||||
if (Symmetry == 2)
|
||||
{
|
||||
Porg[ithBH][0] = fabs(Porg[ithBH][0]);
|
||||
Porg[ithBH][1] = fabs(Porg[ithBH][1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ((lev == a_lev) && (LastAnas + dT_lev >= AnasTime))
|
||||
z4c_cuda_download_level_state(GH->PatL[lev], SynchList_pre, myrank, false);
|
||||
if (lev == a_lev)
|
||||
AnalysisStuff(lev, dT_lev);
|
||||
|
||||
for (iter_count = 1; iter_count < 4; iter_count++)
|
||||
{
|
||||
if (iter_count == 1 || iter_count == 3)
|
||||
TRK4 += dT_lev / 2;
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
MyList<Block> *BP = Pp->data->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *cg = BP->data;
|
||||
if (myrank == cg->rank)
|
||||
{
|
||||
double *state_in[Z4C_CUDA_STATE_COUNT];
|
||||
double *state_out[Z4C_CUDA_STATE_COUNT];
|
||||
double propspeed[Z4C_CUDA_STATE_COUNT];
|
||||
double soa_flat[3 * Z4C_CUDA_STATE_COUNT];
|
||||
if (!fill_z4c_cuda_views(cg, SynchList_pre, state_in, propspeed, soa_flat) ||
|
||||
!fill_z4c_cuda_views(cg, SynchList_cor, state_out))
|
||||
{
|
||||
cout << "CUDA Z4C state list mismatch on corrector step" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
int apply_bam_bc = 0;
|
||||
#if (MRBD == 0)
|
||||
#if (SommerType == 0)
|
||||
apply_bam_bc = (lev == 0) ? 1 : 0;
|
||||
#endif
|
||||
#elif (MRBD == 1)
|
||||
apply_bam_bc = 1;
|
||||
#endif
|
||||
int keep_resident_state = 1;
|
||||
int apply_enforce_ga = 0;
|
||||
#if (AGM == 0)
|
||||
apply_enforce_ga = 1;
|
||||
#elif (AGM == 1)
|
||||
apply_enforce_ga = (iter_count == 3) ? 1 : 0;
|
||||
#endif
|
||||
if (z4c_cuda_rk4_substep(cg,
|
||||
cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
state_in, state_out,
|
||||
propspeed, soa_flat, Pp->data->bbox,
|
||||
dT_lev, TRK4, iter_count, apply_bam_bc,
|
||||
Symmetry, lev, ndeps, cor,
|
||||
keep_resident_state, apply_enforce_ga, chitiny))
|
||||
{
|
||||
cout << "CUDA Z4C corrector substep failed in domain: ("
|
||||
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
|
||||
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
|
||||
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
|
||||
ERROR = 1;
|
||||
}
|
||||
}
|
||||
if (BP == Pp->data->ble)
|
||||
break;
|
||||
BP = BP->next;
|
||||
}
|
||||
Pp = Pp->next;
|
||||
}
|
||||
|
||||
{
|
||||
int erh = ERROR;
|
||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||
}
|
||||
if (ERROR)
|
||||
{
|
||||
if (myrank == 0 && ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "CUDA Z4C failed in RK4 substep#" << iter_count
|
||||
<< " at t = " << PhysTime
|
||||
<< ", lev = " << lev << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
|
||||
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
||||
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
if (!z4c_cuda_compute_porg_rhs_resident(GH, lev, myrank, BH_num,
|
||||
Porg, Porg1,
|
||||
Sfx, Sfy, Sfz, Symmetry))
|
||||
{
|
||||
if (myrank == 0 && ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "CUDA Z4C failed to interpolate black-hole shift at t = "
|
||||
<< PhysTime << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
for (int ithBH = 0; ithBH < BH_num; ithBH++)
|
||||
{
|
||||
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg1[ithBH][0], Porg_rhs[ithBH][0], iter_count);
|
||||
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg1[ithBH][1], Porg_rhs[ithBH][1], iter_count);
|
||||
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg1[ithBH][2], Porg_rhs[ithBH][2], iter_count);
|
||||
if (Symmetry > 0)
|
||||
Porg1[ithBH][2] = fabs(Porg1[ithBH][2]);
|
||||
if (Symmetry == 2)
|
||||
{
|
||||
Porg1[ithBH][0] = fabs(Porg1[ithBH][0]);
|
||||
Porg1[ithBH][1] = fabs(Porg1[ithBH][1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (iter_count < 3)
|
||||
{
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
MyList<Block> *BP = Pp->data->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *cg = BP->data;
|
||||
cg->swapList(SynchList_pre, SynchList_cor, myrank);
|
||||
if (BP == Pp->data->ble)
|
||||
break;
|
||||
BP = BP->next;
|
||||
}
|
||||
Pp = Pp->next;
|
||||
}
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
for (int ithBH = 0; ithBH < BH_num; ithBH++)
|
||||
{
|
||||
Porg[ithBH][0] = Porg1[ithBH][0];
|
||||
Porg[ithBH][1] = Porg1[ithBH][1];
|
||||
Porg[ithBH][2] = Porg1[ithBH][2];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
z4c_cuda_download_level_state(GH->PatL[lev], SynchList_cor, myrank, true);
|
||||
|
||||
#if (RPS == 0)
|
||||
RestrictProlong(lev, YN, BB);
|
||||
#endif
|
||||
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
MyList<Block> *BP = Pp->data->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *cg = BP->data;
|
||||
cg->swapList(StateList, SynchList_cor, myrank);
|
||||
cg->swapList(OldStateList, SynchList_cor, myrank);
|
||||
if (BP == Pp->data->ble)
|
||||
break;
|
||||
BP = BP->next;
|
||||
}
|
||||
Pp = Pp->next;
|
||||
}
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
for (int ithBH = 0; ithBH < BH_num; ithBH++)
|
||||
{
|
||||
Porg0[ithBH][0] = Porg1[ithBH][0];
|
||||
Porg0[ithBH][1] = Porg1[ithBH][1];
|
||||
Porg0[ithBH][2] = Porg1[ithBH][2];
|
||||
}
|
||||
}
|
||||
#else
|
||||
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
|
||||
#ifdef With_AHF
|
||||
AH_Step_Find(lev, dT_lev);
|
||||
#endif
|
||||
@@ -1039,15 +1589,19 @@ void Z4c_class::Step(int lev, int YN)
|
||||
{
|
||||
Porg0[ithBH][0] = Porg1[ithBH][0];
|
||||
Porg0[ithBH][1] = Porg1[ithBH][1];
|
||||
Porg0[ithBH][2] = Porg1[ithBH][2];
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
// for constraint preserving boundary (CPBC)
|
||||
#ifndef WithShell
|
||||
#error "CPBC only supports Shell"
|
||||
#endif
|
||||
Porg0[ithBH][2] = Porg1[ithBH][2];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
// for constraint preserving boundary (CPBC)
|
||||
#if USE_CUDA_Z4C && (ABEtype == 2)
|
||||
#error "USE_CUDA_Z4C resident path does not support CPBC"
|
||||
#endif
|
||||
#ifndef WithShell
|
||||
#error "CPBC only supports Shell"
|
||||
#endif
|
||||
|
||||
// 0: extroplate rhs, 1: extroplate variable
|
||||
// 2: extroplate variable but before RHS calculation
|
||||
|
||||
@@ -94,29 +94,31 @@
|
||||
Hcon,Mxcon,Mycon,Mzcon,Gmxcon,Gmycon,Gmzcon, &
|
||||
Symmetry,Lev,eps,co)
|
||||
|
||||
#if (ABV == 0)
|
||||
call ricci_gamma(ex, X, Y, Z, &
|
||||
chi, &
|
||||
dxx , gxy , gxz , dyy , gyz , dzz,&
|
||||
Gamx , Gamy , Gamz , &
|
||||
Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
|
||||
Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
|
||||
Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
|
||||
Symmetry)
|
||||
#endif
|
||||
call constraint_bssn(ex, X, Y, Z,&
|
||||
chi,trK, &
|
||||
dxx,gxy,gxz,dyy,gyz,dzz, &
|
||||
Axx,Axy,Axz,Ayy,Ayz,Azz, &
|
||||
Gamx,Gamy,Gamz,&
|
||||
Lap,betax,betay,betaz,rho,Sx,Sy,Sz,&
|
||||
Gamxxx, Gamxxy, Gamxxz,Gamxyy, Gamxyz, Gamxzz, &
|
||||
Gamyxx, Gamyxy, Gamyxz,Gamyyy, Gamyyz, Gamyzz, &
|
||||
Gamzxx, Gamzxy, Gamzxz,Gamzyy, Gamzyz, Gamzzz, &
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz, &
|
||||
Hcon,Mxcon,Mycon,Mzcon,Gmxcon,Gmycon,Gmzcon, &
|
||||
Symmetry)
|
||||
if (co == 0) then
|
||||
#if (ABV == 0)
|
||||
call ricci_gamma(ex, X, Y, Z, &
|
||||
chi, &
|
||||
dxx , gxy , gxz , dyy , gyz , dzz,&
|
||||
Gamx , Gamy , Gamz , &
|
||||
Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
|
||||
Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
|
||||
Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
|
||||
Symmetry)
|
||||
#endif
|
||||
call constraint_bssn(ex, X, Y, Z,&
|
||||
chi,trK, &
|
||||
dxx,gxy,gxz,dyy,gyz,dzz, &
|
||||
Axx,Axy,Axz,Ayy,Ayz,Azz, &
|
||||
Gamx,Gamy,Gamz,&
|
||||
Lap,betax,betay,betaz,rho,Sx,Sy,Sz,&
|
||||
Gamxxx, Gamxxy, Gamxxz,Gamxyy, Gamxyz, Gamxzz, &
|
||||
Gamyxx, Gamyxy, Gamyxz,Gamyyy, Gamyyz, Gamyzz, &
|
||||
Gamzxx, Gamzxy, Gamzxz,Gamzyy, Gamzyz, Gamzzz, &
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz, &
|
||||
Hcon,Mxcon,Mycon,Mzcon,Gmxcon,Gmycon,Gmzcon, &
|
||||
Symmetry)
|
||||
endif
|
||||
|
||||
return
|
||||
|
||||
@@ -226,11 +228,12 @@
|
||||
|
||||
call get_Z4cparameters(kappa1,kappa2,kappa3,FF,eta)
|
||||
|
||||
!!! sanity check
|
||||
dX = sum(chi)+sum(trK)+sum(dxx)+sum(gxy)+sum(gxz)+sum(dyy)+sum(gyz)+sum(dzz) &
|
||||
+sum(Axx)+sum(Axy)+sum(Axz)+sum(Ayy)+sum(Ayz)+sum(Azz) &
|
||||
+sum(Gamx)+sum(Gamy)+sum(Gamz) &
|
||||
+sum(Lap)+sum(betax)+sum(betay)+sum(betaz)+sum(dtSfx)+sum(dtSfy)+sum(dtSfz) &
|
||||
!!! sanity check
|
||||
#ifdef DEBUG
|
||||
dX = sum(chi)+sum(trK)+sum(dxx)+sum(gxy)+sum(gxz)+sum(dyy)+sum(gyz)+sum(dzz) &
|
||||
+sum(Axx)+sum(Axy)+sum(Axz)+sum(Ayy)+sum(Ayz)+sum(Azz) &
|
||||
+sum(Gamx)+sum(Gamy)+sum(Gamz) &
|
||||
+sum(Lap)+sum(betax)+sum(betay)+sum(betaz)+sum(dtSfx)+sum(dtSfy)+sum(dtSfz) &
|
||||
+sum(TZ)
|
||||
if(dX.ne.dX) then
|
||||
if(sum(chi).ne.sum(chi))write(*,*)"Z4c_rhs.f90: find NaN in chi"
|
||||
@@ -257,10 +260,11 @@
|
||||
if(sum(dtSfx).ne.sum(dtSfx))write(*,*)"Z4c_rhs.f90: find NaN in dtSfx"
|
||||
if(sum(dtSfy).ne.sum(dtSfy))write(*,*)"Z4c_rhs.f90: find NaN in dtSfy"
|
||||
if(sum(dtSfz).ne.sum(dtSfz))write(*,*)"Z4c_rhs.f90: find NaN in dtSfz"
|
||||
if(sum(TZ).ne.sum(Tz))write(*,*)"Z4c_rhs.f90: find NaN in TZ"
|
||||
gont = 1
|
||||
return
|
||||
endif
|
||||
if(sum(TZ).ne.sum(Tz))write(*,*)"Z4c_rhs.f90: find NaN in TZ"
|
||||
gont = 1
|
||||
return
|
||||
endif
|
||||
#endif
|
||||
|
||||
PI = dacos(-ONE)
|
||||
|
||||
@@ -1263,30 +1267,32 @@
|
||||
|
||||
endif
|
||||
|
||||
#if (ABV == 0)
|
||||
call ricci_gamma(ex, X, Y, Z, &
|
||||
chi, &
|
||||
dxx , gxy , gxz , dyy , gyz , dzz,&
|
||||
Gamx , Gamy , Gamz , &
|
||||
Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
|
||||
Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
|
||||
Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
|
||||
Symmetry)
|
||||
#endif
|
||||
|
||||
call constraint_bssn(ex, X, Y, Z,&
|
||||
chi,trK, &
|
||||
dxx,gxy,gxz,dyy,gyz,dzz, &
|
||||
Axx,Axy,Axz,Ayy,Ayz,Azz, &
|
||||
Gamx,Gamy,Gamz,&
|
||||
Lap,betax,betay,betaz,rho,Sx,Sy,Sz,&
|
||||
Gamxxx, Gamxxy, Gamxxz,Gamxyy, Gamxyz, Gamxzz, &
|
||||
Gamyxx, Gamyxy, Gamyxz,Gamyyy, Gamyyz, Gamyzz, &
|
||||
Gamzxx, Gamzxy, Gamzxz,Gamzyy, Gamzyz, Gamzzz, &
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz, &
|
||||
Hcon,Mxcon,Mycon,Mzcon,Gmxcon,Gmycon,Gmzcon, &
|
||||
Symmetry)
|
||||
if (co == 0) then
|
||||
#if (ABV == 0)
|
||||
call ricci_gamma(ex, X, Y, Z, &
|
||||
chi, &
|
||||
dxx , gxy , gxz , dyy , gyz , dzz,&
|
||||
Gamx , Gamy , Gamz , &
|
||||
Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
|
||||
Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
|
||||
Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
|
||||
Symmetry)
|
||||
#endif
|
||||
|
||||
call constraint_bssn(ex, X, Y, Z,&
|
||||
chi,trK, &
|
||||
dxx,gxy,gxz,dyy,gyz,dzz, &
|
||||
Axx,Axy,Axz,Ayy,Ayz,Azz, &
|
||||
Gamx,Gamy,Gamz,&
|
||||
Lap,betax,betay,betaz,rho,Sx,Sy,Sz,&
|
||||
Gamxxx, Gamxxy, Gamxxz,Gamxyy, Gamxyz, Gamxzz, &
|
||||
Gamyxx, Gamyxy, Gamyxz,Gamyyy, Gamyyz, Gamyzz, &
|
||||
Gamzxx, Gamzxy, Gamzxz,Gamzyy, Gamzyz, Gamzzz, &
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz, &
|
||||
Hcon,Mxcon,Mycon,Mzcon,Gmxcon,Gmycon,Gmzcon, &
|
||||
Symmetry)
|
||||
endif
|
||||
|
||||
gont = 0
|
||||
|
||||
|
||||
@@ -121,11 +121,12 @@
|
||||
|
||||
call get_Z4cparameters(kappa1,kappa2,kappa3,FF,eta)
|
||||
|
||||
!!! sanity check
|
||||
dX = sum(chi)+sum(trK)+sum(dxx)+sum(gxy)+sum(gxz)+sum(dyy)+sum(gyz)+sum(dzz) &
|
||||
+sum(Axx)+sum(Axy)+sum(Axz)+sum(Ayy)+sum(Ayz)+sum(Azz) &
|
||||
+sum(Gamx)+sum(Gamy)+sum(Gamz) &
|
||||
+sum(Lap)+sum(betax)+sum(betay)+sum(betaz)+sum(dtSfx)+sum(dtSfy)+sum(dtSfz) &
|
||||
!!! sanity check
|
||||
#ifdef DEBUG
|
||||
dX = sum(chi)+sum(trK)+sum(dxx)+sum(gxy)+sum(gxz)+sum(dyy)+sum(gyz)+sum(dzz) &
|
||||
+sum(Axx)+sum(Axy)+sum(Axz)+sum(Ayy)+sum(Ayz)+sum(Azz) &
|
||||
+sum(Gamx)+sum(Gamy)+sum(Gamz) &
|
||||
+sum(Lap)+sum(betax)+sum(betay)+sum(betaz)+sum(dtSfx)+sum(dtSfy)+sum(dtSfz) &
|
||||
+sum(TZ)
|
||||
if(dX.ne.dX) then
|
||||
if(sum(chi).ne.sum(chi))write(*,*)"Z4c_rhs_ss.f90: find NaN in chi"
|
||||
@@ -152,10 +153,11 @@
|
||||
if(sum(dtSfx).ne.sum(dtSfx))write(*,*)"Z4c_rhs_ss.f90: find NaN in dtSfx"
|
||||
if(sum(dtSfy).ne.sum(dtSfy))write(*,*)"Z4c_rhs_ss.f90: find NaN in dtSfy"
|
||||
if(sum(dtSfz).ne.sum(dtSfz))write(*,*)"Z4c_rhs_ss.f90: find NaN in dtSfz"
|
||||
if(sum(TZ).ne.sum(Tz))write(*,*)"Z4c_rhs_ss.f90: find NaN in TZ"
|
||||
gont = 1
|
||||
return
|
||||
endif
|
||||
if(sum(TZ).ne.sum(Tz))write(*,*)"Z4c_rhs_ss.f90: find NaN in TZ"
|
||||
gont = 1
|
||||
return
|
||||
endif
|
||||
#endif
|
||||
|
||||
PI = dacos(-ONE)
|
||||
|
||||
@@ -1388,41 +1390,43 @@
|
||||
call kodis_sh(ex,crho,sigma,R,TZ,TZ_rhs,SSS,Symmetry,eps,sst)
|
||||
endif
|
||||
|
||||
#if (ABV == 1)
|
||||
call ricci_gamma_ss(ex,crho,sigma,R,X, Y, Z, &
|
||||
drhodx, drhody, drhodz, &
|
||||
dsigmadx,dsigmady,dsigmadz, &
|
||||
dRdx,dRdy,dRdz, &
|
||||
drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz, &
|
||||
dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz, &
|
||||
dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz, &
|
||||
chi, &
|
||||
dxx , gxy , gxz , dyy , gyz , dzz,&
|
||||
Gamx , Gamy , Gamz , &
|
||||
Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
|
||||
Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
|
||||
Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
|
||||
Symmetry,Lev,sst)
|
||||
call constraint_bssn_ss(ex,crho,sigma,R,X, Y, Z, &
|
||||
drhodx, drhody, drhodz, &
|
||||
dsigmadx,dsigmady,dsigmadz, &
|
||||
dRdx,dRdy,dRdz, &
|
||||
drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz, &
|
||||
dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz, &
|
||||
dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz, &
|
||||
chi,trK, &
|
||||
dxx,gxy,gxz,dyy,gyz,dzz, &
|
||||
Axx,Axy,Axz,Ayy,Ayz,Azz, &
|
||||
Gamx,Gamy,Gamz,&
|
||||
Lap,betax,betay,betaz,rho,Sx,Sy,Sz,&
|
||||
Gamxxx, Gamxxy, Gamxxz,Gamxyy, Gamxyz, Gamxzz, &
|
||||
Gamyxx, Gamyxy, Gamyxz,Gamyyy, Gamyyz, Gamyzz, &
|
||||
Gamzxx, Gamzxy, Gamzxz,Gamzyy, Gamzyz, Gamzzz, &
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz, &
|
||||
Hcon,Mxcon,Mycon,Mzcon,Gmxcon,Gmycon,Gmzcon, &
|
||||
Symmetry,Lev,sst)
|
||||
#endif
|
||||
if (co == 0) then
|
||||
#if (ABV == 1)
|
||||
call ricci_gamma_ss(ex,crho,sigma,R,X, Y, Z, &
|
||||
drhodx, drhody, drhodz, &
|
||||
dsigmadx,dsigmady,dsigmadz, &
|
||||
dRdx,dRdy,dRdz, &
|
||||
drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz, &
|
||||
dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz, &
|
||||
dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz, &
|
||||
chi, &
|
||||
dxx , gxy , gxz , dyy , gyz , dzz,&
|
||||
Gamx , Gamy , Gamz , &
|
||||
Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
|
||||
Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
|
||||
Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
|
||||
Symmetry,Lev,sst)
|
||||
#endif
|
||||
call constraint_bssn_ss(ex,crho,sigma,R,X, Y, Z, &
|
||||
drhodx, drhody, drhodz, &
|
||||
dsigmadx,dsigmady,dsigmadz, &
|
||||
dRdx,dRdy,dRdz, &
|
||||
drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz, &
|
||||
dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz, &
|
||||
dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz, &
|
||||
chi,trK, &
|
||||
dxx,gxy,gxz,dyy,gyz,dzz, &
|
||||
Axx,Axy,Axz,Ayy,Ayz,Azz, &
|
||||
Gamx,Gamy,Gamz,&
|
||||
Lap,betax,betay,betaz,rho,Sx,Sy,Sz,&
|
||||
Gamxxx, Gamxxy, Gamxxz,Gamxyy, Gamxyz, Gamxzz, &
|
||||
Gamyxx, Gamyxy, Gamyxz,Gamyyy, Gamyyz, Gamyzz, &
|
||||
Gamzxx, Gamzxy, Gamzxz,Gamzyy, Gamzyz, Gamzzz, &
|
||||
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz, &
|
||||
Hcon,Mxcon,Mycon,Mzcon,Gmxcon,Gmycon,Gmzcon, &
|
||||
Symmetry,Lev,sst)
|
||||
endif
|
||||
|
||||
gont = 0
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -45,10 +45,12 @@ public:
|
||||
int checkrun;
|
||||
char checkfilename[50];
|
||||
int Steps;
|
||||
double StartTime, TotalTime;
|
||||
double AnasTime, DumpTime, d2DumpTime, CheckTime;
|
||||
double LastAnas, LastConsOut;
|
||||
double Courant;
|
||||
double StartTime, TotalTime;
|
||||
double AnasTime, DumpTime, d2DumpTime, CheckTime;
|
||||
double LastAnas, LastConsOut;
|
||||
bool cuda_level0_constraint_cache_valid;
|
||||
int *ConstraintRefreshLevels;
|
||||
double Courant;
|
||||
double numepss, numepsb, numepsh;
|
||||
int Symmetry;
|
||||
int maxl, decn;
|
||||
@@ -130,10 +132,12 @@ public:
|
||||
Parallel::SyncCache *sync_cache_cor; // per-level cache for corrector sync
|
||||
Parallel::SyncCache *sync_cache_rp_coarse; // RestrictProlong sync on PatL[lev-1]
|
||||
Parallel::SyncCache *sync_cache_rp_fine; // RestrictProlong sync on PatL[lev]
|
||||
Parallel::SyncCache *sync_cache_restrict; // cached Restrict in RestrictProlong
|
||||
Parallel::SyncCache *sync_cache_outbd; // cached OutBdLow2Hi in RestrictProlong
|
||||
|
||||
monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor;
|
||||
monitor *ConVMonitor;
|
||||
surface_integral *Waveshell;
|
||||
monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor;
|
||||
monitor *ConVMonitor, *TimingMonitor;
|
||||
surface_integral *Waveshell;
|
||||
checkpoint *CheckPoint;
|
||||
|
||||
public:
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,73 +0,0 @@
|
||||
|
||||
#ifndef BSSN_GPU_H_
|
||||
#define BSSN_GPU_H_
|
||||
#include "bssn_macro.h"
|
||||
#include "macrodef.fh"
|
||||
|
||||
#define DEVICE_ID 0
|
||||
// #define DEVICE_ID_BY_MPI_RANK
|
||||
#define GRID_DIM 256
|
||||
#define BLOCK_DIM 128
|
||||
|
||||
#define _FH2_(i, j, k) fh[(i) + (j) * _1D_SIZE[2] + (k) * _2D_SIZE[2]]
|
||||
#define _FH3_(i, j, k) fh[(i) + (j) * _1D_SIZE[3] + (k) * _2D_SIZE[3]]
|
||||
#define pow2(x) ((x) * (x))
|
||||
#define TimeBetween(a, b) ((b.tv_sec - a.tv_sec) + (b.tv_usec - a.tv_usec) / 1000000.0f)
|
||||
#define M_ metac.
|
||||
#define Mh_ meta->
|
||||
#define Ms_ metassc.
|
||||
#define Msh_ metass->
|
||||
|
||||
// #define TIMING
|
||||
|
||||
#define RHS_SS_PARA int calledby, int mpi_rank, int *ex, double &T, double *crho, double *sigma, double *R, double *X, double *Y, double *Z, double *drhodx, double *drhody, double *drhodz, double *dsigmadx, double *dsigmady, double *dsigmadz, double *dRdx, double *dRdy, double *dRdz, double *drhodxx, double *drhodxy, double *drhodxz, double *drhodyy, double *drhodyz, double *drhodzz, double *dsigmadxx, double *dsigmadxy, double *dsigmadxz, double *dsigmadyy, double *dsigmadyz, double *dsigmadzz, double *dRdxx, double *dRdxy, double *dRdxz, double *dRdyy, double *dRdyz, double *dRdzz, double *chi, double *trK, double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz, double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz, double *Gamx, double *Gamy, double *Gamz, double *Lap, double *betax, double *betay, double *betaz, double *dtSfx, double *dtSfy, double *dtSfz, double *chi_rhs, double *trK_rhs, double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs, double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs, double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs, double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs, double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs, double *rho, double *Sx, double *Sy, double *Sz, double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz, double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz, double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz, double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz, double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz, double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res, double *Gmx_Res, double *Gmy_Res, double *Gmz_Res, int &Symmetry, int &Lev, double &eps, int &sst, int &co
|
||||
|
||||
/** main function */
|
||||
int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,
|
||||
double *X, double *Y, double *Z,
|
||||
|
||||
double *chi, double *trK,
|
||||
|
||||
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
|
||||
|
||||
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
|
||||
|
||||
double *Gamx, double *Gamy, double *Gamz,
|
||||
|
||||
double *Lap, double *betax, double *betay, double *betaz,
|
||||
|
||||
double *dtSfx, double *dtSfy, double *dtSfz,
|
||||
|
||||
double *chi_rhs, double *trK_rhs,
|
||||
|
||||
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
|
||||
|
||||
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
|
||||
|
||||
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
|
||||
|
||||
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
|
||||
|
||||
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
|
||||
|
||||
double *rho, double *Sx, double *Sy, double *Sz, double *Sxx,
|
||||
double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
|
||||
|
||||
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
|
||||
|
||||
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
|
||||
|
||||
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
|
||||
|
||||
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
|
||||
|
||||
double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res,
|
||||
double *Gmx_Res, double *Gmy_Res, double *Gmz_Res,
|
||||
int &Symmetry, int &Lev, double &eps, int &co);
|
||||
|
||||
int gpu_rhs_ss(RHS_SS_PARA);
|
||||
|
||||
/** Init GPU side data in GPUMeta. */
|
||||
// void init_fluid_meta_gpu(GPUMeta *gpu_meta);
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,210 +0,0 @@
|
||||
|
||||
#ifndef BSSN_GPU_CLASS_H
|
||||
#define BSSN_GPU_CLASS_H
|
||||
|
||||
#ifdef newc
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
#include <cmath>
|
||||
using namespace std;
|
||||
#else
|
||||
#include <iostream.h>
|
||||
#include <iomanip.h>
|
||||
#include <fstream.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#endif
|
||||
|
||||
#include <mpi.h>
|
||||
|
||||
#include "macrodef.h"
|
||||
#include "cgh.h"
|
||||
#include "ShellPatch.h"
|
||||
#include "misc.h"
|
||||
#include "var.h"
|
||||
#include "MyList.h"
|
||||
#include "monitor.h"
|
||||
#include "surface_integral.h"
|
||||
#include "checkpoint.h"
|
||||
|
||||
// added by yangquan
|
||||
#include "bssn_macro.h"
|
||||
|
||||
extern void setpbh(int iBHN, double **iPBH, double *iMass, int rBHN);
|
||||
|
||||
class bssn_class
|
||||
{
|
||||
public:
|
||||
// added by yangquan
|
||||
//----------------------
|
||||
int gpu_num_mynode;
|
||||
int cpu_core_num_mynode;
|
||||
int mpi_process_num_mynode;
|
||||
int my_sequence_mynode;
|
||||
int mynode_id;
|
||||
int use_gpu;
|
||||
|
||||
virtual void Step_GPU(int lev, int YN);
|
||||
virtual void Get_runtime_envirment();
|
||||
// virtual void Step_OPENMP(int lev,int YN);
|
||||
//----------------------
|
||||
|
||||
int ngfs;
|
||||
int nprocs, myrank;
|
||||
cgh *GH;
|
||||
ShellPatch *SH;
|
||||
double PhysTime;
|
||||
|
||||
int checkrun;
|
||||
char checkfilename[50];
|
||||
int Steps;
|
||||
double StartTime, TotalTime;
|
||||
double AnasTime, DumpTime, d2DumpTime, CheckTime;
|
||||
double LastAnas, LastConsOut;
|
||||
double Courant;
|
||||
double numepss, numepsb, numepsh;
|
||||
int Symmetry;
|
||||
int maxl, decn;
|
||||
double maxrex, drex;
|
||||
int trfls, a_lev;
|
||||
|
||||
double dT;
|
||||
double chitiny;
|
||||
|
||||
double **Porg0, **Porgbr, **Porg, **Porg1, **Porg_rhs;
|
||||
int BH_num, BH_num_input;
|
||||
double *Mass, *Pmom, *Spin;
|
||||
double ADMMass;
|
||||
|
||||
var *phio, *trKo;
|
||||
var *gxxo, *gxyo, *gxzo, *gyyo, *gyzo, *gzzo;
|
||||
var *Axxo, *Axyo, *Axzo, *Ayyo, *Ayzo, *Azzo;
|
||||
var *Gmxo, *Gmyo, *Gmzo;
|
||||
var *Lapo, *Sfxo, *Sfyo, *Sfzo;
|
||||
var *dtSfxo, *dtSfyo, *dtSfzo;
|
||||
|
||||
var *phi0, *trK0;
|
||||
var *gxx0, *gxy0, *gxz0, *gyy0, *gyz0, *gzz0;
|
||||
var *Axx0, *Axy0, *Axz0, *Ayy0, *Ayz0, *Azz0;
|
||||
var *Gmx0, *Gmy0, *Gmz0;
|
||||
var *Lap0, *Sfx0, *Sfy0, *Sfz0;
|
||||
var *dtSfx0, *dtSfy0, *dtSfz0;
|
||||
|
||||
var *phi, *trK;
|
||||
var *gxx, *gxy, *gxz, *gyy, *gyz, *gzz;
|
||||
var *Axx, *Axy, *Axz, *Ayy, *Ayz, *Azz;
|
||||
var *Gmx, *Gmy, *Gmz;
|
||||
var *Lap, *Sfx, *Sfy, *Sfz;
|
||||
var *dtSfx, *dtSfy, *dtSfz;
|
||||
|
||||
var *phi1, *trK1;
|
||||
var *gxx1, *gxy1, *gxz1, *gyy1, *gyz1, *gzz1;
|
||||
var *Axx1, *Axy1, *Axz1, *Ayy1, *Ayz1, *Azz1;
|
||||
var *Gmx1, *Gmy1, *Gmz1;
|
||||
var *Lap1, *Sfx1, *Sfy1, *Sfz1;
|
||||
var *dtSfx1, *dtSfy1, *dtSfz1;
|
||||
|
||||
var *phi_rhs, *trK_rhs;
|
||||
var *gxx_rhs, *gxy_rhs, *gxz_rhs, *gyy_rhs, *gyz_rhs, *gzz_rhs;
|
||||
var *Axx_rhs, *Axy_rhs, *Axz_rhs, *Ayy_rhs, *Ayz_rhs, *Azz_rhs;
|
||||
var *Gmx_rhs, *Gmy_rhs, *Gmz_rhs;
|
||||
var *Lap_rhs, *Sfx_rhs, *Sfy_rhs, *Sfz_rhs;
|
||||
var *dtSfx_rhs, *dtSfy_rhs, *dtSfz_rhs;
|
||||
|
||||
var *rho, *Sx, *Sy, *Sz, *Sxx, *Sxy, *Sxz, *Syy, *Syz, *Szz;
|
||||
|
||||
var *Gamxxx, *Gamxxy, *Gamxxz, *Gamxyy, *Gamxyz, *Gamxzz;
|
||||
var *Gamyxx, *Gamyxy, *Gamyxz, *Gamyyy, *Gamyyz, *Gamyzz;
|
||||
var *Gamzxx, *Gamzxy, *Gamzxz, *Gamzyy, *Gamzyz, *Gamzzz;
|
||||
|
||||
var *Rxx, *Rxy, *Rxz, *Ryy, *Ryz, *Rzz;
|
||||
|
||||
var *Rpsi4, *Ipsi4;
|
||||
var *t1Rpsi4, *t1Ipsi4, *t2Rpsi4, *t2Ipsi4;
|
||||
|
||||
var *Cons_Ham, *Cons_Px, *Cons_Py, *Cons_Pz, *Cons_Gx, *Cons_Gy, *Cons_Gz;
|
||||
|
||||
#ifdef Point_Psi4
|
||||
var *phix, *phiy, *phiz;
|
||||
var *trKx, *trKy, *trKz;
|
||||
var *Axxx, *Axxy, *Axxz;
|
||||
var *Axyx, *Axyy, *Axyz;
|
||||
var *Axzx, *Axzy, *Axzz;
|
||||
var *Ayyx, *Ayyy, *Ayyz;
|
||||
var *Ayzx, *Ayzy, *Ayzz;
|
||||
var *Azzx, *Azzy, *Azzz;
|
||||
#endif
|
||||
// FIXME: uc = StateList, up = OldStateList, upp = SynchList_cor; so never touch these three data
|
||||
MyList<var> *StateList, *SynchList_pre, *SynchList_cor, *RHSList;
|
||||
MyList<var> *OldStateList, *DumpList;
|
||||
MyList<var> *ConstraintList;
|
||||
|
||||
monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor;
|
||||
monitor *ConVMonitor;
|
||||
surface_integral *Waveshell;
|
||||
checkpoint *CheckPoint;
|
||||
|
||||
public:
|
||||
bssn_class(double Couranti, double StartTimei, double TotalTimei, double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei,
|
||||
int Symmetryi, int checkruni, char *checkfilenamei, double numepssi, double numepsbi, double numepshi,
|
||||
int a_levi, int maxli, int decni, double maxrexi, double drexi);
|
||||
~bssn_class();
|
||||
|
||||
void Evolve(int Steps);
|
||||
void RecursiveStep(int lev);
|
||||
#if (PSTR == 1)
|
||||
void ParallelStep();
|
||||
void SHStep();
|
||||
#endif
|
||||
void RestrictProlong(int lev, int YN, bool BB, MyList<var> *SL, MyList<var> *OL, MyList<var> *corL);
|
||||
void RestrictProlong_aux(int lev, int YN, bool BB, MyList<var> *SL, MyList<var> *OL, MyList<var> *corL);
|
||||
void RestrictProlong(int lev, int YN, bool BB);
|
||||
void ProlongRestrict(int lev, int YN, bool BB);
|
||||
void Setup_Black_Hole_position();
|
||||
void compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, var *fory, var *forz, int lev);
|
||||
bool read_Pablo_file(int *ext, double *datain, char *filename);
|
||||
void write_Pablo_file(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax,
|
||||
char *filename);
|
||||
void AnalysisStuff(int lev, double dT_lev);
|
||||
void Setup_KerrSchild();
|
||||
void Enforce_algcon(int lev, int fg);
|
||||
|
||||
void testRestrict();
|
||||
void testOutBd();
|
||||
|
||||
virtual void Setup_Initial_Data_Lousto();
|
||||
virtual void Setup_Initial_Data_Cao();
|
||||
virtual void Initialize();
|
||||
virtual void Read_Ansorg();
|
||||
virtual void Read_Pablo() {};
|
||||
virtual void Compute_Psi4(int lev);
|
||||
virtual void Step(int lev, int YN);
|
||||
virtual void Interp_Constraint(bool infg);
|
||||
virtual void Constraint_Out();
|
||||
virtual void Compute_Constraint();
|
||||
|
||||
#ifdef With_AHF
|
||||
protected:
|
||||
MyList<var> *AHList, *AHDList, *GaugeList;
|
||||
int AHfindevery;
|
||||
double AHdumptime;
|
||||
int *lastahdumpid, HN_num; // number of possible horizons
|
||||
int *findeveryl;
|
||||
double *xc, *yc, *zc, *xr, *yr, *zr;
|
||||
bool *trigger;
|
||||
double *dTT;
|
||||
int *dumpid;
|
||||
|
||||
public:
|
||||
void AH_Prepare_derivatives();
|
||||
bool AH_Interp_Points(MyList<var> *VarList,
|
||||
int NN, double **XX,
|
||||
double *Shellf, int Symmetryi);
|
||||
void AH_Step_Find(int lev, double dT_lev);
|
||||
#endif
|
||||
};
|
||||
#endif /* BSSN_GPU_CLASS_H */
|
||||
@@ -59,9 +59,10 @@
|
||||
real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: Rxx,Rxy,Rxz,Ryy,Ryz,Rzz
|
||||
real*8,intent(in) :: eps
|
||||
real*8, dimension(ex(1),ex(2),ex(3)),intent(inout) :: ham_Res, movx_Res, movy_Res, movz_Res
|
||||
real*8, dimension(ex(1),ex(2),ex(3)),intent(inout) :: Gmx_Res, Gmy_Res, Gmz_Res
|
||||
! gont = 0: success; gont = 1: something wrong
|
||||
integer::gont
|
||||
real*8, dimension(ex(1),ex(2),ex(3)),intent(inout) :: Gmx_Res, Gmy_Res, Gmz_Res
|
||||
! gont = 0: success; gont = 1: something wrong
|
||||
integer::gont
|
||||
integer :: i,j,k
|
||||
|
||||
!~~~~~~> Other variables:
|
||||
|
||||
@@ -83,11 +84,18 @@
|
||||
real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz
|
||||
real*8, dimension(ex(1),ex(2),ex(3)) :: gupyy,gupyz,gupzz
|
||||
|
||||
real*8,dimension(3) ::SSS,AAS,ASA,SAA,ASS,SAS,SSA
|
||||
real*8 :: dX, dY, dZ, PI
|
||||
real*8, parameter :: ZEO = 0.d0,ONE = 1.D0, TWO = 2.D0, FOUR = 4.D0
|
||||
real*8, parameter :: EIGHT = 8.D0, HALF = 0.5D0, THR = 3.d0
|
||||
real*8, parameter :: SYM = 1.D0, ANTI= - 1.D0
|
||||
real*8,dimension(3) ::SSS,AAS,ASA,SAA,ASS,SAS,SSA
|
||||
real*8 :: dX, dY, dZ, PI
|
||||
real*8 :: divb_loc,det_loc
|
||||
real*8 :: gupxx_loc,gupxy_loc,gupxz_loc,gupyy_loc,gupyz_loc,gupzz_loc
|
||||
real*8 :: Rxx_loc,Rxy_loc,Rxz_loc,Ryy_loc,Ryz_loc,Rzz_loc
|
||||
real*8 :: fxx_loc,fxy_loc,fxz_loc
|
||||
real*8 :: Gamxa_loc,Gamya_loc,Gamza_loc
|
||||
real*8 :: f_loc,chin_loc
|
||||
real*8 :: l_fxx,l_fxy,l_fxz,l_fyy,l_fyz,l_fzz,S_loc
|
||||
real*8, parameter :: ZEO = 0.d0,ONE = 1.D0, TWO = 2.D0, FOUR = 4.D0
|
||||
real*8, parameter :: EIGHT = 8.D0, HALF = 0.5D0, THR = 3.d0
|
||||
real*8, parameter :: SYM = 1.D0, ANTI= - 1.D0
|
||||
double precision,parameter::FF = 0.75d0,eta=2.d0
|
||||
real*8, parameter :: F1o3 = 1.D0/3.D0, F2o3 = 2.D0/3.D0,F3o2=1.5d0, F1o6 = 1.D0/6.D0
|
||||
real*8, parameter :: F16=1.6d1,F8=8.d0
|
||||
@@ -96,11 +104,11 @@
|
||||
real*8, dimension(ex(1),ex(2),ex(3)) :: reta
|
||||
#endif
|
||||
|
||||
#if (GAUGE == 6 || GAUGE == 7)
|
||||
integer :: BHN,i,j,k
|
||||
real*8, dimension(9) :: Porg
|
||||
real*8, dimension(3) :: Mass
|
||||
real*8 :: r1,r2,M,A,w1,w2,C1,C2
|
||||
#if (GAUGE == 6 || GAUGE == 7)
|
||||
integer :: BHN
|
||||
real*8, dimension(9) :: Porg
|
||||
real*8, dimension(3) :: Mass
|
||||
real*8 :: r1,r2,M,A,w1,w2,C1,C2
|
||||
real*8, dimension(ex(1),ex(2),ex(3)) :: reta
|
||||
|
||||
call getpbh(BHN,Porg,Mass)
|
||||
@@ -145,174 +153,204 @@
|
||||
dY = Y(2) - Y(1)
|
||||
dZ = Z(2) - Z(1)
|
||||
|
||||
alpn1 = Lap + ONE
|
||||
chin1 = chi + ONE
|
||||
gxx = dxx + ONE
|
||||
gyy = dyy + ONE
|
||||
gzz = dzz + ONE
|
||||
do k=1,ex(3)
|
||||
do j=1,ex(2)
|
||||
do i=1,ex(1)
|
||||
alpn1(i,j,k) = Lap(i,j,k) + ONE
|
||||
chin1(i,j,k) = chi(i,j,k) + ONE
|
||||
gxx(i,j,k) = dxx(i,j,k) + ONE
|
||||
gyy(i,j,k) = dyy(i,j,k) + ONE
|
||||
gzz(i,j,k) = dzz(i,j,k) + ONE
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
call fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev)
|
||||
call fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev)
|
||||
call fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev)
|
||||
|
||||
div_beta = betaxx + betayy + betazz
|
||||
|
||||
call fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev)
|
||||
call fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev)
|
||||
|
||||
chi_rhs = F2o3 *chin1*( alpn1 * trK - div_beta ) !rhs for chi
|
||||
|
||||
call fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
|
||||
call fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev)
|
||||
call fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev)
|
||||
call fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
|
||||
call fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev)
|
||||
call fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
|
||||
|
||||
gxx_rhs = - TWO * alpn1 * Axx - F2o3 * gxx * div_beta + &
|
||||
TWO *( gxx * betaxx + gxy * betayx + gxz * betazx)
|
||||
|
||||
gyy_rhs = - TWO * alpn1 * Ayy - F2o3 * gyy * div_beta + &
|
||||
TWO *( gxy * betaxy + gyy * betayy + gyz * betazy)
|
||||
|
||||
gzz_rhs = - TWO * alpn1 * Azz - F2o3 * gzz * div_beta + &
|
||||
TWO *( gxz * betaxz + gyz * betayz + gzz * betazz)
|
||||
|
||||
gxy_rhs = - TWO * alpn1 * Axy + F1o3 * gxy * div_beta + &
|
||||
gxx * betaxy + gxz * betazy + &
|
||||
gyy * betayx + gyz * betazx &
|
||||
- gxy * betazz
|
||||
|
||||
gyz_rhs = - TWO * alpn1 * Ayz + F1o3 * gyz * div_beta + &
|
||||
gxy * betaxz + gyy * betayz + &
|
||||
gxz * betaxy + gzz * betazy &
|
||||
- gyz * betaxx
|
||||
|
||||
gxz_rhs = - TWO * alpn1 * Axz + F1o3 * gxz * div_beta + &
|
||||
gxx * betaxz + gxy * betayz + &
|
||||
gyz * betayx + gzz * betazx &
|
||||
- gxz * betayy !rhs for gij
|
||||
|
||||
! invert tilted metric
|
||||
gupzz = gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
|
||||
gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
|
||||
gupxx = ( gyy * gzz - gyz * gyz ) / gupzz
|
||||
gupxy = - ( gxy * gzz - gyz * gxz ) / gupzz
|
||||
gupxz = ( gxy * gyz - gyy * gxz ) / gupzz
|
||||
gupyy = ( gxx * gzz - gxz * gxz ) / gupzz
|
||||
gupyz = - ( gxx * gyz - gxy * gxz ) / gupzz
|
||||
gupzz = ( gxx * gyy - gxy * gxy ) / gupzz
|
||||
|
||||
if(co == 0)then
|
||||
! Gam^i_Res = Gam^i + gup^ij_,j
|
||||
Gmx_Res = Gamx - (gupxx*(gupxx*gxxx+gupxy*gxyx+gupxz*gxzx)&
|
||||
+gupxy*(gupxx*gxyx+gupxy*gyyx+gupxz*gyzx)&
|
||||
+gupxz*(gupxx*gxzx+gupxy*gyzx+gupxz*gzzx)&
|
||||
+gupxx*(gupxy*gxxy+gupyy*gxyy+gupyz*gxzy)&
|
||||
+gupxy*(gupxy*gxyy+gupyy*gyyy+gupyz*gyzy)&
|
||||
+gupxz*(gupxy*gxzy+gupyy*gyzy+gupyz*gzzy)&
|
||||
+gupxx*(gupxz*gxxz+gupyz*gxyz+gupzz*gxzz)&
|
||||
+gupxy*(gupxz*gxyz+gupyz*gyyz+gupzz*gyzz)&
|
||||
+gupxz*(gupxz*gxzz+gupyz*gyzz+gupzz*gzzz))
|
||||
Gmy_Res = Gamy - (gupxx*(gupxy*gxxx+gupyy*gxyx+gupyz*gxzx)&
|
||||
+gupxy*(gupxy*gxyx+gupyy*gyyx+gupyz*gyzx)&
|
||||
+gupxz*(gupxy*gxzx+gupyy*gyzx+gupyz*gzzx)&
|
||||
+gupxy*(gupxy*gxxy+gupyy*gxyy+gupyz*gxzy)&
|
||||
+gupyy*(gupxy*gxyy+gupyy*gyyy+gupyz*gyzy)&
|
||||
+gupyz*(gupxy*gxzy+gupyy*gyzy+gupyz*gzzy)&
|
||||
+gupxy*(gupxz*gxxz+gupyz*gxyz+gupzz*gxzz)&
|
||||
+gupyy*(gupxz*gxyz+gupyz*gyyz+gupzz*gyzz)&
|
||||
+gupyz*(gupxz*gxzz+gupyz*gyzz+gupzz*gzzz))
|
||||
Gmz_Res = Gamz - (gupxx*(gupxz*gxxx+gupyz*gxyx+gupzz*gxzx)&
|
||||
+gupxy*(gupxz*gxyx+gupyz*gyyx+gupzz*gyzx)&
|
||||
+gupxz*(gupxz*gxzx+gupyz*gyzx+gupzz*gzzx)&
|
||||
+gupxy*(gupxz*gxxy+gupyz*gxyy+gupzz*gxzy)&
|
||||
+gupyy*(gupxz*gxyy+gupyz*gyyy+gupzz*gyzy)&
|
||||
+gupyz*(gupxz*gxzy+gupyz*gyzy+gupzz*gzzy)&
|
||||
+gupxz*(gupxz*gxxz+gupyz*gxyz+gupzz*gxzz)&
|
||||
+gupyz*(gupxz*gxyz+gupyz*gyyz+gupzz*gyzz)&
|
||||
+gupzz*(gupxz*gxzz+gupyz*gyzz+gupzz*gzzz))
|
||||
endif
|
||||
|
||||
! second kind of connection
|
||||
Gamxxx =HALF*( gupxx*gxxx + gupxy*(TWO*gxyx - gxxy ) + gupxz*(TWO*gxzx - gxxz ))
|
||||
Gamyxx =HALF*( gupxy*gxxx + gupyy*(TWO*gxyx - gxxy ) + gupyz*(TWO*gxzx - gxxz ))
|
||||
Gamzxx =HALF*( gupxz*gxxx + gupyz*(TWO*gxyx - gxxy ) + gupzz*(TWO*gxzx - gxxz ))
|
||||
|
||||
Gamxyy =HALF*( gupxx*(TWO*gxyy - gyyx ) + gupxy*gyyy + gupxz*(TWO*gyzy - gyyz ))
|
||||
Gamyyy =HALF*( gupxy*(TWO*gxyy - gyyx ) + gupyy*gyyy + gupyz*(TWO*gyzy - gyyz ))
|
||||
Gamzyy =HALF*( gupxz*(TWO*gxyy - gyyx ) + gupyz*gyyy + gupzz*(TWO*gyzy - gyyz ))
|
||||
|
||||
Gamxzz =HALF*( gupxx*(TWO*gxzz - gzzx ) + gupxy*(TWO*gyzz - gzzy ) + gupxz*gzzz)
|
||||
Gamyzz =HALF*( gupxy*(TWO*gxzz - gzzx ) + gupyy*(TWO*gyzz - gzzy ) + gupyz*gzzz)
|
||||
Gamzzz =HALF*( gupxz*(TWO*gxzz - gzzx ) + gupyz*(TWO*gyzz - gzzy ) + gupzz*gzzz)
|
||||
|
||||
Gamxxy =HALF*( gupxx*gxxy + gupxy*gyyx + gupxz*( gxzy + gyzx - gxyz ) )
|
||||
Gamyxy =HALF*( gupxy*gxxy + gupyy*gyyx + gupyz*( gxzy + gyzx - gxyz ) )
|
||||
Gamzxy =HALF*( gupxz*gxxy + gupyz*gyyx + gupzz*( gxzy + gyzx - gxyz ) )
|
||||
|
||||
Gamxxz =HALF*( gupxx*gxxz + gupxy*( gxyz + gyzx - gxzy ) + gupxz*gzzx )
|
||||
Gamyxz =HALF*( gupxy*gxxz + gupyy*( gxyz + gyzx - gxzy ) + gupyz*gzzx )
|
||||
Gamzxz =HALF*( gupxz*gxxz + gupyz*( gxyz + gyzx - gxzy ) + gupzz*gzzx )
|
||||
|
||||
Gamxyz =HALF*( gupxx*( gxyz + gxzy - gyzx ) + gupxy*gyyz + gupxz*gzzy )
|
||||
Gamyyz =HALF*( gupxy*( gxyz + gxzy - gyzx ) + gupyy*gyyz + gupyz*gzzy )
|
||||
Gamzyz =HALF*( gupxz*( gxyz + gxzy - gyzx ) + gupyz*gyyz + gupzz*gzzy )
|
||||
! Raise indices of \tilde A_{ij} and store in R_ij
|
||||
|
||||
Rxx = gupxx * gupxx * Axx + gupxy * gupxy * Ayy + gupxz * gupxz * Azz + &
|
||||
TWO*(gupxx * gupxy * Axy + gupxx * gupxz * Axz + gupxy * gupxz * Ayz)
|
||||
|
||||
Ryy = gupxy * gupxy * Axx + gupyy * gupyy * Ayy + gupyz * gupyz * Azz + &
|
||||
TWO*(gupxy * gupyy * Axy + gupxy * gupyz * Axz + gupyy * gupyz * Ayz)
|
||||
|
||||
Rzz = gupxz * gupxz * Axx + gupyz * gupyz * Ayy + gupzz * gupzz * Azz + &
|
||||
TWO*(gupxz * gupyz * Axy + gupxz * gupzz * Axz + gupyz * gupzz * Ayz)
|
||||
|
||||
Rxy = gupxx * gupxy * Axx + gupxy * gupyy * Ayy + gupxz * gupyz * Azz + &
|
||||
(gupxx * gupyy + gupxy * gupxy)* Axy + &
|
||||
(gupxx * gupyz + gupxz * gupxy)* Axz + &
|
||||
(gupxy * gupyz + gupxz * gupyy)* Ayz
|
||||
|
||||
Rxz = gupxx * gupxz * Axx + gupxy * gupyz * Ayy + gupxz * gupzz * Azz + &
|
||||
(gupxx * gupyz + gupxy * gupxz)* Axy + &
|
||||
(gupxx * gupzz + gupxz * gupxz)* Axz + &
|
||||
(gupxy * gupzz + gupxz * gupyz)* Ayz
|
||||
|
||||
Ryz = gupxy * gupxz * Axx + gupyy * gupyz * Ayy + gupyz * gupzz * Azz + &
|
||||
(gupxy * gupyz + gupyy * gupxz)* Axy + &
|
||||
(gupxy * gupzz + gupyz * gupxz)* Axz + &
|
||||
(gupyy * gupzz + gupyz * gupyz)* Ayz
|
||||
|
||||
! Right hand side for Gam^i without shift terms...
|
||||
call fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
|
||||
call fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev)
|
||||
|
||||
Gamx_rhs = - TWO * ( Lapx * Rxx + Lapy * Rxy + Lapz * Rxz ) + &
|
||||
TWO * alpn1 * ( &
|
||||
-F3o2/chin1 * ( chix * Rxx + chiy * Rxy + chiz * Rxz ) - &
|
||||
gupxx * ( F2o3 * Kx + EIGHT * PI * Sx ) - &
|
||||
gupxy * ( F2o3 * Ky + EIGHT * PI * Sy ) - &
|
||||
gupxz * ( F2o3 * Kz + EIGHT * PI * Sz ) + &
|
||||
Gamxxx * Rxx + Gamxyy * Ryy + Gamxzz * Rzz + &
|
||||
TWO * ( Gamxxy * Rxy + Gamxxz * Rxz + Gamxyz * Ryz ) )
|
||||
|
||||
Gamy_rhs = - TWO * ( Lapx * Rxy + Lapy * Ryy + Lapz * Ryz ) + &
|
||||
TWO * alpn1 * ( &
|
||||
-F3o2/chin1 * ( chix * Rxy + chiy * Ryy + chiz * Ryz ) - &
|
||||
gupxy * ( F2o3 * Kx + EIGHT * PI * Sx ) - &
|
||||
gupyy * ( F2o3 * Ky + EIGHT * PI * Sy ) - &
|
||||
gupyz * ( F2o3 * Kz + EIGHT * PI * Sz ) + &
|
||||
Gamyxx * Rxx + Gamyyy * Ryy + Gamyzz * Rzz + &
|
||||
TWO * ( Gamyxy * Rxy + Gamyxz * Rxz + Gamyyz * Ryz ) )
|
||||
|
||||
Gamz_rhs = - TWO * ( Lapx * Rxz + Lapy * Ryz + Lapz * Rzz ) + &
|
||||
TWO * alpn1 * ( &
|
||||
-F3o2/chin1 * ( chix * Rxz + chiy * Ryz + chiz * Rzz ) - &
|
||||
gupxz * ( F2o3 * Kx + EIGHT * PI * Sx ) - &
|
||||
gupyz * ( F2o3 * Ky + EIGHT * PI * Sy ) - &
|
||||
gupzz * ( F2o3 * Kz + EIGHT * PI * Sz ) + &
|
||||
Gamzxx * Rxx + Gamzyy * Ryy + Gamzzz * Rzz + &
|
||||
TWO * ( Gamzxy * Rxy + Gamzxz * Rxz + Gamzyz * Ryz ) )
|
||||
call fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
|
||||
call fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev)
|
||||
call fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev)
|
||||
call fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
|
||||
call fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev)
|
||||
call fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
|
||||
|
||||
do k=1,ex(3)
|
||||
do j=1,ex(2)
|
||||
do i=1,ex(1)
|
||||
divb_loc = betaxx(i,j,k) + betayy(i,j,k) + betazz(i,j,k)
|
||||
div_beta(i,j,k) = divb_loc
|
||||
|
||||
chi_rhs(i,j,k) = F2o3 * chin1(i,j,k) * (alpn1(i,j,k) * trK(i,j,k) - divb_loc)
|
||||
|
||||
gxx_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Axx(i,j,k) - F2o3 * gxx(i,j,k) * divb_loc + &
|
||||
TWO * ( gxx(i,j,k) * betaxx(i,j,k) + gxy(i,j,k) * betayx(i,j,k) + gxz(i,j,k) * betazx(i,j,k) )
|
||||
|
||||
gyy_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Ayy(i,j,k) - F2o3 * gyy(i,j,k) * divb_loc + &
|
||||
TWO * ( gxy(i,j,k) * betaxy(i,j,k) + gyy(i,j,k) * betayy(i,j,k) + gyz(i,j,k) * betazy(i,j,k) )
|
||||
|
||||
gzz_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Azz(i,j,k) - F2o3 * gzz(i,j,k) * divb_loc + &
|
||||
TWO * ( gxz(i,j,k) * betaxz(i,j,k) + gyz(i,j,k) * betayz(i,j,k) + gzz(i,j,k) * betazz(i,j,k) )
|
||||
|
||||
gxy_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Axy(i,j,k) + F1o3 * gxy(i,j,k) * divb_loc + &
|
||||
gxx(i,j,k) * betaxy(i,j,k) + gxz(i,j,k) * betazy(i,j,k) + gyy(i,j,k) * betayx(i,j,k) + &
|
||||
gyz(i,j,k) * betazx(i,j,k) - gxy(i,j,k) * betazz(i,j,k)
|
||||
|
||||
gyz_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Ayz(i,j,k) + F1o3 * gyz(i,j,k) * divb_loc + &
|
||||
gxy(i,j,k) * betaxz(i,j,k) + gyy(i,j,k) * betayz(i,j,k) + gxz(i,j,k) * betaxy(i,j,k) + &
|
||||
gzz(i,j,k) * betazy(i,j,k) - gyz(i,j,k) * betaxx(i,j,k)
|
||||
|
||||
gxz_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Axz(i,j,k) + F1o3 * gxz(i,j,k) * divb_loc + &
|
||||
gxx(i,j,k) * betaxz(i,j,k) + gxy(i,j,k) * betayz(i,j,k) + gyz(i,j,k) * betayx(i,j,k) + &
|
||||
gzz(i,j,k) * betazx(i,j,k) - gxz(i,j,k) * betayy(i,j,k)
|
||||
|
||||
det_loc = gxx(i,j,k) * gyy(i,j,k) * gzz(i,j,k) + gxy(i,j,k) * gyz(i,j,k) * gxz(i,j,k) + &
|
||||
gxz(i,j,k) * gxy(i,j,k) * gyz(i,j,k) - gxz(i,j,k) * gyy(i,j,k) * gxz(i,j,k) - &
|
||||
gxy(i,j,k) * gxy(i,j,k) * gzz(i,j,k) - gxx(i,j,k) * gyz(i,j,k) * gyz(i,j,k)
|
||||
gupxx_loc = ( gyy(i,j,k) * gzz(i,j,k) - gyz(i,j,k) * gyz(i,j,k) ) / det_loc
|
||||
gupxy_loc = - ( gxy(i,j,k) * gzz(i,j,k) - gyz(i,j,k) * gxz(i,j,k) ) / det_loc
|
||||
gupxz_loc = ( gxy(i,j,k) * gyz(i,j,k) - gyy(i,j,k) * gxz(i,j,k) ) / det_loc
|
||||
gupyy_loc = ( gxx(i,j,k) * gzz(i,j,k) - gxz(i,j,k) * gxz(i,j,k) ) / det_loc
|
||||
gupyz_loc = - ( gxx(i,j,k) * gyz(i,j,k) - gxy(i,j,k) * gxz(i,j,k) ) / det_loc
|
||||
gupzz_loc = ( gxx(i,j,k) * gyy(i,j,k) - gxy(i,j,k) * gxy(i,j,k) ) / det_loc
|
||||
gupxx(i,j,k) = gupxx_loc
|
||||
gupxy(i,j,k) = gupxy_loc
|
||||
gupxz(i,j,k) = gupxz_loc
|
||||
gupyy(i,j,k) = gupyy_loc
|
||||
gupyz(i,j,k) = gupyz_loc
|
||||
gupzz(i,j,k) = gupzz_loc
|
||||
|
||||
if(co == 0)then
|
||||
Gmx_Res(i,j,k) = Gamx(i,j,k) - ( &
|
||||
gupxx_loc*(gupxx_loc*gxxx(i,j,k)+gupxy_loc*gxyx(i,j,k)+gupxz_loc*gxzx(i,j,k)) + &
|
||||
gupxy_loc*(gupxx_loc*gxyx(i,j,k)+gupxy_loc*gyyx(i,j,k)+gupxz_loc*gyzx(i,j,k)) + &
|
||||
gupxz_loc*(gupxx_loc*gxzx(i,j,k)+gupxy_loc*gyzx(i,j,k)+gupxz_loc*gzzx(i,j,k)) + &
|
||||
gupxx_loc*(gupxy_loc*gxxy(i,j,k)+gupyy_loc*gxyy(i,j,k)+gupyz_loc*gxzy(i,j,k)) + &
|
||||
gupxy_loc*(gupxy_loc*gxyy(i,j,k)+gupyy_loc*gyyy(i,j,k)+gupyz_loc*gyzy(i,j,k)) + &
|
||||
gupxz_loc*(gupxy_loc*gxzy(i,j,k)+gupyy_loc*gyzy(i,j,k)+gupyz_loc*gzzy(i,j,k)) + &
|
||||
gupxx_loc*(gupxz_loc*gxxz(i,j,k)+gupyz_loc*gxyz(i,j,k)+gupzz_loc*gxzz(i,j,k)) + &
|
||||
gupxy_loc*(gupxz_loc*gxyz(i,j,k)+gupyz_loc*gyyz(i,j,k)+gupzz_loc*gyzz(i,j,k)) + &
|
||||
gupxz_loc*(gupxz_loc*gxzz(i,j,k)+gupyz_loc*gyzz(i,j,k)+gupzz_loc*gzzz(i,j,k)))
|
||||
Gmy_Res(i,j,k) = Gamy(i,j,k) - ( &
|
||||
gupxx_loc*(gupxy_loc*gxxx(i,j,k)+gupyy_loc*gxyx(i,j,k)+gupyz_loc*gxzx(i,j,k)) + &
|
||||
gupxy_loc*(gupxy_loc*gxyx(i,j,k)+gupyy_loc*gyyx(i,j,k)+gupyz_loc*gyzx(i,j,k)) + &
|
||||
gupxz_loc*(gupxy_loc*gxzx(i,j,k)+gupyy_loc*gyzx(i,j,k)+gupyz_loc*gzzx(i,j,k)) + &
|
||||
gupxy_loc*(gupxy_loc*gxxy(i,j,k)+gupyy_loc*gxyy(i,j,k)+gupyz_loc*gxzy(i,j,k)) + &
|
||||
gupyy_loc*(gupxy_loc*gxyy(i,j,k)+gupyy_loc*gyyy(i,j,k)+gupyz_loc*gyzy(i,j,k)) + &
|
||||
gupyz_loc*(gupxy_loc*gxzy(i,j,k)+gupyy_loc*gyzy(i,j,k)+gupyz_loc*gzzy(i,j,k)) + &
|
||||
gupxy_loc*(gupxz_loc*gxxz(i,j,k)+gupyz_loc*gxyz(i,j,k)+gupzz_loc*gxzz(i,j,k)) + &
|
||||
gupyy_loc*(gupxz_loc*gxyz(i,j,k)+gupyz_loc*gyyz(i,j,k)+gupzz_loc*gyzz(i,j,k)) + &
|
||||
gupyz_loc*(gupxz_loc*gxzz(i,j,k)+gupyz_loc*gyzz(i,j,k)+gupzz_loc*gzzz(i,j,k)))
|
||||
Gmz_Res(i,j,k) = Gamz(i,j,k) - ( &
|
||||
gupxx_loc*(gupxz_loc*gxxx(i,j,k)+gupyz_loc*gxyx(i,j,k)+gupzz_loc*gxzx(i,j,k)) + &
|
||||
gupxy_loc*(gupxz_loc*gxyx(i,j,k)+gupyz_loc*gyyx(i,j,k)+gupzz_loc*gyzx(i,j,k)) + &
|
||||
gupxz_loc*(gupxz_loc*gxzx(i,j,k)+gupyz_loc*gyzx(i,j,k)+gupzz_loc*gzzx(i,j,k)) + &
|
||||
gupxy_loc*(gupxz_loc*gxxy(i,j,k)+gupyz_loc*gxyy(i,j,k)+gupzz_loc*gxzy(i,j,k)) + &
|
||||
gupyy_loc*(gupxz_loc*gxyy(i,j,k)+gupyz_loc*gyyy(i,j,k)+gupzz_loc*gyzy(i,j,k)) + &
|
||||
gupyz_loc*(gupxz_loc*gxzy(i,j,k)+gupyz_loc*gyzy(i,j,k)+gupzz_loc*gzzy(i,j,k)) + &
|
||||
gupxz_loc*(gupxz_loc*gxxz(i,j,k)+gupyz_loc*gxyz(i,j,k)+gupzz_loc*gxzz(i,j,k)) + &
|
||||
gupyz_loc*(gupxz_loc*gxyz(i,j,k)+gupyz_loc*gyyz(i,j,k)+gupzz_loc*gyzz(i,j,k)) + &
|
||||
gupzz_loc*(gupxz_loc*gxzz(i,j,k)+gupyz_loc*gyzz(i,j,k)+gupzz_loc*gzzz(i,j,k)))
|
||||
endif
|
||||
|
||||
Gamxxx(i,j,k)=HALF*( gupxx_loc*gxxx(i,j,k) + gupxy_loc*(TWO*gxyx(i,j,k) - gxxy(i,j,k)) + gupxz_loc*(TWO*gxzx(i,j,k) - gxxz(i,j,k)))
|
||||
Gamyxx(i,j,k)=HALF*( gupxy_loc*gxxx(i,j,k) + gupyy_loc*(TWO*gxyx(i,j,k) - gxxy(i,j,k)) + gupyz_loc*(TWO*gxzx(i,j,k) - gxxz(i,j,k)))
|
||||
Gamzxx(i,j,k)=HALF*( gupxz_loc*gxxx(i,j,k) + gupyz_loc*(TWO*gxyx(i,j,k) - gxxy(i,j,k)) + gupzz_loc*(TWO*gxzx(i,j,k) - gxxz(i,j,k)))
|
||||
|
||||
Gamxyy(i,j,k)=HALF*( gupxx_loc*(TWO*gxyy(i,j,k) - gyyx(i,j,k)) + gupxy_loc*gyyy(i,j,k) + gupxz_loc*(TWO*gyzy(i,j,k) - gyyz(i,j,k)))
|
||||
Gamyyy(i,j,k)=HALF*( gupxy_loc*(TWO*gxyy(i,j,k) - gyyx(i,j,k)) + gupyy_loc*gyyy(i,j,k) + gupyz_loc*(TWO*gyzy(i,j,k) - gyyz(i,j,k)))
|
||||
Gamzyy(i,j,k)=HALF*( gupxz_loc*(TWO*gxyy(i,j,k) - gyyx(i,j,k)) + gupyz_loc*gyyy(i,j,k) + gupzz_loc*(TWO*gyzy(i,j,k) - gyyz(i,j,k)))
|
||||
|
||||
Gamxzz(i,j,k)=HALF*( gupxx_loc*(TWO*gxzz(i,j,k) - gzzx(i,j,k)) + gupxy_loc*(TWO*gyzz(i,j,k) - gzzy(i,j,k)) + gupxz_loc*gzzz(i,j,k))
|
||||
Gamyzz(i,j,k)=HALF*( gupxy_loc*(TWO*gxzz(i,j,k) - gzzx(i,j,k)) + gupyy_loc*(TWO*gyzz(i,j,k) - gzzy(i,j,k)) + gupyz_loc*gzzz(i,j,k))
|
||||
Gamzzz(i,j,k)=HALF*( gupxz_loc*(TWO*gxzz(i,j,k) - gzzx(i,j,k)) + gupyz_loc*(TWO*gyzz(i,j,k) - gzzy(i,j,k)) + gupzz_loc*gzzz(i,j,k))
|
||||
|
||||
Gamxxy(i,j,k)=HALF*( gupxx_loc*gxxy(i,j,k) + gupxy_loc*gyyx(i,j,k) + gupxz_loc*(gxzy(i,j,k) + gyzx(i,j,k) - gxyz(i,j,k)) )
|
||||
Gamyxy(i,j,k)=HALF*( gupxy_loc*gxxy(i,j,k) + gupyy_loc*gyyx(i,j,k) + gupyz_loc*(gxzy(i,j,k) + gyzx(i,j,k) - gxyz(i,j,k)) )
|
||||
Gamzxy(i,j,k)=HALF*( gupxz_loc*gxxy(i,j,k) + gupyz_loc*gyyx(i,j,k) + gupzz_loc*(gxzy(i,j,k) + gyzx(i,j,k) - gxyz(i,j,k)) )
|
||||
|
||||
Gamxxz(i,j,k)=HALF*( gupxx_loc*gxxz(i,j,k) + gupxy_loc*(gxyz(i,j,k) + gyzx(i,j,k) - gxzy(i,j,k)) + gupxz_loc*gzzx(i,j,k) )
|
||||
Gamyxz(i,j,k)=HALF*( gupxy_loc*gxxz(i,j,k) + gupyy_loc*(gxyz(i,j,k) + gyzx(i,j,k) - gxzy(i,j,k)) + gupyz_loc*gzzx(i,j,k) )
|
||||
Gamzxz(i,j,k)=HALF*( gupxz_loc*gxxz(i,j,k) + gupyz_loc*(gxyz(i,j,k) + gyzx(i,j,k) - gxzy(i,j,k)) + gupzz_loc*gzzx(i,j,k) )
|
||||
|
||||
Gamxyz(i,j,k)=HALF*( gupxx_loc*(gxyz(i,j,k) + gxzy(i,j,k) - gyzx(i,j,k)) + gupxy_loc*gyyz(i,j,k) + gupxz_loc*gzzy(i,j,k) )
|
||||
Gamyyz(i,j,k)=HALF*( gupxy_loc*(gxyz(i,j,k) + gxzy(i,j,k) - gyzx(i,j,k)) + gupyy_loc*gyyz(i,j,k) + gupyz_loc*gzzy(i,j,k) )
|
||||
Gamzyz(i,j,k)=HALF*( gupxz_loc*(gxyz(i,j,k) + gxzy(i,j,k) - gyzx(i,j,k)) + gupyz_loc*gyyz(i,j,k) + gupzz_loc*gzzy(i,j,k) )
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
! Raise indices of \tilde A_{ij} and store in R_ij
|
||||
|
||||
! Right hand side for Gam^i without shift terms...
|
||||
call fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
|
||||
call fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev)
|
||||
do k=1,ex(3)
|
||||
do j=1,ex(2)
|
||||
do i=1,ex(1)
|
||||
gupxx_loc = gupxx(i,j,k)
|
||||
gupxy_loc = gupxy(i,j,k)
|
||||
gupxz_loc = gupxz(i,j,k)
|
||||
gupyy_loc = gupyy(i,j,k)
|
||||
gupyz_loc = gupyz(i,j,k)
|
||||
gupzz_loc = gupzz(i,j,k)
|
||||
|
||||
Rxx_loc = gupxx_loc * gupxx_loc * Axx(i,j,k) + gupxy_loc * gupxy_loc * Ayy(i,j,k) + gupxz_loc * gupxz_loc * Azz(i,j,k) + &
|
||||
TWO * (gupxx_loc * gupxy_loc * Axy(i,j,k) + gupxx_loc * gupxz_loc * Axz(i,j,k) + gupxy_loc * gupxz_loc * Ayz(i,j,k))
|
||||
Ryy_loc = gupxy_loc * gupxy_loc * Axx(i,j,k) + gupyy_loc * gupyy_loc * Ayy(i,j,k) + gupyz_loc * gupyz_loc * Azz(i,j,k) + &
|
||||
TWO * (gupxy_loc * gupyy_loc * Axy(i,j,k) + gupxy_loc * gupyz_loc * Axz(i,j,k) + gupyy_loc * gupyz_loc * Ayz(i,j,k))
|
||||
Rzz_loc = gupxz_loc * gupxz_loc * Axx(i,j,k) + gupyz_loc * gupyz_loc * Ayy(i,j,k) + gupzz_loc * gupzz_loc * Azz(i,j,k) + &
|
||||
TWO * (gupxz_loc * gupyz_loc * Axy(i,j,k) + gupxz_loc * gupzz_loc * Axz(i,j,k) + gupyz_loc * gupzz_loc * Ayz(i,j,k))
|
||||
Rxy_loc = gupxx_loc * gupxy_loc * Axx(i,j,k) + gupxy_loc * gupyy_loc * Ayy(i,j,k) + gupxz_loc * gupyz_loc * Azz(i,j,k) + &
|
||||
(gupxx_loc * gupyy_loc + gupxy_loc * gupxy_loc) * Axy(i,j,k) + &
|
||||
(gupxx_loc * gupyz_loc + gupxz_loc * gupxy_loc) * Axz(i,j,k) + &
|
||||
(gupxy_loc * gupyz_loc + gupxz_loc * gupyy_loc) * Ayz(i,j,k)
|
||||
Rxz_loc = gupxx_loc * gupxz_loc * Axx(i,j,k) + gupxy_loc * gupyz_loc * Ayy(i,j,k) + gupxz_loc * gupzz_loc * Azz(i,j,k) + &
|
||||
(gupxx_loc * gupyz_loc + gupxy_loc * gupxz_loc) * Axy(i,j,k) + &
|
||||
(gupxx_loc * gupzz_loc + gupxz_loc * gupxz_loc) * Axz(i,j,k) + &
|
||||
(gupxy_loc * gupzz_loc + gupxz_loc * gupyz_loc) * Ayz(i,j,k)
|
||||
Ryz_loc = gupxy_loc * gupxz_loc * Axx(i,j,k) + gupyy_loc * gupyz_loc * Ayy(i,j,k) + gupyz_loc * gupzz_loc * Azz(i,j,k) + &
|
||||
(gupxy_loc * gupyz_loc + gupyy_loc * gupxz_loc) * Axy(i,j,k) + &
|
||||
(gupxy_loc * gupzz_loc + gupyz_loc * gupxz_loc) * Axz(i,j,k) + &
|
||||
(gupyy_loc * gupzz_loc + gupyz_loc * gupyz_loc) * Ayz(i,j,k)
|
||||
Rxx(i,j,k) = Rxx_loc
|
||||
Ryy(i,j,k) = Ryy_loc
|
||||
Rzz(i,j,k) = Rzz_loc
|
||||
Rxy(i,j,k) = Rxy_loc
|
||||
Rxz(i,j,k) = Rxz_loc
|
||||
Ryz(i,j,k) = Ryz_loc
|
||||
|
||||
Gamx_rhs(i,j,k) = - TWO * (Lapx(i,j,k) * Rxx_loc + Lapy(i,j,k) * Rxy_loc + Lapz(i,j,k) * Rxz_loc) + &
|
||||
TWO * alpn1(i,j,k) * ( &
|
||||
-F3o2/chin1(i,j,k) * (chix(i,j,k) * Rxx_loc + chiy(i,j,k) * Rxy_loc + chiz(i,j,k) * Rxz_loc) - &
|
||||
gupxx_loc * (F2o3 * Kx(i,j,k) + EIGHT * PI * Sx(i,j,k)) - &
|
||||
gupxy_loc * (F2o3 * Ky(i,j,k) + EIGHT * PI * Sy(i,j,k)) - &
|
||||
gupxz_loc * (F2o3 * Kz(i,j,k) + EIGHT * PI * Sz(i,j,k)) + &
|
||||
Gamxxx(i,j,k) * Rxx_loc + Gamxyy(i,j,k) * Ryy_loc + Gamxzz(i,j,k) * Rzz_loc + &
|
||||
TWO * (Gamxxy(i,j,k) * Rxy_loc + Gamxxz(i,j,k) * Rxz_loc + Gamxyz(i,j,k) * Ryz_loc))
|
||||
|
||||
Gamy_rhs(i,j,k) = - TWO * (Lapx(i,j,k) * Rxy_loc + Lapy(i,j,k) * Ryy_loc + Lapz(i,j,k) * Ryz_loc) + &
|
||||
TWO * alpn1(i,j,k) * ( &
|
||||
-F3o2/chin1(i,j,k) * (chix(i,j,k) * Rxy_loc + chiy(i,j,k) * Ryy_loc + chiz(i,j,k) * Ryz_loc) - &
|
||||
gupxy_loc * (F2o3 * Kx(i,j,k) + EIGHT * PI * Sx(i,j,k)) - &
|
||||
gupyy_loc * (F2o3 * Ky(i,j,k) + EIGHT * PI * Sy(i,j,k)) - &
|
||||
gupyz_loc * (F2o3 * Kz(i,j,k) + EIGHT * PI * Sz(i,j,k)) + &
|
||||
Gamyxx(i,j,k) * Rxx_loc + Gamyyy(i,j,k) * Ryy_loc + Gamyzz(i,j,k) * Rzz_loc + &
|
||||
TWO * (Gamyxy(i,j,k) * Rxy_loc + Gamyxz(i,j,k) * Rxz_loc + Gamyyz(i,j,k) * Ryz_loc))
|
||||
|
||||
Gamz_rhs(i,j,k) = - TWO * (Lapx(i,j,k) * Rxz_loc + Lapy(i,j,k) * Ryz_loc + Lapz(i,j,k) * Rzz_loc) + &
|
||||
TWO * alpn1(i,j,k) * ( &
|
||||
-F3o2/chin1(i,j,k) * (chix(i,j,k) * Rxz_loc + chiy(i,j,k) * Ryz_loc + chiz(i,j,k) * Rzz_loc) - &
|
||||
gupxz_loc * (F2o3 * Kx(i,j,k) + EIGHT * PI * Sx(i,j,k)) - &
|
||||
gupyz_loc * (F2o3 * Ky(i,j,k) + EIGHT * PI * Sy(i,j,k)) - &
|
||||
gupzz_loc * (F2o3 * Kz(i,j,k) + EIGHT * PI * Sz(i,j,k)) + &
|
||||
Gamzxx(i,j,k) * Rxx_loc + Gamzyy(i,j,k) * Ryy_loc + Gamzzz(i,j,k) * Rzz_loc + &
|
||||
TWO * (Gamzxy(i,j,k) * Rxy_loc + Gamzxz(i,j,k) * Rxz_loc + Gamzyz(i,j,k) * Ryz_loc))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
call fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,&
|
||||
X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev)
|
||||
@@ -321,38 +359,54 @@
|
||||
call fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,&
|
||||
X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev)
|
||||
|
||||
fxx = gxxx + gxyy + gxzz
|
||||
fxy = gxyx + gyyy + gyzz
|
||||
fxz = gxzx + gyzy + gzzz
|
||||
|
||||
Gamxa = gupxx * Gamxxx + gupyy * Gamxyy + gupzz * Gamxzz + &
|
||||
TWO*( gupxy * Gamxxy + gupxz * Gamxxz + gupyz * Gamxyz )
|
||||
Gamya = gupxx * Gamyxx + gupyy * Gamyyy + gupzz * Gamyzz + &
|
||||
TWO*( gupxy * Gamyxy + gupxz * Gamyxz + gupyz * Gamyyz )
|
||||
Gamza = gupxx * Gamzxx + gupyy * Gamzyy + gupzz * Gamzzz + &
|
||||
TWO*( gupxy * Gamzxy + gupxz * Gamzxz + gupyz * Gamzyz )
|
||||
|
||||
call fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev)
|
||||
call fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev)
|
||||
call fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev)
|
||||
|
||||
Gamx_rhs = Gamx_rhs + F2o3 * Gamxa * div_beta - &
|
||||
Gamxa * betaxx - Gamya * betaxy - Gamza * betaxz + &
|
||||
F1o3 * (gupxx * fxx + gupxy * fxy + gupxz * fxz ) + &
|
||||
gupxx * gxxx + gupyy * gyyx + gupzz * gzzx + &
|
||||
TWO * (gupxy * gxyx + gupxz * gxzx + gupyz * gyzx )
|
||||
|
||||
Gamy_rhs = Gamy_rhs + F2o3 * Gamya * div_beta - &
|
||||
Gamxa * betayx - Gamya * betayy - Gamza * betayz + &
|
||||
F1o3 * (gupxy * fxx + gupyy * fxy + gupyz * fxz ) + &
|
||||
gupxx * gxxy + gupyy * gyyy + gupzz * gzzy + &
|
||||
TWO * (gupxy * gxyy + gupxz * gxzy + gupyz * gyzy )
|
||||
|
||||
Gamz_rhs = Gamz_rhs + F2o3 * Gamza * div_beta - &
|
||||
Gamxa * betazx - Gamya * betazy - Gamza * betazz + &
|
||||
F1o3 * (gupxz * fxx + gupyz * fxy + gupzz * fxz ) + &
|
||||
gupxx * gxxz + gupyy * gyyz + gupzz * gzzz + &
|
||||
TWO * (gupxy * gxyz + gupxz * gxzz + gupyz * gyzz ) !rhs for Gam^i
|
||||
call fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev)
|
||||
call fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev)
|
||||
call fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev)
|
||||
do k=1,ex(3)
|
||||
do j=1,ex(2)
|
||||
do i=1,ex(1)
|
||||
divb_loc = div_beta(i,j,k)
|
||||
fxx_loc = gxxx(i,j,k) + gxyy(i,j,k) + gxzz(i,j,k)
|
||||
fxy_loc = gxyx(i,j,k) + gyyy(i,j,k) + gyzz(i,j,k)
|
||||
fxz_loc = gxzx(i,j,k) + gyzy(i,j,k) + gzzz(i,j,k)
|
||||
|
||||
gupxx_loc = gupxx(i,j,k)
|
||||
gupxy_loc = gupxy(i,j,k)
|
||||
gupxz_loc = gupxz(i,j,k)
|
||||
gupyy_loc = gupyy(i,j,k)
|
||||
gupyz_loc = gupyz(i,j,k)
|
||||
gupzz_loc = gupzz(i,j,k)
|
||||
|
||||
Gamxa_loc = gupxx_loc * Gamxxx(i,j,k) + gupyy_loc * Gamxyy(i,j,k) + gupzz_loc * Gamxzz(i,j,k) + &
|
||||
TWO * (gupxy_loc * Gamxxy(i,j,k) + gupxz_loc * Gamxxz(i,j,k) + gupyz_loc * Gamxyz(i,j,k))
|
||||
Gamya_loc = gupxx_loc * Gamyxx(i,j,k) + gupyy_loc * Gamyyy(i,j,k) + gupzz_loc * Gamyzz(i,j,k) + &
|
||||
TWO * (gupxy_loc * Gamyxy(i,j,k) + gupxz_loc * Gamyxz(i,j,k) + gupyz_loc * Gamyyz(i,j,k))
|
||||
Gamza_loc = gupxx_loc * Gamzxx(i,j,k) + gupyy_loc * Gamzyy(i,j,k) + gupzz_loc * Gamzzz(i,j,k) + &
|
||||
TWO * (gupxy_loc * Gamzxy(i,j,k) + gupxz_loc * Gamzxz(i,j,k) + gupyz_loc * Gamzyz(i,j,k))
|
||||
Gamxa(i,j,k) = Gamxa_loc
|
||||
Gamya(i,j,k) = Gamya_loc
|
||||
Gamza(i,j,k) = Gamza_loc
|
||||
|
||||
Gamx_rhs(i,j,k) = Gamx_rhs(i,j,k) + F2o3 * Gamxa_loc * divb_loc - &
|
||||
Gamxa_loc * betaxx(i,j,k) - Gamya_loc * betaxy(i,j,k) - Gamza_loc * betaxz(i,j,k) + &
|
||||
F1o3 * (gupxx_loc * fxx_loc + gupxy_loc * fxy_loc + gupxz_loc * fxz_loc) + &
|
||||
gupxx_loc * gxxx(i,j,k) + gupyy_loc * gyyx(i,j,k) + gupzz_loc * gzzx(i,j,k) + &
|
||||
TWO * (gupxy_loc * gxyx(i,j,k) + gupxz_loc * gxzx(i,j,k) + gupyz_loc * gyzx(i,j,k))
|
||||
|
||||
Gamy_rhs(i,j,k) = Gamy_rhs(i,j,k) + F2o3 * Gamya_loc * divb_loc - &
|
||||
Gamxa_loc * betayx(i,j,k) - Gamya_loc * betayy(i,j,k) - Gamza_loc * betayz(i,j,k) + &
|
||||
F1o3 * (gupxy_loc * fxx_loc + gupyy_loc * fxy_loc + gupyz_loc * fxz_loc) + &
|
||||
gupxx_loc * gxxy(i,j,k) + gupyy_loc * gyyy(i,j,k) + gupzz_loc * gzzy(i,j,k) + &
|
||||
TWO * (gupxy_loc * gxyy(i,j,k) + gupxz_loc * gxzy(i,j,k) + gupyz_loc * gyzy(i,j,k))
|
||||
|
||||
Gamz_rhs(i,j,k) = Gamz_rhs(i,j,k) + F2o3 * Gamza_loc * divb_loc - &
|
||||
Gamxa_loc * betazx(i,j,k) - Gamya_loc * betazy(i,j,k) - Gamza_loc * betazz(i,j,k) + &
|
||||
F1o3 * (gupxz_loc * fxx_loc + gupyz_loc * fxy_loc + gupzz_loc * fxz_loc) + &
|
||||
gupxx_loc * gxxz(i,j,k) + gupyy_loc * gyyz(i,j,k) + gupzz_loc * gzzz(i,j,k) + &
|
||||
TWO * (gupxy_loc * gxyz(i,j,k) + gupxz_loc * gxzz(i,j,k) + gupyz_loc * gyzz(i,j,k))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!first kind of connection stored in gij,k
|
||||
gxxx = gxx * Gamxxx + gxy * Gamyxx + gxz * Gamzxx
|
||||
@@ -601,192 +655,190 @@
|
||||
Gamxyz * gxzz + Gamyyz * gyzz + Gamzyz * gzzz + &
|
||||
Gamxzz * gxzy + Gamyzz * gyzy + Gamzzz * gzzy + &
|
||||
Gamxyz * gzzx + Gamyyz * gzzy + Gamzyz * gzzz )
|
||||
!covariant second derivative of chi respect to tilted metric
|
||||
call fdderivs(ex,chi,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
|
||||
|
||||
fxx = fxx - Gamxxx * chix - Gamyxx * chiy - Gamzxx * chiz
|
||||
fxy = fxy - Gamxxy * chix - Gamyxy * chiy - Gamzxy * chiz
|
||||
fxz = fxz - Gamxxz * chix - Gamyxz * chiy - Gamzxz * chiz
|
||||
fyy = fyy - Gamxyy * chix - Gamyyy * chiy - Gamzyy * chiz
|
||||
fyz = fyz - Gamxyz * chix - Gamyyz * chiy - Gamzyz * chiz
|
||||
fzz = fzz - Gamxzz * chix - Gamyzz * chiy - Gamzzz * chiz
|
||||
! Store D^l D_l chi - 3/(2*chi) D^l chi D_l chi in f
|
||||
|
||||
f = gupxx * ( fxx - F3o2/chin1 * chix * chix ) + &
|
||||
gupyy * ( fyy - F3o2/chin1 * chiy * chiy ) + &
|
||||
gupzz * ( fzz - F3o2/chin1 * chiz * chiz ) + &
|
||||
TWO * gupxy * ( fxy - F3o2/chin1 * chix * chiy ) + &
|
||||
TWO * gupxz * ( fxz - F3o2/chin1 * chix * chiz ) + &
|
||||
TWO * gupyz * ( fyz - F3o2/chin1 * chiy * chiz )
|
||||
! Add chi part to Ricci tensor:
|
||||
|
||||
Rxx = Rxx + (fxx - chix*chix/chin1/TWO + gxx * f)/chin1/TWO
|
||||
Ryy = Ryy + (fyy - chiy*chiy/chin1/TWO + gyy * f)/chin1/TWO
|
||||
Rzz = Rzz + (fzz - chiz*chiz/chin1/TWO + gzz * f)/chin1/TWO
|
||||
Rxy = Rxy + (fxy - chix*chiy/chin1/TWO + gxy * f)/chin1/TWO
|
||||
Rxz = Rxz + (fxz - chix*chiz/chin1/TWO + gxz * f)/chin1/TWO
|
||||
Ryz = Ryz + (fyz - chiy*chiz/chin1/TWO + gyz * f)/chin1/TWO
|
||||
|
||||
! covariant second derivatives of the lapse respect to physical metric
|
||||
call fdderivs(ex,Lap,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z, &
|
||||
SYM,SYM,SYM,symmetry,Lev)
|
||||
|
||||
gxxx = (gupxx * chix + gupxy * chiy + gupxz * chiz)/chin1
|
||||
gxxy = (gupxy * chix + gupyy * chiy + gupyz * chiz)/chin1
|
||||
gxxz = (gupxz * chix + gupyz * chiy + gupzz * chiz)/chin1
|
||||
! now get physical second kind of connection
|
||||
Gamxxx = Gamxxx - ( (chix + chix)/chin1 - gxx * gxxx )*HALF
|
||||
Gamyxx = Gamyxx - ( - gxx * gxxy )*HALF
|
||||
Gamzxx = Gamzxx - ( - gxx * gxxz )*HALF
|
||||
Gamxyy = Gamxyy - ( - gyy * gxxx )*HALF
|
||||
Gamyyy = Gamyyy - ( (chiy + chiy)/chin1 - gyy * gxxy )*HALF
|
||||
Gamzyy = Gamzyy - ( - gyy * gxxz )*HALF
|
||||
Gamxzz = Gamxzz - ( - gzz * gxxx )*HALF
|
||||
Gamyzz = Gamyzz - ( - gzz * gxxy )*HALF
|
||||
Gamzzz = Gamzzz - ( (chiz + chiz)/chin1 - gzz * gxxz )*HALF
|
||||
Gamxxy = Gamxxy - ( chiy /chin1 - gxy * gxxx )*HALF
|
||||
Gamyxy = Gamyxy - ( chix /chin1 - gxy * gxxy )*HALF
|
||||
Gamzxy = Gamzxy - ( - gxy * gxxz )*HALF
|
||||
Gamxxz = Gamxxz - ( chiz /chin1 - gxz * gxxx )*HALF
|
||||
Gamyxz = Gamyxz - ( - gxz * gxxy )*HALF
|
||||
Gamzxz = Gamzxz - ( chix /chin1 - gxz * gxxz )*HALF
|
||||
Gamxyz = Gamxyz - ( - gyz * gxxx )*HALF
|
||||
Gamyyz = Gamyyz - ( chiz /chin1 - gyz * gxxy )*HALF
|
||||
Gamzyz = Gamzyz - ( chiy /chin1 - gyz * gxxz )*HALF
|
||||
|
||||
fxx = fxx - Gamxxx*Lapx - Gamyxx*Lapy - Gamzxx*Lapz
|
||||
fyy = fyy - Gamxyy*Lapx - Gamyyy*Lapy - Gamzyy*Lapz
|
||||
fzz = fzz - Gamxzz*Lapx - Gamyzz*Lapy - Gamzzz*Lapz
|
||||
fxy = fxy - Gamxxy*Lapx - Gamyxy*Lapy - Gamzxy*Lapz
|
||||
fxz = fxz - Gamxxz*Lapx - Gamyxz*Lapy - Gamzxz*Lapz
|
||||
fyz = fyz - Gamxyz*Lapx - Gamyyz*Lapy - Gamzyz*Lapz
|
||||
|
||||
! store D^i D_i Lap in trK_rhs upto chi
|
||||
trK_rhs = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
|
||||
TWO* ( gupxy * fxy + gupxz * fxz + gupyz * fyz )
|
||||
#if 1
|
||||
!! follow bam code
|
||||
S = chin1 * ( gupxx * Sxx + gupyy * Syy + gupzz * Szz + &
|
||||
TWO * ( gupxy * Sxy + gupxz * Sxz + gupyz * Syz ) )
|
||||
f = F2o3 * trK * trK -(&
|
||||
gupxx * ( &
|
||||
gupxx * Axx * Axx + gupyy * Axy * Axy + gupzz * Axz * Axz + &
|
||||
TWO * (gupxy * Axx * Axy + gupxz * Axx * Axz + gupyz * Axy * Axz) ) + &
|
||||
gupyy * ( &
|
||||
gupxx * Axy * Axy + gupyy * Ayy * Ayy + gupzz * Ayz * Ayz + &
|
||||
TWO * (gupxy * Axy * Ayy + gupxz * Axy * Ayz + gupyz * Ayy * Ayz) ) + &
|
||||
gupzz * ( &
|
||||
gupxx * Axz * Axz + gupyy * Ayz * Ayz + gupzz * Azz * Azz + &
|
||||
TWO * (gupxy * Axz * Ayz + gupxz * Axz * Azz + gupyz * Ayz * Azz) ) + &
|
||||
TWO * ( &
|
||||
gupxy * ( &
|
||||
gupxx * Axx * Axy + gupyy * Axy * Ayy + gupzz * Axz * Ayz + &
|
||||
gupxy * (Axx * Ayy + Axy * Axy) + &
|
||||
gupxz * (Axx * Ayz + Axz * Axy) + &
|
||||
gupyz * (Axy * Ayz + Axz * Ayy) ) + &
|
||||
gupxz * ( &
|
||||
gupxx * Axx * Axz + gupyy * Axy * Ayz + gupzz * Axz * Azz + &
|
||||
gupxy * (Axx * Ayz + Axy * Axz) + &
|
||||
gupxz * (Axx * Azz + Axz * Axz) + &
|
||||
gupyz * (Axy * Azz + Axz * Ayz) ) + &
|
||||
gupyz * ( &
|
||||
gupxx * Axy * Axz + gupyy * Ayy * Ayz + gupzz * Ayz * Azz + &
|
||||
gupxy * (Axy * Ayz + Ayy * Axz) + &
|
||||
gupxz * (Axy * Azz + Ayz * Axz) + &
|
||||
gupyz * (Ayy * Azz + Ayz * Ayz) ) )) -1.6d1*PI*rho + EIGHT * PI * S
|
||||
f = - F1o3 *( gupxx * fxx + gupyy * fyy + gupzz * fzz + &
|
||||
TWO* ( gupxy * fxy + gupxz * fxz + gupyz * fyz ) + alpn1/chin1*f)
|
||||
|
||||
fxx = alpn1 * (Rxx - EIGHT * PI * Sxx) - fxx
|
||||
fxy = alpn1 * (Rxy - EIGHT * PI * Sxy) - fxy
|
||||
fxz = alpn1 * (Rxz - EIGHT * PI * Sxz) - fxz
|
||||
fyy = alpn1 * (Ryy - EIGHT * PI * Syy) - fyy
|
||||
fyz = alpn1 * (Ryz - EIGHT * PI * Syz) - fyz
|
||||
fzz = alpn1 * (Rzz - EIGHT * PI * Szz) - fzz
|
||||
#else
|
||||
! Add lapse and S_ij parts to Ricci tensor:
|
||||
|
||||
fxx = alpn1 * (Rxx - EIGHT * PI * Sxx) - fxx
|
||||
fxy = alpn1 * (Rxy - EIGHT * PI * Sxy) - fxy
|
||||
fxz = alpn1 * (Rxz - EIGHT * PI * Sxz) - fxz
|
||||
fyy = alpn1 * (Ryy - EIGHT * PI * Syy) - fyy
|
||||
fyz = alpn1 * (Ryz - EIGHT * PI * Syz) - fyz
|
||||
fzz = alpn1 * (Rzz - EIGHT * PI * Szz) - fzz
|
||||
|
||||
! Compute trace-free part (note: chi^-1 and chi cancel!):
|
||||
|
||||
f = F1o3 *( gupxx * fxx + gupyy * fyy + gupzz * fzz + &
|
||||
TWO* ( gupxy * fxy + gupxz * fxz + gupyz * fyz ) )
|
||||
#endif
|
||||
|
||||
Axx_rhs = fxx - gxx * f
|
||||
Ayy_rhs = fyy - gyy * f
|
||||
Azz_rhs = fzz - gzz * f
|
||||
Axy_rhs = fxy - gxy * f
|
||||
Axz_rhs = fxz - gxz * f
|
||||
Ayz_rhs = fyz - gyz * f
|
||||
|
||||
! Now: store A_il A^l_j into fij:
|
||||
|
||||
fxx = gupxx * Axx * Axx + gupyy * Axy * Axy + gupzz * Axz * Axz + &
|
||||
TWO * (gupxy * Axx * Axy + gupxz * Axx * Axz + gupyz * Axy * Axz)
|
||||
fyy = gupxx * Axy * Axy + gupyy * Ayy * Ayy + gupzz * Ayz * Ayz + &
|
||||
TWO * (gupxy * Axy * Ayy + gupxz * Axy * Ayz + gupyz * Ayy * Ayz)
|
||||
fzz = gupxx * Axz * Axz + gupyy * Ayz * Ayz + gupzz * Azz * Azz + &
|
||||
TWO * (gupxy * Axz * Ayz + gupxz * Axz * Azz + gupyz * Ayz * Azz)
|
||||
fxy = gupxx * Axx * Axy + gupyy * Axy * Ayy + gupzz * Axz * Ayz + &
|
||||
gupxy *(Axx * Ayy + Axy * Axy) + &
|
||||
gupxz *(Axx * Ayz + Axz * Axy) + &
|
||||
gupyz *(Axy * Ayz + Axz * Ayy)
|
||||
fxz = gupxx * Axx * Axz + gupyy * Axy * Ayz + gupzz * Axz * Azz + &
|
||||
gupxy *(Axx * Ayz + Axy * Axz) + &
|
||||
gupxz *(Axx * Azz + Axz * Axz) + &
|
||||
gupyz *(Axy * Azz + Axz * Ayz)
|
||||
fyz = gupxx * Axy * Axz + gupyy * Ayy * Ayz + gupzz * Ayz * Azz + &
|
||||
gupxy *(Axy * Ayz + Ayy * Axz) + &
|
||||
gupxz *(Axy * Azz + Ayz * Axz) + &
|
||||
gupyz *(Ayy * Azz + Ayz * Ayz)
|
||||
|
||||
f = chin1
|
||||
! store D^i D_i Lap in trK_rhs
|
||||
trK_rhs = f*trK_rhs
|
||||
|
||||
Axx_rhs = f * Axx_rhs+ alpn1 * (trK * Axx - TWO * fxx) + &
|
||||
TWO * ( Axx * betaxx + Axy * betayx + Axz * betazx )- &
|
||||
F2o3 * Axx * div_beta
|
||||
|
||||
Ayy_rhs = f * Ayy_rhs+ alpn1 * (trK * Ayy - TWO * fyy) + &
|
||||
TWO * ( Axy * betaxy + Ayy * betayy + Ayz * betazy )- &
|
||||
F2o3 * Ayy * div_beta
|
||||
|
||||
Azz_rhs = f * Azz_rhs+ alpn1 * (trK * Azz - TWO * fzz) + &
|
||||
TWO * ( Axz * betaxz + Ayz * betayz + Azz * betazz )- &
|
||||
F2o3 * Azz * div_beta
|
||||
|
||||
Axy_rhs = f * Axy_rhs+ alpn1 *( trK * Axy - TWO * fxy )+ &
|
||||
Axx * betaxy + Axz * betazy + &
|
||||
Ayy * betayx + Ayz * betazx + &
|
||||
F1o3 * Axy * div_beta - Axy * betazz
|
||||
|
||||
Ayz_rhs = f * Ayz_rhs+ alpn1 *( trK * Ayz - TWO * fyz )+ &
|
||||
Axy * betaxz + Ayy * betayz + &
|
||||
Axz * betaxy + Azz * betazy + &
|
||||
F1o3 * Ayz * div_beta - Ayz * betaxx
|
||||
|
||||
Axz_rhs = f * Axz_rhs+ alpn1 *( trK * Axz - TWO * fxz )+ &
|
||||
Axx * betaxz + Axy * betayz + &
|
||||
Ayz * betayx + Azz * betazx + &
|
||||
F1o3 * Axz * div_beta - Axz * betayy !rhs for Aij
|
||||
|
||||
! Compute trace of S_ij
|
||||
|
||||
S = f * ( gupxx * Sxx + gupyy * Syy + gupzz * Szz + &
|
||||
TWO * ( gupxy * Sxy + gupxz * Sxz + gupyz * Syz ) )
|
||||
|
||||
trK_rhs = - trK_rhs + alpn1 *( F1o3 * trK * trK + &
|
||||
gupxx * fxx + gupyy * fyy + gupzz * fzz + &
|
||||
TWO * ( gupxy * fxy + gupxz * fxz + gupyz * fyz ) + &
|
||||
FOUR * PI * ( rho + S )) !rhs for trK
|
||||
!covariant second derivative of chi respect to tilted metric
|
||||
call fdderivs(ex,chi,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
|
||||
|
||||
do k=1,ex(3)
|
||||
do j=1,ex(2)
|
||||
do i=1,ex(1)
|
||||
fxx(i,j,k) = fxx(i,j,k) - Gamxxx(i,j,k) * chix(i,j,k) - Gamyxx(i,j,k) * chiy(i,j,k) - Gamzxx(i,j,k) * chiz(i,j,k)
|
||||
fxy(i,j,k) = fxy(i,j,k) - Gamxxy(i,j,k) * chix(i,j,k) - Gamyxy(i,j,k) * chiy(i,j,k) - Gamzxy(i,j,k) * chiz(i,j,k)
|
||||
fxz(i,j,k) = fxz(i,j,k) - Gamxxz(i,j,k) * chix(i,j,k) - Gamyxz(i,j,k) * chiy(i,j,k) - Gamzxz(i,j,k) * chiz(i,j,k)
|
||||
fyy(i,j,k) = fyy(i,j,k) - Gamxyy(i,j,k) * chix(i,j,k) - Gamyyy(i,j,k) * chiy(i,j,k) - Gamzyy(i,j,k) * chiz(i,j,k)
|
||||
fyz(i,j,k) = fyz(i,j,k) - Gamxyz(i,j,k) * chix(i,j,k) - Gamyyz(i,j,k) * chiy(i,j,k) - Gamzyz(i,j,k) * chiz(i,j,k)
|
||||
fzz(i,j,k) = fzz(i,j,k) - Gamxzz(i,j,k) * chix(i,j,k) - Gamyzz(i,j,k) * chiy(i,j,k) - Gamzzz(i,j,k) * chiz(i,j,k)
|
||||
|
||||
chin_loc = chin1(i,j,k)
|
||||
f_loc = gupxx(i,j,k) * (fxx(i,j,k) - F3o2/chin_loc * chix(i,j,k) * chix(i,j,k)) + &
|
||||
gupyy(i,j,k) * (fyy(i,j,k) - F3o2/chin_loc * chiy(i,j,k) * chiy(i,j,k)) + &
|
||||
gupzz(i,j,k) * (fzz(i,j,k) - F3o2/chin_loc * chiz(i,j,k) * chiz(i,j,k)) + &
|
||||
TWO * gupxy(i,j,k) * (fxy(i,j,k) - F3o2/chin_loc * chix(i,j,k) * chiy(i,j,k)) + &
|
||||
TWO * gupxz(i,j,k) * (fxz(i,j,k) - F3o2/chin_loc * chix(i,j,k) * chiz(i,j,k)) + &
|
||||
TWO * gupyz(i,j,k) * (fyz(i,j,k) - F3o2/chin_loc * chiy(i,j,k) * chiz(i,j,k))
|
||||
f(i,j,k) = f_loc
|
||||
|
||||
Rxx(i,j,k) = Rxx(i,j,k) + (fxx(i,j,k) - chix(i,j,k)*chix(i,j,k)/chin_loc/TWO + gxx(i,j,k) * f_loc)/chin_loc/TWO
|
||||
Ryy(i,j,k) = Ryy(i,j,k) + (fyy(i,j,k) - chiy(i,j,k)*chiy(i,j,k)/chin_loc/TWO + gyy(i,j,k) * f_loc)/chin_loc/TWO
|
||||
Rzz(i,j,k) = Rzz(i,j,k) + (fzz(i,j,k) - chiz(i,j,k)*chiz(i,j,k)/chin_loc/TWO + gzz(i,j,k) * f_loc)/chin_loc/TWO
|
||||
Rxy(i,j,k) = Rxy(i,j,k) + (fxy(i,j,k) - chix(i,j,k)*chiy(i,j,k)/chin_loc/TWO + gxy(i,j,k) * f_loc)/chin_loc/TWO
|
||||
Rxz(i,j,k) = Rxz(i,j,k) + (fxz(i,j,k) - chix(i,j,k)*chiz(i,j,k)/chin_loc/TWO + gxz(i,j,k) * f_loc)/chin_loc/TWO
|
||||
Ryz(i,j,k) = Ryz(i,j,k) + (fyz(i,j,k) - chiy(i,j,k)*chiz(i,j,k)/chin_loc/TWO + gyz(i,j,k) * f_loc)/chin_loc/TWO
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
! covariant second derivatives of the lapse respect to physical metric
|
||||
call fdderivs(ex,Lap,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z, &
|
||||
SYM,SYM,SYM,symmetry,Lev)
|
||||
|
||||
do k=1,ex(3)
|
||||
do j=1,ex(2)
|
||||
do i=1,ex(1)
|
||||
chin_loc = chin1(i,j,k)
|
||||
gxxx(i,j,k) = (gupxx(i,j,k) * chix(i,j,k) + gupxy(i,j,k) * chiy(i,j,k) + gupxz(i,j,k) * chiz(i,j,k)) / chin_loc
|
||||
gxxy(i,j,k) = (gupxy(i,j,k) * chix(i,j,k) + gupyy(i,j,k) * chiy(i,j,k) + gupyz(i,j,k) * chiz(i,j,k)) / chin_loc
|
||||
gxxz(i,j,k) = (gupxz(i,j,k) * chix(i,j,k) + gupyz(i,j,k) * chiy(i,j,k) + gupzz(i,j,k) * chiz(i,j,k)) / chin_loc
|
||||
|
||||
Gamxxx(i,j,k) = Gamxxx(i,j,k) - ( (chix(i,j,k) + chix(i,j,k))/chin_loc - gxx(i,j,k) * gxxx(i,j,k) )*HALF
|
||||
Gamyxx(i,j,k) = Gamyxx(i,j,k) - ( - gxx(i,j,k) * gxxy(i,j,k) )*HALF
|
||||
Gamzxx(i,j,k) = Gamzxx(i,j,k) - ( - gxx(i,j,k) * gxxz(i,j,k) )*HALF
|
||||
Gamxyy(i,j,k) = Gamxyy(i,j,k) - ( - gyy(i,j,k) * gxxx(i,j,k) )*HALF
|
||||
Gamyyy(i,j,k) = Gamyyy(i,j,k) - ( (chiy(i,j,k) + chiy(i,j,k))/chin_loc - gyy(i,j,k) * gxxy(i,j,k) )*HALF
|
||||
Gamzyy(i,j,k) = Gamzyy(i,j,k) - ( - gyy(i,j,k) * gxxz(i,j,k) )*HALF
|
||||
Gamxzz(i,j,k) = Gamxzz(i,j,k) - ( - gzz(i,j,k) * gxxx(i,j,k) )*HALF
|
||||
Gamyzz(i,j,k) = Gamyzz(i,j,k) - ( - gzz(i,j,k) * gxxy(i,j,k) )*HALF
|
||||
Gamzzz(i,j,k) = Gamzzz(i,j,k) - ( (chiz(i,j,k) + chiz(i,j,k))/chin_loc - gzz(i,j,k) * gxxz(i,j,k) )*HALF
|
||||
Gamxxy(i,j,k) = Gamxxy(i,j,k) - ( chiy(i,j,k) /chin_loc - gxy(i,j,k) * gxxx(i,j,k) )*HALF
|
||||
Gamyxy(i,j,k) = Gamyxy(i,j,k) - ( chix(i,j,k) /chin_loc - gxy(i,j,k) * gxxy(i,j,k) )*HALF
|
||||
Gamzxy(i,j,k) = Gamzxy(i,j,k) - ( - gxy(i,j,k) * gxxz(i,j,k) )*HALF
|
||||
Gamxxz(i,j,k) = Gamxxz(i,j,k) - ( chiz(i,j,k) /chin_loc - gxz(i,j,k) * gxxx(i,j,k) )*HALF
|
||||
Gamyxz(i,j,k) = Gamyxz(i,j,k) - ( - gxz(i,j,k) * gxxy(i,j,k) )*HALF
|
||||
Gamzxz(i,j,k) = Gamzxz(i,j,k) - ( chix(i,j,k) /chin_loc - gxz(i,j,k) * gxxz(i,j,k) )*HALF
|
||||
Gamxyz(i,j,k) = Gamxyz(i,j,k) - ( - gyz(i,j,k) * gxxx(i,j,k) )*HALF
|
||||
Gamyyz(i,j,k) = Gamyyz(i,j,k) - ( chiz(i,j,k) /chin_loc - gyz(i,j,k) * gxxy(i,j,k) )*HALF
|
||||
Gamzyz(i,j,k) = Gamzyz(i,j,k) - ( chiy(i,j,k) /chin_loc - gyz(i,j,k) * gxxz(i,j,k) )*HALF
|
||||
|
||||
fxx(i,j,k) = fxx(i,j,k) - Gamxxx(i,j,k)*Lapx(i,j,k) - Gamyxx(i,j,k)*Lapy(i,j,k) - Gamzxx(i,j,k)*Lapz(i,j,k)
|
||||
fyy(i,j,k) = fyy(i,j,k) - Gamxyy(i,j,k)*Lapx(i,j,k) - Gamyyy(i,j,k)*Lapy(i,j,k) - Gamzyy(i,j,k)*Lapz(i,j,k)
|
||||
fzz(i,j,k) = fzz(i,j,k) - Gamxzz(i,j,k)*Lapx(i,j,k) - Gamyzz(i,j,k)*Lapy(i,j,k) - Gamzzz(i,j,k)*Lapz(i,j,k)
|
||||
fxy(i,j,k) = fxy(i,j,k) - Gamxxy(i,j,k)*Lapx(i,j,k) - Gamyxy(i,j,k)*Lapy(i,j,k) - Gamzxy(i,j,k)*Lapz(i,j,k)
|
||||
fxz(i,j,k) = fxz(i,j,k) - Gamxxz(i,j,k)*Lapx(i,j,k) - Gamyxz(i,j,k)*Lapy(i,j,k) - Gamzxz(i,j,k)*Lapz(i,j,k)
|
||||
fyz(i,j,k) = fyz(i,j,k) - Gamxyz(i,j,k)*Lapx(i,j,k) - Gamyyz(i,j,k)*Lapy(i,j,k) - Gamzyz(i,j,k)*Lapz(i,j,k)
|
||||
|
||||
trK_rhs(i,j,k) = gupxx(i,j,k) * fxx(i,j,k) + gupyy(i,j,k) * fyy(i,j,k) + gupzz(i,j,k) * fzz(i,j,k) + &
|
||||
TWO * (gupxy(i,j,k) * fxy(i,j,k) + gupxz(i,j,k) * fxz(i,j,k) + gupyz(i,j,k) * fyz(i,j,k))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
do k=1,ex(3)
|
||||
do j=1,ex(2)
|
||||
do i=1,ex(1)
|
||||
divb_loc = div_beta(i,j,k)
|
||||
chin_loc = chin1(i,j,k)
|
||||
|
||||
S_loc = chin_loc * ( gupxx(i,j,k) * Sxx(i,j,k) + gupyy(i,j,k) * Syy(i,j,k) + gupzz(i,j,k) * Szz(i,j,k) + &
|
||||
TWO * (gupxy(i,j,k) * Sxy(i,j,k) + gupxz(i,j,k) * Sxz(i,j,k) + gupyz(i,j,k) * Syz(i,j,k)) )
|
||||
S(i,j,k) = S_loc
|
||||
|
||||
f_loc = F2o3 * trK(i,j,k) * trK(i,j,k) - ( &
|
||||
gupxx(i,j,k) * ( gupxx(i,j,k) * Axx(i,j,k) * Axx(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + &
|
||||
gupzz(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + &
|
||||
TWO * (gupxy(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + gupxz(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + &
|
||||
gupyz(i,j,k) * Axy(i,j,k) * Axz(i,j,k)) ) + &
|
||||
gupyy(i,j,k) * ( gupxx(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayy(i,j,k) + &
|
||||
gupzz(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + &
|
||||
TWO * (gupxy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + gupxz(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + &
|
||||
gupyz(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k)) ) + &
|
||||
gupzz(i,j,k) * ( gupxx(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + &
|
||||
gupzz(i,j,k) * Azz(i,j,k) * Azz(i,j,k) + &
|
||||
TWO * (gupxy(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + gupxz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + &
|
||||
gupyz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k)) ) + &
|
||||
TWO * ( gupxy(i,j,k) * ( gupxx(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + &
|
||||
gupzz(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + &
|
||||
gupxy(i,j,k) * (Axx(i,j,k) * Ayy(i,j,k) + Axy(i,j,k) * Axy(i,j,k)) + &
|
||||
gupxz(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Axy(i,j,k)) + &
|
||||
gupyz(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Ayy(i,j,k)) ) + &
|
||||
gupxz(i,j,k) * ( gupxx(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + &
|
||||
gupzz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + &
|
||||
gupxy(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axy(i,j,k) * Axz(i,j,k)) + &
|
||||
gupxz(i,j,k) * (Axx(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Axz(i,j,k)) + &
|
||||
gupyz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Ayz(i,j,k)) ) + &
|
||||
gupyz(i,j,k) * ( gupxx(i,j,k) * Axy(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k) + &
|
||||
gupzz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k) + &
|
||||
gupxy(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Ayy(i,j,k) * Axz(i,j,k)) + &
|
||||
gupxz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Axz(i,j,k)) + &
|
||||
gupyz(i,j,k) * (Ayy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Ayz(i,j,k)) ) ) ) - &
|
||||
F16 * PI * rho(i,j,k) + EIGHT * PI * S_loc
|
||||
|
||||
f_loc = -F1o3 * ( gupxx(i,j,k) * fxx(i,j,k) + gupyy(i,j,k) * fyy(i,j,k) + gupzz(i,j,k) * fzz(i,j,k) + &
|
||||
TWO * (gupxy(i,j,k) * fxy(i,j,k) + gupxz(i,j,k) * fxz(i,j,k) + gupyz(i,j,k) * fyz(i,j,k)) + &
|
||||
alpn1(i,j,k)/chin_loc * f_loc )
|
||||
f(i,j,k) = f_loc
|
||||
|
||||
l_fxx = alpn1(i,j,k) * (Rxx(i,j,k) - EIGHT * PI * Sxx(i,j,k)) - fxx(i,j,k)
|
||||
l_fxy = alpn1(i,j,k) * (Rxy(i,j,k) - EIGHT * PI * Sxy(i,j,k)) - fxy(i,j,k)
|
||||
l_fxz = alpn1(i,j,k) * (Rxz(i,j,k) - EIGHT * PI * Sxz(i,j,k)) - fxz(i,j,k)
|
||||
l_fyy = alpn1(i,j,k) * (Ryy(i,j,k) - EIGHT * PI * Syy(i,j,k)) - fyy(i,j,k)
|
||||
l_fyz = alpn1(i,j,k) * (Ryz(i,j,k) - EIGHT * PI * Syz(i,j,k)) - fyz(i,j,k)
|
||||
l_fzz = alpn1(i,j,k) * (Rzz(i,j,k) - EIGHT * PI * Szz(i,j,k)) - fzz(i,j,k)
|
||||
|
||||
Axx_rhs(i,j,k) = l_fxx - gxx(i,j,k) * f_loc
|
||||
Ayy_rhs(i,j,k) = l_fyy - gyy(i,j,k) * f_loc
|
||||
Azz_rhs(i,j,k) = l_fzz - gzz(i,j,k) * f_loc
|
||||
Axy_rhs(i,j,k) = l_fxy - gxy(i,j,k) * f_loc
|
||||
Axz_rhs(i,j,k) = l_fxz - gxz(i,j,k) * f_loc
|
||||
Ayz_rhs(i,j,k) = l_fyz - gyz(i,j,k) * f_loc
|
||||
|
||||
fxx(i,j,k) = gupxx(i,j,k) * Axx(i,j,k) * Axx(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + &
|
||||
gupzz(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + TWO * (gupxy(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + &
|
||||
gupxz(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + gupyz(i,j,k) * Axy(i,j,k) * Axz(i,j,k))
|
||||
fyy(i,j,k) = gupxx(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayy(i,j,k) + &
|
||||
gupzz(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + TWO * (gupxy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + &
|
||||
gupxz(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + gupyz(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k))
|
||||
fzz(i,j,k) = gupxx(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + &
|
||||
gupzz(i,j,k) * Azz(i,j,k) * Azz(i,j,k) + TWO * (gupxy(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + &
|
||||
gupxz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + gupyz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k))
|
||||
fxy(i,j,k) = gupxx(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + &
|
||||
gupzz(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + gupxy(i,j,k) * (Axx(i,j,k) * Ayy(i,j,k) + Axy(i,j,k) * Axy(i,j,k)) + &
|
||||
gupxz(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Axy(i,j,k)) + &
|
||||
gupyz(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Ayy(i,j,k))
|
||||
fxz(i,j,k) = gupxx(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + &
|
||||
gupzz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + gupxy(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axy(i,j,k) * Axz(i,j,k)) + &
|
||||
gupxz(i,j,k) * (Axx(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Axz(i,j,k)) + &
|
||||
gupyz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Ayz(i,j,k))
|
||||
fyz(i,j,k) = gupxx(i,j,k) * Axy(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k) + &
|
||||
gupzz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k) + gupxy(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Ayy(i,j,k) * Axz(i,j,k)) + &
|
||||
gupxz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Axz(i,j,k)) + &
|
||||
gupyz(i,j,k) * (Ayy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Ayz(i,j,k))
|
||||
|
||||
trK_rhs(i,j,k) = chin_loc * trK_rhs(i,j,k)
|
||||
|
||||
Axx_rhs(i,j,k) = chin_loc * Axx_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Axx(i,j,k) - TWO * fxx(i,j,k)) + &
|
||||
TWO * (Axx(i,j,k) * betaxx(i,j,k) + Axy(i,j,k) * betayx(i,j,k) + Axz(i,j,k) * betazx(i,j,k)) - &
|
||||
F2o3 * Axx(i,j,k) * divb_loc
|
||||
Ayy_rhs(i,j,k) = chin_loc * Ayy_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Ayy(i,j,k) - TWO * fyy(i,j,k)) + &
|
||||
TWO * (Axy(i,j,k) * betaxy(i,j,k) + Ayy(i,j,k) * betayy(i,j,k) + Ayz(i,j,k) * betazy(i,j,k)) - &
|
||||
F2o3 * Ayy(i,j,k) * divb_loc
|
||||
Azz_rhs(i,j,k) = chin_loc * Azz_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Azz(i,j,k) - TWO * fzz(i,j,k)) + &
|
||||
TWO * (Axz(i,j,k) * betaxz(i,j,k) + Ayz(i,j,k) * betayz(i,j,k) + Azz(i,j,k) * betazz(i,j,k)) - &
|
||||
F2o3 * Azz(i,j,k) * divb_loc
|
||||
Axy_rhs(i,j,k) = chin_loc * Axy_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Axy(i,j,k) - TWO * fxy(i,j,k)) + &
|
||||
Axx(i,j,k) * betaxy(i,j,k) + Axz(i,j,k) * betazy(i,j,k) + Ayy(i,j,k) * betayx(i,j,k) + &
|
||||
Ayz(i,j,k) * betazx(i,j,k) + F1o3 * Axy(i,j,k) * divb_loc - Axy(i,j,k) * betazz(i,j,k)
|
||||
Ayz_rhs(i,j,k) = chin_loc * Ayz_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Ayz(i,j,k) - TWO * fyz(i,j,k)) + &
|
||||
Axy(i,j,k) * betaxz(i,j,k) + Ayy(i,j,k) * betayz(i,j,k) + Axz(i,j,k) * betaxy(i,j,k) + &
|
||||
Azz(i,j,k) * betazy(i,j,k) + F1o3 * Ayz(i,j,k) * divb_loc - Ayz(i,j,k) * betaxx(i,j,k)
|
||||
Axz_rhs(i,j,k) = chin_loc * Axz_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Axz(i,j,k) - TWO * fxz(i,j,k)) + &
|
||||
Axx(i,j,k) * betaxz(i,j,k) + Axy(i,j,k) * betayz(i,j,k) + Ayz(i,j,k) * betayx(i,j,k) + &
|
||||
Azz(i,j,k) * betazx(i,j,k) + F1o3 * Axz(i,j,k) * divb_loc - Axz(i,j,k) * betayy(i,j,k)
|
||||
|
||||
trK_rhs(i,j,k) = - trK_rhs(i,j,k) + alpn1(i,j,k) * ( F1o3 * trK(i,j,k) * trK(i,j,k) + &
|
||||
gupxx(i,j,k) * fxx(i,j,k) + gupyy(i,j,k) * fyy(i,j,k) + gupzz(i,j,k) * fzz(i,j,k) + &
|
||||
TWO * (gupxy(i,j,k) * fxy(i,j,k) + gupxz(i,j,k) * fxz(i,j,k) + gupyz(i,j,k) * fyz(i,j,k)) + &
|
||||
FOUR * PI * (rho(i,j,k) + S_loc) )
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!!!! gauge variable part
|
||||
|
||||
@@ -948,15 +1000,15 @@
|
||||
!!!!!!!!!advection term + Kreiss-Oliger dissipation (merged for cache efficiency)
|
||||
! lopsided_kodis shares the symmetry_bd buffer between advection and
|
||||
! dissipation, eliminating redundant full-grid copies. For metric variables
|
||||
! gxx/gyy/gzz (=dxx/dyy/dzz+1): kodis stencil coefficients sum to zero,
|
||||
! so the constant offset has no effect on dissipation.
|
||||
|
||||
call lopsided_kodis(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
||||
! gxx/gyy/gzz (=dxx/dyy/dzz+1): stencil coefficients sum to zero,
|
||||
! so the constant offset has no effect on dissipation.
|
||||
|
||||
call lopsided_kodis(ex,X,Y,Z,dxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,dyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,dzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
||||
|
||||
call lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
|
||||
|
||||
@@ -22,19 +22,32 @@
|
||||
#define f_compute_rhs_Z4c_ss COMPUTE_RHS_Z4C_SS
|
||||
#define f_compute_constraint_fr COMPUTE_CONSTRAINT_FR
|
||||
#endif
|
||||
#ifdef fortran3
|
||||
#define f_compute_rhs_bssn compute_rhs_bssn_
|
||||
#ifdef fortran3
|
||||
#define f_compute_rhs_bssn compute_rhs_bssn_
|
||||
#define f_compute_rhs_bssn_ss compute_rhs_bssn_ss_
|
||||
#define f_compute_rhs_bssn_escalar compute_rhs_bssn_escalar_
|
||||
#define f_compute_rhs_bssn_escalar_ss compute_rhs_bssn_escalar_ss_
|
||||
#define f_compute_rhs_Z4c compute_rhs_z4c_
|
||||
#define f_compute_rhs_Z4cnot compute_rhs_z4cnot_
|
||||
#define f_compute_rhs_Z4c_ss compute_rhs_z4c_ss_
|
||||
#define f_compute_constraint_fr compute_constraint_fr_
|
||||
#endif
|
||||
extern "C"
|
||||
{
|
||||
int f_compute_rhs_bssn(int *, double &, double *, double *, double *, // ex,T,X,Y,Z
|
||||
#define f_compute_constraint_fr compute_constraint_fr_
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
void f_bssn_rhs_kernel_timing_reset();
|
||||
int f_bssn_rhs_kernel_timing_bucket_count();
|
||||
const double *f_bssn_rhs_kernel_timing_local_seconds();
|
||||
const char *f_bssn_rhs_kernel_timing_label(int);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
extern "C"
|
||||
{
|
||||
int f_compute_rhs_bssn(int *, double &, double *, double *, double *, // ex,T,X,Y,Z
|
||||
double *, double *, // chi, trK
|
||||
double *, double *, double *, double *, double *, double *, // gij
|
||||
double *, double *, double *, double *, double *, double *, // Aij
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
5957
AMSS_NCKU_source/bssn_rhs_cuda.cu
Normal file
5957
AMSS_NCKU_source/bssn_rhs_cuda.cu
Normal file
File diff suppressed because it is too large
Load Diff
127
AMSS_NCKU_source/bssn_rhs_cuda.h
Normal file
127
AMSS_NCKU_source/bssn_rhs_cuda.h
Normal file
@@ -0,0 +1,127 @@
|
||||
#ifndef BSSN_RHS_CUDA_H
|
||||
#define BSSN_RHS_CUDA_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum {
|
||||
BSSN_CUDA_STATE_COUNT = 24,
|
||||
BSSN_CUDA_MATTER_COUNT = 10
|
||||
};
|
||||
|
||||
int f_compute_rhs_bssn(int *ex, double &T,
|
||||
double *X, double *Y, double *Z,
|
||||
double *chi, double *trK,
|
||||
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
|
||||
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
|
||||
double *Gamx, double *Gamy, double *Gamz,
|
||||
double *Lap, double *betax, double *betay, double *betaz,
|
||||
double *dtSfx, double *dtSfy, double *dtSfz,
|
||||
double *chi_rhs, double *trK_rhs,
|
||||
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
|
||||
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
|
||||
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
|
||||
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
|
||||
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
|
||||
double *rho, double *Sx, double *Sy, double *Sz,
|
||||
double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
|
||||
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
|
||||
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
|
||||
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
|
||||
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
|
||||
double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res,
|
||||
double *Gmx_Res, double *Gmy_Res, double *Gmz_Res,
|
||||
int &Symmetry, int &Lev, double &eps, int &co);
|
||||
|
||||
int bssn_cuda_rk4_substep(void *block_tag,
|
||||
int *ex, double *X, double *Y, double *Z,
|
||||
double **state_host_in,
|
||||
double **state_host_out,
|
||||
double **matter_host,
|
||||
const double *propspeed,
|
||||
const double *soa_flat,
|
||||
const double *bbox,
|
||||
double &dT,
|
||||
double &T,
|
||||
int &RK4,
|
||||
int &apply_bam_bc,
|
||||
int &Symmetry,
|
||||
int &Lev,
|
||||
double &eps,
|
||||
int &co,
|
||||
int &use_zero_matter,
|
||||
int &keep_resident_state,
|
||||
int &apply_enforce_ga,
|
||||
double &chitiny);
|
||||
|
||||
int bssn_cuda_copy_state_region_to_host(void *block_tag,
|
||||
int state_index,
|
||||
double *host_state,
|
||||
int *ex,
|
||||
int i0, int j0, int k0,
|
||||
int sx, int sy, int sz);
|
||||
|
||||
int bssn_cuda_copy_state_region_from_host(void *block_tag,
|
||||
int state_index,
|
||||
double *host_state,
|
||||
int *ex,
|
||||
int i0, int j0, int k0,
|
||||
int sx, int sy, int sz);
|
||||
|
||||
int bssn_cuda_download_resident_state(void *block_tag,
|
||||
int *ex,
|
||||
double **state_host_out);
|
||||
|
||||
int bssn_cuda_download_constraint_outputs(int *ex,
|
||||
double **constraint_host_out);
|
||||
|
||||
int bssn_cuda_pack_state_region_to_host_buffer(void *block_tag,
|
||||
int state_index,
|
||||
double *host_buffer,
|
||||
int *ex,
|
||||
int i0, int j0, int k0,
|
||||
int sx, int sy, int sz);
|
||||
|
||||
int bssn_cuda_unpack_state_region_from_host_buffer(void *block_tag,
|
||||
int state_index,
|
||||
double *host_buffer,
|
||||
int *ex,
|
||||
int i0, int j0, int k0,
|
||||
int sx, int sy, int sz);
|
||||
|
||||
int bssn_cuda_pack_state_batch_to_host_buffer(void *block_tag,
|
||||
int state_count,
|
||||
double *host_buffer,
|
||||
int *ex,
|
||||
int i0, int j0, int k0,
|
||||
int sx, int sy, int sz);
|
||||
|
||||
int bssn_cuda_unpack_state_batch_from_host_buffer(void *block_tag,
|
||||
int state_count,
|
||||
double *host_buffer,
|
||||
int *ex,
|
||||
int i0, int j0, int k0,
|
||||
int sx, int sy, int sz);
|
||||
|
||||
int bssn_cuda_download_state_subset(void *block_tag,
|
||||
int *ex,
|
||||
int subset_count,
|
||||
const int *state_indices,
|
||||
double **state_host_out);
|
||||
|
||||
int bssn_cuda_upload_state_subset(void *block_tag,
|
||||
int *ex,
|
||||
int subset_count,
|
||||
const int *state_indices,
|
||||
double **state_host_in);
|
||||
|
||||
int bssn_cuda_has_resident_state(void *block_tag);
|
||||
|
||||
void bssn_cuda_release_step_ctx(void *block_tag);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@@ -33,7 +33,7 @@
|
||||
real*8 :: dX,dY,dZ
|
||||
real*8,dimension(0:ex(1),0:ex(2),0:ex(3)) :: fh
|
||||
real*8, dimension(3) :: SoA
|
||||
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
|
||||
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
|
||||
real*8 :: d2dx,d2dy,d2dz
|
||||
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
|
||||
real*8, parameter :: ZEO=0.d0,ONE=1.d0, F60=6.d1
|
||||
@@ -137,7 +137,7 @@
|
||||
real*8 :: dX
|
||||
real*8,dimension(0:ex(1),0:ex(2),0:ex(3)) :: fh
|
||||
real*8, dimension(3) :: SoA
|
||||
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
|
||||
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
|
||||
real*8 :: d2dx
|
||||
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
|
||||
real*8, parameter :: ZEO=0.d0,ONE=1.d0, F60=6.d1
|
||||
@@ -1512,8 +1512,9 @@
|
||||
real*8 :: dX,dY,dZ
|
||||
real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)) :: fh
|
||||
real*8, dimension(3) :: SoA
|
||||
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
|
||||
real*8 :: Sdxdx,Sdydy,Sdzdz,Fdxdx,Fdydy,Fdzdz
|
||||
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
|
||||
integer :: i_core_min,i_core_max,j_core_min,j_core_max,k_core_min,k_core_max
|
||||
real*8 :: Sdxdx,Sdydy,Sdzdz,Fdxdx,Fdydy,Fdzdz
|
||||
real*8 :: Sdxdy,Sdxdz,Sdydz,Fdxdy,Fdxdz,Fdydz
|
||||
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
|
||||
real*8, parameter :: ZEO=0.d0, ONE=1.d0, TWO=2.d0, F1o4=2.5d-1, F9=9.d0, F45=4.5d1
|
||||
@@ -1560,17 +1561,55 @@
|
||||
|
||||
fxx = ZEO
|
||||
fyy = ZEO
|
||||
fzz = ZEO
|
||||
fxy = ZEO
|
||||
fxz = ZEO
|
||||
fyz = ZEO
|
||||
|
||||
do k=1,ex(3)
|
||||
do j=1,ex(2)
|
||||
do i=1,ex(1)
|
||||
!~~~~~~ fxx
|
||||
if(i+2 <= imax .and. i-2 >= imin)then
|
||||
!
|
||||
fzz = ZEO
|
||||
fxy = ZEO
|
||||
fxz = ZEO
|
||||
fyz = ZEO
|
||||
|
||||
i_core_min = max(1, imin+2)
|
||||
i_core_max = min(ex(1), imax-2)
|
||||
j_core_min = max(1, jmin+2)
|
||||
j_core_max = min(ex(2), jmax-2)
|
||||
k_core_min = max(1, kmin+2)
|
||||
k_core_max = min(ex(3), kmax-2)
|
||||
|
||||
if(i_core_min <= i_core_max .and. j_core_min <= j_core_max .and. k_core_min <= k_core_max)then
|
||||
do k=k_core_min,k_core_max
|
||||
do j=j_core_min,j_core_max
|
||||
do i=i_core_min,i_core_max
|
||||
! interior points always use 4th-order stencils without branch checks
|
||||
fxx(i,j,k) = Fdxdx*(-fh(i-2,j,k)+F16*fh(i-1,j,k)-F30*fh(i,j,k) &
|
||||
-fh(i+2,j,k)+F16*fh(i+1,j,k) )
|
||||
fyy(i,j,k) = Fdydy*(-fh(i,j-2,k)+F16*fh(i,j-1,k)-F30*fh(i,j,k) &
|
||||
-fh(i,j+2,k)+F16*fh(i,j+1,k) )
|
||||
fzz(i,j,k) = Fdzdz*(-fh(i,j,k-2)+F16*fh(i,j,k-1)-F30*fh(i,j,k) &
|
||||
-fh(i,j,k+2)+F16*fh(i,j,k+1) )
|
||||
fxy(i,j,k) = Fdxdy*( (fh(i-2,j-2,k)-F8*fh(i-1,j-2,k)+F8*fh(i+1,j-2,k)-fh(i+2,j-2,k)) &
|
||||
-F8 *(fh(i-2,j-1,k)-F8*fh(i-1,j-1,k)+F8*fh(i+1,j-1,k)-fh(i+2,j-1,k)) &
|
||||
+F8 *(fh(i-2,j+1,k)-F8*fh(i-1,j+1,k)+F8*fh(i+1,j+1,k)-fh(i+2,j+1,k)) &
|
||||
- (fh(i-2,j+2,k)-F8*fh(i-1,j+2,k)+F8*fh(i+1,j+2,k)-fh(i+2,j+2,k)))
|
||||
fxz(i,j,k) = Fdxdz*( (fh(i-2,j,k-2)-F8*fh(i-1,j,k-2)+F8*fh(i+1,j,k-2)-fh(i+2,j,k-2)) &
|
||||
-F8 *(fh(i-2,j,k-1)-F8*fh(i-1,j,k-1)+F8*fh(i+1,j,k-1)-fh(i+2,j,k-1)) &
|
||||
+F8 *(fh(i-2,j,k+1)-F8*fh(i-1,j,k+1)+F8*fh(i+1,j,k+1)-fh(i+2,j,k+1)) &
|
||||
- (fh(i-2,j,k+2)-F8*fh(i-1,j,k+2)+F8*fh(i+1,j,k+2)-fh(i+2,j,k+2)))
|
||||
fyz(i,j,k) = Fdydz*( (fh(i,j-2,k-2)-F8*fh(i,j-1,k-2)+F8*fh(i,j+1,k-2)-fh(i,j+2,k-2)) &
|
||||
-F8 *(fh(i,j-2,k-1)-F8*fh(i,j-1,k-1)+F8*fh(i,j+1,k-1)-fh(i,j+2,k-1)) &
|
||||
+F8 *(fh(i,j-2,k+1)-F8*fh(i,j-1,k+1)+F8*fh(i,j+1,k+1)-fh(i,j+2,k+1)) &
|
||||
- (fh(i,j-2,k+2)-F8*fh(i,j-1,k+2)+F8*fh(i,j+1,k+2)-fh(i,j+2,k+2)))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
endif
|
||||
|
||||
do k=1,ex(3)
|
||||
do j=1,ex(2)
|
||||
do i=1,ex(1)
|
||||
if(i>=i_core_min .and. i<=i_core_max .and. &
|
||||
j>=j_core_min .and. j<=j_core_max .and. &
|
||||
k>=k_core_min .and. k<=k_core_max) cycle
|
||||
!~~~~~~ fxx
|
||||
if(i+2 <= imax .and. i-2 >= imin)then
|
||||
!
|
||||
! - f(i-2) + 16 f(i-1) - 30 f(i) + 16 f(i+1) - f(i+2)
|
||||
! fxx(i) = ----------------------------------------------------------
|
||||
! 12 dx^2
|
||||
|
||||
@@ -71,149 +71,99 @@ void fdderivs(const int ex[3],
|
||||
const double Fdxdz = F1o144 / (dX * dZ);
|
||||
const double Fdydz = F1o144 / (dY * dZ);
|
||||
|
||||
/* 输出清零:fxx,fyy,fzz,fxy,fxz,fyz = 0 */
|
||||
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
||||
for (size_t p = 0; p < all; ++p) {
|
||||
fxx[p] = ZEO; fyy[p] = ZEO; fzz[p] = ZEO;
|
||||
fxy[p] = ZEO; fxz[p] = ZEO; fyz[p] = ZEO;
|
||||
/* 只清零不被主循环覆盖的边界面 */
|
||||
{
|
||||
/* 高边界:k0=ex3-1 */
|
||||
for (int j0 = 0; j0 < ex2; ++j0)
|
||||
for (int i0 = 0; i0 < ex1; ++i0) {
|
||||
const size_t p = idx_ex(i0, j0, ex3 - 1, ex);
|
||||
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
|
||||
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
|
||||
}
|
||||
/* 高边界:j0=ex2-1 */
|
||||
for (int k0 = 0; k0 < ex3 - 1; ++k0)
|
||||
for (int i0 = 0; i0 < ex1; ++i0) {
|
||||
const size_t p = idx_ex(i0, ex2 - 1, k0, ex);
|
||||
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
|
||||
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
|
||||
}
|
||||
/* 高边界:i0=ex1-1 */
|
||||
for (int k0 = 0; k0 < ex3 - 1; ++k0)
|
||||
for (int j0 = 0; j0 < ex2 - 1; ++j0) {
|
||||
const size_t p = idx_ex(ex1 - 1, j0, k0, ex);
|
||||
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
|
||||
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
|
||||
}
|
||||
|
||||
/* 低边界:当二阶模板也不可用时,对应 i0/j0/k0=0 面 */
|
||||
if (kminF == 1) {
|
||||
for (int j0 = 0; j0 < ex2; ++j0)
|
||||
for (int i0 = 0; i0 < ex1; ++i0) {
|
||||
const size_t p = idx_ex(i0, j0, 0, ex);
|
||||
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
|
||||
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
|
||||
}
|
||||
}
|
||||
if (jminF == 1) {
|
||||
for (int k0 = 0; k0 < ex3; ++k0)
|
||||
for (int i0 = 0; i0 < ex1; ++i0) {
|
||||
const size_t p = idx_ex(i0, 0, k0, ex);
|
||||
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
|
||||
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
|
||||
}
|
||||
}
|
||||
if (iminF == 1) {
|
||||
for (int k0 = 0; k0 < ex3; ++k0)
|
||||
for (int j0 = 0; j0 < ex2; ++j0) {
|
||||
const size_t p = idx_ex(0, j0, k0, ex);
|
||||
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
|
||||
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Fortran:
|
||||
* do k=1,ex3-1
|
||||
* do j=1,ex2-1
|
||||
* do i=1,ex1-1
|
||||
* 两段式:
|
||||
* 1) 二阶可用区域先计算二阶模板
|
||||
* 2) 高阶可用区域再覆盖四阶模板
|
||||
*/
|
||||
|
||||
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
|
||||
const int kF = k0 + 1;
|
||||
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
|
||||
const int jF = j0 + 1;
|
||||
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
|
||||
const int iF = i0 + 1;
|
||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||
const int i2_lo = (iminF > 0) ? iminF : 0;
|
||||
const int j2_lo = (jminF > 0) ? jminF : 0;
|
||||
const int k2_lo = (kminF > 0) ? kminF : 0;
|
||||
const int i2_hi = ex1 - 2;
|
||||
const int j2_hi = ex2 - 2;
|
||||
const int k2_hi = ex3 - 2;
|
||||
|
||||
/* 高阶分支:i±2,j±2,k±2 都在范围内 */
|
||||
if ((iF + 2) <= imaxF && (iF - 2) >= iminF &&
|
||||
(jF + 2) <= jmaxF && (jF - 2) >= jminF &&
|
||||
(kF + 2) <= kmaxF && (kF - 2) >= kminF)
|
||||
{
|
||||
fxx[p] = Fdxdx * (
|
||||
-fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] +
|
||||
F16 * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
|
||||
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
|
||||
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)] +
|
||||
F16 * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
|
||||
);
|
||||
const int i4_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
|
||||
const int j4_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
|
||||
const int k4_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
|
||||
const int i4_hi = ex1 - 3;
|
||||
const int j4_hi = ex2 - 3;
|
||||
const int k4_hi = ex3 - 3;
|
||||
|
||||
fyy[p] = Fdydy * (
|
||||
-fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] +
|
||||
F16 * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
|
||||
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
|
||||
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)] +
|
||||
F16 * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
|
||||
);
|
||||
/*
|
||||
* Strategy A:
|
||||
* Avoid redundant work in overlap of 2nd/4th-order regions.
|
||||
* Only compute 2nd-order on shell points that are NOT overwritten by
|
||||
* the 4th-order pass.
|
||||
*/
|
||||
const int has4 = (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi);
|
||||
|
||||
fzz[p] = Fdzdz * (
|
||||
-fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] +
|
||||
F16 * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
|
||||
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
|
||||
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)] +
|
||||
F16 * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
|
||||
);
|
||||
|
||||
/* fxy 高阶:完全照搬 Fortran 的括号结构 */
|
||||
{
|
||||
const double t_jm2 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF - 2, kF, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 2, kF, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 2, kF, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF - 2, kF, ex)] );
|
||||
|
||||
const double t_jm1 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF - 1, kF, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF - 1, kF, ex)] );
|
||||
|
||||
const double t_jp1 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF + 1, kF, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF + 1, kF, ex)] );
|
||||
|
||||
const double t_jp2 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF + 2, kF, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 2, kF, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 2, kF, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF + 2, kF, ex)] );
|
||||
|
||||
fxy[p] = Fdxdy * ( t_jm2 - F8 * t_jm1 + F8 * t_jp1 - t_jp2 );
|
||||
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
||||
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
||||
const int kF = k0 + 1;
|
||||
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
||||
const int jF = j0 + 1;
|
||||
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
||||
if (has4 &&
|
||||
i0 >= i4_lo && i0 <= i4_hi &&
|
||||
j0 >= j4_lo && j0 <= j4_hi &&
|
||||
k0 >= k4_lo && k0 <= k4_hi) {
|
||||
continue;
|
||||
}
|
||||
const int iF = i0 + 1;
|
||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||
|
||||
/* fxz 高阶 */
|
||||
{
|
||||
const double t_km2 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 2, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 2, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 2, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 2, ex)] );
|
||||
|
||||
const double t_km1 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 1, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 1, ex)] );
|
||||
|
||||
const double t_kp1 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 1, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 1, ex)] );
|
||||
|
||||
const double t_kp2 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 2, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 2, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 2, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 2, ex)] );
|
||||
|
||||
fxz[p] = Fdxdz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
|
||||
}
|
||||
|
||||
/* fyz 高阶 */
|
||||
{
|
||||
const double t_km2 =
|
||||
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 2, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 2, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 2, ex)]
|
||||
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 2, ex)] );
|
||||
|
||||
const double t_km1 =
|
||||
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 1, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)]
|
||||
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 1, ex)] );
|
||||
|
||||
const double t_kp1 =
|
||||
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 1, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
|
||||
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 1, ex)] );
|
||||
|
||||
const double t_kp2 =
|
||||
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 2, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 2, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 2, ex)]
|
||||
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 2, ex)] );
|
||||
|
||||
fyz[p] = Fdydz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
|
||||
}
|
||||
}
|
||||
/* 二阶分支:i±1,j±1,k±1 在范围内 */
|
||||
else if ((iF + 1) <= imaxF && (iF - 1) >= iminF &&
|
||||
(jF + 1) <= jmaxF && (jF - 1) >= jminF &&
|
||||
(kF + 1) <= kmaxF && (kF - 1) >= kminF)
|
||||
{
|
||||
fxx[p] = Sdxdx * (
|
||||
fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
|
||||
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||
@@ -252,17 +202,131 @@ void fdderivs(const int ex[3],
|
||||
fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)] +
|
||||
fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
|
||||
);
|
||||
}else{
|
||||
fxx[p] = 0.0;
|
||||
fyy[p] = 0.0;
|
||||
fzz[p] = 0.0;
|
||||
fxy[p] = 0.0;
|
||||
fxz[p] = 0.0;
|
||||
fyz[p] = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (has4) {
|
||||
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
|
||||
const int kF = k0 + 1;
|
||||
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
|
||||
const int jF = j0 + 1;
|
||||
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
|
||||
const int iF = i0 + 1;
|
||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||
|
||||
fxx[p] = Fdxdx * (
|
||||
-fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] +
|
||||
F16 * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
|
||||
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
|
||||
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)] +
|
||||
F16 * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
|
||||
);
|
||||
|
||||
fyy[p] = Fdydy * (
|
||||
-fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] +
|
||||
F16 * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
|
||||
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
|
||||
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)] +
|
||||
F16 * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
|
||||
);
|
||||
|
||||
fzz[p] = Fdzdz * (
|
||||
-fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] +
|
||||
F16 * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
|
||||
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
|
||||
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)] +
|
||||
F16 * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
|
||||
);
|
||||
|
||||
{
|
||||
const double t_jm2 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF - 2, kF, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 2, kF, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 2, kF, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF - 2, kF, ex)] );
|
||||
|
||||
const double t_jm1 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF - 1, kF, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF - 1, kF, ex)] );
|
||||
|
||||
const double t_jp1 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF + 1, kF, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF + 1, kF, ex)] );
|
||||
|
||||
const double t_jp2 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF + 2, kF, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 2, kF, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 2, kF, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF + 2, kF, ex)] );
|
||||
|
||||
fxy[p] = Fdxdy * ( t_jm2 - F8 * t_jm1 + F8 * t_jp1 - t_jp2 );
|
||||
}
|
||||
|
||||
{
|
||||
const double t_km2 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 2, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 2, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 2, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 2, ex)] );
|
||||
|
||||
const double t_km1 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 1, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 1, ex)] );
|
||||
|
||||
const double t_kp1 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 1, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 1, ex)] );
|
||||
|
||||
const double t_kp2 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 2, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 2, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 2, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 2, ex)] );
|
||||
|
||||
fxz[p] = Fdxdz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
|
||||
}
|
||||
|
||||
{
|
||||
const double t_km2 =
|
||||
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 2, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 2, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 2, ex)]
|
||||
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 2, ex)] );
|
||||
|
||||
const double t_km1 =
|
||||
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 1, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)]
|
||||
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 1, ex)] );
|
||||
|
||||
const double t_kp1 =
|
||||
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 1, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
|
||||
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 1, ex)] );
|
||||
|
||||
const double t_kp2 =
|
||||
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 2, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 2, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 2, ex)]
|
||||
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 2, ex)] );
|
||||
|
||||
fyz[p] = Fdydz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// free(fh);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -81,26 +81,63 @@ void fderivs(const int ex[3],
|
||||
}
|
||||
|
||||
/*
|
||||
* Fortran loops:
|
||||
* do k=1,ex3-1
|
||||
* do j=1,ex2-1
|
||||
* do i=1,ex1-1
|
||||
* 两段式:
|
||||
* 1) 先在二阶可用区域计算二阶模板
|
||||
* 2) 再在高阶可用区域覆盖为四阶模板
|
||||
*
|
||||
* C: k0=0..ex3-2, j0=0..ex2-2, i0=0..ex1-2
|
||||
* 与原 if/elseif 逻辑等价,但减少逐点分支判断。
|
||||
*/
|
||||
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
|
||||
const int kF = k0 + 1;
|
||||
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
|
||||
const int jF = j0 + 1;
|
||||
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
|
||||
const int iF = i0 + 1;
|
||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||
const int i2_lo = (iminF > 0) ? iminF : 0;
|
||||
const int j2_lo = (jminF > 0) ? jminF : 0;
|
||||
const int k2_lo = (kminF > 0) ? kminF : 0;
|
||||
const int i2_hi = ex1 - 2;
|
||||
const int j2_hi = ex2 - 2;
|
||||
const int k2_hi = ex3 - 2;
|
||||
|
||||
const int i4_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
|
||||
const int j4_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
|
||||
const int k4_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
|
||||
const int i4_hi = ex1 - 3;
|
||||
const int j4_hi = ex2 - 3;
|
||||
const int k4_hi = ex3 - 3;
|
||||
|
||||
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
||||
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
||||
const int kF = k0 + 1;
|
||||
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
||||
const int jF = j0 + 1;
|
||||
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
||||
const int iF = i0 + 1;
|
||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||
|
||||
fx[p] = d2dx * (
|
||||
-fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
|
||||
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
|
||||
);
|
||||
|
||||
fy[p] = d2dy * (
|
||||
-fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
|
||||
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
|
||||
);
|
||||
|
||||
fz[p] = d2dz * (
|
||||
-fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
|
||||
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) {
|
||||
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
|
||||
const int kF = k0 + 1;
|
||||
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
|
||||
const int jF = j0 + 1;
|
||||
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
|
||||
const int iF = i0 + 1;
|
||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||
|
||||
// if(i+2 <= imax .and. i-2 >= imin ... ) (全是 Fortran 索引)
|
||||
if ((iF + 2) <= imaxF && (iF - 2) >= iminF &&
|
||||
(jF + 2) <= jmaxF && (jF - 2) >= jminF &&
|
||||
(kF + 2) <= kmaxF && (kF - 2) >= kminF)
|
||||
{
|
||||
fx[p] = d12dx * (
|
||||
fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] -
|
||||
EIT * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
|
||||
@@ -122,29 +159,9 @@ void fderivs(const int ex[3],
|
||||
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)]
|
||||
);
|
||||
}
|
||||
// elseif(i+1 <= imax .and. i-1 >= imin ...)
|
||||
else if ((iF + 1) <= imaxF && (iF - 1) >= iminF &&
|
||||
(jF + 1) <= jmaxF && (jF - 1) >= jminF &&
|
||||
(kF + 1) <= kmaxF && (kF - 1) >= kminF)
|
||||
{
|
||||
fx[p] = d2dx * (
|
||||
-fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
|
||||
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
|
||||
);
|
||||
|
||||
fy[p] = d2dy * (
|
||||
-fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
|
||||
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
|
||||
);
|
||||
|
||||
fz[p] = d2dz * (
|
||||
-fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
|
||||
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// free(fh);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1111,27 +1111,177 @@ end subroutine d2dump
|
||||
!~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
!~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
!~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
! common code for cell and vertex
|
||||
!------------------------------------------------------------------------------
|
||||
! Lagrangian polynomial interpolation
|
||||
!------------------------------------------------------------------------------
|
||||
|
||||
!DIR$ ATTRIBUTES FORCEINLINE :: polint
|
||||
subroutine polint(xa, ya, x, y, dy, ordn)
|
||||
implicit none
|
||||
|
||||
integer, intent(in) :: ordn
|
||||
! common code for cell and vertex
|
||||
!------------------------------------------------------------------------------
|
||||
! Lagrangian polynomial interpolation
|
||||
!------------------------------------------------------------------------------
|
||||
#ifndef POLINT6_USE_BARYCENTRIC
|
||||
#define POLINT6_USE_BARYCENTRIC 1
|
||||
#endif
|
||||
|
||||
!DIR$ ATTRIBUTES FORCEINLINE :: polint6_neville
|
||||
subroutine polint6_neville(xa, ya, x, y, dy)
|
||||
implicit none
|
||||
|
||||
real*8, dimension(6), intent(in) :: xa, ya
|
||||
real*8, intent(in) :: x
|
||||
real*8, intent(out) :: y, dy
|
||||
|
||||
integer :: i, m, ns, n_m
|
||||
real*8, dimension(6) :: c, d, ho
|
||||
real*8 :: dif, dift, hp, h, den_val
|
||||
|
||||
c = ya
|
||||
d = ya
|
||||
ho = xa - x
|
||||
|
||||
ns = 1
|
||||
dif = abs(x - xa(1))
|
||||
|
||||
do i = 2, 6
|
||||
dift = abs(x - xa(i))
|
||||
if (dift < dif) then
|
||||
ns = i
|
||||
dif = dift
|
||||
end if
|
||||
end do
|
||||
|
||||
y = ya(ns)
|
||||
ns = ns - 1
|
||||
|
||||
do m = 1, 5
|
||||
n_m = 6 - m
|
||||
do i = 1, n_m
|
||||
hp = ho(i)
|
||||
h = ho(i+m)
|
||||
den_val = hp - h
|
||||
|
||||
if (den_val == 0.0d0) then
|
||||
write(*,*) 'failure in polint for point',x
|
||||
write(*,*) 'with input points: ',xa
|
||||
stop
|
||||
end if
|
||||
|
||||
den_val = (c(i+1) - d(i)) / den_val
|
||||
|
||||
d(i) = h * den_val
|
||||
c(i) = hp * den_val
|
||||
end do
|
||||
|
||||
if (2 * ns < n_m) then
|
||||
dy = c(ns + 1)
|
||||
else
|
||||
dy = d(ns)
|
||||
ns = ns - 1
|
||||
end if
|
||||
y = y + dy
|
||||
end do
|
||||
|
||||
return
|
||||
end subroutine polint6_neville
|
||||
|
||||
!DIR$ ATTRIBUTES FORCEINLINE :: polint6_barycentric
|
||||
subroutine polint6_barycentric(xa, ya, x, y, dy)
|
||||
implicit none
|
||||
|
||||
real*8, dimension(6), intent(in) :: xa, ya
|
||||
real*8, intent(in) :: x
|
||||
real*8, intent(out) :: y, dy
|
||||
|
||||
integer :: i, j
|
||||
logical :: is_uniform
|
||||
real*8, dimension(6) :: lambda
|
||||
real*8 :: dx, den_i, term, num, den, step, tol
|
||||
real*8, parameter :: c_uniform(6) = (/ -1.d0, 5.d0, -10.d0, 10.d0, -5.d0, 1.d0 /)
|
||||
|
||||
do i = 1, 6
|
||||
if (x == xa(i)) then
|
||||
y = ya(i)
|
||||
dy = 0.d0
|
||||
return
|
||||
end if
|
||||
end do
|
||||
|
||||
step = xa(2) - xa(1)
|
||||
is_uniform = (step /= 0.d0)
|
||||
if (is_uniform) then
|
||||
tol = 64.d0 * epsilon(1.d0) * max(1.d0, abs(step))
|
||||
do i = 3, 6
|
||||
if (abs((xa(i) - xa(i-1)) - step) > tol) then
|
||||
is_uniform = .false.
|
||||
exit
|
||||
end if
|
||||
end do
|
||||
end if
|
||||
|
||||
if (is_uniform) then
|
||||
num = 0.d0
|
||||
den = 0.d0
|
||||
do i = 1, 6
|
||||
term = c_uniform(i) / (x - xa(i))
|
||||
num = num + term * ya(i)
|
||||
den = den + term
|
||||
end do
|
||||
y = num / den
|
||||
dy = 0.d0
|
||||
return
|
||||
end if
|
||||
|
||||
do i = 1, 6
|
||||
den_i = 1.d0
|
||||
do j = 1, 6
|
||||
if (j /= i) then
|
||||
dx = xa(i) - xa(j)
|
||||
if (dx == 0.0d0) then
|
||||
write(*,*) 'failure in polint for point',x
|
||||
write(*,*) 'with input points: ',xa
|
||||
stop
|
||||
end if
|
||||
den_i = den_i * dx
|
||||
end if
|
||||
end do
|
||||
lambda(i) = 1.d0 / den_i
|
||||
end do
|
||||
|
||||
num = 0.d0
|
||||
den = 0.d0
|
||||
do i = 1, 6
|
||||
term = lambda(i) / (x - xa(i))
|
||||
num = num + term * ya(i)
|
||||
den = den + term
|
||||
end do
|
||||
|
||||
y = num / den
|
||||
dy = 0.d0
|
||||
|
||||
return
|
||||
end subroutine polint6_barycentric
|
||||
|
||||
!DIR$ ATTRIBUTES FORCEINLINE :: polint
|
||||
subroutine polint(xa, ya, x, y, dy, ordn)
|
||||
implicit none
|
||||
|
||||
integer, intent(in) :: ordn
|
||||
real*8, dimension(ordn), intent(in) :: xa, ya
|
||||
real*8, intent(in) :: x
|
||||
real*8, intent(out) :: y, dy
|
||||
|
||||
integer :: i, m, ns, n_m
|
||||
real*8, dimension(ordn) :: c, d, ho
|
||||
real*8 :: dif, dift, hp, h, den_val
|
||||
|
||||
c = ya
|
||||
d = ya
|
||||
ho = xa - x
|
||||
integer :: i, m, ns, n_m
|
||||
real*8, dimension(ordn) :: c, d, ho
|
||||
real*8 :: dif, dift, hp, h, den_val
|
||||
|
||||
if (ordn == 6) then
|
||||
#if POLINT6_USE_BARYCENTRIC
|
||||
call polint6_barycentric(xa, ya, x, y, dy)
|
||||
#else
|
||||
call polint6_neville(xa, ya, x, y, dy)
|
||||
#endif
|
||||
return
|
||||
end if
|
||||
|
||||
c = ya
|
||||
d = ya
|
||||
ho = xa - x
|
||||
|
||||
ns = 1
|
||||
dif = abs(x - xa(1))
|
||||
@@ -1175,13 +1325,48 @@ end subroutine d2dump
|
||||
y = y + dy
|
||||
end do
|
||||
|
||||
return
|
||||
end subroutine polint
|
||||
!------------------------------------------------------------------------------
|
||||
!
|
||||
! interpolation in 2 dimensions, follow yx order
|
||||
!
|
||||
!------------------------------------------------------------------------------
|
||||
return
|
||||
end subroutine polint
|
||||
!------------------------------------------------------------------------------
|
||||
! Compute Lagrange interpolation basis weights for one target point.
|
||||
!------------------------------------------------------------------------------
|
||||
!DIR$ ATTRIBUTES FORCEINLINE :: polint_lagrange_weights
|
||||
subroutine polint_lagrange_weights(xa, x, w, ordn)
|
||||
implicit none
|
||||
|
||||
integer, intent(in) :: ordn
|
||||
real*8, dimension(1:ordn), intent(in) :: xa
|
||||
real*8, intent(in) :: x
|
||||
real*8, dimension(1:ordn), intent(out) :: w
|
||||
|
||||
integer :: i, j
|
||||
real*8 :: num, den, dx
|
||||
|
||||
do i = 1, ordn
|
||||
num = 1.d0
|
||||
den = 1.d0
|
||||
do j = 1, ordn
|
||||
if (j /= i) then
|
||||
dx = xa(i) - xa(j)
|
||||
if (dx == 0.0d0) then
|
||||
write(*,*) 'failure in polint for point',x
|
||||
write(*,*) 'with input points: ',xa
|
||||
stop
|
||||
end if
|
||||
num = num * (x - xa(j))
|
||||
den = den * dx
|
||||
end if
|
||||
end do
|
||||
w(i) = num / den
|
||||
end do
|
||||
|
||||
return
|
||||
end subroutine polint_lagrange_weights
|
||||
!------------------------------------------------------------------------------
|
||||
!
|
||||
! interpolation in 2 dimensions, follow yx order
|
||||
!
|
||||
!------------------------------------------------------------------------------
|
||||
subroutine polin2(x1a,x2a,ya,x1,x2,y,dy,ordn)
|
||||
implicit none
|
||||
|
||||
@@ -1229,11 +1414,11 @@ end subroutine d2dump
|
||||
real*8, intent(in) :: x1,x2,x3
|
||||
real*8, intent(out) :: y,dy
|
||||
|
||||
#ifdef POLINT_LEGACY_ORDER
|
||||
integer :: i,j,m,n
|
||||
real*8, dimension(ordn,ordn) :: yatmp
|
||||
real*8, dimension(ordn) :: ymtmp
|
||||
real*8, dimension(ordn) :: yntmp
|
||||
#ifdef POLINT_LEGACY_ORDER
|
||||
integer :: i,j,m,n
|
||||
real*8, dimension(ordn,ordn) :: yatmp
|
||||
real*8, dimension(ordn) :: ymtmp
|
||||
real*8, dimension(ordn) :: yntmp
|
||||
real*8, dimension(ordn) :: yqtmp
|
||||
|
||||
m=size(x1a)
|
||||
@@ -1243,29 +1428,36 @@ end subroutine d2dump
|
||||
yqtmp=ya(i,j,:)
|
||||
call polint(x3a,yqtmp,x3,yatmp(i,j),dy,ordn)
|
||||
end do
|
||||
yntmp=yatmp(i,:)
|
||||
call polint(x2a,yntmp,x2,ymtmp(i),dy,ordn)
|
||||
end do
|
||||
call polint(x1a,ymtmp,x1,y,dy,ordn)
|
||||
#else
|
||||
integer :: j, k
|
||||
real*8, dimension(ordn,ordn) :: yatmp
|
||||
real*8, dimension(ordn) :: ymtmp
|
||||
real*8 :: dy_temp
|
||||
|
||||
do k=1,ordn
|
||||
do j=1,ordn
|
||||
call polint(x1a, ya(:,j,k), x1, yatmp(j,k), dy_temp, ordn)
|
||||
end do
|
||||
end do
|
||||
do k=1,ordn
|
||||
call polint(x2a, yatmp(:,k), x2, ymtmp(k), dy_temp, ordn)
|
||||
end do
|
||||
call polint(x3a, ymtmp, x3, y, dy, ordn)
|
||||
#endif
|
||||
|
||||
return
|
||||
end subroutine polin3
|
||||
yntmp=yatmp(i,:)
|
||||
call polint(x2a,yntmp,x2,ymtmp(i),dy,ordn)
|
||||
end do
|
||||
call polint(x1a,ymtmp,x1,y,dy,ordn)
|
||||
#else
|
||||
integer :: i, j, k
|
||||
real*8, dimension(ordn) :: w1, w2
|
||||
real*8, dimension(ordn) :: ymtmp
|
||||
real*8 :: yx_sum, x_sum
|
||||
|
||||
call polint_lagrange_weights(x1a, x1, w1, ordn)
|
||||
call polint_lagrange_weights(x2a, x2, w2, ordn)
|
||||
|
||||
do k = 1, ordn
|
||||
yx_sum = 0.d0
|
||||
do j = 1, ordn
|
||||
x_sum = 0.d0
|
||||
do i = 1, ordn
|
||||
x_sum = x_sum + w1(i) * ya(i,j,k)
|
||||
end do
|
||||
yx_sum = yx_sum + w2(j) * x_sum
|
||||
end do
|
||||
ymtmp(k) = yx_sum
|
||||
end do
|
||||
|
||||
call polint(x3a, ymtmp, x3, y, dy, ordn)
|
||||
#endif
|
||||
|
||||
return
|
||||
end subroutine polin3
|
||||
!--------------------------------------------------------------------------------------
|
||||
! calculate L2norm
|
||||
subroutine l2normhelper(ex, X, Y, Z,xmin,ymin,zmin,xmax,ymax,zmax,&
|
||||
@@ -1319,13 +1511,88 @@ deallocate(f_flat)
|
||||
|
||||
f_out = f_out*dX*dY*dZ
|
||||
|
||||
return
|
||||
|
||||
end subroutine l2normhelper
|
||||
!--------------------------------------------------------------------------------------
|
||||
! calculate L2norm especially for shell Blocks
|
||||
subroutine l2normhelper_sh(ex, X, Y, Z,xmin,ymin,zmin,xmax,ymax,zmax,&
|
||||
f,f_out,gw,ogw,Symmetry)
|
||||
return
|
||||
|
||||
end subroutine l2normhelper
|
||||
!--------------------------------------------------------------------------------------
|
||||
subroutine l2normhelper7(ex, X, Y, Z,xmin,ymin,zmin,xmax,ymax,zmax,&
|
||||
f1,f2,f3,f4,f5,f6,f7,f_out,gw)
|
||||
|
||||
implicit none
|
||||
!~~~~~~> Input parameters:
|
||||
integer,intent(in ):: ex(1:3)
|
||||
real*8, intent(in ):: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3)),xmin,ymin,zmin,xmax,ymax,zmax
|
||||
integer,intent(in)::gw
|
||||
real*8, dimension(ex(1),ex(2),ex(3)),intent(in) :: f1,f2,f3,f4,f5,f6,f7
|
||||
real*8, intent(out) :: f_out(7)
|
||||
!~~~~~~> Other variables:
|
||||
|
||||
real*8 :: dX, dY, dZ
|
||||
integer::imin,jmin,kmin
|
||||
integer::imax,jmax,kmax
|
||||
integer::i,j,k
|
||||
real*8 :: s1,s2,s3,s4,s5,s6,s7
|
||||
|
||||
dX = X(2) - X(1)
|
||||
dY = Y(2) - Y(1)
|
||||
dZ = Z(2) - Z(1)
|
||||
|
||||
! for ghost zone
|
||||
imin = gw+1
|
||||
jmin = gw+1
|
||||
kmin = gw+1
|
||||
|
||||
imax = ex(1) - gw
|
||||
jmax = ex(2) - gw
|
||||
kmax = ex(3) - gw
|
||||
|
||||
!for patch boundary (i.e., not ghost boundary)
|
||||
|
||||
if(dabs(X(ex(1))-xmax) < dX) imax = ex(1)
|
||||
if(dabs(Y(ex(2))-ymax) < dY) jmax = ex(2)
|
||||
if(dabs(Z(ex(3))-zmax) < dZ) kmax = ex(3)
|
||||
if(dabs(X(1)-xmin) < dX) imin = 1
|
||||
if(dabs(Y(1)-ymin) < dY) jmin = 1
|
||||
if(dabs(Z(1)-zmin) < dZ) kmin = 1
|
||||
|
||||
s1 = 0.d0
|
||||
s2 = 0.d0
|
||||
s3 = 0.d0
|
||||
s4 = 0.d0
|
||||
s5 = 0.d0
|
||||
s6 = 0.d0
|
||||
s7 = 0.d0
|
||||
|
||||
do k=kmin,kmax
|
||||
do j=jmin,jmax
|
||||
!DIR$ SIMD REDUCTION(+:s1,s2,s3,s4,s5,s6,s7)
|
||||
do i=imin,imax
|
||||
s1 = s1 + f1(i,j,k)*f1(i,j,k)
|
||||
s2 = s2 + f2(i,j,k)*f2(i,j,k)
|
||||
s3 = s3 + f3(i,j,k)*f3(i,j,k)
|
||||
s4 = s4 + f4(i,j,k)*f4(i,j,k)
|
||||
s5 = s5 + f5(i,j,k)*f5(i,j,k)
|
||||
s6 = s6 + f6(i,j,k)*f6(i,j,k)
|
||||
s7 = s7 + f7(i,j,k)*f7(i,j,k)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
f_out(1) = s1*dX*dY*dZ
|
||||
f_out(2) = s2*dX*dY*dZ
|
||||
f_out(3) = s3*dX*dY*dZ
|
||||
f_out(4) = s4*dX*dY*dZ
|
||||
f_out(5) = s5*dX*dY*dZ
|
||||
f_out(6) = s6*dX*dY*dZ
|
||||
f_out(7) = s7*dX*dY*dZ
|
||||
|
||||
return
|
||||
|
||||
end subroutine l2normhelper7
|
||||
!--------------------------------------------------------------------------------------
|
||||
! calculate L2norm especially for shell Blocks
|
||||
subroutine l2normhelper_sh(ex, X, Y, Z,xmin,ymin,zmin,xmax,ymax,zmax,&
|
||||
f,f_out,gw,ogw,Symmetry)
|
||||
|
||||
implicit none
|
||||
!~~~~~~> Input parameters:
|
||||
@@ -1608,11 +1875,14 @@ deallocate(f_flat)
|
||||
! ^
|
||||
! f=3/8*f_1 + 3/4*f_2 - 1/8*f_3
|
||||
|
||||
real*8,parameter::C1=3.d0/8.d0,C2=3.d0/4.d0,C3=-1.d0/8.d0
|
||||
|
||||
fout = C1*f1+C2*f2+C3*f3
|
||||
|
||||
return
|
||||
real*8,parameter::C1=3.d0/8.d0,C2=3.d0/4.d0,C3=-1.d0/8.d0
|
||||
integer :: i,j,k
|
||||
|
||||
do concurrent (k=1:ext(3), j=1:ext(2), i=1:ext(1))
|
||||
fout(i,j,k) = C1*f1(i,j,k)+C2*f2(i,j,k)+C3*f3(i,j,k)
|
||||
end do
|
||||
|
||||
return
|
||||
|
||||
end subroutine average2
|
||||
!-----------------------------------------------------------------------------
|
||||
|
||||
@@ -12,9 +12,10 @@
|
||||
#define f_global_interpind global_interpind
|
||||
#define f_global_interpind2d global_interpind2d
|
||||
#define f_global_interpind1d global_interpind1d
|
||||
#define f_l2normhelper l2normhelper
|
||||
#define f_l2normhelper_sh l2normhelper_sh
|
||||
#define f_l2normhelper_sh_rms l2normhelper_sh_rms
|
||||
#define f_l2normhelper l2normhelper
|
||||
#define f_l2normhelper7 l2normhelper7
|
||||
#define f_l2normhelper_sh l2normhelper_sh
|
||||
#define f_l2normhelper_sh_rms l2normhelper_sh_rms
|
||||
#define f_average average
|
||||
#define f_average3 average3
|
||||
#define f_average2 average2
|
||||
@@ -41,9 +42,10 @@
|
||||
#define f_global_interpind GLOBAL_INTERPIND
|
||||
#define f_global_interpind2d GLOBAL_INTERPIND2D
|
||||
#define f_global_interpind1d GLOBAL_INTERPIND1D
|
||||
#define f_l2normhelper L2NORMHELPER
|
||||
#define f_l2normhelper_sh L2NORMHELPER_SH
|
||||
#define f_l2normhelper_sh_rms L2NORMHELPER_SH_RMS
|
||||
#define f_l2normhelper L2NORMHELPER
|
||||
#define f_l2normhelper7 L2NORMHELPER7
|
||||
#define f_l2normhelper_sh L2NORMHELPER_SH
|
||||
#define f_l2normhelper_sh_rms L2NORMHELPER_SH_RMS
|
||||
#define f_average AVERAGE
|
||||
#define f_average3 AVERAGE3
|
||||
#define f_average2 AVERAGE2
|
||||
@@ -70,9 +72,10 @@
|
||||
#define f_global_interpind global_interpind_
|
||||
#define f_global_interpind2d global_interpind2d_
|
||||
#define f_global_interpind1d global_interpind1d_
|
||||
#define f_l2normhelper l2normhelper_
|
||||
#define f_l2normhelper_sh l2normhelper_sh_
|
||||
#define f_l2normhelper_sh_rms l2normhelper_sh_rms_
|
||||
#define f_l2normhelper l2normhelper_
|
||||
#define f_l2normhelper7 l2normhelper7_
|
||||
#define f_l2normhelper_sh l2normhelper_sh_
|
||||
#define f_l2normhelper_sh_rms l2normhelper_sh_rms_
|
||||
#define f_average average_
|
||||
#define f_average3 average3_
|
||||
#define f_average2 average2_
|
||||
@@ -156,20 +159,29 @@ extern "C"
|
||||
int *, double *, int &, int &);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
{
|
||||
void f_l2normhelper(int *, double *, double *, double *,
|
||||
double &, double &, double &,
|
||||
double &, double &, double &,
|
||||
double *, double &, int &);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
{
|
||||
void f_l2normhelper_sh(int *, double *, double *, double *,
|
||||
double &, double &, double &,
|
||||
double &, double &, double &,
|
||||
double *, double &, int &, int &, int &);
|
||||
extern "C"
|
||||
{
|
||||
void f_l2normhelper(int *, double *, double *, double *,
|
||||
double &, double &, double &,
|
||||
double &, double &, double &,
|
||||
double *, double &, int &);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
{
|
||||
void f_l2normhelper7(int *, double *, double *, double *,
|
||||
double &, double &, double &,
|
||||
double &, double &, double &,
|
||||
double *, double *, double *, double *,
|
||||
double *, double *, double *, double *, int &);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
{
|
||||
void f_l2normhelper_sh(int *, double *, double *, double *,
|
||||
double &, double &, double &,
|
||||
double &, double &, double &,
|
||||
double *, double &, int &, int &, int &);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
/* 本头文件由自订profile框架自动生成并非人工硬编码针对Case优化 */
|
||||
/* 更新:负载均衡问题已经通过优化插值函数解决,此profile静态均衡方案已弃用,本头文件现在未参与编译 */
|
||||
/* Auto-generated from interp_lb_profile.bin — do not edit */
|
||||
#ifndef INTERP_LB_PROFILE_DATA_H
|
||||
#define INTERP_LB_PROFILE_DATA_H
|
||||
|
||||
@@ -63,19 +63,28 @@ void kodis(const int ex[3],
|
||||
* C: k0=0..ex3-1, j0=0..ex2-1, i0=0..ex1-1
|
||||
* 并定义 Fortran index: iF=i0+1, ...
|
||||
*/
|
||||
for (int k0 = 0; k0 < ex3; ++k0) {
|
||||
// 收紧循环范围:只遍历满足 iF±3/jF±3/kF±3 条件的内部点
|
||||
// iF-3 >= iminF => iF >= iminF+3 => i0 >= iminF+2 (因为 iF=i0+1)
|
||||
// iF+3 <= imaxF => iF <= imaxF-3 => i0 <= imaxF-4
|
||||
const int i0_lo = (iminF + 2 > 0) ? iminF + 2 : 0;
|
||||
const int j0_lo = (jminF + 2 > 0) ? jminF + 2 : 0;
|
||||
const int k0_lo = (kminF + 2 > 0) ? kminF + 2 : 0;
|
||||
const int i0_hi = imaxF - 4; // inclusive
|
||||
const int j0_hi = jmaxF - 4;
|
||||
const int k0_hi = kmaxF - 4;
|
||||
|
||||
if (i0_lo > i0_hi || j0_lo > j0_hi || k0_lo > k0_hi) {
|
||||
free(fh);
|
||||
return;
|
||||
}
|
||||
|
||||
for (int k0 = k0_lo; k0 <= k0_hi; ++k0) {
|
||||
const int kF = k0 + 1;
|
||||
for (int j0 = 0; j0 < ex2; ++j0) {
|
||||
for (int j0 = j0_lo; j0 <= j0_hi; ++j0) {
|
||||
const int jF = j0 + 1;
|
||||
for (int i0 = 0; i0 < ex1; ++i0) {
|
||||
for (int i0 = i0_lo; i0 <= i0_hi; ++i0) {
|
||||
const int iF = i0 + 1;
|
||||
|
||||
// Fortran if 条件:
|
||||
// i-3 >= imin .and. i+3 <= imax 等(都是 Fortran 索引)
|
||||
if ((iF - 3) >= iminF && (iF + 3) <= imaxF &&
|
||||
(jF - 3) >= jminF && (jF + 3) <= jmaxF &&
|
||||
(kF - 3) >= kminF && (kF + 3) <= kmaxF)
|
||||
{
|
||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||
|
||||
// 三个方向各一份同型的 7 点组合(实际上是对称的 6th-order dissipation/filter 核)
|
||||
@@ -100,7 +109,6 @@ void kodis(const int ex[3],
|
||||
// Fortran:
|
||||
// f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof*(Dx_term + Dy_term + Dz_term)
|
||||
f_rhs[p] += (eps / cof) * (Dx_term + Dy_term + Dz_term);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
248
AMSS_NCKU_source/lopsided_kodis_c.C
Normal file
248
AMSS_NCKU_source/lopsided_kodis_c.C
Normal file
@@ -0,0 +1,248 @@
|
||||
#include "tool.h"
|
||||
|
||||
/*
|
||||
* Combined advection (lopsided) + KO dissipation (kodis).
|
||||
* Uses one shared symmetry_bd buffer per call.
|
||||
*/
|
||||
void lopsided_kodis(const int ex[3],
|
||||
const double *X, const double *Y, const double *Z,
|
||||
const double *f, double *f_rhs,
|
||||
const double *Sfx, const double *Sfy, const double *Sfz,
|
||||
int Symmetry, const double SoA[3], double eps)
|
||||
{
|
||||
const double ZEO = 0.0, ONE = 1.0, F3 = 3.0;
|
||||
const double F6 = 6.0, F18 = 18.0;
|
||||
const double F12 = 12.0, F10 = 10.0, EIT = 8.0;
|
||||
const double SIX = 6.0, FIT = 15.0, TWT = 20.0;
|
||||
const double cof = 64.0; // 2^6
|
||||
|
||||
const int NO_SYMM = 0, EQ_SYMM = 1;
|
||||
|
||||
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
|
||||
|
||||
const double dX = X[1] - X[0];
|
||||
const double dY = Y[1] - Y[0];
|
||||
const double dZ = Z[1] - Z[0];
|
||||
|
||||
const double d12dx = ONE / F12 / dX;
|
||||
const double d12dy = ONE / F12 / dY;
|
||||
const double d12dz = ONE / F12 / dZ;
|
||||
|
||||
const int imaxF = ex1;
|
||||
const int jmaxF = ex2;
|
||||
const int kmaxF = ex3;
|
||||
|
||||
int iminF = 1, jminF = 1, kminF = 1;
|
||||
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
|
||||
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
|
||||
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
|
||||
|
||||
// fh for Fortran-style domain (-2:ex1,-2:ex2,-2:ex3)
|
||||
const size_t nx = (size_t)ex1 + 3;
|
||||
const size_t ny = (size_t)ex2 + 3;
|
||||
const size_t nz = (size_t)ex3 + 3;
|
||||
const size_t fh_size = nx * ny * nz;
|
||||
|
||||
double *fh = (double*)malloc(fh_size * sizeof(double));
|
||||
if (!fh) return;
|
||||
|
||||
symmetry_bd(3, ex, f, fh, SoA);
|
||||
|
||||
// Advection (same stencil logic as lopsided_c.C)
|
||||
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
|
||||
const int kF = k0 + 1;
|
||||
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
|
||||
const int jF = j0 + 1;
|
||||
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
|
||||
const int iF = i0 + 1;
|
||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||
|
||||
const double sfx = Sfx[p];
|
||||
if (sfx > ZEO) {
|
||||
if (i0 <= ex1 - 4) {
|
||||
f_rhs[p] += sfx * d12dx *
|
||||
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
||||
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
|
||||
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
||||
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
|
||||
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
|
||||
} else if (i0 <= ex1 - 3) {
|
||||
f_rhs[p] += sfx * d12dx *
|
||||
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
|
||||
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
||||
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
||||
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
|
||||
} else if (i0 <= ex1 - 2) {
|
||||
f_rhs[p] -= sfx * d12dx *
|
||||
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
||||
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
|
||||
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
||||
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
|
||||
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
|
||||
}
|
||||
} else if (sfx < ZEO) {
|
||||
if ((i0 - 2) >= iminF) {
|
||||
f_rhs[p] -= sfx * d12dx *
|
||||
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
||||
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
|
||||
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
||||
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
|
||||
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
|
||||
} else if ((i0 - 1) >= iminF) {
|
||||
f_rhs[p] += sfx * d12dx *
|
||||
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
|
||||
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
||||
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
||||
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
|
||||
} else if (i0 >= iminF) {
|
||||
f_rhs[p] += sfx * d12dx *
|
||||
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
||||
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
|
||||
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
||||
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
|
||||
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
|
||||
}
|
||||
}
|
||||
|
||||
const double sfy = Sfy[p];
|
||||
if (sfy > ZEO) {
|
||||
if (j0 <= ex2 - 4) {
|
||||
f_rhs[p] += sfy * d12dy *
|
||||
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
||||
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
|
||||
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
||||
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
|
||||
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
|
||||
} else if (j0 <= ex2 - 3) {
|
||||
f_rhs[p] += sfy * d12dy *
|
||||
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
|
||||
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
||||
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
||||
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
|
||||
} else if (j0 <= ex2 - 2) {
|
||||
f_rhs[p] -= sfy * d12dy *
|
||||
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
||||
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
|
||||
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
||||
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
|
||||
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
|
||||
}
|
||||
} else if (sfy < ZEO) {
|
||||
if ((j0 - 2) >= jminF) {
|
||||
f_rhs[p] -= sfy * d12dy *
|
||||
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
||||
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
|
||||
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
||||
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
|
||||
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
|
||||
} else if ((j0 - 1) >= jminF) {
|
||||
f_rhs[p] += sfy * d12dy *
|
||||
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
|
||||
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
||||
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
||||
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
|
||||
} else if (j0 >= jminF) {
|
||||
f_rhs[p] += sfy * d12dy *
|
||||
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
||||
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
|
||||
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
||||
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
|
||||
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
|
||||
}
|
||||
}
|
||||
|
||||
const double sfz = Sfz[p];
|
||||
if (sfz > ZEO) {
|
||||
if (k0 <= ex3 - 4) {
|
||||
f_rhs[p] += sfz * d12dz *
|
||||
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
||||
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
|
||||
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
||||
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
|
||||
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
|
||||
} else if (k0 <= ex3 - 3) {
|
||||
f_rhs[p] += sfz * d12dz *
|
||||
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
|
||||
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
||||
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
||||
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
|
||||
} else if (k0 <= ex3 - 2) {
|
||||
f_rhs[p] -= sfz * d12dz *
|
||||
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
||||
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
|
||||
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
||||
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
|
||||
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
|
||||
}
|
||||
} else if (sfz < ZEO) {
|
||||
if ((k0 - 2) >= kminF) {
|
||||
f_rhs[p] -= sfz * d12dz *
|
||||
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
||||
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
|
||||
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
||||
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
|
||||
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
|
||||
} else if ((k0 - 1) >= kminF) {
|
||||
f_rhs[p] += sfz * d12dz *
|
||||
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
|
||||
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
||||
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
||||
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
|
||||
} else if (k0 >= kminF) {
|
||||
f_rhs[p] += sfz * d12dz *
|
||||
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
||||
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
|
||||
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
||||
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
|
||||
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// KO dissipation (same domain restriction as kodiss_c.C)
|
||||
if (eps > ZEO) {
|
||||
const int i0_lo = (iminF + 2 > 0) ? iminF + 2 : 0;
|
||||
const int j0_lo = (jminF + 2 > 0) ? jminF + 2 : 0;
|
||||
const int k0_lo = (kminF + 2 > 0) ? kminF + 2 : 0;
|
||||
const int i0_hi = imaxF - 4; // inclusive
|
||||
const int j0_hi = jmaxF - 4;
|
||||
const int k0_hi = kmaxF - 4;
|
||||
|
||||
if (!(i0_lo > i0_hi || j0_lo > j0_hi || k0_lo > k0_hi)) {
|
||||
for (int k0 = k0_lo; k0 <= k0_hi; ++k0) {
|
||||
const int kF = k0 + 1;
|
||||
for (int j0 = j0_lo; j0 <= j0_hi; ++j0) {
|
||||
const int jF = j0 + 1;
|
||||
for (int i0 = i0_lo; i0 <= i0_hi; ++i0) {
|
||||
const int iF = i0 + 1;
|
||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||
|
||||
const double Dx_term =
|
||||
((fh[idx_fh_F(iF - 3, jF, kF, ex)] + fh[idx_fh_F(iF + 3, jF, kF, ex)]) -
|
||||
SIX * (fh[idx_fh_F(iF - 2, jF, kF, ex)] + fh[idx_fh_F(iF + 2, jF, kF, ex)]) +
|
||||
FIT * (fh[idx_fh_F(iF - 1, jF, kF, ex)] + fh[idx_fh_F(iF + 1, jF, kF, ex)]) -
|
||||
TWT * fh[idx_fh_F(iF, jF, kF, ex)]) / dX;
|
||||
|
||||
const double Dy_term =
|
||||
((fh[idx_fh_F(iF, jF - 3, kF, ex)] + fh[idx_fh_F(iF, jF + 3, kF, ex)]) -
|
||||
SIX * (fh[idx_fh_F(iF, jF - 2, kF, ex)] + fh[idx_fh_F(iF, jF + 2, kF, ex)]) +
|
||||
FIT * (fh[idx_fh_F(iF, jF - 1, kF, ex)] + fh[idx_fh_F(iF, jF + 1, kF, ex)]) -
|
||||
TWT * fh[idx_fh_F(iF, jF, kF, ex)]) / dY;
|
||||
|
||||
const double Dz_term =
|
||||
((fh[idx_fh_F(iF, jF, kF - 3, ex)] + fh[idx_fh_F(iF, jF, kF + 3, ex)]) -
|
||||
SIX * (fh[idx_fh_F(iF, jF, kF - 2, ex)] + fh[idx_fh_F(iF, jF, kF + 2, ex)]) +
|
||||
FIT * (fh[idx_fh_F(iF, jF, kF - 1, ex)] + fh[idx_fh_F(iF, jF, kF + 1, ex)]) -
|
||||
TWT * fh[idx_fh_F(iF, jF, kF, ex)]) / dZ;
|
||||
|
||||
f_rhs[p] += (eps / cof) * (Dx_term + Dy_term + Dz_term);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(fh);
|
||||
}
|
||||
@@ -29,6 +29,16 @@
|
||||
|
||||
#define REGLEV 0
|
||||
|
||||
#define BSSN_FINE_TIMING 0
|
||||
|
||||
#define BSSN_FINE_TIMING_EVERY 1
|
||||
|
||||
#define BSSN_FINE_TIMING_TOPN 8
|
||||
|
||||
#define BSSN_KERNEL_FINE_TIMING 0
|
||||
|
||||
#define BSSN_ENABLE_STDIN_ABORT_POLL 0
|
||||
|
||||
//#define USE_GPU
|
||||
|
||||
//#define CHECKDETAIL
|
||||
@@ -88,6 +98,21 @@
|
||||
// 0: for every level;
|
||||
// 1: for all
|
||||
//
|
||||
// define BSSN_FINE_TIMING
|
||||
// enable fine-grained per-timestep timing monitor
|
||||
//
|
||||
// define BSSN_FINE_TIMING_EVERY
|
||||
// report timing every N coarse timesteps
|
||||
//
|
||||
// define BSSN_FINE_TIMING_TOPN
|
||||
// number of hottest timing buckets shown in stdout
|
||||
//
|
||||
// define BSSN_KERNEL_FINE_TIMING
|
||||
// enable split timing inside compute_rhs_bssn
|
||||
//
|
||||
// define BSSN_ENABLE_STDIN_ABORT_POLL
|
||||
// poll stdin and broadcast abort flag every coarse step
|
||||
//
|
||||
// define USE_GPU
|
||||
// use gpu or not
|
||||
//
|
||||
@@ -142,4 +167,3 @@
|
||||
#define TINY 1e-10
|
||||
|
||||
#endif /* MICRODEF_H */
|
||||
|
||||
|
||||
@@ -2,6 +2,12 @@
|
||||
|
||||
include makefile.inc
|
||||
|
||||
## polint(ordn=6) kernel selector:
|
||||
## 1 (default): barycentric fast path
|
||||
## 0 : fallback to Neville path
|
||||
POLINT6_USE_BARY ?= 1
|
||||
POLINT6_FLAG = -DPOLINT6_USE_BARYCENTRIC=$(POLINT6_USE_BARY)
|
||||
|
||||
## ABE build flags selected by PGO_MODE (set in makefile.inc, default: opt)
|
||||
## make -> opt (PGO-guided, maximum performance)
|
||||
## make PGO_MODE=instrument -> instrument (Phase 1: collect fresh profile data)
|
||||
@@ -12,15 +18,17 @@ ifeq ($(PGO_MODE),instrument)
|
||||
CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
||||
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
|
||||
f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
||||
-align array64byte -fpp -I${MKLROOT}/include
|
||||
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
||||
else
|
||||
## opt (default): maximum performance with PGO profile data
|
||||
## opt (default): maximum performance with PGO profile data -fprofile-instr-use=$(PROFDATA) \
|
||||
## PGO has been turned off, now tested and found to be negative optimization
|
||||
## INTERP_LB_FLAGS has been turned off too, now tested and found to be negative optimization
|
||||
|
||||
|
||||
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||
-fprofile-instr-use=$(PROFDATA) \
|
||||
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
|
||||
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||
-fprofile-instr-use=$(PROFDATA) \
|
||||
-align array64byte -fpp -I${MKLROOT}/include
|
||||
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
||||
endif
|
||||
|
||||
.SUFFIXES: .o .f90 .C .for .cu
|
||||
@@ -37,6 +45,14 @@ endif
|
||||
.cu.o:
|
||||
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
||||
|
||||
# CUDA rewrite of BSSN RHS (drop-in replacement for bssn_rhs_c + stencil helpers)
|
||||
bssn_rhs_cuda.o: bssn_rhs_cuda.cu bssn_rhs.h macrodef.h
|
||||
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
||||
|
||||
# CUDA rewrite of Z4C Cartesian RHS
|
||||
z4c_rhs_cuda.o: z4c_rhs_cuda.cu z4c_rhs_cuda.h bssn_rhs.h macrodef.h ricci_gamma.h
|
||||
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
||||
|
||||
# C rewrite of BSSN RHS kernel and helpers
|
||||
bssn_rhs_c.o: bssn_rhs_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
@@ -53,9 +69,15 @@ kodiss_c.o: kodiss_c.C
|
||||
lopsided_c.o: lopsided_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h
|
||||
lopsided_kodis_c.o: lopsided_kodis_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
z4c_rhs_c.o: z4c_rhs_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
#interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h
|
||||
# ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS
|
||||
TP_PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata
|
||||
TP_OPTFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||
@@ -70,13 +92,62 @@ TwoPunctureABE.o: TwoPunctureABE.C
|
||||
|
||||
# Input files
|
||||
|
||||
## CUDA BSSN RHS switch
|
||||
## 1 : use the rewritten CUDA bssn_rhs backend
|
||||
## 0 : keep the normal CPU/Fortran selection below
|
||||
USE_CUDA_BSSN ?= 0
|
||||
USE_CUDA_Z4C ?= 0
|
||||
|
||||
CXXAPPFLAGS += -DUSE_CUDA_BSSN=$(USE_CUDA_BSSN)
|
||||
CUDA_APP_FLAGS += -DUSE_CUDA_BSSN=$(USE_CUDA_BSSN)
|
||||
CXXAPPFLAGS += -DUSE_CUDA_Z4C=$(USE_CUDA_Z4C)
|
||||
CUDA_APP_FLAGS += -DUSE_CUDA_Z4C=$(USE_CUDA_Z4C)
|
||||
|
||||
## Kernel implementation switch (set USE_CXX_KERNELS=0 to fall back to Fortran)
|
||||
ifeq ($(USE_CXX_KERNELS),0)
|
||||
# Fortran mode: no C rewrite files; bssn_rhs.o is included via F90FILES below
|
||||
CFILES =
|
||||
CFILES_CPU =
|
||||
else
|
||||
# C++ mode (default): C rewrite of bssn_rhs and helper kernels
|
||||
CFILES = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o
|
||||
CFILES_CPU = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_kodis_c.o
|
||||
endif
|
||||
|
||||
CFILES_CUDA_BSSN = bssn_rhs_cuda.o
|
||||
|
||||
ifeq ($(USE_CUDA_BSSN),1)
|
||||
CFILES = $(CFILES_CUDA_BSSN)
|
||||
else
|
||||
CFILES = $(CFILES_CPU)
|
||||
endif
|
||||
|
||||
ifeq ($(USE_CUDA_Z4C),1)
|
||||
CFILES += z4c_rhs_cuda.o
|
||||
Z4C_F90_OBJ =
|
||||
else ifeq ($(USE_CXX_Z4C_KERNELS),1)
|
||||
CFILES += z4c_rhs_c.o
|
||||
Z4C_F90_OBJ =
|
||||
else
|
||||
Z4C_F90_OBJ = Z4c_rhs.o
|
||||
endif
|
||||
|
||||
## RK4 kernel switch (independent from USE_CXX_KERNELS)
|
||||
ifeq ($(USE_CXX_RK4),1)
|
||||
RK4_C_OBJ = rungekutta4_rout_c.o
|
||||
RK4_F90_OBJ =
|
||||
else
|
||||
RK4_C_OBJ =
|
||||
RK4_F90_OBJ = rungekutta4_rout.o
|
||||
endif
|
||||
|
||||
CFILES += $(RK4_C_OBJ)
|
||||
ABE_CUDA_CFILES = $(CFILES_CUDA_BSSN) z4c_rhs_cuda.o $(RK4_C_OBJ)
|
||||
|
||||
ABE_LDLIBS = $(LDLIBS)
|
||||
ifeq ($(USE_CUDA_BSSN),1)
|
||||
ABE_LDLIBS += -lcudart $(CUDA_LIB_PATH)
|
||||
endif
|
||||
ifeq ($(USE_CUDA_Z4C),1)
|
||||
ABE_LDLIBS += -lcudart $(CUDA_LIB_PATH)
|
||||
endif
|
||||
|
||||
C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
||||
@@ -86,7 +157,7 @@ C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
||||
Parallel_bam.o scalar_class.o transpbh.o NullShellPatch2.o\
|
||||
NullShellPatch2_Evo.o writefile_f.o interp_lb_profile.o
|
||||
|
||||
C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
||||
#C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
||||
cgh.o surface_integral.o ShellPatch.o\
|
||||
bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o\
|
||||
bssnEM_class.o cpbc_util.o z4c_rhs_point.o checkpoint.o\
|
||||
@@ -96,11 +167,11 @@ C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o
|
||||
|
||||
F90FILES_BASE = enforce_algebra.o fmisc.o initial_puncture.o prolongrestrict.o\
|
||||
prolongrestrict_cell.o prolongrestrict_vertex.o\
|
||||
rungekutta4_rout.o diff_new.o kodiss.o kodiss_sh.o\
|
||||
$(RK4_F90_OBJ) diff_new.o kodiss.o kodiss_sh.o\
|
||||
lopsidediff.o sommerfeld_rout.o getnp4.o diff_new_sh.o\
|
||||
shellfunctions.o bssn_rhs_ss.o Set_Rho_ADM.o\
|
||||
getnp4EScalar.o bssnEScalar_rhs.o bssn_constraint.o ricci_gamma.o\
|
||||
fadmquantites_bssn.o Z4c_rhs.o Z4c_rhs_ss.o point_diff_new_sh.o\
|
||||
fadmquantites_bssn.o $(Z4C_F90_OBJ) Z4c_rhs_ss.o point_diff_new_sh.o\
|
||||
cpbc.o getnp4old.o NullEvol.o initial_null.o initial_maxwell.o\
|
||||
getnpem2.o empart.o NullNews.o fourdcurvature.o\
|
||||
bssn2adm.o adm_constraint.o adm_ricci_gamma.o\
|
||||
@@ -124,10 +195,10 @@ initial_guess.o Newton.o Jacobian.o ilucg.o IntPnts0.o IntPnts.o
|
||||
|
||||
TwoPunctureFILES = TwoPunctureABE.o TwoPunctures.o
|
||||
|
||||
CUDAFILES = bssn_gpu.o bssn_gpu_rhs_ss.o
|
||||
#CUDAFILES = bssn_gpu.o bssn_gpu_rhs_ss.o
|
||||
|
||||
# file dependences
|
||||
$(C++FILES) $(C++FILES_GPU) $(F90FILES) $(CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.fh
|
||||
$(C++FILES) $(C++FILES_GPU) $(F90FILES) $(CFILES) $(ABE_CUDA_CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.fh
|
||||
|
||||
$(C++FILES): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
|
||||
misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\
|
||||
@@ -138,7 +209,7 @@ $(C++FILES): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
|
||||
empart.h NullNews.h kodiss.h Parallel_bam.h ricci_gamma.h\
|
||||
initial_null2.h NullShellPatch2.h
|
||||
|
||||
$(C++FILES_GPU): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
|
||||
#$(C++FILES_GPU): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
|
||||
misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\
|
||||
rungekutta4_rout.h var.h bssn_rhs.h sommerfeld_rout.h\
|
||||
cgh.h surface_integral.h ShellPatch.h shellfunctions.h perf.h\
|
||||
@@ -150,7 +221,7 @@ $(C++FILES_GPU): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h
|
||||
|
||||
$(AHFDOBJS): cctk.h cctk_Config.h cctk_Types.h cctk_Constants.h myglobal.h
|
||||
|
||||
$(C++FILES) $(C++FILES_GPU) $(CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.h
|
||||
$(C++FILES) $(C++FILES_GPU) $(CFILES) $(ABE_CUDA_CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.h
|
||||
|
||||
TwoPunctureFILES: TwoPunctures.h
|
||||
|
||||
@@ -160,13 +231,18 @@ misc.o : zbesh.o
|
||||
|
||||
# projects
|
||||
ABE: $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
|
||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS)
|
||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(ABE_LDLIBS)
|
||||
|
||||
ABE_CUDA: USE_CUDA_BSSN=1
|
||||
ABE_CUDA: USE_CUDA_Z4C=1
|
||||
ABE_CUDA: $(C++FILES) $(ABE_CUDA_CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
|
||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(ABE_CUDA_CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS) -lcudart $(CUDA_LIB_PATH)
|
||||
|
||||
ABEGPU: $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
|
||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
|
||||
#ABEGPU: $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
|
||||
# $(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
|
||||
|
||||
TwoPunctureABE: $(TwoPunctureFILES)
|
||||
$(CLINKER) $(TP_OPTFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
|
||||
|
||||
clean:
|
||||
rm *.o ABE ABEGPU TwoPunctureABE make.log -f
|
||||
rm *.o ABE ABE_CUDA ABEGPU TwoPunctureABE make.log -f
|
||||
|
||||
@@ -10,6 +10,20 @@ filein = -I/usr/include/ -I${MKLROOT}/include
|
||||
## Added -lifcore for Intel Fortran runtime and -limf for Intel math library
|
||||
LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl -liomp5
|
||||
|
||||
## Memory allocator switch
|
||||
## 1 (default) : link Intel oneTBB allocator (libtbbmalloc)
|
||||
## 0 : use system default allocator (ptmalloc)
|
||||
USE_TBBMALLOC ?= 1
|
||||
TBBMALLOC_SO ?= /home/intel/oneapi/2025.3/lib/libtbbmalloc.so
|
||||
ifneq ($(wildcard $(TBBMALLOC_SO)),)
|
||||
TBBMALLOC_LIBS = -Wl,--no-as-needed $(TBBMALLOC_SO) -Wl,--as-needed
|
||||
else
|
||||
TBBMALLOC_LIBS = -Wl,--no-as-needed -ltbbmalloc -Wl,--as-needed
|
||||
endif
|
||||
ifeq ($(USE_TBBMALLOC),1)
|
||||
LDLIBS := $(TBBMALLOC_LIBS) $(LDLIBS)
|
||||
endif
|
||||
|
||||
## PGO build mode switch (ABE only; TwoPunctureABE always uses opt flags)
|
||||
## opt : (default) maximum performance with PGO profile-guided optimization
|
||||
## instrument : PGO Phase 1 instrumentation to collect fresh profile data
|
||||
@@ -33,13 +47,28 @@ endif
|
||||
## 1 (default) : use C++ rewrite of bssn_rhs and helper kernels (faster)
|
||||
## 0 : fall back to original Fortran kernels
|
||||
USE_CXX_KERNELS ?= 1
|
||||
|
||||
## Z4C Cartesian RHS kernel switch
|
||||
## 1 (default) : use C++ rewrite of Z4c_rhs (main Cartesian path faster)
|
||||
## 0 : use original Fortran Z4c_rhs.o
|
||||
USE_CXX_Z4C_KERNELS ?= 1
|
||||
|
||||
## RK4 kernel implementation switch
|
||||
## 1 (default) : use C/C++ rewrite of rungekutta4_rout (for optimization experiments)
|
||||
## 0 : use original Fortran rungekutta4_rout.o
|
||||
USE_CXX_RK4 ?= 1
|
||||
|
||||
f90 = ifx
|
||||
f77 = ifx
|
||||
CXX = icpx
|
||||
CC = icx
|
||||
CLINKER = mpiicpx
|
||||
CLINKER = mpiicpx
|
||||
|
||||
Cu = nvcc
|
||||
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include
|
||||
#CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -arch compute_13 -code compute_13,sm_13 -Dfortran3 -Dnewc
|
||||
CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc
|
||||
CUDA_ARCH ?= sm_80
|
||||
ifneq ($(strip $(CUDA_ARCH)),)
|
||||
CUDA_APP_FLAGS += -arch=$(CUDA_ARCH)
|
||||
endif
|
||||
|
||||
@@ -1934,18 +1934,35 @@
|
||||
! when if=1 -> ic=0, this is different to vertex center grid
|
||||
real*8, dimension(-2:extc(1),-2:extc(2),-2:extc(3)) :: funcc
|
||||
integer,dimension(3) :: cxI
|
||||
integer :: i,j,k,ii,jj,kk
|
||||
integer :: i,j,k,ii,jj,kk,px,py,pz
|
||||
real*8, dimension(6,6) :: tmp2
|
||||
real*8, dimension(6) :: tmp1
|
||||
integer, dimension(extf(1)) :: cix
|
||||
integer, dimension(extf(2)) :: ciy
|
||||
integer, dimension(extf(3)) :: ciz
|
||||
integer, dimension(extf(1)) :: pix
|
||||
integer, dimension(extf(2)) :: piy
|
||||
integer, dimension(extf(3)) :: piz
|
||||
|
||||
real*8, parameter :: C1=7.7d1/8.192d3,C2=-6.93d2/8.192d3,C3=3.465d3/4.096d3
|
||||
real*8, parameter :: C6=6.3d1/8.192d3,C5=-4.95d2/8.192d3,C4=1.155d3/4.096d3
|
||||
real*8, dimension(6,2), parameter :: WC = reshape((/&
|
||||
C1,C2,C3,C4,C5,C6,&
|
||||
C6,C5,C4,C3,C2,C1/), (/6,2/))
|
||||
|
||||
integer::imini,imaxi,jmini,jmaxi,kmini,kmaxi
|
||||
integer::imino,imaxo,jmino,jmaxo,kmino,kmaxo
|
||||
integer::maxcx,maxcy,maxcz
|
||||
|
||||
real*8,dimension(3) :: CD,FD
|
||||
|
||||
real*8 :: tmp_yz(extc(1), 6) ! 存储整条 X 线上 6 个 Y 轴偏置的 Z 向插值结果
|
||||
real*8 :: tmp_xyz_line(-2:extc(1)) ! 包含 X 向 6 点模板访问所需下界
|
||||
real*8 :: v1, v2, v3, v4, v5, v6
|
||||
integer :: ic, jc, kc, ix_offset,ix,iy,iz,jc_min,jc_max,ic_min,ic_max,kc_min,kc_max
|
||||
integer :: i_lo, i_hi, j_lo, j_hi, k_lo, k_hi
|
||||
logical :: need_full_symmetry
|
||||
real*8 :: res_line
|
||||
real*8 :: tmp_z_slab(-2:extc(1), -2:extc(2)) ! 包含 Y/X 向模板访问所需下界
|
||||
if(wei.ne.3)then
|
||||
write(*,*)"prolongrestrict.f90::prolong3: this routine only surport 3 dimension"
|
||||
write(*,*)"dim = ",wei
|
||||
@@ -2020,145 +2037,140 @@
|
||||
return
|
||||
endif
|
||||
|
||||
call symmetry_bd(3,extc,func,funcc,SoA)
|
||||
|
||||
!~~~~~~> prolongation start...
|
||||
do i = imino,imaxo
|
||||
ii = i + lbf(1) - 1
|
||||
cix(i) = ii/2 - lbc(1) + 1
|
||||
if(ii/2*2 == ii)then
|
||||
pix(i) = 1
|
||||
else
|
||||
pix(i) = 2
|
||||
endif
|
||||
enddo
|
||||
do j = jmino,jmaxo
|
||||
jj = j + lbf(2) - 1
|
||||
ciy(j) = jj/2 - lbc(2) + 1
|
||||
if(jj/2*2 == jj)then
|
||||
piy(j) = 1
|
||||
else
|
||||
piy(j) = 2
|
||||
endif
|
||||
enddo
|
||||
do k = kmino,kmaxo
|
||||
do j = jmino,jmaxo
|
||||
do i = imino,imaxo
|
||||
cxI(1) = i
|
||||
cxI(2) = j
|
||||
cxI(3) = k
|
||||
! change to coarse level reference
|
||||
!|---*--- ---*--- ---*--- ---*--- ---*--- ---*--- ---*--- ---*---|
|
||||
!|=======x===============x===============x===============x=======|
|
||||
cxI = (cxI+lbf-1)/2
|
||||
! change to array index
|
||||
cxI = cxI - lbc + 1
|
||||
|
||||
if(any(cxI+3 > extc)) write(*,*)"error in prolong"
|
||||
ii=i+lbf(1)-1
|
||||
jj=j+lbf(2)-1
|
||||
kk=k+lbf(3)-1
|
||||
#if 0
|
||||
if(ii/2*2==ii)then
|
||||
if(jj/2*2==jj)then
|
||||
if(kk/2*2==kk)then
|
||||
tmp2= C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
||||
C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
||||
tmp1= C1*tmp2(:,1)+C2*tmp2(:,2)+C3*tmp2(:,3)+C4*tmp2(:,4)+C5*tmp2(:,5)+C6*tmp2(:,6)
|
||||
funf(i,j,k)= C1*tmp1(1)+C2*tmp1(2)+C3*tmp1(3)+C4*tmp1(4)+C5*tmp1(5)+C6*tmp1(6)
|
||||
else
|
||||
tmp2= C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
||||
C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
||||
tmp1= C1*tmp2(:,1)+C2*tmp2(:,2)+C3*tmp2(:,3)+C4*tmp2(:,4)+C5*tmp2(:,5)+C6*tmp2(:,6)
|
||||
funf(i,j,k)= C1*tmp1(1)+C2*tmp1(2)+C3*tmp1(3)+C4*tmp1(4)+C5*tmp1(5)+C6*tmp1(6)
|
||||
endif
|
||||
else
|
||||
if(kk/2*2==kk)then
|
||||
tmp2= C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
||||
C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
||||
tmp1= C6*tmp2(:,1)+C5*tmp2(:,2)+C4*tmp2(:,3)+C3*tmp2(:,4)+C2*tmp2(:,5)+C1*tmp2(:,6)
|
||||
funf(i,j,k)= C1*tmp1(1)+C2*tmp1(2)+C3*tmp1(3)+C4*tmp1(4)+C5*tmp1(5)+C6*tmp1(6)
|
||||
else
|
||||
tmp2= C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
||||
C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
||||
tmp1= C6*tmp2(:,1)+C5*tmp2(:,2)+C4*tmp2(:,3)+C3*tmp2(:,4)+C2*tmp2(:,5)+C1*tmp2(:,6)
|
||||
funf(i,j,k)= C1*tmp1(1)+C2*tmp1(2)+C3*tmp1(3)+C4*tmp1(4)+C5*tmp1(5)+C6*tmp1(6)
|
||||
endif
|
||||
endif
|
||||
else
|
||||
if(jj/2*2==jj)then
|
||||
if(kk/2*2==kk)then
|
||||
tmp2= C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
||||
C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
||||
tmp1= C1*tmp2(:,1)+C2*tmp2(:,2)+C3*tmp2(:,3)+C4*tmp2(:,4)+C5*tmp2(:,5)+C6*tmp2(:,6)
|
||||
funf(i,j,k)= C6*tmp1(1)+C5*tmp1(2)+C4*tmp1(3)+C3*tmp1(4)+C2*tmp1(5)+C1*tmp1(6)
|
||||
else
|
||||
tmp2= C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
||||
C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
||||
tmp1= C1*tmp2(:,1)+C2*tmp2(:,2)+C3*tmp2(:,3)+C4*tmp2(:,4)+C5*tmp2(:,5)+C6*tmp2(:,6)
|
||||
funf(i,j,k)= C6*tmp1(1)+C5*tmp1(2)+C4*tmp1(3)+C3*tmp1(4)+C2*tmp1(5)+C1*tmp1(6)
|
||||
endif
|
||||
else
|
||||
if(kk/2*2==kk)then
|
||||
tmp2= C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
||||
C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
||||
tmp1= C6*tmp2(:,1)+C5*tmp2(:,2)+C4*tmp2(:,3)+C3*tmp2(:,4)+C2*tmp2(:,5)+C1*tmp2(:,6)
|
||||
funf(i,j,k)= C6*tmp1(1)+C5*tmp1(2)+C4*tmp1(3)+C3*tmp1(4)+C2*tmp1(5)+C1*tmp1(6)
|
||||
else
|
||||
tmp2= C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
||||
C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
||||
tmp1= C6*tmp2(:,1)+C5*tmp2(:,2)+C4*tmp2(:,3)+C3*tmp2(:,4)+C2*tmp2(:,5)+C1*tmp2(:,6)
|
||||
funf(i,j,k)= C6*tmp1(1)+C5*tmp1(2)+C4*tmp1(3)+C3*tmp1(4)+C2*tmp1(5)+C1*tmp1(6)
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
#else
|
||||
if(kk/2*2==kk)then
|
||||
tmp2= C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
||||
C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
||||
else
|
||||
tmp2= C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
||||
C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
||||
endif
|
||||
|
||||
if(jj/2*2==jj)then
|
||||
tmp1= C1*tmp2(:,1)+C2*tmp2(:,2)+C3*tmp2(:,3)+C4*tmp2(:,4)+C5*tmp2(:,5)+C6*tmp2(:,6)
|
||||
else
|
||||
tmp1= C6*tmp2(:,1)+C5*tmp2(:,2)+C4*tmp2(:,3)+C3*tmp2(:,4)+C2*tmp2(:,5)+C1*tmp2(:,6)
|
||||
endif
|
||||
|
||||
if(ii/2*2==ii)then
|
||||
funf(i,j,k)= C1*tmp1(1)+C2*tmp1(2)+C3*tmp1(3)+C4*tmp1(4)+C5*tmp1(5)+C6*tmp1(6)
|
||||
else
|
||||
funf(i,j,k)= C6*tmp1(1)+C5*tmp1(2)+C4*tmp1(3)+C3*tmp1(4)+C2*tmp1(5)+C1*tmp1(6)
|
||||
endif
|
||||
#endif
|
||||
enddo
|
||||
enddo
|
||||
kk = k + lbf(3) - 1
|
||||
ciz(k) = kk/2 - lbc(3) + 1
|
||||
if(kk/2*2 == kk)then
|
||||
piz(k) = 1
|
||||
else
|
||||
piz(k) = 2
|
||||
endif
|
||||
enddo
|
||||
|
||||
ic_min = minval(cix(imino:imaxo))
|
||||
ic_max = maxval(cix(imino:imaxo))
|
||||
jc_min = minval(ciy(jmino:jmaxo))
|
||||
jc_max = maxval(ciy(jmino:jmaxo))
|
||||
kc_min = minval(ciz(kmino:kmaxo))
|
||||
kc_max = maxval(ciz(kmino:kmaxo))
|
||||
|
||||
maxcx = ic_max
|
||||
maxcy = jc_max
|
||||
maxcz = kc_max
|
||||
if(maxcx+3 > extc(1) .or. maxcy+3 > extc(2) .or. maxcz+3 > extc(3))then
|
||||
write(*,*)"error in prolong"
|
||||
return
|
||||
endif
|
||||
|
||||
i_lo = ic_min - 2
|
||||
i_hi = ic_max + 3
|
||||
j_lo = jc_min - 2
|
||||
j_hi = jc_max + 3
|
||||
k_lo = kc_min - 2
|
||||
k_hi = kc_max + 3
|
||||
need_full_symmetry = (i_lo < 1) .or. (j_lo < 1) .or. (k_lo < 1)
|
||||
if(need_full_symmetry)then
|
||||
call symmetry_bd(3,extc,func,funcc,SoA)
|
||||
else
|
||||
funcc(i_lo:i_hi,j_lo:j_hi,k_lo:k_hi) = func(i_lo:i_hi,j_lo:j_hi,k_lo:k_hi)
|
||||
endif
|
||||
|
||||
! 对每个 k(pz, kc 固定)预计算 Z 向插值的 2D 切片
|
||||
|
||||
do k = kmino, kmaxo
|
||||
pz = piz(k); kc = ciz(k)
|
||||
! --- Pass 1: Z 方向,只算一次 ---
|
||||
do iy = jc_min-2, jc_max+3 ! 仅需的 iy 范围(对应 jc-2:jc+3)
|
||||
do ii = ic_min-2, ic_max+3 ! 仅需的 ii 范围(对应 cix-2:cix+3)
|
||||
tmp_z_slab(ii, iy) = sum(WC(:,pz) * funcc(ii, iy, kc-2:kc+3))
|
||||
end do
|
||||
end do
|
||||
|
||||
do j = jmino, jmaxo
|
||||
py = piy(j); jc = ciy(j)
|
||||
! --- Pass 2: Y 方向 ---
|
||||
do ii = ic_min-2, ic_max+3
|
||||
tmp_xyz_line(ii) = sum(WC(:,py) * tmp_z_slab(ii, jc-2:jc+3))
|
||||
end do
|
||||
! --- Pass 3: X 方向 ---
|
||||
do i = imino, imaxo
|
||||
funf(i,j,k) = sum(WC(:,pix(i)) * tmp_xyz_line(cix(i)-2:cix(i)+3))
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
|
||||
!~~~~~~> prolongation start...
|
||||
#if 0
|
||||
do k = kmino, kmaxo
|
||||
pz = piz(k)
|
||||
kc = ciz(k)
|
||||
|
||||
do j = jmino, jmaxo
|
||||
py = piy(j)
|
||||
jc = ciy(j)
|
||||
|
||||
! --- 步骤 1 & 2 融合:分段处理 X 轴,提升 Cache 命中率 ---
|
||||
! 我们将 ii 循环逻辑重组,减少对 funcc 的跨行重复访问
|
||||
do ii = 1, extc(1)
|
||||
! 1. 先做 Z 方向的 6 条线插值(针对当前的 ii 和当前的 6 个 iy)
|
||||
! 我们直接在这里把 Y 方向的加权也做了,省去 tmp_yz 数组
|
||||
! 这样 funcc 的数据读进来后立即完成所有维度的贡献,不再写回内存
|
||||
|
||||
res_line = 0.0d0
|
||||
do jj = 1, 6
|
||||
iy = jc - 3 + jj
|
||||
! 这一行代码是核心:一次性完成 Z 插值并加上 Y 的权重
|
||||
! 编译器会把 WC(jj, py) 存在寄存器里
|
||||
res_line = res_line + WC(jj, py) * ( &
|
||||
WC(1, pz) * funcc(ii, iy, kc-2) + &
|
||||
WC(2, pz) * funcc(ii, iy, kc-1) + &
|
||||
WC(3, pz) * funcc(ii, iy, kc ) + &
|
||||
WC(4, pz) * funcc(ii, iy, kc+1) + &
|
||||
WC(5, pz) * funcc(ii, iy, kc+2) + &
|
||||
WC(6, pz) * funcc(ii, iy, kc+3) )
|
||||
end do
|
||||
tmp_xyz_line(ii) = res_line
|
||||
end do
|
||||
|
||||
|
||||
|
||||
|
||||
! 3. 【降维:X 向】最后在最内层只处理 X 方向的 6 点加权
|
||||
! 此时每个点的计算量从原来的 200+ 次乘法降到了仅 6 次
|
||||
do i = imino, imaxo
|
||||
px = pix(i)
|
||||
ic = cix(i)
|
||||
|
||||
! 直接从预计算好的 line 中读取连续的 6 个点
|
||||
! ic-2 到 ic+3 对应原始 6 点算子
|
||||
funf(i,j,k) = WC(1,px)*tmp_xyz_line(ic-2) + &
|
||||
WC(2,px)*tmp_xyz_line(ic-1) + &
|
||||
WC(3,px)*tmp_xyz_line(ic ) + &
|
||||
WC(4,px)*tmp_xyz_line(ic+1) + &
|
||||
WC(5,px)*tmp_xyz_line(ic+2) + &
|
||||
WC(6,px)*tmp_xyz_line(ic+3)
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
#endif
|
||||
return
|
||||
|
||||
end subroutine prolong3
|
||||
@@ -2357,7 +2369,14 @@
|
||||
integer::imino,imaxo,jmino,jmaxo,kmino,kmaxo
|
||||
|
||||
real*8,dimension(3) :: CD,FD
|
||||
|
||||
|
||||
real*8 :: tmp_xz_plane(-1:extf(1), 6)
|
||||
real*8 :: tmp_x_line(-1:extf(1))
|
||||
integer :: fi, fj, fk, ii, jj, kk
|
||||
integer :: fi_min, fi_max, ii_lo, ii_hi
|
||||
integer :: fj_min, fj_max, fk_min, fk_max, jj_lo, jj_hi, kk_lo, kk_hi
|
||||
logical :: need_full_symmetry
|
||||
|
||||
if(wei.ne.3)then
|
||||
write(*,*)"prolongrestrict.f90::restrict3: this routine only surport 3 dimension"
|
||||
write(*,*)"dim = ",wei
|
||||
@@ -2436,9 +2455,86 @@
|
||||
stop
|
||||
endif
|
||||
|
||||
call symmetry_bd(2,extf,funf,funff,SoA)
|
||||
! 仅计算 X 向最终写回所需的窗口:
|
||||
! func(i,j,k) 只访问 tmp_x_line(fi-2:fi+3)
|
||||
fi_min = 2*(imino + lbc(1) - 1) - 1 - lbf(1) + 1
|
||||
fi_max = 2*(imaxo + lbc(1) - 1) - 1 - lbf(1) + 1
|
||||
fj_min = 2*(jmino + lbc(2) - 1) - 1 - lbf(2) + 1
|
||||
fj_max = 2*(jmaxo + lbc(2) - 1) - 1 - lbf(2) + 1
|
||||
fk_min = 2*(kmino + lbc(3) - 1) - 1 - lbf(3) + 1
|
||||
fk_max = 2*(kmaxo + lbc(3) - 1) - 1 - lbf(3) + 1
|
||||
ii_lo = fi_min - 2
|
||||
ii_hi = fi_max + 3
|
||||
jj_lo = fj_min - 2
|
||||
jj_hi = fj_max + 3
|
||||
kk_lo = fk_min - 2
|
||||
kk_hi = fk_max + 3
|
||||
if(ii_lo < -1 .or. ii_hi > extf(1) .or. &
|
||||
jj_lo < -1 .or. jj_hi > extf(2) .or. &
|
||||
kk_lo < -1 .or. kk_hi > extf(3))then
|
||||
write(*,*)"restrict3: invalid stencil window"
|
||||
write(*,*)"ii=",ii_lo,ii_hi," jj=",jj_lo,jj_hi," kk=",kk_lo,kk_hi
|
||||
write(*,*)"extf=",extf
|
||||
stop
|
||||
endif
|
||||
need_full_symmetry = (ii_lo < 1) .or. (jj_lo < 1) .or. (kk_lo < 1)
|
||||
if(need_full_symmetry)then
|
||||
call symmetry_bd(2,extf,funf,funff,SoA)
|
||||
else
|
||||
funff(ii_lo:ii_hi,jj_lo:jj_hi,kk_lo:kk_hi) = funf(ii_lo:ii_hi,jj_lo:jj_hi,kk_lo:kk_hi)
|
||||
endif
|
||||
|
||||
!~~~~~~> restriction start...
|
||||
do k = kmino, kmaxo
|
||||
fk = 2*(k + lbc(3) - 1) - 1 - lbf(3) + 1
|
||||
|
||||
do j = jmino, jmaxo
|
||||
fj = 2*(j + lbc(2) - 1) - 1 - lbf(2) + 1
|
||||
|
||||
! 优化点 1: 显式展开 Z 方向计算,减少循环开销
|
||||
! 确保 ii 循环是最内层且连续访问
|
||||
!DIR$ VECTOR ALWAYS
|
||||
do ii = ii_lo, ii_hi
|
||||
! 预计算当前 j 对应的 6 行在 Z 方向的压缩结果
|
||||
! 这里直接硬编码 jj 的偏移,彻底消除一层循环
|
||||
tmp_xz_plane(ii, 1) = C1*(funff(ii,fj-2,fk-2)+funff(ii,fj-2,fk+3)) + &
|
||||
C2*(funff(ii,fj-2,fk-1)+funff(ii,fj-2,fk+2)) + &
|
||||
C3*(funff(ii,fj-2,fk )+funff(ii,fj-2,fk+1))
|
||||
tmp_xz_plane(ii, 2) = C1*(funff(ii,fj-1,fk-2)+funff(ii,fj-1,fk+3)) + &
|
||||
C2*(funff(ii,fj-1,fk-1)+funff(ii,fj-1,fk+2)) + &
|
||||
C3*(funff(ii,fj-1,fk )+funff(ii,fj-1,fk+1))
|
||||
tmp_xz_plane(ii, 3) = C1*(funff(ii,fj ,fk-2)+funff(ii,fj ,fk+3)) + &
|
||||
C2*(funff(ii,fj ,fk-1)+funff(ii,fj ,fk+2)) + &
|
||||
C3*(funff(ii,fj ,fk )+funff(ii,fj ,fk+1))
|
||||
tmp_xz_plane(ii, 4) = C1*(funff(ii,fj+1,fk-2)+funff(ii,fj+1,fk+3)) + &
|
||||
C2*(funff(ii,fj+1,fk-1)+funff(ii,fj+1,fk+2)) + &
|
||||
C3*(funff(ii,fj+1,fk )+funff(ii,fj+1,fk+1))
|
||||
tmp_xz_plane(ii, 5) = C1*(funff(ii,fj+2,fk-2)+funff(ii,fj+2,fk+3)) + &
|
||||
C2*(funff(ii,fj+2,fk-1)+funff(ii,fj+2,fk+2)) + &
|
||||
C3*(funff(ii,fj+2,fk )+funff(ii,fj+2,fk+1))
|
||||
tmp_xz_plane(ii, 6) = C1*(funff(ii,fj+3,fk-2)+funff(ii,fj+3,fk+3)) + &
|
||||
C2*(funff(ii,fj+3,fk-1)+funff(ii,fj+3,fk+2)) + &
|
||||
C3*(funff(ii,fj+3,fk )+funff(ii,fj+3,fk+1))
|
||||
end do
|
||||
|
||||
! 优化点 2: 同样向量化 Y 方向压缩
|
||||
!DIR$ VECTOR ALWAYS
|
||||
do ii = ii_lo, ii_hi
|
||||
tmp_x_line(ii) = C1*(tmp_xz_plane(ii, 1) + tmp_xz_plane(ii, 6)) + &
|
||||
C2*(tmp_xz_plane(ii, 2) + tmp_xz_plane(ii, 5)) + &
|
||||
C3*(tmp_xz_plane(ii, 3) + tmp_xz_plane(ii, 4))
|
||||
end do
|
||||
|
||||
! 优化点 3: 最终写入,利用已经缓存在 tmp_x_line 的数据
|
||||
do i = imino, imaxo
|
||||
fi = 2*(i + lbc(1) - 1) - 1 - lbf(1) + 1
|
||||
func(i, j, k) = C1*(tmp_x_line(fi-2) + tmp_x_line(fi+3)) + &
|
||||
C2*(tmp_x_line(fi-1) + tmp_x_line(fi+2)) + &
|
||||
C3*(tmp_x_line(fi ) + tmp_x_line(fi+1))
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
#if 0
|
||||
do k = kmino,kmaxo
|
||||
do j = jmino,jmaxo
|
||||
do i = imino,imaxo
|
||||
@@ -2462,7 +2558,7 @@
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
#endif
|
||||
return
|
||||
|
||||
end subroutine restrict3
|
||||
|
||||
212
AMSS_NCKU_source/rungekutta4_rout_c.C
Normal file
212
AMSS_NCKU_source/rungekutta4_rout_c.C
Normal file
@@ -0,0 +1,212 @@
|
||||
#include "rungekutta4_rout.h"
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstddef>
|
||||
#include <complex>
|
||||
#include <immintrin.h>
|
||||
|
||||
namespace {
|
||||
|
||||
inline void rk4_stage0(std::size_t n,
|
||||
const double *__restrict f0,
|
||||
const double *__restrict frhs,
|
||||
double *__restrict f1,
|
||||
double c) {
|
||||
std::size_t i = 0;
|
||||
#if defined(__AVX512F__)
|
||||
const __m512d vc = _mm512_set1_pd(c);
|
||||
for (; i + 7 < n; i += 8) {
|
||||
const __m512d v0 = _mm512_loadu_pd(f0 + i);
|
||||
const __m512d vr = _mm512_loadu_pd(frhs + i);
|
||||
_mm512_storeu_pd(f1 + i, _mm512_fmadd_pd(vc, vr, v0));
|
||||
}
|
||||
#elif defined(__AVX2__)
|
||||
const __m256d vc = _mm256_set1_pd(c);
|
||||
for (; i + 3 < n; i += 4) {
|
||||
const __m256d v0 = _mm256_loadu_pd(f0 + i);
|
||||
const __m256d vr = _mm256_loadu_pd(frhs + i);
|
||||
_mm256_storeu_pd(f1 + i, _mm256_fmadd_pd(vc, vr, v0));
|
||||
}
|
||||
#endif
|
||||
#pragma ivdep
|
||||
for (; i < n; ++i) {
|
||||
f1[i] = f0[i] + c * frhs[i];
|
||||
}
|
||||
}
|
||||
|
||||
inline void rk4_rhs_accum(std::size_t n,
|
||||
const double *__restrict f1,
|
||||
double *__restrict frhs) {
|
||||
std::size_t i = 0;
|
||||
#if defined(__AVX512F__)
|
||||
const __m512d v2 = _mm512_set1_pd(2.0);
|
||||
for (; i + 7 < n; i += 8) {
|
||||
const __m512d v1 = _mm512_loadu_pd(f1 + i);
|
||||
const __m512d vrhs = _mm512_loadu_pd(frhs + i);
|
||||
_mm512_storeu_pd(frhs + i, _mm512_fmadd_pd(v2, v1, vrhs));
|
||||
}
|
||||
#elif defined(__AVX2__)
|
||||
const __m256d v2 = _mm256_set1_pd(2.0);
|
||||
for (; i + 3 < n; i += 4) {
|
||||
const __m256d v1 = _mm256_loadu_pd(f1 + i);
|
||||
const __m256d vrhs = _mm256_loadu_pd(frhs + i);
|
||||
_mm256_storeu_pd(frhs + i, _mm256_fmadd_pd(v2, v1, vrhs));
|
||||
}
|
||||
#endif
|
||||
#pragma ivdep
|
||||
for (; i < n; ++i) {
|
||||
frhs[i] = frhs[i] + 2.0 * f1[i];
|
||||
}
|
||||
}
|
||||
|
||||
inline void rk4_f1_from_f0_f1(std::size_t n,
|
||||
const double *__restrict f0,
|
||||
double *__restrict f1,
|
||||
double c) {
|
||||
std::size_t i = 0;
|
||||
#if defined(__AVX512F__)
|
||||
const __m512d vc = _mm512_set1_pd(c);
|
||||
for (; i + 7 < n; i += 8) {
|
||||
const __m512d v0 = _mm512_loadu_pd(f0 + i);
|
||||
const __m512d v1 = _mm512_loadu_pd(f1 + i);
|
||||
_mm512_storeu_pd(f1 + i, _mm512_fmadd_pd(vc, v1, v0));
|
||||
}
|
||||
#elif defined(__AVX2__)
|
||||
const __m256d vc = _mm256_set1_pd(c);
|
||||
for (; i + 3 < n; i += 4) {
|
||||
const __m256d v0 = _mm256_loadu_pd(f0 + i);
|
||||
const __m256d v1 = _mm256_loadu_pd(f1 + i);
|
||||
_mm256_storeu_pd(f1 + i, _mm256_fmadd_pd(vc, v1, v0));
|
||||
}
|
||||
#endif
|
||||
#pragma ivdep
|
||||
for (; i < n; ++i) {
|
||||
f1[i] = f0[i] + c * f1[i];
|
||||
}
|
||||
}
|
||||
|
||||
inline void rk4_stage3(std::size_t n,
|
||||
const double *__restrict f0,
|
||||
double *__restrict f1,
|
||||
const double *__restrict frhs,
|
||||
double c) {
|
||||
std::size_t i = 0;
|
||||
#if defined(__AVX512F__)
|
||||
const __m512d vc = _mm512_set1_pd(c);
|
||||
for (; i + 7 < n; i += 8) {
|
||||
const __m512d v0 = _mm512_loadu_pd(f0 + i);
|
||||
const __m512d v1 = _mm512_loadu_pd(f1 + i);
|
||||
const __m512d vr = _mm512_loadu_pd(frhs + i);
|
||||
_mm512_storeu_pd(f1 + i, _mm512_fmadd_pd(vc, _mm512_add_pd(v1, vr), v0));
|
||||
}
|
||||
#elif defined(__AVX2__)
|
||||
const __m256d vc = _mm256_set1_pd(c);
|
||||
for (; i + 3 < n; i += 4) {
|
||||
const __m256d v0 = _mm256_loadu_pd(f0 + i);
|
||||
const __m256d v1 = _mm256_loadu_pd(f1 + i);
|
||||
const __m256d vr = _mm256_loadu_pd(frhs + i);
|
||||
_mm256_storeu_pd(f1 + i, _mm256_fmadd_pd(vc, _mm256_add_pd(v1, vr), v0));
|
||||
}
|
||||
#endif
|
||||
#pragma ivdep
|
||||
for (; i < n; ++i) {
|
||||
f1[i] = f0[i] + c * (f1[i] + frhs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
extern "C" {
|
||||
|
||||
void f_rungekutta4_scalar(double &dT, double &f0, double &f1, double &f_rhs, int &RK4) {
|
||||
constexpr double F1o6 = 1.0 / 6.0;
|
||||
constexpr double HLF = 0.5;
|
||||
constexpr double TWO = 2.0;
|
||||
|
||||
switch (RK4) {
|
||||
case 0:
|
||||
f1 = f0 + HLF * dT * f_rhs;
|
||||
break;
|
||||
case 1:
|
||||
f_rhs = f_rhs + TWO * f1;
|
||||
f1 = f0 + HLF * dT * f1;
|
||||
break;
|
||||
case 2:
|
||||
f_rhs = f_rhs + TWO * f1;
|
||||
f1 = f0 + dT * f1;
|
||||
break;
|
||||
case 3:
|
||||
f1 = f0 + F1o6 * dT * (f1 + f_rhs);
|
||||
break;
|
||||
default:
|
||||
std::fprintf(stderr, "rungekutta4_scalar_c: invalid RK4 stage %d\n", RK4);
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
|
||||
void rungekutta4_cplxscalar_(double &dT,
|
||||
std::complex<double> &f0,
|
||||
std::complex<double> &f1,
|
||||
std::complex<double> &f_rhs,
|
||||
int &RK4) {
|
||||
constexpr double F1o6 = 1.0 / 6.0;
|
||||
constexpr double HLF = 0.5;
|
||||
constexpr double TWO = 2.0;
|
||||
|
||||
switch (RK4) {
|
||||
case 0:
|
||||
f1 = f0 + HLF * dT * f_rhs;
|
||||
break;
|
||||
case 1:
|
||||
f_rhs = f_rhs + TWO * f1;
|
||||
f1 = f0 + HLF * dT * f1;
|
||||
break;
|
||||
case 2:
|
||||
f_rhs = f_rhs + TWO * f1;
|
||||
f1 = f0 + dT * f1;
|
||||
break;
|
||||
case 3:
|
||||
f1 = f0 + F1o6 * dT * (f1 + f_rhs);
|
||||
break;
|
||||
default:
|
||||
std::fprintf(stderr, "rungekutta4_cplxscalar_c: invalid RK4 stage %d\n", RK4);
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
|
||||
int f_rungekutta4_rout(int *ex, double &dT,
|
||||
double *f0, double *f1, double *f_rhs,
|
||||
int &RK4) {
|
||||
const std::size_t n = static_cast<std::size_t>(ex[0]) *
|
||||
static_cast<std::size_t>(ex[1]) *
|
||||
static_cast<std::size_t>(ex[2]);
|
||||
const double *const __restrict f0r = f0;
|
||||
double *const __restrict f1r = f1;
|
||||
double *const __restrict frhs = f_rhs;
|
||||
|
||||
if (__builtin_expect(static_cast<unsigned>(RK4) > 3u, 0)) {
|
||||
std::fprintf(stderr, "rungekutta4_rout_c: invalid RK4 stage %d\n", RK4);
|
||||
std::abort();
|
||||
}
|
||||
|
||||
switch (RK4) {
|
||||
case 0:
|
||||
rk4_stage0(n, f0r, frhs, f1r, 0.5 * dT);
|
||||
break;
|
||||
case 1:
|
||||
rk4_rhs_accum(n, f1r, frhs);
|
||||
rk4_f1_from_f0_f1(n, f0r, f1r, 0.5 * dT);
|
||||
break;
|
||||
case 2:
|
||||
rk4_rhs_accum(n, f1r, frhs);
|
||||
rk4_f1_from_f0_f1(n, f0r, f1r, dT);
|
||||
break;
|
||||
default:
|
||||
rk4_stage3(n, f0r, f1r, frhs, (1.0 / 6.0) * dT);
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // extern "C"
|
||||
@@ -5,6 +5,7 @@
|
||||
#include <stddef.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
/* 主网格:0-based -> 1D */
|
||||
static inline size_t idx_ex(int i0, int j0, int k0, const int ex[3]) {
|
||||
const int ex1 = ex[0], ex2 = ex[1];
|
||||
@@ -87,60 +88,159 @@ static inline size_t idx_funcc_F(int iF, int jF, int kF, int ord, const int extc
|
||||
* funcc(:,:,-i) = funcc(:,:,i+1)*SoA(3)
|
||||
* enddo
|
||||
*/
|
||||
static inline void symmetry_bd_impl(int ord,
|
||||
int shift,
|
||||
const int extc[3],
|
||||
const double *__restrict func,
|
||||
double *__restrict funcc,
|
||||
const double SoA[3])
|
||||
{
|
||||
const int extc1 = extc[0], extc2 = extc[1], extc3 = extc[2];
|
||||
const int nx = extc1 + ord;
|
||||
const int ny = extc2 + ord;
|
||||
|
||||
const size_t snx = (size_t)nx;
|
||||
const size_t splane = (size_t)nx * (size_t)ny;
|
||||
const size_t interior_i = (size_t)shift + 1u; /* iF = 1 */
|
||||
const size_t interior_j = ((size_t)shift + 1u) * snx; /* jF = 1 */
|
||||
const size_t interior_k = ((size_t)shift + 1u) * splane; /* kF = 1 */
|
||||
const size_t interior0 = interior_k + interior_j + interior_i;
|
||||
|
||||
/* 1) funcc(1:extc1,1:extc2,1:extc3) = func */
|
||||
for (int k0 = 0; k0 < extc3; ++k0) {
|
||||
const double *src_k = func + (size_t)k0 * (size_t)extc2 * (size_t)extc1;
|
||||
const size_t dst_k0 = interior0 + (size_t)k0 * splane;
|
||||
for (int j0 = 0; j0 < extc2; ++j0) {
|
||||
const double *src = src_k + (size_t)j0 * (size_t)extc1;
|
||||
double *dst = funcc + dst_k0 + (size_t)j0 * snx;
|
||||
memcpy(dst, src, (size_t)extc1 * sizeof(double));
|
||||
}
|
||||
}
|
||||
|
||||
/* 2) funcc(-i,1:extc2,1:extc3) = funcc(i+1,1:extc2,1:extc3)*SoA(1) */
|
||||
const double s1 = SoA[0];
|
||||
if (s1 == 1.0) {
|
||||
for (int ii = 0; ii < ord; ++ii) {
|
||||
const size_t dst_i = (size_t)(shift - ii);
|
||||
const size_t src_i = (size_t)(shift + ii + 1);
|
||||
for (int k0 = 0; k0 < extc3; ++k0) {
|
||||
const size_t kbase = interior_k + (size_t)k0 * splane + interior_j;
|
||||
for (int j0 = 0; j0 < extc2; ++j0) {
|
||||
const size_t off = kbase + (size_t)j0 * snx;
|
||||
funcc[off + dst_i] = funcc[off + src_i];
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (s1 == -1.0) {
|
||||
for (int ii = 0; ii < ord; ++ii) {
|
||||
const size_t dst_i = (size_t)(shift - ii);
|
||||
const size_t src_i = (size_t)(shift + ii + 1);
|
||||
for (int k0 = 0; k0 < extc3; ++k0) {
|
||||
const size_t kbase = interior_k + (size_t)k0 * splane + interior_j;
|
||||
for (int j0 = 0; j0 < extc2; ++j0) {
|
||||
const size_t off = kbase + (size_t)j0 * snx;
|
||||
funcc[off + dst_i] = -funcc[off + src_i];
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int ii = 0; ii < ord; ++ii) {
|
||||
const size_t dst_i = (size_t)(shift - ii);
|
||||
const size_t src_i = (size_t)(shift + ii + 1);
|
||||
for (int k0 = 0; k0 < extc3; ++k0) {
|
||||
const size_t kbase = interior_k + (size_t)k0 * splane + interior_j;
|
||||
for (int j0 = 0; j0 < extc2; ++j0) {
|
||||
const size_t off = kbase + (size_t)j0 * snx;
|
||||
funcc[off + dst_i] = funcc[off + src_i] * s1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* 3) funcc(:,-j,1:extc3) = funcc(:,j+1,1:extc3)*SoA(2) */
|
||||
const double s2 = SoA[1];
|
||||
if (s2 == 1.0) {
|
||||
for (int jj = 0; jj < ord; ++jj) {
|
||||
const size_t dst_j = (size_t)(shift - jj) * snx;
|
||||
const size_t src_j = (size_t)(shift + jj + 1) * snx;
|
||||
for (int k0 = 0; k0 < extc3; ++k0) {
|
||||
const size_t kbase = interior_k + (size_t)k0 * splane;
|
||||
double *dst = funcc + kbase + dst_j;
|
||||
const double *src = funcc + kbase + src_j;
|
||||
for (int i = 0; i < nx; ++i) dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
} else if (s2 == -1.0) {
|
||||
for (int jj = 0; jj < ord; ++jj) {
|
||||
const size_t dst_j = (size_t)(shift - jj) * snx;
|
||||
const size_t src_j = (size_t)(shift + jj + 1) * snx;
|
||||
for (int k0 = 0; k0 < extc3; ++k0) {
|
||||
const size_t kbase = interior_k + (size_t)k0 * splane;
|
||||
double *dst = funcc + kbase + dst_j;
|
||||
const double *src = funcc + kbase + src_j;
|
||||
for (int i = 0; i < nx; ++i) dst[i] = -src[i];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int jj = 0; jj < ord; ++jj) {
|
||||
const size_t dst_j = (size_t)(shift - jj) * snx;
|
||||
const size_t src_j = (size_t)(shift + jj + 1) * snx;
|
||||
for (int k0 = 0; k0 < extc3; ++k0) {
|
||||
const size_t kbase = interior_k + (size_t)k0 * splane;
|
||||
double *dst = funcc + kbase + dst_j;
|
||||
const double *src = funcc + kbase + src_j;
|
||||
for (int i = 0; i < nx; ++i) dst[i] = src[i] * s2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* 4) funcc(:,:,-k) = funcc(:,:,k+1)*SoA(3) */
|
||||
const double s3 = SoA[2];
|
||||
if (s3 == 1.0) {
|
||||
for (int kk = 0; kk < ord; ++kk) {
|
||||
const size_t dst_k = (size_t)(shift - kk) * splane;
|
||||
const size_t src_k = (size_t)(shift + kk + 1) * splane;
|
||||
double *dst = funcc + dst_k;
|
||||
const double *src = funcc + src_k;
|
||||
for (size_t p = 0; p < splane; ++p) dst[p] = src[p];
|
||||
}
|
||||
} else if (s3 == -1.0) {
|
||||
for (int kk = 0; kk < ord; ++kk) {
|
||||
const size_t dst_k = (size_t)(shift - kk) * splane;
|
||||
const size_t src_k = (size_t)(shift + kk + 1) * splane;
|
||||
double *dst = funcc + dst_k;
|
||||
const double *src = funcc + src_k;
|
||||
for (size_t p = 0; p < splane; ++p) dst[p] = -src[p];
|
||||
}
|
||||
} else {
|
||||
for (int kk = 0; kk < ord; ++kk) {
|
||||
const size_t dst_k = (size_t)(shift - kk) * splane;
|
||||
const size_t src_k = (size_t)(shift + kk + 1) * splane;
|
||||
double *dst = funcc + dst_k;
|
||||
const double *src = funcc + src_k;
|
||||
for (size_t p = 0; p < splane; ++p) dst[p] = src[p] * s3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void symmetry_bd(int ord,
|
||||
const int extc[3],
|
||||
const double *func,
|
||||
double *funcc,
|
||||
const double SoA[3])
|
||||
{
|
||||
const int extc1 = extc[0], extc2 = extc[1], extc3 = extc[2];
|
||||
if (ord <= 0) return;
|
||||
|
||||
// 1) funcc(1:extc1,1:extc2,1:extc3) = func
|
||||
// Fortran 的 (iF=1..extc1) 对应 C 的 func(i0=0..extc1-1)
|
||||
for (int k0 = 0; k0 < extc3; ++k0) {
|
||||
for (int j0 = 0; j0 < extc2; ++j0) {
|
||||
for (int i0 = 0; i0 < extc1; ++i0) {
|
||||
const int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1;
|
||||
funcc[idx_funcc_F(iF, jF, kF, ord, extc)] = func[idx_func0(i0, j0, k0, extc)];
|
||||
}
|
||||
}
|
||||
/* Fast paths used by current C kernels: ord=2 (derivs), ord=3 (lopsided/KO). */
|
||||
if (ord == 2) {
|
||||
symmetry_bd_impl(2, 1, extc, func, funcc, SoA);
|
||||
return;
|
||||
}
|
||||
if (ord == 3) {
|
||||
symmetry_bd_impl(3, 2, extc, func, funcc, SoA);
|
||||
return;
|
||||
}
|
||||
|
||||
// 2) do i=0..ord-1: funcc(-i, 1:extc2, 1:extc3) = funcc(i+1, ...)*SoA(1)
|
||||
for (int ii = 0; ii <= ord - 1; ++ii) {
|
||||
const int iF_dst = -ii; // 0, -1, -2, ...
|
||||
const int iF_src = ii + 1; // 1, 2, 3, ...
|
||||
for (int kF = 1; kF <= extc3; ++kF) {
|
||||
for (int jF = 1; jF <= extc2; ++jF) {
|
||||
funcc[idx_funcc_F(iF_dst, jF, kF, ord, extc)] =
|
||||
funcc[idx_funcc_F(iF_src, jF, kF, ord, extc)] * SoA[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3) do i=0..ord-1: funcc(:,-i, 1:extc3) = funcc(:, i+1, 1:extc3)*SoA(2)
|
||||
// 注意 Fortran 这里的 ":" 表示 iF 从 (-ord+1..extc1) 全覆盖
|
||||
for (int jj = 0; jj <= ord - 1; ++jj) {
|
||||
const int jF_dst = -jj;
|
||||
const int jF_src = jj + 1;
|
||||
for (int kF = 1; kF <= extc3; ++kF) {
|
||||
for (int iF = -ord + 1; iF <= extc1; ++iF) {
|
||||
funcc[idx_funcc_F(iF, jF_dst, kF, ord, extc)] =
|
||||
funcc[idx_funcc_F(iF, jF_src, kF, ord, extc)] * SoA[1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 4) do i=0..ord-1: funcc(:,:,-i) = funcc(:,:, i+1)*SoA(3)
|
||||
for (int kk = 0; kk <= ord - 1; ++kk) {
|
||||
const int kF_dst = -kk;
|
||||
const int kF_src = kk + 1;
|
||||
for (int jF = -ord + 1; jF <= extc2; ++jF) {
|
||||
for (int iF = -ord + 1; iF <= extc1; ++iF) {
|
||||
funcc[idx_funcc_F(iF, jF, kF_dst, ord, extc)] =
|
||||
funcc[idx_funcc_F(iF, jF, kF_src, ord, extc)] * SoA[2];
|
||||
}
|
||||
}
|
||||
}
|
||||
symmetry_bd_impl(ord, ord - 1, extc, func, funcc, SoA);
|
||||
}
|
||||
#endif
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -27,19 +27,24 @@ using namespace std;
|
||||
class surface_integral
|
||||
{
|
||||
|
||||
private:
|
||||
int Symmetry, factor;
|
||||
int N_theta, N_phi; // Number of points in Theta & Phi directions
|
||||
double dphi, dcostheta;
|
||||
double *arcostheta, *wtcostheta;
|
||||
int n_tot; // size of arrays
|
||||
|
||||
double *nx_g, *ny_g, *nz_g; // global list of unit normals
|
||||
int myrank, cpusize;
|
||||
|
||||
public:
|
||||
surface_integral(int iSymmetry);
|
||||
~surface_integral();
|
||||
private:
|
||||
int Symmetry, factor;
|
||||
int N_theta, N_phi; // Number of points in Theta & Phi directions
|
||||
double dphi, dcostheta;
|
||||
double *arcostheta, *wtcostheta;
|
||||
int n_tot; // size of arrays
|
||||
|
||||
double *nx_g, *ny_g, *nz_g; // global list of unit normals
|
||||
int myrank, cpusize;
|
||||
int wave_cache_spinw, wave_cache_maxl, wave_cache_modes;
|
||||
double *wave_theta_pos, *wave_theta_neg;
|
||||
double *wave_phi_cos, *wave_phi_sin;
|
||||
void clear_wave_cache();
|
||||
void build_wave_cache(int spinw, int maxl);
|
||||
|
||||
public:
|
||||
surface_integral(int iSymmetry);
|
||||
~surface_integral();
|
||||
|
||||
void surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *Ipsi4,
|
||||
int spinw, int maxl, int NN, double *RP, double *IP,
|
||||
@@ -77,21 +82,37 @@ public:
|
||||
double &, double &, double &, double &, double &, double &, double &,
|
||||
double &, double &, double &, double &, double &, double &,
|
||||
double &, double &)); // NN is the length of RP and IP
|
||||
void surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var *trK,
|
||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||
var *Gmx, var *Gmy, var *Gmz,
|
||||
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs,
|
||||
double *Rout, monitor *Monitor);
|
||||
void surf_MassPAng(double rex, int lev, ShellPatch *GH, var *chi, var *trK,
|
||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||
var *Gmx, var *Gmy, var *Gmz,
|
||||
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs,
|
||||
double *Rout, monitor *Monitor);
|
||||
void surf_Wave(double rex, cgh *GH, ShellPatch *SH,
|
||||
var *chi, var *trK,
|
||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||
void surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var *trK,
|
||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||
var *Gmx, var *Gmy, var *Gmz,
|
||||
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs,
|
||||
double *Rout, monitor *Monitor, bool refresh_mass_fields = true);
|
||||
void surf_MassPAng(double rex, int lev, ShellPatch *GH, var *chi, var *trK,
|
||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||
var *Gmx, var *Gmy, var *Gmz,
|
||||
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs,
|
||||
double *Rout, monitor *Monitor, bool refresh_mass_fields = true);
|
||||
void surf_WaveMassPAng(double rex, int lev, cgh *GH,
|
||||
var *Rpsi4, var *Ipsi4, int spinw, int maxl, int NN, double *RP, double *IP,
|
||||
var *chi, var *trK,
|
||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||
var *Gmx, var *Gmy, var *Gmz,
|
||||
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs,
|
||||
double *Rout, monitor *Monitor, bool refresh_mass_fields = true);
|
||||
void surf_WaveMassPAng(double rex, int lev, ShellPatch *GH,
|
||||
var *Rpsi4, var *Ipsi4, int spinw, int maxl, int NN, double *RP, double *IP,
|
||||
var *chi, var *trK,
|
||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||
var *Gmx, var *Gmy, var *Gmz,
|
||||
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs,
|
||||
double *Rout, monitor *Monitor, bool refresh_mass_fields = true);
|
||||
void surf_Wave(double rex, cgh *GH, ShellPatch *SH,
|
||||
var *chi, var *trK,
|
||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||
var *chix, var *chiy, var *chiz,
|
||||
var *trKx, var *trKy, var *trKz,
|
||||
@@ -110,12 +131,12 @@ public:
|
||||
bool SR_Interp_Points(MyList<var> *VarList, cgh *GH, ShellPatch *SH,
|
||||
int NN, double **XX, double *Shellf);
|
||||
|
||||
void surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var *trK,
|
||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||
var *Gmx, var *Gmy, var *Gmz,
|
||||
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs, // temparay memory for mass^i
|
||||
double *Rout, monitor *Monitor, MPI_Comm Comm_here);
|
||||
void surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var *trK,
|
||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||
var *Gmx, var *Gmy, var *Gmz,
|
||||
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs, // temparay memory for mass^i
|
||||
double *Rout, monitor *Monitor, MPI_Comm Comm_here, bool refresh_mass_fields = true);
|
||||
void surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *Ipsi4,
|
||||
int spinw, int maxl, int NN, double *RP, double *IP,
|
||||
monitor *Monitor, MPI_Comm Comm_here);
|
||||
|
||||
@@ -24,4 +24,10 @@ void lopsided(const int ex[3],
|
||||
const double *X, const double *Y, const double *Z,
|
||||
const double *f, double *f_rhs,
|
||||
const double *Sfx, const double *Sfy, const double *Sfz,
|
||||
int Symmetry, const double SoA[3]);
|
||||
int Symmetry, const double SoA[3]);
|
||||
|
||||
void lopsided_kodis(const int ex[3],
|
||||
const double *X, const double *Y, const double *Z,
|
||||
const double *f, double *f_rhs,
|
||||
const double *Sfx, const double *Sfy, const double *Sfz,
|
||||
int Symmetry, const double SoA[3], double eps);
|
||||
|
||||
725
AMSS_NCKU_source/z4c_rhs_c.C
Normal file
725
AMSS_NCKU_source/z4c_rhs_c.C
Normal file
@@ -0,0 +1,725 @@
|
||||
#include "macrodef.h"
|
||||
#include "bssn_rhs.h"
|
||||
#include "fmisc.h"
|
||||
#include "ricci_gamma.h"
|
||||
#include "share_func.h"
|
||||
#include "tool.h"
|
||||
#include <vector>
|
||||
|
||||
#ifdef fortran1
|
||||
#define f_constraint_bssn constraint_bssn
|
||||
#define f_z4c_rhs_point z4c_rhs_point
|
||||
#endif
|
||||
#ifdef fortran2
|
||||
#define f_constraint_bssn CONSTRAINT_BSSN
|
||||
#define f_z4c_rhs_point Z4C_RHS_POINT
|
||||
#endif
|
||||
#ifdef fortran3
|
||||
#define f_constraint_bssn constraint_bssn_
|
||||
#define f_z4c_rhs_point z4c_rhs_point_
|
||||
#endif
|
||||
|
||||
extern "C" void f_constraint_bssn(int *, double *, double *, double *,
|
||||
double *, double *,
|
||||
double *, double *, double *, double *, double *, double *,
|
||||
double *, double *, double *, double *, double *, double *,
|
||||
double *, double *, double *,
|
||||
double *, double *, double *, double *, double *, double *, double *, double *, double *, double *,
|
||||
double *, double *, double *, double *, double *, double *,
|
||||
double *, double *, double *, double *, double *, double *,
|
||||
double *, double *, double *, double *, double *, double *,
|
||||
double *, double *, double *, double *, double *, double *, double *, double *,
|
||||
double *, double *, double *,
|
||||
int &);
|
||||
|
||||
extern "C" void f_z4c_rhs_point(
|
||||
double &A11,
|
||||
double &A12,
|
||||
double &A13,
|
||||
double &A22,
|
||||
double &A23,
|
||||
double &A33,
|
||||
double &alpha,
|
||||
double &B1,
|
||||
double &B2,
|
||||
double &B3,
|
||||
double &beta1,
|
||||
double &beta2,
|
||||
double &beta3,
|
||||
double &chi,
|
||||
double &chiDivFloor,
|
||||
double &da1,
|
||||
double &dA111,
|
||||
double &dA112,
|
||||
double &dA113,
|
||||
double &dA122,
|
||||
double &dA123,
|
||||
double &dA133,
|
||||
double &da2,
|
||||
double &dA211,
|
||||
double &dA212,
|
||||
double &dA213,
|
||||
double &dA222,
|
||||
double &dA223,
|
||||
double &dA233,
|
||||
double &da3,
|
||||
double &dA311,
|
||||
double &dA312,
|
||||
double &dA313,
|
||||
double &dA322,
|
||||
double &dA323,
|
||||
double &dA333,
|
||||
double &db11,
|
||||
double &dB11,
|
||||
double &db12,
|
||||
double &dB12,
|
||||
double &db13,
|
||||
double &dB13,
|
||||
double &db21,
|
||||
double &dB21,
|
||||
double &db22,
|
||||
double &dB22,
|
||||
double &db23,
|
||||
double &dB23,
|
||||
double &db31,
|
||||
double &dB31,
|
||||
double &db32,
|
||||
double &dB32,
|
||||
double &db33,
|
||||
double &dB33,
|
||||
double &dchi1,
|
||||
double &dchi2,
|
||||
double &dchi3,
|
||||
double &dda11,
|
||||
double &dda12,
|
||||
double &dda13,
|
||||
double &dda22,
|
||||
double &dda23,
|
||||
double &dda33,
|
||||
double &ddb111,
|
||||
double &ddb112,
|
||||
double &ddb113,
|
||||
double &ddb121,
|
||||
double &ddb122,
|
||||
double &ddb123,
|
||||
double &ddb131,
|
||||
double &ddb132,
|
||||
double &ddb133,
|
||||
double &ddb221,
|
||||
double &ddb222,
|
||||
double &ddb223,
|
||||
double &ddb231,
|
||||
double &ddb232,
|
||||
double &ddb233,
|
||||
double &ddb331,
|
||||
double &ddb332,
|
||||
double &ddb333,
|
||||
double &ddchi11,
|
||||
double &ddchi12,
|
||||
double &ddchi13,
|
||||
double &ddchi22,
|
||||
double &ddchi23,
|
||||
double &ddchi33,
|
||||
double &deldelg1111,
|
||||
double &deldelg1112,
|
||||
double &deldelg1113,
|
||||
double &deldelg1122,
|
||||
double &deldelg1123,
|
||||
double &deldelg1133,
|
||||
double &deldelg1211,
|
||||
double &deldelg1212,
|
||||
double &deldelg1213,
|
||||
double &deldelg1222,
|
||||
double &deldelg1223,
|
||||
double &deldelg1233,
|
||||
double &deldelg1311,
|
||||
double &deldelg1312,
|
||||
double &deldelg1313,
|
||||
double &deldelg1322,
|
||||
double &deldelg1323,
|
||||
double &deldelg1333,
|
||||
double &deldelg2211,
|
||||
double &deldelg2212,
|
||||
double &deldelg2213,
|
||||
double &deldelg2222,
|
||||
double &deldelg2223,
|
||||
double &deldelg2233,
|
||||
double &deldelg2311,
|
||||
double &deldelg2312,
|
||||
double &deldelg2313,
|
||||
double &deldelg2322,
|
||||
double &deldelg2323,
|
||||
double &deldelg2333,
|
||||
double &deldelg3311,
|
||||
double &deldelg3312,
|
||||
double &deldelg3313,
|
||||
double &deldelg3322,
|
||||
double &deldelg3323,
|
||||
double &deldelg3333,
|
||||
double &delG11,
|
||||
double &delg111,
|
||||
double &delg112,
|
||||
double &delg113,
|
||||
double &delG12,
|
||||
double &delg122,
|
||||
double &delg123,
|
||||
double &delG13,
|
||||
double &delg133,
|
||||
double &delG21,
|
||||
double &delg211,
|
||||
double &delg212,
|
||||
double &delg213,
|
||||
double &delG22,
|
||||
double &delg222,
|
||||
double &delg223,
|
||||
double &delG23,
|
||||
double &delg233,
|
||||
double &delG31,
|
||||
double &delg311,
|
||||
double &delg312,
|
||||
double &delg313,
|
||||
double &delG32,
|
||||
double &delg322,
|
||||
double &delg323,
|
||||
double &delG33,
|
||||
double &delg333,
|
||||
double &dKhat1,
|
||||
double &dKhat2,
|
||||
double &dKhat3,
|
||||
double &dTheta1,
|
||||
double &dTheta2,
|
||||
double &dTheta3,
|
||||
double &G1,
|
||||
double &g11,
|
||||
double &g12,
|
||||
double &g13,
|
||||
double &G2,
|
||||
double &g22,
|
||||
double &g23,
|
||||
double &G3,
|
||||
double &g33,
|
||||
double &kappa1,
|
||||
double &kappa2,
|
||||
double &Khat,
|
||||
double &rA11,
|
||||
double &rA12,
|
||||
double &rA13,
|
||||
double &rA22,
|
||||
double &rA23,
|
||||
double &rA33,
|
||||
double &rchi,
|
||||
double &rG1,
|
||||
double &rg11,
|
||||
double &rg12,
|
||||
double &rg13,
|
||||
double &rG2,
|
||||
double &rg22,
|
||||
double &rg23,
|
||||
double &rG3,
|
||||
double &rg33,
|
||||
double &rKhat,
|
||||
double &rTheta,
|
||||
double &Theta);
|
||||
|
||||
static inline void z4c_contract_gamma(
|
||||
const double gxx, const double gxy, const double gxz,
|
||||
const double gyy, const double gyz, const double gzz,
|
||||
const double gxxx, const double gxyx, const double gxzx,
|
||||
const double gyyx, const double gyzx, const double gzzx,
|
||||
const double gxxy, const double gxyy, const double gxzy,
|
||||
const double gyyy, const double gyzy, const double gzzy,
|
||||
const double gxxz, const double gxyz, const double gxzz,
|
||||
const double gyyz, const double gyzz, const double gzzz,
|
||||
double &Gamxa, double &Gamya, double &Gamza)
|
||||
{
|
||||
double det = gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz -
|
||||
gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz;
|
||||
const double gupxx = (gyy * gzz - gyz * gyz) / det;
|
||||
const double gupxy = -(gxy * gzz - gyz * gxz) / det;
|
||||
const double gupxz = (gxy * gyz - gyy * gxz) / det;
|
||||
const double gupyy = (gxx * gzz - gxz * gxz) / det;
|
||||
const double gupyz = -(gxx * gyz - gxy * gxz) / det;
|
||||
const double gupzz = (gxx * gyy - gxy * gxy) / det;
|
||||
|
||||
const double Gamxxx = 0.5 * (gupxx * gxxx + gupxy * (2.0 * gxyx - gxxy) + gupxz * (2.0 * gxzx - gxxz));
|
||||
const double Gamyxx = 0.5 * (gupxy * gxxx + gupyy * (2.0 * gxyx - gxxy) + gupyz * (2.0 * gxzx - gxxz));
|
||||
const double Gamzxx = 0.5 * (gupxz * gxxx + gupyz * (2.0 * gxyx - gxxy) + gupzz * (2.0 * gxzx - gxxz));
|
||||
|
||||
const double Gamxyy = 0.5 * (gupxx * (2.0 * gxyy - gyyx) + gupxy * gyyy + gupxz * (2.0 * gyzy - gyyz));
|
||||
const double Gamyyy = 0.5 * (gupxy * (2.0 * gxyy - gyyx) + gupyy * gyyy + gupyz * (2.0 * gyzy - gyyz));
|
||||
const double Gamzyy = 0.5 * (gupxz * (2.0 * gxyy - gyyx) + gupyz * gyyy + gupzz * (2.0 * gyzy - gyyz));
|
||||
|
||||
const double Gamxzz = 0.5 * (gupxx * (2.0 * gxzz - gzzx) + gupxy * (2.0 * gyzz - gzzy) + gupxz * gzzz);
|
||||
const double Gamyzz = 0.5 * (gupxy * (2.0 * gxzz - gzzx) + gupyy * (2.0 * gyzz - gzzy) + gupyz * gzzz);
|
||||
const double Gamzzz = 0.5 * (gupxz * (2.0 * gxzz - gzzx) + gupyz * (2.0 * gyzz - gzzy) + gupzz * gzzz);
|
||||
|
||||
const double Gamxxy = 0.5 * (gupxx * gxxy + gupxy * gyyx + gupxz * (gxzy + gyzx - gxyz));
|
||||
const double Gamyxy = 0.5 * (gupxy * gxxy + gupyy * gyyx + gupyz * (gxzy + gyzx - gxyz));
|
||||
const double Gamzxy = 0.5 * (gupxz * gxxy + gupyz * gyyx + gupzz * (gxzy + gyzx - gxyz));
|
||||
|
||||
const double Gamxxz = 0.5 * (gupxx * gxxz + gupxy * (gxyz + gyzx - gxzy) + gupxz * gzzx);
|
||||
const double Gamyxz = 0.5 * (gupxy * gxxz + gupyy * (gxyz + gyzx - gxzy) + gupyz * gzzx);
|
||||
const double Gamzxz = 0.5 * (gupxz * gxxz + gupyz * (gxyz + gyzx - gxzy) + gupzz * gzzx);
|
||||
|
||||
const double Gamxyz = 0.5 * (gupxx * (gxyz + gxzy - gyzx) + gupxy * gyyz + gupxz * gzzy);
|
||||
const double Gamyyz = 0.5 * (gupxy * (gxyz + gxzy - gyzx) + gupyy * gyyz + gupyz * gzzy);
|
||||
const double Gamzyz = 0.5 * (gupxz * (gxyz + gxzy - gyzx) + gupyz * gyyz + gupzz * gzzy);
|
||||
|
||||
Gamxa = gupxx * Gamxxx + gupyy * Gamxyy + gupzz * Gamxzz +
|
||||
2.0 * (gupxy * Gamxxy + gupxz * Gamxxz + gupyz * Gamxyz);
|
||||
Gamya = gupxx * Gamyxx + gupyy * Gamyyy + gupzz * Gamyzz +
|
||||
2.0 * (gupxy * Gamyxy + gupxz * Gamyxz + gupyz * Gamyyz);
|
||||
Gamza = gupxx * Gamzxx + gupyy * Gamzyy + gupzz * Gamzzz +
|
||||
2.0 * (gupxy * Gamzxy + gupxz * Gamzxz + gupyz * Gamzyz);
|
||||
}
|
||||
|
||||
static int compute_rhs_z4c_cartesian(
|
||||
int *ex, double &T, double *X, double *Y, double *Z,
|
||||
double *chi_state, double *chi_constraints, double *trK,
|
||||
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
|
||||
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
|
||||
double *Gamx, double *Gamy, double *Gamz,
|
||||
double *Lap, double *betax, double *betay, double *betaz,
|
||||
double *dtSfx, double *dtSfy, double *dtSfz,
|
||||
double *TZ,
|
||||
double *chi_rhs, double *trK_rhs,
|
||||
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
|
||||
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
|
||||
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
|
||||
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
|
||||
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
|
||||
double *TZ_rhs,
|
||||
double *rho, double *Sx, double *Sy, double *Sz,
|
||||
double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
|
||||
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
|
||||
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
|
||||
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
|
||||
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
|
||||
double *Hcon, double *Mxcon, double *Mycon, double *Mzcon, double *Gmxcon, double *Gmycon, double *Gmzcon,
|
||||
int &Symmetry, int &Lev, double &eps, int &co)
|
||||
{
|
||||
(void)T;
|
||||
|
||||
const int nx = ex[0];
|
||||
const int ny = ex[1];
|
||||
const int nz = ex[2];
|
||||
const int all = nx * ny * nz;
|
||||
|
||||
double alpn1[all], chin1[all], gxx[all], gyy[all], gzz[all];
|
||||
double chix[all], chiy[all], chiz[all], chixx[all], chixy[all], chixz[all], chiyy[all], chiyz[all], chizz[all];
|
||||
double gxxx[all], gxyx[all], gxzx[all], gyyx[all], gyzx[all], gzzx[all];
|
||||
double gxxy[all], gxyy[all], gxzy[all], gyyy[all], gyzy[all], gzzy[all];
|
||||
double gxxz[all], gxyz[all], gxzz[all], gyyz[all], gyzz[all], gzzz[all];
|
||||
double gxxxx[all], gxxxy[all], gxxxz[all], gxxyy[all], gxxyz[all], gxxzz[all];
|
||||
double gxyxx[all], gxyxy[all], gxyxz[all], gxyyy[all], gxyyz[all], gxyzz[all];
|
||||
double gxzxx[all], gxzxy[all], gxzxz[all], gxzyy[all], gxzyz[all], gxzzz[all];
|
||||
double gyyxx[all], gyyxy[all], gyyxz[all], gyyyy[all], gyyyz[all], gyyzz[all];
|
||||
double gyzxx[all], gyzxy[all], gyzxz[all], gyzyy[all], gyzyz[all], gyzzz[all];
|
||||
double gzzxx[all], gzzxy[all], gzzxz[all], gzzyy[all], gzzyz[all], gzzzz[all];
|
||||
double Lapx[all], Lapy[all], Lapz[all], Lapxx[all], Lapxy[all], Lapxz[all], Lapyy[all], Lapyz[all], Lapzz[all];
|
||||
double betaxx[all], betaxy[all], betaxz[all], betayx[all], betayy[all], betayz[all], betazx[all], betazy[all], betazz[all];
|
||||
double dBxx[all], dBxy[all], dBxz[all], dByx[all], dByy[all], dByz[all], dBzx[all], dBzy[all], dBzz[all];
|
||||
double sfxxx[all], sfxxy[all], sfxxz[all], sfxyy[all], sfxyz[all], sfxzz[all];
|
||||
double sfyxx[all], sfyxy[all], sfyxz[all], sfyyy[all], sfyyz[all], sfyzz[all];
|
||||
double sfzxx[all], sfzxy[all], sfzxz[all], sfzyy[all], sfzyz[all], sfzzz[all];
|
||||
double Gamxx[all], Gamxy[all], Gamxz[all], Gamyx[all], Gamyy[all], Gamyz[all], Gamzx[all], Gamzy[all], Gamzz[all];
|
||||
double Kx[all], Ky[all], Kz[all], TZx[all], TZy[all], TZz[all];
|
||||
double Axxx[all], Axxy[all], Axxz[all], Axyx[all], Axyy[all], Axyz[all];
|
||||
double Axzx[all], Axzy[all], Axzz[all], Ayyx[all], Ayyy[all], Ayyz[all];
|
||||
double Ayzx[all], Ayzy[all], Ayzz[all], Azzx[all], Azzy[all], Azzz[all];
|
||||
|
||||
const double SSS[3] = {1.0, 1.0, 1.0};
|
||||
const double AAS[3] = {-1.0, -1.0, 1.0};
|
||||
const double ASA[3] = {-1.0, 1.0, -1.0};
|
||||
const double SAA[3] = {1.0, -1.0, -1.0};
|
||||
const double ASS[3] = {-1.0, 1.0, 1.0};
|
||||
const double SAS[3] = {1.0, -1.0, 1.0};
|
||||
const double SSA[3] = {1.0, 1.0, -1.0};
|
||||
|
||||
const double ONE = 1.0;
|
||||
const double TWO = 2.0;
|
||||
const double ZEO = 0.0;
|
||||
double chiDivfloor = 1.0e-5;
|
||||
|
||||
double kappa1 = 2.0e-2;
|
||||
double kappa2 = 0.0;
|
||||
double FF = 0.75;
|
||||
double eta = 2.0;
|
||||
|
||||
for (int idx = 0; idx < all; ++idx)
|
||||
{
|
||||
alpn1[idx] = Lap[idx] + ONE;
|
||||
chin1[idx] = chi_state[idx] + ONE;
|
||||
gxx[idx] = dxx[idx] + ONE;
|
||||
gyy[idx] = dyy[idx] + ONE;
|
||||
gzz[idx] = dzz[idx] + ONE;
|
||||
}
|
||||
|
||||
fderivs(ex, betax, betaxx, betaxy, betaxz, X, Y, Z, -1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, betay, betayx, betayy, betayz, X, Y, Z, 1.0, -1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, betaz, betazx, betazy, betazz, X, Y, Z, 1.0, 1.0, -1.0, Symmetry, Lev);
|
||||
fderivs(ex, dtSfx, dBxx, dBxy, dBxz, X, Y, Z, -1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, dtSfy, dByx, dByy, dByz, X, Y, Z, 1.0, -1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, dtSfz, dBzx, dBzy, dBzz, X, Y, Z, 1.0, 1.0, -1.0, Symmetry, Lev);
|
||||
fderivs(ex, chi_state, chix, chiy, chiz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, dxx, gxxx, gxxy, gxxz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, gxy, gxyx, gxyy, gxyz, X, Y, Z, -1.0, -1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, gxz, gxzx, gxzy, gxzz, X, Y, Z, -1.0, 1.0, -1.0, Symmetry, Lev);
|
||||
fderivs(ex, dyy, gyyx, gyyy, gyyz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, gyz, gyzx, gyzy, gyzz, X, Y, Z, 1.0, -1.0, -1.0, Symmetry, Lev);
|
||||
fderivs(ex, dzz, gzzx, gzzy, gzzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
|
||||
fdderivs(ex, dxx, gxxxx, gxxxy, gxxxz, gxxyy, gxxyz, gxxzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fdderivs(ex, dyy, gyyxx, gyyxy, gyyxz, gyyyy, gyyyz, gyyzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fdderivs(ex, dzz, gzzxx, gzzxy, gzzxz, gzzyy, gzzyz, gzzzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fdderivs(ex, gxy, gxyxx, gxyxy, gxyxz, gxyyy, gxyyz, gxyzz, X, Y, Z, -1.0, -1.0, 1.0, Symmetry, Lev);
|
||||
fdderivs(ex, gxz, gxzxx, gxzxy, gxzxz, gxzyy, gxzyz, gxzzz, X, Y, Z, -1.0, 1.0, -1.0, Symmetry, Lev);
|
||||
fdderivs(ex, gyz, gyzxx, gyzxy, gyzxz, gyzyy, gyzyz, gyzzz, X, Y, Z, 1.0, -1.0, -1.0, Symmetry, Lev);
|
||||
|
||||
fderivs(ex, Gamx, Gamxx, Gamxy, Gamxz, X, Y, Z, -1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, Gamy, Gamyx, Gamyy, Gamyz, X, Y, Z, 1.0, -1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, Gamz, Gamzx, Gamzy, Gamzz, X, Y, Z, 1.0, 1.0, -1.0, Symmetry, Lev);
|
||||
|
||||
fderivs(ex, Lap, Lapx, Lapy, Lapz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, trK, Kx, Ky, Kz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, TZ, TZx, TZy, TZz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
|
||||
fdderivs(ex, betax, sfxxx, sfxxy, sfxxz, sfxyy, sfxyz, sfxzz, X, Y, Z, -1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fdderivs(ex, betay, sfyxx, sfyxy, sfyxz, sfyyy, sfyyz, sfyzz, X, Y, Z, 1.0, -1.0, 1.0, Symmetry, Lev);
|
||||
fdderivs(ex, betaz, sfzxx, sfzxy, sfzxz, sfzyy, sfzyz, sfzzz, X, Y, Z, 1.0, 1.0, -1.0, Symmetry, Lev);
|
||||
|
||||
fdderivs(ex, chi_state, chixx, chixy, chixz, chiyy, chiyz, chizz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fdderivs(ex, Lap, Lapxx, Lapxy, Lapxz, Lapyy, Lapyz, Lapzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
|
||||
fderivs(ex, Axx, Axxx, Axxy, Axxz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, Axy, Axyx, Axyy, Axyz, X, Y, Z, -1.0, -1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, Axz, Axzx, Axzy, Axzz, X, Y, Z, -1.0, 1.0, -1.0, Symmetry, Lev);
|
||||
fderivs(ex, Ayy, Ayyx, Ayyy, Ayyz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
fderivs(ex, Ayz, Ayzx, Ayzy, Ayzz, X, Y, Z, 1.0, -1.0, -1.0, Symmetry, Lev);
|
||||
fderivs(ex, Azz, Azzx, Azzy, Azzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||
|
||||
for (int idx = 0; idx < all; ++idx)
|
||||
{
|
||||
double point_kappa1 = 0.0;
|
||||
f_z4c_rhs_point(
|
||||
Axx[idx], Axy[idx], Axz[idx], Ayy[idx], Ayz[idx], Azz[idx],
|
||||
alpn1[idx], dtSfx[idx], dtSfy[idx], dtSfz[idx],
|
||||
betax[idx], betay[idx], betaz[idx],
|
||||
chin1[idx], chiDivfloor,
|
||||
Lapx[idx],
|
||||
Axxx[idx], Axyx[idx], Axzx[idx], Ayyx[idx], Ayzx[idx], Azzx[idx],
|
||||
Lapy[idx],
|
||||
Axxy[idx], Axyy[idx], Axzy[idx], Ayyy[idx], Ayzy[idx], Azzy[idx],
|
||||
Lapz[idx],
|
||||
Axxz[idx], Axyz[idx], Axzz[idx], Ayyz[idx], Ayzz[idx], Azzz[idx],
|
||||
betaxx[idx], dBxx[idx], betayx[idx], dByx[idx], betazx[idx], dBzx[idx],
|
||||
betaxy[idx], dBxy[idx], betayy[idx], dByy[idx], betazy[idx], dBzy[idx],
|
||||
betaxz[idx], dBxz[idx], betayz[idx], dByz[idx], betazz[idx], dBzz[idx],
|
||||
chix[idx], chiy[idx], chiz[idx],
|
||||
Lapxx[idx], Lapxy[idx], Lapxz[idx], Lapyy[idx], Lapyz[idx], Lapzz[idx],
|
||||
sfxxx[idx], sfyxx[idx], sfzxx[idx],
|
||||
sfxxy[idx], sfyxy[idx], sfzxy[idx],
|
||||
sfxxz[idx], sfyxz[idx], sfzxz[idx],
|
||||
sfxyy[idx], sfyyy[idx], sfzyy[idx],
|
||||
sfxyz[idx], sfyyz[idx], sfzyz[idx],
|
||||
sfxzz[idx], sfyzz[idx], sfzzz[idx],
|
||||
chixx[idx], chixy[idx], chixz[idx], chiyy[idx], chiyz[idx], chizz[idx],
|
||||
gxxxx[idx], gxyxx[idx], gxzxx[idx], gyyxx[idx], gyzxx[idx], gzzxx[idx],
|
||||
gxxxy[idx], gxyxy[idx], gxzxy[idx], gyyxy[idx], gyzxy[idx], gzzxy[idx],
|
||||
gxxxz[idx], gxyxz[idx], gxzxz[idx], gyyxz[idx], gyzxz[idx], gzzxz[idx],
|
||||
gxxyy[idx], gxyyy[idx], gxzyy[idx], gyyyy[idx], gyzyy[idx], gzzyy[idx],
|
||||
gxxyz[idx], gxyyz[idx], gxzyz[idx], gyyyz[idx], gyzyz[idx], gzzyz[idx],
|
||||
gxxzz[idx], gxyzz[idx], gxzzz[idx], gyyzz[idx], gyzzz[idx], gzzzz[idx],
|
||||
Gamxx[idx], gxxx[idx], gxyx[idx], gxzx[idx],
|
||||
Gamyx[idx], gyyx[idx], gyzx[idx],
|
||||
Gamzx[idx], gzzx[idx],
|
||||
Gamxy[idx], gxxy[idx], gxyy[idx], gxzy[idx],
|
||||
Gamyy[idx], gyyy[idx], gyzy[idx],
|
||||
Gamzy[idx], gzzy[idx],
|
||||
Gamxz[idx], gxxz[idx], gxyz[idx], gxzz[idx],
|
||||
Gamyz[idx], gyyz[idx], gyzz[idx],
|
||||
Gamzz[idx], gzzz[idx],
|
||||
Kx[idx], Ky[idx], Kz[idx],
|
||||
TZx[idx], TZy[idx], TZz[idx],
|
||||
Gamx[idx], gxx[idx], gxy[idx], gxz[idx],
|
||||
Gamy[idx], gyy[idx], gyz[idx],
|
||||
Gamz[idx], gzz[idx],
|
||||
point_kappa1, kappa2,
|
||||
trK[idx],
|
||||
Axx_rhs[idx], Axy_rhs[idx], Axz_rhs[idx], Ayy_rhs[idx], Ayz_rhs[idx], Azz_rhs[idx],
|
||||
chi_rhs[idx],
|
||||
Gamx_rhs[idx], gxx_rhs[idx], gxy_rhs[idx], gxz_rhs[idx],
|
||||
Gamy_rhs[idx], gyy_rhs[idx], gyz_rhs[idx],
|
||||
Gamz_rhs[idx], gzz_rhs[idx], trK_rhs[idx], TZ_rhs[idx], TZ[idx]);
|
||||
}
|
||||
|
||||
for (int idx = 0; idx < all; ++idx)
|
||||
Lap_rhs[idx] = -TWO * alpn1[idx] * trK[idx];
|
||||
|
||||
#if (GAUGE == 0)
|
||||
for (int idx = 0; idx < all; ++idx)
|
||||
{
|
||||
betax_rhs[idx] = FF * dtSfx[idx];
|
||||
betay_rhs[idx] = FF * dtSfy[idx];
|
||||
betaz_rhs[idx] = FF * dtSfz[idx];
|
||||
dtSfx_rhs[idx] = Gamx_rhs[idx] - eta * dtSfx[idx];
|
||||
dtSfy_rhs[idx] = Gamy_rhs[idx] - eta * dtSfy[idx];
|
||||
dtSfz_rhs[idx] = Gamz_rhs[idx] - eta * dtSfz[idx];
|
||||
}
|
||||
#elif (GAUGE == 1)
|
||||
for (int idx = 0; idx < all; ++idx)
|
||||
{
|
||||
betax_rhs[idx] = Gamx[idx] - eta * betax[idx];
|
||||
betay_rhs[idx] = Gamy[idx] - eta * betay[idx];
|
||||
betaz_rhs[idx] = Gamz[idx] - eta * betaz[idx];
|
||||
dtSfx_rhs[idx] = ZEO;
|
||||
dtSfy_rhs[idx] = ZEO;
|
||||
dtSfz_rhs[idx] = ZEO;
|
||||
}
|
||||
#else
|
||||
#error "z4c_rhs_c.C currently supports GAUGE == 0 or GAUGE == 1 for Z4C"
|
||||
#endif
|
||||
|
||||
lopsided(ex, X, Y, Z, gxx, gxx_rhs, betax, betay, betaz, Symmetry, SSS);
|
||||
lopsided(ex, X, Y, Z, gxy, gxy_rhs, betax, betay, betaz, Symmetry, AAS);
|
||||
lopsided(ex, X, Y, Z, gxz, gxz_rhs, betax, betay, betaz, Symmetry, ASA);
|
||||
lopsided(ex, X, Y, Z, gyy, gyy_rhs, betax, betay, betaz, Symmetry, SSS);
|
||||
lopsided(ex, X, Y, Z, gyz, gyz_rhs, betax, betay, betaz, Symmetry, SAA);
|
||||
lopsided(ex, X, Y, Z, gzz, gzz_rhs, betax, betay, betaz, Symmetry, SSS);
|
||||
|
||||
lopsided(ex, X, Y, Z, Axx, Axx_rhs, betax, betay, betaz, Symmetry, SSS);
|
||||
lopsided(ex, X, Y, Z, Axy, Axy_rhs, betax, betay, betaz, Symmetry, AAS);
|
||||
lopsided(ex, X, Y, Z, Axz, Axz_rhs, betax, betay, betaz, Symmetry, ASA);
|
||||
lopsided(ex, X, Y, Z, Ayy, Ayy_rhs, betax, betay, betaz, Symmetry, SSS);
|
||||
lopsided(ex, X, Y, Z, Ayz, Ayz_rhs, betax, betay, betaz, Symmetry, SAA);
|
||||
lopsided(ex, X, Y, Z, Azz, Azz_rhs, betax, betay, betaz, Symmetry, SSS);
|
||||
|
||||
lopsided(ex, X, Y, Z, chi_state, chi_rhs, betax, betay, betaz, Symmetry, SSS);
|
||||
lopsided(ex, X, Y, Z, trK, trK_rhs, betax, betay, betaz, Symmetry, SSS);
|
||||
|
||||
lopsided(ex, X, Y, Z, Gamx, Gamx_rhs, betax, betay, betaz, Symmetry, ASS);
|
||||
lopsided(ex, X, Y, Z, Gamy, Gamy_rhs, betax, betay, betaz, Symmetry, SAS);
|
||||
lopsided(ex, X, Y, Z, Gamz, Gamz_rhs, betax, betay, betaz, Symmetry, SSA);
|
||||
|
||||
lopsided(ex, X, Y, Z, Lap, Lap_rhs, betax, betay, betaz, Symmetry, SSS);
|
||||
lopsided(ex, X, Y, Z, betax, betax_rhs, betax, betay, betaz, Symmetry, ASS);
|
||||
lopsided(ex, X, Y, Z, betay, betay_rhs, betax, betay, betaz, Symmetry, SAS);
|
||||
lopsided(ex, X, Y, Z, betaz, betaz_rhs, betax, betay, betaz, Symmetry, SSA);
|
||||
#if (GAUGE == 0)
|
||||
lopsided(ex, X, Y, Z, dtSfx, dtSfx_rhs, betax, betay, betaz, Symmetry, ASS);
|
||||
lopsided(ex, X, Y, Z, dtSfy, dtSfy_rhs, betax, betay, betaz, Symmetry, SAS);
|
||||
lopsided(ex, X, Y, Z, dtSfz, dtSfz_rhs, betax, betay, betaz, Symmetry, SSA);
|
||||
#endif
|
||||
lopsided(ex, X, Y, Z, TZ, TZ_rhs, betax, betay, betaz, Symmetry, SSS);
|
||||
|
||||
for (int idx = 0; idx < all; ++idx)
|
||||
{
|
||||
double Gamxa = 0.0, Gamya = 0.0, Gamza = 0.0;
|
||||
z4c_contract_gamma(
|
||||
gxx[idx], gxy[idx], gxz[idx], gyy[idx], gyz[idx], gzz[idx],
|
||||
gxxx[idx], gxyx[idx], gxzx[idx], gyyx[idx], gyzx[idx], gzzx[idx],
|
||||
gxxy[idx], gxyy[idx], gxzy[idx], gyyy[idx], gyzy[idx], gzzy[idx],
|
||||
gxxz[idx], gxyz[idx], gxzz[idx], gyyz[idx], gyzz[idx], gzzz[idx],
|
||||
Gamxa, Gamya, Gamza);
|
||||
|
||||
TZ_rhs[idx] -= alpn1[idx] * (TWO + kappa2) * kappa1 * TZ[idx];
|
||||
trK_rhs[idx] += alpn1[idx] * kappa1 * (ONE - kappa2) * TZ[idx];
|
||||
Gamx_rhs[idx] -= TWO * alpn1[idx] * kappa1 * (Gamx[idx] - Gamxa);
|
||||
Gamy_rhs[idx] -= TWO * alpn1[idx] * kappa1 * (Gamy[idx] - Gamya);
|
||||
Gamz_rhs[idx] -= TWO * alpn1[idx] * kappa1 * (Gamz[idx] - Gamza);
|
||||
}
|
||||
|
||||
if (eps > 0.0)
|
||||
{
|
||||
kodis(ex, X, Y, Z, chi_state, chi_rhs, SSS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, trK, trK_rhs, SSS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, gxx, gxx_rhs, SSS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, gxy, gxy_rhs, AAS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, gxz, gxz_rhs, ASA, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, gyy, gyy_rhs, SSS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, gyz, gyz_rhs, SAA, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, gzz, gzz_rhs, SSS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, Axx, Axx_rhs, SSS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, Axy, Axy_rhs, AAS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, Axz, Axz_rhs, ASA, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, Ayy, Ayy_rhs, SSS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, Ayz, Ayz_rhs, SAA, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, Azz, Azz_rhs, SSS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, Gamx, Gamx_rhs, ASS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, Gamy, Gamy_rhs, SAS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, Gamz, Gamz_rhs, SSA, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, Lap, Lap_rhs, SSS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, betax, betax_rhs, ASS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, betay, betay_rhs, SAS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, betaz, betaz_rhs, SSA, Symmetry, eps);
|
||||
#if (GAUGE == 0)
|
||||
kodis(ex, X, Y, Z, dtSfx, dtSfx_rhs, ASS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, dtSfy, dtSfy_rhs, SAS, Symmetry, eps);
|
||||
kodis(ex, X, Y, Z, dtSfz, dtSfz_rhs, SSA, Symmetry, eps);
|
||||
#endif
|
||||
kodis(ex, X, Y, Z, TZ, TZ_rhs, SSS, Symmetry, eps);
|
||||
}
|
||||
|
||||
if (co == 0)
|
||||
{
|
||||
#if (ABV == 0)
|
||||
f_ricci_gamma(ex, X, Y, Z,
|
||||
chi_constraints,
|
||||
dxx, gxy, gxz, dyy, gyz, dzz,
|
||||
Gamx, Gamy, Gamz,
|
||||
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
|
||||
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
|
||||
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
|
||||
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
|
||||
Symmetry);
|
||||
#endif
|
||||
f_constraint_bssn(ex, X, Y, Z,
|
||||
chi_constraints, trK,
|
||||
dxx, gxy, gxz, dyy, gyz, dzz,
|
||||
Axx, Axy, Axz, Ayy, Ayz, Azz,
|
||||
Gamx, Gamy, Gamz,
|
||||
Lap, betax, betay, betaz, rho, Sx, Sy, Sz,
|
||||
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
|
||||
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
|
||||
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
|
||||
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
|
||||
Hcon, Mxcon, Mycon, Mzcon, Gmxcon, Gmycon, Gmzcon,
|
||||
Symmetry);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern "C" int f_compute_rhs_Z4c(int *ex, double &T,
|
||||
double *X, double *Y, double *Z,
|
||||
double *chi, double *trK,
|
||||
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
|
||||
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
|
||||
double *Gamx, double *Gamy, double *Gamz,
|
||||
double *Lap, double *betax, double *betay, double *betaz,
|
||||
double *dtSfx, double *dtSfy, double *dtSfz,
|
||||
double *TZ,
|
||||
double *chi_rhs, double *trK_rhs,
|
||||
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
|
||||
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
|
||||
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
|
||||
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
|
||||
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
|
||||
double *TZ_rhs,
|
||||
double *rho, double *Sx, double *Sy, double *Sz,
|
||||
double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
|
||||
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
|
||||
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
|
||||
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
|
||||
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
|
||||
double *Hcon, double *Mxcon, double *Mycon, double *Mzcon, double *Gmxcon, double *Gmycon, double *Gmzcon,
|
||||
int &Symmetry, int &Lev, double &eps, int &co)
|
||||
{
|
||||
return compute_rhs_z4c_cartesian(
|
||||
ex, T, X, Y, Z,
|
||||
chi, chi, trK,
|
||||
dxx, gxy, gxz, dyy, gyz, dzz,
|
||||
Axx, Axy, Axz, Ayy, Ayz, Azz,
|
||||
Gamx, Gamy, Gamz,
|
||||
Lap, betax, betay, betaz,
|
||||
dtSfx, dtSfy, dtSfz,
|
||||
TZ,
|
||||
chi_rhs, trK_rhs,
|
||||
gxx_rhs, gxy_rhs, gxz_rhs, gyy_rhs, gyz_rhs, gzz_rhs,
|
||||
Axx_rhs, Axy_rhs, Axz_rhs, Ayy_rhs, Ayz_rhs, Azz_rhs,
|
||||
Gamx_rhs, Gamy_rhs, Gamz_rhs,
|
||||
Lap_rhs, betax_rhs, betay_rhs, betaz_rhs,
|
||||
dtSfx_rhs, dtSfy_rhs, dtSfz_rhs,
|
||||
TZ_rhs,
|
||||
rho, Sx, Sy, Sz,
|
||||
Sxx, Sxy, Sxz, Syy, Syz, Szz,
|
||||
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
|
||||
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
|
||||
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
|
||||
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
|
||||
Hcon, Mxcon, Mycon, Mzcon, Gmxcon, Gmycon, Gmzcon,
|
||||
Symmetry, Lev, eps, co);
|
||||
}
|
||||
|
||||
extern "C" int f_compute_rhs_Z4cnot(int *ex, double &T,
|
||||
double *X, double *Y, double *Z,
|
||||
double *chi, double *trK,
|
||||
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
|
||||
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
|
||||
double *Gamx, double *Gamy, double *Gamz,
|
||||
double *Lap, double *betax, double *betay, double *betaz,
|
||||
double *dtSfx, double *dtSfy, double *dtSfz,
|
||||
double *TZ,
|
||||
double *chi_rhs, double *trK_rhs,
|
||||
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
|
||||
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
|
||||
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
|
||||
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
|
||||
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
|
||||
double *TZ_rhs,
|
||||
double *rho, double *Sx, double *Sy, double *Sz,
|
||||
double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
|
||||
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
|
||||
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
|
||||
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
|
||||
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
|
||||
double *Hcon, double *Mxcon, double *Mycon, double *Mzcon, double *Gmxcon, double *Gmycon, double *Gmzcon,
|
||||
int &Symmetry, int &Lev, double &eps, int &co, double &chitiny)
|
||||
{
|
||||
const int all = ex[0] * ex[1] * ex[2];
|
||||
std::vector<double> chi_clamped(chi, chi + all);
|
||||
f_lowerboundset(ex, chi_clamped.data(), chitiny);
|
||||
|
||||
const int ret = compute_rhs_z4c_cartesian(
|
||||
ex, T, X, Y, Z,
|
||||
chi_clamped.data(), chi, trK,
|
||||
dxx, gxy, gxz, dyy, gyz, dzz,
|
||||
Axx, Axy, Axz, Ayy, Ayz, Azz,
|
||||
Gamx, Gamy, Gamz,
|
||||
Lap, betax, betay, betaz,
|
||||
dtSfx, dtSfy, dtSfz,
|
||||
TZ,
|
||||
chi_rhs, trK_rhs,
|
||||
gxx_rhs, gxy_rhs, gxz_rhs, gyy_rhs, gyz_rhs, gzz_rhs,
|
||||
Axx_rhs, Axy_rhs, Axz_rhs, Ayy_rhs, Ayz_rhs, Azz_rhs,
|
||||
Gamx_rhs, Gamy_rhs, Gamz_rhs,
|
||||
Lap_rhs, betax_rhs, betay_rhs, betaz_rhs,
|
||||
dtSfx_rhs, dtSfy_rhs, dtSfz_rhs,
|
||||
TZ_rhs,
|
||||
rho, Sx, Sy, Sz,
|
||||
Sxx, Sxy, Sxz, Syy, Syz, Szz,
|
||||
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
|
||||
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
|
||||
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
|
||||
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
|
||||
Hcon, Mxcon, Mycon, Mzcon, Gmxcon, Gmycon, Gmzcon,
|
||||
Symmetry, Lev, eps, co);
|
||||
|
||||
if (ret != 0 || co != 0)
|
||||
return ret;
|
||||
|
||||
#if (ABV == 0)
|
||||
f_ricci_gamma(ex, X, Y, Z,
|
||||
chi,
|
||||
dxx, gxy, gxz, dyy, gyz, dzz,
|
||||
Gamx, Gamy, Gamz,
|
||||
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
|
||||
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
|
||||
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
|
||||
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
|
||||
Symmetry);
|
||||
#endif
|
||||
f_constraint_bssn(ex, X, Y, Z,
|
||||
chi, trK,
|
||||
dxx, gxy, gxz, dyy, gyz, dzz,
|
||||
Axx, Axy, Axz, Ayy, Ayz, Azz,
|
||||
Gamx, Gamy, Gamz,
|
||||
Lap, betax, betay, betaz, rho, Sx, Sy, Sz,
|
||||
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
|
||||
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
|
||||
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
|
||||
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
|
||||
Hcon, Mxcon, Mycon, Mzcon, Gmxcon, Gmycon, Gmzcon,
|
||||
Symmetry);
|
||||
return ret;
|
||||
}
|
||||
7505
AMSS_NCKU_source/z4c_rhs_cuda.cu
Normal file
7505
AMSS_NCKU_source/z4c_rhs_cuda.cu
Normal file
File diff suppressed because it is too large
Load Diff
83
AMSS_NCKU_source/z4c_rhs_cuda.h
Normal file
83
AMSS_NCKU_source/z4c_rhs_cuda.h
Normal file
@@ -0,0 +1,83 @@
|
||||
#ifndef Z4C_RHS_CUDA_H
|
||||
#define Z4C_RHS_CUDA_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum {
|
||||
Z4C_CUDA_STATE_COUNT = 25
|
||||
};
|
||||
|
||||
int z4c_cuda_rk4_substep(void *block_tag,
|
||||
int *ex, double *X, double *Y, double *Z,
|
||||
double **state_host_in,
|
||||
double **state_host_out,
|
||||
const double *propspeed,
|
||||
const double *soa_flat,
|
||||
const double *bbox,
|
||||
double &dT,
|
||||
double &T,
|
||||
int &RK4,
|
||||
int &apply_bam_bc,
|
||||
int &Symmetry,
|
||||
int &Lev,
|
||||
double &eps,
|
||||
int &co,
|
||||
int &keep_resident_state,
|
||||
int &apply_enforce_ga,
|
||||
double &chitiny);
|
||||
|
||||
int z4c_cuda_download_resident_state(void *block_tag,
|
||||
int *ex,
|
||||
double **state_host_out);
|
||||
|
||||
int z4c_cuda_pack_state_region_to_host_buffer(void *block_tag,
|
||||
int state_index,
|
||||
double *host_buffer,
|
||||
int *ex,
|
||||
int i0, int j0, int k0,
|
||||
int sx, int sy, int sz);
|
||||
|
||||
int z4c_cuda_unpack_state_region_from_host_buffer(void *block_tag,
|
||||
int state_index,
|
||||
double *host_buffer,
|
||||
int *ex,
|
||||
int i0, int j0, int k0,
|
||||
int sx, int sy, int sz);
|
||||
|
||||
int z4c_cuda_pack_state_batch_to_host_buffer(void *block_tag,
|
||||
int state_count,
|
||||
double *host_buffer,
|
||||
int *ex,
|
||||
int i0, int j0, int k0,
|
||||
int sx, int sy, int sz);
|
||||
|
||||
int z4c_cuda_unpack_state_batch_from_host_buffer(void *block_tag,
|
||||
int state_count,
|
||||
double *host_buffer,
|
||||
int *ex,
|
||||
int i0, int j0, int k0,
|
||||
int sx, int sy, int sz);
|
||||
|
||||
int z4c_cuda_download_state_subset(void *block_tag,
|
||||
int *ex,
|
||||
int subset_count,
|
||||
const int *state_indices,
|
||||
double **state_host_out);
|
||||
|
||||
int z4c_cuda_upload_state_subset(void *block_tag,
|
||||
int *ex,
|
||||
int subset_count,
|
||||
const int *state_indices,
|
||||
double **state_host_in);
|
||||
|
||||
int z4c_cuda_has_resident_state(void *block_tag);
|
||||
|
||||
void z4c_cuda_release_step_ctx(void *block_tag);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -144,11 +144,67 @@ def generate_macrodef_h():
|
||||
print( "#define REGLEV 0", file=file1 )
|
||||
print( file=file1 )
|
||||
|
||||
# Define fine-grained timing/debug macros.
|
||||
# All of them default to OFF so production builds do not pay profiling overhead.
|
||||
|
||||
fine_timing = getattr(input_data, "Fine_Timing",
|
||||
getattr(input_data, "Finegrained_Timing", "no"))
|
||||
kernel_fine_timing = getattr(input_data, "Kernel_Fine_Timing",
|
||||
getattr(input_data, "BSSN_Kernel_Fine_Timing", "no"))
|
||||
stdin_abort_poll = getattr(input_data, "Enable_Stdin_Abort_Poll",
|
||||
getattr(input_data, "Stdin_Abort_Poll", "no"))
|
||||
timing_report_every = max(1, int(getattr(
|
||||
input_data, "Timing_Every_Steps",
|
||||
getattr(input_data, "Timing_Report_Every", 1))))
|
||||
timing_top_hotspots = max(1, int(getattr(
|
||||
input_data, "Timing_Top_Hotspots", 8)))
|
||||
|
||||
if ( fine_timing == "yes" ):
|
||||
print( "#define BSSN_FINE_TIMING 1", file=file1 )
|
||||
print( file=file1 )
|
||||
elif ( fine_timing == "no" ):
|
||||
print( "#define BSSN_FINE_TIMING 0", file=file1 )
|
||||
print( file=file1 )
|
||||
else:
|
||||
print( "Fine_Timing setting error!!!" )
|
||||
print()
|
||||
print( "# Fine_Timing setting error!!!", file=file1 )
|
||||
print( file=file1 )
|
||||
|
||||
print( f"#define BSSN_FINE_TIMING_EVERY {timing_report_every}", file=file1 )
|
||||
print( file=file1 )
|
||||
print( f"#define BSSN_FINE_TIMING_TOPN {timing_top_hotspots}", file=file1 )
|
||||
print( file=file1 )
|
||||
|
||||
if ( kernel_fine_timing == "yes" ):
|
||||
print( "#define BSSN_KERNEL_FINE_TIMING 1", file=file1 )
|
||||
print( file=file1 )
|
||||
elif ( kernel_fine_timing == "no" ):
|
||||
print( "#define BSSN_KERNEL_FINE_TIMING 0", file=file1 )
|
||||
print( file=file1 )
|
||||
else:
|
||||
print( "Kernel_Fine_Timing setting error!!!" )
|
||||
print()
|
||||
print( "# Kernel_Fine_Timing setting error!!!", file=file1 )
|
||||
print( file=file1 )
|
||||
|
||||
if ( stdin_abort_poll == "yes" ):
|
||||
print( "#define BSSN_ENABLE_STDIN_ABORT_POLL 1", file=file1 )
|
||||
print( file=file1 )
|
||||
elif ( stdin_abort_poll == "no" ):
|
||||
print( "#define BSSN_ENABLE_STDIN_ABORT_POLL 0", file=file1 )
|
||||
print( file=file1 )
|
||||
else:
|
||||
print( "Enable_Stdin_Abort_Poll setting error!!!" )
|
||||
print()
|
||||
print( "# Enable_Stdin_Abort_Poll setting error!!!", file=file1 )
|
||||
print( file=file1 )
|
||||
|
||||
# Define macro USE_GPU
|
||||
# use GPU or not
|
||||
|
||||
if ( input_data.GPU_Calculation == "yes"):
|
||||
print( "#define USE_GPU", file=file1 )
|
||||
print( "//#define USE_GPU", file=file1 )
|
||||
print( file=file1 )
|
||||
elif ( input_data.GPU_Calculation == "no"):
|
||||
print( "//#define USE_GPU", file=file1 )
|
||||
@@ -224,6 +280,21 @@ def generate_macrodef_h():
|
||||
print( "// 0: for every level;", file=file1 )
|
||||
print( "// 1: for all", file=file1 )
|
||||
print( "//", file=file1 )
|
||||
print( "// define BSSN_FINE_TIMING", file=file1 )
|
||||
print( "// enable fine-grained per-timestep timing monitor", file=file1 )
|
||||
print( "//", file=file1 )
|
||||
print( "// define BSSN_FINE_TIMING_EVERY", file=file1 )
|
||||
print( "// report timing every N coarse timesteps", file=file1 )
|
||||
print( "//", file=file1 )
|
||||
print( "// define BSSN_FINE_TIMING_TOPN", file=file1 )
|
||||
print( "// number of hottest timing buckets shown in stdout", file=file1 )
|
||||
print( "//", file=file1 )
|
||||
print( "// define BSSN_KERNEL_FINE_TIMING", file=file1 )
|
||||
print( "// enable split timing inside compute_rhs_bssn", file=file1 )
|
||||
print( "//", file=file1 )
|
||||
print( "// define BSSN_ENABLE_STDIN_ABORT_POLL", file=file1 )
|
||||
print( "// poll stdin and broadcast abort flag every coarse step", file=file1 )
|
||||
print( "//", file=file1 )
|
||||
print( "// define USE_GPU", file=file1 )
|
||||
print( "// use gpu or not", file=file1 )
|
||||
print( "//", file=file1 )
|
||||
|
||||
@@ -43,7 +43,8 @@ def get_last_n_cores_per_socket(n=32):
|
||||
cpu_str = ",".join(segments)
|
||||
total = len(segments) * n
|
||||
print(f" CPU binding: taskset -c {cpu_str} ({total} cores, last {n} per socket)")
|
||||
return f"taskset -c {cpu_str}"
|
||||
#return f"taskset -c {cpu_str}"
|
||||
return f""
|
||||
|
||||
|
||||
## CPU core binding: dynamically select the last 32 cores of each socket (64 cores total)
|
||||
@@ -69,9 +70,9 @@ def makefile_ABE():
|
||||
|
||||
## Build command with CPU binding to nohz_full cores
|
||||
if (input_data.GPU_Calculation == "no"):
|
||||
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} INTERP_LB_MODE=optimize ABE"
|
||||
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} INTERP_LB_MODE=off USE_CUDA_BSSN=0 USE_CUDA_Z4C=0 ABE"
|
||||
elif (input_data.GPU_Calculation == "yes"):
|
||||
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABEGPU"
|
||||
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} INTERP_LB_MODE=off USE_CUDA_BSSN=1 USE_CUDA_Z4C=1 ABE_CUDA"
|
||||
else:
|
||||
print( " CPU/GPU numerical calculation setting is wrong " )
|
||||
print( )
|
||||
@@ -150,7 +151,7 @@ def run_ABE():
|
||||
#mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
||||
mpi_command_outfile = "ABE_out.log"
|
||||
elif (input_data.GPU_Calculation == "yes"):
|
||||
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
|
||||
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE_CUDA"
|
||||
mpi_command_outfile = "ABEGPU_out.log"
|
||||
|
||||
## Execute the MPI command and stream output
|
||||
|
||||
@@ -1,97 +0,0 @@
|
||||
# AMSS-NCKU PGO Profile Analysis Report
|
||||
|
||||
## 1. Profiling Environment
|
||||
|
||||
| Item | Value |
|
||||
|------|-------|
|
||||
| Compiler | Intel oneAPI DPC++/C++ 2025.3.0 (icpx/ifx) |
|
||||
| Instrumentation Flag | `-fprofile-instr-generate` |
|
||||
| Optimization Level (instrumented) | `-O2 -xHost -fma` |
|
||||
| MPI Processes | 1 (single process to avoid MPI+instrumentation deadlock) |
|
||||
| Profile File | `default_9725750769337483397_0.profraw` (327 KB) |
|
||||
| Merged Profile | `default.profdata` (394 KB) |
|
||||
| llvm-profdata | `/home/intel/oneapi/compiler/2025.3/bin/compiler/llvm-profdata` |
|
||||
|
||||
## 2. Reduced Simulation Parameters (for profiling run)
|
||||
|
||||
| Parameter | Production Value | Profiling Value |
|
||||
|-----------|-----------------|-----------------|
|
||||
| MPI_processes | 64 | 1 |
|
||||
| grid_level | 9 | 4 |
|
||||
| static_grid_level | 5 | 3 |
|
||||
| static_grid_number | 96 | 24 |
|
||||
| moving_grid_number | 48 | 16 |
|
||||
| largest_box_xyz_max | 320^3 | 160^3 |
|
||||
| Final_Evolution_Time | 1000.0 | 10.0 |
|
||||
| Evolution_Step_Number | 10,000,000 | 1,000 |
|
||||
| Detector_Number | 12 | 2 |
|
||||
|
||||
## 3. Profile Summary
|
||||
|
||||
| Metric | Value |
|
||||
|--------|-------|
|
||||
| Total instrumented functions | 1,392 |
|
||||
| Functions with non-zero counts | 117 (8.4%) |
|
||||
| Functions with zero counts | 1,275 (91.6%) |
|
||||
| Maximum function entry count | 386,459,248 |
|
||||
| Maximum internal block count | 370,477,680 |
|
||||
| Total block count | 4,198,023,118 |
|
||||
|
||||
## 4. Top 20 Hotspot Functions
|
||||
|
||||
| Rank | Total Count | Max Block Count | Function | Category |
|
||||
|------|------------|-----------------|----------|----------|
|
||||
| 1 | 1,241,601,732 | 370,477,680 | `polint_` | Interpolation |
|
||||
| 2 | 755,994,435 | 230,156,640 | `prolong3_` | Grid prolongation |
|
||||
| 3 | 667,964,095 | 3,697,792 | `compute_rhs_bssn_` | BSSN RHS evolution |
|
||||
| 4 | 539,736,051 | 386,459,248 | `symmetry_bd_` | Symmetry boundary |
|
||||
| 5 | 277,310,808 | 53,170,728 | `lopsided_` | Lopsided FD stencil |
|
||||
| 6 | 155,534,488 | 94,535,040 | `decide3d_` | 3D grid decision |
|
||||
| 7 | 119,267,712 | 19,266,048 | `rungekutta4_rout_` | RK4 time integrator |
|
||||
| 8 | 91,574,616 | 48,824,160 | `kodis_` | Kreiss-Oliger dissipation |
|
||||
| 9 | 67,555,389 | 43,243,680 | `fderivs_` | Finite differences |
|
||||
| 10 | 55,296,000 | 42,246,144 | `misc::fact(int)` | Factorial utility |
|
||||
| 11 | 43,191,071 | 27,663,328 | `fdderivs_` | 2nd-order FD derivatives |
|
||||
| 12 | 36,233,965 | 22,429,440 | `restrict3_` | Grid restriction |
|
||||
| 13 | 24,698,512 | 17,231,520 | `polin3_` | Polynomial interpolation |
|
||||
| 14 | 22,962,942 | 20,968,768 | `copy_` | Data copy |
|
||||
| 15 | 20,135,696 | 17,259,168 | `Ansorg::barycentric(...)` | Spectral interpolation |
|
||||
| 16 | 14,650,224 | 7,224,768 | `Ansorg::barycentric_omega(...)` | Spectral weights |
|
||||
| 17 | 13,242,296 | 2,871,920 | `global_interp_` | Global interpolation |
|
||||
| 18 | 12,672,000 | 7,734,528 | `sommerfeld_rout_` | Sommerfeld boundary |
|
||||
| 19 | 6,872,832 | 1,880,064 | `sommerfeld_routbam_` | Sommerfeld boundary (BAM) |
|
||||
| 20 | 5,709,900 | 2,809,632 | `l2normhelper_` | L2 norm computation |
|
||||
|
||||
## 5. Hotspot Category Breakdown
|
||||
|
||||
Top 20 functions account for ~98% of total execution counts:
|
||||
|
||||
| Category | Functions | Combined Count | Share |
|
||||
|----------|-----------|---------------|-------|
|
||||
| Interpolation / Prolongation / Restriction | polint_, prolong3_, restrict3_, polin3_, global_interp_, Ansorg::* | ~2,093M | ~50% |
|
||||
| BSSN RHS + FD stencils | compute_rhs_bssn_, lopsided_, fderivs_, fdderivs_ | ~1,056M | ~25% |
|
||||
| Boundary conditions | symmetry_bd_, sommerfeld_rout_, sommerfeld_routbam_ | ~559M | ~13% |
|
||||
| Time integration | rungekutta4_rout_ | ~119M | ~3% |
|
||||
| Dissipation | kodis_ | ~92M | ~2% |
|
||||
| Utilities | misc::fact, decide3d_, copy_, l2normhelper_ | ~256M | ~6% |
|
||||
|
||||
## 6. Conclusions
|
||||
|
||||
1. **Profile data is valid**: 1,392 functions instrumented, 117 exercised with ~4.2 billion total counts.
|
||||
2. **Hotspot concentration is high**: Top 5 functions alone account for ~76% of all counts, which is ideal for PGO — the compiler has strong branch/layout optimization targets.
|
||||
3. **Fortran numerical kernels dominate**: `polint_`, `prolong3_`, `compute_rhs_bssn_`, `symmetry_bd_`, `lopsided_` are all Fortran routines in the inner evolution loop. PGO will optimize their branch prediction and basic block layout.
|
||||
4. **91.6% of functions have zero counts**: These are code paths for unused features (GPU, BSSN-EScalar, BSSN-EM, Z4C, etc.). PGO will deprioritize them, improving instruction cache utilization.
|
||||
5. **Profile is representative**: Despite the reduced grid size, the code path coverage matches production — the same kernels (RHS, prolongation, restriction, boundary) are exercised. PGO branch probabilities from this profile will transfer well to full-scale runs.
|
||||
|
||||
## 7. PGO Phase 2 Usage
|
||||
|
||||
To apply the profile, use the following flags in `makefile.inc`:
|
||||
|
||||
```makefile
|
||||
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||
-fprofile-instr-use=/home/amss/AMSS-NCKU/pgo_profile/default.profdata \
|
||||
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
||||
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||
-fprofile-instr-use=/home/amss/AMSS-NCKU/pgo_profile/default.profdata \
|
||||
-align array64byte -fpp -I${MKLROOT}/include
|
||||
```
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user