Compare commits

..

1 Commits

Author SHA1 Message Date
wingrew
19b0e79692 黄老板逆天重写 2026-03-01 05:48:40 +08:00
259 changed files with 275145 additions and 263563 deletions

6
.idea/vcs.xml generated
View File

@@ -1,6 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>

4877
2.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -16,7 +16,7 @@ import numpy
File_directory = "GW150914" ## output file directory File_directory = "GW150914" ## output file directory
Output_directory = "binary_output" ## binary data file directory Output_directory = "binary_output" ## binary data file directory
## The file directory name should not be too long ## The file directory name should not be too long
MPI_processes = 64 ## number of mpi processes used in the simulation MPI_processes = 2 ## number of mpi processes used in the simulation
GPU_Calculation = "no" ## Use GPU or not GPU_Calculation = "no" ## Use GPU or not
## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface) ## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)
@@ -50,7 +50,7 @@ Check_Time = 100.0
Dump_Time = 100.0 ## time inteval dT for dumping binary data Dump_Time = 100.0 ## time inteval dT for dumping binary data
D2_Dump_Time = 100.0 ## dump the ascii data for 2d surface after dT' D2_Dump_Time = 100.0 ## dump the ascii data for 2d surface after dT'
Analysis_Time = 0.1 ## dump the puncture position and GW psi4 after dT" Analysis_Time = 0.1 ## dump the puncture position and GW psi4 after dT"
Evolution_Step_Number = 10000000 ## stop the calculation after the maximal step number Evolution_Step_Number = 6 ## stop the calculation after the maximal step number
Courant_Factor = 0.5 ## Courant Factor Courant_Factor = 0.5 ## Courant Factor
Dissipation = 0.15 ## Kreiss-Oliger Dissipation Strength Dissipation = 0.15 ## Kreiss-Oliger Dissipation Strength

View File

@@ -8,14 +8,6 @@
## ##
################################################################## ##################################################################
## Guard against re-execution by multiprocessing child processes.
## Without this, using 'spawn' or 'forkserver' context would cause every
## worker to re-run the entire script, spawning exponentially more
## workers (fork bomb).
if __name__ != '__main__':
import sys as _sys
_sys.exit(0)
################################################################## ##################################################################
@@ -57,32 +49,32 @@ import time
File_directory = os.path.join(input_data.File_directory) File_directory = os.path.join(input_data.File_directory)
## If the specified output directory exists, ask the user whether to continue ## If the specified output directory exists, ask the user whether to continue
if os.path.exists(File_directory): # if os.path.exists(File_directory):
print( " Output dictionary has been existed !!! " ) # print( " Output dictionary has been existed !!! " )
print( " If you want to overwrite the existing file directory, please input 'continue' in the terminal !! " ) # print( " If you want to overwrite the existing file directory, please input 'continue' in the terminal !! " )
print( " If you want to retain the existing file directory, please input 'stop' in the terminal to stop the " ) # print( " If you want to retain the existing file directory, please input 'stop' in the terminal to stop the " )
print( " simulation. Then you can reset the output dictionary in the input script file AMSS_NCKU_Input.py !!! " ) # print( " simulation. Then you can reset the output dictionary in the input script file AMSS_NCKU_Input.py !!! " )
print( ) # print( )
## Prompt whether to overwrite the existing directory # ## Prompt whether to overwrite the existing directory
while True: # while True:
try: # try:
inputvalue = input() # inputvalue = input()
## If the user agrees to overwrite, proceed and remove the existing directory # ## If the user agrees to overwrite, proceed and remove the existing directory
if ( inputvalue == "continue" ): # if ( inputvalue == "continue" ):
print( " Continue the calculation !!! " ) # print( " Continue the calculation !!! " )
print( ) # print( )
break # break
## If the user chooses not to overwrite, exit and keep the existing directory # ## If the user chooses not to overwrite, exit and keep the existing directory
elif ( inputvalue == "stop" ): # elif ( inputvalue == "stop" ):
print( " Stop the calculation !!! " ) # print( " Stop the calculation !!! " )
sys.exit() # sys.exit()
## If the user input is invalid, prompt again # ## If the user input is invalid, prompt again
else: # else:
print( " Please input your choice !!! " ) # print( " Please input your choice !!! " )
print( " Input 'continue' or 'stop' in the terminal !!! " ) # print( " Input 'continue' or 'stop' in the terminal !!! " )
except ValueError: # except ValueError:
print( " Please input your choice !!! " ) # print( " Please input your choice !!! " )
print( " Input 'continue' or 'stop' in the terminal !!! " ) # print( " Input 'continue' or 'stop' in the terminal !!! " )
## Remove the existing output directory if present ## Remove the existing output directory if present
shutil.rmtree(File_directory, ignore_errors=True) shutil.rmtree(File_directory, ignore_errors=True)
@@ -126,6 +118,11 @@ setup.generate_AMSSNCKU_input()
#inputvalue = input() ## Wait for user input (press Enter) to proceed #inputvalue = input() ## Wait for user input (press Enter) to proceed
#print() #print()
setup.print_puncture_information()
##################################################################
## Generate AMSS-NCKU program input files based on the configured parameters ## Generate AMSS-NCKU program input files based on the configured parameters
print( ) print( )
@@ -265,12 +262,6 @@ if not os.path.exists( ABE_file ):
## Copy the executable ABE (or ABEGPU) into the run directory ## Copy the executable ABE (or ABEGPU) into the run directory
shutil.copy2(ABE_file, output_directory) shutil.copy2(ABE_file, output_directory)
## Copy interp load balance profile if present (for optimize pass)
interp_lb_profile = os.path.join(AMSS_NCKU_source_copy, "interp_lb_profile.bin")
if os.path.exists(interp_lb_profile):
shutil.copy2(interp_lb_profile, output_directory)
print( " Copied interp_lb_profile.bin to run directory " )
########################### ###########################
## If the initial-data method is TwoPuncture, copy the TwoPunctureABE executable to the run directory ## If the initial-data method is TwoPuncture, copy the TwoPunctureABE executable to the run directory
@@ -307,7 +298,7 @@ if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ):
import generate_TwoPuncture_input import generate_TwoPuncture_input
generate_TwoPuncture_input.generate_AMSSNCKU_TwoPuncture_input(numerical_grid.puncture_data) generate_TwoPuncture_input.generate_AMSSNCKU_TwoPuncture_input()
print( ) print( )
print( " The input parfile for the TwoPunctureABE executable has been generated. " ) print( " The input parfile for the TwoPunctureABE executable has been generated. " )
@@ -349,7 +340,7 @@ if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ):
import renew_puncture_parameter import renew_puncture_parameter
renew_puncture_parameter.append_AMSSNCKU_BSSN_input(File_directory, output_directory, numerical_grid.puncture_data) renew_puncture_parameter.append_AMSSNCKU_BSSN_input(File_directory, output_directory)
## Generated AMSS-NCKU input filename ## Generated AMSS-NCKU input filename
@@ -433,31 +424,26 @@ print(
import plot_xiaoqu import plot_xiaoqu
import plot_GW_strain_amplitude_xiaoqu import plot_GW_strain_amplitude_xiaoqu
from parallel_plot_helper import run_plot_tasks_parallel
plot_tasks = []
## Plot black hole trajectory ## Plot black hole trajectory
plot_tasks.append( ( plot_xiaoqu.generate_puncture_orbit_plot, (binary_results_directory, figure_directory) ) ) plot_xiaoqu.generate_puncture_orbit_plot( binary_results_directory, figure_directory )
plot_tasks.append( ( plot_xiaoqu.generate_puncture_orbit_plot3D, (binary_results_directory, figure_directory) ) ) plot_xiaoqu.generate_puncture_orbit_plot3D( binary_results_directory, figure_directory )
## Plot black hole separation vs. time ## Plot black hole separation vs. time
plot_tasks.append( ( plot_xiaoqu.generate_puncture_distence_plot, (binary_results_directory, figure_directory) ) ) plot_xiaoqu.generate_puncture_distence_plot( binary_results_directory, figure_directory )
## Plot gravitational waveforms (psi4 and strain amplitude) ## Plot gravitational waveforms (psi4 and strain amplitude)
for i in range(input_data.Detector_Number): for i in range(input_data.Detector_Number):
plot_tasks.append( ( plot_xiaoqu.generate_gravitational_wave_psi4_plot, (binary_results_directory, figure_directory, i) ) ) plot_xiaoqu.generate_gravitational_wave_psi4_plot( binary_results_directory, figure_directory, i )
plot_tasks.append( ( plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot, (binary_results_directory, figure_directory, i) ) ) plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot( binary_results_directory, figure_directory, i )
## Plot ADM mass evolution ## Plot ADM mass evolution
for i in range(input_data.Detector_Number): for i in range(input_data.Detector_Number):
plot_tasks.append( ( plot_xiaoqu.generate_ADMmass_plot, (binary_results_directory, figure_directory, i) ) ) plot_xiaoqu.generate_ADMmass_plot( binary_results_directory, figure_directory, i )
## Plot Hamiltonian constraint violation over time ## Plot Hamiltonian constraint violation over time
for i in range(input_data.grid_level): for i in range(input_data.grid_level):
plot_tasks.append( ( plot_xiaoqu.generate_constraint_check_plot, (binary_results_directory, figure_directory, i) ) ) plot_xiaoqu.generate_constraint_check_plot( binary_results_directory, figure_directory, i )
run_plot_tasks_parallel(plot_tasks)
## Plot stored binary data ## Plot stored binary data
plot_xiaoqu.generate_binary_data_plot( binary_results_directory, figure_directory ) plot_xiaoqu.generate_binary_data_plot( binary_results_directory, figure_directory )

View File

@@ -1,19 +1,10 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
AMSS-NCKU GW150914 Simulation Regression Test Script (Comprehensive Version) AMSS-NCKU GW150914 Simulation Regression Test Script
Verification Requirements: Verification Requirements:
1. RMS errors < 1% for: 1. XY-plane trajectory RMS error < 1% (Optimized vs. baseline, max of BH1 and BH2)
- 3D Vector Total RMS
- X Component RMS
- Y Component RMS
- Z Component RMS
2. ADM constraint violation < 2 (Grid Level 0) 2. ADM constraint violation < 2 (Grid Level 0)
3. The following figure PDFs must match GW150914-origin exactly after rasterization:
- ADM_Constraint_Grid_Level_0.pdf
- BH_Trajectory_21_XY.pdf
- BH_Trajectory_XY.pdf
The script also reports the percentage of differing pixels for each figure.
RMS Calculation Method: RMS Calculation Method:
- Computes trajectory deviation on the XY plane independently for BH1 and BH2 - Computes trajectory deviation on the XY plane independently for BH1 and BH2
@@ -28,10 +19,6 @@ Reference: GW150914-origin (baseline simulation)
import numpy as np import numpy as np
import sys import sys
import os import os
import shutil
import subprocess
import tempfile
from PIL import Image
# ANSI Color Codes # ANSI Color Codes
class Color: class Color:
@@ -71,187 +58,78 @@ def load_constraint_data(filepath):
return np.array(data) return np.array(data)
def resolve_figure_dir(path): def calculate_rms_error(bh_data_ref, bh_data_target):
"""Resolve the sibling figure directory from an output or figure path."""
normalized = os.path.normpath(path)
if os.path.basename(normalized) == "figure":
return normalized
return os.path.join(os.path.dirname(normalized), "figure")
def render_pdf_to_images(pdf_path, dpi=150):
"""Render a PDF to RGB images using Ghostscript."""
gs_path = shutil.which("gs")
if gs_path is None:
raise RuntimeError("Ghostscript executable 'gs' was not found in PATH")
with tempfile.TemporaryDirectory(prefix="amss_verify_pdf_") as temp_dir:
output_pattern = os.path.join(temp_dir, "page-%03d.ppm")
cmd = [
gs_path,
"-q",
"-dSAFER",
"-dBATCH",
"-dNOPAUSE",
"-sDEVICE=ppmraw",
f"-r{dpi}",
f"-o{output_pattern}",
pdf_path
]
try:
subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True)
except subprocess.CalledProcessError as exc:
message = exc.stderr.strip() or str(exc)
raise RuntimeError(f"Failed to render PDF '{pdf_path}': {message}") from exc
ppm_files = sorted(
os.path.join(temp_dir, filename)
for filename in os.listdir(temp_dir)
if filename.endswith(".ppm")
)
if not ppm_files:
raise RuntimeError(f"No rendered pages were produced for '{pdf_path}'")
images = []
for ppm_file in ppm_files:
with Image.open(ppm_file) as img:
images.append(np.array(img.convert("RGB"), dtype=np.uint8))
return images
def compare_rendered_pages(ref_img, target_img):
"""Return (different_pixels, total_pixels) for two rendered RGB pages."""
ref_h, ref_w = ref_img.shape[:2]
tgt_h, tgt_w = target_img.shape[:2]
total_pixels = max(ref_h, tgt_h) * max(ref_w, tgt_w)
if ref_h == tgt_h and ref_w == tgt_w:
different_pixels = int(np.count_nonzero(np.any(ref_img != target_img, axis=2)))
return different_pixels, total_pixels
diff_mask = np.ones((max(ref_h, tgt_h), max(ref_w, tgt_w)), dtype=bool)
overlap_h = min(ref_h, tgt_h)
overlap_w = min(ref_w, tgt_w)
overlap_diff = np.any(ref_img[:overlap_h, :overlap_w] != target_img[:overlap_h, :overlap_w], axis=2)
diff_mask[:overlap_h, :overlap_w] = overlap_diff
different_pixels = int(np.count_nonzero(diff_mask))
return different_pixels, total_pixels
def compare_pdf_images(ref_pdf, target_pdf, dpi=150, threshold_percent=0.001):
"""Compare two PDFs by rasterizing them and counting differing pixels."""
ref_pages = render_pdf_to_images(ref_pdf, dpi=dpi)
target_pages = render_pdf_to_images(target_pdf, dpi=dpi)
total_pixels = 0
different_pixels = 0
max_pages = max(len(ref_pages), len(target_pages))
for page_idx in range(max_pages):
if page_idx < len(ref_pages) and page_idx < len(target_pages):
page_diff, page_total = compare_rendered_pages(ref_pages[page_idx], target_pages[page_idx])
else:
existing_page = ref_pages[page_idx] if page_idx < len(ref_pages) else target_pages[page_idx]
page_total = existing_page.shape[0] * existing_page.shape[1]
page_diff = page_total
total_pixels += page_total
different_pixels += page_diff
diff_percent = (different_pixels / total_pixels * 100.0) if total_pixels else 0.0
return {
"different_pixels": different_pixels,
"total_pixels": total_pixels,
"diff_percent": diff_percent,
"pages_ref": len(ref_pages),
"pages_target": len(target_pages),
"passed": diff_percent < threshold_percent
}
def compare_required_figures(reference_figure_dir, target_figure_dir):
"""Compare the required GW150914 figure PDFs."""
figure_names = [
"ADM_Constraint_Grid_Level_0.pdf",
"BH_Trajectory_21_XY.pdf",
"BH_Trajectory_XY.pdf"
]
results = []
for figure_name in figure_names:
ref_pdf = os.path.join(reference_figure_dir, figure_name)
target_pdf = os.path.join(target_figure_dir, figure_name)
if not os.path.exists(ref_pdf):
raise FileNotFoundError(f"Reference figure not found: {ref_pdf}")
if not os.path.exists(target_pdf):
raise FileNotFoundError(f"Target figure not found: {target_pdf}")
comparison = compare_pdf_images(ref_pdf, target_pdf)
comparison["name"] = figure_name
results.append(comparison)
return results
def calculate_all_rms_errors(bh_data_ref, bh_data_target):
""" """
Calculate 3D Vector RMS and component-wise RMS (X, Y, Z) independently. Calculate trajectory-based RMS error on the XY plane between baseline and optimized simulations.
Uses r = sqrt(x^2 + y^2) as the denominator for all error normalizations.
Returns the maximum error between BH1 and BH2 for each category. This function computes the RMS error independently for BH1 and BH2 trajectories,
then returns the maximum of the two as the final RMS error metric.
For each black hole, the RMS is calculated as:
RMS = sqrt( (1/M) * sum( (Δr_i / r_i^max)^2 ) ) × 100%
where:
Δr_i = sqrt((x_ref,i - x_new,i)^2 + (y_ref,i - y_new,i)^2)
r_i^max = max(sqrt(x_ref,i^2 + y_ref,i^2), sqrt(x_new,i^2 + y_new,i^2))
Args:
bh_data_ref: Reference (baseline) trajectory data
bh_data_target: Target (optimized) trajectory data
Returns:
rms_value: Final RMS error as a percentage (max of BH1 and BH2)
error: Error message if any
""" """
# Align data: truncate to the length of the shorter dataset
M = min(len(bh_data_ref['time']), len(bh_data_target['time'])) M = min(len(bh_data_ref['time']), len(bh_data_target['time']))
if M < 10: if M < 10:
return None, "Insufficient data points for comparison" return None, "Insufficient data points for comparison"
results = {} # Extract XY coordinates for both black holes
x1_ref = bh_data_ref['x1'][:M]
y1_ref = bh_data_ref['y1'][:M]
x2_ref = bh_data_ref['x2'][:M]
y2_ref = bh_data_ref['y2'][:M]
for bh in ['1', '2']: x1_new = bh_data_target['x1'][:M]
x_r, y_r, z_r = bh_data_ref[f'x{bh}'][:M], bh_data_ref[f'y{bh}'][:M], bh_data_ref[f'z{bh}'][:M] y1_new = bh_data_target['y1'][:M]
x_n, y_n, z_n = bh_data_target[f'x{bh}'][:M], bh_data_target[f'y{bh}'][:M], bh_data_target[f'z{bh}'][:M] x2_new = bh_data_target['x2'][:M]
y2_new = bh_data_target['y2'][:M]
# 核心修改:根据组委会的邮件指示,分母统一使用 r = sqrt(x^2 + y^2) # Calculate RMS for BH1
r_ref = np.sqrt(x_r**2 + y_r**2) delta_r1 = np.sqrt((x1_ref - x1_new)**2 + (y1_ref - y1_new)**2)
r_new = np.sqrt(x_n**2 + y_n**2) r1_ref = np.sqrt(x1_ref**2 + y1_ref**2)
denom_max = np.maximum(r_ref, r_new) r1_new = np.sqrt(x1_new**2 + y1_new**2)
r1_max = np.maximum(r1_ref, r1_new)
valid = denom_max > 1e-15 # Calculate RMS for BH2
if np.sum(valid) < 10: delta_r2 = np.sqrt((x2_ref - x2_new)**2 + (y2_ref - y2_new)**2)
results[f'BH{bh}'] = { '3D_Vector': 0.0, 'X_Component': 0.0, 'Y_Component': 0.0, 'Z_Component': 0.0 } r2_ref = np.sqrt(x2_ref**2 + y2_ref**2)
continue r2_new = np.sqrt(x2_new**2 + y2_new**2)
r2_max = np.maximum(r2_ref, r2_new)
def calc_rms(delta): # Avoid division by zero for BH1
# 将对应分量的偏差除以统一的轨道半径分母 denom_max valid_mask1 = r1_max > 1e-15
return np.sqrt(np.mean((delta[valid] / denom_max[valid])**2)) * 100 if np.sum(valid_mask1) < 10:
return None, "Insufficient valid data points for BH1"
# 1. Total 3D Vector RMS terms1 = (delta_r1[valid_mask1] / r1_max[valid_mask1])**2
delta_vec = np.sqrt((x_r - x_n)**2 + (y_r - y_n)**2 + (z_r - z_n)**2) rms_bh1 = np.sqrt(np.mean(terms1)) * 100
rms_3d = calc_rms(delta_vec)
# 2. Component-wise RMS (分离计算各轴,但共用半径分母) # Avoid division by zero for BH2
rms_x = calc_rms(np.abs(x_r - x_n)) valid_mask2 = r2_max > 1e-15
rms_y = calc_rms(np.abs(y_r - y_n)) if np.sum(valid_mask2) < 10:
rms_z = calc_rms(np.abs(z_r - z_n)) return None, "Insufficient valid data points for BH2"
results[f'BH{bh}'] = { terms2 = (delta_r2[valid_mask2] / r2_max[valid_mask2])**2
'3D_Vector': rms_3d, rms_bh2 = np.sqrt(np.mean(terms2)) * 100
'X_Component': rms_x,
'Y_Component': rms_y,
'Z_Component': rms_z
}
# 获取 BH1 BH2 中的最大误差 # Final RMS is the maximum of BH1 and BH2
max_rms = { rms_final = max(rms_bh1, rms_bh2)
'3D_Vector': max(results['BH1']['3D_Vector'], results['BH2']['3D_Vector']),
'X_Component': max(results['BH1']['X_Component'], results['BH2']['X_Component']), return rms_final, None
'Y_Component': max(results['BH1']['Y_Component'], results['BH2']['Y_Component']),
'Z_Component': max(results['BH1']['Z_Component'], results['BH2']['Z_Component'])
}
return max_rms, None
def analyze_constraint_violation(constraint_data, n_levels=9): def analyze_constraint_violation(constraint_data, n_levels=9):
""" """
@@ -277,32 +155,34 @@ def analyze_constraint_violation(constraint_data, n_levels=9):
def print_header(): def print_header():
"""Print report header"""
print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET) print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
print(Color.BOLD + " AMSS-NCKU GW150914 Comprehensive Regression Test" + Color.RESET) print(Color.BOLD + " AMSS-NCKU GW150914 Simulation Regression Test Report" + Color.RESET)
print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET) print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
def print_rms_results(rms_dict, error, threshold=1.0):
print(f"\n{Color.BOLD}1. RMS Error Analysis (Maximums of BH1 & BH2){Color.RESET}") def print_rms_results(rms_rel, error, threshold=1.0):
print("-" * 65) """Print RMS error results"""
print(f"\n{Color.BOLD}1. RMS Error Analysis (Baseline vs Optimized){Color.RESET}")
print("-" * 45)
if error: if error:
print(f" {Color.RED}Error: {error}{Color.RESET}") print(f" {Color.RED}Error: {error}{Color.RESET}")
return False return False
all_passed = True passed = rms_rel < threshold
print(f" Requirement: < {threshold}%\n")
for key, val in rms_dict.items(): print(f" RMS relative error: {rms_rel:.4f}%")
passed = val < threshold print(f" Requirement: < {threshold}%")
all_passed = all_passed and passed print(f" Status: {get_status_text(passed)}")
status = get_status_text(passed)
print(f" {key:15}: {val:8.4f}% | Status: {status}") return passed
return all_passed
def print_constraint_results(results, threshold=2.0): def print_constraint_results(results, threshold=2.0):
"""Print constraint violation results"""
print(f"\n{Color.BOLD}2. ADM Constraint Violation Analysis (Grid Level 0){Color.RESET}") print(f"\n{Color.BOLD}2. ADM Constraint Violation Analysis (Grid Level 0){Color.RESET}")
print("-" * 65) print("-" * 45)
names = ['Ham', 'Px', 'Py', 'Pz', 'Gx', 'Gy', 'Gz'] names = ['Ham', 'Px', 'Py', 'Pz', 'Gx', 'Gy', 'Gz']
for i, name in enumerate(names): for i, name in enumerate(names):
@@ -319,45 +199,19 @@ def print_constraint_results(results, threshold=2.0):
return passed return passed
def print_figure_results(results, threshold_percent=0.001): def print_summary(rms_passed, constraint_passed):
print(f"\n{Color.BOLD}3. Figure Pixel Comparison (PDF Rasterization){Color.RESET}") """Print summary"""
print("-" * 65)
print(f" Requirement: < {threshold_percent:.3f}% differing pixels\n")
all_passed = True
for result in results:
passed = result["passed"]
all_passed = all_passed and passed
status = get_status_text(passed)
print(f" {result['name']:32}: {result['diff_percent']:10.6f}% | Status: {status}")
if result["pages_ref"] != result["pages_target"]:
print(f" {'':32} pages(ref/target): {result['pages_ref']}/{result['pages_target']}")
return all_passed
def print_figure_error(error_message):
print(f"\n{Color.BOLD}3. Figure Pixel Comparison (PDF Rasterization){Color.RESET}")
print("-" * 65)
print(f" {Color.RED}Error: {error_message}{Color.RESET}")
return False
def print_summary(rms_passed, constraint_passed, figure_passed):
print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET) print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
print(Color.BOLD + "Verification Summary" + Color.RESET) print(Color.BOLD + "Verification Summary" + Color.RESET)
print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET) print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
all_passed = rms_passed and constraint_passed and figure_passed all_passed = rms_passed and constraint_passed
res_rms = get_status_text(rms_passed) res_rms = get_status_text(rms_passed)
res_con = get_status_text(constraint_passed) res_con = get_status_text(constraint_passed)
res_fig = get_status_text(figure_passed)
print(f" [1] Comprehensive RMS check: {res_rms}") print(f" [1] RMS trajectory check: {res_rms}")
print(f" [2] ADM constraint check: {res_con}") print(f" [2] ADM constraint check: {res_con}")
print(f" [3] Figure pixel comparison: {res_fig}")
final_status = f"{Color.GREEN}{Color.BOLD}ALL CHECKS PASSED{Color.RESET}" if all_passed else f"{Color.RED}{Color.BOLD}SOME CHECKS FAILED{Color.RESET}" final_status = f"{Color.GREEN}{Color.BOLD}ALL CHECKS PASSED{Color.RESET}" if all_passed else f"{Color.RED}{Color.BOLD}SOME CHECKS FAILED{Color.RESET}"
print(f"\n Overall result: {final_status}") print(f"\n Overall result: {final_status}")
@@ -365,58 +219,61 @@ def print_summary(rms_passed, constraint_passed, figure_passed):
return all_passed return all_passed
def main(): def main():
# Determine target (optimized) output directory
if len(sys.argv) > 1: if len(sys.argv) > 1:
target_dir = sys.argv[1] target_dir = sys.argv[1]
else: else:
script_dir = os.path.dirname(os.path.abspath(__file__)) script_dir = os.path.dirname(os.path.abspath(__file__))
target_dir = os.path.join(script_dir, "GW150914/AMSS_NCKU_output") target_dir = os.path.join(script_dir, "GW150914/AMSS_NCKU_output")
# Determine reference (baseline) directory
script_dir = os.path.dirname(os.path.abspath(__file__)) script_dir = os.path.dirname(os.path.abspath(__file__))
reference_dir = os.path.join(script_dir, "GW150914-origin/AMSS_NCKU_output") reference_dir = os.path.join(script_dir, "GW150914-origin/AMSS_NCKU_output")
target_figure_dir = resolve_figure_dir(target_dir)
reference_figure_dir = os.path.join(script_dir, "GW150914-origin/figure")
# Data file paths
bh_file_ref = os.path.join(reference_dir, "bssn_BH.dat") bh_file_ref = os.path.join(reference_dir, "bssn_BH.dat")
bh_file_target = os.path.join(target_dir, "bssn_BH.dat") bh_file_target = os.path.join(target_dir, "bssn_BH.dat")
constraint_file = os.path.join(target_dir, "bssn_constraint.dat") constraint_file = os.path.join(target_dir, "bssn_constraint.dat")
# Check if files exist
if not os.path.exists(bh_file_ref): if not os.path.exists(bh_file_ref):
print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Baseline trajectory file not found: {bh_file_ref}") print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Baseline trajectory file not found: {bh_file_ref}")
sys.exit(1) sys.exit(1)
if not os.path.exists(bh_file_target): if not os.path.exists(bh_file_target):
print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Target trajectory file not found: {bh_file_target}") print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Target trajectory file not found: {bh_file_target}")
sys.exit(1) sys.exit(1)
if not os.path.exists(constraint_file): if not os.path.exists(constraint_file):
print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Constraint data file not found: {constraint_file}") print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Constraint data file not found: {constraint_file}")
sys.exit(1) sys.exit(1)
# Print header
print_header() print_header()
print(f"\n{Color.BOLD}Reference (Baseline):{Color.RESET} {Color.BLUE}{reference_dir}{Color.RESET}") print(f"\n{Color.BOLD}Reference (Baseline):{Color.RESET} {Color.BLUE}{reference_dir}{Color.RESET}")
print(f"{Color.BOLD}Target (Optimized): {Color.RESET} {Color.BLUE}{target_dir}{Color.RESET}") print(f"{Color.BOLD}Target (Optimized): {Color.RESET} {Color.BLUE}{target_dir}{Color.RESET}")
print(f"{Color.BOLD}Reference Figures: {Color.RESET} {Color.BLUE}{reference_figure_dir}{Color.RESET}")
print(f"{Color.BOLD}Target Figures: {Color.RESET} {Color.BLUE}{target_figure_dir}{Color.RESET}")
# Load data
bh_data_ref = load_bh_trajectory(bh_file_ref) bh_data_ref = load_bh_trajectory(bh_file_ref)
bh_data_target = load_bh_trajectory(bh_file_target) bh_data_target = load_bh_trajectory(bh_file_target)
constraint_data = load_constraint_data(constraint_file) constraint_data = load_constraint_data(constraint_file)
# Output modified RMS results # Calculate RMS error
rms_dict, error = calculate_all_rms_errors(bh_data_ref, bh_data_target) rms_rel, error = calculate_rms_error(bh_data_ref, bh_data_target)
rms_passed = print_rms_results(rms_dict, error) rms_passed = print_rms_results(rms_rel, error)
# Output constraint results # Analyze constraint violation
constraint_results = analyze_constraint_violation(constraint_data) constraint_results = analyze_constraint_violation(constraint_data)
constraint_passed = print_constraint_results(constraint_results) constraint_passed = print_constraint_results(constraint_results)
try: # Print summary
figure_results = compare_required_figures(reference_figure_dir, target_figure_dir) all_passed = print_summary(rms_passed, constraint_passed)
figure_passed = print_figure_results(figure_results)
except (FileNotFoundError, RuntimeError) as exc:
figure_passed = print_figure_error(str(exc))
all_passed = print_summary(rms_passed, constraint_passed, figure_passed) # Return exit code
sys.exit(0 if all_passed else 1) sys.exit(0 if all_passed else 1)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@@ -24,7 +24,7 @@ using namespace std;
#include "misc.h" #include "misc.h"
#include "macrodef.h" #include "macrodef.h"
#include <omp.h>
#ifndef ABEtype #ifndef ABEtype
#error "not define ABEtype" #error "not define ABEtype"
#endif #endif
@@ -71,6 +71,7 @@ int main(int argc, char *argv[])
if (myrank == 0) if (myrank == 0)
{ {
Begin_clock = MPI_Wtime(); Begin_clock = MPI_Wtime();
} }
if (argc > 1) if (argc > 1)

File diff suppressed because it is too large Load Diff

View File

@@ -7,179 +7,13 @@
#include <string> #include <string>
#include <cmath> #include <cmath>
#include <new> #include <new>
#include <vector>
using namespace std; using namespace std;
#include "misc.h" #include "misc.h"
#include "MPatch.h" #include "MPatch.h"
#include "Parallel.h" #include "Parallel.h"
#include "fmisc.h" #include "fmisc.h"
#ifdef INTERP_LB_PROFILE #include "xh_global_interp.h"
#include "interp_lb_profile.h"
#endif
namespace
{
struct InterpBlockView
{
Block *bp;
double llb[dim];
double uub[dim];
};
struct BlockBinIndex
{
int bins[dim];
double lo[dim];
double inv[dim];
vector<InterpBlockView> views;
vector<vector<int>> bin_to_blocks;
bool valid;
BlockBinIndex() : valid(false)
{
for (int i = 0; i < dim; i++)
{
bins[i] = 1;
lo[i] = 0.0;
inv[i] = 0.0;
}
}
};
inline int clamp_int(int v, int lo, int hi)
{
return (v < lo) ? lo : ((v > hi) ? hi : v);
}
inline int coord_to_bin(double x, double lo, double inv, int nb)
{
if (nb <= 1 || inv <= 0.0)
return 0;
int b = int(floor((x - lo) * inv));
return clamp_int(b, 0, nb - 1);
}
inline int bin_loc(const BlockBinIndex &index, int b0, int b1, int b2)
{
return b0 + index.bins[0] * (b1 + index.bins[1] * b2);
}
inline bool point_in_block_view(const InterpBlockView &view, const double *pox, const double *DH)
{
for (int i = 0; i < dim; i++)
{
if (pox[i] - view.llb[i] < -DH[i] / 2 || pox[i] - view.uub[i] > DH[i] / 2)
return false;
}
return true;
}
void build_block_bin_index(Patch *patch, const double *DH, BlockBinIndex &index)
{
index = BlockBinIndex();
MyList<Block> *Bp = patch->blb;
while (Bp)
{
Block *BP = Bp->data;
InterpBlockView view;
view.bp = BP;
for (int i = 0; i < dim; i++)
{
#ifdef Vertex
#ifdef Cell
#error Both Cell and Vertex are defined
#endif
view.llb[i] = (feq(BP->bbox[i], patch->bbox[i], DH[i] / 2)) ? BP->bbox[i] + patch->lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
view.uub[i] = (feq(BP->bbox[dim + i], patch->bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - patch->uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
#else
#ifdef Cell
view.llb[i] = (feq(BP->bbox[i], patch->bbox[i], DH[i] / 2)) ? BP->bbox[i] + patch->lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
view.uub[i] = (feq(BP->bbox[dim + i], patch->bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - patch->uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
#else
#error Not define Vertex nor Cell
#endif
#endif
}
index.views.push_back(view);
if (Bp == patch->ble)
break;
Bp = Bp->next;
}
const int nblocks = int(index.views.size());
if (nblocks <= 0)
return;
int bins_1d = int(ceil(pow(double(nblocks), 1.0 / 3.0)));
bins_1d = clamp_int(bins_1d, 1, 32);
for (int i = 0; i < dim; i++)
{
index.bins[i] = bins_1d;
index.lo[i] = patch->bbox[i] + patch->lli[i] * DH[i];
const double hi = patch->bbox[dim + i] - patch->uui[i] * DH[i];
if (hi > index.lo[i] && bins_1d > 1)
index.inv[i] = bins_1d / (hi - index.lo[i]);
else
index.inv[i] = 0.0;
}
index.bin_to_blocks.resize(index.bins[0] * index.bins[1] * index.bins[2]);
for (int bi = 0; bi < nblocks; bi++)
{
const InterpBlockView &view = index.views[bi];
int bmin[dim], bmax[dim];
for (int d = 0; d < dim; d++)
{
const double low = view.llb[d] - DH[d] / 2;
const double up = view.uub[d] + DH[d] / 2;
bmin[d] = coord_to_bin(low, index.lo[d], index.inv[d], index.bins[d]);
bmax[d] = coord_to_bin(up, index.lo[d], index.inv[d], index.bins[d]);
if (bmax[d] < bmin[d])
{
int t = bmin[d];
bmin[d] = bmax[d];
bmax[d] = t;
}
}
for (int bz = bmin[2]; bz <= bmax[2]; bz++)
for (int by = bmin[1]; by <= bmax[1]; by++)
for (int bx = bmin[0]; bx <= bmax[0]; bx++)
index.bin_to_blocks[bin_loc(index, bx, by, bz)].push_back(bi);
}
index.valid = true;
}
int find_block_index_for_point(const BlockBinIndex &index, const double *pox, const double *DH)
{
if (!index.valid)
return -1;
const int bx = coord_to_bin(pox[0], index.lo[0], index.inv[0], index.bins[0]);
const int by = coord_to_bin(pox[1], index.lo[1], index.inv[1], index.bins[1]);
const int bz = coord_to_bin(pox[2], index.lo[2], index.inv[2], index.bins[2]);
const vector<int> &cand = index.bin_to_blocks[bin_loc(index, bx, by, bz)];
for (size_t ci = 0; ci < cand.size(); ci++)
{
const int bi = cand[ci];
if (point_in_block_view(index.views[bi], pox, DH))
return bi;
}
// Fallback to full scan for numerical edge cases around bin boundaries.
for (size_t bi = 0; bi < index.views.size(); bi++)
if (point_in_block_view(index.views[bi], pox, DH))
return int(bi);
return -1;
}
} // namespace
Patch::Patch(int DIM, int *shapei, double *bboxi, int levi, bool buflog, int Symmetry) : lev(levi) Patch::Patch(int DIM, int *shapei, double *bboxi, int levi, bool buflog, int Symmetry) : lev(levi)
{ {
@@ -530,11 +364,9 @@ void Patch::Interp_Points(MyList<var> *VarList,
for (int j = 0; j < NN; j++) for (int j = 0; j < NN; j++)
owner_rank[j] = -1; owner_rank[j] = -1;
double DH[dim]; double DH[dim], llb[dim], uub[dim];
for (int i = 0; i < dim; i++) for (int i = 0; i < dim; i++)
DH[i] = getdX(i); DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
for (int j = 0; j < NN; j++) // run along points for (int j = 0; j < NN; j++) // run along points
{ {
@@ -557,25 +389,60 @@ void Patch::Interp_Points(MyList<var> *VarList,
} }
} }
const int block_i = find_block_index_for_point(block_index, pox, DH); MyList<Block> *Bp = blb;
if (block_i >= 0) bool notfind = true;
while (notfind && Bp) // run along Blocks
{ {
Block *BP = block_index.views[block_i].bp; Block *BP = Bp->data;
owner_rank[j] = BP->rank; bool flag = true;
if (myrank == BP->rank) for (int i = 0; i < dim; i++)
{ {
//---> interpolation #ifdef Vertex
varl = VarList; #ifdef Cell
int k = 0; #error Both Cell and Vertex are defined
while (varl) // run along variables #endif
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
#else
#ifdef Cell
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
#else
#error Not define Vertex nor Cell
#endif
#endif
if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2)
{ {
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k], flag = false;
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); break;
varl = varl->next;
k++;
} }
} }
if (flag)
{
notfind = false;
owner_rank[j] = BP->rank;
if (myrank == BP->rank)
{
//---> interpolation
varl = VarList;
int k = 0;
while (varl) // run along variables
{
xh_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next;
k++;
}
}
}
if (Bp == ble)
break;
Bp = Bp->next;
} }
} }
// Replace MPI_Allreduce with per-owner MPI_Bcast: // Replace MPI_Allreduce with per-owner MPI_Bcast:
@@ -642,13 +509,11 @@ void Patch::Interp_Points(MyList<var> *VarList,
// Targeted point-to-point overload: each owner sends each point only to // Targeted point-to-point overload: each owner sends each point only to
// the one rank that needs it for integration (consumer), reducing // the one rank that needs it for integration (consumer), reducing
// communication volume by ~nprocs times compared to the Bcast version. // communication volume by ~nprocs times compared to the Bcast version.
#ifdef INTERP_LB_PROFILE
double t_interp_start = MPI_Wtime();
#endif
int myrank, nprocs; int myrank, nprocs;
MPI_Comm_rank(MPI_COMM_WORLD, &myrank); MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
// printf("here----\n");
// int zzz = 0;
int ordn = 2 * ghost_width; int ordn = 2 * ghost_width;
MyList<var> *varl; MyList<var> *varl;
int num_var = 0; int num_var = 0;
@@ -670,56 +535,90 @@ void Patch::Interp_Points(MyList<var> *VarList,
double DH[dim]; double DH[dim];
for (int i = 0; i < dim; i++) for (int i = 0; i < dim; i++)
DH[i] = getdX(i); DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
// --- Interpolation phase (identical to original) --- // --- Interpolation phase (identical to original) ---
// printf("NN: %d, num_var = %d\n", NN, num_var);
#pragma omp parallel
{
#pragma omp for
for (int j = 0; j < NN; j++) for (int j = 0; j < NN; j++)
{ {
double pox[dim]; double pox[dim], llb[dim], uub[dim];
MyList<var> *varl1;
for (int i = 0; i < dim; i++) for (int i = 0; i < dim; i++)
{ {
pox[i] = XX[i][j]; pox[i] = XX[i][j];
if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i])) // if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i]))
{ // {
cout << "Patch::Interp_Points: point ("; // cout << "Patch::Interp_Points: point (";
for (int k = 0; k < dim; k++) // for (int k = 0; k < dim; k++)
{ // {
cout << XX[k][j]; // cout << XX[k][j];
if (k < dim - 1) // if (k < dim - 1)
cout << ","; // cout << ",";
else // else
cout << ") is out of current Patch." << endl; // cout << ") is out of current Patch." << endl;
} // }
MPI_Abort(MPI_COMM_WORLD, 1); // MPI_Abort(MPI_COMM_WORLD, 1);
} // }
} }
const int block_i = find_block_index_for_point(block_index, pox, DH); MyList<Block> *Bp = blb;
if (block_i >= 0) bool notfind = true;
while (notfind && Bp)
{ {
Block *BP = block_index.views[block_i].bp; Block *BP = Bp->data;
owner_rank[j] = BP->rank;
if (myrank == BP->rank) bool flag = true;
for (int i = 0; i < dim; i++)
{ {
varl = VarList; #ifdef Vertex
int k = 0; #ifdef Cell
while (varl) #error Both Cell and Vertex are defined
#endif
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
#else
#ifdef Cell
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
#else
#error Not define Vertex nor Cell
#endif
#endif
if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2)
{ {
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k], flag = false;
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); break;
varl = varl->next;
k++;
} }
} }
// printf("flag = %d\n", flag);
if (flag)
{
notfind = false;
owner_rank[j] = BP->rank;
if (myrank == BP->rank)
{
varl1 = VarList;
int k = 0;
while (varl1)
{
xh_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl1->data->sgfn], Shellf[j * num_var + k],
pox[0], pox[1], pox[2], ordn, varl1->data->SoA, Symmetry);
varl1 = varl1->next;
k++;
// zzz += 1;
}
}
}
if (Bp == ble)
break;
Bp = Bp->next;
} }
} }
}
#ifdef INTERP_LB_PROFILE // printf("Interpolation done, zzz = %d\n", zzz);
double t_interp_end = MPI_Wtime();
double t_interp_local = t_interp_end - t_interp_start;
#endif
// --- Error check for unfound points --- // --- Error check for unfound points ---
for (int j = 0; j < NN; j++) for (int j = 0; j < NN; j++)
{ {
@@ -876,31 +775,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
delete[] recv_count; delete[] recv_count;
delete[] consumer_rank; delete[] consumer_rank;
delete[] owner_rank; delete[] owner_rank;
#ifdef INTERP_LB_PROFILE
{
static bool profile_written = false;
if (!profile_written) {
double *all_times = nullptr;
if (myrank == 0) all_times = new double[nprocs];
MPI_Gather(&t_interp_local, 1, MPI_DOUBLE,
all_times, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
if (myrank == 0) {
int heavy[64];
int nh = InterpLBProfile::identify_heavy_ranks(
all_times, nprocs, 2.5, heavy, 64);
InterpLBProfile::write_profile(
"interp_lb_profile.bin", nprocs,
all_times, heavy, nh, 2.5);
printf("[InterpLB] Profile written: %d heavy ranks\n", nh);
for (int i = 0; i < nh; i++)
printf(" Heavy rank %d: %.6f s\n", heavy[i], all_times[heavy[i]]);
delete[] all_times;
}
profile_written = true;
}
}
#endif
} }
void Patch::Interp_Points(MyList<var> *VarList, void Patch::Interp_Points(MyList<var> *VarList,
int NN, double **XX, int NN, double **XX,
@@ -910,7 +784,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
int myrank, lmyrank; int myrank, lmyrank;
MPI_Comm_rank(MPI_COMM_WORLD, &myrank); MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
MPI_Comm_rank(Comm_here, &lmyrank); MPI_Comm_rank(Comm_here, &lmyrank);
int ordn = 2 * ghost_width; int ordn = 2 * ghost_width;
MyList<var> *varl; MyList<var> *varl;
int num_var = 0; int num_var = 0;
@@ -934,11 +807,9 @@ void Patch::Interp_Points(MyList<var> *VarList,
MPI_Comm_group(MPI_COMM_WORLD, &world_group); MPI_Comm_group(MPI_COMM_WORLD, &world_group);
MPI_Comm_group(Comm_here, &local_group); MPI_Comm_group(Comm_here, &local_group);
double DH[dim]; double DH[dim], llb[dim], uub[dim];
for (int i = 0; i < dim; i++) for (int i = 0; i < dim; i++)
DH[i] = getdX(i); DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
for (int j = 0; j < NN; j++) // run along points for (int j = 0; j < NN; j++) // run along points
{ {
@@ -961,24 +832,57 @@ void Patch::Interp_Points(MyList<var> *VarList,
} }
} }
const int block_i = find_block_index_for_point(block_index, pox, DH); MyList<Block> *Bp = blb;
if (block_i >= 0) bool notfind = true;
while (notfind && Bp) // run along Blocks
{ {
Block *BP = block_index.views[block_i].bp; Block *BP = Bp->data;
owner_rank[j] = BP->rank;
if (myrank == BP->rank) bool flag = true;
for (int i = 0; i < dim; i++)
{ {
//---> interpolation #ifdef Vertex
varl = VarList; #ifdef Cell
int k = 0; #error Both Cell and Vertex are defined
while (varl) // run along variables #endif
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
#else
#ifdef Cell
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
#else
#error Not define Vertex nor Cell
#endif
#endif
if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2)
{ {
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k], flag = false;
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); break;
varl = varl->next;
k++;
} }
} }
if (flag)
{
notfind = false;
owner_rank[j] = BP->rank;
if (myrank == BP->rank)
{
//---> interpolation
varl = VarList;
int k = 0;
while (varl) // run along variables
{
xh_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next;
k++;
}
}
}
if (Bp == ble)
break;
Bp = Bp->next;
} }
} }
@@ -1201,7 +1105,7 @@ bool Patch::Interp_ONE_Point(MyList<var> *VarList, double *XX,
{ {
// shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn], // shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn],
// pox,ordn,varl->data->SoA,Symmetry); // pox,ordn,varl->data->SoA,Symmetry);
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[k], xh_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next; varl = varl->next;
k++; k++;
@@ -1443,7 +1347,7 @@ bool Patch::Interp_ONE_Point(MyList<var> *VarList, double *XX,
{ {
// shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn], // shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn],
// pox,ordn,varl->data->SoA,Symmetry); // pox,ordn,varl->data->SoA,Symmetry);
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[k], xh_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next; varl = varl->next;
k++; k++;

View File

@@ -32,16 +32,6 @@ namespace Parallel
int partition2(int *nxy, int split_size, int *min_width, int cpusize, int *shape); // special for 2 diemnsions int partition2(int *nxy, int split_size, int *min_width, int cpusize, int *shape); // special for 2 diemnsions
int partition3(int *nxyz, int split_size, int *min_width, int cpusize, int *shape); int partition3(int *nxyz, int split_size, int *min_width, int cpusize, int *shape);
MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks
MyList<Block> *distribute_optimize(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0);
Block* splitHotspotBlock(MyList<Block>* &BlL, int _dim,
int ib0_orig, int ib3_orig,
int jb1_orig, int jb4_orig,
int kb2_orig, int kb5_orig,
Patch* PP, int r_left, int r_right,
int ingfsi, int fngfsi, bool periodic,
Block* &split_first_block, Block* &split_last_block);
Block* createMappedBlock(MyList<Block>* &BlL, int _dim, int* shape, double* bbox,
int block_id, int ingfsi, int fngfsi, int lev);
void KillBlocks(MyList<Patch> *PatchLIST); void KillBlocks(MyList<Patch> *PatchLIST);
void setfunction(MyList<Block> *BlL, var *vn, double func(double x, double y, double z)); void setfunction(MyList<Block> *BlL, var *vn, double func(double x, double y, double z));
@@ -108,9 +98,6 @@ namespace Parallel
MPI_Status *stats; MPI_Status *stats;
int max_reqs; int max_reqs;
bool lengths_valid; bool lengths_valid;
int *tc_req_node;
int *tc_req_is_recv;
int *tc_completed;
SyncCache(); SyncCache();
void invalidate(); void invalidate();
void destroy(); void destroy();
@@ -124,10 +111,7 @@ namespace Parallel
struct AsyncSyncState { struct AsyncSyncState {
int req_no; int req_no;
bool active; bool active;
int *req_node; AsyncSyncState() : req_no(0), active(false) {}
int *req_is_recv;
int pending_recv;
AsyncSyncState() : req_no(0), active(false), req_node(0), req_is_recv(0), pending_recv(0) {}
}; };
void Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, void Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry,
@@ -146,15 +130,6 @@ namespace Parallel
void OutBdLow2Himix(MyList<Patch> *PatcL, MyList<Patch> *PatfL, void OutBdLow2Himix(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */, MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
int Symmetry); int Symmetry);
void Restrict_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
MyList<var> *VarList1, MyList<var> *VarList2,
int Symmetry, SyncCache &cache);
void OutBdLow2Hi_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
MyList<var> *VarList1, MyList<var> *VarList2,
int Symmetry, SyncCache &cache);
void OutBdLow2Himix_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
MyList<var> *VarList1, MyList<var> *VarList2,
int Symmetry, SyncCache &cache);
void Prolong(Patch *Patc, Patch *Patf, void Prolong(Patch *Patc, Patch *Patf,
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */, MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
int Symmetry); int Symmetry);
@@ -183,7 +158,6 @@ namespace Parallel
MyList<Parallel::gridseg> **out_src, MyList<Parallel::gridseg> **out_dst); MyList<Parallel::gridseg> **out_src, MyList<Parallel::gridseg> **out_dst);
void PeriodicBD(Patch *Pat, MyList<var> *VarList, int Symmetry); void PeriodicBD(Patch *Pat, MyList<var> *VarList, int Symmetry);
double L2Norm(Patch *Pat, var *vf); double L2Norm(Patch *Pat, var *vf);
void L2Norm7(Patch *Pat, var **vf, double *norms);
void checkgsl(MyList<Parallel::gridseg> *pp, bool first_only); void checkgsl(MyList<Parallel::gridseg> *pp, bool first_only);
void checkvarl(MyList<var> *pp, bool first_only); void checkvarl(MyList<var> *pp, bool first_only);
MyList<Parallel::gridseg> *divide_gsl(MyList<Parallel::gridseg> *p, Patch *Pat); MyList<Parallel::gridseg> *divide_gsl(MyList<Parallel::gridseg> *p, Patch *Pat);
@@ -219,7 +193,6 @@ namespace Parallel
void checkpatchlist(MyList<Patch> *PatL, bool buflog); void checkpatchlist(MyList<Patch> *PatL, bool buflog);
double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here); double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here);
void L2Norm7(Patch *Pat, var **vf, double *norms, MPI_Comm Comm_here);
bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList, bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
int NN, double **XX, int NN, double **XX,
double *Shellf, int Symmetry, MPI_Comm Comm_here); double *Shellf, int Symmetry, MPI_Comm Comm_here);

View File

@@ -1,212 +0,0 @@
#include "rungekutta4_rout.h"
#include <cstdio>
#include <cstdlib>
#include <cstddef>
#include <complex>
#include <immintrin.h>
namespace {
inline void rk4_stage0(std::size_t n,
const double *__restrict f0,
const double *__restrict frhs,
double *__restrict f1,
double c) {
std::size_t i = 0;
#if defined(__AVX512F__)
const __m512d vc = _mm512_set1_pd(c);
for (; i + 7 < n; i += 8) {
const __m512d v0 = _mm512_loadu_pd(f0 + i);
const __m512d vr = _mm512_loadu_pd(frhs + i);
_mm512_storeu_pd(f1 + i, _mm512_fmadd_pd(vc, vr, v0));
}
#elif defined(__AVX2__)
const __m256d vc = _mm256_set1_pd(c);
for (; i + 3 < n; i += 4) {
const __m256d v0 = _mm256_loadu_pd(f0 + i);
const __m256d vr = _mm256_loadu_pd(frhs + i);
_mm256_storeu_pd(f1 + i, _mm256_fmadd_pd(vc, vr, v0));
}
#endif
#pragma ivdep
for (; i < n; ++i) {
f1[i] = f0[i] + c * frhs[i];
}
}
inline void rk4_rhs_accum(std::size_t n,
const double *__restrict f1,
double *__restrict frhs) {
std::size_t i = 0;
#if defined(__AVX512F__)
const __m512d v2 = _mm512_set1_pd(2.0);
for (; i + 7 < n; i += 8) {
const __m512d v1 = _mm512_loadu_pd(f1 + i);
const __m512d vrhs = _mm512_loadu_pd(frhs + i);
_mm512_storeu_pd(frhs + i, _mm512_fmadd_pd(v2, v1, vrhs));
}
#elif defined(__AVX2__)
const __m256d v2 = _mm256_set1_pd(2.0);
for (; i + 3 < n; i += 4) {
const __m256d v1 = _mm256_loadu_pd(f1 + i);
const __m256d vrhs = _mm256_loadu_pd(frhs + i);
_mm256_storeu_pd(frhs + i, _mm256_fmadd_pd(v2, v1, vrhs));
}
#endif
#pragma ivdep
for (; i < n; ++i) {
frhs[i] = frhs[i] + 2.0 * f1[i];
}
}
inline void rk4_f1_from_f0_f1(std::size_t n,
const double *__restrict f0,
double *__restrict f1,
double c) {
std::size_t i = 0;
#if defined(__AVX512F__)
const __m512d vc = _mm512_set1_pd(c);
for (; i + 7 < n; i += 8) {
const __m512d v0 = _mm512_loadu_pd(f0 + i);
const __m512d v1 = _mm512_loadu_pd(f1 + i);
_mm512_storeu_pd(f1 + i, _mm512_fmadd_pd(vc, v1, v0));
}
#elif defined(__AVX2__)
const __m256d vc = _mm256_set1_pd(c);
for (; i + 3 < n; i += 4) {
const __m256d v0 = _mm256_loadu_pd(f0 + i);
const __m256d v1 = _mm256_loadu_pd(f1 + i);
_mm256_storeu_pd(f1 + i, _mm256_fmadd_pd(vc, v1, v0));
}
#endif
#pragma ivdep
for (; i < n; ++i) {
f1[i] = f0[i] + c * f1[i];
}
}
inline void rk4_stage3(std::size_t n,
const double *__restrict f0,
double *__restrict f1,
const double *__restrict frhs,
double c) {
std::size_t i = 0;
#if defined(__AVX512F__)
const __m512d vc = _mm512_set1_pd(c);
for (; i + 7 < n; i += 8) {
const __m512d v0 = _mm512_loadu_pd(f0 + i);
const __m512d v1 = _mm512_loadu_pd(f1 + i);
const __m512d vr = _mm512_loadu_pd(frhs + i);
_mm512_storeu_pd(f1 + i, _mm512_fmadd_pd(vc, _mm512_add_pd(v1, vr), v0));
}
#elif defined(__AVX2__)
const __m256d vc = _mm256_set1_pd(c);
for (; i + 3 < n; i += 4) {
const __m256d v0 = _mm256_loadu_pd(f0 + i);
const __m256d v1 = _mm256_loadu_pd(f1 + i);
const __m256d vr = _mm256_loadu_pd(frhs + i);
_mm256_storeu_pd(f1 + i, _mm256_fmadd_pd(vc, _mm256_add_pd(v1, vr), v0));
}
#endif
#pragma ivdep
for (; i < n; ++i) {
f1[i] = f0[i] + c * (f1[i] + frhs[i]);
}
}
} // namespace
extern "C" {
void f_rungekutta4_scalar(double &dT, double &f0, double &f1, double &f_rhs, int &RK4) {
constexpr double F1o6 = 1.0 / 6.0;
constexpr double HLF = 0.5;
constexpr double TWO = 2.0;
switch (RK4) {
case 0:
f1 = f0 + HLF * dT * f_rhs;
break;
case 1:
f_rhs = f_rhs + TWO * f1;
f1 = f0 + HLF * dT * f1;
break;
case 2:
f_rhs = f_rhs + TWO * f1;
f1 = f0 + dT * f1;
break;
case 3:
f1 = f0 + F1o6 * dT * (f1 + f_rhs);
break;
default:
std::fprintf(stderr, "rungekutta4_scalar_c: invalid RK4 stage %d\n", RK4);
std::abort();
}
}
void rungekutta4_cplxscalar_(double &dT,
std::complex<double> &f0,
std::complex<double> &f1,
std::complex<double> &f_rhs,
int &RK4) {
constexpr double F1o6 = 1.0 / 6.0;
constexpr double HLF = 0.5;
constexpr double TWO = 2.0;
switch (RK4) {
case 0:
f1 = f0 + HLF * dT * f_rhs;
break;
case 1:
f_rhs = f_rhs + TWO * f1;
f1 = f0 + HLF * dT * f1;
break;
case 2:
f_rhs = f_rhs + TWO * f1;
f1 = f0 + dT * f1;
break;
case 3:
f1 = f0 + F1o6 * dT * (f1 + f_rhs);
break;
default:
std::fprintf(stderr, "rungekutta4_cplxscalar_c: invalid RK4 stage %d\n", RK4);
std::abort();
}
}
int f_rungekutta4_rout(int *ex, double &dT,
double *f0, double *f1, double *f_rhs,
int &RK4) {
const std::size_t n = static_cast<std::size_t>(ex[0]) *
static_cast<std::size_t>(ex[1]) *
static_cast<std::size_t>(ex[2]);
const double *const __restrict f0r = f0;
double *const __restrict f1r = f1;
double *const __restrict frhs = f_rhs;
if (__builtin_expect(static_cast<unsigned>(RK4) > 3u, 0)) {
std::fprintf(stderr, "rungekutta4_rout_c: invalid RK4 stage %d\n", RK4);
std::abort();
}
switch (RK4) {
case 0:
rk4_stage0(n, f0r, frhs, f1r, 0.5 * dT);
break;
case 1:
rk4_rhs_accum(n, f1r, frhs);
rk4_f1_from_f0_f1(n, f0r, f1r, 0.5 * dT);
break;
case 2:
rk4_rhs_accum(n, f1r, frhs);
rk4_f1_from_f0_f1(n, f0r, f1r, dT);
break;
default:
rk4_stage3(n, f0r, f1r, frhs, (1.0 / 6.0) * dT);
break;
}
return 0;
}
} // extern "C"

View File

@@ -3472,43 +3472,6 @@ double ShellPatch::L2Norm(var *vf)
return tvf; return tvf;
} }
void ShellPatch::L2Norm7(var **vf, double *norms)
{
double tvf[7], dtvf[7];
int BDW = overghost;
for (int i = 0; i < 7; i++)
dtvf[i] = 0;
MyList<ss_patch> *sPp = PatL;
while (sPp)
{
MyList<Block> *Bp = sPp->data->blb;
while (Bp)
{
Block *cg = Bp->data;
if (myrank == cg->rank)
{
f_l2normhelper7(cg->shape, cg->X[0], cg->X[1], cg->X[2],
sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2],
sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5],
cg->fgfs[vf[0]->sgfn], cg->fgfs[vf[1]->sgfn], cg->fgfs[vf[2]->sgfn],
cg->fgfs[vf[3]->sgfn], cg->fgfs[vf[4]->sgfn], cg->fgfs[vf[5]->sgfn],
cg->fgfs[vf[6]->sgfn], tvf, BDW);
for (int i = 0; i < 7; i++)
dtvf[i] += tvf[i];
}
if (Bp == sPp->data->ble)
break;
Bp = Bp->next;
}
sPp = sPp->next;
}
MPI_Allreduce(dtvf, tvf, 7, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
for (int i = 0; i < 7; i++)
norms[i] = sqrt(tvf[i]);
}
// find maximum of abstract value, XX store position for maximum, Shellf store maximum themselvs // find maximum of abstract value, XX store position for maximum, Shellf store maximum themselvs
void ShellPatch::Find_Maximum(MyList<var> *VarList, double *XX, void ShellPatch::Find_Maximum(MyList<var> *VarList, double *XX,

View File

@@ -198,7 +198,6 @@ public:
void write_Pablo_file_ss(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax, void write_Pablo_file_ss(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax,
char *filename, int sst); char *filename, int sst);
double L2Norm(var *vf); double L2Norm(var *vf);
void L2Norm7(var **vf, double *norms);
void Find_Maximum(MyList<var> *VarList, double *XX, double *Shellf); void Find_Maximum(MyList<var> *VarList, double *XX, double *Shellf);
}; };

View File

@@ -48,7 +48,6 @@ public:
double StartTime, TotalTime; double StartTime, TotalTime;
double AnasTime, DumpTime, d2DumpTime, CheckTime; double AnasTime, DumpTime, d2DumpTime, CheckTime;
double LastAnas, LastConsOut; double LastAnas, LastConsOut;
int *ConstraintRefreshLevels;
double Courant; double Courant;
double numepss, numepsb, numepsh; double numepss, numepsb, numepsh;
int Symmetry; int Symmetry;
@@ -131,11 +130,9 @@ public:
Parallel::SyncCache *sync_cache_cor; // per-level cache for corrector sync Parallel::SyncCache *sync_cache_cor; // per-level cache for corrector sync
Parallel::SyncCache *sync_cache_rp_coarse; // RestrictProlong sync on PatL[lev-1] Parallel::SyncCache *sync_cache_rp_coarse; // RestrictProlong sync on PatL[lev-1]
Parallel::SyncCache *sync_cache_rp_fine; // RestrictProlong sync on PatL[lev] Parallel::SyncCache *sync_cache_rp_fine; // RestrictProlong sync on PatL[lev]
Parallel::SyncCache *sync_cache_restrict; // cached Restrict in RestrictProlong
Parallel::SyncCache *sync_cache_outbd; // cached OutBdLow2Hi in RestrictProlong
monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor; monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor;
monitor *ConVMonitor, *TimingMonitor; monitor *ConVMonitor;
surface_integral *Waveshell; surface_integral *Waveshell;
checkpoint *CheckPoint; checkpoint *CheckPoint;

View File

@@ -62,7 +62,6 @@
real*8, dimension(ex(1),ex(2),ex(3)),intent(inout) :: Gmx_Res, Gmy_Res, Gmz_Res real*8, dimension(ex(1),ex(2),ex(3)),intent(inout) :: Gmx_Res, Gmy_Res, Gmz_Res
! gont = 0: success; gont = 1: something wrong ! gont = 0: success; gont = 1: something wrong
integer::gont integer::gont
integer :: i,j,k
!~~~~~~> Other variables: !~~~~~~> Other variables:
@@ -86,13 +85,6 @@
real*8,dimension(3) ::SSS,AAS,ASA,SAA,ASS,SAS,SSA real*8,dimension(3) ::SSS,AAS,ASA,SAA,ASS,SAS,SSA
real*8 :: dX, dY, dZ, PI real*8 :: dX, dY, dZ, PI
real*8 :: divb_loc,det_loc
real*8 :: gupxx_loc,gupxy_loc,gupxz_loc,gupyy_loc,gupyz_loc,gupzz_loc
real*8 :: Rxx_loc,Rxy_loc,Rxz_loc,Ryy_loc,Ryz_loc,Rzz_loc
real*8 :: fxx_loc,fxy_loc,fxz_loc
real*8 :: Gamxa_loc,Gamya_loc,Gamza_loc
real*8 :: f_loc,chin_loc
real*8 :: l_fxx,l_fxy,l_fxz,l_fyy,l_fyz,l_fzz,S_loc
real*8, parameter :: ZEO = 0.d0,ONE = 1.D0, TWO = 2.D0, FOUR = 4.D0 real*8, parameter :: ZEO = 0.d0,ONE = 1.D0, TWO = 2.D0, FOUR = 4.D0
real*8, parameter :: EIGHT = 8.D0, HALF = 0.5D0, THR = 3.d0 real*8, parameter :: EIGHT = 8.D0, HALF = 0.5D0, THR = 3.d0
real*8, parameter :: SYM = 1.D0, ANTI= - 1.D0 real*8, parameter :: SYM = 1.D0, ANTI= - 1.D0
@@ -105,7 +97,7 @@
#endif #endif
#if (GAUGE == 6 || GAUGE == 7) #if (GAUGE == 6 || GAUGE == 7)
integer :: BHN integer :: BHN,i,j,k
real*8, dimension(9) :: Porg real*8, dimension(9) :: Porg
real*8, dimension(3) :: Mass real*8, dimension(3) :: Mass
real*8 :: r1,r2,M,A,w1,w2,C1,C2 real*8 :: r1,r2,M,A,w1,w2,C1,C2
@@ -114,38 +106,6 @@
call getpbh(BHN,Porg,Mass) call getpbh(BHN,Porg,Mass)
#endif #endif
!!! sanity check (disabled in production builds for performance)
#ifdef DEBUG
dX = sum(chi)+sum(trK)+sum(dxx)+sum(gxy)+sum(gxz)+sum(dyy)+sum(gyz)+sum(dzz) &
+sum(Axx)+sum(Axy)+sum(Axz)+sum(Ayy)+sum(Ayz)+sum(Azz) &
+sum(Gamx)+sum(Gamy)+sum(Gamz) &
+sum(Lap)+sum(betax)+sum(betay)+sum(betaz)
if(dX.ne.dX) then
if(sum(chi).ne.sum(chi))write(*,*)"bssn.f90: find NaN in chi"
if(sum(trK).ne.sum(trK))write(*,*)"bssn.f90: find NaN in trk"
if(sum(dxx).ne.sum(dxx))write(*,*)"bssn.f90: find NaN in dxx"
if(sum(gxy).ne.sum(gxy))write(*,*)"bssn.f90: find NaN in gxy"
if(sum(gxz).ne.sum(gxz))write(*,*)"bssn.f90: find NaN in gxz"
if(sum(dyy).ne.sum(dyy))write(*,*)"bssn.f90: find NaN in dyy"
if(sum(gyz).ne.sum(gyz))write(*,*)"bssn.f90: find NaN in gyz"
if(sum(dzz).ne.sum(dzz))write(*,*)"bssn.f90: find NaN in dzz"
if(sum(Axx).ne.sum(Axx))write(*,*)"bssn.f90: find NaN in Axx"
if(sum(Axy).ne.sum(Axy))write(*,*)"bssn.f90: find NaN in Axy"
if(sum(Axz).ne.sum(Axz))write(*,*)"bssn.f90: find NaN in Axz"
if(sum(Ayy).ne.sum(Ayy))write(*,*)"bssn.f90: find NaN in Ayy"
if(sum(Ayz).ne.sum(Ayz))write(*,*)"bssn.f90: find NaN in Ayz"
if(sum(Azz).ne.sum(Azz))write(*,*)"bssn.f90: find NaN in Azz"
if(sum(Gamx).ne.sum(Gamx))write(*,*)"bssn.f90: find NaN in Gamx"
if(sum(Gamy).ne.sum(Gamy))write(*,*)"bssn.f90: find NaN in Gamy"
if(sum(Gamz).ne.sum(Gamz))write(*,*)"bssn.f90: find NaN in Gamz"
if(sum(Lap).ne.sum(Lap))write(*,*)"bssn.f90: find NaN in Lap"
if(sum(betax).ne.sum(betax))write(*,*)"bssn.f90: find NaN in betax"
if(sum(betay).ne.sum(betay))write(*,*)"bssn.f90: find NaN in betay"
if(sum(betaz).ne.sum(betaz))write(*,*)"bssn.f90: find NaN in betaz"
gont = 1
return
endif
#endif
PI = dacos(-ONE) PI = dacos(-ONE)
@@ -153,24 +113,22 @@
dY = Y(2) - Y(1) dY = Y(2) - Y(1)
dZ = Z(2) - Z(1) dZ = Z(2) - Z(1)
do k=1,ex(3) alpn1 = Lap + ONE
do j=1,ex(2) chin1 = chi + ONE
do i=1,ex(1) gxx = dxx + ONE
alpn1(i,j,k) = Lap(i,j,k) + ONE gyy = dyy + ONE
chin1(i,j,k) = chi(i,j,k) + ONE gzz = dzz + ONE
gxx(i,j,k) = dxx(i,j,k) + ONE
gyy(i,j,k) = dyy(i,j,k) + ONE
gzz(i,j,k) = dzz(i,j,k) + ONE
enddo
enddo
enddo
call fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev) call fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev)
call fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev) call fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev)
call fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev) call fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev)
div_beta = betaxx + betayy + betazz
call fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev) call fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev)
chi_rhs = F2o3 *chin1*( alpn1 * trK - div_beta ) !rhs for chi
call fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev) call fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
call fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev) call fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev)
call fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev) call fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev)
@@ -178,179 +136,151 @@
call fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev) call fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev)
call fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev) call fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
do k=1,ex(3) gxx_rhs = - TWO * alpn1 * Axx - F2o3 * gxx * div_beta + &
do j=1,ex(2) TWO *( gxx * betaxx + gxy * betayx + gxz * betazx)
do i=1,ex(1)
divb_loc = betaxx(i,j,k) + betayy(i,j,k) + betazz(i,j,k)
div_beta(i,j,k) = divb_loc
chi_rhs(i,j,k) = F2o3 * chin1(i,j,k) * (alpn1(i,j,k) * trK(i,j,k) - divb_loc) gyy_rhs = - TWO * alpn1 * Ayy - F2o3 * gyy * div_beta + &
TWO *( gxy * betaxy + gyy * betayy + gyz * betazy)
gxx_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Axx(i,j,k) - F2o3 * gxx(i,j,k) * divb_loc + & gzz_rhs = - TWO * alpn1 * Azz - F2o3 * gzz * div_beta + &
TWO * ( gxx(i,j,k) * betaxx(i,j,k) + gxy(i,j,k) * betayx(i,j,k) + gxz(i,j,k) * betazx(i,j,k) ) TWO *( gxz * betaxz + gyz * betayz + gzz * betazz)
gyy_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Ayy(i,j,k) - F2o3 * gyy(i,j,k) * divb_loc + & gxy_rhs = - TWO * alpn1 * Axy + F1o3 * gxy * div_beta + &
TWO * ( gxy(i,j,k) * betaxy(i,j,k) + gyy(i,j,k) * betayy(i,j,k) + gyz(i,j,k) * betazy(i,j,k) ) gxx * betaxy + gxz * betazy + &
gyy * betayx + gyz * betazx &
- gxy * betazz
gzz_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Azz(i,j,k) - F2o3 * gzz(i,j,k) * divb_loc + & gyz_rhs = - TWO * alpn1 * Ayz + F1o3 * gyz * div_beta + &
TWO * ( gxz(i,j,k) * betaxz(i,j,k) + gyz(i,j,k) * betayz(i,j,k) + gzz(i,j,k) * betazz(i,j,k) ) gxy * betaxz + gyy * betayz + &
gxz * betaxy + gzz * betazy &
- gyz * betaxx
gxy_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Axy(i,j,k) + F1o3 * gxy(i,j,k) * divb_loc + & gxz_rhs = - TWO * alpn1 * Axz + F1o3 * gxz * div_beta + &
gxx(i,j,k) * betaxy(i,j,k) + gxz(i,j,k) * betazy(i,j,k) + gyy(i,j,k) * betayx(i,j,k) + & gxx * betaxz + gxy * betayz + &
gyz(i,j,k) * betazx(i,j,k) - gxy(i,j,k) * betazz(i,j,k) gyz * betayx + gzz * betazx &
- gxz * betayy !rhs for gij
gyz_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Ayz(i,j,k) + F1o3 * gyz(i,j,k) * divb_loc + & ! invert tilted metric
gxy(i,j,k) * betaxz(i,j,k) + gyy(i,j,k) * betayz(i,j,k) + gxz(i,j,k) * betaxy(i,j,k) + & gupzz = gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
gzz(i,j,k) * betazy(i,j,k) - gyz(i,j,k) * betaxx(i,j,k) gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
gupxx = ( gyy * gzz - gyz * gyz ) / gupzz
gupxy = - ( gxy * gzz - gyz * gxz ) / gupzz
gupxz = ( gxy * gyz - gyy * gxz ) / gupzz
gupyy = ( gxx * gzz - gxz * gxz ) / gupzz
gupyz = - ( gxx * gyz - gxy * gxz ) / gupzz
gupzz = ( gxx * gyy - gxy * gxy ) / gupzz
gxz_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Axz(i,j,k) + F1o3 * gxz(i,j,k) * divb_loc + & if(co == 0)then
gxx(i,j,k) * betaxz(i,j,k) + gxy(i,j,k) * betayz(i,j,k) + gyz(i,j,k) * betayx(i,j,k) + & ! Gam^i_Res = Gam^i + gup^ij_,j
gzz(i,j,k) * betazx(i,j,k) - gxz(i,j,k) * betayy(i,j,k) Gmx_Res = Gamx - (gupxx*(gupxx*gxxx+gupxy*gxyx+gupxz*gxzx)&
+gupxy*(gupxx*gxyx+gupxy*gyyx+gupxz*gyzx)&
+gupxz*(gupxx*gxzx+gupxy*gyzx+gupxz*gzzx)&
+gupxx*(gupxy*gxxy+gupyy*gxyy+gupyz*gxzy)&
+gupxy*(gupxy*gxyy+gupyy*gyyy+gupyz*gyzy)&
+gupxz*(gupxy*gxzy+gupyy*gyzy+gupyz*gzzy)&
+gupxx*(gupxz*gxxz+gupyz*gxyz+gupzz*gxzz)&
+gupxy*(gupxz*gxyz+gupyz*gyyz+gupzz*gyzz)&
+gupxz*(gupxz*gxzz+gupyz*gyzz+gupzz*gzzz))
Gmy_Res = Gamy - (gupxx*(gupxy*gxxx+gupyy*gxyx+gupyz*gxzx)&
+gupxy*(gupxy*gxyx+gupyy*gyyx+gupyz*gyzx)&
+gupxz*(gupxy*gxzx+gupyy*gyzx+gupyz*gzzx)&
+gupxy*(gupxy*gxxy+gupyy*gxyy+gupyz*gxzy)&
+gupyy*(gupxy*gxyy+gupyy*gyyy+gupyz*gyzy)&
+gupyz*(gupxy*gxzy+gupyy*gyzy+gupyz*gzzy)&
+gupxy*(gupxz*gxxz+gupyz*gxyz+gupzz*gxzz)&
+gupyy*(gupxz*gxyz+gupyz*gyyz+gupzz*gyzz)&
+gupyz*(gupxz*gxzz+gupyz*gyzz+gupzz*gzzz))
Gmz_Res = Gamz - (gupxx*(gupxz*gxxx+gupyz*gxyx+gupzz*gxzx)&
+gupxy*(gupxz*gxyx+gupyz*gyyx+gupzz*gyzx)&
+gupxz*(gupxz*gxzx+gupyz*gyzx+gupzz*gzzx)&
+gupxy*(gupxz*gxxy+gupyz*gxyy+gupzz*gxzy)&
+gupyy*(gupxz*gxyy+gupyz*gyyy+gupzz*gyzy)&
+gupyz*(gupxz*gxzy+gupyz*gyzy+gupzz*gzzy)&
+gupxz*(gupxz*gxxz+gupyz*gxyz+gupzz*gxzz)&
+gupyz*(gupxz*gxyz+gupyz*gyyz+gupzz*gyzz)&
+gupzz*(gupxz*gxzz+gupyz*gyzz+gupzz*gzzz))
endif
det_loc = gxx(i,j,k) * gyy(i,j,k) * gzz(i,j,k) + gxy(i,j,k) * gyz(i,j,k) * gxz(i,j,k) + & ! second kind of connection
gxz(i,j,k) * gxy(i,j,k) * gyz(i,j,k) - gxz(i,j,k) * gyy(i,j,k) * gxz(i,j,k) - & Gamxxx =HALF*( gupxx*gxxx + gupxy*(TWO*gxyx - gxxy ) + gupxz*(TWO*gxzx - gxxz ))
gxy(i,j,k) * gxy(i,j,k) * gzz(i,j,k) - gxx(i,j,k) * gyz(i,j,k) * gyz(i,j,k) Gamyxx =HALF*( gupxy*gxxx + gupyy*(TWO*gxyx - gxxy ) + gupyz*(TWO*gxzx - gxxz ))
gupxx_loc = ( gyy(i,j,k) * gzz(i,j,k) - gyz(i,j,k) * gyz(i,j,k) ) / det_loc Gamzxx =HALF*( gupxz*gxxx + gupyz*(TWO*gxyx - gxxy ) + gupzz*(TWO*gxzx - gxxz ))
gupxy_loc = - ( gxy(i,j,k) * gzz(i,j,k) - gyz(i,j,k) * gxz(i,j,k) ) / det_loc
gupxz_loc = ( gxy(i,j,k) * gyz(i,j,k) - gyy(i,j,k) * gxz(i,j,k) ) / det_loc
gupyy_loc = ( gxx(i,j,k) * gzz(i,j,k) - gxz(i,j,k) * gxz(i,j,k) ) / det_loc
gupyz_loc = - ( gxx(i,j,k) * gyz(i,j,k) - gxy(i,j,k) * gxz(i,j,k) ) / det_loc
gupzz_loc = ( gxx(i,j,k) * gyy(i,j,k) - gxy(i,j,k) * gxy(i,j,k) ) / det_loc
gupxx(i,j,k) = gupxx_loc
gupxy(i,j,k) = gupxy_loc
gupxz(i,j,k) = gupxz_loc
gupyy(i,j,k) = gupyy_loc
gupyz(i,j,k) = gupyz_loc
gupzz(i,j,k) = gupzz_loc
if(co == 0)then Gamxyy =HALF*( gupxx*(TWO*gxyy - gyyx ) + gupxy*gyyy + gupxz*(TWO*gyzy - gyyz ))
Gmx_Res(i,j,k) = Gamx(i,j,k) - ( & Gamyyy =HALF*( gupxy*(TWO*gxyy - gyyx ) + gupyy*gyyy + gupyz*(TWO*gyzy - gyyz ))
gupxx_loc*(gupxx_loc*gxxx(i,j,k)+gupxy_loc*gxyx(i,j,k)+gupxz_loc*gxzx(i,j,k)) + & Gamzyy =HALF*( gupxz*(TWO*gxyy - gyyx ) + gupyz*gyyy + gupzz*(TWO*gyzy - gyyz ))
gupxy_loc*(gupxx_loc*gxyx(i,j,k)+gupxy_loc*gyyx(i,j,k)+gupxz_loc*gyzx(i,j,k)) + &
gupxz_loc*(gupxx_loc*gxzx(i,j,k)+gupxy_loc*gyzx(i,j,k)+gupxz_loc*gzzx(i,j,k)) + &
gupxx_loc*(gupxy_loc*gxxy(i,j,k)+gupyy_loc*gxyy(i,j,k)+gupyz_loc*gxzy(i,j,k)) + &
gupxy_loc*(gupxy_loc*gxyy(i,j,k)+gupyy_loc*gyyy(i,j,k)+gupyz_loc*gyzy(i,j,k)) + &
gupxz_loc*(gupxy_loc*gxzy(i,j,k)+gupyy_loc*gyzy(i,j,k)+gupyz_loc*gzzy(i,j,k)) + &
gupxx_loc*(gupxz_loc*gxxz(i,j,k)+gupyz_loc*gxyz(i,j,k)+gupzz_loc*gxzz(i,j,k)) + &
gupxy_loc*(gupxz_loc*gxyz(i,j,k)+gupyz_loc*gyyz(i,j,k)+gupzz_loc*gyzz(i,j,k)) + &
gupxz_loc*(gupxz_loc*gxzz(i,j,k)+gupyz_loc*gyzz(i,j,k)+gupzz_loc*gzzz(i,j,k)))
Gmy_Res(i,j,k) = Gamy(i,j,k) - ( &
gupxx_loc*(gupxy_loc*gxxx(i,j,k)+gupyy_loc*gxyx(i,j,k)+gupyz_loc*gxzx(i,j,k)) + &
gupxy_loc*(gupxy_loc*gxyx(i,j,k)+gupyy_loc*gyyx(i,j,k)+gupyz_loc*gyzx(i,j,k)) + &
gupxz_loc*(gupxy_loc*gxzx(i,j,k)+gupyy_loc*gyzx(i,j,k)+gupyz_loc*gzzx(i,j,k)) + &
gupxy_loc*(gupxy_loc*gxxy(i,j,k)+gupyy_loc*gxyy(i,j,k)+gupyz_loc*gxzy(i,j,k)) + &
gupyy_loc*(gupxy_loc*gxyy(i,j,k)+gupyy_loc*gyyy(i,j,k)+gupyz_loc*gyzy(i,j,k)) + &
gupyz_loc*(gupxy_loc*gxzy(i,j,k)+gupyy_loc*gyzy(i,j,k)+gupyz_loc*gzzy(i,j,k)) + &
gupxy_loc*(gupxz_loc*gxxz(i,j,k)+gupyz_loc*gxyz(i,j,k)+gupzz_loc*gxzz(i,j,k)) + &
gupyy_loc*(gupxz_loc*gxyz(i,j,k)+gupyz_loc*gyyz(i,j,k)+gupzz_loc*gyzz(i,j,k)) + &
gupyz_loc*(gupxz_loc*gxzz(i,j,k)+gupyz_loc*gyzz(i,j,k)+gupzz_loc*gzzz(i,j,k)))
Gmz_Res(i,j,k) = Gamz(i,j,k) - ( &
gupxx_loc*(gupxz_loc*gxxx(i,j,k)+gupyz_loc*gxyx(i,j,k)+gupzz_loc*gxzx(i,j,k)) + &
gupxy_loc*(gupxz_loc*gxyx(i,j,k)+gupyz_loc*gyyx(i,j,k)+gupzz_loc*gyzx(i,j,k)) + &
gupxz_loc*(gupxz_loc*gxzx(i,j,k)+gupyz_loc*gyzx(i,j,k)+gupzz_loc*gzzx(i,j,k)) + &
gupxy_loc*(gupxz_loc*gxxy(i,j,k)+gupyz_loc*gxyy(i,j,k)+gupzz_loc*gxzy(i,j,k)) + &
gupyy_loc*(gupxz_loc*gxyy(i,j,k)+gupyz_loc*gyyy(i,j,k)+gupzz_loc*gyzy(i,j,k)) + &
gupyz_loc*(gupxz_loc*gxzy(i,j,k)+gupyz_loc*gyzy(i,j,k)+gupzz_loc*gzzy(i,j,k)) + &
gupxz_loc*(gupxz_loc*gxxz(i,j,k)+gupyz_loc*gxyz(i,j,k)+gupzz_loc*gxzz(i,j,k)) + &
gupyz_loc*(gupxz_loc*gxyz(i,j,k)+gupyz_loc*gyyz(i,j,k)+gupzz_loc*gyzz(i,j,k)) + &
gupzz_loc*(gupxz_loc*gxzz(i,j,k)+gupyz_loc*gyzz(i,j,k)+gupzz_loc*gzzz(i,j,k)))
endif
Gamxxx(i,j,k)=HALF*( gupxx_loc*gxxx(i,j,k) + gupxy_loc*(TWO*gxyx(i,j,k) - gxxy(i,j,k)) + gupxz_loc*(TWO*gxzx(i,j,k) - gxxz(i,j,k))) Gamxzz =HALF*( gupxx*(TWO*gxzz - gzzx ) + gupxy*(TWO*gyzz - gzzy ) + gupxz*gzzz)
Gamyxx(i,j,k)=HALF*( gupxy_loc*gxxx(i,j,k) + gupyy_loc*(TWO*gxyx(i,j,k) - gxxy(i,j,k)) + gupyz_loc*(TWO*gxzx(i,j,k) - gxxz(i,j,k))) Gamyzz =HALF*( gupxy*(TWO*gxzz - gzzx ) + gupyy*(TWO*gyzz - gzzy ) + gupyz*gzzz)
Gamzxx(i,j,k)=HALF*( gupxz_loc*gxxx(i,j,k) + gupyz_loc*(TWO*gxyx(i,j,k) - gxxy(i,j,k)) + gupzz_loc*(TWO*gxzx(i,j,k) - gxxz(i,j,k))) Gamzzz =HALF*( gupxz*(TWO*gxzz - gzzx ) + gupyz*(TWO*gyzz - gzzy ) + gupzz*gzzz)
Gamxyy(i,j,k)=HALF*( gupxx_loc*(TWO*gxyy(i,j,k) - gyyx(i,j,k)) + gupxy_loc*gyyy(i,j,k) + gupxz_loc*(TWO*gyzy(i,j,k) - gyyz(i,j,k))) Gamxxy =HALF*( gupxx*gxxy + gupxy*gyyx + gupxz*( gxzy + gyzx - gxyz ) )
Gamyyy(i,j,k)=HALF*( gupxy_loc*(TWO*gxyy(i,j,k) - gyyx(i,j,k)) + gupyy_loc*gyyy(i,j,k) + gupyz_loc*(TWO*gyzy(i,j,k) - gyyz(i,j,k))) Gamyxy =HALF*( gupxy*gxxy + gupyy*gyyx + gupyz*( gxzy + gyzx - gxyz ) )
Gamzyy(i,j,k)=HALF*( gupxz_loc*(TWO*gxyy(i,j,k) - gyyx(i,j,k)) + gupyz_loc*gyyy(i,j,k) + gupzz_loc*(TWO*gyzy(i,j,k) - gyyz(i,j,k))) Gamzxy =HALF*( gupxz*gxxy + gupyz*gyyx + gupzz*( gxzy + gyzx - gxyz ) )
Gamxzz(i,j,k)=HALF*( gupxx_loc*(TWO*gxzz(i,j,k) - gzzx(i,j,k)) + gupxy_loc*(TWO*gyzz(i,j,k) - gzzy(i,j,k)) + gupxz_loc*gzzz(i,j,k)) Gamxxz =HALF*( gupxx*gxxz + gupxy*( gxyz + gyzx - gxzy ) + gupxz*gzzx )
Gamyzz(i,j,k)=HALF*( gupxy_loc*(TWO*gxzz(i,j,k) - gzzx(i,j,k)) + gupyy_loc*(TWO*gyzz(i,j,k) - gzzy(i,j,k)) + gupyz_loc*gzzz(i,j,k)) Gamyxz =HALF*( gupxy*gxxz + gupyy*( gxyz + gyzx - gxzy ) + gupyz*gzzx )
Gamzzz(i,j,k)=HALF*( gupxz_loc*(TWO*gxzz(i,j,k) - gzzx(i,j,k)) + gupyz_loc*(TWO*gyzz(i,j,k) - gzzy(i,j,k)) + gupzz_loc*gzzz(i,j,k)) Gamzxz =HALF*( gupxz*gxxz + gupyz*( gxyz + gyzx - gxzy ) + gupzz*gzzx )
Gamxxy(i,j,k)=HALF*( gupxx_loc*gxxy(i,j,k) + gupxy_loc*gyyx(i,j,k) + gupxz_loc*(gxzy(i,j,k) + gyzx(i,j,k) - gxyz(i,j,k)) ) Gamxyz =HALF*( gupxx*( gxyz + gxzy - gyzx ) + gupxy*gyyz + gupxz*gzzy )
Gamyxy(i,j,k)=HALF*( gupxy_loc*gxxy(i,j,k) + gupyy_loc*gyyx(i,j,k) + gupyz_loc*(gxzy(i,j,k) + gyzx(i,j,k) - gxyz(i,j,k)) ) Gamyyz =HALF*( gupxy*( gxyz + gxzy - gyzx ) + gupyy*gyyz + gupyz*gzzy )
Gamzxy(i,j,k)=HALF*( gupxz_loc*gxxy(i,j,k) + gupyz_loc*gyyx(i,j,k) + gupzz_loc*(gxzy(i,j,k) + gyzx(i,j,k) - gxyz(i,j,k)) ) Gamzyz =HALF*( gupxz*( gxyz + gxzy - gyzx ) + gupyz*gyyz + gupzz*gzzy )
Gamxxz(i,j,k)=HALF*( gupxx_loc*gxxz(i,j,k) + gupxy_loc*(gxyz(i,j,k) + gyzx(i,j,k) - gxzy(i,j,k)) + gupxz_loc*gzzx(i,j,k) )
Gamyxz(i,j,k)=HALF*( gupxy_loc*gxxz(i,j,k) + gupyy_loc*(gxyz(i,j,k) + gyzx(i,j,k) - gxzy(i,j,k)) + gupyz_loc*gzzx(i,j,k) )
Gamzxz(i,j,k)=HALF*( gupxz_loc*gxxz(i,j,k) + gupyz_loc*(gxyz(i,j,k) + gyzx(i,j,k) - gxzy(i,j,k)) + gupzz_loc*gzzx(i,j,k) )
Gamxyz(i,j,k)=HALF*( gupxx_loc*(gxyz(i,j,k) + gxzy(i,j,k) - gyzx(i,j,k)) + gupxy_loc*gyyz(i,j,k) + gupxz_loc*gzzy(i,j,k) )
Gamyyz(i,j,k)=HALF*( gupxy_loc*(gxyz(i,j,k) + gxzy(i,j,k) - gyzx(i,j,k)) + gupyy_loc*gyyz(i,j,k) + gupyz_loc*gzzy(i,j,k) )
Gamzyz(i,j,k)=HALF*( gupxz_loc*(gxyz(i,j,k) + gxzy(i,j,k) - gyzx(i,j,k)) + gupyz_loc*gyyz(i,j,k) + gupzz_loc*gzzy(i,j,k) )
enddo
enddo
enddo
! Raise indices of \tilde A_{ij} and store in R_ij ! Raise indices of \tilde A_{ij} and store in R_ij
Rxx = gupxx * gupxx * Axx + gupxy * gupxy * Ayy + gupxz * gupxz * Azz + &
TWO*(gupxx * gupxy * Axy + gupxx * gupxz * Axz + gupxy * gupxz * Ayz)
Ryy = gupxy * gupxy * Axx + gupyy * gupyy * Ayy + gupyz * gupyz * Azz + &
TWO*(gupxy * gupyy * Axy + gupxy * gupyz * Axz + gupyy * gupyz * Ayz)
Rzz = gupxz * gupxz * Axx + gupyz * gupyz * Ayy + gupzz * gupzz * Azz + &
TWO*(gupxz * gupyz * Axy + gupxz * gupzz * Axz + gupyz * gupzz * Ayz)
Rxy = gupxx * gupxy * Axx + gupxy * gupyy * Ayy + gupxz * gupyz * Azz + &
(gupxx * gupyy + gupxy * gupxy)* Axy + &
(gupxx * gupyz + gupxz * gupxy)* Axz + &
(gupxy * gupyz + gupxz * gupyy)* Ayz
Rxz = gupxx * gupxz * Axx + gupxy * gupyz * Ayy + gupxz * gupzz * Azz + &
(gupxx * gupyz + gupxy * gupxz)* Axy + &
(gupxx * gupzz + gupxz * gupxz)* Axz + &
(gupxy * gupzz + gupxz * gupyz)* Ayz
Ryz = gupxy * gupxz * Axx + gupyy * gupyz * Ayy + gupyz * gupzz * Azz + &
(gupxy * gupyz + gupyy * gupxz)* Axy + &
(gupxy * gupzz + gupyz * gupxz)* Axz + &
(gupyy * gupzz + gupyz * gupyz)* Ayz
! Right hand side for Gam^i without shift terms... ! Right hand side for Gam^i without shift terms...
call fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev) call fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
call fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev) call fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev)
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
gupxx_loc = gupxx(i,j,k)
gupxy_loc = gupxy(i,j,k)
gupxz_loc = gupxz(i,j,k)
gupyy_loc = gupyy(i,j,k)
gupyz_loc = gupyz(i,j,k)
gupzz_loc = gupzz(i,j,k)
Rxx_loc = gupxx_loc * gupxx_loc * Axx(i,j,k) + gupxy_loc * gupxy_loc * Ayy(i,j,k) + gupxz_loc * gupxz_loc * Azz(i,j,k) + & Gamx_rhs = - TWO * ( Lapx * Rxx + Lapy * Rxy + Lapz * Rxz ) + &
TWO * (gupxx_loc * gupxy_loc * Axy(i,j,k) + gupxx_loc * gupxz_loc * Axz(i,j,k) + gupxy_loc * gupxz_loc * Ayz(i,j,k)) TWO * alpn1 * ( &
Ryy_loc = gupxy_loc * gupxy_loc * Axx(i,j,k) + gupyy_loc * gupyy_loc * Ayy(i,j,k) + gupyz_loc * gupyz_loc * Azz(i,j,k) + & -F3o2/chin1 * ( chix * Rxx + chiy * Rxy + chiz * Rxz ) - &
TWO * (gupxy_loc * gupyy_loc * Axy(i,j,k) + gupxy_loc * gupyz_loc * Axz(i,j,k) + gupyy_loc * gupyz_loc * Ayz(i,j,k)) gupxx * ( F2o3 * Kx + EIGHT * PI * Sx ) - &
Rzz_loc = gupxz_loc * gupxz_loc * Axx(i,j,k) + gupyz_loc * gupyz_loc * Ayy(i,j,k) + gupzz_loc * gupzz_loc * Azz(i,j,k) + & gupxy * ( F2o3 * Ky + EIGHT * PI * Sy ) - &
TWO * (gupxz_loc * gupyz_loc * Axy(i,j,k) + gupxz_loc * gupzz_loc * Axz(i,j,k) + gupyz_loc * gupzz_loc * Ayz(i,j,k)) gupxz * ( F2o3 * Kz + EIGHT * PI * Sz ) + &
Rxy_loc = gupxx_loc * gupxy_loc * Axx(i,j,k) + gupxy_loc * gupyy_loc * Ayy(i,j,k) + gupxz_loc * gupyz_loc * Azz(i,j,k) + & Gamxxx * Rxx + Gamxyy * Ryy + Gamxzz * Rzz + &
(gupxx_loc * gupyy_loc + gupxy_loc * gupxy_loc) * Axy(i,j,k) + & TWO * ( Gamxxy * Rxy + Gamxxz * Rxz + Gamxyz * Ryz ) )
(gupxx_loc * gupyz_loc + gupxz_loc * gupxy_loc) * Axz(i,j,k) + &
(gupxy_loc * gupyz_loc + gupxz_loc * gupyy_loc) * Ayz(i,j,k)
Rxz_loc = gupxx_loc * gupxz_loc * Axx(i,j,k) + gupxy_loc * gupyz_loc * Ayy(i,j,k) + gupxz_loc * gupzz_loc * Azz(i,j,k) + &
(gupxx_loc * gupyz_loc + gupxy_loc * gupxz_loc) * Axy(i,j,k) + &
(gupxx_loc * gupzz_loc + gupxz_loc * gupxz_loc) * Axz(i,j,k) + &
(gupxy_loc * gupzz_loc + gupxz_loc * gupyz_loc) * Ayz(i,j,k)
Ryz_loc = gupxy_loc * gupxz_loc * Axx(i,j,k) + gupyy_loc * gupyz_loc * Ayy(i,j,k) + gupyz_loc * gupzz_loc * Azz(i,j,k) + &
(gupxy_loc * gupyz_loc + gupyy_loc * gupxz_loc) * Axy(i,j,k) + &
(gupxy_loc * gupzz_loc + gupyz_loc * gupxz_loc) * Axz(i,j,k) + &
(gupyy_loc * gupzz_loc + gupyz_loc * gupyz_loc) * Ayz(i,j,k)
Rxx(i,j,k) = Rxx_loc
Ryy(i,j,k) = Ryy_loc
Rzz(i,j,k) = Rzz_loc
Rxy(i,j,k) = Rxy_loc
Rxz(i,j,k) = Rxz_loc
Ryz(i,j,k) = Ryz_loc
Gamx_rhs(i,j,k) = - TWO * (Lapx(i,j,k) * Rxx_loc + Lapy(i,j,k) * Rxy_loc + Lapz(i,j,k) * Rxz_loc) + & Gamy_rhs = - TWO * ( Lapx * Rxy + Lapy * Ryy + Lapz * Ryz ) + &
TWO * alpn1(i,j,k) * ( & TWO * alpn1 * ( &
-F3o2/chin1(i,j,k) * (chix(i,j,k) * Rxx_loc + chiy(i,j,k) * Rxy_loc + chiz(i,j,k) * Rxz_loc) - & -F3o2/chin1 * ( chix * Rxy + chiy * Ryy + chiz * Ryz ) - &
gupxx_loc * (F2o3 * Kx(i,j,k) + EIGHT * PI * Sx(i,j,k)) - & gupxy * ( F2o3 * Kx + EIGHT * PI * Sx ) - &
gupxy_loc * (F2o3 * Ky(i,j,k) + EIGHT * PI * Sy(i,j,k)) - & gupyy * ( F2o3 * Ky + EIGHT * PI * Sy ) - &
gupxz_loc * (F2o3 * Kz(i,j,k) + EIGHT * PI * Sz(i,j,k)) + & gupyz * ( F2o3 * Kz + EIGHT * PI * Sz ) + &
Gamxxx(i,j,k) * Rxx_loc + Gamxyy(i,j,k) * Ryy_loc + Gamxzz(i,j,k) * Rzz_loc + & Gamyxx * Rxx + Gamyyy * Ryy + Gamyzz * Rzz + &
TWO * (Gamxxy(i,j,k) * Rxy_loc + Gamxxz(i,j,k) * Rxz_loc + Gamxyz(i,j,k) * Ryz_loc)) TWO * ( Gamyxy * Rxy + Gamyxz * Rxz + Gamyyz * Ryz ) )
Gamy_rhs(i,j,k) = - TWO * (Lapx(i,j,k) * Rxy_loc + Lapy(i,j,k) * Ryy_loc + Lapz(i,j,k) * Ryz_loc) + & Gamz_rhs = - TWO * ( Lapx * Rxz + Lapy * Ryz + Lapz * Rzz ) + &
TWO * alpn1(i,j,k) * ( & TWO * alpn1 * ( &
-F3o2/chin1(i,j,k) * (chix(i,j,k) * Rxy_loc + chiy(i,j,k) * Ryy_loc + chiz(i,j,k) * Ryz_loc) - & -F3o2/chin1 * ( chix * Rxz + chiy * Ryz + chiz * Rzz ) - &
gupxy_loc * (F2o3 * Kx(i,j,k) + EIGHT * PI * Sx(i,j,k)) - & gupxz * ( F2o3 * Kx + EIGHT * PI * Sx ) - &
gupyy_loc * (F2o3 * Ky(i,j,k) + EIGHT * PI * Sy(i,j,k)) - & gupyz * ( F2o3 * Ky + EIGHT * PI * Sy ) - &
gupyz_loc * (F2o3 * Kz(i,j,k) + EIGHT * PI * Sz(i,j,k)) + & gupzz * ( F2o3 * Kz + EIGHT * PI * Sz ) + &
Gamyxx(i,j,k) * Rxx_loc + Gamyyy(i,j,k) * Ryy_loc + Gamyzz(i,j,k) * Rzz_loc + & Gamzxx * Rxx + Gamzyy * Ryy + Gamzzz * Rzz + &
TWO * (Gamyxy(i,j,k) * Rxy_loc + Gamyxz(i,j,k) * Rxz_loc + Gamyyz(i,j,k) * Ryz_loc)) TWO * ( Gamzxy * Rxy + Gamzxz * Rxz + Gamzyz * Ryz ) )
Gamz_rhs(i,j,k) = - TWO * (Lapx(i,j,k) * Rxz_loc + Lapy(i,j,k) * Ryz_loc + Lapz(i,j,k) * Rzz_loc) + &
TWO * alpn1(i,j,k) * ( &
-F3o2/chin1(i,j,k) * (chix(i,j,k) * Rxz_loc + chiy(i,j,k) * Ryz_loc + chiz(i,j,k) * Rzz_loc) - &
gupxz_loc * (F2o3 * Kx(i,j,k) + EIGHT * PI * Sx(i,j,k)) - &
gupyz_loc * (F2o3 * Ky(i,j,k) + EIGHT * PI * Sy(i,j,k)) - &
gupzz_loc * (F2o3 * Kz(i,j,k) + EIGHT * PI * Sz(i,j,k)) + &
Gamzxx(i,j,k) * Rxx_loc + Gamzyy(i,j,k) * Ryy_loc + Gamzzz(i,j,k) * Rzz_loc + &
TWO * (Gamzxy(i,j,k) * Rxy_loc + Gamzxz(i,j,k) * Rxz_loc + Gamzyz(i,j,k) * Ryz_loc))
enddo
enddo
enddo
call fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,& call fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,&
X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev) X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev)
@@ -359,54 +289,38 @@
call fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,& call fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,&
X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev) X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev)
fxx = gxxx + gxyy + gxzz
fxy = gxyx + gyyy + gyzz
fxz = gxzx + gyzy + gzzz
Gamxa = gupxx * Gamxxx + gupyy * Gamxyy + gupzz * Gamxzz + &
TWO*( gupxy * Gamxxy + gupxz * Gamxxz + gupyz * Gamxyz )
Gamya = gupxx * Gamyxx + gupyy * Gamyyy + gupzz * Gamyzz + &
TWO*( gupxy * Gamyxy + gupxz * Gamyxz + gupyz * Gamyyz )
Gamza = gupxx * Gamzxx + gupyy * Gamzyy + gupzz * Gamzzz + &
TWO*( gupxy * Gamzxy + gupxz * Gamzxz + gupyz * Gamzyz )
call fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev) call fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev)
call fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev) call fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev)
call fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev) call fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev)
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
divb_loc = div_beta(i,j,k)
fxx_loc = gxxx(i,j,k) + gxyy(i,j,k) + gxzz(i,j,k)
fxy_loc = gxyx(i,j,k) + gyyy(i,j,k) + gyzz(i,j,k)
fxz_loc = gxzx(i,j,k) + gyzy(i,j,k) + gzzz(i,j,k)
gupxx_loc = gupxx(i,j,k) Gamx_rhs = Gamx_rhs + F2o3 * Gamxa * div_beta - &
gupxy_loc = gupxy(i,j,k) Gamxa * betaxx - Gamya * betaxy - Gamza * betaxz + &
gupxz_loc = gupxz(i,j,k) F1o3 * (gupxx * fxx + gupxy * fxy + gupxz * fxz ) + &
gupyy_loc = gupyy(i,j,k) gupxx * gxxx + gupyy * gyyx + gupzz * gzzx + &
gupyz_loc = gupyz(i,j,k) TWO * (gupxy * gxyx + gupxz * gxzx + gupyz * gyzx )
gupzz_loc = gupzz(i,j,k)
Gamxa_loc = gupxx_loc * Gamxxx(i,j,k) + gupyy_loc * Gamxyy(i,j,k) + gupzz_loc * Gamxzz(i,j,k) + & Gamy_rhs = Gamy_rhs + F2o3 * Gamya * div_beta - &
TWO * (gupxy_loc * Gamxxy(i,j,k) + gupxz_loc * Gamxxz(i,j,k) + gupyz_loc * Gamxyz(i,j,k)) Gamxa * betayx - Gamya * betayy - Gamza * betayz + &
Gamya_loc = gupxx_loc * Gamyxx(i,j,k) + gupyy_loc * Gamyyy(i,j,k) + gupzz_loc * Gamyzz(i,j,k) + & F1o3 * (gupxy * fxx + gupyy * fxy + gupyz * fxz ) + &
TWO * (gupxy_loc * Gamyxy(i,j,k) + gupxz_loc * Gamyxz(i,j,k) + gupyz_loc * Gamyyz(i,j,k)) gupxx * gxxy + gupyy * gyyy + gupzz * gzzy + &
Gamza_loc = gupxx_loc * Gamzxx(i,j,k) + gupyy_loc * Gamzyy(i,j,k) + gupzz_loc * Gamzzz(i,j,k) + & TWO * (gupxy * gxyy + gupxz * gxzy + gupyz * gyzy )
TWO * (gupxy_loc * Gamzxy(i,j,k) + gupxz_loc * Gamzxz(i,j,k) + gupyz_loc * Gamzyz(i,j,k))
Gamxa(i,j,k) = Gamxa_loc
Gamya(i,j,k) = Gamya_loc
Gamza(i,j,k) = Gamza_loc
Gamx_rhs(i,j,k) = Gamx_rhs(i,j,k) + F2o3 * Gamxa_loc * divb_loc - & Gamz_rhs = Gamz_rhs + F2o3 * Gamza * div_beta - &
Gamxa_loc * betaxx(i,j,k) - Gamya_loc * betaxy(i,j,k) - Gamza_loc * betaxz(i,j,k) + & Gamxa * betazx - Gamya * betazy - Gamza * betazz + &
F1o3 * (gupxx_loc * fxx_loc + gupxy_loc * fxy_loc + gupxz_loc * fxz_loc) + & F1o3 * (gupxz * fxx + gupyz * fxy + gupzz * fxz ) + &
gupxx_loc * gxxx(i,j,k) + gupyy_loc * gyyx(i,j,k) + gupzz_loc * gzzx(i,j,k) + & gupxx * gxxz + gupyy * gyyz + gupzz * gzzz + &
TWO * (gupxy_loc * gxyx(i,j,k) + gupxz_loc * gxzx(i,j,k) + gupyz_loc * gyzx(i,j,k)) TWO * (gupxy * gxyz + gupxz * gxzz + gupyz * gyzz ) !rhs for Gam^i
Gamy_rhs(i,j,k) = Gamy_rhs(i,j,k) + F2o3 * Gamya_loc * divb_loc - &
Gamxa_loc * betayx(i,j,k) - Gamya_loc * betayy(i,j,k) - Gamza_loc * betayz(i,j,k) + &
F1o3 * (gupxy_loc * fxx_loc + gupyy_loc * fxy_loc + gupyz_loc * fxz_loc) + &
gupxx_loc * gxxy(i,j,k) + gupyy_loc * gyyy(i,j,k) + gupzz_loc * gzzy(i,j,k) + &
TWO * (gupxy_loc * gxyy(i,j,k) + gupxz_loc * gxzy(i,j,k) + gupyz_loc * gyzy(i,j,k))
Gamz_rhs(i,j,k) = Gamz_rhs(i,j,k) + F2o3 * Gamza_loc * divb_loc - &
Gamxa_loc * betazx(i,j,k) - Gamya_loc * betazy(i,j,k) - Gamza_loc * betazz(i,j,k) + &
F1o3 * (gupxz_loc * fxx_loc + gupyz_loc * fxy_loc + gupzz_loc * fxz_loc) + &
gupxx_loc * gxxz(i,j,k) + gupyy_loc * gyyz(i,j,k) + gupzz_loc * gzzz(i,j,k) + &
TWO * (gupxy_loc * gxyz(i,j,k) + gupxz_loc * gxzz(i,j,k) + gupyz_loc * gyzz(i,j,k))
enddo
enddo
enddo
!first kind of connection stored in gij,k !first kind of connection stored in gij,k
gxxx = gxx * Gamxxx + gxy * Gamyxx + gxz * Gamzxx gxxx = gxx * Gamxxx + gxy * Gamyxx + gxz * Gamzxx
@@ -658,187 +572,189 @@
!covariant second derivative of chi respect to tilted metric !covariant second derivative of chi respect to tilted metric
call fdderivs(ex,chi,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev) call fdderivs(ex,chi,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
do k=1,ex(3) fxx = fxx - Gamxxx * chix - Gamyxx * chiy - Gamzxx * chiz
do j=1,ex(2) fxy = fxy - Gamxxy * chix - Gamyxy * chiy - Gamzxy * chiz
do i=1,ex(1) fxz = fxz - Gamxxz * chix - Gamyxz * chiy - Gamzxz * chiz
fxx(i,j,k) = fxx(i,j,k) - Gamxxx(i,j,k) * chix(i,j,k) - Gamyxx(i,j,k) * chiy(i,j,k) - Gamzxx(i,j,k) * chiz(i,j,k) fyy = fyy - Gamxyy * chix - Gamyyy * chiy - Gamzyy * chiz
fxy(i,j,k) = fxy(i,j,k) - Gamxxy(i,j,k) * chix(i,j,k) - Gamyxy(i,j,k) * chiy(i,j,k) - Gamzxy(i,j,k) * chiz(i,j,k) fyz = fyz - Gamxyz * chix - Gamyyz * chiy - Gamzyz * chiz
fxz(i,j,k) = fxz(i,j,k) - Gamxxz(i,j,k) * chix(i,j,k) - Gamyxz(i,j,k) * chiy(i,j,k) - Gamzxz(i,j,k) * chiz(i,j,k) fzz = fzz - Gamxzz * chix - Gamyzz * chiy - Gamzzz * chiz
fyy(i,j,k) = fyy(i,j,k) - Gamxyy(i,j,k) * chix(i,j,k) - Gamyyy(i,j,k) * chiy(i,j,k) - Gamzyy(i,j,k) * chiz(i,j,k) ! Store D^l D_l chi - 3/(2*chi) D^l chi D_l chi in f
fyz(i,j,k) = fyz(i,j,k) - Gamxyz(i,j,k) * chix(i,j,k) - Gamyyz(i,j,k) * chiy(i,j,k) - Gamzyz(i,j,k) * chiz(i,j,k)
fzz(i,j,k) = fzz(i,j,k) - Gamxzz(i,j,k) * chix(i,j,k) - Gamyzz(i,j,k) * chiy(i,j,k) - Gamzzz(i,j,k) * chiz(i,j,k)
chin_loc = chin1(i,j,k) f = gupxx * ( fxx - F3o2/chin1 * chix * chix ) + &
f_loc = gupxx(i,j,k) * (fxx(i,j,k) - F3o2/chin_loc * chix(i,j,k) * chix(i,j,k)) + & gupyy * ( fyy - F3o2/chin1 * chiy * chiy ) + &
gupyy(i,j,k) * (fyy(i,j,k) - F3o2/chin_loc * chiy(i,j,k) * chiy(i,j,k)) + & gupzz * ( fzz - F3o2/chin1 * chiz * chiz ) + &
gupzz(i,j,k) * (fzz(i,j,k) - F3o2/chin_loc * chiz(i,j,k) * chiz(i,j,k)) + & TWO * gupxy * ( fxy - F3o2/chin1 * chix * chiy ) + &
TWO * gupxy(i,j,k) * (fxy(i,j,k) - F3o2/chin_loc * chix(i,j,k) * chiy(i,j,k)) + & TWO * gupxz * ( fxz - F3o2/chin1 * chix * chiz ) + &
TWO * gupxz(i,j,k) * (fxz(i,j,k) - F3o2/chin_loc * chix(i,j,k) * chiz(i,j,k)) + & TWO * gupyz * ( fyz - F3o2/chin1 * chiy * chiz )
TWO * gupyz(i,j,k) * (fyz(i,j,k) - F3o2/chin_loc * chiy(i,j,k) * chiz(i,j,k)) ! Add chi part to Ricci tensor:
f(i,j,k) = f_loc
Rxx(i,j,k) = Rxx(i,j,k) + (fxx(i,j,k) - chix(i,j,k)*chix(i,j,k)/chin_loc/TWO + gxx(i,j,k) * f_loc)/chin_loc/TWO Rxx = Rxx + (fxx - chix*chix/chin1/TWO + gxx * f)/chin1/TWO
Ryy(i,j,k) = Ryy(i,j,k) + (fyy(i,j,k) - chiy(i,j,k)*chiy(i,j,k)/chin_loc/TWO + gyy(i,j,k) * f_loc)/chin_loc/TWO Ryy = Ryy + (fyy - chiy*chiy/chin1/TWO + gyy * f)/chin1/TWO
Rzz(i,j,k) = Rzz(i,j,k) + (fzz(i,j,k) - chiz(i,j,k)*chiz(i,j,k)/chin_loc/TWO + gzz(i,j,k) * f_loc)/chin_loc/TWO Rzz = Rzz + (fzz - chiz*chiz/chin1/TWO + gzz * f)/chin1/TWO
Rxy(i,j,k) = Rxy(i,j,k) + (fxy(i,j,k) - chix(i,j,k)*chiy(i,j,k)/chin_loc/TWO + gxy(i,j,k) * f_loc)/chin_loc/TWO Rxy = Rxy + (fxy - chix*chiy/chin1/TWO + gxy * f)/chin1/TWO
Rxz(i,j,k) = Rxz(i,j,k) + (fxz(i,j,k) - chix(i,j,k)*chiz(i,j,k)/chin_loc/TWO + gxz(i,j,k) * f_loc)/chin_loc/TWO Rxz = Rxz + (fxz - chix*chiz/chin1/TWO + gxz * f)/chin1/TWO
Ryz(i,j,k) = Ryz(i,j,k) + (fyz(i,j,k) - chiy(i,j,k)*chiz(i,j,k)/chin_loc/TWO + gyz(i,j,k) * f_loc)/chin_loc/TWO Ryz = Ryz + (fyz - chiy*chiz/chin1/TWO + gyz * f)/chin1/TWO
enddo
enddo
enddo
! covariant second derivatives of the lapse respect to physical metric ! covariant second derivatives of the lapse respect to physical metric
call fdderivs(ex,Lap,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z, & call fdderivs(ex,Lap,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z, &
SYM,SYM,SYM,symmetry,Lev) SYM,SYM,SYM,symmetry,Lev)
do k=1,ex(3) gxxx = (gupxx * chix + gupxy * chiy + gupxz * chiz)/chin1
do j=1,ex(2) gxxy = (gupxy * chix + gupyy * chiy + gupyz * chiz)/chin1
do i=1,ex(1) gxxz = (gupxz * chix + gupyz * chiy + gupzz * chiz)/chin1
chin_loc = chin1(i,j,k)
gxxx(i,j,k) = (gupxx(i,j,k) * chix(i,j,k) + gupxy(i,j,k) * chiy(i,j,k) + gupxz(i,j,k) * chiz(i,j,k)) / chin_loc
gxxy(i,j,k) = (gupxy(i,j,k) * chix(i,j,k) + gupyy(i,j,k) * chiy(i,j,k) + gupyz(i,j,k) * chiz(i,j,k)) / chin_loc
gxxz(i,j,k) = (gupxz(i,j,k) * chix(i,j,k) + gupyz(i,j,k) * chiy(i,j,k) + gupzz(i,j,k) * chiz(i,j,k)) / chin_loc
Gamxxx(i,j,k) = Gamxxx(i,j,k) - ( (chix(i,j,k) + chix(i,j,k))/chin_loc - gxx(i,j,k) * gxxx(i,j,k) )*HALF Gamxxx = Gamxxx - ( (chix + chix)/chin1 - gxx * gxxx )*HALF
Gamyxx(i,j,k) = Gamyxx(i,j,k) - ( - gxx(i,j,k) * gxxy(i,j,k) )*HALF Gamyxx = Gamyxx - ( - gxx * gxxy )*HALF
Gamzxx(i,j,k) = Gamzxx(i,j,k) - ( - gxx(i,j,k) * gxxz(i,j,k) )*HALF Gamzxx = Gamzxx - ( - gxx * gxxz )*HALF
Gamxyy(i,j,k) = Gamxyy(i,j,k) - ( - gyy(i,j,k) * gxxx(i,j,k) )*HALF Gamxyy = Gamxyy - ( - gyy * gxxx )*HALF
Gamyyy(i,j,k) = Gamyyy(i,j,k) - ( (chiy(i,j,k) + chiy(i,j,k))/chin_loc - gyy(i,j,k) * gxxy(i,j,k) )*HALF Gamyyy = Gamyyy - ( (chiy + chiy)/chin1 - gyy * gxxy )*HALF
Gamzyy(i,j,k) = Gamzyy(i,j,k) - ( - gyy(i,j,k) * gxxz(i,j,k) )*HALF Gamzyy = Gamzyy - ( - gyy * gxxz )*HALF
Gamxzz(i,j,k) = Gamxzz(i,j,k) - ( - gzz(i,j,k) * gxxx(i,j,k) )*HALF Gamxzz = Gamxzz - ( - gzz * gxxx )*HALF
Gamyzz(i,j,k) = Gamyzz(i,j,k) - ( - gzz(i,j,k) * gxxy(i,j,k) )*HALF Gamyzz = Gamyzz - ( - gzz * gxxy )*HALF
Gamzzz(i,j,k) = Gamzzz(i,j,k) - ( (chiz(i,j,k) + chiz(i,j,k))/chin_loc - gzz(i,j,k) * gxxz(i,j,k) )*HALF Gamzzz = Gamzzz - ( (chiz + chiz)/chin1 - gzz * gxxz )*HALF
Gamxxy(i,j,k) = Gamxxy(i,j,k) - ( chiy(i,j,k) /chin_loc - gxy(i,j,k) * gxxx(i,j,k) )*HALF Gamxxy = Gamxxy - ( chiy /chin1 - gxy * gxxx )*HALF
Gamyxy(i,j,k) = Gamyxy(i,j,k) - ( chix(i,j,k) /chin_loc - gxy(i,j,k) * gxxy(i,j,k) )*HALF Gamyxy = Gamyxy - ( chix /chin1 - gxy * gxxy )*HALF
Gamzxy(i,j,k) = Gamzxy(i,j,k) - ( - gxy(i,j,k) * gxxz(i,j,k) )*HALF Gamzxy = Gamzxy - ( - gxy * gxxz )*HALF
Gamxxz(i,j,k) = Gamxxz(i,j,k) - ( chiz(i,j,k) /chin_loc - gxz(i,j,k) * gxxx(i,j,k) )*HALF Gamxxz = Gamxxz - ( chiz /chin1 - gxz * gxxx )*HALF
Gamyxz(i,j,k) = Gamyxz(i,j,k) - ( - gxz(i,j,k) * gxxy(i,j,k) )*HALF Gamyxz = Gamyxz - ( - gxz * gxxy )*HALF
Gamzxz(i,j,k) = Gamzxz(i,j,k) - ( chix(i,j,k) /chin_loc - gxz(i,j,k) * gxxz(i,j,k) )*HALF Gamzxz = Gamzxz - ( chix /chin1 - gxz * gxxz )*HALF
Gamxyz(i,j,k) = Gamxyz(i,j,k) - ( - gyz(i,j,k) * gxxx(i,j,k) )*HALF Gamxyz = Gamxyz - ( - gyz * gxxx )*HALF
Gamyyz(i,j,k) = Gamyyz(i,j,k) - ( chiz(i,j,k) /chin_loc - gyz(i,j,k) * gxxy(i,j,k) )*HALF Gamyyz = Gamyyz - ( chiz /chin1 - gyz * gxxy )*HALF
Gamzyz(i,j,k) = Gamzyz(i,j,k) - ( chiy(i,j,k) /chin_loc - gyz(i,j,k) * gxxz(i,j,k) )*HALF Gamzyz = Gamzyz - ( chiy /chin1 - gyz * gxxz )*HALF
fxx(i,j,k) = fxx(i,j,k) - Gamxxx(i,j,k)*Lapx(i,j,k) - Gamyxx(i,j,k)*Lapy(i,j,k) - Gamzxx(i,j,k)*Lapz(i,j,k) fxx = fxx - Gamxxx*Lapx - Gamyxx*Lapy - Gamzxx*Lapz
fyy(i,j,k) = fyy(i,j,k) - Gamxyy(i,j,k)*Lapx(i,j,k) - Gamyyy(i,j,k)*Lapy(i,j,k) - Gamzyy(i,j,k)*Lapz(i,j,k) fyy = fyy - Gamxyy*Lapx - Gamyyy*Lapy - Gamzyy*Lapz
fzz(i,j,k) = fzz(i,j,k) - Gamxzz(i,j,k)*Lapx(i,j,k) - Gamyzz(i,j,k)*Lapy(i,j,k) - Gamzzz(i,j,k)*Lapz(i,j,k) fzz = fzz - Gamxzz*Lapx - Gamyzz*Lapy - Gamzzz*Lapz
fxy(i,j,k) = fxy(i,j,k) - Gamxxy(i,j,k)*Lapx(i,j,k) - Gamyxy(i,j,k)*Lapy(i,j,k) - Gamzxy(i,j,k)*Lapz(i,j,k) fxy = fxy - Gamxxy*Lapx - Gamyxy*Lapy - Gamzxy*Lapz
fxz(i,j,k) = fxz(i,j,k) - Gamxxz(i,j,k)*Lapx(i,j,k) - Gamyxz(i,j,k)*Lapy(i,j,k) - Gamzxz(i,j,k)*Lapz(i,j,k) fxz = fxz - Gamxxz*Lapx - Gamyxz*Lapy - Gamzxz*Lapz
fyz(i,j,k) = fyz(i,j,k) - Gamxyz(i,j,k)*Lapx(i,j,k) - Gamyyz(i,j,k)*Lapy(i,j,k) - Gamzyz(i,j,k)*Lapz(i,j,k) fyz = fyz - Gamxyz*Lapx - Gamyyz*Lapy - Gamzyz*Lapz
trK_rhs(i,j,k) = gupxx(i,j,k) * fxx(i,j,k) + gupyy(i,j,k) * fyy(i,j,k) + gupzz(i,j,k) * fzz(i,j,k) + & ! store D^i D_i Lap in trK_rhs upto chi
TWO * (gupxy(i,j,k) * fxy(i,j,k) + gupxz(i,j,k) * fxz(i,j,k) + gupyz(i,j,k) * fyz(i,j,k)) trK_rhs = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
enddo TWO* ( gupxy * fxy + gupxz * fxz + gupyz * fyz )
enddo #if 1
enddo !! follow bam code
do k=1,ex(3) S = chin1 * ( gupxx * Sxx + gupyy * Syy + gupzz * Szz + &
do j=1,ex(2) TWO * ( gupxy * Sxy + gupxz * Sxz + gupyz * Syz ) )
do i=1,ex(1) f = F2o3 * trK * trK -(&
divb_loc = div_beta(i,j,k) gupxx * ( &
chin_loc = chin1(i,j,k) gupxx * Axx * Axx + gupyy * Axy * Axy + gupzz * Axz * Axz + &
TWO * (gupxy * Axx * Axy + gupxz * Axx * Axz + gupyz * Axy * Axz) ) + &
gupyy * ( &
gupxx * Axy * Axy + gupyy * Ayy * Ayy + gupzz * Ayz * Ayz + &
TWO * (gupxy * Axy * Ayy + gupxz * Axy * Ayz + gupyz * Ayy * Ayz) ) + &
gupzz * ( &
gupxx * Axz * Axz + gupyy * Ayz * Ayz + gupzz * Azz * Azz + &
TWO * (gupxy * Axz * Ayz + gupxz * Axz * Azz + gupyz * Ayz * Azz) ) + &
TWO * ( &
gupxy * ( &
gupxx * Axx * Axy + gupyy * Axy * Ayy + gupzz * Axz * Ayz + &
gupxy * (Axx * Ayy + Axy * Axy) + &
gupxz * (Axx * Ayz + Axz * Axy) + &
gupyz * (Axy * Ayz + Axz * Ayy) ) + &
gupxz * ( &
gupxx * Axx * Axz + gupyy * Axy * Ayz + gupzz * Axz * Azz + &
gupxy * (Axx * Ayz + Axy * Axz) + &
gupxz * (Axx * Azz + Axz * Axz) + &
gupyz * (Axy * Azz + Axz * Ayz) ) + &
gupyz * ( &
gupxx * Axy * Axz + gupyy * Ayy * Ayz + gupzz * Ayz * Azz + &
gupxy * (Axy * Ayz + Ayy * Axz) + &
gupxz * (Axy * Azz + Ayz * Axz) + &
gupyz * (Ayy * Azz + Ayz * Ayz) ) )) -1.6d1*PI*rho + EIGHT * PI * S
f = - F1o3 *( gupxx * fxx + gupyy * fyy + gupzz * fzz + &
TWO* ( gupxy * fxy + gupxz * fxz + gupyz * fyz ) + alpn1/chin1*f)
S_loc = chin_loc * ( gupxx(i,j,k) * Sxx(i,j,k) + gupyy(i,j,k) * Syy(i,j,k) + gupzz(i,j,k) * Szz(i,j,k) + & fxx = alpn1 * (Rxx - EIGHT * PI * Sxx) - fxx
TWO * (gupxy(i,j,k) * Sxy(i,j,k) + gupxz(i,j,k) * Sxz(i,j,k) + gupyz(i,j,k) * Syz(i,j,k)) ) fxy = alpn1 * (Rxy - EIGHT * PI * Sxy) - fxy
S(i,j,k) = S_loc fxz = alpn1 * (Rxz - EIGHT * PI * Sxz) - fxz
fyy = alpn1 * (Ryy - EIGHT * PI * Syy) - fyy
fyz = alpn1 * (Ryz - EIGHT * PI * Syz) - fyz
fzz = alpn1 * (Rzz - EIGHT * PI * Szz) - fzz
#else
! Add lapse and S_ij parts to Ricci tensor:
f_loc = F2o3 * trK(i,j,k) * trK(i,j,k) - ( & fxx = alpn1 * (Rxx - EIGHT * PI * Sxx) - fxx
gupxx(i,j,k) * ( gupxx(i,j,k) * Axx(i,j,k) * Axx(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + & fxy = alpn1 * (Rxy - EIGHT * PI * Sxy) - fxy
gupzz(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + & fxz = alpn1 * (Rxz - EIGHT * PI * Sxz) - fxz
TWO * (gupxy(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + gupxz(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + & fyy = alpn1 * (Ryy - EIGHT * PI * Syy) - fyy
gupyz(i,j,k) * Axy(i,j,k) * Axz(i,j,k)) ) + & fyz = alpn1 * (Ryz - EIGHT * PI * Syz) - fyz
gupyy(i,j,k) * ( gupxx(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayy(i,j,k) + & fzz = alpn1 * (Rzz - EIGHT * PI * Szz) - fzz
gupzz(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + &
TWO * (gupxy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + gupxz(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + &
gupyz(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k)) ) + &
gupzz(i,j,k) * ( gupxx(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + &
gupzz(i,j,k) * Azz(i,j,k) * Azz(i,j,k) + &
TWO * (gupxy(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + gupxz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + &
gupyz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k)) ) + &
TWO * ( gupxy(i,j,k) * ( gupxx(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + &
gupzz(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + &
gupxy(i,j,k) * (Axx(i,j,k) * Ayy(i,j,k) + Axy(i,j,k) * Axy(i,j,k)) + &
gupxz(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Axy(i,j,k)) + &
gupyz(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Ayy(i,j,k)) ) + &
gupxz(i,j,k) * ( gupxx(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + &
gupzz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + &
gupxy(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axy(i,j,k) * Axz(i,j,k)) + &
gupxz(i,j,k) * (Axx(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Axz(i,j,k)) + &
gupyz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Ayz(i,j,k)) ) + &
gupyz(i,j,k) * ( gupxx(i,j,k) * Axy(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k) + &
gupzz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k) + &
gupxy(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Ayy(i,j,k) * Axz(i,j,k)) + &
gupxz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Axz(i,j,k)) + &
gupyz(i,j,k) * (Ayy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Ayz(i,j,k)) ) ) ) - &
F16 * PI * rho(i,j,k) + EIGHT * PI * S_loc
f_loc = -F1o3 * ( gupxx(i,j,k) * fxx(i,j,k) + gupyy(i,j,k) * fyy(i,j,k) + gupzz(i,j,k) * fzz(i,j,k) + & ! Compute trace-free part (note: chi^-1 and chi cancel!):
TWO * (gupxy(i,j,k) * fxy(i,j,k) + gupxz(i,j,k) * fxz(i,j,k) + gupyz(i,j,k) * fyz(i,j,k)) + &
alpn1(i,j,k)/chin_loc * f_loc )
f(i,j,k) = f_loc
l_fxx = alpn1(i,j,k) * (Rxx(i,j,k) - EIGHT * PI * Sxx(i,j,k)) - fxx(i,j,k) f = F1o3 *( gupxx * fxx + gupyy * fyy + gupzz * fzz + &
l_fxy = alpn1(i,j,k) * (Rxy(i,j,k) - EIGHT * PI * Sxy(i,j,k)) - fxy(i,j,k) TWO* ( gupxy * fxy + gupxz * fxz + gupyz * fyz ) )
l_fxz = alpn1(i,j,k) * (Rxz(i,j,k) - EIGHT * PI * Sxz(i,j,k)) - fxz(i,j,k) #endif
l_fyy = alpn1(i,j,k) * (Ryy(i,j,k) - EIGHT * PI * Syy(i,j,k)) - fyy(i,j,k)
l_fyz = alpn1(i,j,k) * (Ryz(i,j,k) - EIGHT * PI * Syz(i,j,k)) - fyz(i,j,k)
l_fzz = alpn1(i,j,k) * (Rzz(i,j,k) - EIGHT * PI * Szz(i,j,k)) - fzz(i,j,k)
Axx_rhs(i,j,k) = l_fxx - gxx(i,j,k) * f_loc Axx_rhs = fxx - gxx * f
Ayy_rhs(i,j,k) = l_fyy - gyy(i,j,k) * f_loc Ayy_rhs = fyy - gyy * f
Azz_rhs(i,j,k) = l_fzz - gzz(i,j,k) * f_loc Azz_rhs = fzz - gzz * f
Axy_rhs(i,j,k) = l_fxy - gxy(i,j,k) * f_loc Axy_rhs = fxy - gxy * f
Axz_rhs(i,j,k) = l_fxz - gxz(i,j,k) * f_loc Axz_rhs = fxz - gxz * f
Ayz_rhs(i,j,k) = l_fyz - gyz(i,j,k) * f_loc Ayz_rhs = fyz - gyz * f
fxx(i,j,k) = gupxx(i,j,k) * Axx(i,j,k) * Axx(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + & ! Now: store A_il A^l_j into fij:
gupzz(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + TWO * (gupxy(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + &
gupxz(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + gupyz(i,j,k) * Axy(i,j,k) * Axz(i,j,k))
fyy(i,j,k) = gupxx(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayy(i,j,k) + &
gupzz(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + TWO * (gupxy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + &
gupxz(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + gupyz(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k))
fzz(i,j,k) = gupxx(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + &
gupzz(i,j,k) * Azz(i,j,k) * Azz(i,j,k) + TWO * (gupxy(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + &
gupxz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + gupyz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k))
fxy(i,j,k) = gupxx(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + &
gupzz(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + gupxy(i,j,k) * (Axx(i,j,k) * Ayy(i,j,k) + Axy(i,j,k) * Axy(i,j,k)) + &
gupxz(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Axy(i,j,k)) + &
gupyz(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Ayy(i,j,k))
fxz(i,j,k) = gupxx(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + &
gupzz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + gupxy(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axy(i,j,k) * Axz(i,j,k)) + &
gupxz(i,j,k) * (Axx(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Axz(i,j,k)) + &
gupyz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Ayz(i,j,k))
fyz(i,j,k) = gupxx(i,j,k) * Axy(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k) + &
gupzz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k) + gupxy(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Ayy(i,j,k) * Axz(i,j,k)) + &
gupxz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Axz(i,j,k)) + &
gupyz(i,j,k) * (Ayy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Ayz(i,j,k))
trK_rhs(i,j,k) = chin_loc * trK_rhs(i,j,k) fxx = gupxx * Axx * Axx + gupyy * Axy * Axy + gupzz * Axz * Axz + &
TWO * (gupxy * Axx * Axy + gupxz * Axx * Axz + gupyz * Axy * Axz)
fyy = gupxx * Axy * Axy + gupyy * Ayy * Ayy + gupzz * Ayz * Ayz + &
TWO * (gupxy * Axy * Ayy + gupxz * Axy * Ayz + gupyz * Ayy * Ayz)
fzz = gupxx * Axz * Axz + gupyy * Ayz * Ayz + gupzz * Azz * Azz + &
TWO * (gupxy * Axz * Ayz + gupxz * Axz * Azz + gupyz * Ayz * Azz)
fxy = gupxx * Axx * Axy + gupyy * Axy * Ayy + gupzz * Axz * Ayz + &
gupxy *(Axx * Ayy + Axy * Axy) + &
gupxz *(Axx * Ayz + Axz * Axy) + &
gupyz *(Axy * Ayz + Axz * Ayy)
fxz = gupxx * Axx * Axz + gupyy * Axy * Ayz + gupzz * Axz * Azz + &
gupxy *(Axx * Ayz + Axy * Axz) + &
gupxz *(Axx * Azz + Axz * Axz) + &
gupyz *(Axy * Azz + Axz * Ayz)
fyz = gupxx * Axy * Axz + gupyy * Ayy * Ayz + gupzz * Ayz * Azz + &
gupxy *(Axy * Ayz + Ayy * Axz) + &
gupxz *(Axy * Azz + Ayz * Axz) + &
gupyz *(Ayy * Azz + Ayz * Ayz)
Axx_rhs(i,j,k) = chin_loc * Axx_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Axx(i,j,k) - TWO * fxx(i,j,k)) + & f = chin1
TWO * (Axx(i,j,k) * betaxx(i,j,k) + Axy(i,j,k) * betayx(i,j,k) + Axz(i,j,k) * betazx(i,j,k)) - & ! store D^i D_i Lap in trK_rhs
F2o3 * Axx(i,j,k) * divb_loc trK_rhs = f*trK_rhs
Ayy_rhs(i,j,k) = chin_loc * Ayy_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Ayy(i,j,k) - TWO * fyy(i,j,k)) + &
TWO * (Axy(i,j,k) * betaxy(i,j,k) + Ayy(i,j,k) * betayy(i,j,k) + Ayz(i,j,k) * betazy(i,j,k)) - &
F2o3 * Ayy(i,j,k) * divb_loc
Azz_rhs(i,j,k) = chin_loc * Azz_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Azz(i,j,k) - TWO * fzz(i,j,k)) + &
TWO * (Axz(i,j,k) * betaxz(i,j,k) + Ayz(i,j,k) * betayz(i,j,k) + Azz(i,j,k) * betazz(i,j,k)) - &
F2o3 * Azz(i,j,k) * divb_loc
Axy_rhs(i,j,k) = chin_loc * Axy_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Axy(i,j,k) - TWO * fxy(i,j,k)) + &
Axx(i,j,k) * betaxy(i,j,k) + Axz(i,j,k) * betazy(i,j,k) + Ayy(i,j,k) * betayx(i,j,k) + &
Ayz(i,j,k) * betazx(i,j,k) + F1o3 * Axy(i,j,k) * divb_loc - Axy(i,j,k) * betazz(i,j,k)
Ayz_rhs(i,j,k) = chin_loc * Ayz_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Ayz(i,j,k) - TWO * fyz(i,j,k)) + &
Axy(i,j,k) * betaxz(i,j,k) + Ayy(i,j,k) * betayz(i,j,k) + Axz(i,j,k) * betaxy(i,j,k) + &
Azz(i,j,k) * betazy(i,j,k) + F1o3 * Ayz(i,j,k) * divb_loc - Ayz(i,j,k) * betaxx(i,j,k)
Axz_rhs(i,j,k) = chin_loc * Axz_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Axz(i,j,k) - TWO * fxz(i,j,k)) + &
Axx(i,j,k) * betaxz(i,j,k) + Axy(i,j,k) * betayz(i,j,k) + Ayz(i,j,k) * betayx(i,j,k) + &
Azz(i,j,k) * betazx(i,j,k) + F1o3 * Axz(i,j,k) * divb_loc - Axz(i,j,k) * betayy(i,j,k)
trK_rhs(i,j,k) = - trK_rhs(i,j,k) + alpn1(i,j,k) * ( F1o3 * trK(i,j,k) * trK(i,j,k) + & Axx_rhs = f * Axx_rhs+ alpn1 * (trK * Axx - TWO * fxx) + &
gupxx(i,j,k) * fxx(i,j,k) + gupyy(i,j,k) * fyy(i,j,k) + gupzz(i,j,k) * fzz(i,j,k) + & TWO * ( Axx * betaxx + Axy * betayx + Axz * betazx )- &
TWO * (gupxy(i,j,k) * fxy(i,j,k) + gupxz(i,j,k) * fxz(i,j,k) + gupyz(i,j,k) * fyz(i,j,k)) + & F2o3 * Axx * div_beta
FOUR * PI * (rho(i,j,k) + S_loc) )
enddo Ayy_rhs = f * Ayy_rhs+ alpn1 * (trK * Ayy - TWO * fyy) + &
enddo TWO * ( Axy * betaxy + Ayy * betayy + Ayz * betazy )- &
enddo F2o3 * Ayy * div_beta
Azz_rhs = f * Azz_rhs+ alpn1 * (trK * Azz - TWO * fzz) + &
TWO * ( Axz * betaxz + Ayz * betayz + Azz * betazz )- &
F2o3 * Azz * div_beta
Axy_rhs = f * Axy_rhs+ alpn1 *( trK * Axy - TWO * fxy )+ &
Axx * betaxy + Axz * betazy + &
Ayy * betayx + Ayz * betazx + &
F1o3 * Axy * div_beta - Axy * betazz
Ayz_rhs = f * Ayz_rhs+ alpn1 *( trK * Ayz - TWO * fyz )+ &
Axy * betaxz + Ayy * betayz + &
Axz * betaxy + Azz * betazy + &
F1o3 * Ayz * div_beta - Ayz * betaxx
Axz_rhs = f * Axz_rhs+ alpn1 *( trK * Axz - TWO * fxz )+ &
Axx * betaxz + Axy * betayz + &
Ayz * betayx + Azz * betazx + &
F1o3 * Axz * div_beta - Axz * betayy !rhs for Aij
! Compute trace of S_ij
S = f * ( gupxx * Sxx + gupyy * Syy + gupzz * Szz + &
TWO * ( gupxy * Sxy + gupxz * Sxz + gupyz * Syz ) )
trK_rhs = - trK_rhs + alpn1 *( F1o3 * trK * trK + &
gupxx * fxx + gupyy * fyy + gupzz * fzz + &
TWO * ( gupxy * fxy + gupxz * fxz + gupyz * fyz ) + &
FOUR * PI * ( rho + S )) !rhs for trK
!!!! gauge variable part !!!! gauge variable part
@@ -997,60 +913,103 @@
SSA(2)=SYM SSA(2)=SYM
SSA(3)=ANTI SSA(3)=ANTI
!!!!!!!!!advection term + Kreiss-Oliger dissipation (merged for cache efficiency) !!!!!!!!!advection term part
! lopsided_kodis shares the symmetry_bd buffer between advection and
! dissipation, eliminating redundant full-grid copies. For metric variables
! gxx/gyy/gzz (=dxx/dyy/dzz+1): stencil coefficients sum to zero,
! so the constant offset has no effect on dissipation.
call lopsided_kodis(ex,X,Y,Z,dxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps) call lopsided(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps) call lopsided(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS)
call lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps) call lopsided(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA)
call lopsided_kodis(ex,X,Y,Z,dyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps) call lopsided(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps) call lopsided(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA)
call lopsided_kodis(ex,X,Y,Z,dzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps) call lopsided(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps) call lopsided(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps) call lopsided(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS)
call lopsided_kodis(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA,eps) call lopsided(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA)
call lopsided_kodis(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS,eps) call lopsided(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided_kodis(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA,eps) call lopsided(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA)
call lopsided_kodis(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS,eps) call lopsided(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided_kodis(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS,eps) call lopsided(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided_kodis(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS,eps) call lopsided(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided_kodis(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS,eps) call lopsided(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS)
call lopsided_kodis(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS,eps) call lopsided(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS)
call lopsided_kodis(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,eps) call lopsided(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA)
!!
#if 1
!! bam does not apply dissipation on gauge variables
call lopsided_kodis(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS,eps)
#if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7)
call lopsided_kodis(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS,eps)
call lopsided_kodis(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS,eps)
call lopsided_kodis(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA,eps)
#endif
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
call lopsided_kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS,eps)
call lopsided_kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS,eps)
call lopsided_kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA,eps)
#endif
#else
! No dissipation on gauge variables (advection only)
call lopsided(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS)
#if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7) #if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7)
call lopsided(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS) call lopsided(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS)
call lopsided(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS) call lopsided(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS)
call lopsided(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA) call lopsided(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA)
#endif #endif
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7) #if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
call lopsided(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS) call lopsided(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS)
call lopsided(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS) call lopsided(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS)
call lopsided(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA) call lopsided(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA)
#endif #endif
if(eps>0)then
! usual Kreiss-Oliger dissipation
call kodis(ex,X,Y,Z,chi,chi_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,trK,trK_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,dxx,gxx_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,gxy,gxy_rhs,AAS,Symmetry,eps)
call kodis(ex,X,Y,Z,gxz,gxz_rhs,ASA,Symmetry,eps)
call kodis(ex,X,Y,Z,dyy,gyy_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,gyz,gyz_rhs,SAA,Symmetry,eps)
call kodis(ex,X,Y,Z,dzz,gzz_rhs,SSS,Symmetry,eps)
#if 0
#define i 42
#define j 40
#define k 40
if(Lev == 1)then
write(*,*) X(i),Y(j),Z(k)
write(*,*) "before",Axx_rhs(i,j,k)
endif
#undef i
#undef j
#undef k
!!stop
#endif #endif
call kodis(ex,X,Y,Z,Axx,Axx_rhs,SSS,Symmetry,eps)
#if 0
#define i 42
#define j 40
#define k 40
if(Lev == 1)then
write(*,*) X(i),Y(j),Z(k)
write(*,*) "after",Axx_rhs(i,j,k)
endif
#undef i
#undef j
#undef k
!!stop
#endif
call kodis(ex,X,Y,Z,Axy,Axy_rhs,AAS,Symmetry,eps)
call kodis(ex,X,Y,Z,Axz,Axz_rhs,ASA,Symmetry,eps)
call kodis(ex,X,Y,Z,Ayy,Ayy_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,Ayz,Ayz_rhs,SAA,Symmetry,eps)
call kodis(ex,X,Y,Z,Azz,Azz_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,Gamx,Gamx_rhs,ASS,Symmetry,eps)
call kodis(ex,X,Y,Z,Gamy,Gamy_rhs,SAS,Symmetry,eps)
call kodis(ex,X,Y,Z,Gamz,Gamz_rhs,SSA,Symmetry,eps)
#if 1
!! bam does not apply dissipation on gauge variables
call kodis(ex,X,Y,Z,Lap,Lap_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,betax,betax_rhs,ASS,Symmetry,eps)
call kodis(ex,X,Y,Z,betay,betay_rhs,SAS,Symmetry,eps)
call kodis(ex,X,Y,Z,betaz,betaz_rhs,SSA,Symmetry,eps)
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
call kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,ASS,Symmetry,eps)
call kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,SAS,Symmetry,eps)
call kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,SSA,Symmetry,eps)
#endif
#endif
endif
if(co == 0)then if(co == 0)then
! ham_Res = trR + 2/3 * K^2 - A_ij * A^ij - 16 * PI * rho ! ham_Res = trR + 2/3 * K^2 - A_ij * A^ij - 16 * PI * rho

View File

@@ -32,19 +32,6 @@
#define f_compute_rhs_Z4c_ss compute_rhs_z4c_ss_ #define f_compute_rhs_Z4c_ss compute_rhs_z4c_ss_
#define f_compute_constraint_fr compute_constraint_fr_ #define f_compute_constraint_fr compute_constraint_fr_
#endif #endif
#ifdef __cplusplus
extern "C"
{
#endif
void f_bssn_rhs_kernel_timing_reset();
int f_bssn_rhs_kernel_timing_bucket_count();
const double *f_bssn_rhs_kernel_timing_local_seconds();
const char *f_bssn_rhs_kernel_timing_label(int);
#ifdef __cplusplus
}
#endif
extern "C" extern "C"
{ {
int f_compute_rhs_bssn(int *, double &, double *, double *, double *, // ex,T,X,Y,Z int f_compute_rhs_bssn(int *, double &, double *, double *, double *, // ex,T,X,Y,Z

View File

@@ -130,11 +130,7 @@ void cgh::compose_cgh(int nprocs)
for (int lev = 0; lev < levels; lev++) for (int lev = 0; lev < levels; lev++)
{ {
checkPatchList(PatL[lev], false); checkPatchList(PatL[lev], false);
#ifdef INTERP_LB_OPTIMIZE
Parallel::distribute_optimize(PatL[lev], nprocs, ingfs, fngfs, false);
#else
Parallel::distribute(PatL[lev], nprocs, ingfs, fngfs, false); Parallel::distribute(PatL[lev], nprocs, ingfs, fngfs, false);
#endif
#if (RPB == 1) #if (RPB == 1)
// we need distributed box of PatL[lev] and PatL[lev-1] // we need distributed box of PatL[lev] and PatL[lev-1]
if (lev > 0) if (lev > 0)
@@ -1305,13 +1301,13 @@ bool cgh::Interp_One_Point(MyList<var> *VarList,
} }
bool cgh::Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0, void cgh::Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0,
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList, bool BB, MyList<var> *FutureList, MyList<var> *tmList, bool BB,
monitor *ErrorMonitor) monitor *ErrorMonitor)
{ {
if (lev < movls) if (lev < movls)
return false; return;
#if (0) #if (0)
// #if (PSTR == 1 || PSTR == 2) // #if (PSTR == 1 || PSTR == 2)
@@ -1400,7 +1396,7 @@ bool cgh::Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, do
for (bhi = 0; bhi < BH_num; bhi++) for (bhi = 0; bhi < BH_num; bhi++)
delete[] tmpPorg[bhi]; delete[] tmpPorg[bhi];
delete[] tmpPorg; delete[] tmpPorg;
return false; return;
} }
// x direction // x direction
rr = (Porg0[bhi][0] - handle[lev][grd][0]) / dX; rr = (Porg0[bhi][0] - handle[lev][grd][0]) / dX;
@@ -1504,7 +1500,6 @@ bool cgh::Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, do
for (int bhi = 0; bhi < BH_num; bhi++) for (int bhi = 0; bhi < BH_num; bhi++)
delete[] tmpPorg[bhi]; delete[] tmpPorg[bhi];
delete[] tmpPorg; delete[] tmpPorg;
return tot_flag;
} }

View File

@@ -74,7 +74,7 @@ public:
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList, MyList<var> *FutureList, MyList<var> *tmList,
int Symmetry, bool BB); int Symmetry, bool BB);
bool Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0, void Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0,
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList, bool BB, MyList<var> *FutureList, MyList<var> *tmList, bool BB,
monitor *ErrorMonitor); monitor *ErrorMonitor);

View File

@@ -69,8 +69,6 @@
fy = ZEO fy = ZEO
fz = ZEO fz = ZEO
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
!DIR$ UNROLL PARTIAL(4)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -373,8 +371,6 @@
fxz = ZEO fxz = ZEO
fyz = ZEO fyz = ZEO
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
!DIR$ UNROLL PARTIAL(4)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1

View File

@@ -1513,7 +1513,6 @@
real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)) :: fh real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)) :: fh
real*8, dimension(3) :: SoA real*8, dimension(3) :: SoA
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
integer :: i_core_min,i_core_max,j_core_min,j_core_max,k_core_min,k_core_max
real*8 :: Sdxdx,Sdydy,Sdzdz,Fdxdx,Fdydy,Fdzdz real*8 :: Sdxdx,Sdydy,Sdzdz,Fdxdx,Fdydy,Fdzdz
real*8 :: Sdxdy,Sdxdz,Sdydz,Fdxdy,Fdxdz,Fdydz real*8 :: Sdxdy,Sdxdz,Sdydz,Fdxdy,Fdxdz,Fdydz
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2 integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
@@ -1566,47 +1565,9 @@
fxz = ZEO fxz = ZEO
fyz = ZEO fyz = ZEO
i_core_min = max(1, imin+2)
i_core_max = min(ex(1), imax-2)
j_core_min = max(1, jmin+2)
j_core_max = min(ex(2), jmax-2)
k_core_min = max(1, kmin+2)
k_core_max = min(ex(3), kmax-2)
if(i_core_min <= i_core_max .and. j_core_min <= j_core_max .and. k_core_min <= k_core_max)then
do k=k_core_min,k_core_max
do j=j_core_min,j_core_max
do i=i_core_min,i_core_max
! interior points always use 4th-order stencils without branch checks
fxx(i,j,k) = Fdxdx*(-fh(i-2,j,k)+F16*fh(i-1,j,k)-F30*fh(i,j,k) &
-fh(i+2,j,k)+F16*fh(i+1,j,k) )
fyy(i,j,k) = Fdydy*(-fh(i,j-2,k)+F16*fh(i,j-1,k)-F30*fh(i,j,k) &
-fh(i,j+2,k)+F16*fh(i,j+1,k) )
fzz(i,j,k) = Fdzdz*(-fh(i,j,k-2)+F16*fh(i,j,k-1)-F30*fh(i,j,k) &
-fh(i,j,k+2)+F16*fh(i,j,k+1) )
fxy(i,j,k) = Fdxdy*( (fh(i-2,j-2,k)-F8*fh(i-1,j-2,k)+F8*fh(i+1,j-2,k)-fh(i+2,j-2,k)) &
-F8 *(fh(i-2,j-1,k)-F8*fh(i-1,j-1,k)+F8*fh(i+1,j-1,k)-fh(i+2,j-1,k)) &
+F8 *(fh(i-2,j+1,k)-F8*fh(i-1,j+1,k)+F8*fh(i+1,j+1,k)-fh(i+2,j+1,k)) &
- (fh(i-2,j+2,k)-F8*fh(i-1,j+2,k)+F8*fh(i+1,j+2,k)-fh(i+2,j+2,k)))
fxz(i,j,k) = Fdxdz*( (fh(i-2,j,k-2)-F8*fh(i-1,j,k-2)+F8*fh(i+1,j,k-2)-fh(i+2,j,k-2)) &
-F8 *(fh(i-2,j,k-1)-F8*fh(i-1,j,k-1)+F8*fh(i+1,j,k-1)-fh(i+2,j,k-1)) &
+F8 *(fh(i-2,j,k+1)-F8*fh(i-1,j,k+1)+F8*fh(i+1,j,k+1)-fh(i+2,j,k+1)) &
- (fh(i-2,j,k+2)-F8*fh(i-1,j,k+2)+F8*fh(i+1,j,k+2)-fh(i+2,j,k+2)))
fyz(i,j,k) = Fdydz*( (fh(i,j-2,k-2)-F8*fh(i,j-1,k-2)+F8*fh(i,j+1,k-2)-fh(i,j+2,k-2)) &
-F8 *(fh(i,j-2,k-1)-F8*fh(i,j-1,k-1)+F8*fh(i,j+1,k-1)-fh(i,j+2,k-1)) &
+F8 *(fh(i,j-2,k+1)-F8*fh(i,j-1,k+1)+F8*fh(i,j+1,k+1)-fh(i,j+2,k+1)) &
- (fh(i,j-2,k+2)-F8*fh(i,j-1,k+2)+F8*fh(i,j+1,k+2)-fh(i,j+2,k+2)))
enddo
enddo
enddo
endif
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
if(i>=i_core_min .and. i<=i_core_max .and. &
j>=j_core_min .and. j<=j_core_max .and. &
k>=k_core_min .and. k<=k_core_max) cycle
!~~~~~~ fxx !~~~~~~ fxx
if(i+2 <= imax .and. i-2 >= imin)then if(i+2 <= imax .and. i-2 >= imin)then
! !

Some files were not shown because too many files have changed in this diff Show More