Compare commits
9 Commits
legacy
...
cjy-vitali
| Author | SHA1 | Date | |
|---|---|---|---|
| 0db537479b | |||
| 1f3fd264c0 | |||
| 442674cedc | |||
| 8d28c29a91 | |||
| 0cf58176d9 | |||
| 0f1d0de1e7 | |||
| b57d80ca61 | |||
| 9cd3741a90 | |||
| ac82ebd889 |
4
.gitignore
vendored
4
.gitignore
vendored
@@ -1,6 +1,6 @@
|
|||||||
__pycache__
|
__pycache__
|
||||||
GW150914
|
GW150914
|
||||||
GW150914-origin
|
GW150914*
|
||||||
docs
|
docs
|
||||||
*.tmp
|
*.tmp
|
||||||
|
.codex
|
||||||
@@ -177,6 +177,9 @@ print( " AMSS-NCKU macro file macrodef.h has been generated. " )
|
|||||||
generate_macrodef.generate_macrodef_fh()
|
generate_macrodef.generate_macrodef_fh()
|
||||||
print( " AMSS-NCKU macro file macrodef.fh has been generated. " )
|
print( " AMSS-NCKU macro file macrodef.fh has been generated. " )
|
||||||
|
|
||||||
|
generate_macrodef.generate_build_config()
|
||||||
|
print( " AMSS-NCKU build config AMSS_NCKU_build.mk has been generated. " )
|
||||||
|
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
|
|
||||||
@@ -219,9 +222,11 @@ shutil.copytree(AMSS_NCKU_source_path, AMSS_NCKU_source_copy)
|
|||||||
|
|
||||||
macrodef_h_path = os.path.join(File_directory, "macrodef.h")
|
macrodef_h_path = os.path.join(File_directory, "macrodef.h")
|
||||||
macrodef_fh_path = os.path.join(File_directory, "macrodef.fh")
|
macrodef_fh_path = os.path.join(File_directory, "macrodef.fh")
|
||||||
|
build_config_path = os.path.join(File_directory, "AMSS_NCKU_build.mk")
|
||||||
|
|
||||||
shutil.copy2(macrodef_h_path, AMSS_NCKU_source_copy)
|
shutil.copy2(macrodef_h_path, AMSS_NCKU_source_copy)
|
||||||
shutil.copy2(macrodef_fh_path, AMSS_NCKU_source_copy)
|
shutil.copy2(macrodef_fh_path, AMSS_NCKU_source_copy)
|
||||||
|
shutil.copy2(build_config_path, AMSS_NCKU_source_copy)
|
||||||
|
|
||||||
# Notes on copying files:
|
# Notes on copying files:
|
||||||
# shutil.copy2 preserves file metadata such as modification time.
|
# shutil.copy2 preserves file metadata such as modification time.
|
||||||
|
|||||||
@@ -9,6 +9,11 @@ Verification Requirements:
|
|||||||
- Y Component RMS
|
- Y Component RMS
|
||||||
- Z Component RMS
|
- Z Component RMS
|
||||||
2. ADM constraint violation < 2 (Grid Level 0)
|
2. ADM constraint violation < 2 (Grid Level 0)
|
||||||
|
3. The following figure PDFs must match GW150914-origin exactly after rasterization:
|
||||||
|
- ADM_Constraint_Grid_Level_0.pdf
|
||||||
|
- BH_Trajectory_21_XY.pdf
|
||||||
|
- BH_Trajectory_XY.pdf
|
||||||
|
The script also reports the percentage of differing pixels for each figure.
|
||||||
|
|
||||||
RMS Calculation Method:
|
RMS Calculation Method:
|
||||||
- Computes trajectory deviation on the XY plane independently for BH1 and BH2
|
- Computes trajectory deviation on the XY plane independently for BH1 and BH2
|
||||||
@@ -23,6 +28,10 @@ Reference: GW150914-origin (baseline simulation)
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
# ANSI Color Codes
|
# ANSI Color Codes
|
||||||
class Color:
|
class Color:
|
||||||
@@ -61,6 +70,132 @@ def load_constraint_data(filepath):
|
|||||||
data.append([float(x) for x in parts[:8]])
|
data.append([float(x) for x in parts[:8]])
|
||||||
return np.array(data)
|
return np.array(data)
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_figure_dir(path):
|
||||||
|
"""Resolve the sibling figure directory from an output or figure path."""
|
||||||
|
normalized = os.path.normpath(path)
|
||||||
|
if os.path.basename(normalized) == "figure":
|
||||||
|
return normalized
|
||||||
|
return os.path.join(os.path.dirname(normalized), "figure")
|
||||||
|
|
||||||
|
|
||||||
|
def render_pdf_to_images(pdf_path, dpi=150):
|
||||||
|
"""Render a PDF to RGB images using Ghostscript."""
|
||||||
|
gs_path = shutil.which("gs")
|
||||||
|
if gs_path is None:
|
||||||
|
raise RuntimeError("Ghostscript executable 'gs' was not found in PATH")
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory(prefix="amss_verify_pdf_") as temp_dir:
|
||||||
|
output_pattern = os.path.join(temp_dir, "page-%03d.ppm")
|
||||||
|
cmd = [
|
||||||
|
gs_path,
|
||||||
|
"-q",
|
||||||
|
"-dSAFER",
|
||||||
|
"-dBATCH",
|
||||||
|
"-dNOPAUSE",
|
||||||
|
"-sDEVICE=ppmraw",
|
||||||
|
f"-r{dpi}",
|
||||||
|
f"-o{output_pattern}",
|
||||||
|
pdf_path
|
||||||
|
]
|
||||||
|
|
||||||
|
try:
|
||||||
|
subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True)
|
||||||
|
except subprocess.CalledProcessError as exc:
|
||||||
|
message = exc.stderr.strip() or str(exc)
|
||||||
|
raise RuntimeError(f"Failed to render PDF '{pdf_path}': {message}") from exc
|
||||||
|
|
||||||
|
ppm_files = sorted(
|
||||||
|
os.path.join(temp_dir, filename)
|
||||||
|
for filename in os.listdir(temp_dir)
|
||||||
|
if filename.endswith(".ppm")
|
||||||
|
)
|
||||||
|
|
||||||
|
if not ppm_files:
|
||||||
|
raise RuntimeError(f"No rendered pages were produced for '{pdf_path}'")
|
||||||
|
|
||||||
|
images = []
|
||||||
|
for ppm_file in ppm_files:
|
||||||
|
with Image.open(ppm_file) as img:
|
||||||
|
images.append(np.array(img.convert("RGB"), dtype=np.uint8))
|
||||||
|
|
||||||
|
return images
|
||||||
|
|
||||||
|
|
||||||
|
def compare_rendered_pages(ref_img, target_img):
|
||||||
|
"""Return (different_pixels, total_pixels) for two rendered RGB pages."""
|
||||||
|
ref_h, ref_w = ref_img.shape[:2]
|
||||||
|
tgt_h, tgt_w = target_img.shape[:2]
|
||||||
|
total_pixels = max(ref_h, tgt_h) * max(ref_w, tgt_w)
|
||||||
|
|
||||||
|
if ref_h == tgt_h and ref_w == tgt_w:
|
||||||
|
different_pixels = int(np.count_nonzero(np.any(ref_img != target_img, axis=2)))
|
||||||
|
return different_pixels, total_pixels
|
||||||
|
|
||||||
|
diff_mask = np.ones((max(ref_h, tgt_h), max(ref_w, tgt_w)), dtype=bool)
|
||||||
|
overlap_h = min(ref_h, tgt_h)
|
||||||
|
overlap_w = min(ref_w, tgt_w)
|
||||||
|
overlap_diff = np.any(ref_img[:overlap_h, :overlap_w] != target_img[:overlap_h, :overlap_w], axis=2)
|
||||||
|
diff_mask[:overlap_h, :overlap_w] = overlap_diff
|
||||||
|
different_pixels = int(np.count_nonzero(diff_mask))
|
||||||
|
return different_pixels, total_pixels
|
||||||
|
|
||||||
|
|
||||||
|
def compare_pdf_images(ref_pdf, target_pdf, dpi=150, threshold_percent=0.001):
|
||||||
|
"""Compare two PDFs by rasterizing them and counting differing pixels."""
|
||||||
|
ref_pages = render_pdf_to_images(ref_pdf, dpi=dpi)
|
||||||
|
target_pages = render_pdf_to_images(target_pdf, dpi=dpi)
|
||||||
|
|
||||||
|
total_pixels = 0
|
||||||
|
different_pixels = 0
|
||||||
|
max_pages = max(len(ref_pages), len(target_pages))
|
||||||
|
|
||||||
|
for page_idx in range(max_pages):
|
||||||
|
if page_idx < len(ref_pages) and page_idx < len(target_pages):
|
||||||
|
page_diff, page_total = compare_rendered_pages(ref_pages[page_idx], target_pages[page_idx])
|
||||||
|
else:
|
||||||
|
existing_page = ref_pages[page_idx] if page_idx < len(ref_pages) else target_pages[page_idx]
|
||||||
|
page_total = existing_page.shape[0] * existing_page.shape[1]
|
||||||
|
page_diff = page_total
|
||||||
|
|
||||||
|
total_pixels += page_total
|
||||||
|
different_pixels += page_diff
|
||||||
|
|
||||||
|
diff_percent = (different_pixels / total_pixels * 100.0) if total_pixels else 0.0
|
||||||
|
return {
|
||||||
|
"different_pixels": different_pixels,
|
||||||
|
"total_pixels": total_pixels,
|
||||||
|
"diff_percent": diff_percent,
|
||||||
|
"pages_ref": len(ref_pages),
|
||||||
|
"pages_target": len(target_pages),
|
||||||
|
"passed": diff_percent < threshold_percent
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def compare_required_figures(reference_figure_dir, target_figure_dir):
|
||||||
|
"""Compare the required GW150914 figure PDFs."""
|
||||||
|
figure_names = [
|
||||||
|
"ADM_Constraint_Grid_Level_0.pdf",
|
||||||
|
"BH_Trajectory_21_XY.pdf",
|
||||||
|
"BH_Trajectory_XY.pdf"
|
||||||
|
]
|
||||||
|
|
||||||
|
results = []
|
||||||
|
for figure_name in figure_names:
|
||||||
|
ref_pdf = os.path.join(reference_figure_dir, figure_name)
|
||||||
|
target_pdf = os.path.join(target_figure_dir, figure_name)
|
||||||
|
|
||||||
|
if not os.path.exists(ref_pdf):
|
||||||
|
raise FileNotFoundError(f"Reference figure not found: {ref_pdf}")
|
||||||
|
if not os.path.exists(target_pdf):
|
||||||
|
raise FileNotFoundError(f"Target figure not found: {target_pdf}")
|
||||||
|
|
||||||
|
comparison = compare_pdf_images(ref_pdf, target_pdf)
|
||||||
|
comparison["name"] = figure_name
|
||||||
|
results.append(comparison)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
def calculate_all_rms_errors(bh_data_ref, bh_data_target):
|
def calculate_all_rms_errors(bh_data_ref, bh_data_target):
|
||||||
"""
|
"""
|
||||||
Calculate 3D Vector RMS and component-wise RMS (X, Y, Z) independently.
|
Calculate 3D Vector RMS and component-wise RMS (X, Y, Z) independently.
|
||||||
@@ -184,18 +319,45 @@ def print_constraint_results(results, threshold=2.0):
|
|||||||
return passed
|
return passed
|
||||||
|
|
||||||
|
|
||||||
def print_summary(rms_passed, constraint_passed):
|
def print_figure_results(results, threshold_percent=0.001):
|
||||||
|
print(f"\n{Color.BOLD}3. Figure Pixel Comparison (PDF Rasterization){Color.RESET}")
|
||||||
|
print("-" * 65)
|
||||||
|
print(f" Requirement: < {threshold_percent:.3f}% differing pixels\n")
|
||||||
|
|
||||||
|
all_passed = True
|
||||||
|
for result in results:
|
||||||
|
passed = result["passed"]
|
||||||
|
all_passed = all_passed and passed
|
||||||
|
status = get_status_text(passed)
|
||||||
|
print(f" {result['name']:32}: {result['diff_percent']:10.6f}% | Status: {status}")
|
||||||
|
|
||||||
|
if result["pages_ref"] != result["pages_target"]:
|
||||||
|
print(f" {'':32} pages(ref/target): {result['pages_ref']}/{result['pages_target']}")
|
||||||
|
|
||||||
|
return all_passed
|
||||||
|
|
||||||
|
|
||||||
|
def print_figure_error(error_message):
|
||||||
|
print(f"\n{Color.BOLD}3. Figure Pixel Comparison (PDF Rasterization){Color.RESET}")
|
||||||
|
print("-" * 65)
|
||||||
|
print(f" {Color.RED}Error: {error_message}{Color.RESET}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def print_summary(rms_passed, constraint_passed, figure_passed):
|
||||||
print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
|
print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
|
||||||
print(Color.BOLD + "Verification Summary" + Color.RESET)
|
print(Color.BOLD + "Verification Summary" + Color.RESET)
|
||||||
print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
|
print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
|
||||||
|
|
||||||
all_passed = rms_passed and constraint_passed
|
all_passed = rms_passed and constraint_passed and figure_passed
|
||||||
|
|
||||||
res_rms = get_status_text(rms_passed)
|
res_rms = get_status_text(rms_passed)
|
||||||
res_con = get_status_text(constraint_passed)
|
res_con = get_status_text(constraint_passed)
|
||||||
|
res_fig = get_status_text(figure_passed)
|
||||||
|
|
||||||
print(f" [1] Comprehensive RMS check: {res_rms}")
|
print(f" [1] Comprehensive RMS check: {res_rms}")
|
||||||
print(f" [2] ADM constraint check: {res_con}")
|
print(f" [2] ADM constraint check: {res_con}")
|
||||||
|
print(f" [3] Figure pixel comparison: {res_fig}")
|
||||||
|
|
||||||
final_status = f"{Color.GREEN}{Color.BOLD}ALL CHECKS PASSED{Color.RESET}" if all_passed else f"{Color.RED}{Color.BOLD}SOME CHECKS FAILED{Color.RESET}"
|
final_status = f"{Color.GREEN}{Color.BOLD}ALL CHECKS PASSED{Color.RESET}" if all_passed else f"{Color.RED}{Color.BOLD}SOME CHECKS FAILED{Color.RESET}"
|
||||||
print(f"\n Overall result: {final_status}")
|
print(f"\n Overall result: {final_status}")
|
||||||
@@ -212,6 +374,8 @@ def main():
|
|||||||
|
|
||||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
reference_dir = os.path.join(script_dir, "GW150914-origin/AMSS_NCKU_output")
|
reference_dir = os.path.join(script_dir, "GW150914-origin/AMSS_NCKU_output")
|
||||||
|
target_figure_dir = resolve_figure_dir(target_dir)
|
||||||
|
reference_figure_dir = os.path.join(script_dir, "GW150914-origin/figure")
|
||||||
|
|
||||||
bh_file_ref = os.path.join(reference_dir, "bssn_BH.dat")
|
bh_file_ref = os.path.join(reference_dir, "bssn_BH.dat")
|
||||||
bh_file_target = os.path.join(target_dir, "bssn_BH.dat")
|
bh_file_target = os.path.join(target_dir, "bssn_BH.dat")
|
||||||
@@ -230,6 +394,8 @@ def main():
|
|||||||
print_header()
|
print_header()
|
||||||
print(f"\n{Color.BOLD}Reference (Baseline):{Color.RESET} {Color.BLUE}{reference_dir}{Color.RESET}")
|
print(f"\n{Color.BOLD}Reference (Baseline):{Color.RESET} {Color.BLUE}{reference_dir}{Color.RESET}")
|
||||||
print(f"{Color.BOLD}Target (Optimized): {Color.RESET} {Color.BLUE}{target_dir}{Color.RESET}")
|
print(f"{Color.BOLD}Target (Optimized): {Color.RESET} {Color.BLUE}{target_dir}{Color.RESET}")
|
||||||
|
print(f"{Color.BOLD}Reference Figures: {Color.RESET} {Color.BLUE}{reference_figure_dir}{Color.RESET}")
|
||||||
|
print(f"{Color.BOLD}Target Figures: {Color.RESET} {Color.BLUE}{target_figure_dir}{Color.RESET}")
|
||||||
|
|
||||||
bh_data_ref = load_bh_trajectory(bh_file_ref)
|
bh_data_ref = load_bh_trajectory(bh_file_ref)
|
||||||
bh_data_target = load_bh_trajectory(bh_file_target)
|
bh_data_target = load_bh_trajectory(bh_file_target)
|
||||||
@@ -243,7 +409,13 @@ def main():
|
|||||||
constraint_results = analyze_constraint_violation(constraint_data)
|
constraint_results = analyze_constraint_violation(constraint_data)
|
||||||
constraint_passed = print_constraint_results(constraint_results)
|
constraint_passed = print_constraint_results(constraint_results)
|
||||||
|
|
||||||
all_passed = print_summary(rms_passed, constraint_passed)
|
try:
|
||||||
|
figure_results = compare_required_figures(reference_figure_dir, target_figure_dir)
|
||||||
|
figure_passed = print_figure_results(figure_results)
|
||||||
|
except (FileNotFoundError, RuntimeError) as exc:
|
||||||
|
figure_passed = print_figure_error(str(exc))
|
||||||
|
|
||||||
|
all_passed = print_summary(rms_passed, constraint_passed, figure_passed)
|
||||||
sys.exit(0 if all_passed else 1)
|
sys.exit(0 if all_passed else 1)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -37,56 +37,51 @@ close(77)
|
|||||||
end program checkFFT
|
end program checkFFT
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
!-------------
|
||||||
|
! Optimized FFT using Intel oneMKL DFTI
|
||||||
|
! Mathematical equivalence: Standard DFT definition
|
||||||
|
! Forward (isign=1): X[k] = sum_{n=0}^{N-1} x[n] * exp(-2*pi*i*k*n/N)
|
||||||
|
! Backward (isign=-1): X[k] = sum_{n=0}^{N-1} x[n] * exp(+2*pi*i*k*n/N)
|
||||||
|
! Input/Output: dataa is interleaved complex array [Re(0),Im(0),Re(1),Im(1),...]
|
||||||
|
!-------------
|
||||||
SUBROUTINE four1(dataa,nn,isign)
|
SUBROUTINE four1(dataa,nn,isign)
|
||||||
|
use MKL_DFTI
|
||||||
implicit none
|
implicit none
|
||||||
INTEGER::isign,nn
|
INTEGER, intent(in) :: isign, nn
|
||||||
double precision,dimension(2*nn)::dataa
|
DOUBLE PRECISION, dimension(2*nn), intent(inout) :: dataa
|
||||||
INTEGER::i,istep,j,m,mmax,n
|
|
||||||
double precision::tempi,tempr
|
type(DFTI_DESCRIPTOR), pointer :: desc
|
||||||
DOUBLE PRECISION::theta,wi,wpi,wpr,wr,wtemp
|
integer :: status
|
||||||
n=2*nn
|
|
||||||
j=1
|
! Create DFTI descriptor for 1D complex-to-complex transform
|
||||||
do i=1,n,2
|
status = DftiCreateDescriptor(desc, DFTI_DOUBLE, DFTI_COMPLEX, 1, nn)
|
||||||
if(j.gt.i)then
|
if (status /= 0) return
|
||||||
tempr=dataa(j)
|
|
||||||
tempi=dataa(j+1)
|
! Set input/output storage as interleaved complex (default)
|
||||||
dataa(j)=dataa(i)
|
status = DftiSetValue(desc, DFTI_PLACEMENT, DFTI_INPLACE)
|
||||||
dataa(j+1)=dataa(i+1)
|
if (status /= 0) then
|
||||||
dataa(i)=tempr
|
status = DftiFreeDescriptor(desc)
|
||||||
dataa(i+1)=tempi
|
return
|
||||||
endif
|
|
||||||
m=nn
|
|
||||||
1 if ((m.ge.2).and.(j.gt.m)) then
|
|
||||||
j=j-m
|
|
||||||
m=m/2
|
|
||||||
goto 1
|
|
||||||
endif
|
|
||||||
j=j+m
|
|
||||||
enddo
|
|
||||||
mmax=2
|
|
||||||
2 if (n.gt.mmax) then
|
|
||||||
istep=2*mmax
|
|
||||||
theta=6.28318530717959d0/(isign*mmax)
|
|
||||||
wpr=-2.d0*sin(0.5d0*theta)**2
|
|
||||||
wpi=sin(theta)
|
|
||||||
wr=1.d0
|
|
||||||
wi=0.d0
|
|
||||||
do m=1,mmax,2
|
|
||||||
do i=m,n,istep
|
|
||||||
j=i+mmax
|
|
||||||
tempr=sngl(wr)*dataa(j)-sngl(wi)*dataa(j+1)
|
|
||||||
tempi=sngl(wr)*dataa(j+1)+sngl(wi)*dataa(j)
|
|
||||||
dataa(j)=dataa(i)-tempr
|
|
||||||
dataa(j+1)=dataa(i+1)-tempi
|
|
||||||
dataa(i)=dataa(i)+tempr
|
|
||||||
dataa(i+1)=dataa(i+1)+tempi
|
|
||||||
enddo
|
|
||||||
wtemp=wr
|
|
||||||
wr=wr*wpr-wi*wpi+wr
|
|
||||||
wi=wi*wpr+wtemp*wpi+wi
|
|
||||||
enddo
|
|
||||||
mmax=istep
|
|
||||||
goto 2
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
! Commit the descriptor
|
||||||
|
status = DftiCommitDescriptor(desc)
|
||||||
|
if (status /= 0) then
|
||||||
|
status = DftiFreeDescriptor(desc)
|
||||||
|
return
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Execute FFT based on direction
|
||||||
|
if (isign == 1) then
|
||||||
|
! Forward FFT: exp(-2*pi*i*k*n/N)
|
||||||
|
status = DftiComputeForward(desc, dataa)
|
||||||
|
else
|
||||||
|
! Backward FFT: exp(+2*pi*i*k*n/N)
|
||||||
|
status = DftiComputeBackward(desc, dataa)
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Free descriptor
|
||||||
|
status = DftiFreeDescriptor(desc)
|
||||||
|
|
||||||
return
|
return
|
||||||
END SUBROUTINE four1
|
END SUBROUTINE four1
|
||||||
|
|||||||
@@ -5,6 +5,42 @@
|
|||||||
#include "misc.h"
|
#include "misc.h"
|
||||||
#include "parameters.h"
|
#include "parameters.h"
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
enum { MAX_DATA_PACKER_VARS = 64 };
|
||||||
|
|
||||||
|
int expand_var_list_pack_info(MyList<var> *src_list, MyList<var> *dst_list,
|
||||||
|
int *src_sgfn, int *dst_sgfn, double **src_soa)
|
||||||
|
{
|
||||||
|
int count = 0;
|
||||||
|
MyList<var> *src_it = src_list;
|
||||||
|
MyList<var> *dst_it = dst_list;
|
||||||
|
|
||||||
|
while (src_it && dst_it)
|
||||||
|
{
|
||||||
|
if (count >= MAX_DATA_PACKER_VARS)
|
||||||
|
{
|
||||||
|
cout << "Parallel::data_packer: too many variables in communication list." << endl;
|
||||||
|
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||||
|
}
|
||||||
|
src_sgfn[count] = src_it->data->sgfn;
|
||||||
|
dst_sgfn[count] = dst_it->data->sgfn;
|
||||||
|
src_soa[count] = src_it->data->SoA;
|
||||||
|
count++;
|
||||||
|
src_it = src_it->next;
|
||||||
|
dst_it = dst_it->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (src_it || dst_it)
|
||||||
|
{
|
||||||
|
cout << "error in short data packer, var lists does not match." << endl;
|
||||||
|
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int Parallel::partition1(int &nx, int split_size, int min_width, int cpusize, int shape) // special for 1 diemnsion
|
int Parallel::partition1(int &nx, int split_size, int min_width, int cpusize, int shape) // special for 1 diemnsion
|
||||||
{
|
{
|
||||||
nx = Mymax(1, shape / min_width);
|
nx = Mymax(1, shape / min_width);
|
||||||
@@ -3730,21 +3766,10 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
|
|||||||
if (!src || !dst)
|
if (!src || !dst)
|
||||||
return size_out;
|
return size_out;
|
||||||
|
|
||||||
MyList<var> *varls, *varld;
|
int src_sgfn[MAX_DATA_PACKER_VARS];
|
||||||
|
int dst_sgfn[MAX_DATA_PACKER_VARS];
|
||||||
varls = VarLists;
|
double *src_soa[MAX_DATA_PACKER_VARS];
|
||||||
varld = VarListd;
|
const int var_count = expand_var_list_pack_info(VarLists, VarListd, src_sgfn, dst_sgfn, src_soa);
|
||||||
while (varls && varld)
|
|
||||||
{
|
|
||||||
varls = varls->next;
|
|
||||||
varld = varld->next;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (varls || varld)
|
|
||||||
{
|
|
||||||
cout << "error in short data packer, var lists does not match." << endl;
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
int type; /* 1 copy, 2 restrict, 3 prolong */
|
int type; /* 1 copy, 2 restrict, 3 prolong */
|
||||||
if (src->data->Bg->lev == dst->data->Bg->lev)
|
if (src->data->Bg->lev == dst->data->Bg->lev)
|
||||||
@@ -3756,43 +3781,57 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
|
|||||||
|
|
||||||
while (src && dst)
|
while (src && dst)
|
||||||
{
|
{
|
||||||
if ((dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) ||
|
const bool rank_match =
|
||||||
(dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank))
|
(dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) ||
|
||||||
|
(dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank);
|
||||||
|
|
||||||
|
if (rank_match)
|
||||||
{
|
{
|
||||||
varls = VarLists;
|
const int segment_size = dst->data->shape[0] * dst->data->shape[1] * dst->data->shape[2];
|
||||||
varld = VarListd;
|
int offset = size_out;
|
||||||
while (varls && varld)
|
|
||||||
|
if (data)
|
||||||
{
|
{
|
||||||
if (data)
|
if (dir == PACK)
|
||||||
{
|
{
|
||||||
if (dir == PACK)
|
switch (type)
|
||||||
switch (type)
|
{
|
||||||
{
|
|
||||||
// attention must be paied to the difference between src's llb,uub and dst's llb,uub
|
|
||||||
case 1:
|
case 1:
|
||||||
f_copy(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + size_out,
|
for (int iv = 0; iv < var_count; iv++, offset += segment_size)
|
||||||
src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn],
|
f_copy(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + offset,
|
||||||
dst->data->llb, dst->data->uub);
|
src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape,
|
||||||
|
src->data->Bg->fgfs[src_sgfn[iv]], dst->data->llb, dst->data->uub);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
f_restrict3(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + size_out,
|
for (int iv = 0; iv < var_count; iv++, offset += segment_size)
|
||||||
src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn],
|
f_restrict3(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + offset,
|
||||||
dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry);
|
src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape,
|
||||||
|
src->data->Bg->fgfs[src_sgfn[iv]], dst->data->llb, dst->data->uub,
|
||||||
|
src_soa[iv], Symmetry);
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
f_prolong3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn],
|
for (int iv = 0; iv < var_count; iv++, offset += segment_size)
|
||||||
dst->data->llb, dst->data->uub, dst->data->shape, data + size_out,
|
f_prolong3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape,
|
||||||
dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry);
|
src->data->Bg->fgfs[src_sgfn[iv]], dst->data->llb, dst->data->uub,
|
||||||
}
|
dst->data->shape, data + offset, dst->data->llb, dst->data->uub,
|
||||||
if (dir == UNPACK) // from target data to corresponding grid
|
src_soa[iv], Symmetry);
|
||||||
f_copy(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, dst->data->Bg->fgfs[varld->data->sgfn],
|
break;
|
||||||
dst->data->llb, dst->data->uub, dst->data->shape, data + size_out,
|
default:
|
||||||
dst->data->llb, dst->data->uub);
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (int iv = 0; iv < var_count; iv++, offset += segment_size)
|
||||||
|
f_copy(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape,
|
||||||
|
dst->data->Bg->fgfs[dst_sgfn[iv]], dst->data->llb, dst->data->uub,
|
||||||
|
dst->data->shape, data + offset, dst->data->llb, dst->data->uub);
|
||||||
}
|
}
|
||||||
size_out += dst->data->shape[0] * dst->data->shape[1] * dst->data->shape[2];
|
|
||||||
varls = varls->next;
|
|
||||||
varld = varld->next;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_out = offset + ((!data) ? segment_size * var_count : 0);
|
||||||
|
if (data)
|
||||||
|
size_out = offset;
|
||||||
}
|
}
|
||||||
dst = dst->next;
|
dst = dst->next;
|
||||||
src = src->next;
|
src = src->next;
|
||||||
@@ -3819,21 +3858,10 @@ int Parallel::data_packermix(double *data, MyList<Parallel::gridseg> *src, MyLis
|
|||||||
if (!src || !dst)
|
if (!src || !dst)
|
||||||
return size_out;
|
return size_out;
|
||||||
|
|
||||||
MyList<var> *varls, *varld;
|
int src_sgfn[MAX_DATA_PACKER_VARS];
|
||||||
|
int dst_sgfn[MAX_DATA_PACKER_VARS];
|
||||||
varls = VarLists;
|
double *src_soa[MAX_DATA_PACKER_VARS];
|
||||||
varld = VarListd;
|
const int var_count = expand_var_list_pack_info(VarLists, VarListd, src_sgfn, dst_sgfn, src_soa);
|
||||||
while (varls && varld)
|
|
||||||
{
|
|
||||||
varls = varls->next;
|
|
||||||
varld = varld->next;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (varls || varld)
|
|
||||||
{
|
|
||||||
cout << "error in short data packer, var lists does not match." << endl;
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
int type; /* 1 copy, 2 restrict, 3 prolong */
|
int type; /* 1 copy, 2 restrict, 3 prolong */
|
||||||
if (src->data->Bg->lev == dst->data->Bg->lev)
|
if (src->data->Bg->lev == dst->data->Bg->lev)
|
||||||
@@ -3851,30 +3879,41 @@ int Parallel::data_packermix(double *data, MyList<Parallel::gridseg> *src, MyLis
|
|||||||
|
|
||||||
while (src && dst)
|
while (src && dst)
|
||||||
{
|
{
|
||||||
if ((dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) ||
|
const bool rank_match =
|
||||||
(dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank))
|
(dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) ||
|
||||||
|
(dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank);
|
||||||
|
|
||||||
|
if (rank_match)
|
||||||
{
|
{
|
||||||
varls = VarLists;
|
const int segment_size =
|
||||||
varld = VarListd;
|
(src->data->shape[0] + 2 * ghost_width) *
|
||||||
while (varls && varld)
|
(src->data->shape[1] + 2 * ghost_width) *
|
||||||
|
(src->data->shape[2] + 2 * ghost_width);
|
||||||
|
int offset = size_out;
|
||||||
|
|
||||||
|
if (data)
|
||||||
{
|
{
|
||||||
if (data)
|
if (dir == PACK)
|
||||||
{
|
{
|
||||||
if (dir == PACK)
|
for (int iv = 0; iv < var_count; iv++, offset += segment_size)
|
||||||
f_prolongcopy3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn],
|
f_prolongcopy3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape,
|
||||||
dst->data->llb, dst->data->uub, src->data->shape, data + size_out,
|
src->data->Bg->fgfs[src_sgfn[iv]], dst->data->llb, dst->data->uub,
|
||||||
src->data->llb, src->data->uub, varls->data->SoA, Symmetry);
|
src->data->shape, data + offset, src->data->llb, src->data->uub,
|
||||||
if (dir == UNPACK) // from target data to corresponding grid
|
src_soa[iv], Symmetry);
|
||||||
f_prolongmix3(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, dst->data->Bg->fgfs[varld->data->sgfn],
|
}
|
||||||
src->data->llb, src->data->uub, src->data->shape, data + size_out,
|
else
|
||||||
dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry, dst->data->illb, dst->data->iuub);
|
{
|
||||||
|
for (int iv = 0; iv < var_count; iv++, offset += segment_size)
|
||||||
|
f_prolongmix3(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape,
|
||||||
|
dst->data->Bg->fgfs[dst_sgfn[iv]], src->data->llb, src->data->uub,
|
||||||
|
src->data->shape, data + offset, dst->data->llb, dst->data->uub,
|
||||||
|
src_soa[iv], Symmetry, dst->data->illb, dst->data->iuub);
|
||||||
}
|
}
|
||||||
// the symmetry problem should be dealt in prolongcopy3,
|
|
||||||
// so we always have ghost_width for both sides
|
|
||||||
size_out += (src->data->shape[0] + 2 * ghost_width) * (src->data->shape[1] + 2 * ghost_width) * (src->data->shape[2] + 2 * ghost_width);
|
|
||||||
varls = varls->next;
|
|
||||||
varld = varld->next;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_out = offset + ((!data) ? segment_size * var_count : 0);
|
||||||
|
if (data)
|
||||||
|
size_out = offset;
|
||||||
}
|
}
|
||||||
dst = dst->next;
|
dst = dst->next;
|
||||||
src = src->next;
|
src = src->next;
|
||||||
|
|||||||
@@ -27,21 +27,7 @@ using namespace std;
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "TwoPunctures.h"
|
#include "TwoPunctures.h"
|
||||||
|
#include <mkl_cblas.h>
|
||||||
extern "C" {
|
|
||||||
double cblas_ddot(const int, const double *, const int, const double *, const int);
|
|
||||||
double cblas_dnrm2(const int, const double *, const int);
|
|
||||||
void cblas_dgemm(const int, const int, const int,
|
|
||||||
const int, const int, const int,
|
|
||||||
const double, const double *, const int,
|
|
||||||
const double *, const int, const double,
|
|
||||||
double *, const int);
|
|
||||||
}
|
|
||||||
|
|
||||||
enum {
|
|
||||||
CblasRowMajor = 101,
|
|
||||||
CblasNoTrans = 111
|
|
||||||
};
|
|
||||||
|
|
||||||
TwoPunctures::TwoPunctures(double mp, double mm, double b,
|
TwoPunctures::TwoPunctures(double mp, double mm, double b,
|
||||||
double P_plusx, double P_plusy, double P_plusz,
|
double P_plusx, double P_plusy, double P_plusz,
|
||||||
|
|||||||
@@ -258,6 +258,8 @@ void bssnEM_class::Initialize()
|
|||||||
PhysTime = StartTime;
|
PhysTime = StartTime;
|
||||||
Setup_Black_Hole_position();
|
Setup_Black_Hole_position();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
setup_transfer_caches();
|
||||||
}
|
}
|
||||||
|
|
||||||
//================================================================================================
|
//================================================================================================
|
||||||
|
|||||||
@@ -26,6 +26,12 @@ using namespace std;
|
|||||||
#include "shellfunctions.h"
|
#include "shellfunctions.h"
|
||||||
#include "parameters.h"
|
#include "parameters.h"
|
||||||
|
|
||||||
|
#if BSSN_USE_ESCALAR_C_KERNEL
|
||||||
|
#define BSSN_ESCALAR_RHS f_compute_rhs_bssn_escalar_c
|
||||||
|
#else
|
||||||
|
#define BSSN_ESCALAR_RHS f_compute_rhs_bssn_escalar
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef With_AHF
|
#ifdef With_AHF
|
||||||
#include "derivatives.h"
|
#include "derivatives.h"
|
||||||
#include "myglobal.h"
|
#include "myglobal.h"
|
||||||
@@ -133,6 +139,9 @@ void bssnEScalar_class::Initialize()
|
|||||||
}
|
}
|
||||||
|
|
||||||
GH = new cgh(0, ngfs, Symmetry, pname, checkrun, ErrorMonitor);
|
GH = new cgh(0, ngfs, Symmetry, pname, checkrun, ErrorMonitor);
|
||||||
|
ConstraintRefreshLevels = new int[GH->levels];
|
||||||
|
for (int il = 0; il < GH->levels; il++)
|
||||||
|
ConstraintRefreshLevels[il] = 0;
|
||||||
if (checkrun)
|
if (checkrun)
|
||||||
CheckPoint->readcheck_cgh(PhysTime, GH, myrank, nprocs, Symmetry);
|
CheckPoint->readcheck_cgh(PhysTime, GH, myrank, nprocs, Symmetry);
|
||||||
else
|
else
|
||||||
@@ -165,6 +174,8 @@ void bssnEScalar_class::Initialize()
|
|||||||
PhysTime = StartTime;
|
PhysTime = StartTime;
|
||||||
Setup_Black_Hole_position();
|
Setup_Black_Hole_position();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
setup_transfer_caches();
|
||||||
}
|
}
|
||||||
|
|
||||||
//================================================================================================
|
//================================================================================================
|
||||||
@@ -230,6 +241,9 @@ void bssnEScalar_class::Read_Ansorg()
|
|||||||
}
|
}
|
||||||
int BH_NM;
|
int BH_NM;
|
||||||
double *Porg_here;
|
double *Porg_here;
|
||||||
|
double *pmom_local;
|
||||||
|
double *spin_local;
|
||||||
|
double *mass_local;
|
||||||
// read parameter from file
|
// read parameter from file
|
||||||
{
|
{
|
||||||
const int LEN = 256;
|
const int LEN = 256;
|
||||||
@@ -271,9 +285,9 @@ void bssnEScalar_class::Read_Ansorg()
|
|||||||
}
|
}
|
||||||
|
|
||||||
Porg_here = new double[3 * BH_NM];
|
Porg_here = new double[3 * BH_NM];
|
||||||
Pmom = new double[3 * BH_NM];
|
pmom_local = new double[3 * BH_NM];
|
||||||
Spin = new double[3 * BH_NM];
|
spin_local = new double[3 * BH_NM];
|
||||||
Mass = new double[BH_NM];
|
mass_local = new double[BH_NM];
|
||||||
// read parameter from file
|
// read parameter from file
|
||||||
{
|
{
|
||||||
const int LEN = 256;
|
const int LEN = 256;
|
||||||
@@ -308,7 +322,7 @@ void bssnEScalar_class::Read_Ansorg()
|
|||||||
if (sgrp == "BSSN" && sind < BH_NM)
|
if (sgrp == "BSSN" && sind < BH_NM)
|
||||||
{
|
{
|
||||||
if (skey == "Mass")
|
if (skey == "Mass")
|
||||||
Mass[sind] = atof(sval.c_str());
|
mass_local[sind] = atof(sval.c_str());
|
||||||
else if (skey == "Porgx")
|
else if (skey == "Porgx")
|
||||||
Porg_here[sind * 3] = atof(sval.c_str());
|
Porg_here[sind * 3] = atof(sval.c_str());
|
||||||
else if (skey == "Porgy")
|
else if (skey == "Porgy")
|
||||||
@@ -316,17 +330,17 @@ void bssnEScalar_class::Read_Ansorg()
|
|||||||
else if (skey == "Porgz")
|
else if (skey == "Porgz")
|
||||||
Porg_here[sind * 3 + 2] = atof(sval.c_str());
|
Porg_here[sind * 3 + 2] = atof(sval.c_str());
|
||||||
else if (skey == "Spinx")
|
else if (skey == "Spinx")
|
||||||
Spin[sind * 3] = atof(sval.c_str());
|
spin_local[sind * 3] = atof(sval.c_str());
|
||||||
else if (skey == "Spiny")
|
else if (skey == "Spiny")
|
||||||
Spin[sind * 3 + 1] = atof(sval.c_str());
|
spin_local[sind * 3 + 1] = atof(sval.c_str());
|
||||||
else if (skey == "Spinz")
|
else if (skey == "Spinz")
|
||||||
Spin[sind * 3 + 2] = atof(sval.c_str());
|
spin_local[sind * 3 + 2] = atof(sval.c_str());
|
||||||
else if (skey == "Pmomx")
|
else if (skey == "Pmomx")
|
||||||
Pmom[sind * 3] = atof(sval.c_str());
|
pmom_local[sind * 3] = atof(sval.c_str());
|
||||||
else if (skey == "Pmomy")
|
else if (skey == "Pmomy")
|
||||||
Pmom[sind * 3 + 1] = atof(sval.c_str());
|
pmom_local[sind * 3 + 1] = atof(sval.c_str());
|
||||||
else if (skey == "Pmomz")
|
else if (skey == "Pmomz")
|
||||||
Pmom[sind * 3 + 2] = atof(sval.c_str());
|
pmom_local[sind * 3 + 2] = atof(sval.c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
inf.close();
|
inf.close();
|
||||||
@@ -362,7 +376,7 @@ void bssnEScalar_class::Read_Ansorg()
|
|||||||
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
|
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
|
||||||
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
|
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
|
||||||
cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn],
|
cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn],
|
||||||
Mass, Porg_here, Pmom, Spin, BH_NM);
|
mass_local, Porg_here, pmom_local, spin_local, BH_NM);
|
||||||
}
|
}
|
||||||
if (BL == Pp->data->ble)
|
if (BL == Pp->data->ble)
|
||||||
break;
|
break;
|
||||||
@@ -404,7 +418,7 @@ void bssnEScalar_class::Read_Ansorg()
|
|||||||
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
|
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
|
||||||
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
|
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
|
||||||
cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn],
|
cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn],
|
||||||
Mass, Porg_here, Pmom, Spin, BH_NM);
|
mass_local, Porg_here, pmom_local, spin_local, BH_NM);
|
||||||
}
|
}
|
||||||
if (BL == Pp->data->ble)
|
if (BL == Pp->data->ble)
|
||||||
break;
|
break;
|
||||||
@@ -415,6 +429,9 @@ void bssnEScalar_class::Read_Ansorg()
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
delete[] Porg_here;
|
delete[] Porg_here;
|
||||||
|
delete[] pmom_local;
|
||||||
|
delete[] spin_local;
|
||||||
|
delete[] mass_local;
|
||||||
// dump read_in initial data
|
// dump read_in initial data
|
||||||
// for(int lev=0;lev<GH->levels;lev++) Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT);
|
// for(int lev=0;lev<GH->levels;lev++) Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT);
|
||||||
}
|
}
|
||||||
@@ -455,6 +472,9 @@ void bssnEScalar_class::Read_Pablo()
|
|||||||
}
|
}
|
||||||
int BH_NM;
|
int BH_NM;
|
||||||
double *Porg_here;
|
double *Porg_here;
|
||||||
|
double *pmom_local;
|
||||||
|
double *spin_local;
|
||||||
|
double *mass_local;
|
||||||
// read parameter from file
|
// read parameter from file
|
||||||
{
|
{
|
||||||
const int LEN = 256;
|
const int LEN = 256;
|
||||||
@@ -496,9 +516,9 @@ void bssnEScalar_class::Read_Pablo()
|
|||||||
}
|
}
|
||||||
|
|
||||||
Porg_here = new double[3 * BH_NM];
|
Porg_here = new double[3 * BH_NM];
|
||||||
Pmom = new double[3 * BH_NM];
|
pmom_local = new double[3 * BH_NM];
|
||||||
Spin = new double[3 * BH_NM];
|
spin_local = new double[3 * BH_NM];
|
||||||
Mass = new double[BH_NM];
|
mass_local = new double[BH_NM];
|
||||||
// read parameter from file
|
// read parameter from file
|
||||||
{
|
{
|
||||||
const int LEN = 256;
|
const int LEN = 256;
|
||||||
@@ -533,7 +553,7 @@ void bssnEScalar_class::Read_Pablo()
|
|||||||
if (sgrp == "BSSN" && sind < BH_NM)
|
if (sgrp == "BSSN" && sind < BH_NM)
|
||||||
{
|
{
|
||||||
if (skey == "Mass")
|
if (skey == "Mass")
|
||||||
Mass[sind] = atof(sval.c_str());
|
mass_local[sind] = atof(sval.c_str());
|
||||||
else if (skey == "Porgx")
|
else if (skey == "Porgx")
|
||||||
Porg_here[sind * 3] = atof(sval.c_str());
|
Porg_here[sind * 3] = atof(sval.c_str());
|
||||||
else if (skey == "Porgy")
|
else if (skey == "Porgy")
|
||||||
@@ -541,17 +561,17 @@ void bssnEScalar_class::Read_Pablo()
|
|||||||
else if (skey == "Porgz")
|
else if (skey == "Porgz")
|
||||||
Porg_here[sind * 3 + 2] = atof(sval.c_str());
|
Porg_here[sind * 3 + 2] = atof(sval.c_str());
|
||||||
else if (skey == "Spinx")
|
else if (skey == "Spinx")
|
||||||
Spin[sind * 3] = atof(sval.c_str());
|
spin_local[sind * 3] = atof(sval.c_str());
|
||||||
else if (skey == "Spiny")
|
else if (skey == "Spiny")
|
||||||
Spin[sind * 3 + 1] = atof(sval.c_str());
|
spin_local[sind * 3 + 1] = atof(sval.c_str());
|
||||||
else if (skey == "Spinz")
|
else if (skey == "Spinz")
|
||||||
Spin[sind * 3 + 2] = atof(sval.c_str());
|
spin_local[sind * 3 + 2] = atof(sval.c_str());
|
||||||
else if (skey == "Pmomx")
|
else if (skey == "Pmomx")
|
||||||
Pmom[sind * 3] = atof(sval.c_str());
|
pmom_local[sind * 3] = atof(sval.c_str());
|
||||||
else if (skey == "Pmomy")
|
else if (skey == "Pmomy")
|
||||||
Pmom[sind * 3 + 1] = atof(sval.c_str());
|
pmom_local[sind * 3 + 1] = atof(sval.c_str());
|
||||||
else if (skey == "Pmomz")
|
else if (skey == "Pmomz")
|
||||||
Pmom[sind * 3 + 2] = atof(sval.c_str());
|
pmom_local[sind * 3 + 2] = atof(sval.c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
inf.close();
|
inf.close();
|
||||||
@@ -598,7 +618,7 @@ void bssnEScalar_class::Read_Pablo()
|
|||||||
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
|
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
|
||||||
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
|
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
|
||||||
cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn],
|
cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn],
|
||||||
Mass, Porg_here, Pmom, Spin, BH_NM);
|
mass_local, Porg_here, pmom_local, spin_local, BH_NM);
|
||||||
}
|
}
|
||||||
if (BL == Pp->data->ble)
|
if (BL == Pp->data->ble)
|
||||||
break;
|
break;
|
||||||
@@ -662,7 +682,7 @@ void bssnEScalar_class::Read_Pablo()
|
|||||||
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
|
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
|
||||||
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
|
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
|
||||||
cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn],
|
cg->fgfs[Sphi0->sgfn], cg->fgfs[Spi0->sgfn],
|
||||||
Mass, Porg_here, Pmom, Spin, BH_NM);
|
mass_local, Porg_here, pmom_local, spin_local, BH_NM);
|
||||||
}
|
}
|
||||||
if (BL == Pp->data->ble)
|
if (BL == Pp->data->ble)
|
||||||
break;
|
break;
|
||||||
@@ -686,6 +706,9 @@ void bssnEScalar_class::Read_Pablo()
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
delete[] Porg_here;
|
delete[] Porg_here;
|
||||||
|
delete[] pmom_local;
|
||||||
|
delete[] spin_local;
|
||||||
|
delete[] mass_local;
|
||||||
if (flag && myrank == 0)
|
if (flag && myrank == 0)
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||||
// dump read_in initial data
|
// dump read_in initial data
|
||||||
@@ -739,7 +762,7 @@ void bssnEScalar_class::Step(int lev, int YN)
|
|||||||
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
|
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (f_compute_rhs_bssn_escalar(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
if (BSSN_ESCALAR_RHS(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||||
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
||||||
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
||||||
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
||||||
@@ -993,7 +1016,8 @@ void bssnEScalar_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
|
Parallel::AsyncSyncState async_pre;
|
||||||
|
sync_predictor_start(lev, SynchList_pre, async_pre);
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
if (lev == 0)
|
if (lev == 0)
|
||||||
@@ -1012,6 +1036,7 @@ void bssnEScalar_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
sync_predictor_finish(lev, async_pre, SynchList_pre);
|
||||||
|
|
||||||
// for black hole position
|
// for black hole position
|
||||||
if (BH_num > 0 && lev == GH->levels - 1)
|
if (BH_num > 0 && lev == GH->levels - 1)
|
||||||
@@ -1081,7 +1106,7 @@ void bssnEScalar_class::Step(int lev, int YN)
|
|||||||
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
|
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (f_compute_rhs_bssn_escalar(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
if (BSSN_ESCALAR_RHS(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||||
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
|
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
|
||||||
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
|
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
|
||||||
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
|
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
|
||||||
@@ -1349,7 +1374,8 @@ void bssnEScalar_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
Parallel::AsyncSyncState async_cor;
|
||||||
|
sync_corrector_start(lev, SynchList_cor, async_cor);
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
if (lev == 0)
|
if (lev == 0)
|
||||||
@@ -1368,6 +1394,7 @@ void bssnEScalar_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
sync_corrector_finish(lev, async_cor, SynchList_cor);
|
||||||
// for black hole position
|
// for black hole position
|
||||||
if (BH_num > 0 && lev == GH->levels - 1)
|
if (BH_num > 0 && lev == GH->levels - 1)
|
||||||
{
|
{
|
||||||
@@ -1835,8 +1862,11 @@ void bssnEScalar_class::AnalysisStuff_EScalar(int lev, double dT_lev)
|
|||||||
|
|
||||||
//================================================================================================
|
//================================================================================================
|
||||||
|
|
||||||
void bssnEScalar_class::Interp_Constraint()
|
void bssnEScalar_class::Interp_Constraint(bool infg)
|
||||||
{
|
{
|
||||||
|
if (!infg)
|
||||||
|
return;
|
||||||
|
|
||||||
// we do not support a_lev != 0 yet.
|
// we do not support a_lev != 0 yet.
|
||||||
if (a_lev > 0)
|
if (a_lev > 0)
|
||||||
return;
|
return;
|
||||||
@@ -1858,7 +1888,7 @@ void bssnEScalar_class::Interp_Constraint()
|
|||||||
if (myrank == cg->rank)
|
if (myrank == cg->rank)
|
||||||
{
|
{
|
||||||
if (lev > 0)
|
if (lev > 0)
|
||||||
f_compute_rhs_bssn_escalar(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
BSSN_ESCALAR_RHS(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||||
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
||||||
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
||||||
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
||||||
@@ -2078,7 +2108,7 @@ void bssnEScalar_class::Constraint_Out()
|
|||||||
if (myrank == cg->rank)
|
if (myrank == cg->rank)
|
||||||
{
|
{
|
||||||
if (lev > 0)
|
if (lev > 0)
|
||||||
f_compute_rhs_bssn_escalar(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
BSSN_ESCALAR_RHS(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||||
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
||||||
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
||||||
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ public:
|
|||||||
void Compute_Psi4(int lev);
|
void Compute_Psi4(int lev);
|
||||||
void Step(int lev, int YN);
|
void Step(int lev, int YN);
|
||||||
void AnalysisStuff_EScalar(int lev, double dT_lev);
|
void AnalysisStuff_EScalar(int lev, double dT_lev);
|
||||||
void Interp_Constraint();
|
void Interp_Constraint(bool infg);
|
||||||
void Constraint_Out();
|
void Constraint_Out();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|||||||
@@ -299,6 +299,28 @@ bssn_class::bssn_class(double Couranti, double StartTimei, double TotalTimei,
|
|||||||
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||||
|
|
||||||
|
// Derived classes override Initialize(), so ownership-sensitive members must
|
||||||
|
// be in a known state before any specialized setup path runs.
|
||||||
|
GH = 0;
|
||||||
|
SH = 0;
|
||||||
|
PhysTime = 0.0;
|
||||||
|
BH_num = 0;
|
||||||
|
BH_num_input = 0;
|
||||||
|
Porg0 = 0;
|
||||||
|
Porgbr = 0;
|
||||||
|
Porg = 0;
|
||||||
|
Porg1 = 0;
|
||||||
|
Porg_rhs = 0;
|
||||||
|
Mass = 0;
|
||||||
|
Pmom = 0;
|
||||||
|
Spin = 0;
|
||||||
|
sync_cache_pre = 0;
|
||||||
|
sync_cache_cor = 0;
|
||||||
|
sync_cache_rp_coarse = 0;
|
||||||
|
sync_cache_rp_fine = 0;
|
||||||
|
sync_cache_restrict = 0;
|
||||||
|
sync_cache_outbd = 0;
|
||||||
|
|
||||||
// setup Monitors
|
// setup Monitors
|
||||||
{
|
{
|
||||||
stringstream a_stream;
|
stringstream a_stream;
|
||||||
@@ -986,13 +1008,7 @@ void bssn_class::Initialize()
|
|||||||
Setup_Black_Hole_position();
|
Setup_Black_Hole_position();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize sync caches (per-level, for predictor and corrector)
|
setup_transfer_caches();
|
||||||
sync_cache_pre = new Parallel::SyncCache[GH->levels];
|
|
||||||
sync_cache_cor = new Parallel::SyncCache[GH->levels];
|
|
||||||
sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels];
|
|
||||||
sync_cache_rp_fine = new Parallel::SyncCache[GH->levels];
|
|
||||||
sync_cache_restrict = new Parallel::SyncCache[GH->levels];
|
|
||||||
sync_cache_outbd = new Parallel::SyncCache[GH->levels];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//================================================================================================
|
//================================================================================================
|
||||||
@@ -1247,30 +1263,7 @@ bssn_class::~bssn_class()
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Destroy sync caches before GH
|
// Destroy sync caches before GH
|
||||||
if (sync_cache_pre)
|
destroy_transfer_caches();
|
||||||
{
|
|
||||||
for (int i = 0; i < GH->levels; i++)
|
|
||||||
sync_cache_pre[i].destroy();
|
|
||||||
delete[] sync_cache_pre;
|
|
||||||
}
|
|
||||||
if (sync_cache_cor)
|
|
||||||
{
|
|
||||||
for (int i = 0; i < GH->levels; i++)
|
|
||||||
sync_cache_cor[i].destroy();
|
|
||||||
delete[] sync_cache_cor;
|
|
||||||
}
|
|
||||||
if (sync_cache_rp_coarse)
|
|
||||||
{
|
|
||||||
for (int i = 0; i < GH->levels; i++)
|
|
||||||
sync_cache_rp_coarse[i].destroy();
|
|
||||||
delete[] sync_cache_rp_coarse;
|
|
||||||
}
|
|
||||||
if (sync_cache_rp_fine)
|
|
||||||
{
|
|
||||||
for (int i = 0; i < GH->levels; i++)
|
|
||||||
sync_cache_rp_fine[i].destroy();
|
|
||||||
delete[] sync_cache_rp_fine;
|
|
||||||
}
|
|
||||||
|
|
||||||
delete GH;
|
delete GH;
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
@@ -2489,7 +2482,7 @@ void bssn_class::Evolve(int Steps)
|
|||||||
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
|
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
|
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
invalidate_transfer_caches();
|
||||||
STEP_TIMER_ADD(TB_REGRID, timer_regrid);
|
STEP_TIMER_ADD(TB_REGRID, timer_regrid);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -2730,7 +2723,7 @@ void bssn_class::RecursiveStep(int lev)
|
|||||||
{
|
{
|
||||||
if (ConstraintRefreshLevels)
|
if (ConstraintRefreshLevels)
|
||||||
ConstraintRefreshLevels[lev] = 1;
|
ConstraintRefreshLevels[lev] = 1;
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
invalidate_transfer_caches();
|
||||||
}
|
}
|
||||||
STEP_TIMER_ADD(TB_REGRID, timer_regrid_onelevel);
|
STEP_TIMER_ADD(TB_REGRID, timer_regrid_onelevel);
|
||||||
#endif
|
#endif
|
||||||
@@ -2911,7 +2904,7 @@ void bssn_class::ParallelStep()
|
|||||||
if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0,
|
if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
invalidate_transfer_caches();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3075,10 +3068,10 @@ void bssn_class::ParallelStep()
|
|||||||
if (lev + 1 >= GH->movls)
|
if (lev + 1 >= GH->movls)
|
||||||
{
|
{
|
||||||
// GH->Regrid_Onelevel_aux(lev,Symmetry,BH_num,Porgbr,Porg0,
|
// GH->Regrid_Onelevel_aux(lev,Symmetry,BH_num,Porgbr,Porg0,
|
||||||
if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0,
|
if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor))
|
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor))
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
invalidate_transfer_caches();
|
||||||
|
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
// a_stream.str("");
|
// a_stream.str("");
|
||||||
@@ -3093,7 +3086,7 @@ void bssn_class::ParallelStep()
|
|||||||
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
invalidate_transfer_caches();
|
||||||
|
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
// a_stream.str("");
|
// a_stream.str("");
|
||||||
@@ -3112,7 +3105,7 @@ void bssn_class::ParallelStep()
|
|||||||
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
invalidate_transfer_caches();
|
||||||
|
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
// a_stream.str("");
|
// a_stream.str("");
|
||||||
@@ -3128,7 +3121,7 @@ void bssn_class::ParallelStep()
|
|||||||
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
invalidate_transfer_caches();
|
||||||
|
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
// a_stream.str("");
|
// a_stream.str("");
|
||||||
@@ -3659,7 +3652,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
|
|
||||||
STEP_TIMER_DECL(timer_predictor_sync);
|
STEP_TIMER_DECL(timer_predictor_sync);
|
||||||
Parallel::AsyncSyncState async_pre;
|
Parallel::AsyncSyncState async_pre;
|
||||||
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
|
sync_predictor_start(lev, SynchList_pre, async_pre);
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
if (lev == 0)
|
if (lev == 0)
|
||||||
@@ -3678,7 +3671,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
|
sync_predictor_finish(lev, async_pre, SynchList_pre);
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
// Complete non-blocking error reduction and check
|
// Complete non-blocking error reduction and check
|
||||||
@@ -4024,7 +4017,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
|
|
||||||
STEP_TIMER_DECL(timer_corrector_sync);
|
STEP_TIMER_DECL(timer_corrector_sync);
|
||||||
Parallel::AsyncSyncState async_cor;
|
Parallel::AsyncSyncState async_cor;
|
||||||
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
|
sync_corrector_start(lev, SynchList_cor, async_cor);
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
if (lev == 0)
|
if (lev == 0)
|
||||||
@@ -4043,7 +4036,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
|
sync_corrector_finish(lev, async_cor, SynchList_cor);
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
// Complete non-blocking error reduction and check
|
// Complete non-blocking error reduction and check
|
||||||
@@ -4532,7 +4525,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::AsyncSyncState async_pre;
|
Parallel::AsyncSyncState async_pre;
|
||||||
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
|
sync_predictor_start(lev, SynchList_pre, async_pre);
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
if (lev == 0)
|
if (lev == 0)
|
||||||
@@ -4551,7 +4544,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
|
sync_predictor_finish(lev, async_pre, SynchList_pre);
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
// Complete non-blocking error reduction and check
|
// Complete non-blocking error reduction and check
|
||||||
@@ -4880,7 +4873,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::AsyncSyncState async_cor;
|
Parallel::AsyncSyncState async_cor;
|
||||||
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
|
sync_corrector_start(lev, SynchList_cor, async_cor);
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
if (lev == 0)
|
if (lev == 0)
|
||||||
@@ -4899,7 +4892,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
|
sync_corrector_finish(lev, async_cor, SynchList_cor);
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
// Complete non-blocking error reduction and check
|
// Complete non-blocking error reduction and check
|
||||||
@@ -5291,7 +5284,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
|
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor sync");
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor sync");
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev]);
|
sync_evolution(lev, SynchList_pre, sync_cache_pre);
|
||||||
|
|
||||||
// Complete non-blocking error reduction and check
|
// Complete non-blocking error reduction and check
|
||||||
MPI_Wait(&err_req, MPI_STATUS_IGNORE);
|
MPI_Wait(&err_req, MPI_STATUS_IGNORE);
|
||||||
@@ -5492,7 +5485,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
|
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector sync");
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector sync");
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev]);
|
sync_evolution(lev, SynchList_cor, sync_cache_cor);
|
||||||
|
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Corrector sync");
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Corrector sync");
|
||||||
|
|
||||||
@@ -6081,6 +6074,92 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
|||||||
//
|
//
|
||||||
// SynchList_cor old -----------
|
// SynchList_cor old -----------
|
||||||
{
|
{
|
||||||
|
#if (ABEtype == 1)
|
||||||
|
#if (PSTR == 1 || PSTR == 2)
|
||||||
|
// stringstream a_stream;
|
||||||
|
// a_stream.setf(ios::left);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (lev > 0)
|
||||||
|
{
|
||||||
|
MyList<Patch> *Pp, *Ppc;
|
||||||
|
if (lev > trfls && YN == 0)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev - 1];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
|
if (BB)
|
||||||
|
Parallel::prepare_inter_time_level(Pp->data, SL, OL, corL,
|
||||||
|
SynchList_pre, 0);
|
||||||
|
else
|
||||||
|
Parallel::prepare_inter_time_level(Pp->data, SL, OL,
|
||||||
|
SynchList_pre, 0);
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if (RPB == 0)
|
||||||
|
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry);
|
||||||
|
#elif (RPB == 1)
|
||||||
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry);
|
||||||
|
|
||||||
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
|
#if (MIXOUTB == 0)
|
||||||
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry);
|
||||||
|
#elif (MIXOUTB == 1)
|
||||||
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry);
|
||||||
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
|
#elif (RPB == 1)
|
||||||
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
#if (RPB == 0)
|
||||||
|
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
||||||
|
#elif (RPB == 1)
|
||||||
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
Parallel::Sync(GH->PatL[lev - 1], SL, Symmetry);
|
||||||
|
|
||||||
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
|
#if (MIXOUTB == 0)
|
||||||
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SL, SL, Symmetry);
|
||||||
|
#elif (MIXOUTB == 1)
|
||||||
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SL, SL, Symmetry);
|
||||||
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
|
#elif (RPB == 1)
|
||||||
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
Parallel::Sync(GH->PatL[lev], SL, Symmetry);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
#endif
|
||||||
STEP_TIMER_DECL(timer_restrict_prolong);
|
STEP_TIMER_DECL(timer_restrict_prolong);
|
||||||
#if (PSTR == 1 || PSTR == 2)
|
#if (PSTR == 1 || PSTR == 2)
|
||||||
// stringstream a_stream;
|
// stringstream a_stream;
|
||||||
@@ -6123,7 +6202,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry, sync_cache_restrict[lev]);
|
restrict_evolution(lev, SL, SynchList_pre);
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry);
|
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry);
|
||||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
|
||||||
@@ -6136,7 +6215,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
|||||||
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
|
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
|
sync_evolution(lev - 1, SynchList_pre, sync_cache_rp_coarse);
|
||||||
|
|
||||||
#if (PSTR == 1 || PSTR == 2)
|
#if (PSTR == 1 || PSTR == 2)
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
@@ -6147,7 +6226,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
|||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry, sync_cache_outbd[lev]);
|
outbdlow2hi_evolution(lev, SynchList_pre, SL);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
@@ -6174,7 +6253,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_restrict[lev]);
|
restrict_evolution(lev, SL, SL);
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
|
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
|
||||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
|
||||||
@@ -6187,7 +6266,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
|||||||
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
|
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]);
|
sync_evolution(lev - 1, SL, sync_cache_rp_coarse);
|
||||||
|
|
||||||
#if (PSTR == 1 || PSTR == 2)
|
#if (PSTR == 1 || PSTR == 2)
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
@@ -6198,7 +6277,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
|||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_outbd[lev]);
|
outbdlow2hi_evolution(lev, SL, SL);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
@@ -6215,7 +6294,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]);
|
sync_evolution(lev, SL, sync_cache_rp_fine);
|
||||||
|
|
||||||
#if (PSTR == 1 || PSTR == 2)
|
#if (PSTR == 1 || PSTR == 2)
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
@@ -6244,6 +6323,91 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
|||||||
//
|
//
|
||||||
// SynchList_cor old -----------
|
// SynchList_cor old -----------
|
||||||
{
|
{
|
||||||
|
#if (ABEtype == 1)
|
||||||
|
if (lev >= GH->levels - 1)
|
||||||
|
return;
|
||||||
|
lev = lev + 1;
|
||||||
|
|
||||||
|
if (lev > 0)
|
||||||
|
{
|
||||||
|
MyList<Patch> *Pp, *Ppc;
|
||||||
|
if (lev > trfls && YN == 0)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev - 1];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
|
if (BB)
|
||||||
|
Parallel::prepare_inter_time_level(Pp->data, SL, OL, corL,
|
||||||
|
SynchList_pre, 0);
|
||||||
|
else
|
||||||
|
Parallel::prepare_inter_time_level(Pp->data, SL, OL,
|
||||||
|
SynchList_pre, 0);
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if (RPB == 0)
|
||||||
|
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry);
|
||||||
|
#elif (RPB == 1)
|
||||||
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry);
|
||||||
|
|
||||||
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
|
#if (MIXOUTB == 0)
|
||||||
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry);
|
||||||
|
#elif (MIXOUTB == 1)
|
||||||
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry);
|
||||||
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
|
#elif (RPB == 1)
|
||||||
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
#if (RPB == 0)
|
||||||
|
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
||||||
|
#elif (RPB == 1)
|
||||||
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
Parallel::Sync(GH->PatL[lev - 1], SL, Symmetry);
|
||||||
|
|
||||||
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
|
#if (MIXOUTB == 0)
|
||||||
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SL, SL, Symmetry);
|
||||||
|
#elif (MIXOUTB == 1)
|
||||||
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SL, SL, Symmetry);
|
||||||
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
|
#elif (RPB == 1)
|
||||||
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
Parallel::Sync(GH->PatL[lev], SL, Symmetry);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
#endif
|
||||||
STEP_TIMER_DECL(timer_restrict_prolong);
|
STEP_TIMER_DECL(timer_restrict_prolong);
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"starting RestrictProlong_aux");
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"starting RestrictProlong_aux");
|
||||||
|
|
||||||
@@ -6269,17 +6433,17 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry, sync_cache_restrict[lev]);
|
restrict_evolution(lev, SL, SynchList_pre);
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry);
|
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry);
|
||||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
|
sync_evolution(lev - 1, SynchList_pre, sync_cache_rp_coarse);
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry, sync_cache_outbd[lev]);
|
outbdlow2hi_evolution(lev, SynchList_pre, SL);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
@@ -6291,17 +6455,17 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
|||||||
else // no time refinement levels and for all same time levels
|
else // no time refinement levels and for all same time levels
|
||||||
{
|
{
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_restrict[lev]);
|
restrict_evolution(lev, SL, SL);
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
|
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
|
||||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]);
|
sync_evolution(lev - 1, SL, sync_cache_rp_coarse);
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_outbd[lev]);
|
outbdlow2hi_evolution(lev, SL, SL);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
@@ -6311,7 +6475,11 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]);
|
#if (ABEtype == 1)
|
||||||
|
Parallel::Sync(GH->PatL[lev], SL, Symmetry);
|
||||||
|
#else
|
||||||
|
sync_evolution(lev, SL, sync_cache_rp_fine);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
|
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
|
||||||
}
|
}
|
||||||
@@ -6324,8 +6492,93 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
|||||||
|
|
||||||
void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
||||||
{
|
{
|
||||||
STEP_TIMER_DECL(timer_restrict_prolong);
|
|
||||||
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
|
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
|
||||||
|
#if (ABEtype == 1)
|
||||||
|
if (lev > 0)
|
||||||
|
{
|
||||||
|
MyList<Patch> *Pp, *Ppc;
|
||||||
|
if (lev > trfls && YN == 0)
|
||||||
|
{
|
||||||
|
if (myrank == 0)
|
||||||
|
cout << "/=: " << GH->Lt[lev - 1] << "," << GH->Lt[lev] + dT_lev << endl;
|
||||||
|
Pp = GH->PatL[lev - 1];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
|
if (BB)
|
||||||
|
Parallel::prepare_inter_time_level(Pp->data, StateList, OldStateList, SynchList_cor,
|
||||||
|
SynchList_pre, 0);
|
||||||
|
else
|
||||||
|
Parallel::prepare_inter_time_level(Pp->data, StateList, OldStateList,
|
||||||
|
SynchList_pre, 0);
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if (RPB == 0)
|
||||||
|
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, Symmetry);
|
||||||
|
#elif (RPB == 1)
|
||||||
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry);
|
||||||
|
|
||||||
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
|
#if (MIXOUTB == 0)
|
||||||
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry);
|
||||||
|
#elif (MIXOUTB == 1)
|
||||||
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry);
|
||||||
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
|
#elif (RPB == 1)
|
||||||
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (myrank == 0)
|
||||||
|
cout << "===: " << GH->Lt[lev - 1] << "," << GH->Lt[lev] + dT_lev << endl;
|
||||||
|
#if (RPB == 0)
|
||||||
|
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry);
|
||||||
|
#elif (RPB == 1)
|
||||||
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
Parallel::Sync(GH->PatL[lev - 1], StateList, Symmetry);
|
||||||
|
|
||||||
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
|
#if (MIXOUTB == 0)
|
||||||
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry);
|
||||||
|
#elif (MIXOUTB == 1)
|
||||||
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry);
|
||||||
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
|
#elif (RPB == 1)
|
||||||
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
#endif
|
||||||
|
STEP_TIMER_DECL(timer_restrict_prolong);
|
||||||
// we assume for fine
|
// we assume for fine
|
||||||
// SynchList_cor 1 -----------
|
// SynchList_cor 1 -----------
|
||||||
//
|
//
|
||||||
@@ -6358,17 +6611,17 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, Symmetry, sync_cache_restrict[lev]);
|
restrict_evolution(lev, SynchList_cor, SynchList_pre);
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,SynchList_pre,Symmetry);
|
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,SynchList_pre,Symmetry);
|
||||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry);
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
|
sync_evolution(lev - 1, SynchList_pre, sync_cache_rp_coarse);
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
|
outbdlow2hi_evolution(lev, SynchList_pre, SynchList_cor);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
@@ -6382,17 +6635,17 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
|||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
cout << "===: " << GH->Lt[lev - 1] << "," << GH->Lt[lev] + dT_lev << endl;
|
cout << "===: " << GH->Lt[lev - 1] << "," << GH->Lt[lev] + dT_lev << endl;
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry, sync_cache_restrict[lev]);
|
restrict_evolution(lev, SynchList_cor, StateList);
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry);
|
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry);
|
||||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry);
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]);
|
sync_evolution(lev - 1, StateList, sync_cache_rp_coarse);
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
|
outbdlow2hi_evolution(lev, StateList, SynchList_cor);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
@@ -6402,7 +6655,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]);
|
sync_evolution(lev, SynchList_cor, sync_cache_rp_fine);
|
||||||
}
|
}
|
||||||
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
|
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
|
||||||
}
|
}
|
||||||
@@ -6434,7 +6687,7 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
|
|||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
|
outbdlow2hi_evolution(lev, SynchList_pre, SynchList_cor);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
@@ -6447,7 +6700,7 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
|
|||||||
{
|
{
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
|
outbdlow2hi_evolution(lev, StateList, SynchList_cor);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
@@ -6466,10 +6719,10 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
|
|||||||
#else
|
#else
|
||||||
Parallel::Restrict_after(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry);
|
Parallel::Restrict_after(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]);
|
sync_evolution(lev - 1, StateList, sync_cache_rp_coarse);
|
||||||
}
|
}
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]);
|
sync_evolution(lev, SynchList_cor, sync_cache_rp_fine);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#undef MIXOUTB
|
#undef MIXOUTB
|
||||||
@@ -7199,6 +7452,169 @@ void bssn_class::compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, va
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool bssn_class::use_transfer_cache() const
|
||||||
|
{
|
||||||
|
#if BSSN_USE_TRANSFER_CACHE
|
||||||
|
return true;
|
||||||
|
#else
|
||||||
|
return false;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void bssn_class::setup_transfer_caches()
|
||||||
|
{
|
||||||
|
sync_cache_pre = 0;
|
||||||
|
sync_cache_cor = 0;
|
||||||
|
sync_cache_rp_coarse = 0;
|
||||||
|
sync_cache_rp_fine = 0;
|
||||||
|
sync_cache_restrict = 0;
|
||||||
|
sync_cache_outbd = 0;
|
||||||
|
|
||||||
|
if (!use_transfer_cache() || !GH)
|
||||||
|
return;
|
||||||
|
|
||||||
|
sync_cache_pre = new Parallel::SyncCache[GH->levels];
|
||||||
|
sync_cache_cor = new Parallel::SyncCache[GH->levels];
|
||||||
|
sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels];
|
||||||
|
sync_cache_rp_fine = new Parallel::SyncCache[GH->levels];
|
||||||
|
sync_cache_restrict = new Parallel::SyncCache[GH->levels];
|
||||||
|
sync_cache_outbd = new Parallel::SyncCache[GH->levels];
|
||||||
|
}
|
||||||
|
|
||||||
|
void bssn_class::invalidate_transfer_caches()
|
||||||
|
{
|
||||||
|
if (!use_transfer_cache() || !GH || !sync_cache_pre || !sync_cache_cor ||
|
||||||
|
!sync_cache_rp_coarse || !sync_cache_rp_fine || !sync_cache_restrict || !sync_cache_outbd)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (int il = 0; il < GH->levels; il++)
|
||||||
|
{
|
||||||
|
sync_cache_pre[il].invalidate();
|
||||||
|
sync_cache_cor[il].invalidate();
|
||||||
|
sync_cache_rp_coarse[il].invalidate();
|
||||||
|
sync_cache_rp_fine[il].invalidate();
|
||||||
|
sync_cache_restrict[il].invalidate();
|
||||||
|
sync_cache_outbd[il].invalidate();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void bssn_class::destroy_transfer_caches()
|
||||||
|
{
|
||||||
|
if (sync_cache_pre)
|
||||||
|
{
|
||||||
|
if (use_transfer_cache() && GH)
|
||||||
|
for (int i = 0; i < GH->levels; i++)
|
||||||
|
sync_cache_pre[i].destroy();
|
||||||
|
delete[] sync_cache_pre;
|
||||||
|
sync_cache_pre = 0;
|
||||||
|
}
|
||||||
|
if (sync_cache_cor)
|
||||||
|
{
|
||||||
|
if (use_transfer_cache() && GH)
|
||||||
|
for (int i = 0; i < GH->levels; i++)
|
||||||
|
sync_cache_cor[i].destroy();
|
||||||
|
delete[] sync_cache_cor;
|
||||||
|
sync_cache_cor = 0;
|
||||||
|
}
|
||||||
|
if (sync_cache_rp_coarse)
|
||||||
|
{
|
||||||
|
if (use_transfer_cache() && GH)
|
||||||
|
for (int i = 0; i < GH->levels; i++)
|
||||||
|
sync_cache_rp_coarse[i].destroy();
|
||||||
|
delete[] sync_cache_rp_coarse;
|
||||||
|
sync_cache_rp_coarse = 0;
|
||||||
|
}
|
||||||
|
if (sync_cache_rp_fine)
|
||||||
|
{
|
||||||
|
if (use_transfer_cache() && GH)
|
||||||
|
for (int i = 0; i < GH->levels; i++)
|
||||||
|
sync_cache_rp_fine[i].destroy();
|
||||||
|
delete[] sync_cache_rp_fine;
|
||||||
|
sync_cache_rp_fine = 0;
|
||||||
|
}
|
||||||
|
if (sync_cache_restrict)
|
||||||
|
{
|
||||||
|
if (use_transfer_cache() && GH)
|
||||||
|
for (int i = 0; i < GH->levels; i++)
|
||||||
|
sync_cache_restrict[i].destroy();
|
||||||
|
delete[] sync_cache_restrict;
|
||||||
|
sync_cache_restrict = 0;
|
||||||
|
}
|
||||||
|
if (sync_cache_outbd)
|
||||||
|
{
|
||||||
|
if (use_transfer_cache() && GH)
|
||||||
|
for (int i = 0; i < GH->levels; i++)
|
||||||
|
sync_cache_outbd[i].destroy();
|
||||||
|
delete[] sync_cache_outbd;
|
||||||
|
sync_cache_outbd = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void bssn_class::sync_predictor_start(int lev, MyList<var> *VarList, Parallel::AsyncSyncState &async_state)
|
||||||
|
{
|
||||||
|
if (use_transfer_cache())
|
||||||
|
Parallel::Sync_start(GH->PatL[lev], VarList, Symmetry, sync_cache_pre[lev], async_state);
|
||||||
|
else
|
||||||
|
Parallel::Sync(GH->PatL[lev], VarList, Symmetry);
|
||||||
|
}
|
||||||
|
|
||||||
|
void bssn_class::sync_predictor_finish(int lev, Parallel::AsyncSyncState &async_state, MyList<var> *VarList)
|
||||||
|
{
|
||||||
|
if (use_transfer_cache())
|
||||||
|
Parallel::Sync_finish(sync_cache_pre[lev], async_state, VarList, Symmetry);
|
||||||
|
}
|
||||||
|
|
||||||
|
void bssn_class::sync_corrector_start(int lev, MyList<var> *VarList, Parallel::AsyncSyncState &async_state)
|
||||||
|
{
|
||||||
|
if (use_transfer_cache())
|
||||||
|
Parallel::Sync_start(GH->PatL[lev], VarList, Symmetry, sync_cache_cor[lev], async_state);
|
||||||
|
else
|
||||||
|
Parallel::Sync(GH->PatL[lev], VarList, Symmetry);
|
||||||
|
}
|
||||||
|
|
||||||
|
void bssn_class::sync_corrector_finish(int lev, Parallel::AsyncSyncState &async_state, MyList<var> *VarList)
|
||||||
|
{
|
||||||
|
if (use_transfer_cache())
|
||||||
|
Parallel::Sync_finish(sync_cache_cor[lev], async_state, VarList, Symmetry);
|
||||||
|
}
|
||||||
|
|
||||||
|
void bssn_class::sync_evolution(int lev, MyList<var> *VarList, Parallel::SyncCache *cache_array)
|
||||||
|
{
|
||||||
|
if (use_transfer_cache() && cache_array)
|
||||||
|
Parallel::Sync_cached(GH->PatL[lev], VarList, Symmetry, cache_array[lev]);
|
||||||
|
else
|
||||||
|
Parallel::Sync(GH->PatL[lev], VarList, Symmetry);
|
||||||
|
}
|
||||||
|
|
||||||
|
void bssn_class::restrict_evolution(int lev, MyList<var> *src_var_list, MyList<var> *dst_var_list)
|
||||||
|
{
|
||||||
|
if (use_transfer_cache())
|
||||||
|
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], src_var_list, dst_var_list, Symmetry, sync_cache_restrict[lev]);
|
||||||
|
else
|
||||||
|
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], src_var_list, dst_var_list, Symmetry);
|
||||||
|
}
|
||||||
|
|
||||||
|
void bssn_class::outbdlow2hi_evolution(int lev, MyList<var> *src_var_list, MyList<var> *dst_var_list)
|
||||||
|
{
|
||||||
|
if (use_transfer_cache())
|
||||||
|
{
|
||||||
|
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], src_var_list, dst_var_list, Symmetry, sync_cache_outbd[lev]);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
MyList<Patch> *Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
MyList<Patch> *Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, src_var_list, dst_var_list, Symmetry);
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//================================================================================================
|
//================================================================================================
|
||||||
|
|||||||
@@ -33,6 +33,14 @@ using namespace std;
|
|||||||
|
|
||||||
extern void setpbh(int iBHN, double **iPBH, double *iMass, int rBHN);
|
extern void setpbh(int iBHN, double **iPBH, double *iMass, int rBHN);
|
||||||
|
|
||||||
|
#ifndef BSSN_USE_TRANSFER_CACHE
|
||||||
|
#define BSSN_USE_TRANSFER_CACHE 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef BSSN_USE_ESCALAR_C_KERNEL
|
||||||
|
#define BSSN_USE_ESCALAR_C_KERNEL 1
|
||||||
|
#endif
|
||||||
|
|
||||||
class bssn_class
|
class bssn_class
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@@ -171,6 +179,17 @@ public:
|
|||||||
void testOutBd();
|
void testOutBd();
|
||||||
|
|
||||||
bool check_Stdin_Abort();
|
bool check_Stdin_Abort();
|
||||||
|
bool use_transfer_cache() const;
|
||||||
|
void setup_transfer_caches();
|
||||||
|
void invalidate_transfer_caches();
|
||||||
|
void destroy_transfer_caches();
|
||||||
|
void sync_predictor_start(int lev, MyList<var> *VarList, Parallel::AsyncSyncState &async_state);
|
||||||
|
void sync_predictor_finish(int lev, Parallel::AsyncSyncState &async_state, MyList<var> *VarList);
|
||||||
|
void sync_corrector_start(int lev, MyList<var> *VarList, Parallel::AsyncSyncState &async_state);
|
||||||
|
void sync_corrector_finish(int lev, Parallel::AsyncSyncState &async_state, MyList<var> *VarList);
|
||||||
|
void sync_evolution(int lev, MyList<var> *VarList, Parallel::SyncCache *cache_array = 0);
|
||||||
|
void restrict_evolution(int lev, MyList<var> *src_var_list, MyList<var> *dst_var_list);
|
||||||
|
void outbdlow2hi_evolution(int lev, MyList<var> *src_var_list, MyList<var> *dst_var_list);
|
||||||
|
|
||||||
virtual void Setup_Initial_Data_Cao();
|
virtual void Setup_Initial_Data_Cao();
|
||||||
virtual void Setup_Initial_Data_Lousto();
|
virtual void Setup_Initial_Data_Lousto();
|
||||||
|
|||||||
169
AMSS_NCKU_source/bssn_escalar_rhs_c.C
Normal file
169
AMSS_NCKU_source/bssn_escalar_rhs_c.C
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
#include "macrodef.h"
|
||||||
|
#include "bssn_rhs.h"
|
||||||
|
#include "share_func.h"
|
||||||
|
#include "tool.h"
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
// Reuse the temporary workspace across block calls to avoid repeated heap churn
|
||||||
|
// in the EScalar wrapper. MPI ranks execute this path sequentially, so a single
|
||||||
|
// process-local buffer is sufficient here.
|
||||||
|
std::vector<double> g_escalar_tmp_store;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef fortran1
|
||||||
|
#define f_frpotential frpotential
|
||||||
|
#endif
|
||||||
|
#ifdef fortran2
|
||||||
|
#define f_frpotential FRPOTENTIAL
|
||||||
|
#endif
|
||||||
|
#ifdef fortran3
|
||||||
|
#define f_frpotential frpotential_
|
||||||
|
#endif
|
||||||
|
|
||||||
|
extern "C"
|
||||||
|
{
|
||||||
|
void f_frpotential(int *, double *, double *, double *);
|
||||||
|
}
|
||||||
|
|
||||||
|
int f_compute_rhs_bssn_escalar_c(int *ex, double &T,
|
||||||
|
double *X, double *Y, double *Z,
|
||||||
|
double *chi, double *trK,
|
||||||
|
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
|
||||||
|
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
|
||||||
|
double *Gamx, double *Gamy, double *Gamz,
|
||||||
|
double *Lap, double *betax, double *betay, double *betaz,
|
||||||
|
double *dtSfx, double *dtSfy, double *dtSfz,
|
||||||
|
double *Sphi, double *Spi,
|
||||||
|
double *chi_rhs, double *trK_rhs,
|
||||||
|
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
|
||||||
|
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
|
||||||
|
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
|
||||||
|
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
|
||||||
|
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
|
||||||
|
double *Sphi_rhs, double *Spi_rhs,
|
||||||
|
double *rho, double *Sx, double *Sy, double *Sz,
|
||||||
|
double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
|
||||||
|
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
|
||||||
|
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
|
||||||
|
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
|
||||||
|
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
|
||||||
|
double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res,
|
||||||
|
double *Gmx_Res, double *Gmy_Res, double *Gmz_Res,
|
||||||
|
int &Symmetry, int &Lev, double &eps, int &co)
|
||||||
|
{
|
||||||
|
const int nx = ex[0], ny = ex[1], nz = ex[2];
|
||||||
|
const int all = nx * ny * nz;
|
||||||
|
|
||||||
|
const size_t workspace_size = size_t(all) * 17;
|
||||||
|
if (g_escalar_tmp_store.size() < workspace_size)
|
||||||
|
g_escalar_tmp_store.resize(workspace_size);
|
||||||
|
|
||||||
|
double *tmp_ptr = g_escalar_tmp_store.data();
|
||||||
|
auto alloc_tmp = [&](int n = 1) -> double *
|
||||||
|
{
|
||||||
|
double *ptr = tmp_ptr;
|
||||||
|
tmp_ptr += size_t(all) * n;
|
||||||
|
return ptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
double *chix = alloc_tmp(), *chiy = alloc_tmp(), *chiz = alloc_tmp();
|
||||||
|
double *Kx = alloc_tmp(), *Ky = alloc_tmp(), *Kz = alloc_tmp();
|
||||||
|
double *fxx = alloc_tmp(), *fxy = alloc_tmp(), *fxz = alloc_tmp();
|
||||||
|
double *fyy = alloc_tmp(), *fyz = alloc_tmp(), *fzz = alloc_tmp();
|
||||||
|
double *Lapx = alloc_tmp(), *Lapy = alloc_tmp(), *Lapz = alloc_tmp();
|
||||||
|
double *V = alloc_tmp(), *dVdSphi = alloc_tmp();
|
||||||
|
|
||||||
|
const double ZEO = 0.0, ONE = 1.0, TWO = 2.0, HALF = 0.5;
|
||||||
|
const double SSS[3] = {1.0, 1.0, 1.0};
|
||||||
|
|
||||||
|
fderivs(ex, chi, chix, chiy, chiz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||||
|
fderivs(ex, Lap, Lapx, Lapy, Lapz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||||
|
fderivs(ex, Sphi, Kx, Ky, Kz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||||
|
fdderivs(ex, Sphi, fxx, fxy, fxz, fyy, fyz, fzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
|
||||||
|
|
||||||
|
f_frpotential(ex, Sphi, V, dVdSphi);
|
||||||
|
|
||||||
|
for (int i = 0; i < all; ++i)
|
||||||
|
{
|
||||||
|
const double alpn1 = Lap[i] + ONE;
|
||||||
|
const double chin1 = chi[i] + ONE;
|
||||||
|
const double gxx = dxx[i] + ONE;
|
||||||
|
const double gyy = dyy[i] + ONE;
|
||||||
|
const double gzz = dzz[i] + ONE;
|
||||||
|
const double det = gxx * gyy * gzz + gxy[i] * gyz[i] * gxz[i] + gxz[i] * gxy[i] * gyz[i]
|
||||||
|
- gxz[i] * gyy * gxz[i] - gxy[i] * gxy[i] * gzz - gxx * gyz[i] * gyz[i];
|
||||||
|
const double gupxx = (gyy * gzz - gyz[i] * gyz[i]) / det;
|
||||||
|
const double gupxy = -(gxy[i] * gzz - gyz[i] * gxz[i]) / det;
|
||||||
|
const double gupxz = (gxy[i] * gyz[i] - gyy * gxz[i]) / det;
|
||||||
|
const double gupyy = (gxx * gzz - gxz[i] * gxz[i]) / det;
|
||||||
|
const double gupyz = -(gxx * gyz[i] - gxy[i] * gxz[i]) / det;
|
||||||
|
const double gupzz = (gxx * gyy - gxy[i] * gxy[i]) / det;
|
||||||
|
|
||||||
|
Sphi_rhs[i] = alpn1 * Spi[i];
|
||||||
|
|
||||||
|
Spi_rhs[i] = gupxx * fxx[i] + gupyy * fyy[i] + gupzz * fzz[i]
|
||||||
|
+ TWO * (gupxy * fxy[i] + gupxz * fxz[i] + gupyz * fyz[i])
|
||||||
|
- ((Gamx[i] + (gupxx * chix[i] + gupxy * chiy[i] + gupxz * chiz[i]) / TWO / chin1) * Kx[i]
|
||||||
|
+ (Gamy[i] + (gupxy * chix[i] + gupyy * chiy[i] + gupyz * chiz[i]) / TWO / chin1) * Ky[i]
|
||||||
|
+ (Gamz[i] + (gupxz * chix[i] + gupyz * chiy[i] + gupzz * chiz[i]) / TWO / chin1) * Kz[i]);
|
||||||
|
|
||||||
|
Spi_rhs[i] = Spi_rhs[i] * alpn1
|
||||||
|
+ gupxx * Lapx[i] * Kx[i] + gupxy * Lapx[i] * Ky[i] + gupxz * Lapx[i] * Kz[i]
|
||||||
|
+ gupxy * Lapy[i] * Kx[i] + gupyy * Lapy[i] * Ky[i] + gupyz * Lapy[i] * Kz[i]
|
||||||
|
+ gupxz * Lapz[i] * Kx[i] + gupyz * Lapz[i] * Ky[i] + gupzz * Lapz[i] * Kz[i];
|
||||||
|
|
||||||
|
Spi_rhs[i] = Spi_rhs[i] * chin1 + alpn1 * (trK[i] * Spi[i] - dVdSphi[i]);
|
||||||
|
|
||||||
|
rho[i] = chin1 * ((gupxx * Kx[i] * Kx[i] + gupyy * Ky[i] * Ky[i] + gupzz * Kz[i] * Kz[i]) * HALF
|
||||||
|
+ gupxy * Kx[i] * Ky[i] + gupxz * Kx[i] * Kz[i] + gupyz * Ky[i] * Kz[i])
|
||||||
|
+ Spi[i] * Spi[i] * HALF + V[i];
|
||||||
|
Sx[i] = -Spi[i] * Kx[i];
|
||||||
|
Sy[i] = -Spi[i] * Ky[i];
|
||||||
|
Sz[i] = -Spi[i] * Kz[i];
|
||||||
|
|
||||||
|
const double pressure = (rho[i] - Spi[i] * Spi[i]) / chin1;
|
||||||
|
Sxx[i] = Kx[i] * Kx[i] - pressure * gxx;
|
||||||
|
Sxy[i] = Kx[i] * Ky[i] - pressure * gxy[i];
|
||||||
|
Sxz[i] = Kx[i] * Kz[i] - pressure * gxz[i];
|
||||||
|
Syy[i] = Ky[i] * Ky[i] - pressure * gyy;
|
||||||
|
Syz[i] = Ky[i] * Kz[i] - pressure * gyz[i];
|
||||||
|
Szz[i] = Kz[i] * Kz[i] - pressure * gzz;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (f_compute_rhs_bssn(ex, T, X, Y, Z,
|
||||||
|
chi, trK,
|
||||||
|
dxx, gxy, gxz, dyy, gyz, dzz,
|
||||||
|
Axx, Axy, Axz, Ayy, Ayz, Azz,
|
||||||
|
Gamx, Gamy, Gamz,
|
||||||
|
Lap, betax, betay, betaz,
|
||||||
|
dtSfx, dtSfy, dtSfz,
|
||||||
|
chi_rhs, trK_rhs,
|
||||||
|
gxx_rhs, gxy_rhs, gxz_rhs, gyy_rhs, gyz_rhs, gzz_rhs,
|
||||||
|
Axx_rhs, Axy_rhs, Axz_rhs, Ayy_rhs, Ayz_rhs, Azz_rhs,
|
||||||
|
Gamx_rhs, Gamy_rhs, Gamz_rhs,
|
||||||
|
Lap_rhs, betax_rhs, betay_rhs, betaz_rhs,
|
||||||
|
dtSfx_rhs, dtSfy_rhs, dtSfz_rhs,
|
||||||
|
rho, Sx, Sy, Sz,
|
||||||
|
Sxx, Sxy, Sxz, Syy, Syz, Szz,
|
||||||
|
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
|
||||||
|
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
|
||||||
|
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
|
||||||
|
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
|
||||||
|
ham_Res, movx_Res, movy_Res, movz_Res,
|
||||||
|
Gmx_Res, Gmy_Res, Gmz_Res,
|
||||||
|
Symmetry, Lev, eps, co))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
lopsided_kodis(ex, X, Y, Z, Sphi, Sphi_rhs, betax, betay, betaz, Symmetry, SSS, eps);
|
||||||
|
lopsided_kodis(ex, X, Y, Z, Spi, Spi_rhs, betax, betay, betaz, Symmetry, SSS, eps);
|
||||||
|
|
||||||
|
for (int i = 0; i < all; ++i)
|
||||||
|
{
|
||||||
|
if (Sphi_rhs[i] != Sphi_rhs[i] || Spi_rhs[i] != Spi_rhs[i] || rho[i] != rho[i])
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
@@ -67,6 +67,27 @@ extern "C"
|
|||||||
int &, int &, double &, int &);
|
int &, int &, double &, int &);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int f_compute_rhs_bssn_escalar_c(int *, double &, double *, double *, double *, // ex,T,X,Y,Z
|
||||||
|
double *, double *, // chi, trK
|
||||||
|
double *, double *, double *, double *, double *, double *, // gij
|
||||||
|
double *, double *, double *, double *, double *, double *, // Aij
|
||||||
|
double *, double *, double *, // Gam
|
||||||
|
double *, double *, double *, double *, double *, double *, double *, // Gauge
|
||||||
|
double *, double *, // Sphi, Spi
|
||||||
|
double *, double *, // chi, trK
|
||||||
|
double *, double *, double *, double *, double *, double *, // gij
|
||||||
|
double *, double *, double *, double *, double *, double *, // Aij
|
||||||
|
double *, double *, double *, // Gam
|
||||||
|
double *, double *, double *, double *, double *, double *, double *, // Gauge
|
||||||
|
double *, double *, // Sphi, Spi
|
||||||
|
double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, // stress-energy
|
||||||
|
double *, double *, double *, double *, double *, double *, // Christoffel
|
||||||
|
double *, double *, double *, double *, double *, double *, // Christoffel
|
||||||
|
double *, double *, double *, double *, double *, double *, // Christoffel
|
||||||
|
double *, double *, double *, double *, double *, double *, // Ricci
|
||||||
|
double *, double *, double *, double *, double *, double *, double *, // constraint violation
|
||||||
|
int &, int &, double &, int &);
|
||||||
|
|
||||||
extern "C"
|
extern "C"
|
||||||
{
|
{
|
||||||
int f_compute_rhs_bssn_ss(int *, double &, double *, double *, double *, // ex,T,rho,sigma,R
|
int f_compute_rhs_bssn_ss(int *, double &, double *, double *, double *, // ex,T,rho,sigma,R
|
||||||
|
|||||||
@@ -17,103 +17,65 @@ using namespace std;
|
|||||||
#include <math.h>
|
#include <math.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Linear equation solution by Gauss-Jordan elimination.
|
// Intel oneMKL LAPACK interface
|
||||||
|
#include <mkl_lapacke.h>
|
||||||
|
/* Linear equation solution using Intel oneMKL LAPACK.
|
||||||
a[0..n-1][0..n-1] is the input matrix. b[0..n-1] is input
|
a[0..n-1][0..n-1] is the input matrix. b[0..n-1] is input
|
||||||
containing the right-hand side vectors. On output a is
|
containing the right-hand side vectors. On output a is
|
||||||
replaced by its matrix inverse, and b is replaced by the
|
replaced by its matrix inverse, and b is replaced by the
|
||||||
corresponding set of solution vectors. */
|
corresponding set of solution vectors.
|
||||||
|
|
||||||
|
Mathematical equivalence:
|
||||||
|
Solves: A * x = b => x = A^(-1) * b
|
||||||
|
Original Gauss-Jordan and LAPACK dgesv/dgetri produce identical results
|
||||||
|
within numerical precision. */
|
||||||
|
|
||||||
int gaussj(double *a, double *b, int n)
|
int gaussj(double *a, double *b, int n)
|
||||||
{
|
{
|
||||||
double swap;
|
// Allocate pivot array and workspace
|
||||||
|
lapack_int *ipiv = new lapack_int[n];
|
||||||
|
lapack_int info;
|
||||||
|
|
||||||
int *indxc, *indxr, *ipiv;
|
// Make a copy of matrix a for solving (dgesv modifies it to LU form)
|
||||||
indxc = new int[n];
|
double *a_copy = new double[n * n];
|
||||||
indxr = new int[n];
|
for (int i = 0; i < n * n; i++) {
|
||||||
ipiv = new int[n];
|
a_copy[i] = a[i];
|
||||||
|
|
||||||
int i, icol, irow, j, k, l, ll;
|
|
||||||
double big, dum, pivinv;
|
|
||||||
|
|
||||||
for (j = 0; j < n; j++)
|
|
||||||
ipiv[j] = 0;
|
|
||||||
for (i = 0; i < n; i++)
|
|
||||||
{
|
|
||||||
big = 0.0;
|
|
||||||
for (j = 0; j < n; j++)
|
|
||||||
if (ipiv[j] != 1)
|
|
||||||
for (k = 0; k < n; k++)
|
|
||||||
{
|
|
||||||
if (ipiv[k] == 0)
|
|
||||||
{
|
|
||||||
if (fabs(a[j * n + k]) >= big)
|
|
||||||
{
|
|
||||||
big = fabs(a[j * n + k]);
|
|
||||||
irow = j;
|
|
||||||
icol = k;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (ipiv[k] > 1)
|
|
||||||
{
|
|
||||||
cout << "gaussj: Singular Matrix-1" << endl;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ipiv[icol] = ipiv[icol] + 1;
|
|
||||||
if (irow != icol)
|
|
||||||
{
|
|
||||||
for (l = 0; l < n; l++)
|
|
||||||
{
|
|
||||||
swap = a[irow * n + l];
|
|
||||||
a[irow * n + l] = a[icol * n + l];
|
|
||||||
a[icol * n + l] = swap;
|
|
||||||
}
|
|
||||||
|
|
||||||
swap = b[irow];
|
|
||||||
b[irow] = b[icol];
|
|
||||||
b[icol] = swap;
|
|
||||||
}
|
|
||||||
|
|
||||||
indxr[i] = irow;
|
|
||||||
indxc[i] = icol;
|
|
||||||
|
|
||||||
if (a[icol * n + icol] == 0.0)
|
|
||||||
{
|
|
||||||
cout << "gaussj: Singular Matrix-2" << endl;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
pivinv = 1.0 / a[icol * n + icol];
|
|
||||||
a[icol * n + icol] = 1.0;
|
|
||||||
for (l = 0; l < n; l++)
|
|
||||||
a[icol * n + l] *= pivinv;
|
|
||||||
b[icol] *= pivinv;
|
|
||||||
for (ll = 0; ll < n; ll++)
|
|
||||||
if (ll != icol)
|
|
||||||
{
|
|
||||||
dum = a[ll * n + icol];
|
|
||||||
a[ll * n + icol] = 0.0;
|
|
||||||
for (l = 0; l < n; l++)
|
|
||||||
a[ll * n + l] -= a[icol * n + l] * dum;
|
|
||||||
b[ll] -= b[icol] * dum;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (l = n - 1; l >= 0; l--)
|
// Step 1: Solve linear system A*x = b using LU decomposition
|
||||||
{
|
// LAPACKE_dgesv uses column-major by default, but we use row-major
|
||||||
if (indxr[l] != indxc[l])
|
info = LAPACKE_dgesv(LAPACK_ROW_MAJOR, n, 1, a_copy, n, ipiv, b, 1);
|
||||||
for (k = 0; k < n; k++)
|
|
||||||
{
|
if (info != 0) {
|
||||||
swap = a[k * n + indxr[l]];
|
cout << "gaussj: Singular Matrix (dgesv info=" << info << ")" << endl;
|
||||||
a[k * n + indxr[l]] = a[k * n + indxc[l]];
|
delete[] ipiv;
|
||||||
a[k * n + indxc[l]] = swap;
|
delete[] a_copy;
|
||||||
}
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 2: Compute matrix inverse A^(-1) using LU factorization
|
||||||
|
// First do LU factorization of original matrix a
|
||||||
|
info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, n, n, a, n, ipiv);
|
||||||
|
|
||||||
|
if (info != 0) {
|
||||||
|
cout << "gaussj: Singular Matrix (dgetrf info=" << info << ")" << endl;
|
||||||
|
delete[] ipiv;
|
||||||
|
delete[] a_copy;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Then compute inverse from LU factorization
|
||||||
|
info = LAPACKE_dgetri(LAPACK_ROW_MAJOR, n, a, n, ipiv);
|
||||||
|
|
||||||
|
if (info != 0) {
|
||||||
|
cout << "gaussj: Singular Matrix (dgetri info=" << info << ")" << endl;
|
||||||
|
delete[] ipiv;
|
||||||
|
delete[] a_copy;
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
delete[] indxc;
|
|
||||||
delete[] indxr;
|
|
||||||
delete[] ipiv;
|
delete[] ipiv;
|
||||||
|
delete[] a_copy;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,22 +2,67 @@
|
|||||||
|
|
||||||
include makefile.inc
|
include makefile.inc
|
||||||
|
|
||||||
|
-include AMSS_NCKU_build.mk
|
||||||
|
|
||||||
|
ABE_TYPE ?= $(shell awk '/^[[:space:]]*\#define[[:space:]]+ABEtype/ {print $$3; exit}' macrodef.h 2>/dev/null)
|
||||||
|
|
||||||
|
ifeq ($(USE_TRANSFER_CACHE),auto)
|
||||||
|
ifeq ($(ABE_TYPE),0)
|
||||||
|
EFFECTIVE_USE_TRANSFER_CACHE = 1
|
||||||
|
else
|
||||||
|
EFFECTIVE_USE_TRANSFER_CACHE = 0
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
EFFECTIVE_USE_TRANSFER_CACHE = $(USE_TRANSFER_CACHE)
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(USE_CXX_ESCALAR_KERNEL),1)
|
||||||
|
ifeq ($(ABE_TYPE),1)
|
||||||
|
EFFECTIVE_USE_CXX_ESCALAR_KERNEL = 1
|
||||||
|
else
|
||||||
|
EFFECTIVE_USE_CXX_ESCALAR_KERNEL = 0
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
EFFECTIVE_USE_CXX_ESCALAR_KERNEL = 0
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(EFFECTIVE_USE_CXX_ESCALAR_KERNEL),1)
|
||||||
|
ifeq ($(USE_CXX_KERNELS),0)
|
||||||
|
$(error USE_CXX_ESCALAR_KERNEL=1 requires USE_CXX_KERNELS=1 because bssn_escalar_rhs_c.C reuses the C BSSN kernel)
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
## polint(ordn=6) kernel selector:
|
## polint(ordn=6) kernel selector:
|
||||||
## 1 (default): barycentric fast path
|
## 1 (default): barycentric fast path
|
||||||
## 0 : fallback to Neville path
|
## 0 : fallback to Neville path
|
||||||
POLINT6_USE_BARY ?= 1
|
POLINT6_USE_BARY ?= 1
|
||||||
POLINT6_FLAG = -DPOLINT6_USE_BARYCENTRIC=$(POLINT6_USE_BARY)
|
POLINT6_FLAG = -DPOLINT6_USE_BARYCENTRIC=$(POLINT6_USE_BARY)
|
||||||
|
TRANSFER_CACHE_FLAG = -DBSSN_USE_TRANSFER_CACHE=$(EFFECTIVE_USE_TRANSFER_CACHE)
|
||||||
|
ESCALAR_KERNEL_FLAG = -DBSSN_USE_ESCALAR_C_KERNEL=$(EFFECTIVE_USE_CXX_ESCALAR_KERNEL)
|
||||||
|
|
||||||
## Legacy GNU/OpenMPI flags
|
## ABE build flags selected by PGO_MODE (set in makefile.inc, default: opt)
|
||||||
CXXBASEFLAGS = -O3 -march=native -Wno-deprecated -Dfortran3 -Dnewc $(INTERP_LB_FLAGS)
|
## make -> opt (PGO-guided, maximum performance)
|
||||||
F90BASEFLAGS = -O3 -march=native -cpp -fallow-argument-mismatch $(POLINT6_FLAG)
|
## make PGO_MODE=instrument -> instrument (Phase 1: collect fresh profile data)
|
||||||
|
PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata
|
||||||
|
|
||||||
ifeq ($(PGO_MODE),instrument)
|
ifeq ($(PGO_MODE),instrument)
|
||||||
CXXAPPFLAGS = $(CXXBASEFLAGS)
|
## Phase 1: instrumentation — omit -ipo/-fp-model fast=2 for faster build and numerical stability
|
||||||
f90appflags = $(F90BASEFLAGS)
|
CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
||||||
|
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \
|
||||||
|
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG)
|
||||||
|
f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
||||||
|
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
||||||
else
|
else
|
||||||
CXXAPPFLAGS = $(CXXBASEFLAGS)
|
## opt (default): maximum performance with PGO profile data -fprofile-instr-use=$(PROFDATA) \
|
||||||
f90appflags = $(F90BASEFLAGS)
|
## PGO has been turned off, now tested and found to be negative optimization
|
||||||
|
## INTERP_LB_FLAGS has been turned off too, now tested and found to be negative optimization
|
||||||
|
|
||||||
|
|
||||||
|
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||||
|
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \
|
||||||
|
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG)
|
||||||
|
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||||
|
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
.SUFFIXES: .o .f90 .C .for .cu
|
.SUFFIXES: .o .f90 .C .for .cu
|
||||||
@@ -57,13 +102,16 @@ lopsided_kodis_c.o: lopsided_kodis_c.C
|
|||||||
# ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
# ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||||
|
|
||||||
## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS
|
## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS
|
||||||
TP_OPTFLAGS = $(CXXBASEFLAGS) $(TP_OPENMP_FLAGS)
|
TP_PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata
|
||||||
|
TP_OPTFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||||
|
-fprofile-instr-use=$(TP_PROFDATA) \
|
||||||
|
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
||||||
|
|
||||||
TwoPunctures.o: TwoPunctures.C
|
TwoPunctures.o: TwoPunctures.C
|
||||||
${CXX} $(TP_OPTFLAGS) -c $< -o $@
|
${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@
|
||||||
|
|
||||||
TwoPunctureABE.o: TwoPunctureABE.C
|
TwoPunctureABE.o: TwoPunctureABE.C
|
||||||
${CXX} $(TP_OPTFLAGS) -c $< -o $@
|
${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@
|
||||||
|
|
||||||
# Input files
|
# Input files
|
||||||
|
|
||||||
@@ -72,8 +120,11 @@ ifeq ($(USE_CXX_KERNELS),0)
|
|||||||
# Fortran mode: no C rewrite files; bssn_rhs.o is included via F90FILES below
|
# Fortran mode: no C rewrite files; bssn_rhs.o is included via F90FILES below
|
||||||
CFILES =
|
CFILES =
|
||||||
else
|
else
|
||||||
# C++ mode (default): C rewrite of bssn_rhs and helper kernels
|
# C++ mode (default): C rewrite of bssn/bssn-escalar rhs and helper kernels
|
||||||
CFILES = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_kodis_c.o
|
CFILES = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_kodis_c.o
|
||||||
|
ifeq ($(EFFECTIVE_USE_CXX_ESCALAR_KERNEL),1)
|
||||||
|
CFILES += bssn_escalar_rhs_c.o
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
## RK4 kernel switch (independent from USE_CXX_KERNELS)
|
## RK4 kernel switch (independent from USE_CXX_KERNELS)
|
||||||
@@ -171,7 +222,7 @@ ABEGPU: $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILE
|
|||||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
|
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
|
||||||
|
|
||||||
TwoPunctureABE: $(TwoPunctureFILES)
|
TwoPunctureABE: $(TwoPunctureFILES)
|
||||||
$(CLINKER) $(TP_OPTFLAGS) -o $@ $(TwoPunctureFILES) $(LDLIBS)
|
$(CLINKER) $(TP_OPTFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm *.o ABE ABEGPU TwoPunctureABE make.log -f
|
rm *.o ABE ABEGPU TwoPunctureABE make.log -f
|
||||||
|
|||||||
68
AMSS_NCKU_source/makefile.inc
Normal file → Executable file
68
AMSS_NCKU_source/makefile.inc
Normal file → Executable file
@@ -1,27 +1,33 @@
|
|||||||
## Legacy GNU/OpenMPI toolchain configuration
|
## GCC version (commented out)
|
||||||
|
## filein = -I/usr/include -I/usr/lib/x86_64-linux-gnu/mpich/include -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
|
||||||
|
## filein = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
|
||||||
|
## LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran
|
||||||
|
|
||||||
## OpenMPI wrappers are installed but may not be on PATH.
|
## Intel oneAPI version with oneMKL (Optimized for performance)
|
||||||
OMPI_BIN ?= /usr/lib64/openmpi/bin
|
filein = -I/usr/include/ -I${MKLROOT}/include
|
||||||
|
|
||||||
## Wrapper compilers
|
## Using sequential MKL (OpenMP disabled for better single-threaded performance)
|
||||||
f90 = $(OMPI_BIN)/mpifort
|
## Added -lifcore for Intel Fortran runtime and -limf for Intel math library
|
||||||
f77 = $(OMPI_BIN)/mpifort
|
LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl -liomp5
|
||||||
CXX = $(OMPI_BIN)/mpicxx
|
|
||||||
CC = $(OMPI_BIN)/mpicc
|
|
||||||
CLINKER = $(OMPI_BIN)/mpicxx
|
|
||||||
|
|
||||||
## Extra include flags are not needed when using the OpenMPI wrappers.
|
## Memory allocator switch
|
||||||
filein =
|
## 1 (default) : link Intel oneTBB allocator (libtbbmalloc)
|
||||||
|
## 0 : use system default allocator (ptmalloc)
|
||||||
|
USE_TBBMALLOC ?= 1
|
||||||
|
TBBMALLOC_SO ?= /home/intel/oneapi/2025.3/lib/libtbbmalloc.so
|
||||||
|
ifneq ($(wildcard $(TBBMALLOC_SO)),)
|
||||||
|
TBBMALLOC_LIBS = -Wl,--no-as-needed $(TBBMALLOC_SO) -Wl,--as-needed
|
||||||
|
else
|
||||||
|
TBBMALLOC_LIBS = -Wl,--no-as-needed -ltbbmalloc -Wl,--as-needed
|
||||||
|
endif
|
||||||
|
ifeq ($(USE_TBBMALLOC),1)
|
||||||
|
LDLIBS := $(TBBMALLOC_LIBS) $(LDLIBS)
|
||||||
|
endif
|
||||||
|
|
||||||
## BLAS/LAPACK backend:
|
## PGO build mode switch (ABE only; TwoPunctureABE always uses opt flags)
|
||||||
## OpenBLAS on this system provides BLAS, CBLAS and LAPACK symbols.
|
## opt : (default) maximum performance with PGO profile-guided optimization
|
||||||
BLAS_LAPACK_LIB ?= /lib64/libopenblaso.so.0
|
## instrument : PGO Phase 1 instrumentation to collect fresh profile data
|
||||||
LDLIBS = $(BLAS_LAPACK_LIB) -lgfortran -lpthread -lm -ldl
|
PGO_MODE ?= opt
|
||||||
|
|
||||||
## PGO build mode switch
|
|
||||||
## off : default legacy GNU build without PGO
|
|
||||||
## instrument : accepted for compatibility, currently same as off
|
|
||||||
PGO_MODE ?= off
|
|
||||||
|
|
||||||
## Interp_Points load balance profiling mode
|
## Interp_Points load balance profiling mode
|
||||||
## off : (default) no load balance instrumentation
|
## off : (default) no load balance instrumentation
|
||||||
@@ -42,14 +48,30 @@ endif
|
|||||||
## 0 : fall back to original Fortran kernels
|
## 0 : fall back to original Fortran kernels
|
||||||
USE_CXX_KERNELS ?= 1
|
USE_CXX_KERNELS ?= 1
|
||||||
|
|
||||||
|
## BSSN-EScalar RHS switch
|
||||||
|
## 1 (default) : use BSSN-EScalar C wrapper on the normal patch path
|
||||||
|
## 0 : keep the original Fortran BSSN-EScalar RHS for precision-safe runs
|
||||||
|
## Note: this requires USE_CXX_KERNELS=1 because the wrapper reuses the C BSSN kernel.
|
||||||
|
USE_CXX_ESCALAR_KERNEL ?= 1
|
||||||
|
|
||||||
|
## Cached transfer switch
|
||||||
|
## auto (default): enable for BSSN vacuum, keep other paths on the safe uncached path
|
||||||
|
## 1 : force cached Sync/Restrict/OutBd transfer on evolution hot paths
|
||||||
|
## 0 : force the original uncached transfer path
|
||||||
|
USE_TRANSFER_CACHE ?= auto
|
||||||
|
|
||||||
## RK4 kernel implementation switch
|
## RK4 kernel implementation switch
|
||||||
## 1 (default) : use C/C++ rewrite of rungekutta4_rout
|
## 1 (default) : use C/C++ rewrite of rungekutta4_rout (for optimization experiments)
|
||||||
## 0 : use original Fortran rungekutta4_rout.o
|
## 0 : use original Fortran rungekutta4_rout.o
|
||||||
USE_CXX_RK4 ?= 1
|
USE_CXX_RK4 ?= 1
|
||||||
|
|
||||||
## OpenMP is only used for TwoPunctures on the legacy toolchain.
|
f90 = ifx
|
||||||
TP_OPENMP_FLAGS ?= -fopenmp
|
f77 = ifx
|
||||||
|
CXX = icpx
|
||||||
|
CC = icx
|
||||||
|
CLINKER = mpiicpx
|
||||||
|
|
||||||
Cu = nvcc
|
Cu = nvcc
|
||||||
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include
|
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include
|
||||||
|
#CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -arch compute_13 -code compute_13,sm_13 -Dfortran3 -Dnewc
|
||||||
CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc
|
CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc
|
||||||
|
|||||||
211
BSSN_BUILD_CONFIG_MIGRATION.md
Normal file
211
BSSN_BUILD_CONFIG_MIGRATION.md
Normal file
@@ -0,0 +1,211 @@
|
|||||||
|
# BSSN Build Config Migration
|
||||||
|
|
||||||
|
This note records the build-configuration fix needed when replacing
|
||||||
|
`AMSS_NCKU_Input.py` or `generate_macrodef.py` with a newer upstream version.
|
||||||
|
|
||||||
|
## Problem
|
||||||
|
|
||||||
|
`AMSS_NCKU_source/macrodef.h` is not the authoritative file used by normal
|
||||||
|
runs. `AMSS_NCKU_Program.py` first generates macro files under
|
||||||
|
`input_data.File_directory`, copies `AMSS_NCKU_source` to
|
||||||
|
`<File_directory>/AMSS_NCKU_source_copy`, then copies the generated macro files
|
||||||
|
into that copied source tree and compiles there.
|
||||||
|
|
||||||
|
Therefore, makefile logic must not depend only on the stale
|
||||||
|
`AMSS_NCKU_source/macrodef.h`. The actual equation path must be passed to the
|
||||||
|
copied build tree from the same generation step that creates `macrodef.h`.
|
||||||
|
|
||||||
|
The performance regression was caused by compiling/linking the
|
||||||
|
`BSSN-EScalar` C wrapper into BSSN vacuum builds. For BSSN vacuum (`ABEtype=0`),
|
||||||
|
the build must use:
|
||||||
|
|
||||||
|
```make
|
||||||
|
BSSN_USE_TRANSFER_CACHE=1
|
||||||
|
BSSN_USE_ESCALAR_C_KERNEL=0
|
||||||
|
```
|
||||||
|
|
||||||
|
and must not link `bssn_escalar_rhs_c.o`.
|
||||||
|
|
||||||
|
## Required Migration Steps
|
||||||
|
|
||||||
|
### 1. Add an ABE type helper in `generate_macrodef.py`
|
||||||
|
|
||||||
|
Add a helper that maps `input_data.Equation_Class` to the numeric `ABEtype`.
|
||||||
|
Use the same mapping as `macrodef.h`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def get_abe_type():
|
||||||
|
if ( input_data.Equation_Class == "BSSN" ):
|
||||||
|
return 0
|
||||||
|
elif ( input_data.Equation_Class == "BSSN-EScalar" ):
|
||||||
|
return 1
|
||||||
|
elif ( input_data.Equation_Class == "BSSN-EM" ):
|
||||||
|
return 3
|
||||||
|
elif ( input_data.Equation_Class == "Z4C" ):
|
||||||
|
return 2
|
||||||
|
else:
|
||||||
|
raise ValueError("Equation_Class setting error!!!")
|
||||||
|
```
|
||||||
|
|
||||||
|
Update `generate_macrodef_h()` to print `#define ABEtype {get_abe_type()}`
|
||||||
|
instead of duplicating the if/elif mapping.
|
||||||
|
|
||||||
|
### 2. Generate a makefile fragment
|
||||||
|
|
||||||
|
In `generate_macrodef.py`, add:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def generate_build_config():
|
||||||
|
file1 = open(os.path.join(input_data.File_directory, "AMSS_NCKU_build.mk"), "w")
|
||||||
|
print("# Generated by generate_macrodef.py; do not edit manually.", file=file1)
|
||||||
|
print(f"ABE_TYPE := {get_abe_type()}", file=file1)
|
||||||
|
file1.close()
|
||||||
|
```
|
||||||
|
|
||||||
|
This file is the build-time authority for the equation path.
|
||||||
|
|
||||||
|
### 3. Call and copy the generated build config
|
||||||
|
|
||||||
|
In `AMSS_NCKU_Program.py`, after generating `macrodef.h` and `macrodef.fh`, call:
|
||||||
|
|
||||||
|
```python
|
||||||
|
generate_macrodef.generate_build_config()
|
||||||
|
print(" AMSS-NCKU build config AMSS_NCKU_build.mk has been generated. ")
|
||||||
|
```
|
||||||
|
|
||||||
|
When copying generated files into `AMSS_NCKU_source_copy`, also copy:
|
||||||
|
|
||||||
|
```python
|
||||||
|
build_config_path = os.path.join(File_directory, "AMSS_NCKU_build.mk")
|
||||||
|
shutil.copy2(build_config_path, AMSS_NCKU_source_copy)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Make the source makefile consume the generated config
|
||||||
|
|
||||||
|
At the top of `AMSS_NCKU_source/makefile`, after `include makefile.inc`, add:
|
||||||
|
|
||||||
|
```make
|
||||||
|
-include AMSS_NCKU_build.mk
|
||||||
|
|
||||||
|
ABE_TYPE ?= $(shell awk '/^[[:space:]]*\#define[[:space:]]+ABEtype/ {print $$3; exit}' macrodef.h 2>/dev/null)
|
||||||
|
```
|
||||||
|
|
||||||
|
The generated `AMSS_NCKU_build.mk` is used during normal Python-driven builds.
|
||||||
|
The fallback keeps manual source-tree builds usable.
|
||||||
|
|
||||||
|
### 5. Gate path-specific build options by `ABE_TYPE`
|
||||||
|
|
||||||
|
Use effective build switches:
|
||||||
|
|
||||||
|
```make
|
||||||
|
ifeq ($(USE_TRANSFER_CACHE),auto)
|
||||||
|
ifeq ($(ABE_TYPE),0)
|
||||||
|
EFFECTIVE_USE_TRANSFER_CACHE = 1
|
||||||
|
else
|
||||||
|
EFFECTIVE_USE_TRANSFER_CACHE = 0
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
EFFECTIVE_USE_TRANSFER_CACHE = $(USE_TRANSFER_CACHE)
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(USE_CXX_ESCALAR_KERNEL),1)
|
||||||
|
ifeq ($(ABE_TYPE),1)
|
||||||
|
EFFECTIVE_USE_CXX_ESCALAR_KERNEL = 1
|
||||||
|
else
|
||||||
|
EFFECTIVE_USE_CXX_ESCALAR_KERNEL = 0
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
EFFECTIVE_USE_CXX_ESCALAR_KERNEL = 0
|
||||||
|
endif
|
||||||
|
|
||||||
|
TRANSFER_CACHE_FLAG = -DBSSN_USE_TRANSFER_CACHE=$(EFFECTIVE_USE_TRANSFER_CACHE)
|
||||||
|
ESCALAR_KERNEL_FLAG = -DBSSN_USE_ESCALAR_C_KERNEL=$(EFFECTIVE_USE_CXX_ESCALAR_KERNEL)
|
||||||
|
```
|
||||||
|
|
||||||
|
Only add `bssn_escalar_rhs_c.o` when the effective EScalar C kernel switch is
|
||||||
|
enabled:
|
||||||
|
|
||||||
|
```make
|
||||||
|
ifeq ($(EFFECTIVE_USE_CXX_ESCALAR_KERNEL),1)
|
||||||
|
CFILES += bssn_escalar_rhs_c.o
|
||||||
|
endif
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6. Use safe transfer-cache default
|
||||||
|
|
||||||
|
In `AMSS_NCKU_source/makefile.inc`, keep:
|
||||||
|
|
||||||
|
```make
|
||||||
|
USE_TRANSFER_CACHE ?= auto
|
||||||
|
```
|
||||||
|
|
||||||
|
With the effective switch logic above, this enables cached transfer for BSSN
|
||||||
|
vacuum while keeping non-BSSN paths on the uncached path by default.
|
||||||
|
|
||||||
|
## Verification Checklist
|
||||||
|
|
||||||
|
Run these checks after migrating:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 -c "import generate_macrodef; generate_macrodef.generate_build_config()"
|
||||||
|
cat GW150914/AMSS_NCKU_build.mk
|
||||||
|
```
|
||||||
|
|
||||||
|
For BSSN, the generated file should contain:
|
||||||
|
|
||||||
|
```make
|
||||||
|
ABE_TYPE := 0
|
||||||
|
```
|
||||||
|
|
||||||
|
Dry-run the copied or source makefile:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
make -n -B INTERP_LB_MODE=off ABE | grep -E 'BSSN_USE_TRANSFER_CACHE|BSSN_USE_ESCALAR_C_KERNEL|bssn_escalar_rhs_c'
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected BSSN result:
|
||||||
|
|
||||||
|
```text
|
||||||
|
-DBSSN_USE_TRANSFER_CACHE=1 -DBSSN_USE_ESCALAR_C_KERNEL=0
|
||||||
|
```
|
||||||
|
|
||||||
|
and no `bssn_escalar_rhs_c.o` in the final link command.
|
||||||
|
|
||||||
|
Run the full workflow:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 AMSS_NCKU_Program.py
|
||||||
|
```
|
||||||
|
|
||||||
|
For the 10-step BSSN test, compare coordinate output:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 - <<'PY'
|
||||||
|
from pathlib import Path
|
||||||
|
old = Path('../GW150914-06457/AMSS_NCKU_output/bssn_BH.dat')
|
||||||
|
new = Path('GW150914/AMSS_NCKU_output/bssn_BH.dat')
|
||||||
|
|
||||||
|
def rows(path):
|
||||||
|
out = []
|
||||||
|
for line in path.read_text().splitlines():
|
||||||
|
if not line.strip() or line.lstrip().startswith('#'):
|
||||||
|
continue
|
||||||
|
out.append([float(x) for x in line.split()])
|
||||||
|
return out
|
||||||
|
|
||||||
|
ro, rn = rows(old), rows(new)
|
||||||
|
n = min(len(ro), len(rn))
|
||||||
|
max_abs = 0.0
|
||||||
|
for i in range(n):
|
||||||
|
for a, b in zip(ro[i], rn[i]):
|
||||||
|
max_abs = max(max_abs, abs(a - b))
|
||||||
|
print(f"old_rows={len(ro)} new_rows={len(rn)} compared_rows={n}")
|
||||||
|
print(f"max_abs_diff={max_abs:.17g}")
|
||||||
|
PY
|
||||||
|
```
|
||||||
|
|
||||||
|
For the validated migration, the first 10 rows matched exactly:
|
||||||
|
|
||||||
|
```text
|
||||||
|
max_abs_diff=0
|
||||||
|
```
|
||||||
@@ -97,9 +97,7 @@ Here, we take the Ubuntu 22.04 system as an example
|
|||||||
|
|
||||||
Modify the makefile.inc file in the AMSS_NCKU_source directory and change the settings according to your computer.
|
Modify the makefile.inc file in the AMSS_NCKU_source directory and change the settings according to your computer.
|
||||||
|
|
||||||
The default configuration in this branch uses GNU compilers through the OpenMPI wrappers under `/usr/lib64/openmpi/bin`.
|
The settings for the Ubuntu 22.04 system do not need to be modified.
|
||||||
|
|
||||||
If your OpenMPI installation is in another location, update `OMPI_BIN` in `AMSS_NCKU_source/makefile.inc` or export `AMSS_OPENMPI_BIN` before running the Python launcher.
|
|
||||||
|
|
||||||
1. Enter the AMSS-NCKU Python code folder and modify the input.
|
1. Enter the AMSS-NCKU Python code folder and modify the input.
|
||||||
|
|
||||||
|
|||||||
@@ -12,6 +12,37 @@ import os
|
|||||||
import AMSS_NCKU_Input as input_data ## import program input file
|
import AMSS_NCKU_Input as input_data ## import program input file
|
||||||
|
|
||||||
|
|
||||||
|
##################################################################
|
||||||
|
|
||||||
|
def get_abe_type():
|
||||||
|
if ( input_data.Equation_Class == "BSSN" ):
|
||||||
|
return 0
|
||||||
|
elif ( input_data.Equation_Class == "BSSN-EScalar" ):
|
||||||
|
return 1
|
||||||
|
elif ( input_data.Equation_Class == "BSSN-EM" ):
|
||||||
|
return 3
|
||||||
|
elif ( input_data.Equation_Class == "Z4C" ):
|
||||||
|
return 2
|
||||||
|
else:
|
||||||
|
raise ValueError("Equation_Class setting error!!!")
|
||||||
|
|
||||||
|
|
||||||
|
##################################################################
|
||||||
|
|
||||||
|
## Generate the makefile fragment used by the copied source tree.
|
||||||
|
## The source-tree macrodef.h is not authoritative because macro files
|
||||||
|
## are regenerated under File_directory for each run.
|
||||||
|
|
||||||
|
def generate_build_config():
|
||||||
|
|
||||||
|
file1 = open( os.path.join(input_data.File_directory, "AMSS_NCKU_build.mk"), "w")
|
||||||
|
|
||||||
|
print( "# Generated by generate_macrodef.py; do not edit manually.", file=file1 )
|
||||||
|
print( f"ABE_TYPE := {get_abe_type()}", file=file1 )
|
||||||
|
|
||||||
|
file1.close()
|
||||||
|
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
|
|
||||||
## Generate the macro file macrodef.h according to user settings
|
## Generate the macro file macrodef.h according to user settings
|
||||||
@@ -58,19 +89,10 @@ def generate_macrodef_h():
|
|||||||
# 2: Z4c vacuum
|
# 2: Z4c vacuum
|
||||||
# 3: coupled to Maxwell field
|
# 3: coupled to Maxwell field
|
||||||
|
|
||||||
if ( input_data.Equation_Class == "BSSN" ):
|
try:
|
||||||
print( "#define ABEtype 0", file=file1 )
|
print( f"#define ABEtype {get_abe_type()}", file=file1 )
|
||||||
print( file=file1 )
|
print( file=file1 )
|
||||||
elif ( input_data.Equation_Class == "BSSN-EScalar" ):
|
except ValueError:
|
||||||
print( "#define ABEtype 1", file=file1 )
|
|
||||||
print( file=file1 )
|
|
||||||
elif ( input_data.Equation_Class == "BSSN-EM" ):
|
|
||||||
print( "#define ABEtype 3", file=file1 )
|
|
||||||
print( file=file1 )
|
|
||||||
elif ( input_data.Equation_Class == "Z4C" ):
|
|
||||||
print( "#define ABEtype 2", file=file1 )
|
|
||||||
print( file=file1 )
|
|
||||||
else:
|
|
||||||
print( "Equation_Class setting error!!!" )
|
print( "Equation_Class setting error!!!" )
|
||||||
print()
|
print()
|
||||||
print( "# Equation type #define ABEtype setting error!!!", file=file1 )
|
print( "# Equation type #define ABEtype setting error!!!", file=file1 )
|
||||||
|
|||||||
@@ -9,7 +9,6 @@
|
|||||||
|
|
||||||
|
|
||||||
import AMSS_NCKU_Input as input_data
|
import AMSS_NCKU_Input as input_data
|
||||||
import os
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
|
|
||||||
@@ -53,8 +52,6 @@ NUMACTL_CPU_BIND = get_last_n_cores_per_socket(n=32)
|
|||||||
|
|
||||||
## Build parallelism: match the number of bound cores
|
## Build parallelism: match the number of bound cores
|
||||||
BUILD_JOBS = 64
|
BUILD_JOBS = 64
|
||||||
OPENMPI_BIN = os.environ.get("AMSS_OPENMPI_BIN", "/usr/lib64/openmpi/bin")
|
|
||||||
MPI_RUNNER = os.path.join(OPENMPI_BIN, "mpirun")
|
|
||||||
|
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
@@ -150,11 +147,11 @@ def run_ABE():
|
|||||||
## Define the command to run; cast other values to strings as needed
|
## Define the command to run; cast other values to strings as needed
|
||||||
|
|
||||||
if (input_data.GPU_Calculation == "no"):
|
if (input_data.GPU_Calculation == "no"):
|
||||||
mpi_command = NUMACTL_CPU_BIND + " " + MPI_RUNNER + " -np " + str(input_data.MPI_processes) + " ./ABE"
|
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
||||||
#mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
#mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
||||||
mpi_command_outfile = "ABE_out.log"
|
mpi_command_outfile = "ABE_out.log"
|
||||||
elif (input_data.GPU_Calculation == "yes"):
|
elif (input_data.GPU_Calculation == "yes"):
|
||||||
mpi_command = NUMACTL_CPU_BIND + " " + MPI_RUNNER + " -np " + str(input_data.MPI_processes) + " ./ABEGPU"
|
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
|
||||||
mpi_command_outfile = "ABEGPU_out.log"
|
mpi_command_outfile = "ABEGPU_out.log"
|
||||||
|
|
||||||
## Execute the MPI command and stream output
|
## Execute the MPI command and stream output
|
||||||
|
|||||||
Reference in New Issue
Block a user