Compare commits

..

5 Commits

23 changed files with 2101 additions and 1803 deletions

7
.gitignore vendored
View File

@@ -1,6 +1,5 @@
__pycache__
GW150914
GW150914-origin
docs
*.tmp
GW150914*
.codex
docs/

View File

@@ -9,6 +9,16 @@
##################################################################
##################################################################
## Guard against re-execution by multiprocessing child processes.
## Without this, using 'spawn' or 'forkserver' context would cause every
## worker to re-run the entire script.
if __name__ != '__main__':
import sys as _sys
_sys.exit(0)
##################################################################
## Print program introduction
@@ -424,26 +434,31 @@ print(
import plot_xiaoqu
import plot_GW_strain_amplitude_xiaoqu
from parallel_plot_helper import run_plot_tasks_parallel
plot_tasks = []
## Plot black hole trajectory
plot_xiaoqu.generate_puncture_orbit_plot( binary_results_directory, figure_directory )
plot_xiaoqu.generate_puncture_orbit_plot3D( binary_results_directory, figure_directory )
plot_tasks.append( ( plot_xiaoqu.generate_puncture_orbit_plot, (binary_results_directory, figure_directory) ) )
plot_tasks.append( ( plot_xiaoqu.generate_puncture_orbit_plot3D, (binary_results_directory, figure_directory) ) )
## Plot black hole separation vs. time
plot_xiaoqu.generate_puncture_distence_plot( binary_results_directory, figure_directory )
plot_tasks.append( ( plot_xiaoqu.generate_puncture_distence_plot, (binary_results_directory, figure_directory) ) )
## Plot gravitational waveforms (psi4 and strain amplitude)
for i in range(input_data.Detector_Number):
plot_xiaoqu.generate_gravitational_wave_psi4_plot( binary_results_directory, figure_directory, i )
plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot( binary_results_directory, figure_directory, i )
plot_tasks.append( ( plot_xiaoqu.generate_gravitational_wave_psi4_plot, (binary_results_directory, figure_directory, i) ) )
plot_tasks.append( ( plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot, (binary_results_directory, figure_directory, i) ) )
## Plot ADM mass evolution
for i in range(input_data.Detector_Number):
plot_xiaoqu.generate_ADMmass_plot( binary_results_directory, figure_directory, i )
plot_tasks.append( ( plot_xiaoqu.generate_ADMmass_plot, (binary_results_directory, figure_directory, i) ) )
## Plot Hamiltonian constraint violation over time
for i in range(input_data.grid_level):
plot_xiaoqu.generate_constraint_check_plot( binary_results_directory, figure_directory, i )
plot_tasks.append( ( plot_xiaoqu.generate_constraint_check_plot, (binary_results_directory, figure_directory, i) ) )
run_plot_tasks_parallel(plot_tasks)
## Plot stored binary data
plot_xiaoqu.generate_binary_data_plot( binary_results_directory, figure_directory )

View File

@@ -1,279 +0,0 @@
#!/usr/bin/env python3
"""
AMSS-NCKU GW150914 Simulation Regression Test Script
Verification Requirements:
1. XY-plane trajectory RMS error < 1% (Optimized vs. baseline, max of BH1 and BH2)
2. ADM constraint violation < 2 (Grid Level 0)
RMS Calculation Method:
- Computes trajectory deviation on the XY plane independently for BH1 and BH2
- For each black hole: RMS = sqrt((1/M) * sum((Δr_i / r_i^max)^2)) × 100%
- Final RMS = max(RMS_BH1, RMS_BH2)
Usage: python3 AMSS_NCKU_Verify_ASC26.py [output_dir]
Default: output_dir = GW150914/AMSS_NCKU_output
Reference: GW150914-origin (baseline simulation)
"""
import numpy as np
import sys
import os
# ANSI Color Codes
class Color:
GREEN = '\033[92m'
RED = '\033[91m'
YELLOW = '\033[93m'
BLUE = '\033[94m'
BOLD = '\033[1m'
RESET = '\033[0m'
def get_status_text(passed):
if passed:
return f"{Color.GREEN}{Color.BOLD}PASS{Color.RESET}"
else:
return f"{Color.RED}{Color.BOLD}FAIL{Color.RESET}"
def load_bh_trajectory(filepath):
"""Load black hole trajectory data"""
data = np.loadtxt(filepath)
return {
'time': data[:, 0],
'x1': data[:, 1], 'y1': data[:, 2], 'z1': data[:, 3],
'x2': data[:, 4], 'y2': data[:, 5], 'z2': data[:, 6]
}
def load_constraint_data(filepath):
"""Load constraint violation data"""
data = []
with open(filepath, 'r') as f:
for line in f:
if line.startswith('#'):
continue
parts = line.split()
if len(parts) >= 8:
data.append([float(x) for x in parts[:8]])
return np.array(data)
def calculate_rms_error(bh_data_ref, bh_data_target):
"""
Calculate trajectory-based RMS error on the XY plane between baseline and optimized simulations.
This function computes the RMS error independently for BH1 and BH2 trajectories,
then returns the maximum of the two as the final RMS error metric.
For each black hole, the RMS is calculated as:
RMS = sqrt( (1/M) * sum( (Δr_i / r_i^max)^2 ) ) × 100%
where:
Δr_i = sqrt((x_ref,i - x_new,i)^2 + (y_ref,i - y_new,i)^2)
r_i^max = max(sqrt(x_ref,i^2 + y_ref,i^2), sqrt(x_new,i^2 + y_new,i^2))
Args:
bh_data_ref: Reference (baseline) trajectory data
bh_data_target: Target (optimized) trajectory data
Returns:
rms_value: Final RMS error as a percentage (max of BH1 and BH2)
error: Error message if any
"""
# Align data: truncate to the length of the shorter dataset
M = min(len(bh_data_ref['time']), len(bh_data_target['time']))
if M < 10:
return None, "Insufficient data points for comparison"
# Extract XY coordinates for both black holes
x1_ref = bh_data_ref['x1'][:M]
y1_ref = bh_data_ref['y1'][:M]
x2_ref = bh_data_ref['x2'][:M]
y2_ref = bh_data_ref['y2'][:M]
x1_new = bh_data_target['x1'][:M]
y1_new = bh_data_target['y1'][:M]
x2_new = bh_data_target['x2'][:M]
y2_new = bh_data_target['y2'][:M]
# Calculate RMS for BH1
delta_r1 = np.sqrt((x1_ref - x1_new)**2 + (y1_ref - y1_new)**2)
r1_ref = np.sqrt(x1_ref**2 + y1_ref**2)
r1_new = np.sqrt(x1_new**2 + y1_new**2)
r1_max = np.maximum(r1_ref, r1_new)
# Calculate RMS for BH2
delta_r2 = np.sqrt((x2_ref - x2_new)**2 + (y2_ref - y2_new)**2)
r2_ref = np.sqrt(x2_ref**2 + y2_ref**2)
r2_new = np.sqrt(x2_new**2 + y2_new**2)
r2_max = np.maximum(r2_ref, r2_new)
# Avoid division by zero for BH1
valid_mask1 = r1_max > 1e-15
if np.sum(valid_mask1) < 10:
return None, "Insufficient valid data points for BH1"
terms1 = (delta_r1[valid_mask1] / r1_max[valid_mask1])**2
rms_bh1 = np.sqrt(np.mean(terms1)) * 100
# Avoid division by zero for BH2
valid_mask2 = r2_max > 1e-15
if np.sum(valid_mask2) < 10:
return None, "Insufficient valid data points for BH2"
terms2 = (delta_r2[valid_mask2] / r2_max[valid_mask2])**2
rms_bh2 = np.sqrt(np.mean(terms2)) * 100
# Final RMS is the maximum of BH1 and BH2
rms_final = max(rms_bh1, rms_bh2)
return rms_final, None
def analyze_constraint_violation(constraint_data, n_levels=9):
"""
Analyze ADM constraint violation
Return maximum constraint violation for Grid Level 0
"""
# Extract Grid Level 0 data (first entry for each time step)
level0_data = constraint_data[::n_levels]
# Calculate maximum absolute value for each constraint
results = {
'Ham': np.max(np.abs(level0_data[:, 1])),
'Px': np.max(np.abs(level0_data[:, 2])),
'Py': np.max(np.abs(level0_data[:, 3])),
'Pz': np.max(np.abs(level0_data[:, 4])),
'Gx': np.max(np.abs(level0_data[:, 5])),
'Gy': np.max(np.abs(level0_data[:, 6])),
'Gz': np.max(np.abs(level0_data[:, 7]))
}
results['max_violation'] = max(results.values())
return results
def print_header():
"""Print report header"""
print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
print(Color.BOLD + " AMSS-NCKU GW150914 Simulation Regression Test Report" + Color.RESET)
print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
def print_rms_results(rms_rel, error, threshold=1.0):
"""Print RMS error results"""
print(f"\n{Color.BOLD}1. RMS Error Analysis (Baseline vs Optimized){Color.RESET}")
print("-" * 45)
if error:
print(f" {Color.RED}Error: {error}{Color.RESET}")
return False
passed = rms_rel < threshold
print(f" RMS relative error: {rms_rel:.4f}%")
print(f" Requirement: < {threshold}%")
print(f" Status: {get_status_text(passed)}")
return passed
def print_constraint_results(results, threshold=2.0):
"""Print constraint violation results"""
print(f"\n{Color.BOLD}2. ADM Constraint Violation Analysis (Grid Level 0){Color.RESET}")
print("-" * 45)
names = ['Ham', 'Px', 'Py', 'Pz', 'Gx', 'Gy', 'Gz']
for i, name in enumerate(names):
print(f" Max |{name:3}|: {results[name]:.6f}", end=" ")
if (i + 1) % 2 == 0: print()
if len(names) % 2 != 0: print()
passed = results['max_violation'] < threshold
print(f"\n Maximum violation: {results['max_violation']:.6f}")
print(f" Requirement: < {threshold}")
print(f" Status: {get_status_text(passed)}")
return passed
def print_summary(rms_passed, constraint_passed):
"""Print summary"""
print("\n" + Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
print(Color.BOLD + "Verification Summary" + Color.RESET)
print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET)
all_passed = rms_passed and constraint_passed
res_rms = get_status_text(rms_passed)
res_con = get_status_text(constraint_passed)
print(f" [1] RMS trajectory check: {res_rms}")
print(f" [2] ADM constraint check: {res_con}")
final_status = f"{Color.GREEN}{Color.BOLD}ALL CHECKS PASSED{Color.RESET}" if all_passed else f"{Color.RED}{Color.BOLD}SOME CHECKS FAILED{Color.RESET}"
print(f"\n Overall result: {final_status}")
print(Color.BLUE + Color.BOLD + "=" * 65 + Color.RESET + "\n")
return all_passed
def main():
# Determine target (optimized) output directory
if len(sys.argv) > 1:
target_dir = sys.argv[1]
else:
script_dir = os.path.dirname(os.path.abspath(__file__))
target_dir = os.path.join(script_dir, "GW150914/AMSS_NCKU_output")
# Determine reference (baseline) directory
script_dir = os.path.dirname(os.path.abspath(__file__))
reference_dir = os.path.join(script_dir, "GW150914-origin/AMSS_NCKU_output")
# Data file paths
bh_file_ref = os.path.join(reference_dir, "bssn_BH.dat")
bh_file_target = os.path.join(target_dir, "bssn_BH.dat")
constraint_file = os.path.join(target_dir, "bssn_constraint.dat")
# Check if files exist
if not os.path.exists(bh_file_ref):
print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Baseline trajectory file not found: {bh_file_ref}")
sys.exit(1)
if not os.path.exists(bh_file_target):
print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Target trajectory file not found: {bh_file_target}")
sys.exit(1)
if not os.path.exists(constraint_file):
print(f"{Color.RED}{Color.BOLD}Error:{Color.RESET} Constraint data file not found: {constraint_file}")
sys.exit(1)
# Print header
print_header()
print(f"\n{Color.BOLD}Reference (Baseline):{Color.RESET} {Color.BLUE}{reference_dir}{Color.RESET}")
print(f"{Color.BOLD}Target (Optimized): {Color.RESET} {Color.BLUE}{target_dir}{Color.RESET}")
# Load data
bh_data_ref = load_bh_trajectory(bh_file_ref)
bh_data_target = load_bh_trajectory(bh_file_target)
constraint_data = load_constraint_data(constraint_file)
# Calculate RMS error
rms_rel, error = calculate_rms_error(bh_data_ref, bh_data_target)
rms_passed = print_rms_results(rms_rel, error)
# Analyze constraint violation
constraint_results = analyze_constraint_violation(constraint_data)
constraint_passed = print_constraint_results(constraint_results)
# Print summary
all_passed = print_summary(rms_passed, constraint_passed)
# Return exit code
sys.exit(0 if all_passed else 1)
if __name__ == "__main__":
main()

View File

@@ -37,51 +37,57 @@ close(77)
end program checkFFT
#endif
!-------------
! Optimized FFT using Intel oneMKL DFTI
! Mathematical equivalence: Standard DFT definition
! Forward (isign=1): X[k] = sum_{n=0}^{N-1} x[n] * exp(-2*pi*i*k*n/N)
! Backward (isign=-1): X[k] = sum_{n=0}^{N-1} x[n] * exp(+2*pi*i*k*n/N)
! Input/Output: dataa is interleaved complex array [Re(0),Im(0),Re(1),Im(1),...]
!-------------
SUBROUTINE four1(dataa,nn,isign)
use MKL_DFTI
implicit none
INTEGER, intent(in) :: isign, nn
DOUBLE PRECISION, dimension(2*nn), intent(inout) :: dataa
type(DFTI_DESCRIPTOR), pointer :: desc
integer :: status
! Create DFTI descriptor for 1D complex-to-complex transform
status = DftiCreateDescriptor(desc, DFTI_DOUBLE, DFTI_COMPLEX, 1, nn)
if (status /= 0) return
! Set input/output storage as interleaved complex (default)
status = DftiSetValue(desc, DFTI_PLACEMENT, DFTI_INPLACE)
if (status /= 0) then
status = DftiFreeDescriptor(desc)
return
INTEGER::isign,nn
double precision,dimension(2*nn)::dataa
INTEGER::i,istep,j,m,mmax,n
double precision::tempi,tempr
DOUBLE PRECISION::theta,wi,wpi,wpr,wr,wtemp
n=2*nn
j=1
do i=1,n,2
if(j.gt.i)then
tempr=dataa(j)
tempi=dataa(j+1)
dataa(j)=dataa(i)
dataa(j+1)=dataa(i+1)
dataa(i)=tempr
dataa(i+1)=tempi
endif
m=nn
1 if ((m.ge.2).and.(j.gt.m)) then
j=j-m
m=m/2
goto 1
endif
j=j+m
enddo
mmax=2
2 if (n.gt.mmax) then
istep=2*mmax
theta=6.28318530717959d0/(isign*mmax)
wpr=-2.d0*sin(0.5d0*theta)**2
wpi=sin(theta)
wr=1.d0
wi=0.d0
do m=1,mmax,2
do i=m,n,istep
j=i+mmax
tempr=sngl(wr)*dataa(j)-sngl(wi)*dataa(j+1)
tempi=sngl(wr)*dataa(j+1)+sngl(wi)*dataa(j)
dataa(j)=dataa(i)-tempr
dataa(j+1)=dataa(i+1)-tempi
dataa(i)=dataa(i)+tempr
dataa(i+1)=dataa(i+1)+tempi
enddo
wtemp=wr
wr=wr*wpr-wi*wpi+wr
wi=wi*wpr+wtemp*wpi+wi
enddo
mmax=istep
goto 2
endif
! Commit the descriptor
status = DftiCommitDescriptor(desc)
if (status /= 0) then
status = DftiFreeDescriptor(desc)
return
endif
! Execute FFT based on direction
if (isign == 1) then
! Forward FFT: exp(-2*pi*i*k*n/N)
status = DftiComputeForward(desc, dataa)
else
! Backward FFT: exp(+2*pi*i*k*n/N)
status = DftiComputeBackward(desc, dataa)
endif
! Free descriptor
status = DftiFreeDescriptor(desc)
return
END SUBROUTINE four1

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,8 @@
#ifndef TWO_PUNCTURES_H
#define TWO_PUNCTURES_H
#include <omp.h>
#define StencilSize 19
#define N_PlaneRelax 1
#define NRELAX 200
@@ -42,32 +43,17 @@ private:
int ntotal;
// Pre-allocated workspace buffers for hot-path allocation elimination
// LineRelax_be workspace (sized for n2)
double *ws_diag_be, *ws_e_be, *ws_f_be, *ws_b_be, *ws_x_be;
// LineRelax_al workspace (sized for n1)
double *ws_diag_al, *ws_e_al, *ws_f_al, *ws_b_al, *ws_x_al;
// ThomasAlgorithm workspace (sized for max(n1,n2))
double *ws_thomas_y;
// JFD_times_dv workspace (sized for nvar)
double *ws_jfd_values;
derivs ws_jfd_dU, ws_jfd_U;
// chebft_Zeros workspace (sized for max(n1,n2,n3)+1)
double *ws_cheb_c;
// fourft workspace (sized for max(n1,n2,n3)/2+1 each)
double *ws_four_a, *ws_four_b;
// Derivatives_AB3 workspace
double *ws_deriv_p, *ws_deriv_dp, *ws_deriv_d2p;
double *ws_deriv_q, *ws_deriv_dq;
double *ws_deriv_r, *ws_deriv_dr;
int *ws_deriv_indx;
// F_of_v workspace
double *ws_fov_sources;
double *ws_fov_values;
derivs ws_fov_U;
// J_times_dv workspace
double *ws_jtdv_values;
derivs ws_jtdv_dU, ws_jtdv_U;
// ===== Precomputed spectral derivative matrices =====
double *D1_A, *D2_A;
double *D1_B, *D2_B;
double *DF1_phi, *DF2_phi;
// ===== Pre-allocated workspace for LineRelax (per-thread) =====
int max_threads;
double **ws_diag_be, **ws_e_be, **ws_f_be, **ws_b_be, **ws_x_be;
double **ws_l_be, **ws_u_be, **ws_d_be, **ws_y_be;
double **ws_diag_al, **ws_e_al, **ws_f_al, **ws_b_al, **ws_x_al;
double **ws_l_al, **ws_u_al, **ws_d_al, **ws_y_al;
struct parameters
{
@@ -85,6 +71,28 @@ public:
int Newtonmaxit);
~TwoPunctures();
// 02/07: New/modified methods
void allocate_workspace();
void free_workspace();
void precompute_derivative_matrices();
void build_cheb_deriv_matrices(int n, double *D1, double *D2);
void build_fourier_deriv_matrices(int N, double *DF1, double *DF2);
void Derivatives_AB3_MatMul(int nvar, int n1, int n2, int n3, derivs v);
void ThomasAlgorithm_ws(int N, double *b, double *a, double *c, double *x, double *q,
double *l, double *u_ws, double *d, double *y);
void LineRelax_be_omp(double *dv,
int const i, int const k, int const nvar,
int const n1, int const n2, int const n3,
double const *rhs, int const *ncols, int **cols,
double **JFD, int tid);
void LineRelax_al_omp(double *dv,
int const j, int const k, int const nvar,
int const n1, int const n2, int const n3,
double const *rhs, int const *ncols,
int **cols, double **JFD, int tid);
void relax_omp(double *dv, int const nvar, int const n1, int const n2, int const n3,
double const *rhs, int const *ncols, int **cols, double **JFD);
void Solve();
void set_initial_guess(derivs v);
int index(int i, int j, int k, int l, int a, int b, int c, int d);
@@ -143,23 +151,11 @@ public:
double BY_KKofxyz(double x, double y, double z);
void SetMatrix_JFD(int nvar, int n1, int n2, int n3, derivs u, int *ncols, int **cols, double **Matrix);
void J_times_dv(int nvar, int n1, int n2, int n3, derivs dv, double *Jdv, derivs u);
void relax(double *dv, int const nvar, int const n1, int const n2, int const n3,
double const *rhs, int const *ncols, int **cols, double **JFD);
void LineRelax_be(double *dv,
int const i, int const k, int const nvar,
int const n1, int const n2, int const n3,
double const *rhs, int const *ncols, int **cols,
double **JFD);
void JFD_times_dv(int i, int j, int k, int nvar, int n1, int n2,
int n3, derivs dv, derivs u, double *values);
void LinEquations(double A, double B, double X, double R,
double x, double r, double phi,
double y, double z, derivs dU, derivs U, double *values);
void LineRelax_al(double *dv,
int const j, int const k, int const nvar,
int const n1, int const n2, int const n3,
double const *rhs, int const *ncols,
int **cols, double **JFD);
void ThomasAlgorithm(int N, double *b, double *a, double *c, double *x, double *q);
void Save(char *fname);
// provided by Vasileios Paschalidis (vpaschal@illinois.edu)

View File

@@ -83,10 +83,6 @@
real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz
real*8, dimension(ex(1),ex(2),ex(3)) :: gupyy,gupyz,gupzz
! shared work arrays (memory pool) to avoid repeated allocation in subroutines
real*8, dimension(-1:ex(1),-1:ex(2),-1:ex(3)) :: fh_work2 ! for fderivs_fh/fdderivs_fh (ghost_width=2)
real*8, dimension(-2:ex(1),-2:ex(2),-2:ex(3)) :: fh_work3 ! for kodis_fh/lopsided_fh (ghost_width=3)
real*8,dimension(3) ::SSS,AAS,ASA,SAA,ASS,SAS,SSA
real*8 :: dX, dY, dZ, PI
real*8, parameter :: ZEO = 0.d0,ONE = 1.D0, TWO = 2.D0, FOUR = 4.D0
@@ -110,8 +106,7 @@
call getpbh(BHN,Porg,Mass)
#endif
!!! sanity check (disabled in production builds for performance)
#ifdef DEBUG
!!! sanity check
dX = sum(chi)+sum(trK)+sum(dxx)+sum(gxy)+sum(gxz)+sum(dyy)+sum(gyz)+sum(dzz) &
+sum(Axx)+sum(Axy)+sum(Axz)+sum(Ayy)+sum(Ayz)+sum(Azz) &
+sum(Gamx)+sum(Gamy)+sum(Gamz) &
@@ -141,7 +136,6 @@
gont = 1
return
endif
#endif
PI = dacos(-ONE)
@@ -155,22 +149,22 @@
gyy = dyy + ONE
gzz = dzz + ONE
call fderivs_fh(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev,fh_work2)
call fderivs_fh(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev,fh_work2)
call fderivs_fh(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev,fh_work2)
call fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev)
call fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev)
call fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev)
div_beta = betaxx + betayy + betazz
call fderivs_fh(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev,fh_work2)
call fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev)
chi_rhs = F2o3 *chin1*( alpn1 * trK - div_beta ) !rhs for chi
call fderivs_fh(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev,fh_work2)
call fderivs_fh(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev,fh_work2)
call fderivs_fh(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev,fh_work2)
call fderivs_fh(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev,fh_work2)
call fderivs_fh(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev,fh_work2)
call fderivs_fh(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev,fh_work2)
call fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
call fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev)
call fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev)
call fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
call fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev)
call fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
gxx_rhs = - TWO * alpn1 * Axx - F2o3 * gxx * div_beta + &
TWO *( gxx * betaxx + gxy * betayx + gxz * betazx)
@@ -288,8 +282,8 @@
(gupyy * gupzz + gupyz * gupyz)* Ayz
! Right hand side for Gam^i without shift terms...
call fderivs_fh(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev,fh_work2)
call fderivs_fh(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev,fh_work2)
call fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
call fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev)
Gamx_rhs = - TWO * ( Lapx * Rxx + Lapy * Rxy + Lapz * Rxz ) + &
TWO * alpn1 * ( &
@@ -318,12 +312,12 @@
Gamzxx * Rxx + Gamzyy * Ryy + Gamzzz * Rzz + &
TWO * ( Gamzxy * Rxy + Gamzxz * Rxz + Gamzyz * Ryz ) )
call fdderivs_fh(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,&
X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev,fh_work2)
call fdderivs_fh(ex,betay,gxxy,gxyy,gxzy,gyyy,gyzy,gzzy,&
X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev,fh_work2)
call fdderivs_fh(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,&
X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev,fh_work2)
call fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,&
X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev)
call fdderivs(ex,betay,gxxy,gxyy,gxzy,gyyy,gyzy,gzzy,&
X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev)
call fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,&
X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev)
fxx = gxxx + gxyy + gxzz
fxy = gxyx + gyyy + gyzz
@@ -336,9 +330,9 @@
Gamza = gupxx * Gamzxx + gupyy * Gamzyy + gupzz * Gamzzz + &
TWO*( gupxy * Gamzxy + gupxz * Gamzxz + gupyz * Gamzyz )
call fderivs_fh(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev,fh_work2)
call fderivs_fh(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev,fh_work2)
call fderivs_fh(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev,fh_work2)
call fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev)
call fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev)
call fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev)
Gamx_rhs = Gamx_rhs + F2o3 * Gamxa * div_beta - &
Gamxa * betaxx - Gamya * betaxy - Gamza * betaxz + &
@@ -381,27 +375,27 @@
gzzz = gxz * Gamxzz + gyz * Gamyzz + gzz * Gamzzz
!compute Ricci tensor for tilted metric
call fdderivs_fh(ex,dxx,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM ,SYM ,SYM ,symmetry,Lev,fh_work2)
call fdderivs(ex,dxx,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM ,SYM ,SYM ,symmetry,Lev)
Rxx = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
( gupxy * fxy + gupxz * fxz + gupyz * fyz ) * TWO
call fdderivs_fh(ex,dyy,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM ,SYM ,SYM ,symmetry,Lev,fh_work2)
call fdderivs(ex,dyy,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM ,SYM ,SYM ,symmetry,Lev)
Ryy = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
( gupxy * fxy + gupxz * fxz + gupyz * fyz ) * TWO
call fdderivs_fh(ex,dzz,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM ,SYM ,SYM ,symmetry,Lev,fh_work2)
call fdderivs(ex,dzz,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM ,SYM ,SYM ,symmetry,Lev)
Rzz = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
( gupxy * fxy + gupxz * fxz + gupyz * fyz ) * TWO
call fdderivs_fh(ex,gxy,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,ANTI, ANTI,SYM ,symmetry,Lev,fh_work2)
call fdderivs(ex,gxy,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,ANTI, ANTI,SYM ,symmetry,Lev)
Rxy = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
( gupxy * fxy + gupxz * fxz + gupyz * fyz ) * TWO
call fdderivs_fh(ex,gxz,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,ANTI ,SYM ,ANTI,symmetry,Lev,fh_work2)
call fdderivs(ex,gxz,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,ANTI ,SYM ,ANTI,symmetry,Lev)
Rxz = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
( gupxy * fxy + gupxz * fxz + gupyz * fyz ) * TWO
call fdderivs_fh(ex,gyz,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM ,ANTI ,ANTI,symmetry,Lev,fh_work2)
call fdderivs(ex,gyz,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM ,ANTI ,ANTI,symmetry,Lev)
Ryz = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
( gupxy * fxy + gupxz * fxz + gupyz * fyz ) * TWO
@@ -606,7 +600,7 @@
Gamxzz * gxzy + Gamyzz * gyzy + Gamzzz * gzzy + &
Gamxyz * gzzx + Gamyyz * gzzy + Gamzyz * gzzz )
!covariant second derivative of chi respect to tilted metric
call fdderivs_fh(ex,chi,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev,fh_work2)
call fdderivs(ex,chi,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
fxx = fxx - Gamxxx * chix - Gamyxx * chiy - Gamzxx * chiz
fxy = fxy - Gamxxy * chix - Gamyxy * chiy - Gamzxy * chiz
@@ -632,8 +626,8 @@
Ryz = Ryz + (fyz - chiy*chiz/chin1/TWO + gyz * f)/chin1/TWO
! covariant second derivatives of the lapse respect to physical metric
call fdderivs_fh(ex,Lap,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z, &
SYM,SYM,SYM,symmetry,Lev,fh_work2)
call fdderivs(ex,Lap,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z, &
SYM,SYM,SYM,symmetry,Lev)
gxxx = (gupxx * chix + gupxy * chiy + gupxz * chiz)/chin1
gxxy = (gupxy * chix + gupyy * chiy + gupyz * chiz)/chin1
@@ -816,7 +810,7 @@
betay_rhs = FF*dtSfy
betaz_rhs = FF*dtSfz
call fderivs_fh(ex,chi,dtSfx_rhs,dtSfy_rhs,dtSfz_rhs,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev,fh_work2)
call fderivs(ex,chi,dtSfx_rhs,dtSfy_rhs,dtSfz_rhs,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
reta = gupxx * dtSfx_rhs * dtSfx_rhs + gupyy * dtSfy_rhs * dtSfy_rhs + gupzz * dtSfz_rhs * dtSfz_rhs + &
TWO * (gupxy * dtSfx_rhs * dtSfy_rhs + gupxz * dtSfx_rhs * dtSfz_rhs + gupyz * dtSfy_rhs * dtSfz_rhs)
reta = 1.31d0/2*dsqrt(reta/chin1)/(1-dsqrt(chin1))**2
@@ -828,7 +822,7 @@
betay_rhs = FF*dtSfy
betaz_rhs = FF*dtSfz
call fderivs_fh(ex,chi,dtSfx_rhs,dtSfy_rhs,dtSfz_rhs,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev,fh_work2)
call fderivs(ex,chi,dtSfx_rhs,dtSfy_rhs,dtSfz_rhs,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
reta = gupxx * dtSfx_rhs * dtSfx_rhs + gupyy * dtSfy_rhs * dtSfy_rhs + gupzz * dtSfz_rhs * dtSfz_rhs + &
TWO * (gupxy * dtSfx_rhs * dtSfy_rhs + gupxz * dtSfx_rhs * dtSfz_rhs + gupyz * dtSfy_rhs * dtSfz_rhs)
reta = 1.31d0/2*dsqrt(reta/chin1)/(1-chin1)**2
@@ -836,7 +830,7 @@
dtSfy_rhs = Gamy_rhs - reta*dtSfy
dtSfz_rhs = Gamz_rhs - reta*dtSfz
#elif (GAUGE == 4)
call fderivs_fh(ex,chi,dtSfx_rhs,dtSfy_rhs,dtSfz_rhs,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev,fh_work2)
call fderivs(ex,chi,dtSfx_rhs,dtSfy_rhs,dtSfz_rhs,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
reta = gupxx * dtSfx_rhs * dtSfx_rhs + gupyy * dtSfy_rhs * dtSfy_rhs + gupzz * dtSfz_rhs * dtSfz_rhs + &
TWO * (gupxy * dtSfx_rhs * dtSfy_rhs + gupxz * dtSfx_rhs * dtSfz_rhs + gupyz * dtSfy_rhs * dtSfz_rhs)
reta = 1.31d0/2*dsqrt(reta/chin1)/(1-dsqrt(chin1))**2
@@ -848,7 +842,7 @@
dtSfy_rhs = ZEO
dtSfz_rhs = ZEO
#elif (GAUGE == 5)
call fderivs_fh(ex,chi,dtSfx_rhs,dtSfy_rhs,dtSfz_rhs,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev,fh_work2)
call fderivs(ex,chi,dtSfx_rhs,dtSfy_rhs,dtSfz_rhs,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
reta = gupxx * dtSfx_rhs * dtSfx_rhs + gupyy * dtSfy_rhs * dtSfy_rhs + gupzz * dtSfz_rhs * dtSfz_rhs + &
TWO * (gupxy * dtSfx_rhs * dtSfy_rhs + gupxz * dtSfx_rhs * dtSfz_rhs + gupyz * dtSfy_rhs * dtSfz_rhs)
reta = 1.31d0/2*dsqrt(reta/chin1)/(1-chin1)**2
@@ -951,51 +945,51 @@
!!!!!!!!!advection term part
call lopsided_fh(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,fh_work3)
call lopsided_fh(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,fh_work3)
call lopsided_fh(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,fh_work3)
call lopsided_fh(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,fh_work3)
call lopsided_fh(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,fh_work3)
call lopsided_fh(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,fh_work3)
call lopsided(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS)
call lopsided(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA)
call lopsided(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA)
call lopsided(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided_fh(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,fh_work3)
call lopsided_fh(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,fh_work3)
call lopsided_fh(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA,fh_work3)
call lopsided_fh(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS,fh_work3)
call lopsided_fh(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA,fh_work3)
call lopsided_fh(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS,fh_work3)
call lopsided(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS)
call lopsided(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA)
call lopsided(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA)
call lopsided(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided_fh(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS,fh_work3)
call lopsided_fh(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS,fh_work3)
call lopsided(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided_fh(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS,fh_work3)
call lopsided_fh(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS,fh_work3)
call lopsided_fh(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,fh_work3)
call lopsided(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS)
call lopsided(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS)
call lopsided(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA)
!!
call lopsided_fh(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS,fh_work3)
call lopsided(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS)
#if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7)
call lopsided_fh(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS,fh_work3)
call lopsided_fh(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS,fh_work3)
call lopsided_fh(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA,fh_work3)
call lopsided(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS)
call lopsided(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS)
call lopsided(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA)
#endif
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
call lopsided_fh(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS,fh_work3)
call lopsided_fh(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS,fh_work3)
call lopsided_fh(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA,fh_work3)
call lopsided(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS)
call lopsided(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS)
call lopsided(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA)
#endif
if(eps>0)then
! usual Kreiss-Oliger dissipation
call kodis_fh(ex,X,Y,Z,chi,chi_rhs,SSS,Symmetry,eps,fh_work3)
call kodis_fh(ex,X,Y,Z,trK,trK_rhs,SSS,Symmetry,eps,fh_work3)
call kodis_fh(ex,X,Y,Z,dxx,gxx_rhs,SSS,Symmetry,eps,fh_work3)
call kodis_fh(ex,X,Y,Z,gxy,gxy_rhs,AAS,Symmetry,eps,fh_work3)
call kodis_fh(ex,X,Y,Z,gxz,gxz_rhs,ASA,Symmetry,eps,fh_work3)
call kodis_fh(ex,X,Y,Z,dyy,gyy_rhs,SSS,Symmetry,eps,fh_work3)
call kodis_fh(ex,X,Y,Z,gyz,gyz_rhs,SAA,Symmetry,eps,fh_work3)
call kodis_fh(ex,X,Y,Z,dzz,gzz_rhs,SSS,Symmetry,eps,fh_work3)
call kodis(ex,X,Y,Z,chi,chi_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,trK,trK_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,dxx,gxx_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,gxy,gxy_rhs,AAS,Symmetry,eps)
call kodis(ex,X,Y,Z,gxz,gxz_rhs,ASA,Symmetry,eps)
call kodis(ex,X,Y,Z,dyy,gyy_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,gyz,gyz_rhs,SAA,Symmetry,eps)
call kodis(ex,X,Y,Z,dzz,gzz_rhs,SSS,Symmetry,eps)
#if 0
#define i 42
#define j 40
@@ -1009,7 +1003,7 @@ endif
#undef k
!!stop
#endif
call kodis_fh(ex,X,Y,Z,Axx,Axx_rhs,SSS,Symmetry,eps,fh_work3)
call kodis(ex,X,Y,Z,Axx,Axx_rhs,SSS,Symmetry,eps)
#if 0
#define i 42
#define j 40
@@ -1023,25 +1017,25 @@ endif
#undef k
!!stop
#endif
call kodis_fh(ex,X,Y,Z,Axy,Axy_rhs,AAS,Symmetry,eps,fh_work3)
call kodis_fh(ex,X,Y,Z,Axz,Axz_rhs,ASA,Symmetry,eps,fh_work3)
call kodis_fh(ex,X,Y,Z,Ayy,Ayy_rhs,SSS,Symmetry,eps,fh_work3)
call kodis_fh(ex,X,Y,Z,Ayz,Ayz_rhs,SAA,Symmetry,eps,fh_work3)
call kodis_fh(ex,X,Y,Z,Azz,Azz_rhs,SSS,Symmetry,eps,fh_work3)
call kodis_fh(ex,X,Y,Z,Gamx,Gamx_rhs,ASS,Symmetry,eps,fh_work3)
call kodis_fh(ex,X,Y,Z,Gamy,Gamy_rhs,SAS,Symmetry,eps,fh_work3)
call kodis_fh(ex,X,Y,Z,Gamz,Gamz_rhs,SSA,Symmetry,eps,fh_work3)
call kodis(ex,X,Y,Z,Axy,Axy_rhs,AAS,Symmetry,eps)
call kodis(ex,X,Y,Z,Axz,Axz_rhs,ASA,Symmetry,eps)
call kodis(ex,X,Y,Z,Ayy,Ayy_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,Ayz,Ayz_rhs,SAA,Symmetry,eps)
call kodis(ex,X,Y,Z,Azz,Azz_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,Gamx,Gamx_rhs,ASS,Symmetry,eps)
call kodis(ex,X,Y,Z,Gamy,Gamy_rhs,SAS,Symmetry,eps)
call kodis(ex,X,Y,Z,Gamz,Gamz_rhs,SSA,Symmetry,eps)
#if 1
!! bam does not apply dissipation on gauge variables
call kodis_fh(ex,X,Y,Z,Lap,Lap_rhs,SSS,Symmetry,eps,fh_work3)
call kodis_fh(ex,X,Y,Z,betax,betax_rhs,ASS,Symmetry,eps,fh_work3)
call kodis_fh(ex,X,Y,Z,betay,betay_rhs,SAS,Symmetry,eps,fh_work3)
call kodis_fh(ex,X,Y,Z,betaz,betaz_rhs,SSA,Symmetry,eps,fh_work3)
call kodis(ex,X,Y,Z,Lap,Lap_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,betax,betax_rhs,ASS,Symmetry,eps)
call kodis(ex,X,Y,Z,betay,betay_rhs,SAS,Symmetry,eps)
call kodis(ex,X,Y,Z,betaz,betaz_rhs,SSA,Symmetry,eps)
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
call kodis_fh(ex,X,Y,Z,dtSfx,dtSfx_rhs,ASS,Symmetry,eps,fh_work3)
call kodis_fh(ex,X,Y,Z,dtSfy,dtSfy_rhs,SAS,Symmetry,eps,fh_work3)
call kodis_fh(ex,X,Y,Z,dtSfz,dtSfz_rhs,SSA,Symmetry,eps,fh_work3)
call kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,ASS,Symmetry,eps)
call kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,SAS,Symmetry,eps)
call kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,SSA,Symmetry,eps)
#endif
#endif
@@ -1082,12 +1076,12 @@ endif
! mov_Res_j = gupkj*(-1/chi d_k chi*A_ij + D_k A_ij) - 2/3 d_j trK - 8 PI s_j where D respect to physical metric
! store D_i A_jk - 1/chi d_i chi*A_jk in gjk_i
call fderivs_fh(ex,Axx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0,fh_work2)
call fderivs_fh(ex,Axy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,0,fh_work2)
call fderivs_fh(ex,Axz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,0,fh_work2)
call fderivs_fh(ex,Ayy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0,fh_work2)
call fderivs_fh(ex,Ayz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,0,fh_work2)
call fderivs_fh(ex,Azz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0,fh_work2)
call fderivs(ex,Axx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0)
call fderivs(ex,Axy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,0)
call fderivs(ex,Axz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,0)
call fderivs(ex,Ayy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0)
call fderivs(ex,Ayz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,0)
call fderivs(ex,Azz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0)
gxxx = gxxx - ( Gamxxx * Axx + Gamyxx * Axy + Gamzxx * Axz &
+ Gamxxx * Axx + Gamyxx * Axy + Gamzxx * Axz) - chix*Axx/chin1

View File

@@ -1103,103 +1103,6 @@
end subroutine fderivs
!-----------------------------------------------------------------------------
! fderivs variant: reuses caller-provided fh work array (memory pool)
!-----------------------------------------------------------------------------
subroutine fderivs_fh(ex,f,fx,fy,fz,X,Y,Z,SYM1,SYM2,SYM3, &
symmetry,onoff,fh)
implicit none
integer, intent(in ):: ex(1:3),symmetry,onoff
real*8, dimension(ex(1),ex(2),ex(3)), intent(in ):: f
real*8, dimension(ex(1),ex(2),ex(3)), intent(out):: fx,fy,fz
real*8, intent(in) :: X(ex(1)),Y(ex(2)),Z(ex(3))
real*8, intent(in ):: SYM1,SYM2,SYM3
real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)),intent(inout):: fh
real*8 :: dX,dY,dZ
real*8, dimension(3) :: SoA
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
real*8 :: d12dx,d12dy,d12dz,d2dx,d2dy,d2dz
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
real*8, parameter :: ZEO=0.d0,ONE=1.d0
real*8, parameter :: TWO=2.d0,EIT=8.d0
real*8, parameter :: F12=1.2d1
dX = X(2)-X(1)
dY = Y(2)-Y(1)
dZ = Z(2)-Z(1)
imax = ex(1)
jmax = ex(2)
kmax = ex(3)
imin = 1
jmin = 1
kmin = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -1
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -1
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -1
SoA(1) = SYM1
SoA(2) = SYM2
SoA(3) = SYM3
call symmetry_bd(2,ex,f,fh,SoA)
d12dx = ONE/F12/dX
d12dy = ONE/F12/dY
d12dz = ONE/F12/dZ
d2dx = ONE/TWO/dX
d2dy = ONE/TWO/dY
d2dz = ONE/TWO/dZ
fx = ZEO
fy = ZEO
fz = ZEO
do k=1,ex(3)-1
do j=1,ex(2)-1
do i=1,ex(1)-1
#if 0
if(i+2 <= imax .and. i-2 >= imin)then
fx(i,j,k)=d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
elseif(i+1 <= imax .and. i-1 >= imin)then
fx(i,j,k)=d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
endif
if(j+2 <= jmax .and. j-2 >= jmin)then
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
elseif(j+1 <= jmax .and. j-1 >= jmin)then
fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
endif
if(k+2 <= kmax .and. k-2 >= kmin)then
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
elseif(k+1 <= kmax .and. k-1 >= kmin)then
fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
endif
#else
if(i+2 <= imax .and. i-2 >= imin .and. &
j+2 <= jmax .and. j-2 >= jmin .and. &
k+2 <= kmax .and. k-2 >= kmin) then
fx(i,j,k)=d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
elseif(i+1 <= imax .and. i-1 >= imin .and. &
j+1 <= jmax .and. j-1 >= jmin .and. &
k+1 <= kmax .and. k-1 >= kmin) then
fx(i,j,k)=d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
endif
#endif
enddo
enddo
enddo
return
end subroutine fderivs_fh
!-----------------------------------------------------------------------------
!
! single derivatives dx
!
@@ -2037,162 +1940,6 @@
end subroutine fddyz
!-----------------------------------------------------------------------------
! fdderivs variant: reuses caller-provided fh work array (memory pool)
!-----------------------------------------------------------------------------
subroutine fdderivs_fh(ex,f,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z, &
SYM1,SYM2,SYM3,symmetry,onoff,fh)
implicit none
integer, intent(in ):: ex(1:3),symmetry,onoff
real*8, dimension(ex(1),ex(2),ex(3)),intent(in ):: f
real*8, dimension(ex(1),ex(2),ex(3)),intent(out):: fxx,fxy,fxz,fyy,fyz,fzz
real*8, intent(in ):: X(ex(1)),Y(ex(2)),Z(ex(3))
real*8, intent(in ):: SYM1,SYM2,SYM3
real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)),intent(inout):: fh
real*8 :: dX,dY,dZ
real*8, dimension(3) :: SoA
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
real*8 :: Sdxdx,Sdydy,Sdzdz,Fdxdx,Fdydy,Fdzdz
real*8 :: Sdxdy,Sdxdz,Sdydz,Fdxdy,Fdxdz,Fdydz
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
real*8, parameter :: ZEO=0.d0, ONE=1.d0, TWO=2.d0, F1o4=2.5d-1
real*8, parameter :: F8=8.d0, F16=1.6d1, F30=3.d1
real*8, parameter :: F1o12=ONE/1.2d1, F1o144=ONE/1.44d2
dX = X(2)-X(1)
dY = Y(2)-Y(1)
dZ = Z(2)-Z(1)
imax = ex(1)
jmax = ex(2)
kmax = ex(3)
imin = 1
jmin = 1
kmin = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -1
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -1
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -1
SoA(1) = SYM1
SoA(2) = SYM2
SoA(3) = SYM3
call symmetry_bd(2,ex,f,fh,SoA)
Sdxdx = ONE /( dX * dX )
Sdydy = ONE /( dY * dY )
Sdzdz = ONE /( dZ * dZ )
Fdxdx = F1o12 /( dX * dX )
Fdydy = F1o12 /( dY * dY )
Fdzdz = F1o12 /( dZ * dZ )
Sdxdy = F1o4 /( dX * dY )
Sdxdz = F1o4 /( dX * dZ )
Sdydz = F1o4 /( dY * dZ )
Fdxdy = F1o144 /( dX * dY )
Fdxdz = F1o144 /( dX * dZ )
Fdydz = F1o144 /( dY * dZ )
fxx = ZEO
fyy = ZEO
fzz = ZEO
fxy = ZEO
fxz = ZEO
fyz = ZEO
do k=1,ex(3)-1
do j=1,ex(2)-1
do i=1,ex(1)-1
#if 0
if(i+2 <= imax .and. i-2 >= imin)then
fxx(i,j,k) = Fdxdx*(-fh(i-2,j,k)+F16*fh(i-1,j,k)-F30*fh(i,j,k) &
-fh(i+2,j,k)+F16*fh(i+1,j,k) )
elseif(i+1 <= imax .and. i-1 >= imin)then
fxx(i,j,k) = Sdxdx*(fh(i-1,j,k)-TWO*fh(i,j,k)+fh(i+1,j,k))
endif
if(j+2 <= jmax .and. j-2 >= jmin)then
fyy(i,j,k) = Fdydy*(-fh(i,j-2,k)+F16*fh(i,j-1,k)-F30*fh(i,j,k) &
-fh(i,j+2,k)+F16*fh(i,j+1,k) )
elseif(j+1 <= jmax .and. j-1 >= jmin)then
fyy(i,j,k) = Sdydy*(fh(i,j-1,k)-TWO*fh(i,j,k)+fh(i,j+1,k))
endif
if(k+2 <= kmax .and. k-2 >= kmin)then
fzz(i,j,k) = Fdzdz*(-fh(i,j,k-2)+F16*fh(i,j,k-1)-F30*fh(i,j,k) &
-fh(i,j,k+2)+F16*fh(i,j,k+1) )
elseif(k+1 <= kmax .and. k-1 >= kmin)then
fzz(i,j,k) = Sdzdz*(fh(i,j,k-1)-TWO*fh(i,j,k)+fh(i,j,k+1))
endif
if(i+2 <= imax .and. i-2 >= imin .and. j+2 <= jmax .and. j-2 >= jmin)then
fxy(i,j,k) = Fdxdy*( (fh(i-2,j-2,k)-F8*fh(i-1,j-2,k)+F8*fh(i+1,j-2,k)-fh(i+2,j-2,k)) &
-F8 *(fh(i-2,j-1,k)-F8*fh(i-1,j-1,k)+F8*fh(i+1,j-1,k)-fh(i+2,j-1,k)) &
+F8 *(fh(i-2,j+1,k)-F8*fh(i-1,j+1,k)+F8*fh(i+1,j+1,k)-fh(i+2,j+1,k)) &
- (fh(i-2,j+2,k)-F8*fh(i-1,j+2,k)+F8*fh(i+1,j+2,k)-fh(i+2,j+2,k)))
elseif(i+1 <= imax .and. i-1 >= imin .and. j+1 <= jmax .and. j-1 >= jmin)then
fxy(i,j,k) = Sdxdy*(fh(i-1,j-1,k)-fh(i+1,j-1,k)-fh(i-1,j+1,k)+fh(i+1,j+1,k))
endif
if(i+2 <= imax .and. i-2 >= imin .and. k+2 <= kmax .and. k-2 >= kmin)then
fxz(i,j,k) = Fdxdz*( (fh(i-2,j,k-2)-F8*fh(i-1,j,k-2)+F8*fh(i+1,j,k-2)-fh(i+2,j,k-2)) &
-F8 *(fh(i-2,j,k-1)-F8*fh(i-1,j,k-1)+F8*fh(i+1,j,k-1)-fh(i+2,j,k-1)) &
+F8 *(fh(i-2,j,k+1)-F8*fh(i-1,j,k+1)+F8*fh(i+1,j,k+1)-fh(i+2,j,k+1)) &
- (fh(i-2,j,k+2)-F8*fh(i-1,j,k+2)+F8*fh(i+1,j,k+2)-fh(i+2,j,k+2)))
elseif(i+1 <= imax .and. i-1 >= imin .and. k+1 <= kmax .and. k-1 >= kmin)then
fxz(i,j,k) = Sdxdz*(fh(i-1,j,k-1)-fh(i+1,j,k-1)-fh(i-1,j,k+1)+fh(i+1,j,k+1))
endif
if(j+2 <= jmax .and. j-2 >= jmin .and. k+2 <= kmax .and. k-2 >= kmin)then
fyz(i,j,k) = Fdydz*( (fh(i,j-2,k-2)-F8*fh(i,j-1,k-2)+F8*fh(i,j+1,k-2)-fh(i,j+2,k-2)) &
-F8 *(fh(i,j-2,k-1)-F8*fh(i,j-1,k-1)+F8*fh(i,j+1,k-1)-fh(i,j+2,k-1)) &
+F8 *(fh(i,j-2,k+1)-F8*fh(i,j-1,k+1)+F8*fh(i,j+1,k+1)-fh(i,j+2,k+1)) &
- (fh(i,j-2,k+2)-F8*fh(i,j-1,k+2)+F8*fh(i,j+1,k+2)-fh(i,j+2,k+2)))
elseif(j+1 <= jmax .and. j-1 >= jmin .and. k+1 <= kmax .and. k-1 >= kmin)then
fyz(i,j,k) = Sdydz*(fh(i,j-1,k-1)-fh(i,j+1,k-1)-fh(i,j-1,k+1)+fh(i,j+1,k+1))
endif
#else
! for bam comparison
if(i+2 <= imax .and. i-2 >= imin .and. &
j+2 <= jmax .and. j-2 >= jmin .and. &
k+2 <= kmax .and. k-2 >= kmin) then
fxx(i,j,k) = Fdxdx*(-fh(i-2,j,k)+F16*fh(i-1,j,k)-F30*fh(i,j,k) &
-fh(i+2,j,k)+F16*fh(i+1,j,k) )
fyy(i,j,k) = Fdydy*(-fh(i,j-2,k)+F16*fh(i,j-1,k)-F30*fh(i,j,k) &
-fh(i,j+2,k)+F16*fh(i,j+1,k) )
fzz(i,j,k) = Fdzdz*(-fh(i,j,k-2)+F16*fh(i,j,k-1)-F30*fh(i,j,k) &
-fh(i,j,k+2)+F16*fh(i,j,k+1) )
fxy(i,j,k) = Fdxdy*( (fh(i-2,j-2,k)-F8*fh(i-1,j-2,k)+F8*fh(i+1,j-2,k)-fh(i+2,j-2,k)) &
-F8 *(fh(i-2,j-1,k)-F8*fh(i-1,j-1,k)+F8*fh(i+1,j-1,k)-fh(i+2,j-1,k)) &
+F8 *(fh(i-2,j+1,k)-F8*fh(i-1,j+1,k)+F8*fh(i+1,j+1,k)-fh(i+2,j+1,k)) &
- (fh(i-2,j+2,k)-F8*fh(i-1,j+2,k)+F8*fh(i+1,j+2,k)-fh(i+2,j+2,k)))
fxz(i,j,k) = Fdxdz*( (fh(i-2,j,k-2)-F8*fh(i-1,j,k-2)+F8*fh(i+1,j,k-2)-fh(i+2,j,k-2)) &
-F8 *(fh(i-2,j,k-1)-F8*fh(i-1,j,k-1)+F8*fh(i+1,j,k-1)-fh(i+2,j,k-1)) &
+F8 *(fh(i-2,j,k+1)-F8*fh(i-1,j,k+1)+F8*fh(i+1,j,k+1)-fh(i+2,j,k+1)) &
- (fh(i-2,j,k+2)-F8*fh(i-1,j,k+2)+F8*fh(i+1,j,k+2)-fh(i+2,j,k+2)))
fyz(i,j,k) = Fdydz*( (fh(i,j-2,k-2)-F8*fh(i,j-1,k-2)+F8*fh(i,j+1,k-2)-fh(i,j+2,k-2)) &
-F8 *(fh(i,j-2,k-1)-F8*fh(i,j-1,k-1)+F8*fh(i,j+1,k-1)-fh(i,j+2,k-1)) &
+F8 *(fh(i,j-2,k+1)-F8*fh(i,j-1,k+1)+F8*fh(i,j+1,k+1)-fh(i,j+2,k+1)) &
- (fh(i,j-2,k+2)-F8*fh(i,j-1,k+2)+F8*fh(i,j+1,k+2)-fh(i,j+2,k+2)))
elseif(i+1 <= imax .and. i-1 >= imin .and. &
j+1 <= jmax .and. j-1 >= jmin .and. &
k+1 <= kmax .and. k-1 >= kmin) then
fxx(i,j,k) = Sdxdx*(fh(i-1,j,k)-TWO*fh(i,j,k)+fh(i+1,j,k))
fyy(i,j,k) = Sdydy*(fh(i,j-1,k)-TWO*fh(i,j,k)+fh(i,j+1,k))
fzz(i,j,k) = Sdzdz*(fh(i,j,k-1)-TWO*fh(i,j,k)+fh(i,j,k+1))
fxy(i,j,k) = Sdxdy*(fh(i-1,j-1,k)-fh(i+1,j-1,k)-fh(i-1,j+1,k)+fh(i+1,j+1,k))
fxz(i,j,k) = Sdxdz*(fh(i-1,j,k-1)-fh(i+1,j,k-1)-fh(i-1,j,k+1)+fh(i+1,j,k+1))
fyz(i,j,k) = Sdydz*(fh(i,j-1,k-1)-fh(i,j+1,k-1)-fh(i,j-1,k+1)+fh(i,j+1,k+1))
endif
#endif
enddo
enddo
enddo
return
end subroutine fdderivs_fh
#elif (ghost_width == 4)
! sixth order code

View File

@@ -883,13 +883,17 @@ subroutine symmetry_bd(ord,extc,func,funcc,SoA)
integer::i
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
funcc(1:extc(1),1:extc(2),1:extc(3)) = func
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
do i=0,ord-1
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1)
enddo
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
do i=0,ord-1
funcc(:,-i,1:extc(3)) = funcc(:,i+1,1:extc(3))*SoA(2)
enddo
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
do i=0,ord-1
funcc(:,:,-i) = funcc(:,:,i+1)*SoA(3)
enddo
@@ -1111,7 +1115,149 @@ end subroutine d2dump
!------------------------------------------------------------------------------
! Lagrangian polynomial interpolation
!------------------------------------------------------------------------------
#ifndef POLINT6_USE_BARYCENTRIC
#define POLINT6_USE_BARYCENTRIC 1
#endif
!DIR$ ATTRIBUTES FORCEINLINE :: polint6_neville
subroutine polint6_neville(xa, ya, x, y, dy)
implicit none
real*8, dimension(6), intent(in) :: xa, ya
real*8, intent(in) :: x
real*8, intent(out) :: y, dy
integer :: i, m, ns, n_m
real*8, dimension(6) :: c, d, ho
real*8 :: dif, dift, hp, h, den_val
c = ya
d = ya
ho = xa - x
ns = 1
dif = abs(x - xa(1))
do i = 2, 6
dift = abs(x - xa(i))
if (dift < dif) then
ns = i
dif = dift
end if
end do
y = ya(ns)
ns = ns - 1
do m = 1, 5
n_m = 6 - m
do i = 1, n_m
hp = ho(i)
h = ho(i+m)
den_val = hp - h
if (den_val == 0.0d0) then
write(*,*) 'failure in polint for point',x
write(*,*) 'with input points: ',xa
stop
end if
den_val = (c(i+1) - d(i)) / den_val
d(i) = h * den_val
c(i) = hp * den_val
end do
if (2 * ns < n_m) then
dy = c(ns + 1)
else
dy = d(ns)
ns = ns - 1
end if
y = y + dy
end do
return
end subroutine polint6_neville
!DIR$ ATTRIBUTES FORCEINLINE :: polint6_barycentric
subroutine polint6_barycentric(xa, ya, x, y, dy)
implicit none
real*8, dimension(6), intent(in) :: xa, ya
real*8, intent(in) :: x
real*8, intent(out) :: y, dy
integer :: i, j
logical :: is_uniform
real*8, dimension(6) :: lambda
real*8 :: dx, den_i, term, num, den, step, tol
real*8, parameter :: c_uniform(6) = (/ -1.d0, 5.d0, -10.d0, 10.d0, -5.d0, 1.d0 /)
do i = 1, 6
if (x == xa(i)) then
y = ya(i)
dy = 0.d0
return
end if
end do
step = xa(2) - xa(1)
is_uniform = (step /= 0.d0)
if (is_uniform) then
tol = 64.d0 * epsilon(1.d0) * max(1.d0, abs(step))
do i = 3, 6
if (abs((xa(i) - xa(i-1)) - step) > tol) then
is_uniform = .false.
exit
end if
end do
end if
if (is_uniform) then
num = 0.d0
den = 0.d0
do i = 1, 6
term = c_uniform(i) / (x - xa(i))
num = num + term * ya(i)
den = den + term
end do
y = num / den
dy = 0.d0
return
end if
do i = 1, 6
den_i = 1.d0
do j = 1, 6
if (j /= i) then
dx = xa(i) - xa(j)
if (dx == 0.0d0) then
write(*,*) 'failure in polint for point',x
write(*,*) 'with input points: ',xa
stop
end if
den_i = den_i * dx
end if
end do
lambda(i) = 1.d0 / den_i
end do
num = 0.d0
den = 0.d0
do i = 1, 6
term = lambda(i) / (x - xa(i))
num = num + term * ya(i)
den = den + term
end do
y = num / den
dy = 0.d0
return
end subroutine polint6_barycentric
!DIR$ ATTRIBUTES FORCEINLINE :: polint
subroutine polint(xa, ya, x, y, dy, ordn)
implicit none
@@ -1124,6 +1270,15 @@ end subroutine d2dump
real*8, dimension(ordn) :: c, d, ho
real*8 :: dif, dift, hp, h, den_val
if (ordn == 6) then
#if POLINT6_USE_BARYCENTRIC
call polint6_barycentric(xa, ya, x, y, dy)
#else
call polint6_neville(xa, ya, x, y, dy)
#endif
return
end if
c = ya
d = ya
ho = xa - x
@@ -1173,6 +1328,41 @@ end subroutine d2dump
return
end subroutine polint
!------------------------------------------------------------------------------
! Compute Lagrange interpolation basis weights for one target point.
!------------------------------------------------------------------------------
!DIR$ ATTRIBUTES FORCEINLINE :: polint_lagrange_weights
subroutine polint_lagrange_weights(xa, x, w, ordn)
implicit none
integer, intent(in) :: ordn
real*8, dimension(1:ordn), intent(in) :: xa
real*8, intent(in) :: x
real*8, dimension(1:ordn), intent(out) :: w
integer :: i, j
real*8 :: num, den, dx
do i = 1, ordn
num = 1.d0
den = 1.d0
do j = 1, ordn
if (j /= i) then
dx = xa(i) - xa(j)
if (dx == 0.0d0) then
write(*,*) 'failure in polint for point',x
write(*,*) 'with input points: ',xa
stop
end if
num = num * (x - xa(j))
den = den * dx
end if
end do
w(i) = num / den
end do
return
end subroutine polint_lagrange_weights
!------------------------------------------------------------------------------
!
! interpolation in 2 dimensions, follow yx order
!
@@ -1243,19 +1433,26 @@ end subroutine d2dump
end do
call polint(x1a,ymtmp,x1,y,dy,ordn)
#else
integer :: j, k
real*8, dimension(ordn,ordn) :: yatmp
integer :: i, j, k
real*8, dimension(ordn) :: w1, w2
real*8, dimension(ordn) :: ymtmp
real*8 :: dy_temp
real*8 :: yx_sum, x_sum
do k=1,ordn
do j=1,ordn
call polint(x1a, ya(:,j,k), x1, yatmp(j,k), dy_temp, ordn)
call polint_lagrange_weights(x1a, x1, w1, ordn)
call polint_lagrange_weights(x2a, x2, w2, ordn)
do k = 1, ordn
yx_sum = 0.d0
do j = 1, ordn
x_sum = 0.d0
do i = 1, ordn
x_sum = x_sum + w1(i) * ya(i,j,k)
end do
yx_sum = yx_sum + w2(j) * x_sum
end do
do k=1,ordn
call polint(x2a, yatmp(:,k), x2, ymtmp(k), dy_temp, ordn)
ymtmp(k) = yx_sum
end do
call polint(x3a, ymtmp, x3, y, dy, ordn)
#endif
@@ -1305,18 +1502,89 @@ if(dabs(X(1)-xmin) < dX) imin = 1
if(dabs(Y(1)-ymin) < dY) jmin = 1
if(dabs(Z(1)-zmin) < dZ) kmin = 1
! Optimized with oneMKL BLAS DDOT for dot product
n_elements = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
allocate(f_flat(n_elements))
f_flat = reshape(f(imin:imax,jmin:jmax,kmin:kmax), [n_elements])
f_out = DDOT(n_elements, f_flat, 1, f_flat, 1)
deallocate(f_flat)
n_elements = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
allocate(f_flat(n_elements))
f_flat = reshape(f(imin:imax,jmin:jmax,kmin:kmax), [n_elements])
f_out = DDOT(n_elements, f_flat, 1, f_flat, 1)
deallocate(f_flat)
f_out = f_out*dX*dY*dZ
return
end subroutine l2normhelper
!--------------------------------------------------------------------------------------
subroutine l2normhelper7(ex, X, Y, Z,xmin,ymin,zmin,xmax,ymax,zmax,&
f1,f2,f3,f4,f5,f6,f7,f_out,gw)
implicit none
!~~~~~~> Input parameters:
integer,intent(in ):: ex(1:3)
real*8, intent(in ):: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3)),xmin,ymin,zmin,xmax,ymax,zmax
integer,intent(in)::gw
real*8, dimension(ex(1),ex(2),ex(3)),intent(in) :: f1,f2,f3,f4,f5,f6,f7
real*8, intent(out) :: f_out(7)
!~~~~~~> Other variables:
real*8 :: dX, dY, dZ
integer::imin,jmin,kmin
integer::imax,jmax,kmax
integer::i,j,k
real*8 :: s1,s2,s3,s4,s5,s6,s7
dX = X(2) - X(1)
dY = Y(2) - Y(1)
dZ = Z(2) - Z(1)
imin = gw+1
jmin = gw+1
kmin = gw+1
imax = ex(1) - gw
jmax = ex(2) - gw
kmax = ex(3) - gw
if(dabs(X(ex(1))-xmax) < dX) imax = ex(1)
if(dabs(Y(ex(2))-ymax) < dY) jmax = ex(2)
if(dabs(Z(ex(3))-zmax) < dZ) kmax = ex(3)
if(dabs(X(1)-xmin) < dX) imin = 1
if(dabs(Y(1)-ymin) < dY) jmin = 1
if(dabs(Z(1)-zmin) < dZ) kmin = 1
s1 = 0.d0
s2 = 0.d0
s3 = 0.d0
s4 = 0.d0
s5 = 0.d0
s6 = 0.d0
s7 = 0.d0
do k=kmin,kmax
do j=jmin,jmax
!DIR$ SIMD REDUCTION(+:s1,s2,s3,s4,s5,s6,s7)
do i=imin,imax
s1 = s1 + f1(i,j,k)*f1(i,j,k)
s2 = s2 + f2(i,j,k)*f2(i,j,k)
s3 = s3 + f3(i,j,k)*f3(i,j,k)
s4 = s4 + f4(i,j,k)*f4(i,j,k)
s5 = s5 + f5(i,j,k)*f5(i,j,k)
s6 = s6 + f6(i,j,k)*f6(i,j,k)
s7 = s7 + f7(i,j,k)*f7(i,j,k)
enddo
enddo
enddo
f_out(1) = s1*dX*dY*dZ
f_out(2) = s2*dX*dY*dZ
f_out(3) = s3*dX*dY*dZ
f_out(4) = s4*dX*dY*dZ
f_out(5) = s5*dX*dY*dZ
f_out(6) = s6*dX*dY*dZ
f_out(7) = s7*dX*dY*dZ
return
end subroutine l2normhelper7
!--------------------------------------------------------------------------------------
! calculate L2norm especially for shell Blocks
subroutine l2normhelper_sh(ex, X, Y, Z,xmin,ymin,zmin,xmax,ymax,zmax,&
@@ -1400,12 +1668,11 @@ if(Symmetry==2)then
if(dabs(ymin+gw*dY)<dY.and.Y(1)<0.d0) jmin = gw+1
endif
! Optimized with oneMKL BLAS DDOT for dot product
n_elements = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
allocate(f_flat(n_elements))
f_flat = reshape(f(imin:imax,jmin:jmax,kmin:kmax), [n_elements])
f_out = DDOT(n_elements, f_flat, 1, f_flat, 1)
deallocate(f_flat)
n_elements = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
allocate(f_flat(n_elements))
f_flat = reshape(f(imin:imax,jmin:jmax,kmin:kmax), [n_elements])
f_out = DDOT(n_elements, f_flat, 1, f_flat, 1)
deallocate(f_flat)
f_out = f_out*dX*dY*dZ
@@ -1497,12 +1764,11 @@ if(Symmetry==2)then
if(dabs(ymin+gw*dY)<dY.and.Y(1)<0.d0) jmin = gw+1
endif
! Optimized with oneMKL BLAS DDOT for dot product
Nout = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
allocate(f_flat(Nout))
f_flat = reshape(f(imin:imax,jmin:jmax,kmin:kmax), [Nout])
f_out = DDOT(Nout, f_flat, 1, f_flat, 1)
deallocate(f_flat)
allocate(f_flat(Nout))
f_flat = reshape(f(imin:imax,jmin:jmax,kmin:kmax), [Nout])
f_out = DDOT(Nout, f_flat, 1, f_flat, 1)
deallocate(f_flat)
return
@@ -1604,8 +1870,11 @@ deallocate(f_flat)
! f=3/8*f_1 + 3/4*f_2 - 1/8*f_3
real*8,parameter::C1=3.d0/8.d0,C2=3.d0/4.d0,C3=-1.d0/8.d0
integer :: i,j,k
fout = C1*f1+C2*f2+C3*f3
do concurrent (k=1:ext(3), j=1:ext(2), i=1:ext(1))
fout(i,j,k) = C1*f1(i,j,k)+C2*f2(i,j,k)+C3*f3(i,j,k)
end do
return
@@ -1736,20 +2005,16 @@ deallocate(f_flat)
ya=fh(cxB(1):cxT(1),cxB(2):cxT(2),cxB(3):cxT(3))
endif
! Optimized with BLAS operations for better performance
! First dimension: z-direction weighted sum
tmp2=0
do m=1,ORDN
tmp2 = tmp2 + coef(2*ORDN+m)*ya(:,:,m)
enddo
! Second dimension: y-direction weighted sum
tmp1=0
do m=1,ORDN
tmp1 = tmp1 + coef(ORDN+m)*tmp2(:,m)
enddo
! Third dimension: x-direction weighted sum using BLAS DDOT
f_int = DDOT(ORDN, coef(1:ORDN), 1, tmp1, 1)
return
@@ -1810,13 +2075,11 @@ deallocate(f_flat)
ya=fh(cxB(1):cxT(1),cxB(2):cxT(2),inds(3))
endif
! Optimized with BLAS operations
tmp1=0
do m=1,ORDN
tmp1 = tmp1 + coef(ORDN+m)*ya(:,m)
enddo
! Use BLAS DDOT for final weighted sum
f_int = DDOT(ORDN, coef(1:ORDN), 1, tmp1, 1)
return
@@ -1909,7 +2172,6 @@ deallocate(f_flat)
write(*,*)"error in global_interpind1d, not recognized dumyd = ",dumyd
endif
! Optimized with BLAS DDOT for weighted sum
f_int = DDOT(ORDN, coef, 1, ya, 1)
return
@@ -2142,16 +2404,13 @@ deallocate(f_flat)
end function fWigner_d_function
!----------------------------------
! Optimized factorial function using lookup table for small N
! and log-gamma for large N to avoid overflow
function ffact(N) result(gont)
implicit none
integer,intent(in) :: N
real*8 :: gont
integer :: i
! Lookup table for factorials 0! to 20! (precomputed)
integer :: i
real*8, parameter, dimension(0:20) :: fact_table = [ &
1.d0, 1.d0, 2.d0, 6.d0, 24.d0, 120.d0, 720.d0, 5040.d0, 40320.d0, &
362880.d0, 3628800.d0, 39916800.d0, 479001600.d0, 6227020800.d0, &
@@ -2166,12 +2425,9 @@ deallocate(f_flat)
return
endif
! Use lookup table for small N (fast path)
if(N <= 20)then
gont = fact_table(N)
else
! Use log-gamma function for large N: N! = exp(log_gamma(N+1))
! This avoids overflow and is computed efficiently
gont = exp(log_gamma(dble(N+1)))
endif

View File

@@ -13,6 +13,7 @@
#define f_global_interpind2d global_interpind2d
#define f_global_interpind1d global_interpind1d
#define f_l2normhelper l2normhelper
#define f_l2normhelper7 l2normhelper7
#define f_l2normhelper_sh l2normhelper_sh
#define f_l2normhelper_sh_rms l2normhelper_sh_rms
#define f_average average
@@ -42,6 +43,7 @@
#define f_global_interpind2d GLOBAL_INTERPIND2D
#define f_global_interpind1d GLOBAL_INTERPIND1D
#define f_l2normhelper L2NORMHELPER
#define f_l2normhelper7 L2NORMHELPER7
#define f_l2normhelper_sh L2NORMHELPER_SH
#define f_l2normhelper_sh_rms L2NORMHELPER_SH_RMS
#define f_average AVERAGE
@@ -71,6 +73,7 @@
#define f_global_interpind2d global_interpind2d_
#define f_global_interpind1d global_interpind1d_
#define f_l2normhelper l2normhelper_
#define f_l2normhelper7 l2normhelper7_
#define f_l2normhelper_sh l2normhelper_sh_
#define f_l2normhelper_sh_rms l2normhelper_sh_rms_
#define f_average average_
@@ -164,6 +167,15 @@ extern "C"
double *, double &, int &);
}
extern "C"
{
void f_l2normhelper7(int *, double *, double *, double *,
double &, double &, double &,
double &, double &, double &,
double *, double *, double *, double *,
double *, double *, double *, double *, int &);
}
extern "C"
{
void f_l2normhelper_sh(int *, double *, double *, double *,

View File

@@ -16,66 +16,115 @@ using namespace std;
#include <string.h>
#include <math.h>
#endif
// Intel oneMKL LAPACK interface
#include <mkl_lapacke.h>
/* Linear equation solution using Intel oneMKL LAPACK.
/* Linear equation solution by Gauss-Jordan elimination.
a[0..n-1][0..n-1] is the input matrix. b[0..n-1] is input
containing the right-hand side vectors. On output a is
replaced by its matrix inverse, and b is replaced by the
corresponding set of solution vectors.
Mathematical equivalence:
Solves: A * x = b => x = A^(-1) * b
Original Gauss-Jordan and LAPACK dgesv/dgetri produce identical results
within numerical precision. */
corresponding set of solution vectors */
int gaussj(double *a, double *b, int n)
{
// Allocate pivot array and workspace
lapack_int *ipiv = new lapack_int[n];
lapack_int info;
double swap;
// Make a copy of matrix a for solving (dgesv modifies it to LU form)
double *a_copy = new double[n * n];
for (int i = 0; i < n * n; i++) {
a_copy[i] = a[i];
int *indxc, *indxr, *ipiv;
indxc = new int[n];
indxr = new int[n];
ipiv = new int[n];
int i, icol, irow, j, k, l, ll;
double big, dum, pivinv, temp;
for (j = 0; j < n; j++)
ipiv[j] = 0;
for (i = 0; i < n; i++)
{
big = 0.0;
for (j = 0; j < n; j++)
if (ipiv[j] != 1)
for (k = 0; k < n; k++)
{
if (ipiv[k] == 0)
{
if (fabs(a[j * n + k]) >= big)
{
big = fabs(a[j * n + k]);
irow = j;
icol = k;
}
}
else if (ipiv[k] > 1)
{
cout << "gaussj: Singular Matrix-1" << endl;
for (int ii = 0; ii < n; ii++)
{
for (int jj = 0; jj < n; jj++)
cout << a[ii * n + jj] << " ";
cout << endl;
}
return 1; // error return
}
}
// Step 1: Solve linear system A*x = b using LU decomposition
// LAPACKE_dgesv uses column-major by default, but we use row-major
info = LAPACKE_dgesv(LAPACK_ROW_MAJOR, n, 1, a_copy, n, ipiv, b, 1);
if (info != 0) {
cout << "gaussj: Singular Matrix (dgesv info=" << info << ")" << endl;
delete[] ipiv;
delete[] a_copy;
return 1;
ipiv[icol] = ipiv[icol] + 1;
if (irow != icol)
{
for (l = 0; l < n; l++)
{
swap = a[irow * n + l];
a[irow * n + l] = a[icol * n + l];
a[icol * n + l] = swap;
}
// Step 2: Compute matrix inverse A^(-1) using LU factorization
// First do LU factorization of original matrix a
info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, n, n, a, n, ipiv);
if (info != 0) {
cout << "gaussj: Singular Matrix (dgetrf info=" << info << ")" << endl;
delete[] ipiv;
delete[] a_copy;
return 1;
swap = b[irow];
b[irow] = b[icol];
b[icol] = swap;
}
// Then compute inverse from LU factorization
info = LAPACKE_dgetri(LAPACK_ROW_MAJOR, n, a, n, ipiv);
indxr[i] = irow;
indxc[i] = icol;
if (info != 0) {
cout << "gaussj: Singular Matrix (dgetri info=" << info << ")" << endl;
delete[] ipiv;
delete[] a_copy;
return 1;
if (a[icol * n + icol] == 0.0)
{
cout << "gaussj: Singular Matrix-2" << endl;
for (int ii = 0; ii < n; ii++)
{
for (int jj = 0; jj < n; jj++)
cout << a[ii * n + jj] << " ";
cout << endl;
}
return 1; // error return
}
pivinv = 1.0 / a[icol * n + icol];
a[icol * n + icol] = 1.0;
for (l = 0; l < n; l++)
a[icol * n + l] *= pivinv;
b[icol] *= pivinv;
for (ll = 0; ll < n; ll++)
if (ll != icol)
{
dum = a[ll * n + icol];
a[ll * n + icol] = 0.0;
for (l = 0; l < n; l++)
a[ll * n + l] -= a[icol * n + l] * dum;
b[ll] -= b[icol] * dum;
}
}
for (l = n - 1; l >= 0; l--)
{
if (indxr[l] != indxc[l])
for (k = 0; k < n; k++)
{
swap = a[k * n + indxr[l]];
a[k * n + indxr[l]] = a[k * n + indxc[l]];
a[k * n + indxc[l]] = swap;
}
}
delete[] indxc;
delete[] indxr;
delete[] ipiv;
delete[] a_copy;
return 0;
}

View File

@@ -512,10 +512,11 @@
IMPLICIT DOUBLE PRECISION (A-H,O-Z)
DIMENSION V(N),W(N)
! SUBROUTINE TO COMPUTE DOUBLE PRECISION VECTOR DOT PRODUCT.
! Optimized using Intel oneMKL BLAS ddot
! Mathematical equivalence: DGVV = sum_{i=1}^{N} V(i)*W(i)
DOUBLE PRECISION, EXTERNAL :: DDOT
DGVV = DDOT(N, V, 1, W, 1)
SUM = 0.0D0
DO 10 I = 1,N
SUM = SUM + V(I)*W(I)
10 CONTINUE
DGVV = SUM
RETURN
END

View File

@@ -215,99 +215,6 @@ integer, parameter :: NO_SYMM=0, OCTANT=2
end subroutine kodis
!-----------------------------------------------------------------------------
! kodis variant: reuses caller-provided fh work array (memory pool)
!-----------------------------------------------------------------------------
subroutine kodis_fh(ex,X,Y,Z,f,f_rhs,SoA,Symmetry,eps,fh)
implicit none
! argument variables
integer,intent(in) :: Symmetry
integer,dimension(3),intent(in)::ex
real*8, dimension(1:3), intent(in) :: SoA
double precision,intent(in),dimension(ex(1))::X
double precision,intent(in),dimension(ex(2))::Y
double precision,intent(in),dimension(ex(3))::Z
double precision,intent(in),dimension(ex(1),ex(2),ex(3))::f
double precision,intent(inout),dimension(ex(1),ex(2),ex(3))::f_rhs
real*8,intent(in) :: eps
real*8,dimension(-2:ex(1),-2:ex(2),-2:ex(3)),intent(inout):: fh
! local variables
integer :: imin,jmin,kmin,imax,jmax,kmax
integer :: i,j,k
real*8 :: dX,dY,dZ
real*8, parameter :: ONE=1.d0,SIX=6.d0,FIT=1.5d1,TWT=2.d1
real*8,parameter::cof=6.4d1 ! 2^6
integer, parameter :: NO_SYMM=0, OCTANT=2
dX = X(2)-X(1)
dY = Y(2)-Y(1)
dZ = Z(2)-Z(1)
imax = ex(1)
jmax = ex(2)
kmax = ex(3)
imin = 1
jmin = 1
kmin = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -2
if(Symmetry == OCTANT .and. dabs(X(1)) < dX) imin = -2
if(Symmetry == OCTANT .and. dabs(Y(1)) < dY) jmin = -2
call symmetry_bd(3,ex,f,fh,SoA)
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
if(i-3 >= imin .and. i+3 <= imax .and. &
j-3 >= jmin .and. j+3 <= jmax .and. &
k-3 >= kmin .and. k+3 <= kmax) then
#if 0
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/dX/cof * ( &
(fh(i-3,j,k)+fh(i+3,j,k)) - &
SIX*(fh(i-2,j,k)+fh(i+2,j,k)) + &
FIT*(fh(i-1,j,k)+fh(i+1,j,k)) - &
TWT* fh(i,j,k) )
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/dY/cof * ( &
(fh(i,j-3,k)+fh(i,j+3,k)) - &
SIX*(fh(i,j-2,k)+fh(i,j+2,k)) + &
FIT*(fh(i,j-1,k)+fh(i,j+1,k)) - &
TWT* fh(i,j,k) )
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/dZ/cof * ( &
(fh(i,j,k-3)+fh(i,j,k+3)) - &
SIX*(fh(i,j,k-2)+fh(i,j,k+2)) + &
FIT*(fh(i,j,k-1)+fh(i,j,k+1)) - &
TWT* fh(i,j,k) )
#else
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof *( ( &
(fh(i-3,j,k)+fh(i+3,j,k)) - &
SIX*(fh(i-2,j,k)+fh(i+2,j,k)) + &
FIT*(fh(i-1,j,k)+fh(i+1,j,k)) - &
TWT* fh(i,j,k) )/dX + &
( &
(fh(i,j-3,k)+fh(i,j+3,k)) - &
SIX*(fh(i,j-2,k)+fh(i,j+2,k)) + &
FIT*(fh(i,j-1,k)+fh(i,j+1,k)) - &
TWT* fh(i,j,k) )/dY + &
( &
(fh(i,j,k-3)+fh(i,j,k+3)) - &
SIX*(fh(i,j,k-2)+fh(i,j,k+2)) + &
FIT*(fh(i,j,k-1)+fh(i,j,k+1)) - &
TWT* fh(i,j,k) )/dZ )
#endif
endif
enddo
enddo
enddo
return
end subroutine kodis_fh
#elif (ghost_width == 4)
! sixth order code
!------------------------------------------------------------------------------------------------------------------------------

View File

@@ -487,160 +487,6 @@ subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
end subroutine lopsided
!-----------------------------------------------------------------------------
! lopsided variant: reuses caller-provided fh work array (memory pool)
!-----------------------------------------------------------------------------
subroutine lopsided_fh(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA,fh)
implicit none
!~~~~~~> Input parameters:
integer, intent(in) :: ex(1:3),Symmetry
real*8, intent(in) :: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
real*8,dimension(ex(1),ex(2),ex(3)),intent(in) :: f,Sfx,Sfy,Sfz
real*8,dimension(ex(1),ex(2),ex(3)),intent(inout):: f_rhs
real*8,dimension(3),intent(in) ::SoA
real*8,dimension(-2:ex(1),-2:ex(2),-2:ex(3)),intent(inout):: fh
!~~~~~~> local variables:
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
real*8 :: dX,dY,dZ
real*8 :: d12dx,d12dy,d12dz,d2dx,d2dy,d2dz
real*8, parameter :: ZEO=0.d0,ONE=1.d0, F3=3.d0
real*8, parameter :: TWO=2.d0,F6=6.0d0,F18=1.8d1
real*8, parameter :: F12=1.2d1, F10=1.d1,EIT=8.d0
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
dX = X(2)-X(1)
dY = Y(2)-Y(1)
dZ = Z(2)-Z(1)
d12dx = ONE/F12/dX
d12dy = ONE/F12/dY
d12dz = ONE/F12/dZ
d2dx = ONE/TWO/dX
d2dy = ONE/TWO/dY
d2dz = ONE/TWO/dZ
imax = ex(1)
jmax = ex(2)
kmax = ex(3)
imin = 1
jmin = 1
kmin = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -2
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -2
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -2
call symmetry_bd(3,ex,f,fh,SoA)
! upper bound set ex-1 only for efficiency,
! the loop body will set ex 0 also
do k=1,ex(3)-1
do j=1,ex(2)-1
do i=1,ex(1)-1
#if 0
!! old code - same as original lopsided
#else
!! new code, 2012dec27, based on bam
! x direction
if(Sfx(i,j,k) > ZEO)then
if(i+3 <= imax)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(-F3*fh(i-1,j,k)-F10*fh(i,j,k)+F18*fh(i+1,j,k) &
-F6*fh(i+2,j,k)+ fh(i+3,j,k))
elseif(i+2 <= imax)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
elseif(i+1 <= imax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfx(i,j,k)*d12dx*(-F3*fh(i+1,j,k)-F10*fh(i,j,k)+F18*fh(i-1,j,k) &
-F6*fh(i-2,j,k)+ fh(i-3,j,k))
endif
elseif(Sfx(i,j,k) < ZEO)then
if(i-3 >= imin)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfx(i,j,k)*d12dx*(-F3*fh(i+1,j,k)-F10*fh(i,j,k)+F18*fh(i-1,j,k) &
-F6*fh(i-2,j,k)+ fh(i-3,j,k))
elseif(i-2 >= imin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
elseif(i-1 >= imin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(-F3*fh(i-1,j,k)-F10*fh(i,j,k)+F18*fh(i+1,j,k) &
-F6*fh(i+2,j,k)+ fh(i+3,j,k))
endif
endif
! y direction
if(Sfy(i,j,k) > ZEO)then
if(j+3 <= jmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(-F3*fh(i,j-1,k)-F10*fh(i,j,k)+F18*fh(i,j+1,k) &
-F6*fh(i,j+2,k)+ fh(i,j+3,k))
elseif(j+2 <= jmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
elseif(j+1 <= jmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfy(i,j,k)*d12dy*(-F3*fh(i,j+1,k)-F10*fh(i,j,k)+F18*fh(i,j-1,k) &
-F6*fh(i,j-2,k)+ fh(i,j-3,k))
endif
elseif(Sfy(i,j,k) < ZEO)then
if(j-3 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfy(i,j,k)*d12dy*(-F3*fh(i,j+1,k)-F10*fh(i,j,k)+F18*fh(i,j-1,k) &
-F6*fh(i,j-2,k)+ fh(i,j-3,k))
elseif(j-2 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
elseif(j-1 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(-F3*fh(i,j-1,k)-F10*fh(i,j,k)+F18*fh(i,j+1,k) &
-F6*fh(i,j+2,k)+ fh(i,j+3,k))
endif
endif
! z direction
if(Sfz(i,j,k) > ZEO)then
if(k+3 <= kmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k-1)-F10*fh(i,j,k)+F18*fh(i,j,k+1) &
-F6*fh(i,j,k+2)+ fh(i,j,k+3))
elseif(k+2 <= kmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
elseif(k+1 <= kmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k+1)-F10*fh(i,j,k)+F18*fh(i,j,k-1) &
-F6*fh(i,j,k-2)+ fh(i,j,k-3))
endif
elseif(Sfz(i,j,k) < ZEO)then
if(k-3 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k+1)-F10*fh(i,j,k)+F18*fh(i,j,k-1) &
-F6*fh(i,j,k-2)+ fh(i,j,k-3))
elseif(k-2 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
elseif(k-1 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k-1)-F10*fh(i,j,k)+F18*fh(i,j,k+1) &
-F6*fh(i,j,k+2)+ fh(i,j,k+3))
endif
endif
#endif
enddo
enddo
enddo
return
end subroutine lopsided_fh
#elif (ghost_width == 4)
! sixth order code
! Compute advection terms in right hand sides of field equations

View File

@@ -2,6 +2,20 @@
include makefile.inc
## polint(ordn=6) kernel selector:
## 1 (default): barycentric fast path
## 0 : fallback to Neville path
POLINT6_USE_BARY ?= 1
POLINT6_FLAG = -DPOLINT6_USE_BARYCENTRIC=$(POLINT6_USE_BARY)
ARCH_OPT = -march=x86-64-v4
CXXAPPFLAGS = -O3 $(ARCH_OPT) -fp-model fast=2 -fma -ipo \
-Dfortran3 -Dnewc -I${MKLROOT}/include
f90appflags = -O3 $(ARCH_OPT) -fp-model fast=2 -fma -ipo \
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
TP_OPTFLAGS = -O3 $(ARCH_OPT) -fp-model fast=2 -fma -ipo \
-Dfortran3 -Dnewc -I${MKLROOT}/include
.SUFFIXES: .o .f90 .C .for .cu
.f90.o:
@@ -16,6 +30,12 @@ include makefile.inc
.cu.o:
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
TwoPunctures.o: TwoPunctures.C
${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@
TwoPunctureABE.o: TwoPunctureABE.C
${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@
# Input files
C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
cgh.o bssn_class.o surface_integral.o ShellPatch.o\
@@ -96,7 +116,7 @@ ABEGPU: $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
TwoPunctureABE: $(TwoPunctureFILES)
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(TwoPunctureFILES) $(LDLIBS)
$(CLINKER) $(TP_OPTFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
clean:
rm *.o ABE ABEGPU TwoPunctureABE make.log -f

View File

@@ -1,25 +1,27 @@
## GCC version (commented out)
## filein = -I/usr/include -I/usr/lib/x86_64-linux-gnu/mpich/include -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
## filein = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
## LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran
## Intel oneAPI version with oneMKL (Optimized for performance)
## Intel oneAPI version with oneMKL
filein = -I/usr/include/ -I${MKLROOT}/include
## Using sequential MKL (OpenMP disabled for better single-threaded performance)
## Added -lifcore for Intel Fortran runtime and -limf for Intel math library
LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl
## Use sequential oneMKL to avoid introducing extra OpenMP behavior into ABE.
LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl -liomp5
## Optional Intel oneTBB allocator, kept aligned with main's build environment.
USE_TBBMALLOC ?= 1
TBBMALLOC_SO ?= /home/intel/oneapi/2025.3/lib/libtbbmalloc.so
ifneq ($(wildcard $(TBBMALLOC_SO)),)
TBBMALLOC_LIBS = -Wl,--no-as-needed $(TBBMALLOC_SO) -Wl,--as-needed
else
TBBMALLOC_LIBS = -Wl,--no-as-needed -ltbbmalloc -Wl,--as-needed
endif
ifeq ($(USE_TBBMALLOC),1)
LDLIBS := $(TBBMALLOC_LIBS) $(LDLIBS)
endif
## Aggressive optimization flags:
## -O3: Maximum optimization
## -xHost: Optimize for the host CPU architecture (Intel/AMD compatible)
## -fp-model fast=2: Aggressive floating-point optimizations
## -fma: Enable fused multiply-add instructions
## Note: OpenMP has been disabled (-qopenmp removed) due to performance issues
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
-Dfortran3 -Dnewc -I${MKLROOT}/include
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
-align array64byte -fpp -I${MKLROOT}/include
f90 = ifx
f77 = ifx
CXX = icpx

View File

@@ -11,18 +11,6 @@
import AMSS_NCKU_Input as input_data
import subprocess
## CPU core binding configuration using taskset
## taskset ensures all child processes inherit the CPU affinity mask
## This forces make and all compiler processes to use only nohz_full cores (4-55, 60-111)
## Format: taskset -c 4-55,60-111 ensures processes only run on these cores
NUMACTL_CPU_BIND = "taskset -c 16-47,64-95"
#NUMACTL_CPU_BIND = "taskset -c 0-111"
## Build parallelism configuration
## Use nohz_full cores (4-55, 60-111) for compilation: 52 + 52 = 104 cores
## Set make -j to utilize available cores for faster builds
BUILD_JOBS = 64
##################################################################
@@ -38,11 +26,11 @@ def makefile_ABE():
print( " Compiling the AMSS-NCKU executable file ABE/ABEGPU " )
print( )
## Build command with CPU binding to nohz_full cores
## Build command
if (input_data.GPU_Calculation == "no"):
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABE"
makefile_command = "make -j96" + " ABE"
elif (input_data.GPU_Calculation == "yes"):
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABEGPU"
makefile_command = "make -j4" + " ABEGPU"
else:
print( " CPU/GPU numerical calculation setting is wrong " )
print( )
@@ -79,8 +67,8 @@ def makefile_TwoPunctureABE():
print( " Compiling the AMSS-NCKU executable file TwoPunctureABE " )
print( )
## Build command with CPU binding to nohz_full cores
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} TwoPunctureABE"
## Build command
makefile_command = "make" + " TwoPunctureABE"
## Execute the command with subprocess.Popen and stream output
makefile_process = subprocess.Popen(makefile_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
@@ -117,10 +105,10 @@ def run_ABE():
## Define the command to run; cast other values to strings as needed
if (input_data.GPU_Calculation == "no"):
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
mpi_command = "mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
mpi_command_outfile = "ABE_out.log"
elif (input_data.GPU_Calculation == "yes"):
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
mpi_command = "mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
mpi_command_outfile = "ABEGPU_out.log"
## Execute the MPI command and stream output
@@ -159,7 +147,7 @@ def run_TwoPunctureABE():
print( )
## Define the command to run
TwoPuncture_command = NUMACTL_CPU_BIND + " ./TwoPunctureABE"
TwoPuncture_command = "./TwoPunctureABE"
TwoPuncture_command_outfile = "TwoPunctureABE_out.log"
## Execute the command with subprocess.Popen and stream output

12
parallel_plot_helper.py Normal file
View File

@@ -0,0 +1,12 @@
import multiprocessing
def run_plot_task(task):
func, args = task
return func(*args)
def run_plot_tasks_parallel(plot_tasks):
ctx = multiprocessing.get_context('fork')
with ctx.Pool() as pool:
pool.map(run_plot_task, plot_tasks)

View File

@@ -11,6 +11,8 @@
import numpy ## numpy for array operations
import scipy ## scipy for interpolation and signal processing
import math
import matplotlib
matplotlib.use('Agg') ## use non-interactive backend for multiprocessing safety
import matplotlib.pyplot as plt ## matplotlib for plotting
import os ## os for system/file operations

View File

@@ -8,16 +8,21 @@
##
#################################################
## Restrict OpenMP to one thread per process so that parallel
## subprocess plotting does not multiply BLAS thread counts.
import os
os.environ.setdefault("OMP_NUM_THREADS", "1")
import numpy
import scipy
import matplotlib
matplotlib.use('Agg') ## use non-interactive backend for multiprocessing safety
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from mpl_toolkits.mplot3d import Axes3D
## import torch
import AMSS_NCKU_Input as input_data
import os
#########################################################################################
@@ -192,3 +197,11 @@ def get_data_xy( Rmin, Rmax, n, data0, time, figure_title, figure_outdir ):
####################################################################################
## Allow standalone subprocess execution for parallel binary-data plotting.
if __name__ == '__main__':
import sys
if len(sys.argv) != 4:
print(f"Usage: {sys.argv[0]} <filename> <binary_outdir> <figure_outdir>")
sys.exit(1)
plot_binary_data(sys.argv[1], sys.argv[2], sys.argv[3])

View File

@@ -8,6 +8,8 @@
#################################################
import numpy ## numpy for array operations
import matplotlib
matplotlib.use('Agg') ## use non-interactive backend for multiprocessing safety
import matplotlib.pyplot as plt ## matplotlib for plotting
from mpl_toolkits.mplot3d import Axes3D ## needed for 3D plots
import glob
@@ -15,6 +17,9 @@ import os ## operating system utilities
import plot_binary_data
import AMSS_NCKU_Input as input_data
import subprocess
import sys
import multiprocessing
# plt.rcParams['text.usetex'] = True ## enable LaTeX fonts in plots
@@ -50,10 +55,34 @@ def generate_binary_data_plot( binary_outdir, figure_outdir ):
file_list.append(x)
print(x)
## Plot each file in the list
## Plot each file in parallel using subprocesses.
## Each subprocess starts with BLAS thread limits in plot_binary_data.py.
script = os.path.join( os.path.dirname(__file__), "plot_binary_data.py" )
max_workers = min( multiprocessing.cpu_count(), len(file_list) ) if file_list else 0
running = []
failed = []
for filename in file_list:
print(filename)
plot_binary_data.plot_binary_data(filename, binary_outdir, figure_outdir)
proc = subprocess.Popen(
[sys.executable, script, filename, binary_outdir, figure_outdir],
)
running.append( (proc, filename) )
if len(running) >= max_workers:
p, fn = running.pop(0)
p.wait()
if p.returncode != 0:
failed.append(fn)
for p, fn in running:
p.wait()
if p.returncode != 0:
failed.append(fn)
if failed:
print( " WARNING: the following binary data plots failed:" )
for fn in failed:
print( " ", fn )
print( )
print( " Binary Data Plot Has been Finished " )