Compare commits

..

1 Commits

Author SHA1 Message Date
082f9c3423 feat: Implement hybrid MPI+OpenMP parallelization
- Enable -qopenmp in makefile.inc
- Add OpenMP directives to 4th order derivatives in diff_new.f90
- Update makefile_and_run.py to dynamic calculate OMP_NUM_THREADS based on 96 cores and remove hardcoded CPU binding
2026-02-06 13:25:07 +08:00
13 changed files with 1686 additions and 1936 deletions

View File

@@ -1,447 +0,0 @@
##################################################################
##
## AMSS-NCKU ABE Test Program (Skip TwoPuncture if data exists)
## Modified from AMSS_NCKU_Program.py
## Author: Xiaoqu
## Modified: 2026/02/01
##
##################################################################
##################################################################
## Print program introduction
import print_information
print_information.print_program_introduction()
##################################################################
import AMSS_NCKU_Input as input_data
##################################################################
## Create directories to store program run data
import os
import shutil
import sys
import time
## Set the output directory according to the input file
File_directory = os.path.join(input_data.File_directory)
## Check if output directory exists and if TwoPuncture data is available
#skip_twopuncture = False
skip_twopuncture = True
output_directory = os.path.join(File_directory, "AMSS_NCKU_output")
binary_results_directory = os.path.join(output_directory, input_data.Output_directory)
if os.path.exists(File_directory):
print( " Output directory already exists." )
print()
'''
# Check if TwoPuncture initial data files exist
if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture"):
twopuncture_output = os.path.join(output_directory, "TwoPunctureABE")
input_par = os.path.join(output_directory, "input.par")
if os.path.exists(twopuncture_output) and os.path.exists(input_par):
print( " Found existing TwoPuncture initial data." )
print( " Do you want to skip TwoPuncture phase and reuse existing data?" )
print( " Input 'skip' to skip TwoPuncture and start ABE directly" )
print( " Input 'regenerate' to regenerate everything from scratch" )
print()
while True:
try:
inputvalue = input()
if ( inputvalue == "skip" ):
print( " Skipping TwoPuncture phase, will reuse existing initial data." )
print()
skip_twopuncture = True
break
elif ( inputvalue == "regenerate" ):
print( " Regenerating everything from scratch." )
print()
skip_twopuncture = False
break
else:
print( " Please input 'skip' or 'regenerate'." )
except ValueError:
print( " Please input 'skip' or 'regenerate'." )
else:
print( " TwoPuncture initial data not found, will regenerate everything." )
print()
'''
# If not skipping, remove and recreate directory
if not skip_twopuncture:
shutil.rmtree(File_directory, ignore_errors=True)
os.mkdir(File_directory)
os.mkdir(output_directory)
os.mkdir(binary_results_directory)
figure_directory = os.path.join(File_directory, "figure")
os.mkdir(figure_directory)
shutil.copy("AMSS_NCKU_Input.py", File_directory)
print( " Output directory has been regenerated." )
print()
else:
# Create fresh directory structure
os.mkdir(File_directory)
shutil.copy("AMSS_NCKU_Input.py", File_directory)
os.mkdir(output_directory)
os.mkdir(binary_results_directory)
figure_directory = os.path.join(File_directory, "figure")
os.mkdir(figure_directory)
print( " Output directory has been generated." )
print()
# Ensure figure directory exists
figure_directory = os.path.join(File_directory, "figure")
if not os.path.exists(figure_directory):
os.mkdir(figure_directory)
##################################################################
## Output related parameter information
import setup
## Print and save input parameter information
setup.print_input_data( File_directory )
if not skip_twopuncture:
setup.generate_AMSSNCKU_input()
setup.print_puncture_information()
##################################################################
## Generate AMSS-NCKU program input files based on the configured parameters
if not skip_twopuncture:
print()
print( " Generating the AMSS-NCKU input parfile for the ABE executable." )
print()
## Generate cgh-related input files from the grid information
import numerical_grid
numerical_grid.append_AMSSNCKU_cgh_input()
print()
print( " The input parfile for AMSS-NCKU C++ executable file ABE has been generated." )
print( " However, the input relevant to TwoPuncture need to be appended later." )
print()
##################################################################
## Plot the initial grid configuration
if not skip_twopuncture:
print()
print( " Schematically plot the numerical grid structure." )
print()
import numerical_grid
numerical_grid.plot_initial_grid()
##################################################################
## Generate AMSS-NCKU macro files according to the numerical scheme and parameters
if not skip_twopuncture:
print()
print( " Automatically generating the macro file for AMSS-NCKU C++ executable file ABE " )
print( " (Based on the finite-difference numerical scheme) " )
print()
import generate_macrodef
generate_macrodef.generate_macrodef_h()
print( " AMSS-NCKU macro file macrodef.h has been generated. " )
generate_macrodef.generate_macrodef_fh()
print( " AMSS-NCKU macro file macrodef.fh has been generated. " )
##################################################################
# Compile the AMSS-NCKU program according to user requirements
# NOTE: ABE compilation is always performed, even when skipping TwoPuncture
print()
print( " Preparing to compile and run the AMSS-NCKU code as requested " )
print( " Compiling the AMSS-NCKU code based on the generated macro files " )
print()
AMSS_NCKU_source_path = "AMSS_NCKU_source"
AMSS_NCKU_source_copy = os.path.join(File_directory, "AMSS_NCKU_source_copy")
## If AMSS_NCKU source folder is missing, create it and prompt the user
if not os.path.exists(AMSS_NCKU_source_path):
os.makedirs(AMSS_NCKU_source_path)
print( " The AMSS-NCKU source files are incomplete; copy all source files into ./AMSS_NCKU_source. " )
print( " Press Enter to continue. " )
inputvalue = input()
# Copy AMSS-NCKU source files to prepare for compilation
# If skipping TwoPuncture and source_copy already exists, remove it first
if skip_twopuncture and os.path.exists(AMSS_NCKU_source_copy):
shutil.rmtree(AMSS_NCKU_source_copy)
shutil.copytree(AMSS_NCKU_source_path, AMSS_NCKU_source_copy)
# Copy the generated macro files into the AMSS_NCKU source folder
if not skip_twopuncture:
macrodef_h_path = os.path.join(File_directory, "macrodef.h")
macrodef_fh_path = os.path.join(File_directory, "macrodef.fh")
else:
# When skipping TwoPuncture, use existing macro files from previous run
macrodef_h_path = os.path.join(File_directory, "macrodef.h")
macrodef_fh_path = os.path.join(File_directory, "macrodef.fh")
shutil.copy2(macrodef_h_path, AMSS_NCKU_source_copy)
shutil.copy2(macrodef_fh_path, AMSS_NCKU_source_copy)
# Compile related programs
import makefile_and_run
## Change working directory to the target source copy
os.chdir(AMSS_NCKU_source_copy)
## Build the main AMSS-NCKU executable (ABE or ABEGPU)
makefile_and_run.makefile_ABE()
## If the initial-data method is Ansorg-TwoPuncture, build the TwoPunctureABE executable
## Only build TwoPunctureABE if not skipping TwoPuncture phase
if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ) and not skip_twopuncture:
makefile_and_run.makefile_TwoPunctureABE()
## Change current working directory back up two levels
os.chdir('..')
os.chdir('..')
print()
##################################################################
## Copy the AMSS-NCKU executable (ABE/ABEGPU) to the run directory
if (input_data.GPU_Calculation == "no"):
ABE_file = os.path.join(AMSS_NCKU_source_copy, "ABE")
elif (input_data.GPU_Calculation == "yes"):
ABE_file = os.path.join(AMSS_NCKU_source_copy, "ABEGPU")
if not os.path.exists( ABE_file ):
print()
print( " Lack of AMSS-NCKU executable file ABE/ABEGPU; recompile AMSS_NCKU_source manually. " )
print( " When recompilation is finished, press Enter to continue. " )
inputvalue = input()
## Copy the executable ABE (or ABEGPU) into the run directory
shutil.copy2(ABE_file, output_directory)
## If the initial-data method is TwoPuncture, copy the TwoPunctureABE executable to the run directory
## Only copy TwoPunctureABE if not skipping TwoPuncture phase
if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ) and not skip_twopuncture:
TwoPuncture_file = os.path.join(AMSS_NCKU_source_copy, "TwoPunctureABE")
if not os.path.exists( TwoPuncture_file ):
print()
print( " Lack of AMSS-NCKU executable file TwoPunctureABE; recompile TwoPunctureABE in AMSS_NCKU_source. " )
print( " When recompilation is finished, press Enter to continue. " )
inputvalue = input()
## Copy the TwoPunctureABE executable into the run directory
shutil.copy2(TwoPuncture_file, output_directory)
##################################################################
## If the initial-data method is TwoPuncture, generate the TwoPuncture input files
if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ) and not skip_twopuncture:
print()
print( " Initial data is chosen as Ansorg-TwoPuncture" )
print()
print()
print( " Automatically generating the input parfile for the TwoPunctureABE executable " )
print()
import generate_TwoPuncture_input
generate_TwoPuncture_input.generate_AMSSNCKU_TwoPuncture_input()
print()
print( " The input parfile for the TwoPunctureABE executable has been generated. " )
print()
## Generated AMSS-NCKU TwoPuncture input filename
AMSS_NCKU_TwoPuncture_inputfile = 'AMSS-NCKU-TwoPuncture.input'
AMSS_NCKU_TwoPuncture_inputfile_path = os.path.join( File_directory, AMSS_NCKU_TwoPuncture_inputfile )
## Copy and rename the file
shutil.copy2( AMSS_NCKU_TwoPuncture_inputfile_path, os.path.join(output_directory, 'TwoPunctureinput.par') )
## Run TwoPuncture to generate initial-data files
start_time = time.time() # Record start time
print()
print()
## Change to the output (run) directory
os.chdir(output_directory)
## Run the TwoPuncture executable
import makefile_and_run
makefile_and_run.run_TwoPunctureABE()
## Change current working directory back up two levels
os.chdir('..')
os.chdir('..')
elif (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ) and skip_twopuncture:
print()
print( " Skipping TwoPuncture execution, using existing initial data." )
print()
start_time = time.time() # Record start time for ABE only
else:
start_time = time.time() # Record start time
##################################################################
## Update puncture data based on TwoPuncture run results
if not skip_twopuncture:
import renew_puncture_parameter
renew_puncture_parameter.append_AMSSNCKU_BSSN_input(File_directory, output_directory)
## Generated AMSS-NCKU input filename
AMSS_NCKU_inputfile = 'AMSS-NCKU.input'
AMSS_NCKU_inputfile_path = os.path.join(File_directory, AMSS_NCKU_inputfile)
## Copy and rename the file
shutil.copy2( AMSS_NCKU_inputfile_path, os.path.join(output_directory, 'input.par') )
print()
print( " Successfully copy all AMSS-NCKU input parfile to target dictionary. " )
print()
else:
print()
print( " Using existing input.par file from previous run." )
print()
##################################################################
## Launch the AMSS-NCKU program
print()
print()
## Change to the run directory
os.chdir( output_directory )
import makefile_and_run
makefile_and_run.run_ABE()
## Change current working directory back up two levels
os.chdir('..')
os.chdir('..')
end_time = time.time()
elapsed_time = end_time - start_time
##################################################################
## Copy some basic input and log files out to facilitate debugging
## Path to the file that stores calculation settings
AMSS_NCKU_error_file_path = os.path.join(binary_results_directory, "setting.par")
## Copy and rename the file for easier inspection
shutil.copy( AMSS_NCKU_error_file_path, os.path.join(output_directory, "AMSSNCKU_setting_parameter") )
## Path to the error log file
AMSS_NCKU_error_file_path = os.path.join(binary_results_directory, "Error.log")
## Copy and rename the error log
shutil.copy( AMSS_NCKU_error_file_path, os.path.join(output_directory, "Error.log") )
## Primary program outputs
AMSS_NCKU_BH_data = os.path.join(binary_results_directory, "bssn_BH.dat" )
AMSS_NCKU_ADM_data = os.path.join(binary_results_directory, "bssn_ADMQs.dat" )
AMSS_NCKU_psi4_data = os.path.join(binary_results_directory, "bssn_psi4.dat" )
AMSS_NCKU_constraint_data = os.path.join(binary_results_directory, "bssn_constraint.dat")
## copy and rename the file
shutil.copy( AMSS_NCKU_BH_data, os.path.join(output_directory, "bssn_BH.dat" ) )
shutil.copy( AMSS_NCKU_ADM_data, os.path.join(output_directory, "bssn_ADMQs.dat" ) )
shutil.copy( AMSS_NCKU_psi4_data, os.path.join(output_directory, "bssn_psi4.dat" ) )
shutil.copy( AMSS_NCKU_constraint_data, os.path.join(output_directory, "bssn_constraint.dat") )
## Additional program outputs
if (input_data.Equation_Class == "BSSN-EM"):
AMSS_NCKU_phi1_data = os.path.join(binary_results_directory, "bssn_phi1.dat" )
AMSS_NCKU_phi2_data = os.path.join(binary_results_directory, "bssn_phi2.dat" )
shutil.copy( AMSS_NCKU_phi1_data, os.path.join(output_directory, "bssn_phi1.dat" ) )
shutil.copy( AMSS_NCKU_phi2_data, os.path.join(output_directory, "bssn_phi2.dat" ) )
elif (input_data.Equation_Class == "BSSN-EScalar"):
AMSS_NCKU_maxs_data = os.path.join(binary_results_directory, "bssn_maxs.dat" )
shutil.copy( AMSS_NCKU_maxs_data, os.path.join(output_directory, "bssn_maxs.dat" ) )
##################################################################
## Plot the AMSS-NCKU program results
print()
print( " Plotting the txt and binary results data from the AMSS-NCKU simulation " )
print()
import plot_xiaoqu
import plot_GW_strain_amplitude_xiaoqu
## Plot black hole trajectory
plot_xiaoqu.generate_puncture_orbit_plot( binary_results_directory, figure_directory )
plot_xiaoqu.generate_puncture_orbit_plot3D( binary_results_directory, figure_directory )
## Plot black hole separation vs. time
plot_xiaoqu.generate_puncture_distence_plot( binary_results_directory, figure_directory )
## Plot gravitational waveforms (psi4 and strain amplitude)
for i in range(input_data.Detector_Number):
plot_xiaoqu.generate_gravitational_wave_psi4_plot( binary_results_directory, figure_directory, i )
plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot( binary_results_directory, figure_directory, i )
## Plot ADM mass evolution
for i in range(input_data.Detector_Number):
plot_xiaoqu.generate_ADMmass_plot( binary_results_directory, figure_directory, i )
## Plot Hamiltonian constraint violation over time
for i in range(input_data.grid_level):
plot_xiaoqu.generate_constraint_check_plot( binary_results_directory, figure_directory, i )
## Plot stored binary data
plot_xiaoqu.generate_binary_data_plot( binary_results_directory, figure_directory )
print()
print( f" This Program Cost = {elapsed_time} Seconds " )
print()
##################################################################
print()
print( " The AMSS-NCKU-Python simulation is successfully finished, thanks for using !!! " )
print()
##################################################################

View File

@@ -16,7 +16,7 @@ import numpy
File_directory = "GW150914" ## output file directory File_directory = "GW150914" ## output file directory
Output_directory = "binary_output" ## binary data file directory Output_directory = "binary_output" ## binary data file directory
## The file directory name should not be too long ## The file directory name should not be too long
MPI_processes = 64 ## number of mpi processes used in the simulation MPI_processes = 48 ## number of mpi processes used in the simulation
GPU_Calculation = "no" ## Use GPU or not GPU_Calculation = "no" ## Use GPU or not
## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface) ## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)

File diff suppressed because it is too large Load Diff

View File

@@ -1,8 +1,7 @@
#ifndef TWO_PUNCTURES_H #ifndef TWO_PUNCTURES_H
#define TWO_PUNCTURES_H #define TWO_PUNCTURES_H
#include <omp.h>
#define StencilSize 19 #define StencilSize 19
#define N_PlaneRelax 1 #define N_PlaneRelax 1
#define NRELAX 200 #define NRELAX 200
@@ -33,7 +32,7 @@ private:
int npoints_A, npoints_B, npoints_phi; int npoints_A, npoints_B, npoints_phi;
double target_M_plus, target_M_minus; double target_M_plus, target_M_minus;
double admMass; double admMass;
double adm_tol; double adm_tol;
@@ -43,18 +42,6 @@ private:
int ntotal; int ntotal;
// ===== Precomputed spectral derivative matrices =====
double *D1_A, *D2_A;
double *D1_B, *D2_B;
double *DF1_phi, *DF2_phi;
// ===== Pre-allocated workspace for LineRelax (per-thread) =====
int max_threads;
double **ws_diag_be, **ws_e_be, **ws_f_be, **ws_b_be, **ws_x_be;
double **ws_l_be, **ws_u_be, **ws_d_be, **ws_y_be;
double **ws_diag_al, **ws_e_al, **ws_f_al, **ws_b_al, **ws_x_al;
double **ws_l_al, **ws_u_al, **ws_d_al, **ws_y_al;
struct parameters struct parameters
{ {
int nvar, n1, n2, n3; int nvar, n1, n2, n3;
@@ -71,28 +58,6 @@ public:
int Newtonmaxit); int Newtonmaxit);
~TwoPunctures(); ~TwoPunctures();
// 02/07: New/modified methods
void allocate_workspace();
void free_workspace();
void precompute_derivative_matrices();
void build_cheb_deriv_matrices(int n, double *D1, double *D2);
void build_fourier_deriv_matrices(int N, double *DF1, double *DF2);
void Derivatives_AB3_MatMul(int nvar, int n1, int n2, int n3, derivs v);
void ThomasAlgorithm_ws(int N, double *b, double *a, double *c, double *x, double *q,
double *l, double *u_ws, double *d, double *y);
void LineRelax_be_omp(double *dv,
int const i, int const k, int const nvar,
int const n1, int const n2, int const n3,
double const *rhs, int const *ncols, int **cols,
double **JFD, int tid);
void LineRelax_al_omp(double *dv,
int const j, int const k, int const nvar,
int const n1, int const n2, int const n3,
double const *rhs, int const *ncols,
int **cols, double **JFD, int tid);
void relax_omp(double *dv, int const nvar, int const n1, int const n2, int const n3,
double const *rhs, int const *ncols, int **cols, double **JFD);
void Solve(); void Solve();
void set_initial_guess(derivs v); void set_initial_guess(derivs v);
int index(int i, int j, int k, int l, int a, int b, int c, int d); int index(int i, int j, int k, int l, int a, int b, int c, int d);
@@ -151,11 +116,23 @@ public:
double BY_KKofxyz(double x, double y, double z); double BY_KKofxyz(double x, double y, double z);
void SetMatrix_JFD(int nvar, int n1, int n2, int n3, derivs u, int *ncols, int **cols, double **Matrix); void SetMatrix_JFD(int nvar, int n1, int n2, int n3, derivs u, int *ncols, int **cols, double **Matrix);
void J_times_dv(int nvar, int n1, int n2, int n3, derivs dv, double *Jdv, derivs u); void J_times_dv(int nvar, int n1, int n2, int n3, derivs dv, double *Jdv, derivs u);
void relax(double *dv, int const nvar, int const n1, int const n2, int const n3,
double const *rhs, int const *ncols, int **cols, double **JFD);
void LineRelax_be(double *dv,
int const i, int const k, int const nvar,
int const n1, int const n2, int const n3,
double const *rhs, int const *ncols, int **cols,
double **JFD);
void JFD_times_dv(int i, int j, int k, int nvar, int n1, int n2, void JFD_times_dv(int i, int j, int k, int nvar, int n1, int n2,
int n3, derivs dv, derivs u, double *values); int n3, derivs dv, derivs u, double *values);
void LinEquations(double A, double B, double X, double R, void LinEquations(double A, double B, double X, double R,
double x, double r, double phi, double x, double r, double phi,
double y, double z, derivs dU, derivs U, double *values); double y, double z, derivs dU, derivs U, double *values);
void LineRelax_al(double *dv,
int const j, int const k, int const nvar,
int const n1, int const n2, int const n3,
double const *rhs, int const *ncols,
int **cols, double **JFD);
void ThomasAlgorithm(int N, double *b, double *a, double *c, double *x, double *q); void ThomasAlgorithm(int N, double *b, double *a, double *c, double *x, double *q);
void Save(char *fname); void Save(char *fname);
// provided by Vasileios Paschalidis (vpaschal@illinois.edu) // provided by Vasileios Paschalidis (vpaschal@illinois.edu)
@@ -164,4 +141,4 @@ public:
void SpecCoef(parameters par, int ivar, double *v, double *cf); void SpecCoef(parameters par, int ivar, double *v, double *cf);
}; };
#endif /* TWO_PUNCTURES_H */ #endif /* TWO_PUNCTURES_H */

View File

@@ -106,8 +106,7 @@
call getpbh(BHN,Porg,Mass) call getpbh(BHN,Porg,Mass)
#endif #endif
!!! sanity check (disabled in production builds for performance) !!! sanity check
#ifdef DEBUG
dX = sum(chi)+sum(trK)+sum(dxx)+sum(gxy)+sum(gxz)+sum(dyy)+sum(gyz)+sum(dzz) & dX = sum(chi)+sum(trK)+sum(dxx)+sum(gxy)+sum(gxz)+sum(dyy)+sum(gyz)+sum(dzz) &
+sum(Axx)+sum(Axy)+sum(Axz)+sum(Ayy)+sum(Ayz)+sum(Azz) & +sum(Axx)+sum(Axy)+sum(Axz)+sum(Ayy)+sum(Ayz)+sum(Azz) &
+sum(Gamx)+sum(Gamy)+sum(Gamz) & +sum(Gamx)+sum(Gamy)+sum(Gamz) &
@@ -137,7 +136,6 @@
gont = 1 gont = 1
return return
endif endif
#endif
PI = dacos(-ONE) PI = dacos(-ONE)
@@ -161,8 +159,36 @@
chi_rhs = F2o3 *chin1*( alpn1 * trK - div_beta ) !rhs for chi chi_rhs = F2o3 *chin1*( alpn1 * trK - div_beta ) !rhs for chi
call fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
call fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev)
call fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev)
call fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
call fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev)
call fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
gxx_rhs = - TWO * alpn1 * Axx - F2o3 * gxx * div_beta + &
TWO *( gxx * betaxx + gxy * betayx + gxz * betazx)
gyy_rhs = - TWO * alpn1 * Ayy - F2o3 * gyy * div_beta + &
TWO *( gxy * betaxy + gyy * betayy + gyz * betazy)
gzz_rhs = - TWO * alpn1 * Azz - F2o3 * gzz * div_beta + &
TWO *( gxz * betaxz + gyz * betayz + gzz * betazz)
gxy_rhs = - TWO * alpn1 * Axy + F1o3 * gxy * div_beta + &
gxx * betaxy + gxz * betazy + &
gyy * betayx + gyz * betazx &
- gxy * betazz
gyz_rhs = - TWO * alpn1 * Ayz + F1o3 * gyz * div_beta + &
gxy * betaxz + gyy * betayz + &
gxz * betaxy + gzz * betazy &
- gyz * betaxx
gxz_rhs = - TWO * alpn1 * Axz + F1o3 * gxz * div_beta + &
gxx * betaxz + gxy * betayz + &
gyz * betayx + gzz * betazx &
- gxz * betayy !rhs for gij
! invert tilted metric ! invert tilted metric
gupzz = gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - & gupzz = gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
@@ -173,12 +199,7 @@
gupyy = ( gxx * gzz - gxz * gxz ) / gupzz gupyy = ( gxx * gzz - gxz * gxz ) / gupzz
gupyz = - ( gxx * gyz - gxy * gxz ) / gupzz gupyz = - ( gxx * gyz - gxy * gxz ) / gupzz
gupzz = ( gxx * gyy - gxy * gxy ) / gupzz gupzz = ( gxx * gyy - gxy * gxy ) / gupzz
call fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
call fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev)
call fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev)
call fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
call fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev)
call fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
if(co == 0)then if(co == 0)then
! Gam^i_Res = Gam^i + gup^ij_,j ! Gam^i_Res = Gam^i + gup^ij_,j
Gmx_Res = Gamx - (gupxx*(gupxx*gxxx+gupxy*gxyx+gupxz*gxzx)& Gmx_Res = Gamx - (gupxx*(gupxx*gxxx+gupxy*gxyx+gupxz*gxzx)&
@@ -924,99 +945,99 @@
!!!!!!!!!advection term part !!!!!!!!!advection term part
gxx_rhs = - TWO * alpn1 * Axx - F2o3 * gxx * div_beta + &
TWO *( gxx * betaxx + gxy * betayx + gxz * betazx)
gyy_rhs = - TWO * alpn1 * Ayy - F2o3 * gyy * div_beta + &
TWO *( gxy * betaxy + gyy * betayy + gyz * betazy)
gzz_rhs = - TWO * alpn1 * Azz - F2o3 * gzz * div_beta + &
TWO *( gxz * betaxz + gyz * betayz + gzz * betazz)
gxy_rhs = - TWO * alpn1 * Axy + F1o3 * gxy * div_beta + &
gxx * betaxy + gxz * betazy + &
gyy * betayx + gyz * betazx &
- gxy * betazz
gyz_rhs = - TWO * alpn1 * Ayz + F1o3 * gyz * div_beta + &
gxy * betaxz + gyy * betayz + &
gxz * betaxy + gzz * betazy &
- gyz * betaxx
gxz_rhs = - TWO * alpn1 * Axz + F1o3 * gxz * div_beta + &
gxx * betaxz + gxy * betayz + &
gyz * betayx + gzz * betazx &
- gxz * betayy !rhs for gij
if(eps>0)then
! usual Kreiss-Oliger dissipation
call merge_lopsided_kodis(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps)
call merge_lopsided_kodis(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps)
call merge_lopsided_kodis(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA,eps)
call merge_lopsided_kodis(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA,eps)
call merge_lopsided_kodis(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,eps)
call merge_lopsided_kodis(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA,eps)
call merge_lopsided_kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA,eps)
else
call lopsided(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS) call lopsided(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS)
call lopsided(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA) call lopsided(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA)
call lopsided(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA) call lopsided(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA)
call lopsided(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS) call lopsided(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS)
call lopsided(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA) call lopsided(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA)
call lopsided(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA) call lopsided(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA)
call lopsided(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS) call lopsided(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS)
call lopsided(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS) call lopsided(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS)
call lopsided(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA) call lopsided(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA)
!!
call lopsided(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS)
#if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7)
call lopsided(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS) call lopsided(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS)
call lopsided(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS) call lopsided(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS)
call lopsided(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA) call lopsided(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA)
#endif
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
call lopsided(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS) call lopsided(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS)
call lopsided(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS) call lopsided(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS)
call lopsided(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA) call lopsided(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA)
#endif
if(eps>0)then
! usual Kreiss-Oliger dissipation
call kodis(ex,X,Y,Z,chi,chi_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,trK,trK_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,dxx,gxx_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,gxy,gxy_rhs,AAS,Symmetry,eps)
call kodis(ex,X,Y,Z,gxz,gxz_rhs,ASA,Symmetry,eps)
call kodis(ex,X,Y,Z,dyy,gyy_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,gyz,gyz_rhs,SAA,Symmetry,eps)
call kodis(ex,X,Y,Z,dzz,gzz_rhs,SSS,Symmetry,eps)
#if 0
#define i 42
#define j 40
#define k 40
if(Lev == 1)then
write(*,*) X(i),Y(j),Z(k)
write(*,*) "before",Axx_rhs(i,j,k)
endif
#undef i
#undef j
#undef k
!!stop
#endif
call kodis(ex,X,Y,Z,Axx,Axx_rhs,SSS,Symmetry,eps)
#if 0
#define i 42
#define j 40
#define k 40
if(Lev == 1)then
write(*,*) X(i),Y(j),Z(k)
write(*,*) "after",Axx_rhs(i,j,k)
endif
#undef i
#undef j
#undef k
!!stop
#endif
call kodis(ex,X,Y,Z,Axy,Axy_rhs,AAS,Symmetry,eps)
call kodis(ex,X,Y,Z,Axz,Axz_rhs,ASA,Symmetry,eps)
call kodis(ex,X,Y,Z,Ayy,Ayy_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,Ayz,Ayz_rhs,SAA,Symmetry,eps)
call kodis(ex,X,Y,Z,Azz,Azz_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,Gamx,Gamx_rhs,ASS,Symmetry,eps)
call kodis(ex,X,Y,Z,Gamy,Gamy_rhs,SAS,Symmetry,eps)
call kodis(ex,X,Y,Z,Gamz,Gamz_rhs,SSA,Symmetry,eps)
#if 1
!! bam does not apply dissipation on gauge variables
call kodis(ex,X,Y,Z,Lap,Lap_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,betax,betax_rhs,ASS,Symmetry,eps)
call kodis(ex,X,Y,Z,betay,betay_rhs,SAS,Symmetry,eps)
call kodis(ex,X,Y,Z,betaz,betaz_rhs,SSA,Symmetry,eps)
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
call kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,ASS,Symmetry,eps)
call kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,SAS,Symmetry,eps)
call kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,SSA,Symmetry,eps)
#endif
#endif
endif endif
@@ -1163,265 +1184,3 @@ endif
return return
end function compute_rhs_bssn end function compute_rhs_bssn
subroutine merge_lopsided_kodis(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA,eps)
implicit none
!~~~~~~> Input parameters:
integer, intent(in) :: ex(1:3),Symmetry
real*8, intent(in) :: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
real*8,dimension(ex(1),ex(2),ex(3)),intent(in) :: f,Sfx,Sfy,Sfz
real*8,dimension(ex(1),ex(2),ex(3)),intent(inout):: f_rhs
real*8,dimension(3),intent(in) ::SoA
!~~~~~~> local variables:
! note index -2,-1,0, so we have 3 extra points
real*8,dimension(-2:ex(1),-2:ex(2),-2:ex(3)) :: fh
integer :: imin_lopsided,jmin_lopsided,kmin_lopsided,imin_kodis,jmin_kodis,kmin_kodis,imax,jmax,kmax,i,j,k
real*8 :: dX,dY,dZ
real*8 :: d12dx,d12dy,d12dz,d2dx,d2dy,d2dz
real*8, parameter :: ZEO=0.d0,ONE=1.d0, F3=3.d0
real*8, parameter :: TWO=2.d0,F6=6.0d0,F18=1.8d1
real*8, parameter :: F12=1.2d1, F10=1.d1,EIT=8.d0
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
real*8, parameter :: SIX=6.d0,FIT=1.5d1,TWT=2.d1
real*8,parameter::cof=6.4d1 ! 2^6
real*8,intent(in) :: eps
dX = X(2)-X(1)
dY = Y(2)-Y(1)
dZ = Z(2)-Z(1)
d12dx = ONE/F12/dX
d12dy = ONE/F12/dY
d12dz = ONE/F12/dZ
d2dx = ONE/TWO/dX
d2dy = ONE/TWO/dY
d2dz = ONE/TWO/dZ
imax = ex(1)
jmax = ex(2)
kmax = ex(3)
imin_lopsided = 1
jmin_lopsided = 1
kmin_lopsided = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin_lopsided = -2
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin_lopsided = -2
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin_lopsided = -2
imin_kodis = 1
jmin_kodis = 1
kmin_kodis = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin_kodis = -2
if(Symmetry == OCTANT .and. dabs(X(1)) < dX) imin_kodis = -2
if(Symmetry == OCTANT .and. dabs(Y(1)) < dY) jmin_kodis = -2
call symmetry_bd(3,ex,f,fh,SoA)
! upper bound set ex-1 only for efficiency,
! the loop body will set ex 0 also
do k=1,ex(3)-1
do j=1,ex(2)-1
do i=1,ex(1)-1
!! new code, 2012dec27, based on bam
! x direction
if(Sfx(i,j,k) > ZEO)then
if(i+3 <= imax)then
! v
! D f = ------[ - 3f - 10f + 18f - 6f + f ]
! i 12dx i-v i i+v i+2v i+3v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(-F3*fh(i-1,j,k)-F10*fh(i,j,k)+F18*fh(i+1,j,k) &
-F6*fh(i+2,j,k)+ fh(i+3,j,k))
elseif(i+2 <= imax)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
elseif(i+1 <= imax)then
! v
! D f = ------[ 3f + 10f - 18f + 6f - f ]
! i 12dx i+v i i-v i-2v i-3v
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfx(i,j,k)*d12dx*(-F3*fh(i+1,j,k)-F10*fh(i,j,k)+F18*fh(i-1,j,k) &
-F6*fh(i-2,j,k)+ fh(i-3,j,k))
! set imax and imin_lopsided 0
endif
elseif(Sfx(i,j,k) < ZEO)then
if(i-3 >= imin_lopsided)then
! v
! D f = ------[ - 3f - 10f + 18f - 6f + f ]
! i 12dx i-v i i+v i+2v i+3v
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfx(i,j,k)*d12dx*(-F3*fh(i+1,j,k)-F10*fh(i,j,k)+F18*fh(i-1,j,k) &
-F6*fh(i-2,j,k)+ fh(i-3,j,k))
elseif(i-2 >= imin_lopsided)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
elseif(i-1 >= imin_lopsided)then
! v
! D f = ------[ 3f + 10f - 18f + 6f - f ]
! i 12dx i+v i i-v i-2v i-3v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(-F3*fh(i-1,j,k)-F10*fh(i,j,k)+F18*fh(i+1,j,k) &
-F6*fh(i+2,j,k)+ fh(i+3,j,k))
! set imax and imin_lopsided 0
endif
endif
! y direction
if(Sfy(i,j,k) > ZEO)then
if(j+3 <= jmax)then
! v
! D f = ------[ - 3f - 10f + 18f - 6f + f ]
! i 12dx i-v i i+v i+2v i+3v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(-F3*fh(i,j-1,k)-F10*fh(i,j,k)+F18*fh(i,j+1,k) &
-F6*fh(i,j+2,k)+ fh(i,j+3,k))
elseif(j+2 <= jmax)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
elseif(j+1 <= jmax)then
! v
! D f = ------[ 3f + 10f - 18f + 6f - f ]
! i 12dx i+v i i-v i-2v i-3v
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfy(i,j,k)*d12dy*(-F3*fh(i,j+1,k)-F10*fh(i,j,k)+F18*fh(i,j-1,k) &
-F6*fh(i,j-2,k)+ fh(i,j-3,k))
! set imax and imin_lopsided 0
endif
elseif(Sfy(i,j,k) < ZEO)then
if(j-3 >= jmin_lopsided)then
! v
! D f = ------[ - 3f - 10f + 18f - 6f + f ]
! i 12dx i-v i i+v i+2v i+3v
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfy(i,j,k)*d12dy*(-F3*fh(i,j+1,k)-F10*fh(i,j,k)+F18*fh(i,j-1,k) &
-F6*fh(i,j-2,k)+ fh(i,j-3,k))
elseif(j-2 >= jmin_lopsided)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
elseif(j-1 >= jmin_lopsided)then
! v
! D f = ------[ 3f + 10f - 18f + 6f - f ]
! i 12dx i+v i i-v i-2v i-3v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(-F3*fh(i,j-1,k)-F10*fh(i,j,k)+F18*fh(i,j+1,k) &
-F6*fh(i,j+2,k)+ fh(i,j+3,k))
! set jmax and jmin_lopsided 0
endif
endif
! z direction
if(Sfz(i,j,k) > ZEO)then
if(k+3 <= kmax)then
! v
! D f = ------[ - 3f - 10f + 18f - 6f + f ]
! i 12dx i-v i i+v i+2v i+3v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k-1)-F10*fh(i,j,k)+F18*fh(i,j,k+1) &
-F6*fh(i,j,k+2)+ fh(i,j,k+3))
elseif(k+2 <= kmax)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
elseif(k+1 <= kmax)then
! v
! D f = ------[ 3f + 10f - 18f + 6f - f ]
! i 12dx i+v i i-v i-2v i-3v
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k+1)-F10*fh(i,j,k)+F18*fh(i,j,k-1) &
-F6*fh(i,j,k-2)+ fh(i,j,k-3))
! set imax and imin_lopsided 0
endif
elseif(Sfz(i,j,k) < ZEO)then
if(k-3 >= kmin_lopsided)then
! v
! D f = ------[ - 3f - 10f + 18f - 6f + f ]
! i 12dx i-v i i+v i+2v i+3v
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k+1)-F10*fh(i,j,k)+F18*fh(i,j,k-1) &
-F6*fh(i,j,k-2)+ fh(i,j,k-3))
elseif(k-2 >= kmin_lopsided)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
elseif(k-1 >= kmin_lopsided)then
! v
! D f = ------[ 3f + 10f - 18f + 6f - f ]
! i 12dx i+v i i-v i-2v i-3v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k-1)-F10*fh(i,j,k)+F18*fh(i,j,k+1) &
-F6*fh(i,j,k+2)+ fh(i,j,k+3))
! set kmax and kmin_lopsided 0
endif
endif
if(i-3 >= imin_kodis .and. i+3 <= imax .and. &
j-3 >= jmin_kodis .and. j+3 <= jmax .and. &
k-3 >= kmin_kodis .and. k+3 <= kmax) then
! calculation order if important ?
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof *( ( &
(fh(i-3,j,k)+fh(i+3,j,k)) - &
SIX*(fh(i-2,j,k)+fh(i+2,j,k)) + &
FIT*(fh(i-1,j,k)+fh(i+1,j,k)) - &
TWT* fh(i,j,k) )/dX + &
( &
(fh(i,j-3,k)+fh(i,j+3,k)) - &
SIX*(fh(i,j-2,k)+fh(i,j+2,k)) + &
FIT*(fh(i,j-1,k)+fh(i,j+1,k)) - &
TWT* fh(i,j,k) )/dY + &
( &
(fh(i,j,k-3)+fh(i,j,k+3)) - &
SIX*(fh(i,j,k-2)+fh(i,j,k+2)) + &
FIT*(fh(i,j,k-1)+fh(i,j,k+1)) - &
TWT* fh(i,j,k) )/dZ )
endif
enddo
enddo
enddo
return
end subroutine merge_lopsided_kodis

View File

@@ -69,6 +69,7 @@
fy = ZEO fy = ZEO
fz = ZEO fz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -151,6 +152,7 @@
fx = ZEO fx = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -218,6 +220,7 @@
fy = ZEO fy = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -282,6 +285,7 @@
fz = ZEO fz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -371,6 +375,7 @@
fxz = ZEO fxz = ZEO
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -469,6 +474,7 @@
fxx = ZEO fxx = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -531,6 +537,7 @@
fyy = ZEO fyy = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -594,6 +601,7 @@
fzz = ZEO fzz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -657,6 +665,7 @@
fxy = ZEO fxy = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -719,6 +728,7 @@
fxz = ZEO fxz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -780,6 +790,7 @@
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -866,6 +877,7 @@
fxz = ZEO fxz = ZEO
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -997,10 +1009,90 @@
fy = ZEO fy = ZEO
fz = ZEO fz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
#if 0
! x direction
if(i+2 <= imax .and. i-2 >= imin)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
fx(i,j,k)=d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
elseif(i+1 <= imax .and. i-1 >= imin)then
!
! - f(i-1) + f(i+1)
! fx(i) = --------------------------------
! 2 dx
fx(i,j,k)=d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
! set imax and imin 0
endif
! y direction
if(j+2 <= jmax .and. j-2 >= jmin)then
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
elseif(j+1 <= jmax .and. j-1 >= jmin)then
fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
! set jmax and jmin 0
endif
! z direction
if(k+2 <= kmax .and. k-2 >= kmin)then
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
elseif(k+1 <= kmax .and. k-1 >= kmin)then
fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
! set kmax and kmin 0
endif
#elif 0
! x direction
if(i+2 <= imax .and. i-2 >= imin)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
fx(i,j,k)=d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
elseif(i+3 <= imax .and. i-1 >= imin)then
fx(i,j,k)=d12dx*(-3.d0*fh(i-1,j,k)-1.d1*fh(i,j,k)+1.8d1*fh(i+1,j,k)-6.d0*fh(i+2,j,k)+fh(i+3,j,k))
elseif(i+1 <= imax .and. i-3 >= imin)then
fx(i,j,k)=d12dx*( 3.d0*fh(i+1,j,k)+1.d1*fh(i,j,k)-1.8d1*fh(i-1,j,k)+6.d0*fh(i-2,j,k)-fh(i-3,j,k))
! set imax and imin 0
endif
! y direction
if(j+2 <= jmax .and. j-2 >= jmin)then
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
elseif(j+3 <= jmax .and. j-1 >= jmin)then
fy(i,j,k)=d12dy*(-3.d0*fh(i,j-1,k)-1.d1*fh(i,j,k)+1.8d1*fh(i,j+1,k)-6.d0*fh(i,j+2,k)+fh(i,j+3,k))
elseif(j+1 <= jmax .and. j-3 >= jmin)then
fy(i,j,k)=d12dy*( 3.d0*fh(i,j+1,k)+1.d1*fh(i,j,k)-1.8d1*fh(i,j-1,k)+6.d0*fh(i,j-2,k)-fh(i,j-3,k))
! set jmax and jmin 0
endif
! z direction
if(k+2 <= kmax .and. k-2 >= kmin)then
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
elseif(k+3 <= kmax .and. k-1 >= kmin)then
fz(i,j,k)=d12dz*(-3.d0*fh(i,j,k-1)-1.d1*fh(i,j,k)+1.8d1*fh(i,j,k+1)-6.d0*fh(i,j,k+2)+fh(i,j,k+3))
elseif(k+1 <= kmax .and. k-3 >= kmin)then
fz(i,j,k)=d12dz*( 3.d0*fh(i,j,k+1)+1.d1*fh(i,j,k)-1.8d1*fh(i,j,k-1)+6.d0*fh(i,j,k-2)-fh(i,j,k-3))
! set kmax and kmin 0
endif
#else
! for bam comparison ! for bam comparison
if(i+2 <= imax .and. i-2 >= imin .and. & if(i+2 <= imax .and. i-2 >= imin .and. &
j+2 <= jmax .and. j-2 >= jmin .and. & j+2 <= jmax .and. j-2 >= jmin .and. &
@@ -1015,7 +1107,7 @@
fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k)) fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1)) fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
endif endif
#endif
enddo enddo
enddo enddo
enddo enddo
@@ -1072,6 +1164,7 @@
fx = ZEO fx = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -1148,6 +1241,7 @@
fy = ZEO fy = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -1218,6 +1312,7 @@
fz = ZEO fz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -1322,10 +1417,89 @@
fxz = ZEO fxz = ZEO
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
#if 0
!~~~~~~ fxx
if(i+2 <= imax .and. i-2 >= imin)then
!
! - f(i-2) + 16 f(i-1) - 30 f(i) + 16 f(i+1) - f(i+2)
! fxx(i) = ----------------------------------------------------------
! 12 dx^2
fxx(i,j,k) = Fdxdx*(-fh(i-2,j,k)+F16*fh(i-1,j,k)-F30*fh(i,j,k) &
-fh(i+2,j,k)+F16*fh(i+1,j,k) )
elseif(i+1 <= imax .and. i-1 >= imin)then
!
! f(i-1) - 2 f(i) + f(i+1)
! fxx(i) = --------------------------------
! dx^2
fxx(i,j,k) = Sdxdx*(fh(i-1,j,k)-TWO*fh(i,j,k) &
+fh(i+1,j,k) )
endif
!~~~~~~ fyy
if(j+2 <= jmax .and. j-2 >= jmin)then
fyy(i,j,k) = Fdydy*(-fh(i,j-2,k)+F16*fh(i,j-1,k)-F30*fh(i,j,k) &
-fh(i,j+2,k)+F16*fh(i,j+1,k) )
elseif(j+1 <= jmax .and. j-1 >= jmin)then
fyy(i,j,k) = Sdydy*(fh(i,j-1,k)-TWO*fh(i,j,k) &
+fh(i,j+1,k) )
endif
!~~~~~~ fzz
if(k+2 <= kmax .and. k-2 >= kmin)then
fzz(i,j,k) = Fdzdz*(-fh(i,j,k-2)+F16*fh(i,j,k-1)-F30*fh(i,j,k) &
-fh(i,j,k+2)+F16*fh(i,j,k+1) )
elseif(k+1 <= kmax .and. k-1 >= kmin)then
fzz(i,j,k) = Sdzdz*(fh(i,j,k-1)-TWO*fh(i,j,k) &
+fh(i,j,k+1) )
endif
!~~~~~~ fxy
if(i+2 <= imax .and. i-2 >= imin .and. j+2 <= jmax .and. j-2 >= jmin)then
!
! ( f(i-2,j-2) - 8 f(i-1,j-2) + 8 f(i+1,j-2) - f(i+2,j-2) )
! - 8 ( f(i-2,j-1) - 8 f(i-1,j-1) + 8 f(i+1,j-1) - f(i+2,j-1) )
! + 8 ( f(i-2,j+1) - 8 f(i-1,j+1) + 8 f(i+1,j+1) - f(i+2,j+1) )
! - ( f(i-2,j+2) - 8 f(i-1,j+2) + 8 f(i+1,j+2) - f(i+2,j+2) )
! fxy(i,j) = ----------------------------------------------------------------
! 144 dx dy
fxy(i,j,k) = Fdxdy*( (fh(i-2,j-2,k)-F8*fh(i-1,j-2,k)+F8*fh(i+1,j-2,k)-fh(i+2,j-2,k)) &
-F8 *(fh(i-2,j-1,k)-F8*fh(i-1,j-1,k)+F8*fh(i+1,j-1,k)-fh(i+2,j-1,k)) &
+F8 *(fh(i-2,j+1,k)-F8*fh(i-1,j+1,k)+F8*fh(i+1,j+1,k)-fh(i+2,j+1,k)) &
- (fh(i-2,j+2,k)-F8*fh(i-1,j+2,k)+F8*fh(i+1,j+2,k)-fh(i+2,j+2,k)))
elseif(i+1 <= imax .and. i-1 >= imin .and. j+1 <= jmax .and. j-1 >= jmin)then
! f(i-1,j-1) - f(i+1,j-1) - f(i-1,j+1) + f(i+1,j+1)
! fxy(i,j) = -----------------------------------------------------------
! 4 dx dy
fxy(i,j,k) = Sdxdy*(fh(i-1,j-1,k)-fh(i+1,j-1,k)-fh(i-1,j+1,k)+fh(i+1,j+1,k))
endif
!~~~~~~ fxz
if(i+2 <= imax .and. i-2 >= imin .and. k+2 <= kmax .and. k-2 >= kmin)then
fxz(i,j,k) = Fdxdz*( (fh(i-2,j,k-2)-F8*fh(i-1,j,k-2)+F8*fh(i+1,j,k-2)-fh(i+2,j,k-2)) &
-F8 *(fh(i-2,j,k-1)-F8*fh(i-1,j,k-1)+F8*fh(i+1,j,k-1)-fh(i+2,j,k-1)) &
+F8 *(fh(i-2,j,k+1)-F8*fh(i-1,j,k+1)+F8*fh(i+1,j,k+1)-fh(i+2,j,k+1)) &
- (fh(i-2,j,k+2)-F8*fh(i-1,j,k+2)+F8*fh(i+1,j,k+2)-fh(i+2,j,k+2)))
elseif(i+1 <= imax .and. i-1 >= imin .and. k+1 <= kmax .and. k-1 >= kmin)then
fxz(i,j,k) = Sdxdz*(fh(i-1,j,k-1)-fh(i+1,j,k-1)-fh(i-1,j,k+1)+fh(i+1,j,k+1))
endif
!~~~~~~ fyz
if(j+2 <= jmax .and. j-2 >= jmin .and. k+2 <= kmax .and. k-2 >= kmin)then
fyz(i,j,k) = Fdydz*( (fh(i,j-2,k-2)-F8*fh(i,j-1,k-2)+F8*fh(i,j+1,k-2)-fh(i,j+2,k-2)) &
-F8 *(fh(i,j-2,k-1)-F8*fh(i,j-1,k-1)+F8*fh(i,j+1,k-1)-fh(i,j+2,k-1)) &
+F8 *(fh(i,j-2,k+1)-F8*fh(i,j-1,k+1)+F8*fh(i,j+1,k+1)-fh(i,j+2,k+1)) &
- (fh(i,j-2,k+2)-F8*fh(i,j-1,k+2)+F8*fh(i,j+1,k+2)-fh(i,j+2,k+2)))
elseif(j+1 <= jmax .and. j-1 >= jmin .and. k+1 <= kmax .and. k-1 >= kmin)then
fyz(i,j,k) = Sdydz*(fh(i,j-1,k-1)-fh(i,j+1,k-1)-fh(i,j-1,k+1)+fh(i,j+1,k+1))
endif
#else
! for bam comparison ! for bam comparison
if(i+2 <= imax .and. i-2 >= imin .and. & if(i+2 <= imax .and. i-2 >= imin .and. &
j+2 <= jmax .and. j-2 >= jmin .and. & j+2 <= jmax .and. j-2 >= jmin .and. &
@@ -1361,7 +1535,7 @@
fxz(i,j,k) = Sdxdz*(fh(i-1,j,k-1)-fh(i+1,j,k-1)-fh(i-1,j,k+1)+fh(i+1,j,k+1)) fxz(i,j,k) = Sdxdz*(fh(i-1,j,k-1)-fh(i+1,j,k-1)-fh(i-1,j,k+1)+fh(i+1,j,k+1))
fyz(i,j,k) = Sdydz*(fh(i,j-1,k-1)-fh(i,j+1,k-1)-fh(i,j-1,k+1)+fh(i,j+1,k+1)) fyz(i,j,k) = Sdydz*(fh(i,j-1,k-1)-fh(i,j+1,k-1)-fh(i,j-1,k+1)+fh(i,j+1,k+1))
endif endif
#endif
enddo enddo
enddo enddo
enddo enddo
@@ -1419,6 +1593,7 @@
fxx = ZEO fxx = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -1486,6 +1661,7 @@
fyy = ZEO fyy = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -1555,6 +1731,7 @@
fzz = ZEO fzz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -1624,6 +1801,7 @@
fxy = ZEO fxy = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -1694,6 +1872,7 @@
fxz = ZEO fxz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -1762,6 +1941,7 @@
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -1854,6 +2034,7 @@
fy = ZEO fy = ZEO
fz = ZEO fz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -1970,6 +2151,7 @@
fx = ZEO fx = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -2055,6 +2237,7 @@
fy = ZEO fy = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -2131,6 +2314,7 @@
fz = ZEO fz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -2249,6 +2433,7 @@
fxz = ZEO fxz = ZEO
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -2436,6 +2621,7 @@
fxx = ZEO fxx = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -2508,6 +2694,7 @@
fyy = ZEO fyy = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -2583,6 +2770,7 @@
fzz = ZEO fzz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -2658,6 +2846,7 @@
fxy = ZEO fxy = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -2738,6 +2927,7 @@
fxz = ZEO fxz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -2816,6 +3006,7 @@
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -2923,6 +3114,7 @@
fy = ZEO fy = ZEO
fz = ZEO fz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -3059,6 +3251,7 @@
fx = ZEO fx = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -3154,6 +3347,7 @@
fy = ZEO fy = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -3238,6 +3432,7 @@
fz = ZEO fz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -3373,6 +3568,7 @@
fxz = ZEO fxz = ZEO
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -3645,6 +3841,7 @@
fxx = ZEO fxx = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -3726,6 +3923,7 @@
fyy = ZEO fyy = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -3810,6 +4008,7 @@
fzz = ZEO fzz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -3894,6 +4093,7 @@
fxy = ZEO fxy = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -3996,6 +4196,7 @@
fxz = ZEO fxz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -4096,6 +4297,7 @@
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(3) schedule(static)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1

View File

@@ -18,61 +18,49 @@
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz
!~~~~~~~> Local variable: !~~~~~~~> Local variable:
integer :: i,j,k real*8, dimension(ex(1),ex(2),ex(3)) :: trA,detg
real*8 :: lgxx,lgyy,lgzz,ldetg real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz
real*8 :: lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz,gupyy,gupyz,gupzz
real*8 :: ltrA,lscale
real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0 real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0
!~~~~~~> !~~~~~~>
do k=1,ex(3) gxx = dxx + ONE
do j=1,ex(2) gyy = dyy + ONE
do i=1,ex(1) gzz = dzz + ONE
lgxx = dxx(i,j,k) + ONE detg = gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
lgyy = dyy(i,j,k) + ONE gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
lgzz = dzz(i,j,k) + ONE gupxx = ( gyy * gzz - gyz * gyz ) / detg
gupxy = - ( gxy * gzz - gyz * gxz ) / detg
gupxz = ( gxy * gyz - gyy * gxz ) / detg
gupyy = ( gxx * gzz - gxz * gxz ) / detg
gupyz = - ( gxx * gyz - gxy * gxz ) / detg
gupzz = ( gxx * gyy - gxy * gxy ) / detg
ldetg = lgxx * lgyy * lgzz & trA = gupxx * Axx + gupyy * Ayy + gupzz * Azz &
+ gxy(i,j,k) * gyz(i,j,k) * gxz(i,j,k) & + TWO * (gupxy * Axy + gupxz * Axz + gupyz * Ayz)
+ gxz(i,j,k) * gxy(i,j,k) * gyz(i,j,k) &
- gxz(i,j,k) * lgyy * gxz(i,j,k) &
- gxy(i,j,k) * gxy(i,j,k) * lgzz &
- lgxx * gyz(i,j,k) * gyz(i,j,k)
lgupxx = ( lgyy * lgzz - gyz(i,j,k) * gyz(i,j,k) ) / ldetg Axx = Axx - F1o3 * gxx * trA
lgupxy = - ( gxy(i,j,k) * lgzz - gyz(i,j,k) * gxz(i,j,k) ) / ldetg Axy = Axy - F1o3 * gxy * trA
lgupxz = ( gxy(i,j,k) * gyz(i,j,k) - lgyy * gxz(i,j,k) ) / ldetg Axz = Axz - F1o3 * gxz * trA
lgupyy = ( lgxx * lgzz - gxz(i,j,k) * gxz(i,j,k) ) / ldetg Ayy = Ayy - F1o3 * gyy * trA
lgupyz = - ( lgxx * gyz(i,j,k) - gxy(i,j,k) * gxz(i,j,k) ) / ldetg Ayz = Ayz - F1o3 * gyz * trA
lgupzz = ( lgxx * lgyy - gxy(i,j,k) * gxy(i,j,k) ) / ldetg Azz = Azz - F1o3 * gzz * trA
ltrA = lgupxx * Axx(i,j,k) + lgupyy * Ayy(i,j,k) & detg = ONE / ( detg ** F1o3 )
+ lgupzz * Azz(i,j,k) &
+ TWO * (lgupxy * Axy(i,j,k) + lgupxz * Axz(i,j,k) & gxx = gxx * detg
+ lgupyz * Ayz(i,j,k)) gxy = gxy * detg
gxz = gxz * detg
gyy = gyy * detg
gyz = gyz * detg
gzz = gzz * detg
Axx(i,j,k) = Axx(i,j,k) - F1o3 * lgxx * ltrA dxx = gxx - ONE
Axy(i,j,k) = Axy(i,j,k) - F1o3 * gxy(i,j,k) * ltrA dyy = gyy - ONE
Axz(i,j,k) = Axz(i,j,k) - F1o3 * gxz(i,j,k) * ltrA dzz = gzz - ONE
Ayy(i,j,k) = Ayy(i,j,k) - F1o3 * lgyy * ltrA
Ayz(i,j,k) = Ayz(i,j,k) - F1o3 * gyz(i,j,k) * ltrA
Azz(i,j,k) = Azz(i,j,k) - F1o3 * lgzz * ltrA
lscale = ONE / ( ldetg ** F1o3 )
dxx(i,j,k) = lgxx * lscale - ONE
gxy(i,j,k) = gxy(i,j,k) * lscale
gxz(i,j,k) = gxz(i,j,k) * lscale
dyy(i,j,k) = lgyy * lscale - ONE
gyz(i,j,k) = gyz(i,j,k) * lscale
dzz(i,j,k) = lgzz * lscale - ONE
enddo
enddo
enddo
return return
@@ -94,71 +82,51 @@
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz
!~~~~~~~> Local variable: !~~~~~~~> Local variable:
integer :: i,j,k real*8, dimension(ex(1),ex(2),ex(3)) :: trA
real*8 :: lgxx,lgyy,lgzz,lscale real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz
real*8 :: lgxy,lgxz,lgyz real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz,gupyy,gupyz,gupzz
real*8 :: lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz
real*8 :: ltrA
real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0 real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0
!~~~~~~> !~~~~~~>
do k=1,ex(3) gxx = dxx + ONE
do j=1,ex(2) gyy = dyy + ONE
do i=1,ex(1) gzz = dzz + ONE
! for g
gupzz = gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
! for g: normalize determinant first gupzz = ONE / ( gupzz ** F1o3 )
lgxx = dxx(i,j,k) + ONE
lgyy = dyy(i,j,k) + ONE gxx = gxx * gupzz
lgzz = dzz(i,j,k) + ONE gxy = gxy * gupzz
lgxy = gxy(i,j,k) gxz = gxz * gupzz
lgxz = gxz(i,j,k) gyy = gyy * gupzz
lgyz = gyz(i,j,k) gyz = gyz * gupzz
gzz = gzz * gupzz
lscale = lgxx * lgyy * lgzz + lgxy * lgyz * lgxz & dxx = gxx - ONE
+ lgxz * lgxy * lgyz - lgxz * lgyy * lgxz & dyy = gyy - ONE
- lgxy * lgxy * lgzz - lgxx * lgyz * lgyz dzz = gzz - ONE
! for A
lscale = ONE / ( lscale ** F1o3 ) gupxx = ( gyy * gzz - gyz * gyz )
gupxy = - ( gxy * gzz - gyz * gxz )
gupxz = ( gxy * gyz - gyy * gxz )
gupyy = ( gxx * gzz - gxz * gxz )
gupyz = - ( gxx * gyz - gxy * gxz )
gupzz = ( gxx * gyy - gxy * gxy )
lgxx = lgxx * lscale trA = gupxx * Axx + gupyy * Ayy + gupzz * Azz &
lgxy = lgxy * lscale + TWO * (gupxy * Axy + gupxz * Axz + gupyz * Ayz)
lgxz = lgxz * lscale
lgyy = lgyy * lscale
lgyz = lgyz * lscale
lgzz = lgzz * lscale
dxx(i,j,k) = lgxx - ONE Axx = Axx - F1o3 * gxx * trA
gxy(i,j,k) = lgxy Axy = Axy - F1o3 * gxy * trA
gxz(i,j,k) = lgxz Axz = Axz - F1o3 * gxz * trA
dyy(i,j,k) = lgyy - ONE Ayy = Ayy - F1o3 * gyy * trA
gyz(i,j,k) = lgyz Ayz = Ayz - F1o3 * gyz * trA
dzz(i,j,k) = lgzz - ONE Azz = Azz - F1o3 * gzz * trA
! for A: trace-free using normalized metric (det=1, no division needed)
lgupxx = ( lgyy * lgzz - lgyz * lgyz )
lgupxy = - ( lgxy * lgzz - lgyz * lgxz )
lgupxz = ( lgxy * lgyz - lgyy * lgxz )
lgupyy = ( lgxx * lgzz - lgxz * lgxz )
lgupyz = - ( lgxx * lgyz - lgxy * lgxz )
lgupzz = ( lgxx * lgyy - lgxy * lgxy )
ltrA = lgupxx * Axx(i,j,k) + lgupyy * Ayy(i,j,k) &
+ lgupzz * Azz(i,j,k) &
+ TWO * (lgupxy * Axy(i,j,k) + lgupxz * Axz(i,j,k) &
+ lgupyz * Ayz(i,j,k))
Axx(i,j,k) = Axx(i,j,k) - F1o3 * lgxx * ltrA
Axy(i,j,k) = Axy(i,j,k) - F1o3 * lgxy * ltrA
Axz(i,j,k) = Axz(i,j,k) - F1o3 * lgxz * ltrA
Ayy(i,j,k) = Ayy(i,j,k) - F1o3 * lgyy * ltrA
Ayz(i,j,k) = Ayz(i,j,k) - F1o3 * lgyz * ltrA
Azz(i,j,k) = Azz(i,j,k) - F1o3 * lgzz * ltrA
enddo
enddo
enddo
return return

View File

@@ -324,10 +324,10 @@ subroutine symmetry_bd(ord,extc,func,funcc,SoA)
integer::i integer::i
funcc = 0.d0
funcc(1:extc(1),1:extc(2),1:extc(3)) = func funcc(1:extc(1),1:extc(2),1:extc(3)) = func
do i=0,ord-1 do i=0,ord-1
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1)
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1)
enddo enddo
do i=0,ord-1 do i=0,ord-1
funcc(:,-i,1:extc(3)) = funcc(:,i+2,1:extc(3))*SoA(2) funcc(:,-i,1:extc(3)) = funcc(:,i+2,1:extc(3))*SoA(2)
@@ -350,6 +350,7 @@ subroutine symmetry_tbd(ord,extc,func,funcc,SoA)
integer::i integer::i
funcc = 0.d0
funcc(1:extc(1),1:extc(2),1:extc(3)) = func funcc(1:extc(1),1:extc(2),1:extc(3)) = func
do i=0,ord-1 do i=0,ord-1
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1) funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1)
@@ -378,6 +379,7 @@ subroutine symmetry_stbd(ord,extc,func,funcc,SoA)
integer::i integer::i
funcc = 0.d0
funcc(1:extc(1),1:extc(2),1:extc(3)) = func funcc(1:extc(1),1:extc(2),1:extc(3)) = func
do i=0,ord-1 do i=0,ord-1
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1) funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1)
@@ -884,6 +886,7 @@ subroutine symmetry_bd(ord,extc,func,funcc,SoA)
integer::i integer::i
funcc = 0.d0
funcc(1:extc(1),1:extc(2),1:extc(3)) = func funcc(1:extc(1),1:extc(2),1:extc(3)) = func
do i=0,ord-1 do i=0,ord-1
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1) funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1)
@@ -909,6 +912,7 @@ subroutine symmetry_tbd(ord,extc,func,funcc,SoA)
integer::i integer::i
funcc = 0.d0
funcc(1:extc(1),1:extc(2),1:extc(3)) = func funcc(1:extc(1),1:extc(2),1:extc(3)) = func
do i=0,ord-1 do i=0,ord-1
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1) funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1)
@@ -937,6 +941,7 @@ subroutine symmetry_stbd(ord,extc,func,funcc,SoA)
integer::i integer::i
funcc = 0.d0
funcc(1:extc(1),1:extc(2),1:extc(3)) = func funcc(1:extc(1),1:extc(2),1:extc(3)) = func
do i=0,ord-1 do i=0,ord-1
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1) funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1)
@@ -1113,65 +1118,64 @@ end subroutine d2dump
! Lagrangian polynomial interpolation ! Lagrangian polynomial interpolation
!------------------------------------------------------------------------------ !------------------------------------------------------------------------------
subroutine polint(xa, ya, x, y, dy, ordn) subroutine polint(xa,ya,x,y,dy,ordn)
implicit none implicit none
integer, intent(in) :: ordn !~~~~~~> Input Parameter:
real*8, dimension(ordn), intent(in) :: xa, ya integer,intent(in) :: ordn
real*8, dimension(ordn), intent(in) :: xa,ya
real*8, intent(in) :: x real*8, intent(in) :: x
real*8, intent(out) :: y, dy real*8, intent(out) :: y,dy
integer :: i, m, ns, n_m !~~~~~~> Other parameter:
real*8, dimension(ordn) :: c, d, ho
real*8 :: dif, dift, hp, h, den_val
c = ya integer :: m,n,ns
d = ya real*8, dimension(ordn) :: c,d,den,ho
ho = xa - x real*8 :: dif,dift
ns = 1 !~~~~~~>
dif = abs(x - xa(1))
do i = 2, ordn n=ordn
dift = abs(x - xa(i)) m=ordn
if (dift < dif) then
ns = i c=ya
dif = dift d=ya
end if ho=xa-x
ns=1
dif=abs(x-xa(1))
do m=1,n
dift=abs(x-xa(m))
if(dift < dif) then
ns=m
dif=dift
end if
end do end do
y = ya(ns) y=ya(ns)
ns = ns - 1 ns=ns-1
do m=1,n-1
do m = 1, ordn - 1 den(1:n-m)=ho(1:n-m)-ho(1+m:n)
n_m = ordn - m if (any(den(1:n-m) == 0.0))then
do i = 1, n_m write(*,*) 'failure in polint for point',x
hp = ho(i) write(*,*) 'with input points: ',xa
h = ho(i+m) stop
den_val = hp - h endif
den(1:n-m)=(c(2:n-m+1)-d(1:n-m))/den(1:n-m)
if (den_val == 0.0d0) then d(1:n-m)=ho(1+m:n)*den(1:n-m)
write(*,*) 'failure in polint for point',x c(1:n-m)=ho(1:n-m)*den(1:n-m)
write(*,*) 'with input points: ',xa if (2*ns < n-m) then
stop dy=c(ns+1)
end if
den_val = (c(i+1) - d(i)) / den_val
d(i) = h * den_val
c(i) = hp * den_val
end do
if (2 * ns < n_m) then
dy = c(ns + 1)
else else
dy = d(ns) dy=d(ns)
ns = ns - 1 ns=ns-1
end if end if
y = y + dy y=y+dy
end do end do
return return
end subroutine polint end subroutine polint
!------------------------------------------------------------------------------ !------------------------------------------------------------------------------
! !
@@ -1179,37 +1183,35 @@ end subroutine d2dump
! !
!------------------------------------------------------------------------------ !------------------------------------------------------------------------------
subroutine polin2(x1a,x2a,ya,x1,x2,y,dy,ordn) subroutine polin2(x1a,x2a,ya,x1,x2,y,dy,ordn)
implicit none implicit none
!~~~~~~> Input parameters:
integer,intent(in) :: ordn integer,intent(in) :: ordn
real*8, dimension(1:ordn), intent(in) :: x1a,x2a real*8, dimension(1:ordn), intent(in) :: x1a,x2a
real*8, dimension(1:ordn,1:ordn), intent(in) :: ya real*8, dimension(1:ordn,1:ordn), intent(in) :: ya
real*8, intent(in) :: x1,x2 real*8, intent(in) :: x1,x2
real*8, intent(out) :: y,dy real*8, intent(out) :: y,dy
#ifdef POLINT_LEGACY_ORDER !~~~~~~> Other parameters:
integer :: i,m integer :: i,m
real*8, dimension(ordn) :: ymtmp real*8, dimension(ordn) :: ymtmp
real*8, dimension(ordn) :: yntmp real*8, dimension(ordn) :: yntmp
m=size(x1a) m=size(x1a)
do i=1,m do i=1,m
yntmp=ya(i,:) yntmp=ya(i,:)
call polint(x2a,yntmp,x2,ymtmp(i),dy,ordn) call polint(x2a,yntmp,x2,ymtmp(i),dy,ordn)
end do
call polint(x1a,ymtmp,x1,y,dy,ordn)
#else
integer :: j
real*8, dimension(ordn) :: ymtmp
real*8 :: dy_temp
do j=1,ordn
call polint(x1a, ya(:,j), x1, ymtmp(j), dy_temp, ordn)
end do end do
call polint(x2a, ymtmp, x2, y, dy, ordn)
#endif call polint(x1a,ymtmp,x1,y,dy,ordn)
return return
end subroutine polin2 end subroutine polin2
!------------------------------------------------------------------------------ !------------------------------------------------------------------------------
! !
@@ -1217,15 +1219,18 @@ end subroutine d2dump
! !
!------------------------------------------------------------------------------ !------------------------------------------------------------------------------
subroutine polin3(x1a,x2a,x3a,ya,x1,x2,x3,y,dy,ordn) subroutine polin3(x1a,x2a,x3a,ya,x1,x2,x3,y,dy,ordn)
implicit none implicit none
!~~~~~~> Input parameters:
integer,intent(in) :: ordn integer,intent(in) :: ordn
real*8, dimension(1:ordn), intent(in) :: x1a,x2a,x3a real*8, dimension(1:ordn), intent(in) :: x1a,x2a,x3a
real*8, dimension(1:ordn,1:ordn,1:ordn), intent(in) :: ya real*8, dimension(1:ordn,1:ordn,1:ordn), intent(in) :: ya
real*8, intent(in) :: x1,x2,x3 real*8, intent(in) :: x1,x2,x3
real*8, intent(out) :: y,dy real*8, intent(out) :: y,dy
#ifdef POLINT_LEGACY_ORDER !~~~~~~> Other parameters:
integer :: i,j,m,n integer :: i,j,m,n
real*8, dimension(ordn,ordn) :: yatmp real*8, dimension(ordn,ordn) :: yatmp
real*8, dimension(ordn) :: ymtmp real*8, dimension(ordn) :: ymtmp
@@ -1234,33 +1239,24 @@ end subroutine d2dump
m=size(x1a) m=size(x1a)
n=size(x2a) n=size(x2a)
do i=1,m do i=1,m
do j=1,n do j=1,n
yqtmp=ya(i,j,:) yqtmp=ya(i,j,:)
call polint(x3a,yqtmp,x3,yatmp(i,j),dy,ordn) call polint(x3a,yqtmp,x3,yatmp(i,j),dy,ordn)
end do end do
yntmp=yatmp(i,:) yntmp=yatmp(i,:)
call polint(x2a,yntmp,x2,ymtmp(i),dy,ordn) call polint(x2a,yntmp,x2,ymtmp(i),dy,ordn)
end do
call polint(x1a,ymtmp,x1,y,dy,ordn)
#else
integer :: j, k
real*8, dimension(ordn,ordn) :: yatmp
real*8, dimension(ordn) :: ymtmp
real*8 :: dy_temp
do k=1,ordn
do j=1,ordn
call polint(x1a, ya(:,j,k), x1, yatmp(j,k), dy_temp, ordn)
end do
end do end do
do k=1,ordn
call polint(x2a, yatmp(:,k), x2, ymtmp(k), dy_temp, ordn) call polint(x1a,ymtmp,x1,y,dy,ordn)
end do
call polint(x3a, ymtmp, x3, y, dy, ordn)
#endif
return return
end subroutine polin3 end subroutine polin3
!-------------------------------------------------------------------------------------- !--------------------------------------------------------------------------------------
! calculate L2norm ! calculate L2norm

View File

@@ -6,6 +6,101 @@
! Vertex or Cell is distinguished in routine symmetry_bd which locates in ! Vertex or Cell is distinguished in routine symmetry_bd which locates in
! file "fmisc.f90" ! file "fmisc.f90"
#if (ghost_width == 2)
! second order code
!------------------------------------------------------------------------------------------------------------------------------
!usual type Kreiss-Oliger type numerical dissipation
!We support cell center only
! (D_+D_-)^2 =
! f(i-2) - 4 f(i-1) + 6 f(i) - 4 f(i+1) + f(i+2)
! ------------------------------------------------------
! dx^4
!------------------------------------------------------------------------------------------------------------------------------
! do not add dissipation near boundary
subroutine kodis(ex,X,Y,Z,f,f_rhs,SoA,Symmetry,eps)
implicit none
! argument variables
integer,intent(in) :: Symmetry
integer,dimension(3),intent(in)::ex
real*8, dimension(1:3), intent(in) :: SoA
double precision,intent(in),dimension(ex(1))::X
double precision,intent(in),dimension(ex(2))::Y
double precision,intent(in),dimension(ex(3))::Z
double precision,intent(in),dimension(ex(1),ex(2),ex(3))::f
double precision,intent(inout),dimension(ex(1),ex(2),ex(3))::f_rhs
real*8,intent(in) :: eps
!~~~~~~ other variables
real*8 :: dX,dY,dZ
real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)) :: fh
integer :: imin,jmin,kmin,imax,jmax,kmax
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
real*8,parameter :: cof = 1.6d1 ! 2^4
real*8, parameter :: F4=4.d0,F6=6.d0
integer::i,j,k
dX = X(2)-X(1)
dY = Y(2)-Y(1)
dZ = Z(2)-Z(1)
imax = ex(1)
jmax = ex(2)
kmax = ex(3)
imin = 1
jmin = 1
kmin = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -1
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -1
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -1
call symmetry_bd(2,ex,f,fh,SoA)
! f(i-2) - 4 f(i-1) + 6 f(i) - 4 f(i+1) + f(i+2)
! ------------------------------------------------------
! dx^4
! note the sign (-1)^r-1, now r=2
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
if(i-2 >= imin .and. i+2 <= imax .and. &
j-2 >= jmin .and. j+2 <= jmax .and. &
k-2 >= kmin .and. k+2 <= kmax) then
! x direction
f_rhs(i,j,k) = f_rhs(i,j,k) - eps/dX/cof * ( &
(fh(i-2,j,k)+fh(i+2,j,k)) &
- F4 * (fh(i-1,j,k)+fh(i+1,j,k)) &
+ F6 * fh(i,j,k) )
! y direction
f_rhs(i,j,k) = f_rhs(i,j,k) - eps/dY/cof * ( &
(fh(i,j-2,k)+fh(i,j+2,k)) &
- F4 * (fh(i,j-1,k)+fh(i,j+1,k)) &
+ F6 * fh(i,j,k) )
! z direction
f_rhs(i,j,k) = f_rhs(i,j,k) - eps/dZ/cof * ( &
(fh(i,j,k-2)+fh(i,j,k+2)) &
- F4 * (fh(i,j,k-1)+fh(i,j,k+1)) &
+ F6 * fh(i,j,k) )
endif
enddo
enddo
enddo
return
end subroutine kodis
#elif (ghost_width == 3)
! fourth order code ! fourth order code
!--------------------------------------------------------------------------------------------- !---------------------------------------------------------------------------------------------
@@ -61,7 +156,7 @@ integer, parameter :: NO_SYMM=0, OCTANT=2
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -2 if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -2
if(Symmetry == OCTANT .and. dabs(X(1)) < dX) imin = -2 if(Symmetry == OCTANT .and. dabs(X(1)) < dX) imin = -2
if(Symmetry == OCTANT .and. dabs(Y(1)) < dY) jmin = -2 if(Symmetry == OCTANT .and. dabs(Y(1)) < dY) jmin = -2
!print*,'imin,jmin,kmin=',imin,jmin,kmin
call symmetry_bd(3,ex,f,fh,SoA) call symmetry_bd(3,ex,f,fh,SoA)
do k=1,ex(3) do k=1,ex(3)
@@ -71,7 +166,28 @@ integer, parameter :: NO_SYMM=0, OCTANT=2
if(i-3 >= imin .and. i+3 <= imax .and. & if(i-3 >= imin .and. i+3 <= imax .and. &
j-3 >= jmin .and. j+3 <= jmax .and. & j-3 >= jmin .and. j+3 <= jmax .and. &
k-3 >= kmin .and. k+3 <= kmax) then k-3 >= kmin .and. k+3 <= kmax) then
#if 0
! x direction
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/dX/cof * ( &
(fh(i-3,j,k)+fh(i+3,j,k)) - &
SIX*(fh(i-2,j,k)+fh(i+2,j,k)) + &
FIT*(fh(i-1,j,k)+fh(i+1,j,k)) - &
TWT* fh(i,j,k) )
! y direction
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/dY/cof * ( &
(fh(i,j-3,k)+fh(i,j+3,k)) - &
SIX*(fh(i,j-2,k)+fh(i,j+2,k)) + &
FIT*(fh(i,j-1,k)+fh(i,j+1,k)) - &
TWT* fh(i,j,k) )
! z direction
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/dZ/cof * ( &
(fh(i,j,k-3)+fh(i,j,k+3)) - &
SIX*(fh(i,j,k-2)+fh(i,j,k+2)) + &
FIT*(fh(i,j,k-1)+fh(i,j,k+1)) - &
TWT* fh(i,j,k) )
#else
! calculation order if important ? ! calculation order if important ?
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof *( ( & f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof *( ( &
(fh(i-3,j,k)+fh(i+3,j,k)) - & (fh(i-3,j,k)+fh(i+3,j,k)) - &
@@ -88,7 +204,7 @@ integer, parameter :: NO_SYMM=0, OCTANT=2
SIX*(fh(i,j,k-2)+fh(i,j,k+2)) + & SIX*(fh(i,j,k-2)+fh(i,j,k+2)) + &
FIT*(fh(i,j,k-1)+fh(i,j,k+1)) - & FIT*(fh(i,j,k-1)+fh(i,j,k+1)) - &
TWT* fh(i,j,k) )/dZ ) TWT* fh(i,j,k) )/dZ )
#endif
endif endif
enddo enddo
@@ -99,6 +215,218 @@ integer, parameter :: NO_SYMM=0, OCTANT=2
end subroutine kodis end subroutine kodis
#elif (ghost_width == 4)
! sixth order code
!------------------------------------------------------------------------------------------------------------------------------
!usual type Kreiss-Oliger type numerical dissipation
!We support cell center only
! (D_+D_-)^4 =
! f(i-4) - 8 f(i-3) + 28 f(i-2) - 56 f(i-1) + 70 f(i) - 56 f(i+1) + 28 f(i+2) - 8 f(i+3) + f(i+4)
! ----------------------------------------------------------------------------------------------------------
! dx^8
!------------------------------------------------------------------------------------------------------------------------------
! do not add dissipation near boundary
subroutine kodis(ex,X,Y,Z,f,f_rhs,SoA,Symmetry,eps)
implicit none
! argument variables
integer,intent(in) :: Symmetry
integer,dimension(3),intent(in)::ex
real*8, dimension(1:3), intent(in) :: SoA
double precision,intent(in),dimension(ex(1))::X
double precision,intent(in),dimension(ex(2))::Y
double precision,intent(in),dimension(ex(3))::Z
double precision,intent(in),dimension(ex(1),ex(2),ex(3))::f
double precision,intent(inout),dimension(ex(1),ex(2),ex(3))::f_rhs
real*8,intent(in) :: eps
!~~~~~~ other variables
real*8 :: dX,dY,dZ
real*8,dimension(-3:ex(1),-3:ex(2),-3:ex(3)) :: fh
integer :: imin,jmin,kmin,imax,jmax,kmax
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
real*8,parameter :: cof = 2.56d2 ! 2^8
real*8, parameter :: F8=8.d0,F28=2.8d1,F56=5.6d1,F70=7.d1
integer::i,j,k
dX = X(2)-X(1)
dY = Y(2)-Y(1)
dZ = Z(2)-Z(1)
imax = ex(1)
jmax = ex(2)
kmax = ex(3)
imin = 1
jmin = 1
kmin = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -3
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -3
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -3
call symmetry_bd(4,ex,f,fh,SoA)
! f(i-4) - 8 f(i-3) + 28 f(i-2) - 56 f(i-1) + 70 f(i) - 56 f(i+1) + 28 f(i+2) - 8 f(i+3) + f(i+4)
! ----------------------------------------------------------------------------------------------------------
! dx^8
! note the sign (-1)^r-1, now r=4
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
if(i>imin+3 .and. i < imax-3 .and. &
j>jmin+3 .and. j < jmax-3 .and. &
k>kmin+3 .and. k < kmax-3) then
! x direction
f_rhs(i,j,k) = f_rhs(i,j,k) - eps/dX/cof * ( &
(fh(i-4,j,k)+fh(i+4,j,k)) &
- F8 * (fh(i-3,j,k)+fh(i+3,j,k)) &
+F28 * (fh(i-2,j,k)+fh(i+2,j,k)) &
-F56 * (fh(i-1,j,k)+fh(i+1,j,k)) &
+F70 * fh(i,j,k) )
! y direction
f_rhs(i,j,k) = f_rhs(i,j,k) - eps/dY/cof * ( &
(fh(i,j-4,k)+fh(i,j+4,k)) &
- F8 * (fh(i,j-3,k)+fh(i,j+3,k)) &
+F28 * (fh(i,j-2,k)+fh(i,j+2,k)) &
-F56 * (fh(i,j-1,k)+fh(i,j+1,k)) &
+F70 * fh(i,j,k) )
! z direction
f_rhs(i,j,k) = f_rhs(i,j,k) - eps/dZ/cof * ( &
(fh(i,j,k-4)+fh(i,j,k+4)) &
- F8 * (fh(i,j,k-3)+fh(i,j,k+3)) &
+F28 * (fh(i,j,k-2)+fh(i,j,k+2)) &
-F56 * (fh(i,j,k-1)+fh(i,j,k+1)) &
+F70 * fh(i,j,k) )
endif
enddo
enddo
enddo
return
end subroutine kodis
#elif (ghost_width == 5)
! eighth order code
!------------------------------------------------------------------------------------------------------------------------------
!usual type Kreiss-Oliger type numerical dissipation
!We support cell center only
! Note the notation D_+ and D_- [P240 of B. Gustafsson, H.-O. Kreiss, and J. Oliger, Time
! Dependent Problems and Difference Methods (Wiley, New York, 1995).]
! D_+ = (f(i+1) - f(i))/h
! D_- = (f(i) - f(i-1))/h
! then we have D_+D_- = D_-D_+ = (f(i+1) - 2f(i) + f(i-1))/h^2
! for nth order accurate finite difference code, we need r =n/2+1
! D_+^rD_-^r = (D_+D_-)^r
! following the tradiation of PRD 77, 024027 (BB's calibration paper, Eq.(64),
! correct some typo according to above book) :
! + eps*(-1)^(r-1)*h^(2r-1)/2^(2r)*(D_+D_-)^r
!
!
! this is for 8th order accurate finite difference scheme
! (D_+D_-)^5 =
! f(i-5) - 10 f(i-4) + 45 f(i-3) - 120 f(i-2) + 210 f(i-1) - 252 f(i) + 210 f(i+1) - 120 f(i+2) + 45 f(i+3) - 10 f(i+4) + f(i+5)
! -------------------------------------------------------------------------------------------------------------------------------
! dx^10
!---------------------------------------------------------------------------------------------------------------------------------
! do not add dissipation near boundary
subroutine kodis(ex,X,Y,Z,f,f_rhs,SoA,Symmetry,eps)
implicit none
! argument variables
integer,intent(in) :: Symmetry
integer,dimension(3),intent(in)::ex
real*8, dimension(1:3), intent(in) :: SoA
double precision,intent(in),dimension(ex(1))::X
double precision,intent(in),dimension(ex(2))::Y
double precision,intent(in),dimension(ex(3))::Z
double precision,intent(in),dimension(ex(1),ex(2),ex(3))::f
double precision,intent(inout),dimension(ex(1),ex(2),ex(3))::f_rhs
real*8,intent(in) :: eps
!~~~~~~ other variables
real*8 :: dX,dY,dZ
real*8,dimension(-4:ex(1),-4:ex(2),-4:ex(3)) :: fh
integer :: imin,jmin,kmin,imax,jmax,kmax
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
real*8,parameter :: cof = 1.024d3 ! 2^2r = 2^10
real*8, parameter :: F10=1.d1,F45=4.5d1,F120=1.2d2,F210=2.1d2,F252=2.52d2
integer::i,j,k
dX = X(2)-X(1)
dY = Y(2)-Y(1)
dZ = Z(2)-Z(1)
imax = ex(1)
jmax = ex(2)
kmax = ex(3)
imin = 1
jmin = 1
kmin = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -4
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -4
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -4
call symmetry_bd(5,ex,f,fh,SoA)
! f(i-5) - 10 f(i-4) + 45 f(i-3) - 120 f(i-2) + 210 f(i-1) - 252 f(i) + 210 f(i+1) - 120 f(i+2) + 45 f(i+3) - 10 f(i+4) + f(i+5)
! -------------------------------------------------------------------------------------------------------------------------------
! dx^10
! note the sign (-1)^r-1, now r=5
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
if(i>imin+4 .and. i < imax-4 .and. &
j>jmin+4 .and. j < jmax-4 .and. &
k>kmin+4 .and. k < kmax-4) then
! x direction
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/dX/cof * ( &
(fh(i-5,j,k)+fh(i+5,j,k)) &
- F10 * (fh(i-4,j,k)+fh(i+4,j,k)) &
+ F45 * (fh(i-3,j,k)+fh(i+3,j,k)) &
- F120* (fh(i-2,j,k)+fh(i+2,j,k)) &
+ F210* (fh(i-1,j,k)+fh(i+1,j,k)) &
- F252 * fh(i,j,k) )
! y direction
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/dY/cof * ( &
(fh(i,j-5,k)+fh(i,j+5,k)) &
- F10 * (fh(i,j-4,k)+fh(i,j+4,k)) &
+ F45 * (fh(i,j-3,k)+fh(i,j+3,k)) &
- F120* (fh(i,j-2,k)+fh(i,j+2,k)) &
+ F210* (fh(i,j-1,k)+fh(i,j+1,k)) &
- F252 * fh(i,j,k) )
! z direction
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/dZ/cof * ( &
(fh(i,j,k-5)+fh(i,j,k+5)) &
- F10 * (fh(i,j,k-4)+fh(i,j,k+4)) &
+ F45 * (fh(i,j,k-3)+fh(i,j,k+3)) &
- F120* (fh(i,j,k-2)+fh(i,j,k+2)) &
+ F210* (fh(i,j,k-1)+fh(i,j,k+1)) &
- F252 * fh(i,j,k) )
endif
enddo
enddo
enddo
return
end subroutine kodis
#endif

View File

@@ -7,7 +7,163 @@
! Vertex or Cell is distinguished in routine symmetry_bd which locates in ! Vertex or Cell is distinguished in routine symmetry_bd which locates in
! file "fmisc.f90" ! file "fmisc.f90"
#if (ghost_width == 2)
! second order code
!-----------------------------------------------------------------------------
! v
! D f = ------[ - 3 f + 4 f - f ]
! i 2dx i i+v i+2v
!
! where
!
! i
! |B |
! v = -----
! i
! B
!
!-----------------------------------------------------------------------------
subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
implicit none
!~~~~~~> Input parameters:
integer, intent(in) :: ex(1:3),Symmetry
real*8, intent(in) :: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
real*8,dimension(ex(1),ex(2),ex(3)),intent(in) :: f,Sfx,Sfy,Sfz
real*8,dimension(ex(1),ex(2),ex(3)),intent(inout):: f_rhs
real*8,dimension(3),intent(in) ::SoA
!~~~~~~> local variables:
! note index -1,0, so we have 2 extra points
real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)) :: fh
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
real*8 :: dX,dY,dZ
real*8 :: d2dx,d2dy,d2dz
real*8, parameter :: ZEO=0.d0,ONE=1.d0,TWO=2.d0,THR=3.d0,FOUR=4.d0
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
dX = X(2)-X(1)
dY = Y(2)-Y(1)
dZ = Z(2)-Z(1)
d2dx = ONE/TWO/dX
d2dy = ONE/TWO/dY
d2dz = ONE/TWO/dZ
imax = ex(1)
jmax = ex(2)
kmax = ex(3)
imin = 1
jmin = 1
kmin = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -1
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -1
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -1
call symmetry_bd(2,ex,f,fh,SoA)
! upper bound set ex-1 only for efficiency,
! the loop body will set ex 0 also
do k=1,ex(3)-1
do j=1,ex(2)-1
do i=1,ex(1)-1
! x direction
if(Sfx(i,j,k) >= ZEO)then
if( i+2 <= imax .and. i >= imin)then
! v
! D f = ------[ - 3 f + 4 f - f ]
! i 2dx i i+v i+2v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d2dx*(-THR*fh(i,j,k)+FOUR*fh(i+1,j,k)-fh(i+2,j,k))
elseif(i+1 <= imax .and. i >= imin)then
! v
! D f = ------[ - f + f ]
! i dx i i+v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d2dx*(-fh(i,j,k)+fh(i+1,j,k))
endif
elseif(Sfx(i,j,k) <= ZEO)then
if( i-2 >= imin .and. i <= imax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfx(i,j,k)*d2dx*(-THR*fh(i,j,k)+FOUR*fh(i-1,j,k)-fh(i-2,j,k))
elseif(i-1 >= imin .and. i <= imax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfx(i,j,k)*d2dx*(-fh(i,j,k)+fh(i-1,j,k))
endif
! set imax and imin 0
endif
! y direction
if(Sfy(i,j,k) >= ZEO)then
if( j+2 <= jmax .and. j >= jmin)then
! v
! D f = ------[ - 3 f + 4 f - f ]
! i 2dx i i+v i+2v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d2dy*(-THR*fh(i,j,k)+FOUR*fh(i,j+1,k)-fh(i,j+2,k))
elseif(j+1 <= jmax .and. j >= jmin)then
! v
! D f = ------[ - f + f ]
! i dx i i+v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d2dy*(-fh(i,j,k)+fh(i,j+1,k))
endif
elseif(Sfy(i,j,k) <= ZEO)then
if( j-2 >= jmin .and. j <= jmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfy(i,j,k)*d2dy*(-THR*fh(i,j,k)+FOUR*fh(i,j-1,k)-fh(i,j-2,k))
elseif(j-1 >= jmin .and. j <= jmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfy(i,j,k)*d2dy*(-fh(i,j,k)+fh(i,j-1,k))
endif
! set jmin and jmax 0
endif
!! z direction
if(Sfz(i,j,k) >= ZEO)then
if( k+2 <= kmax .and. k >= kmin)then
! v
! D f = ------[ - 3 f + 4 f - f ]
! i 2dx i i+v i+2v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d2dz*(-THR*fh(i,j,k)+FOUR*fh(i,j,k+1)-fh(i,j,k+2))
elseif(k+1 <= kmax .and. k >= kmin)then
! v
! D f = ------[ - f + f ]
! i dx i i+v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d2dz*(-fh(i,j,k)+fh(i,j,k+1))
endif
elseif(Sfz(i,j,k) <= ZEO)then
if( k-2 >= kmin .and. k <= kmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfz(i,j,k)*d2dz*(-THR*fh(i,j,k)+FOUR*fh(i,j,k-1)-fh(i,j,k-2))
elseif(k-1 >= kmin .and. k <= kmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfz(i,j,k)*d2dz*(-fh(i,j,k)+fh(i,j,k-1))
endif
! set kmin and kmax 0
endif
enddo
enddo
enddo
return
end subroutine lopsided
#elif (ghost_width == 3)
! fourth order code ! fourth order code
!----------------------------------------------------------------------------- !-----------------------------------------------------------------------------
@@ -80,7 +236,89 @@ subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
#if 0
!! old code
! x direction
if(Sfx(i,j,k) >= ZEO .and. i+3 <= imax .and. i-1 >= imin)then
! v
! D f = ------[ - 3f - 10f + 18f - 6f + f ]
! i 12dx i-v i i+v i+2v i+3v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(-F3*fh(i-1,j,k)-F10*fh(i,j,k)+F18*fh(i+1,j,k) &
-F6*fh(i+2,j,k)+ fh(i+3,j,k))
elseif(Sfx(i,j,k) <= ZEO .and. i-3 >= imin .and. i+1 <= imax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfx(i,j,k)*d12dx*(-F3*fh(i+1,j,k)-F10*fh(i,j,k)+F18*fh(i-1,j,k) &
-F6*fh(i-2,j,k)+ fh(i-3,j,k))
elseif(i+2 <= imax .and. i-2 >= imin)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
elseif(i+1 <= imax .and. i-1 >= imin)then
!
! - f(i-1) + f(i+1)
! fx(i) = --------------------------------
! 2 dx
f_rhs(i,j,k)=f_rhs(i,j,k) + Sfx(i,j,k)*d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
! set imax and imin 0
endif
! y direction
if(Sfy(i,j,k) >= ZEO .and. j+3 <= jmax .and. j-1 >= jmin)then
! v
! D f = ------[ - 3f - 10f + 18f - 6f + f ]
! i 12dx i-v i i+v i+2v i+3v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(-F3*fh(i,j-1,k)-F10*fh(i,j,k)+F18*fh(i,j+1,k) &
-F6*fh(i,j+2,k)+ fh(i,j+3,k))
elseif(Sfy(i,j,k) <= ZEO .and. j-3 >= jmin .and. j+1 <= jmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfy(i,j,k)*d12dy*(-F3*fh(i,j+1,k)-F10*fh(i,j,k)+F18*fh(i,j-1,k) &
-F6*fh(i,j-2,k)+ fh(i,j-3,k))
elseif(j+2 <= jmax .and. j-2 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
elseif(j+1 <= jmax .and. j-1 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k) + Sfy(i,j,k)*d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
! set jmin and jmax 0
endif
!! z direction
if(Sfz(i,j,k) >= ZEO .and. k+3 <= kmax .and. k-1 >= kmin)then
! v
! D f = ------[ - 3f - 10f + 18f - 6f + f ]
! i 12dx i-v i i+v i+2v i+3v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k-1)-F10*fh(i,j,k)+F18*fh(i,j,k+1) &
-F6*fh(i,j,k+2)+ fh(i,j,k+3))
elseif(Sfz(i,j,k) <= ZEO .and. k-3 >= kmin .and. k+1 <= kmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k+1)-F10*fh(i,j,k)+F18*fh(i,j,k-1) &
-F6*fh(i,j,k-2)+ fh(i,j,k-3))
elseif(k+2 <= kmax .and. k-2 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
elseif(k+1 <= kmax .and. k-1 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+Sfz(i,j,k)*d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
! set kmin and kmax 0
endif
#else
!! new code, 2012dec27, based on bam !! new code, 2012dec27, based on bam
! x direction ! x direction
if(Sfx(i,j,k) > ZEO)then if(Sfx(i,j,k) > ZEO)then
@@ -240,6 +478,7 @@ subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
! set kmax and kmin 0 ! set kmax and kmin 0
endif endif
endif endif
#endif
enddo enddo
enddo enddo
enddo enddo
@@ -247,3 +486,417 @@ subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
return return
end subroutine lopsided end subroutine lopsided
#elif (ghost_width == 4)
! sixth order code
! Compute advection terms in right hand sides of field equations
! v
! D f = ------[ 2f - 24f - 35f + 80f - 30f + 8f - f ]
! i 60dx i-2v i-v i i+v i+2v i+3v i+4v
!
! where
!
! i
! |B |
! v = -----
! i
! B
!
!-----------------------------------------------------------------------------
subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
implicit none
!~~~~~~> Input parameters:
integer, intent(in) :: ex(1:3),Symmetry
real*8, intent(in) :: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
real*8,dimension(ex(1),ex(2),ex(3)),intent(in) :: f,Sfx,Sfy,Sfz
real*8,dimension(ex(1),ex(2),ex(3)),intent(inout):: f_rhs
real*8,dimension(3),intent(in) ::SoA
!~~~~~~> local variables:
real*8,dimension(-3:ex(1),-3:ex(2),-3:ex(3)) :: fh
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
real*8 :: dX,dY,dZ
real*8 :: d60dx,d60dy,d60dz,d12dx,d12dy,d12dz,d2dx,d2dy,d2dz
real*8, parameter :: ZEO=0.d0,ONE=1.d0, F60=6.d1
real*8, parameter :: TWO=2.d0,F24=2.4d1,F35=3.5d1,F80=8.d1,F30=3.d1,EIT=8.d0
real*8, parameter :: F9=9.d0,F45=4.5d1,F12=1.2d1
real*8, parameter :: F10=1.d1,F77=7.7d1,F150=1.5d2,F100=1.d2,F50=5.d1,F15=1.5d1
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
dX = X(2)-X(1)
dY = Y(2)-Y(1)
dZ = Z(2)-Z(1)
d60dx = ONE/F60/dX
d60dy = ONE/F60/dY
d60dz = ONE/F60/dZ
d12dx = ONE/F12/dX
d12dy = ONE/F12/dY
d12dz = ONE/F12/dZ
d2dx = ONE/TWO/dX
d2dy = ONE/TWO/dY
d2dz = ONE/TWO/dZ
imax = ex(1)
jmax = ex(2)
kmax = ex(3)
imin = 1
jmin = 1
kmin = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -3
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -3
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -3
call symmetry_bd(4,ex,f,fh,SoA)
! upper bound set ex-1 only for efficiency,
! the loop body will set ex 0 also
do k=1,ex(3)-1
do j=1,ex(2)-1
do i=1,ex(1)-1
! x direction
if(Sfx(i,j,k) >= ZEO .and. i+4 <= imax .and. i-2 >= imin)then
! v
! D f = ------[ 2f - 24f - 35f + 80f - 30f + 8f - f ]
! i 60dx i-2v i-v i i+v i+2v i+3v i+4v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d60dx*(TWO*fh(i-2,j,k)-F24*fh(i-1,j,k)-F35*fh(i,j,k)+F80*fh(i+1,j,k) &
-F30*fh(i+2,j,k)+EIT*fh(i+3,j,k)- fh(i+4,j,k))
elseif(Sfx(i,j,k) >= ZEO .and. i+5 <= imax .and. i-1 >= imin)then
! v
! D f = ------[-10f - 77f + 150f - 100f + 50f -15f + 2f ]
! i 60dx i-v i i+v i+2v i+3v i+4v i+5v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d60dx*(-F10*fh(i-1,j,k)-F77*fh(i ,j,k)+F150*fh(i+1,j,k)-F100*fh(i+2,j,k) &
+F50*fh(i+3,j,k)-F15*fh(i+4,j,k)+ TWO*fh(i+5,j,k))
elseif(Sfx(i,j,k) <= ZEO .and. i-4 >= imin .and. i+2 <= imax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfx(i,j,k)*d60dx*(TWO*fh(i+2,j,k)-F24*fh(i+1,j,k)-F35*fh(i,j,k)+F80*fh(i-1,j,k) &
-F30*fh(i-2,j,k)+EIT*fh(i-3,j,k)- fh(i-4,j,k))
elseif(Sfx(i,j,k) <= ZEO .and. i-5 >= imin .and. i+1 <= imax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfx(i,j,k)*d60dx*(-F10*fh(i+1,j,k)-F77*fh(i ,j,k)+F150*fh(i-1,j,k)-F100*fh(i-2,j,k) &
+F50*fh(i-3,j,k)-F15*fh(i-4,j,k)+ TWO*fh(i-5,j,k))
elseif(i+3 <= imax .and. i-3 >= imin)then
! - f(i-3) + 9 f(i-2) - 45 f(i-1) + 45 f(i+1) - 9 f(i+2) + f(i+3)
! fx(i) = -----------------------------------------------------------------
! 60 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d60dx*(-fh(i-3,j,k)+F9*fh(i-2,j,k)-F45*fh(i-1,j,k)+F45*fh(i+1,j,k)-F9*fh(i+2,j,k)+fh(i+3,j,k))
elseif(i+2 <= imax .and. i-2 >= imin)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
elseif(i+1 <= imax .and. i-1 >= imin)then
!
! - f(i-1) + f(i+1)
! fx(i) = --------------------------------
! 2 dx
f_rhs(i,j,k)=f_rhs(i,j,k) + Sfx(i,j,k)*d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
! set imax and imin 0
endif
! y direction
if(Sfy(i,j,k) >= ZEO .and. j+4 <= jmax .and. j-2 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d60dy*(TWO*fh(i,j-2,k)-F24*fh(i,j-1,k)-F35*fh(i,j,k)+F80*fh(i,j+1,k) &
-F30*fh(i,j+2,k)+EIT*fh(i,j+3,k)- fh(i,j+4,k))
elseif(Sfy(i,j,k) >= ZEO .and. j+5 <= jmax .and. j-1 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d60dy*(-F10*fh(i,j-1,k)-F77*fh(i,j ,k)+F150*fh(i,j+1,k)-F100*fh(i,j+2,k) &
+F50*fh(i,j+3,k)-F15*fh(i,j+4,k)+ TWO*fh(i,j+5,k))
elseif(Sfy(i,j,k) <= ZEO .and. j-4 >= jmin .and. j+2 <= jmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfy(i,j,k)*d60dy*(TWO*fh(i,j+2,k)-F24*fh(i,j+1,k)-F35*fh(i,j,k)+F80*fh(i,j-1,k) &
-F30*fh(i,j-2,k)+EIT*fh(i,j-3,k)- fh(i,j-4,k))
elseif(Sfy(i,j,k) <= ZEO .and. j-5 >= jmin .and. j+1 <= jmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfy(i,j,k)*d60dy*(-F10*fh(i,j+1,k)-F77*fh(i,j ,k)+F150*fh(i,j-1,k)-F100*fh(i,j-2,k) &
+F50*fh(i,j-3,k)-F15*fh(i,j-4,k)+ TWO*fh(i,j-5,k))
elseif(j+3 <= jmax .and. j-3 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d60dy*(-fh(i,j-3,k)+F9*fh(i,j-2,k)-F45*fh(i,j-1,k)+F45*fh(i,j+1,k)-F9*fh(i,j+2,k)+fh(i,j+3,k))
elseif(j+2 <= jmax .and. j-2 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
elseif(j+1 <= jmax .and. j-1 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k) + Sfy(i,j,k)*d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
! set jmin and jmax 0
endif
!! z direction
if(Sfz(i,j,k) >= ZEO .and. k+4 <= kmax .and. k-2 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d60dz*(TWO*fh(i,j,k-2)-F24*fh(i,j,k-1)-F35*fh(i,j,k)+F80*fh(i,j,k+1) &
-F30*fh(i,j,k+2)+EIT*fh(i,j,k+3)- fh(i,j,k+4))
elseif(Sfz(i,j,k) >= ZEO .and. k+5 <= kmax .and. k-1 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d60dz*(-F10*fh(i,j,k-1)-F77*fh(i,j,k )+F150*fh(i,j,k+1)-F100*fh(i,j,k+2) &
+F50*fh(i,j,k+3)-F15*fh(i,j,k+4)+ TWO*fh(i,j,k+5))
elseif(Sfz(i,j,k) <= ZEO .and. k-4 >= kmin .and. k+2 <= kmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfz(i,j,k)*d60dz*(TWO*fh(i,j,k+2)-F24*fh(i,j,k+1)-F35*fh(i,j,k)+F80*fh(i,j,k-1) &
-F30*fh(i,j,k-2)+EIT*fh(i,j,k-3)- fh(i,j,k-4))
elseif(Sfz(i,j,k) <= ZEO .and. k-5 >= kmin .and. k+1 <= kmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfz(i,j,k)*d60dz*(-F10*fh(i,j,k+1)-F77*fh(i,j,k )+F150*fh(i,j,k-1)-F100*fh(i,j,k-2) &
+F50*fh(i,j,k-3)-F15*fh(i,j,k-4)+ TWO*fh(i,j,k-5))
elseif(k+3 <= kmax .and. k-3 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d60dz*(-fh(i,j,k-3)+F9*fh(i,j,k-2)-F45*fh(i,j,k-1)+F45*fh(i,j,k+1)-F9*fh(i,j,k+2)+fh(i,j,k+3))
elseif(k+2 <= kmax .and. k-2 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
elseif(k+1 <= kmax .and. k-1 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+Sfz(i,j,k)*d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
! set kmin and kmax 0
endif
enddo
enddo
enddo
return
end subroutine lopsided
#elif (ghost_width == 5)
! eighth order code
!-----------------------------------------------------------------------------
! PRD 77, 024034 (2008)
! Compute advection terms in right hand sides of field equations
! v [ - 5 f(i-3v) + 60 f(i-2v) - 420 f(i-v) - 378 f(i) + 1050 f(i+v) - 420 f(i+2v) + 140 f(i+3v) - 30 f(i+4v) + 3 f(i+5v)]
! D f = --------------------------------------------------------------------------------------------------------------------------
! i 840 dx
!
! where
!
! i
! |B |
! v = -----
! i
! B
!
!-----------------------------------------------------------------------------
subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
implicit none
!~~~~~~> Input parameters:
integer, intent(in) :: ex(1:3),Symmetry
real*8, intent(in) :: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
real*8,dimension(ex(1),ex(2),ex(3)),intent(in) :: f,Sfx,Sfy,Sfz
real*8,dimension(ex(1),ex(2),ex(3)),intent(inout):: f_rhs
real*8,dimension(3),intent(in) ::SoA
!~~~~~~> local variables:
real*8,dimension(-4:ex(1),-4:ex(2),-4:ex(3)) :: fh
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
real*8 :: dX,dY,dZ
real*8 :: d840dx,d840dy,d840dz,d60dx,d60dy,d60dz,d12dx,d12dy,d12dz,d2dx,d2dy,d2dz
real*8, parameter :: ZEO=0.d0,ONE=1.d0, F60=6.d1
real*8, parameter :: TWO=2.d0,F30=3.d1,EIT=8.d0
real*8, parameter :: F9=9.d0,F45=4.5d1,F12=1.2d1,F140=1.4d2,THR=3.d0
real*8, parameter :: F840=8.4d2,F5=5.d0,F420=4.2d2,F378=3.78d2,F1050=1.05d3
real*8, parameter :: F32=3.2d1,F168=1.68d2,F672=6.72d2
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
dX = X(2)-X(1)
dY = Y(2)-Y(1)
dZ = Z(2)-Z(1)
d840dx = ONE/F840/dX
d840dy = ONE/F840/dY
d840dz = ONE/F840/dZ
d60dx = ONE/F60/dX
d60dy = ONE/F60/dY
d60dz = ONE/F60/dZ
d12dx = ONE/F12/dX
d12dy = ONE/F12/dY
d12dz = ONE/F12/dZ
d2dx = ONE/TWO/dX
d2dy = ONE/TWO/dY
d2dz = ONE/TWO/dZ
imax = ex(1)
jmax = ex(2)
kmax = ex(3)
imin = 1
jmin = 1
kmin = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -4
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -4
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -4
call symmetry_bd(5,ex,f,fh,SoA)
! upper bound set ex-1 only for efficiency,
! the loop body will set ex 0 also
do k=1,ex(3)-1
do j=1,ex(2)-1
do i=1,ex(1)-1
! x direction
if(Sfx(i,j,k) >= ZEO .and. i+5 <= imax .and. i-3 >= imin)then
! v [ - 5 f(i-3v) + 60 f(i-2v) - 420 f(i-v) - 378 f(i) + 1050 f(i+v) - 420 f(i+2v) + 140 f(i+3v) - 30 f(i+4v) + 3 f(i+5v)]
! D f = --------------------------------------------------------------------------------------------------------------------------
! i 840 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d840dx*(-F5*fh(i-3,j,k)+F60 *fh(i-2,j,k)-F420*fh(i-1,j,k)-F378*fh(i ,j,k) &
+F1050*fh(i+1,j,k)-F420*fh(i+2,j,k)+F140*fh(i+3,j,k)-F30 *fh(i+4,j,k)+THR*fh(i+5,j,k))
elseif(Sfx(i,j,k) <= ZEO .and. i-5 >= imin .and. i+3 <= imax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfx(i,j,k)*d840dx*(-F5*fh(i+3,j,k)+F60 *fh(i+2,j,k)-F420*fh(i+1,j,k)-F378*fh(i ,j,k) &
+F1050*fh(i-1,j,k)-F420*fh(i-2,j,k)+F140*fh(i-3,j,k)- F30*fh(i-4,j,k)+THR*fh(i-5,j,k))
elseif(i+4 <= imax .and. i-4 >= imin)then
! 3 f(i-4) - 32 f(i-3) + 168 f(i-2) - 672 f(i-1) + 672 f(i+1) - 168 f(i+2) + 32 f(i+3) - 3 f(i+4)
! fx(i) = -------------------------------------------------------------------------------------------------
! 840 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d840dx*( THR*fh(i-4,j,k)-F32 *fh(i-3,j,k)+F168*fh(i-2,j,k)-F672*fh(i-1,j,k)+ &
F672*fh(i+1,j,k)-F168*fh(i+2,j,k)+F32 *fh(i+3,j,k)-THR *fh(i+4,j,k))
elseif(i+3 <= imax .and. i-3 >= imin)then
! - f(i-3) + 9 f(i-2) - 45 f(i-1) + 45 f(i+1) - 9 f(i+2) + f(i+3)
! fx(i) = -----------------------------------------------------------------
! 60 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d60dx*(-fh(i-3,j,k)+F9*fh(i-2,j,k)-F45*fh(i-1,j,k)+F45*fh(i+1,j,k)-F9*fh(i+2,j,k)+fh(i+3,j,k))
elseif(i+2 <= imax .and. i-2 >= imin)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
elseif(i+1 <= imax .and. i-1 >= imin)then
!
! - f(i-1) + f(i+1)
! fx(i) = --------------------------------
! 2 dx
f_rhs(i,j,k)=f_rhs(i,j,k) + Sfx(i,j,k)*d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
! set imax and imin 0
endif
! y direction
if(Sfy(i,j,k) >= ZEO .and. j+5 <= jmax .and. j-3 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d840dy*(-F5*fh(i,j-3,k)+F60 *fh(i,j-2,k)-F420*fh(i,j-1,k)-F378*fh(i,j ,k) &
+F1050*fh(i,j+1,k)-F420*fh(i,j+2,k)+F140*fh(i,j+3,k)-F30 *fh(i,j+4,k)+THR*fh(i,j+5,k))
elseif(Sfy(i,j,k) <= ZEO .and. j-5 >= jmin .and. j+3 <= jmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfy(i,j,k)*d840dy*(-F5*fh(i,j+3,k)+F60 *fh(i,j+2,k)-F420*fh(i,j+1,k)-F378*fh(i,j ,k) &
+F1050*fh(i,j-1,k)-F420*fh(i,j-2,k)+F140*fh(i,j-3,k)- F30*fh(i,j-4,k)+THR*fh(i,j-5,k))
elseif(j+4 <= jmax .and. j-4 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d840dy*( THR*fh(i,j-4,k)-F32 *fh(i,j-3,k)+F168*fh(i,j-2,k)-F672*fh(i,j-1,k)+ &
F672*fh(i,j+1,k)-F168*fh(i,j+2,k)+F32 *fh(i,j+3,k)-THR *fh(i,j+4,k))
elseif(j+3 <= jmax .and. j-3 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d60dy*(-fh(i,j-3,k)+F9*fh(i,j-2,k)-F45*fh(i,j-1,k)+F45*fh(i,j+1,k)-F9*fh(i,j+2,k)+fh(i,j+3,k))
elseif(j+2 <= jmax .and. j-2 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
elseif(j+1 <= jmax .and. j-1 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k) + Sfy(i,j,k)*d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
! set jmin and jmax 0
endif
!! z direction
if(Sfz(i,j,k) >= ZEO .and. k+5 <= kmax .and. k-3 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d840dz*(-F5*fh(i,j,k-3)+F60 *fh(i,j,k-2)-F420*fh(i,j,k-1)-F378*fh(i,j,k ) &
+F1050*fh(i,j,k+1)-F420*fh(i,j,k+2)+F140*fh(i,j,k+3)-F30 *fh(i,j,k+4)+THR*fh(i,j,k+5))
elseif(Sfz(i,j,k) <= ZEO .and. k-5 >= kmin .and. k+3 <= kmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfz(i,j,k)*d840dz*(-F5*fh(i,j,k+3)+F60 *fh(i,j,k+2)-F420*fh(i,j,k+1)-F378*fh(i,j,k ) &
+F1050*fh(i,j,k-1)-F420*fh(i,j,k-2)+F140*fh(i,j,k-3)- F30*fh(i,j,k-4)+THR*fh(i,j,k-5))
elseif(k+4 <= kmax .and. k-4 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d840dz*( THR*fh(i,j,k-4)-F32 *fh(i,j,k-3)+F168*fh(i,j,k-2)-F672*fh(i,j,k-1)+ &
F672*fh(i,j,k+1)-F168*fh(i,j,k+2)+F32 *fh(i,j,k+3)-THR *fh(i,j,k+4))
elseif(k+3 <= kmax .and. k-3 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d60dz*(-fh(i,j,k-3)+F9*fh(i,j,k-2)-F45*fh(i,j,k-1)+F45*fh(i,j,k+1)-F9*fh(i,j,k+2)+fh(i,j,k+3))
elseif(k+2 <= kmax .and. k-2 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
elseif(k+1 <= kmax .and. k-1 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+Sfz(i,j,k)*d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
! set kmin and kmax 0
endif
enddo
enddo
enddo
return
end subroutine lopsided
#endif

View File

@@ -16,12 +16,6 @@ include makefile.inc
.cu.o: .cu.o:
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH) $(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
TwoPunctures.o: TwoPunctures.C
${CXX} $(CXXAPPFLAGS) -qopenmp -c $< -o $@
TwoPunctureABE.o: TwoPunctureABE.C
${CXX} $(CXXAPPFLAGS) -qopenmp -c $< -o $@
# Input files # Input files
C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\ C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
cgh.o bssn_class.o surface_integral.o ShellPatch.o\ cgh.o bssn_class.o surface_integral.o ShellPatch.o\
@@ -102,7 +96,7 @@ ABEGPU: $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS) $(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
TwoPunctureABE: $(TwoPunctureFILES) TwoPunctureABE: $(TwoPunctureFILES)
$(CLINKER) $(CXXAPPFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS) $(CLINKER) $(CXXAPPFLAGS) -o $@ $(TwoPunctureFILES) $(LDLIBS)
clean: clean:
rm *.o ABE ABEGPU TwoPunctureABE make.log -f rm *.o ABE ABEGPU TwoPunctureABE make.log -f

View File

@@ -15,15 +15,16 @@ LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore
## -xHost: Optimize for the host CPU architecture (Intel/AMD compatible) ## -xHost: Optimize for the host CPU architecture (Intel/AMD compatible)
## -fp-model fast=2: Aggressive floating-point optimizations ## -fp-model fast=2: Aggressive floating-point optimizations
## -fma: Enable fused multiply-add instructions ## -fma: Enable fused multiply-add instructions
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \ ## Note: OpenMP enabled for hybrid MPI+OpenMP
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -qopenmp \
-Dfortran3 -Dnewc -I${MKLROOT}/include -Dfortran3 -Dnewc -I${MKLROOT}/include
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \ f90appflags = -O3 -xHost -fp-model fast=2 -fma -qopenmp \
-align array64byte -fpp -I${MKLROOT}/include -fpp -I${MKLROOT}/include
f90 = ifx f90 = ifx
f77 = ifx f77 = ifx
CXX = icpx CXX = icpx
CC = icx CC = icx
CLINKER = mpiicpx CLINKER = mpiicpx -qopenmp
Cu = nvcc Cu = nvcc
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include

View File

@@ -10,17 +10,15 @@
import AMSS_NCKU_Input as input_data import AMSS_NCKU_Input as input_data
import subprocess import subprocess
import time
## CPU core binding configuration using taskset ## CPU core binding configuration
## taskset ensures all child processes inherit the CPU affinity mask ## Removed hardcoded taskset to allow full utilization of 96 cores via MPI+OpenMP
## This forces make and all compiler processes to use only nohz_full cores (4-55, 60-111) NUMACTL_CPU_BIND = ""
## Format: taskset -c 4-55,60-111 ensures processes only run on these cores
NUMACTL_CPU_BIND = "taskset -c 0-111"
## Build parallelism configuration ## Build parallelism configuration
## Use nohz_full cores (4-55, 60-111) for compilation: 52 + 52 = 104 cores ## Use nohz_full cores (4-55, 60-111) for compilation: 52 + 52 = 104 cores
## Set make -j to utilize available cores for faster builds ## Set make -j to utilize available cores for faster builds
BUILD_JOBS = 104 BUILD_JOBS = 96
################################################################## ##################################################################
@@ -37,7 +35,7 @@ def makefile_ABE():
print( " Compiling the AMSS-NCKU executable file ABE/ABEGPU " ) print( " Compiling the AMSS-NCKU executable file ABE/ABEGPU " )
print( ) print( )
## Build command with CPU binding to nohz_full cores ## Build command
if (input_data.GPU_Calculation == "no"): if (input_data.GPU_Calculation == "no"):
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABE" makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABE"
elif (input_data.GPU_Calculation == "yes"): elif (input_data.GPU_Calculation == "yes"):
@@ -78,7 +76,7 @@ def makefile_TwoPunctureABE():
print( " Compiling the AMSS-NCKU executable file TwoPunctureABE " ) print( " Compiling the AMSS-NCKU executable file TwoPunctureABE " )
print( ) print( )
## Build command with CPU binding to nohz_full cores ## Build command
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} TwoPunctureABE" makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} TwoPunctureABE"
## Execute the command with subprocess.Popen and stream output ## Execute the command with subprocess.Popen and stream output
@@ -113,13 +111,23 @@ def run_ABE():
print( " Running the AMSS-NCKU executable file ABE/ABEGPU " ) print( " Running the AMSS-NCKU executable file ABE/ABEGPU " )
print( ) print( )
## Calculate OMP_NUM_THREADS
## User has 96 cores. Calculate threads per MPI process.
total_physical_cores = 96
omp_num_threads = total_physical_cores // input_data.MPI_processes
if omp_num_threads < 1:
omp_num_threads = 1
print( f" Configuration: {input_data.MPI_processes} MPI processes, {omp_num_threads} OpenMP threads per process." )
print( f" Total cores utilized: {input_data.MPI_processes * omp_num_threads}" )
## Define the command to run; cast other values to strings as needed ## Define the command to run; cast other values to strings as needed
if (input_data.GPU_Calculation == "no"): if (input_data.GPU_Calculation == "no"):
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE" mpi_command = f"{NUMACTL_CPU_BIND} mpirun -genv OMP_NUM_THREADS {omp_num_threads} -np {input_data.MPI_processes} ./ABE"
mpi_command_outfile = "ABE_out.log" mpi_command_outfile = "ABE_out.log"
elif (input_data.GPU_Calculation == "yes"): elif (input_data.GPU_Calculation == "yes"):
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU" mpi_command = f"{NUMACTL_CPU_BIND} mpirun -genv OMP_NUM_THREADS {omp_num_threads} -np {input_data.MPI_processes} ./ABEGPU"
mpi_command_outfile = "ABEGPU_out.log" mpi_command_outfile = "ABEGPU_out.log"
## Execute the MPI command and stream output ## Execute the MPI command and stream output
@@ -152,7 +160,7 @@ def run_ABE():
## Run the AMSS-NCKU TwoPuncture program TwoPunctureABE ## Run the AMSS-NCKU TwoPuncture program TwoPunctureABE
def run_TwoPunctureABE(): def run_TwoPunctureABE():
tp_time1=time.time()
print( ) print( )
print( " Running the AMSS-NCKU executable file TwoPunctureABE " ) print( " Running the AMSS-NCKU executable file TwoPunctureABE " )
print( ) print( )
@@ -179,9 +187,7 @@ def run_TwoPunctureABE():
print( ) print( )
print( " The TwoPunctureABE simulation is finished " ) print( " The TwoPunctureABE simulation is finished " )
print( ) print( )
tp_time2=time.time()
et=tp_time2-tp_time1
print(f"Used time: {et}")
return return
################################################################## ##################################################################