Fix potential division by zero in reta_val calculation and enable NaN checks

Added a safety check on the denominator in the reta_val calculation to prevent division by zero when chi approaches zero (e.g., at far-field boundaries). Also enabled the DEBUG_NAN_CHECK macro to catch invalid inputs early, and initialized the output arrays to zero to prevent reads of uninitialized memory.
Fix boundary handling in bssn_rhs_opt.f90 to prevent NaNs
2026-01-19 20:29:48 +08:00 · 2026-01-19 20:03:22 +08:00 · 2026-01-19 19:22:52 +08:00 · 2026-01-19 17:14:28 +08:00 · 2026-01-19 16:39:24 +08:00
15 changed files with 3959 additions and 3017 deletions
--- a/AMSS_NCKU_ABEtest.py
+++ b/AMSS_NCKU_ABEtest.py
@@ -1,447 +0,0 @@
-
-##################################################################
-##
-## AMSS-NCKU ABE Test Program (Skip TwoPuncture if data exists)
-## Modified from AMSS_NCKU_Program.py
-## Author: Xiaoqu
-## Modified: 2026/02/01
-##
-##################################################################
-
-
-##################################################################
-
-## Print program introduction
-
-import print_information
-
-print_information.print_program_introduction()
-
-##################################################################
-
-import AMSS_NCKU_Input as input_data
-
-##################################################################
-
-## Create directories to store program run data
-
-import os
-import shutil
-import sys
-import time
-
-## Set the output directory according to the input file
-File_directory = os.path.join(input_data.File_directory)
-
-## Check if output directory exists and if TwoPuncture data is available
-#skip_twopuncture = False
-skip_twopuncture = True
-output_directory = os.path.join(File_directory, "AMSS_NCKU_output")
-binary_results_directory = os.path.join(output_directory, input_data.Output_directory)
-
-if os.path.exists(File_directory):
-    print( " Output directory already exists." )
-    print()
-    '''
-    # Check if TwoPuncture initial data files exist
-    if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture"):
-        twopuncture_output = os.path.join(output_directory, "TwoPunctureABE")
-        input_par = os.path.join(output_directory, "input.par")
-
-        if os.path.exists(twopuncture_output) and os.path.exists(input_par):
-            print( " Found existing TwoPuncture initial data." )
-            print( " Do you want to skip TwoPuncture phase and reuse existing data?" )
-            print( " Input 'skip' to skip TwoPuncture and start ABE directly" )
-            print( " Input 'regenerate' to regenerate everything from scratch" )
-            print()
-            
-            while True:
-                try:
-                    inputvalue = input()
-                    if ( inputvalue == "skip" ):
-                        print( " Skipping TwoPuncture phase, will reuse existing initial data." )
-                        print()
-                        skip_twopuncture = True
-                        break
-                    elif ( inputvalue == "regenerate" ):
-                        print( " Regenerating everything from scratch." )
-                        print()
-                        skip_twopuncture = False
-                        break
-                    else:
-                        print( " Please input 'skip' or 'regenerate'." )
-                except ValueError:
-                    print( " Please input 'skip' or 'regenerate'." )
-            
-        else:
-            print( " TwoPuncture initial data not found, will regenerate everything." )
-            print()
-'''
-    # If not skipping, remove and recreate directory
-    if not skip_twopuncture:
-        shutil.rmtree(File_directory, ignore_errors=True)
-        os.mkdir(File_directory)
-        os.mkdir(output_directory)
-        os.mkdir(binary_results_directory)
-        figure_directory = os.path.join(File_directory, "figure")
-        os.mkdir(figure_directory)
-        shutil.copy("AMSS_NCKU_Input.py", File_directory)
-        print( " Output directory has been regenerated." )
-        print()
-else:
-    # Create fresh directory structure
-    os.mkdir(File_directory)
-    shutil.copy("AMSS_NCKU_Input.py", File_directory)
-    os.mkdir(output_directory)
-    os.mkdir(binary_results_directory)
-    figure_directory = os.path.join(File_directory, "figure")
-    os.mkdir(figure_directory)
-    print( " Output directory has been generated." )
-    print()
-
-# Ensure figure directory exists
-figure_directory = os.path.join(File_directory, "figure")
-if not os.path.exists(figure_directory):
-    os.mkdir(figure_directory)
-
-##################################################################
-
-## Output related parameter information
-
-import setup
-
-## Print and save input parameter information
-setup.print_input_data( File_directory )
-
-if not skip_twopuncture:
-    setup.generate_AMSSNCKU_input()
-
-setup.print_puncture_information()
-
-
-##################################################################
-
-## Generate AMSS-NCKU program input files based on the configured parameters
-
-if not skip_twopuncture:
-    print()
-    print( " Generating the AMSS-NCKU input parfile for the ABE executable." )
-    print()
-
-    ## Generate cgh-related input files from the grid information
-
-    import numerical_grid
-
-    numerical_grid.append_AMSSNCKU_cgh_input()
-
-    print()
-    print( " The input parfile for AMSS-NCKU C++ executable file ABE has been generated." )
-    print( " However, the input relevant to TwoPuncture need to be appended later." )
-    print()
-
-
-##################################################################
-
-## Plot the initial grid configuration
-
-if not skip_twopuncture:
-    print()
-    print( " Schematically plot the numerical grid structure." )
-    print()
-
-    import numerical_grid
-    numerical_grid.plot_initial_grid()
-
-
-##################################################################
-
-## Generate AMSS-NCKU macro files according to the numerical scheme and parameters
-
-if not skip_twopuncture:
-    print()
-    print( " Automatically generating the macro file for AMSS-NCKU C++ executable file ABE " )
-    print( " (Based on the finite-difference numerical scheme) " )
-    print()
-
-    import generate_macrodef
-
-    generate_macrodef.generate_macrodef_h()
-    print( " AMSS-NCKU macro file macrodef.h has been generated. " )
-
-    generate_macrodef.generate_macrodef_fh()
-    print( " AMSS-NCKU macro file macrodef.fh has been generated. " )
-
-
-##################################################################
-
-# Compile the AMSS-NCKU program according to user requirements
-# NOTE: ABE compilation is always performed, even when skipping TwoPuncture
-
-print()
-print( " Preparing to compile and run the AMSS-NCKU code as requested " )
-print( " Compiling the AMSS-NCKU code based on the generated macro files " )
-print()
-
-AMSS_NCKU_source_path = "AMSS_NCKU_source"
-AMSS_NCKU_source_copy = os.path.join(File_directory, "AMSS_NCKU_source_copy")
-
-## If AMSS_NCKU source folder is missing, create it and prompt the user
-if not os.path.exists(AMSS_NCKU_source_path):
-    os.makedirs(AMSS_NCKU_source_path)
-    print( " The AMSS-NCKU source files are incomplete; copy all source files into ./AMSS_NCKU_source. " )
-    print( " Press Enter to continue. " )
-    inputvalue = input()
-
-# Copy AMSS-NCKU source files to prepare for compilation
-# If skipping TwoPuncture and source_copy already exists, remove it first
-if skip_twopuncture and os.path.exists(AMSS_NCKU_source_copy):
-    shutil.rmtree(AMSS_NCKU_source_copy)
-
-shutil.copytree(AMSS_NCKU_source_path, AMSS_NCKU_source_copy)
-
-# Copy the generated macro files into the AMSS_NCKU source folder
-if not skip_twopuncture:
-    macrodef_h_path  = os.path.join(File_directory, "macrodef.h")
-    macrodef_fh_path = os.path.join(File_directory, "macrodef.fh")
-else:
-    # When skipping TwoPuncture, use existing macro files from previous run
-    macrodef_h_path  = os.path.join(File_directory, "macrodef.h")
-    macrodef_fh_path = os.path.join(File_directory, "macrodef.fh")
-
-shutil.copy2(macrodef_h_path,  AMSS_NCKU_source_copy)
-shutil.copy2(macrodef_fh_path, AMSS_NCKU_source_copy)
-
-# Compile related programs
-import makefile_and_run
-
-## Change working directory to the target source copy
-os.chdir(AMSS_NCKU_source_copy)
-
-## Build the main AMSS-NCKU executable (ABE or ABEGPU)
-makefile_and_run.makefile_ABE()
-
-## If the initial-data method is Ansorg-TwoPuncture, build the TwoPunctureABE executable
-## Only build TwoPunctureABE if not skipping TwoPuncture phase
-if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ) and not skip_twopuncture:
-    makefile_and_run.makefile_TwoPunctureABE()
-
-## Change current working directory back up two levels
-os.chdir('..')
-os.chdir('..')
-
-print()
-
-##################################################################
-
-## Copy the AMSS-NCKU executable (ABE/ABEGPU) to the run directory
-
-if (input_data.GPU_Calculation == "no"):
-    ABE_file = os.path.join(AMSS_NCKU_source_copy, "ABE")
-elif (input_data.GPU_Calculation == "yes"):
-    ABE_file = os.path.join(AMSS_NCKU_source_copy, "ABEGPU")
-
-if not os.path.exists( ABE_file ):
-    print()
-    print( " Lack of AMSS-NCKU executable file ABE/ABEGPU; recompile AMSS_NCKU_source manually. " )
-    print( " When recompilation is finished, press Enter to continue. " )
-    inputvalue = input()
-
-## Copy the executable ABE (or ABEGPU) into the run directory
-shutil.copy2(ABE_file, output_directory)
-
-## If the initial-data method is TwoPuncture, copy the TwoPunctureABE executable to the run directory
-## Only copy TwoPunctureABE if not skipping TwoPuncture phase
-if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ) and not skip_twopuncture:
-    TwoPuncture_file = os.path.join(AMSS_NCKU_source_copy, "TwoPunctureABE")
-
-    if not os.path.exists( TwoPuncture_file ):
-        print()
-        print( " Lack of AMSS-NCKU executable file TwoPunctureABE; recompile TwoPunctureABE in AMSS_NCKU_source. " )
-        print( " When recompilation is finished, press Enter to continue. " )
-        inputvalue = input()
-
-    ## Copy the TwoPunctureABE executable into the run directory
-    shutil.copy2(TwoPuncture_file, output_directory)
-
-##################################################################
-
-## If the initial-data method is TwoPuncture, generate the TwoPuncture input files
-
-if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ) and not skip_twopuncture:
-
-    print()
-    print( " Initial data is chosen as Ansorg-TwoPuncture" )
-    print()
-    
-    print()
-    print( " Automatically generating the input parfile for the TwoPunctureABE executable " )
-    print()
-    
-    import generate_TwoPuncture_input
-    
-    generate_TwoPuncture_input.generate_AMSSNCKU_TwoPuncture_input()
-    
-    print()
-    print( " The input parfile for the TwoPunctureABE executable has been generated. " )
-    print()
-    
-    ## Generated AMSS-NCKU TwoPuncture input filename
-    AMSS_NCKU_TwoPuncture_inputfile      = 'AMSS-NCKU-TwoPuncture.input'
-    AMSS_NCKU_TwoPuncture_inputfile_path = os.path.join( File_directory, AMSS_NCKU_TwoPuncture_inputfile )
- 
-    ## Copy and rename the file
-    shutil.copy2( AMSS_NCKU_TwoPuncture_inputfile_path, os.path.join(output_directory, 'TwoPunctureinput.par') )
-    
-    ## Run TwoPuncture to generate initial-data files
-    
-    start_time = time.time()  # Record start time
-
-    print()
-    print()
-    
-    ## Change to the output (run) directory
-    os.chdir(output_directory)
-
-    ## Run the TwoPuncture executable
-    import makefile_and_run
-    makefile_and_run.run_TwoPunctureABE()
-    
-    ## Change current working directory back up two levels
-    os.chdir('..')
-    os.chdir('..')
-
-elif (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ) and skip_twopuncture:
-    print()
-    print( " Skipping TwoPuncture execution, using existing initial data." )
-    print()
-    start_time = time.time()  # Record start time for ABE only
-else:
-    start_time = time.time()  # Record start time
-    
-##################################################################
-    
-## Update puncture data based on TwoPuncture run results
-
-if not skip_twopuncture:
-    import renew_puncture_parameter
-    renew_puncture_parameter.append_AMSSNCKU_BSSN_input(File_directory, output_directory)
-
-    ## Generated AMSS-NCKU input filename
-    AMSS_NCKU_inputfile      = 'AMSS-NCKU.input'
-    AMSS_NCKU_inputfile_path = os.path.join(File_directory, AMSS_NCKU_inputfile)
- 
-    ## Copy and rename the file
-    shutil.copy2( AMSS_NCKU_inputfile_path, os.path.join(output_directory, 'input.par') )
-
-    print()
-    print( " Successfully copy all AMSS-NCKU input parfile to target dictionary. " )
-    print()
-else:
-    print()
-    print( " Using existing input.par file from previous run." )
-    print()
-
-##################################################################
-
-## Launch the AMSS-NCKU program
-
-print()
-print()
-
-## Change to the run directory
-os.chdir( output_directory )
-
-import makefile_and_run
-makefile_and_run.run_ABE()
-
-## Change current working directory back up two levels
-os.chdir('..')
-os.chdir('..')
-
-end_time = time.time()
-elapsed_time = end_time - start_time
-
-##################################################################
-
-## Copy some basic input and log files out to facilitate debugging
-
-## Path to the file that stores calculation settings
-AMSS_NCKU_error_file_path = os.path.join(binary_results_directory, "setting.par")
-## Copy and rename the file for easier inspection
-shutil.copy( AMSS_NCKU_error_file_path, os.path.join(output_directory, "AMSSNCKU_setting_parameter") )
-
-## Path to the error log file
-AMSS_NCKU_error_file_path = os.path.join(binary_results_directory, "Error.log")
-## Copy and rename the error log
-shutil.copy( AMSS_NCKU_error_file_path, os.path.join(output_directory, "Error.log") )
-
-## Primary program outputs
-AMSS_NCKU_BH_data         = os.path.join(binary_results_directory, "bssn_BH.dat"        )
-AMSS_NCKU_ADM_data        = os.path.join(binary_results_directory, "bssn_ADMQs.dat"     )
-AMSS_NCKU_psi4_data       = os.path.join(binary_results_directory, "bssn_psi4.dat"      )
-AMSS_NCKU_constraint_data = os.path.join(binary_results_directory, "bssn_constraint.dat")
-## copy and rename the file
-shutil.copy( AMSS_NCKU_BH_data,         os.path.join(output_directory, "bssn_BH.dat"        ) )
-shutil.copy( AMSS_NCKU_ADM_data,        os.path.join(output_directory, "bssn_ADMQs.dat"     ) )
-shutil.copy( AMSS_NCKU_psi4_data,       os.path.join(output_directory, "bssn_psi4.dat"      ) )
-shutil.copy( AMSS_NCKU_constraint_data, os.path.join(output_directory, "bssn_constraint.dat") )
-
-## Additional program outputs
-if (input_data.Equation_Class == "BSSN-EM"):
-    AMSS_NCKU_phi1_data = os.path.join(binary_results_directory, "bssn_phi1.dat" )
-    AMSS_NCKU_phi2_data = os.path.join(binary_results_directory, "bssn_phi2.dat" )
-    shutil.copy( AMSS_NCKU_phi1_data, os.path.join(output_directory, "bssn_phi1.dat" ) )
-    shutil.copy( AMSS_NCKU_phi2_data, os.path.join(output_directory, "bssn_phi2.dat" ) )
-elif (input_data.Equation_Class == "BSSN-EScalar"):
-    AMSS_NCKU_maxs_data = os.path.join(binary_results_directory, "bssn_maxs.dat" )
-    shutil.copy( AMSS_NCKU_maxs_data, os.path.join(output_directory, "bssn_maxs.dat" ) )
-
-##################################################################
-
-## Plot the AMSS-NCKU program results
-
-print()
-print( " Plotting the txt and binary results data from the AMSS-NCKU simulation " )
-print()
-
-
-import plot_xiaoqu
-import plot_GW_strain_amplitude_xiaoqu
-
-## Plot black hole trajectory
-plot_xiaoqu.generate_puncture_orbit_plot(   binary_results_directory, figure_directory )
-plot_xiaoqu.generate_puncture_orbit_plot3D( binary_results_directory, figure_directory )
-
-## Plot black hole separation vs. time
-plot_xiaoqu.generate_puncture_distence_plot( binary_results_directory, figure_directory )
-
-## Plot gravitational waveforms (psi4 and strain amplitude)
-for i in range(input_data.Detector_Number):
-    plot_xiaoqu.generate_gravitational_wave_psi4_plot( binary_results_directory, figure_directory, i )
-    plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot( binary_results_directory, figure_directory, i )
-
-## Plot ADM mass evolution
-for i in range(input_data.Detector_Number):
-    plot_xiaoqu.generate_ADMmass_plot( binary_results_directory, figure_directory, i )
-
-## Plot Hamiltonian constraint violation over time
-for i in range(input_data.grid_level):
-    plot_xiaoqu.generate_constraint_check_plot( binary_results_directory, figure_directory, i )
-
-## Plot stored binary data
-plot_xiaoqu.generate_binary_data_plot( binary_results_directory, figure_directory )
-
-print()
-print( f" This Program Cost = {elapsed_time} Seconds " )
-print()
-
-
-##################################################################
-
-print()
-print( " The AMSS-NCKU-Python simulation is successfully finished, thanks for using !!! " )
-print()
-
-##################################################################
-
-
--- a/AMSS_NCKU_Input.py
+++ b/AMSS_NCKU_Input.py
@@ -16,7 +16,7 @@ import numpy
 File_directory   = "GW150914"                    ## output file directory
 Output_directory = "binary_output"               ## binary data file directory
                                                 ## The file directory name should not be too long
-MPI_processes    = 64                             ## number of mpi processes used in the simulation
+MPI_processes    = 48                             ## number of mpi processes used in the simulation

 GPU_Calculation  = "no"                          ## Use GPU or not 
                                                 ## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)
--- a/AMSS_NCKU_source/TwoPunctures.C
+++ b/AMSS_NCKU_source/TwoPunctures.C
--- a/AMSS_NCKU_source/TwoPunctures.h
+++ b/AMSS_NCKU_source/TwoPunctures.h
@@ -1,8 +1,7 @@
+
 #ifndef TWO_PUNCTURES_H
 #define TWO_PUNCTURES_H

-#include <omp.h>
-
 #define StencilSize 19
 #define N_PlaneRelax 1
 #define NRELAX 200
@@ -33,7 +32,7 @@ private:
       int npoints_A, npoints_B, npoints_phi;

       double target_M_plus, target_M_minus;
-
+       
       double admMass;

       double adm_tol;
@@ -43,18 +42,6 @@ private:

       int ntotal;

-       // ===== Precomputed spectral derivative matrices =====
-       double *D1_A, *D2_A;
-       double *D1_B, *D2_B;
-       double *DF1_phi, *DF2_phi;
-
-       // ===== Pre-allocated workspace for LineRelax (per-thread) =====
-       int max_threads;
-       double **ws_diag_be, **ws_e_be, **ws_f_be, **ws_b_be, **ws_x_be;
-       double **ws_l_be, **ws_u_be, **ws_d_be, **ws_y_be;
-       double **ws_diag_al, **ws_e_al, **ws_f_al, **ws_b_al, **ws_x_al;
-       double **ws_l_al, **ws_u_al, **ws_d_al, **ws_y_al;
-
       struct parameters
       {
              int nvar, n1, n2, n3;
@@ -71,28 +58,6 @@ public:
                    int Newtonmaxit);
       ~TwoPunctures();

-       // 02/07: New/modified methods
-       void allocate_workspace();
-       void free_workspace();
-       void precompute_derivative_matrices();
-       void build_cheb_deriv_matrices(int n, double *D1, double *D2);
-       void build_fourier_deriv_matrices(int N, double *DF1, double *DF2);
-       void Derivatives_AB3_MatMul(int nvar, int n1, int n2, int n3, derivs v);
-       void ThomasAlgorithm_ws(int N, double *b, double *a, double *c, double *x, double *q,
-                                double *l, double *u_ws, double *d, double *y);
-       void LineRelax_be_omp(double *dv,
-                         int const i, int const k, int const nvar,
-                         int const n1, int const n2, int const n3,
-                         double const *rhs, int const *ncols, int **cols,
-                         double **JFD, int tid);
-       void LineRelax_al_omp(double *dv,
-                         int const j, int const k, int const nvar,
-                         int const n1, int const n2, int const n3,
-                         double const *rhs, int const *ncols,
-                         int **cols, double **JFD, int tid);
-       void relax_omp(double *dv, int const nvar, int const n1, int const n2, int const n3,
-                  double const *rhs, int const *ncols, int **cols, double **JFD);
-
       void Solve();
       void set_initial_guess(derivs v);
       int index(int i, int j, int k, int l, int a, int b, int c, int d);
@@ -151,11 +116,23 @@ public:
       double BY_KKofxyz(double x, double y, double z);
       void SetMatrix_JFD(int nvar, int n1, int n2, int n3, derivs u, int *ncols, int **cols, double **Matrix);
       void J_times_dv(int nvar, int n1, int n2, int n3, derivs dv, double *Jdv, derivs u);
+       void relax(double *dv, int const nvar, int const n1, int const n2, int const n3,
+                  double const *rhs, int const *ncols, int **cols, double **JFD);
+       void LineRelax_be(double *dv,
+                         int const i, int const k, int const nvar,
+                         int const n1, int const n2, int const n3,
+                         double const *rhs, int const *ncols, int **cols,
+                         double **JFD);
       void JFD_times_dv(int i, int j, int k, int nvar, int n1, int n2,
                         int n3, derivs dv, derivs u, double *values);
       void LinEquations(double A, double B, double X, double R,
                         double x, double r, double phi,
                         double y, double z, derivs dU, derivs U, double *values);
+       void LineRelax_al(double *dv,
+                         int const j, int const k, int const nvar,
+                         int const n1, int const n2, int const n3,
+                         double const *rhs, int const *ncols,
+                         int **cols, double **JFD);
       void ThomasAlgorithm(int N, double *b, double *a, double *c, double *x, double *q);
       void Save(char *fname);
       // provided by Vasileios Paschalidis (vpaschal@illinois.edu)
@@ -164,4 +141,4 @@ public:
       void SpecCoef(parameters par, int ivar, double *v, double *cf);
 };

-#endif /* TWO_PUNCTURES_H */
+#endif /* TWO_PUNCTURES_H */
--- a/AMSS_NCKU_source/bssn_rhs.f90
+++ b/AMSS_NCKU_source/bssn_rhs.f90
--- a/AMSS_NCKU_source/bssn_rhs_legacy.f90
+++ b/AMSS_NCKU_source/bssn_rhs_legacy.f90
--- a/AMSS_NCKU_source/bssn_rhs_opt.f90
+++ b/AMSS_NCKU_source/bssn_rhs_opt.f90
--- a/AMSS_NCKU_source/diff_new.f90
+++ b/AMSS_NCKU_source/diff_new.f90
@@ -1000,7 +1000,86 @@
  do k=1,ex(3)-1
  do j=1,ex(2)-1
  do i=1,ex(1)-1
+#if 0  
+! x direction   
+        if(i+2 <= imax .and. i-2 >= imin)then
+!
+!              f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
+!  fx(i) = ---------------------------------------------
+!                             12 dx
+      fx(i,j,k)=d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))

+    elseif(i+1 <= imax .and. i-1 >= imin)then
+!
+!              - f(i-1) + f(i+1)
+!  fx(i) = --------------------------------
+!                     2 dx
+      fx(i,j,k)=d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
+
+! set imax and imin 0
+    endif
+! y direction   
+        if(j+2 <= jmax .and. j-2 >= jmin)then
+
+      fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
+
+    elseif(j+1 <= jmax .and. j-1 >= jmin)then
+
+     fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
+
+! set jmax and jmin 0
+    endif
+! z direction   
+        if(k+2 <= kmax .and. k-2 >= kmin)then
+
+      fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
+
+    elseif(k+1 <= kmax .and. k-1 >= kmin)then
+
+      fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
+
+! set kmax and kmin 0
+    endif
+#elif 0
+! x direction   
+        if(i+2 <= imax .and. i-2 >= imin)then
+!
+!              f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
+!  fx(i) = ---------------------------------------------
+!                             12 dx
+      fx(i,j,k)=d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
+
+    elseif(i+3 <= imax .and. i-1 >= imin)then
+      fx(i,j,k)=d12dx*(-3.d0*fh(i-1,j,k)-1.d1*fh(i,j,k)+1.8d1*fh(i+1,j,k)-6.d0*fh(i+2,j,k)+fh(i+3,j,k))
+    elseif(i+1 <= imax .and. i-3 >= imin)then
+      fx(i,j,k)=d12dx*( 3.d0*fh(i+1,j,k)+1.d1*fh(i,j,k)-1.8d1*fh(i-1,j,k)+6.d0*fh(i-2,j,k)-fh(i-3,j,k))
+! set imax and imin 0
+    endif
+! y direction   
+        if(j+2 <= jmax .and. j-2 >= jmin)then
+
+      fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
+
+    elseif(j+3 <= jmax .and. j-1 >= jmin)then
+      fy(i,j,k)=d12dy*(-3.d0*fh(i,j-1,k)-1.d1*fh(i,j,k)+1.8d1*fh(i,j+1,k)-6.d0*fh(i,j+2,k)+fh(i,j+3,k))
+    elseif(j+1 <= jmax .and. j-3 >= jmin)then
+      fy(i,j,k)=d12dy*( 3.d0*fh(i,j+1,k)+1.d1*fh(i,j,k)-1.8d1*fh(i,j-1,k)+6.d0*fh(i,j-2,k)-fh(i,j-3,k))
+
+! set jmax and jmin 0
+    endif
+! z direction   
+        if(k+2 <= kmax .and. k-2 >= kmin)then
+
+      fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
+
+    elseif(k+3 <= kmax .and. k-1 >= kmin)then
+      fz(i,j,k)=d12dz*(-3.d0*fh(i,j,k-1)-1.d1*fh(i,j,k)+1.8d1*fh(i,j,k+1)-6.d0*fh(i,j,k+2)+fh(i,j,k+3))
+    elseif(k+1 <= kmax .and. k-3 >= kmin)then
+      fz(i,j,k)=d12dz*( 3.d0*fh(i,j,k+1)+1.d1*fh(i,j,k)-1.8d1*fh(i,j,k-1)+6.d0*fh(i,j,k-2)-fh(i,j,k-3))
+
+! set kmax and kmin 0
+    endif
+#else
 ! for bam comparison
   if(i+2 <= imax .and. i-2 >= imin .and. &
      j+2 <= jmax .and. j-2 >= jmin .and. &
@@ -1015,7 +1094,7 @@
      fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
      fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
   endif
-
+#endif
  enddo
  enddo
  enddo
@@ -1325,7 +1404,85 @@
  do k=1,ex(3)-1
  do j=1,ex(2)-1
  do i=1,ex(1)-1
+#if 0  
+!~~~~~~ fxx
+        if(i+2 <= imax .and. i-2 >= imin)then
+!
+!               - f(i-2) + 16 f(i-1) - 30 f(i) + 16 f(i+1) - f(i+2)
+!  fxx(i) = ----------------------------------------------------------
+!                                  12 dx^2 
+   fxx(i,j,k) = Fdxdx*(-fh(i-2,j,k)+F16*fh(i-1,j,k)-F30*fh(i,j,k) &
+                       -fh(i+2,j,k)+F16*fh(i+1,j,k)              )
+   elseif(i+1 <= imax .and. i-1 >= imin)then
+!
+!               f(i-1) - 2 f(i) + f(i+1)
+!  fxx(i) = --------------------------------
+!                         dx^2 
+   fxx(i,j,k) = Sdxdx*(fh(i-1,j,k)-TWO*fh(i,j,k) &
+                      +fh(i+1,j,k)              )
+   endif

+
+!~~~~~~ fyy
+        if(j+2 <= jmax .and. j-2 >= jmin)then
+
+   fyy(i,j,k) = Fdydy*(-fh(i,j-2,k)+F16*fh(i,j-1,k)-F30*fh(i,j,k) &
+                       -fh(i,j+2,k)+F16*fh(i,j+1,k)              )
+   elseif(j+1 <= jmax .and. j-1 >= jmin)then
+
+   fyy(i,j,k) = Sdydy*(fh(i,j-1,k)-TWO*fh(i,j,k) &
+                      +fh(i,j+1,k)              )
+   endif
+
+!~~~~~~ fzz
+        if(k+2 <= kmax .and. k-2 >= kmin)then
+
+   fzz(i,j,k) = Fdzdz*(-fh(i,j,k-2)+F16*fh(i,j,k-1)-F30*fh(i,j,k) &
+                       -fh(i,j,k+2)+F16*fh(i,j,k+1)              )
+   elseif(k+1 <= kmax .and. k-1 >= kmin)then
+
+   fzz(i,j,k) = Sdzdz*(fh(i,j,k-1)-TWO*fh(i,j,k) &
+                      +fh(i,j,k+1)              )
+   endif
+!~~~~~~ fxy
+       if(i+2 <= imax .and. i-2 >= imin .and. j+2 <= jmax .and. j-2 >= jmin)then
+!
+!                 ( f(i-2,j-2) - 8 f(i-1,j-2) + 8 f(i+1,j-2) - f(i+2,j-2) )
+!             - 8 ( f(i-2,j-1) - 8 f(i-1,j-1) + 8 f(i+1,j-1) - f(i+2,j-1) )
+!             + 8 ( f(i-2,j+1) - 8 f(i-1,j+1) + 8 f(i+1,j+1) - f(i+2,j+1) )
+!             -   ( f(i-2,j+2) - 8 f(i-1,j+2) + 8 f(i+1,j+2) - f(i+2,j+2) )
+!  fxy(i,j) = ----------------------------------------------------------------
+!                                  144 dx dy
+   fxy(i,j,k) = Fdxdy*(     (fh(i-2,j-2,k)-F8*fh(i-1,j-2,k)+F8*fh(i+1,j-2,k)-fh(i+2,j-2,k))  &
+                       -F8 *(fh(i-2,j-1,k)-F8*fh(i-1,j-1,k)+F8*fh(i+1,j-1,k)-fh(i+2,j-1,k))  &
+                       +F8 *(fh(i-2,j+1,k)-F8*fh(i-1,j+1,k)+F8*fh(i+1,j+1,k)-fh(i+2,j+1,k))  &
+                       -    (fh(i-2,j+2,k)-F8*fh(i-1,j+2,k)+F8*fh(i+1,j+2,k)-fh(i+2,j+2,k)))
+
+   elseif(i+1 <= imax .and. i-1 >= imin .and. j+1 <= jmax .and. j-1 >= jmin)then
+!                 f(i-1,j-1) - f(i+1,j-1) - f(i-1,j+1) + f(i+1,j+1) 
+!  fxy(i,j) = -----------------------------------------------------------
+!                                      4 dx dy
+   fxy(i,j,k) = Sdxdy*(fh(i-1,j-1,k)-fh(i+1,j-1,k)-fh(i-1,j+1,k)+fh(i+1,j+1,k))
+   endif
+!~~~~~~ fxz
+       if(i+2 <= imax .and. i-2 >= imin .and. k+2 <= kmax .and. k-2 >= kmin)then
+   fxz(i,j,k) = Fdxdz*(     (fh(i-2,j,k-2)-F8*fh(i-1,j,k-2)+F8*fh(i+1,j,k-2)-fh(i+2,j,k-2))  &
+                       -F8 *(fh(i-2,j,k-1)-F8*fh(i-1,j,k-1)+F8*fh(i+1,j,k-1)-fh(i+2,j,k-1))  &
+                       +F8 *(fh(i-2,j,k+1)-F8*fh(i-1,j,k+1)+F8*fh(i+1,j,k+1)-fh(i+2,j,k+1))  &
+                       -    (fh(i-2,j,k+2)-F8*fh(i-1,j,k+2)+F8*fh(i+1,j,k+2)-fh(i+2,j,k+2)))
+   elseif(i+1 <= imax .and. i-1 >= imin .and. k+1 <= kmax .and. k-1 >= kmin)then
+   fxz(i,j,k) = Sdxdz*(fh(i-1,j,k-1)-fh(i+1,j,k-1)-fh(i-1,j,k+1)+fh(i+1,j,k+1))
+   endif
+!~~~~~~ fyz
+       if(j+2 <= jmax .and. j-2 >= jmin .and. k+2 <= kmax .and. k-2 >= kmin)then
+   fyz(i,j,k) = Fdydz*(     (fh(i,j-2,k-2)-F8*fh(i,j-1,k-2)+F8*fh(i,j+1,k-2)-fh(i,j+2,k-2))  &
+                       -F8 *(fh(i,j-2,k-1)-F8*fh(i,j-1,k-1)+F8*fh(i,j+1,k-1)-fh(i,j+2,k-1))  &
+                       +F8 *(fh(i,j-2,k+1)-F8*fh(i,j-1,k+1)+F8*fh(i,j+1,k+1)-fh(i,j+2,k+1))  &
+                       -    (fh(i,j-2,k+2)-F8*fh(i,j-1,k+2)+F8*fh(i,j+1,k+2)-fh(i,j+2,k+2)))
+   elseif(j+1 <= jmax .and. j-1 >= jmin .and. k+1 <= kmax .and. k-1 >= kmin)then
+   fyz(i,j,k) = Sdydz*(fh(i,j-1,k-1)-fh(i,j+1,k-1)-fh(i,j-1,k+1)+fh(i,j+1,k+1))
+   endif 
+#else
 ! for bam comparison
   if(i+2 <= imax .and. i-2 >= imin .and. &
      j+2 <= jmax .and. j-2 >= jmin .and. &
@@ -1361,7 +1518,7 @@
   fxz(i,j,k) = Sdxdz*(fh(i-1,j,k-1)-fh(i+1,j,k-1)-fh(i-1,j,k+1)+fh(i+1,j,k+1))
   fyz(i,j,k) = Sdydz*(fh(i,j-1,k-1)-fh(i,j+1,k-1)-fh(i,j-1,k+1)+fh(i,j+1,k+1))
   endif
-
+#endif
   enddo
   enddo
   enddo
--- a/AMSS_NCKU_source/enforce_algebra.f90
+++ b/AMSS_NCKU_source/enforce_algebra.f90
@@ -18,61 +18,49 @@
  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz

 !~~~~~~~> Local variable:
-
-  integer :: i,j,k
-  real*8 :: lgxx,lgyy,lgzz,ldetg
-  real*8 :: lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz
-  real*8 :: ltrA,lscale
+  
+  real*8, dimension(ex(1),ex(2),ex(3)) :: trA,detg
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz 
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz,gupyy,gupyz,gupzz
  real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0

 !~~~~~~>

-  do k=1,ex(3)
-  do j=1,ex(2)
-  do i=1,ex(1)
+  gxx = dxx + ONE
+  gyy = dyy + ONE
+  gzz = dzz + ONE

-    lgxx = dxx(i,j,k) + ONE
-    lgyy = dyy(i,j,k) + ONE
-    lgzz = dzz(i,j,k) + ONE
+  detg =  gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
+          gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
+  gupxx =   ( gyy * gzz - gyz * gyz ) / detg
+  gupxy = - ( gxy * gzz - gyz * gxz ) / detg
+  gupxz =   ( gxy * gyz - gyy * gxz ) / detg
+  gupyy =   ( gxx * gzz - gxz * gxz ) / detg
+  gupyz = - ( gxx * gyz - gxy * gxz ) / detg
+  gupzz =   ( gxx * gyy - gxy * gxy ) / detg

-    ldetg =  lgxx * lgyy * lgzz &
-           + gxy(i,j,k) * gyz(i,j,k) * gxz(i,j,k) &
-           + gxz(i,j,k) * gxy(i,j,k) * gyz(i,j,k) &
-           - gxz(i,j,k) * lgyy * gxz(i,j,k) &
-           - gxy(i,j,k) * gxy(i,j,k) * lgzz &
-           - lgxx * gyz(i,j,k) * gyz(i,j,k)
+  trA =         gupxx * Axx + gupyy * Ayy + gupzz * Azz &
+       + TWO * (gupxy * Axy + gupxz * Axz + gupyz * Ayz)

-    lgupxx =   ( lgyy * lgzz - gyz(i,j,k) * gyz(i,j,k) ) / ldetg
-    lgupxy = - ( gxy(i,j,k) * lgzz - gyz(i,j,k) * gxz(i,j,k) ) / ldetg
-    lgupxz =   ( gxy(i,j,k) * gyz(i,j,k) - lgyy * gxz(i,j,k) ) / ldetg
-    lgupyy =   ( lgxx * lgzz - gxz(i,j,k) * gxz(i,j,k) ) / ldetg
-    lgupyz = - ( lgxx * gyz(i,j,k) - gxy(i,j,k) * gxz(i,j,k) ) / ldetg
-    lgupzz =   ( lgxx * lgyy - gxy(i,j,k) * gxy(i,j,k) ) / ldetg
+  Axx = Axx - F1o3 * gxx * trA
+  Axy = Axy - F1o3 * gxy * trA
+  Axz = Axz - F1o3 * gxz * trA
+  Ayy = Ayy - F1o3 * gyy * trA
+  Ayz = Ayz - F1o3 * gyz * trA
+  Azz = Azz - F1o3 * gzz * trA

-    ltrA =         lgupxx * Axx(i,j,k) + lgupyy * Ayy(i,j,k) &
-                 + lgupzz * Azz(i,j,k) &
-         + TWO * (lgupxy * Axy(i,j,k) + lgupxz * Axz(i,j,k) &
-                 + lgupyz * Ayz(i,j,k))
+  detg = ONE / ( detg ** F1o3 ) 
+  
+  gxx = gxx * detg
+  gxy = gxy * detg
+  gxz = gxz * detg
+  gyy = gyy * detg
+  gyz = gyz * detg
+  gzz = gzz * detg

-    Axx(i,j,k) = Axx(i,j,k) - F1o3 * lgxx * ltrA
-    Axy(i,j,k) = Axy(i,j,k) - F1o3 * gxy(i,j,k) * ltrA
-    Axz(i,j,k) = Axz(i,j,k) - F1o3 * gxz(i,j,k) * ltrA
-    Ayy(i,j,k) = Ayy(i,j,k) - F1o3 * lgyy * ltrA
-    Ayz(i,j,k) = Ayz(i,j,k) - F1o3 * gyz(i,j,k) * ltrA
-    Azz(i,j,k) = Azz(i,j,k) - F1o3 * lgzz * ltrA
-
-    lscale = ONE / ( ldetg ** F1o3 )
-
-    dxx(i,j,k) = lgxx * lscale - ONE
-    gxy(i,j,k) = gxy(i,j,k) * lscale
-    gxz(i,j,k) = gxz(i,j,k) * lscale
-    dyy(i,j,k) = lgyy * lscale - ONE
-    gyz(i,j,k) = gyz(i,j,k) * lscale
-    dzz(i,j,k) = lgzz * lscale - ONE
-
-  enddo
-  enddo
-  enddo
+  dxx = gxx - ONE
+  dyy = gyy - ONE
+  dzz = gzz - ONE

  return

@@ -94,71 +82,51 @@
  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz

 !~~~~~~~> Local variable:
-
-  integer :: i,j,k
-  real*8 :: lgxx,lgyy,lgzz,lscale
-  real*8 :: lgxy,lgxz,lgyz
-  real*8 :: lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz
-  real*8 :: ltrA
+  
+  real*8, dimension(ex(1),ex(2),ex(3)) :: trA
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz 
+  real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz,gupyy,gupyz,gupzz
  real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0

 !~~~~~~>

-  do k=1,ex(3)
-  do j=1,ex(2)
-  do i=1,ex(1)
+  gxx = dxx + ONE
+  gyy = dyy + ONE
+  gzz = dzz + ONE
+! for g
+  gupzz =  gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
+           gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz

-! for g: normalize determinant first
-    lgxx = dxx(i,j,k) + ONE
-    lgyy = dyy(i,j,k) + ONE
-    lgzz = dzz(i,j,k) + ONE
-    lgxy = gxy(i,j,k)
-    lgxz = gxz(i,j,k)
-    lgyz = gyz(i,j,k)
+  gupzz = ONE / ( gupzz ** F1o3 ) 
+  
+  gxx = gxx * gupzz
+  gxy = gxy * gupzz
+  gxz = gxz * gupzz
+  gyy = gyy * gupzz
+  gyz = gyz * gupzz
+  gzz = gzz * gupzz

-    lscale =  lgxx * lgyy * lgzz + lgxy * lgyz * lgxz &
-            + lgxz * lgxy * lgyz - lgxz * lgyy * lgxz &
-            - lgxy * lgxy * lgzz - lgxx * lgyz * lgyz
+  dxx = gxx - ONE
+  dyy = gyy - ONE
+  dzz = gzz - ONE
+! for A  

-    lscale = ONE / ( lscale ** F1o3 )
+  gupxx =   ( gyy * gzz - gyz * gyz )
+  gupxy = - ( gxy * gzz - gyz * gxz )
+  gupxz =   ( gxy * gyz - gyy * gxz )
+  gupyy =   ( gxx * gzz - gxz * gxz )
+  gupyz = - ( gxx * gyz - gxy * gxz )
+  gupzz =   ( gxx * gyy - gxy * gxy )

-    lgxx = lgxx * lscale
-    lgxy = lgxy * lscale
-    lgxz = lgxz * lscale
-    lgyy = lgyy * lscale
-    lgyz = lgyz * lscale
-    lgzz = lgzz * lscale
+  trA =         gupxx * Axx + gupyy * Ayy + gupzz * Azz &
+       + TWO * (gupxy * Axy + gupxz * Axz + gupyz * Ayz)

-    dxx(i,j,k) = lgxx - ONE
-    gxy(i,j,k) = lgxy
-    gxz(i,j,k) = lgxz
-    dyy(i,j,k) = lgyy - ONE
-    gyz(i,j,k) = lgyz
-    dzz(i,j,k) = lgzz - ONE
-
-! for A: trace-free using normalized metric (det=1, no division needed)
-    lgupxx =   ( lgyy * lgzz - lgyz * lgyz )
-    lgupxy = - ( lgxy * lgzz - lgyz * lgxz )
-    lgupxz =   ( lgxy * lgyz - lgyy * lgxz )
-    lgupyy =   ( lgxx * lgzz - lgxz * lgxz )
-    lgupyz = - ( lgxx * lgyz - lgxy * lgxz )
-    lgupzz =   ( lgxx * lgyy - lgxy * lgxy )
-
-    ltrA =         lgupxx * Axx(i,j,k) + lgupyy * Ayy(i,j,k) &
-                 + lgupzz * Azz(i,j,k) &
-         + TWO * (lgupxy * Axy(i,j,k) + lgupxz * Axz(i,j,k) &
-                 + lgupyz * Ayz(i,j,k))
-
-    Axx(i,j,k) = Axx(i,j,k) - F1o3 * lgxx * ltrA
-    Axy(i,j,k) = Axy(i,j,k) - F1o3 * lgxy * ltrA
-    Axz(i,j,k) = Axz(i,j,k) - F1o3 * lgxz * ltrA
-    Ayy(i,j,k) = Ayy(i,j,k) - F1o3 * lgyy * ltrA
-    Ayz(i,j,k) = Ayz(i,j,k) - F1o3 * lgyz * ltrA
-    Azz(i,j,k) = Azz(i,j,k) - F1o3 * lgzz * ltrA
-
-  enddo
-  enddo
-  enddo
+  Axx = Axx - F1o3 * gxx * trA
+  Axy = Axy - F1o3 * gxy * trA
+  Axz = Axz - F1o3 * gxz * trA
+  Ayy = Ayy - F1o3 * gyy * trA
+  Ayz = Ayz - F1o3 * gyz * trA
+  Azz = Azz - F1o3 * gzz * trA

  return

--- a/AMSS_NCKU_source/fmisc.f90
+++ b/AMSS_NCKU_source/fmisc.f90
@@ -324,10 +324,10 @@ subroutine symmetry_bd(ord,extc,func,funcc,SoA)

  integer::i

+  funcc = 0.d0
  funcc(1:extc(1),1:extc(2),1:extc(3)) = func
   do i=0,ord-1
-      
-    funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1)
+      funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1)
   enddo
   do i=0,ord-1
      funcc(:,-i,1:extc(3)) = funcc(:,i+2,1:extc(3))*SoA(2)
@@ -350,6 +350,7 @@ subroutine symmetry_tbd(ord,extc,func,funcc,SoA)

  integer::i

+  funcc = 0.d0
  funcc(1:extc(1),1:extc(2),1:extc(3)) = func
   do i=0,ord-1
      funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1)
@@ -378,6 +379,7 @@ subroutine symmetry_stbd(ord,extc,func,funcc,SoA)

  integer::i

+  funcc = 0.d0
  funcc(1:extc(1),1:extc(2),1:extc(3)) = func
   do i=0,ord-1
      funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1)
@@ -884,6 +886,7 @@ subroutine symmetry_bd(ord,extc,func,funcc,SoA)

  integer::i

+  funcc = 0.d0
  funcc(1:extc(1),1:extc(2),1:extc(3)) = func
   do i=0,ord-1
      funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1)
@@ -909,6 +912,7 @@ subroutine symmetry_tbd(ord,extc,func,funcc,SoA)

  integer::i

+  funcc = 0.d0
  funcc(1:extc(1),1:extc(2),1:extc(3)) = func
   do i=0,ord-1
      funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1)
@@ -937,6 +941,7 @@ subroutine symmetry_stbd(ord,extc,func,funcc,SoA)

  integer::i

+  funcc = 0.d0
  funcc(1:extc(1),1:extc(2),1:extc(3)) = func
   do i=0,ord-1
      funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1)
@@ -1113,65 +1118,64 @@ end subroutine d2dump
 ! Lagrangian polynomial interpolation
 !------------------------------------------------------------------------------

-  subroutine polint(xa, ya, x, y, dy, ordn)
+  subroutine polint(xa,ya,x,y,dy,ordn)
+
  implicit none

-  integer, intent(in) :: ordn
-  real*8, dimension(ordn), intent(in) :: xa, ya
+!~~~~~~> Input Parameter:
+  integer,intent(in) :: ordn
+  real*8, dimension(ordn), intent(in) :: xa,ya
  real*8, intent(in) :: x
-  real*8, intent(out) :: y, dy
+  real*8, intent(out) :: y,dy

-  integer :: i, m, ns, n_m
-  real*8, dimension(ordn) :: c, d, ho
-  real*8 :: dif, dift, hp, h, den_val
+!~~~~~~> Other parameter:

-  c = ya
-  d = ya
-  ho = xa - x
+  integer :: m,n,ns
+  real*8, dimension(ordn) :: c,d,den,ho
+  real*8 :: dif,dift

-  ns = 1
-  dif = abs(x - xa(1))
+!~~~~~~>

-  do i = 2, ordn
-    dift = abs(x - xa(i))
-    if (dift < dif) then
-      ns = i
-      dif = dift
-    end if
+  n=ordn
+  m=ordn
+
+  c=ya
+  d=ya
+  ho=xa-x
+
+  ns=1
+  dif=abs(x-xa(1))
+  do m=1,n
+   dift=abs(x-xa(m))
+   if(dift < dif) then
+    ns=m
+    dif=dift
+   end if
  end do

-  y = ya(ns)
-  ns = ns - 1
-
-  do m = 1, ordn - 1
-    n_m = ordn - m
-    do i = 1, n_m
-      hp = ho(i)
-      h  = ho(i+m)
-      den_val = hp - h
-
-      if (den_val == 0.0d0) then
-        write(*,*) 'failure in polint for point',x
-        write(*,*) 'with input points: ',xa
-        stop
-      end if
-
-      den_val = (c(i+1) - d(i)) / den_val
-
-      d(i) = h * den_val
-      c(i) = hp * den_val
-    end do
-
-    if (2 * ns < n_m) then
-      dy = c(ns + 1)
+  y=ya(ns)
+  ns=ns-1
+  do m=1,n-1
+    den(1:n-m)=ho(1:n-m)-ho(1+m:n)
+    if (any(den(1:n-m) == 0.0))then
+      write(*,*) 'failure in polint for point',x
+      write(*,*) 'with input points: ',xa
+      stop
+    endif
+    den(1:n-m)=(c(2:n-m+1)-d(1:n-m))/den(1:n-m)
+    d(1:n-m)=ho(1+m:n)*den(1:n-m)
+    c(1:n-m)=ho(1:n-m)*den(1:n-m)
+    if (2*ns < n-m) then
+      dy=c(ns+1)
    else
-      dy = d(ns)
-      ns = ns - 1
+      dy=d(ns)
+      ns=ns-1
    end if
-    y = y + dy
+    y=y+dy
  end do

  return
+
  end subroutine polint
 !------------------------------------------------------------------------------
 !
@@ -1179,37 +1183,35 @@ end subroutine d2dump
 !
 !------------------------------------------------------------------------------
  subroutine polin2(x1a,x2a,ya,x1,x2,y,dy,ordn)
+
  implicit none

+!~~~~~~> Input parameters:
  integer,intent(in) :: ordn
  real*8, dimension(1:ordn), intent(in) :: x1a,x2a
  real*8, dimension(1:ordn,1:ordn), intent(in) :: ya
  real*8, intent(in) :: x1,x2
  real*8, intent(out) :: y,dy

-#ifdef POLINT_LEGACY_ORDER
+!~~~~~~> Other parameters:
+
  integer  :: i,m
  real*8, dimension(ordn) :: ymtmp
  real*8, dimension(ordn) :: yntmp

  m=size(x1a)
+  
  do i=1,m
+
    yntmp=ya(i,:)
    call polint(x2a,yntmp,x2,ymtmp(i),dy,ordn)
-  end do
-  call polint(x1a,ymtmp,x1,y,dy,ordn)
-#else
-  integer  :: j
-  real*8, dimension(ordn) :: ymtmp
-  real*8 :: dy_temp

-  do j=1,ordn
-    call polint(x1a, ya(:,j), x1, ymtmp(j), dy_temp, ordn)
  end do
-  call polint(x2a, ymtmp, x2, y, dy, ordn)
-#endif
+
+  call polint(x1a,ymtmp,x1,y,dy,ordn)

  return
+
  end subroutine polin2
 !------------------------------------------------------------------------------
 !
@@ -1217,15 +1219,18 @@ end subroutine d2dump
 !
 !------------------------------------------------------------------------------
  subroutine polin3(x1a,x2a,x3a,ya,x1,x2,x3,y,dy,ordn)
+
  implicit none

+!~~~~~~> Input parameters:
  integer,intent(in) :: ordn
  real*8, dimension(1:ordn), intent(in) :: x1a,x2a,x3a
  real*8, dimension(1:ordn,1:ordn,1:ordn), intent(in) :: ya
  real*8, intent(in) :: x1,x2,x3
  real*8, intent(out) :: y,dy

-#ifdef POLINT_LEGACY_ORDER
+!~~~~~~> Other parameters:
+
  integer  :: i,j,m,n
  real*8, dimension(ordn,ordn) :: yatmp
  real*8, dimension(ordn) :: ymtmp
@@ -1234,33 +1239,24 @@ end subroutine d2dump

  m=size(x1a)
  n=size(x2a)
+  
  do i=1,m
   do j=1,n
+
    yqtmp=ya(i,j,:)
    call polint(x3a,yqtmp,x3,yatmp(i,j),dy,ordn)
+
   end do
+
    yntmp=yatmp(i,:)
    call polint(x2a,yntmp,x2,ymtmp(i),dy,ordn)
-  end do
-  call polint(x1a,ymtmp,x1,y,dy,ordn)
-#else
-  integer  :: j, k
-  real*8, dimension(ordn,ordn) :: yatmp
-  real*8, dimension(ordn) :: ymtmp
-  real*8 :: dy_temp

-  do k=1,ordn
-    do j=1,ordn
-      call polint(x1a, ya(:,j,k), x1, yatmp(j,k), dy_temp, ordn)
-    end do
  end do
-  do k=1,ordn
-    call polint(x2a, yatmp(:,k), x2, ymtmp(k), dy_temp, ordn)
-  end do
-  call polint(x3a, ymtmp, x3, y, dy, ordn)
-#endif
+
+  call polint(x1a,ymtmp,x1,y,dy,ordn)

  return
+
  end subroutine polin3
 !--------------------------------------------------------------------------------------
 ! calculate L2norm
--- a/AMSS_NCKU_source/kodiss.f90
+++ b/AMSS_NCKU_source/kodiss.f90
@@ -6,6 +6,101 @@
 ! Vertex or Cell is distinguished in routine symmetry_bd which locates in
 ! file "fmisc.f90"

+#if (ghost_width == 2)
+! second order code
+
+!------------------------------------------------------------------------------------------------------------------------------
+! standard Kreiss-Oliger numerical dissipation
+!We support cell center only
+!  (D_+D_-)^2 =
+!   f(i-2) - 4 f(i-1) + 6 f(i) - 4 f(i+1) + f(i+2)
+! ------------------------------------------------------
+!                       dx^4
+!------------------------------------------------------------------------------------------------------------------------------
+! do not add dissipation near boundary
+subroutine kodis(ex,X,Y,Z,f,f_rhs,SoA,Symmetry,eps)
+! Purpose: per direction, subtract eps/(cof*h) * [f(i-2)-4f(i-1)+6f(i)-4f(i+1)+f(i+2)] from f_rhs.
+implicit none
+! argument variables (f_rhs is updated in place; SoA is passed on to symmetry_bd; eps scales the added term)
+integer,intent(in) :: Symmetry
+integer,dimension(3),intent(in)::ex
+real*8, dimension(1:3), intent(in) :: SoA
+double precision,intent(in),dimension(ex(1))::X
+double precision,intent(in),dimension(ex(2))::Y
+double precision,intent(in),dimension(ex(3))::Z
+double precision,intent(in),dimension(ex(1),ex(2),ex(3))::f
+double precision,intent(inout),dimension(ex(1),ex(2),ex(3))::f_rhs
+real*8,intent(in) :: eps
+
+!~~~~~~ other variables
+! fh holds f plus two extra low-index layers (indices -1,0) that symmetry_bd fills
+  real*8 :: dX,dY,dZ
+  real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3))   :: fh
+  integer :: imin,jmin,kmin,imax,jmax,kmax
+  integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
+  real*8,parameter   :: cof = 1.6d1 ! 2^4
+  real*8,  parameter :: F4=4.d0,F6=6.d0
+  integer::i,j,k
+! spacing is taken from the first two coordinates (presumably a uniform grid -- TODO confirm)
+  dX = X(2)-X(1)
+  dY = Y(2)-Y(1)
+  dZ = Z(2)-Z(1)
+
+  imax = ex(1)
+  jmax = ex(2)
+  kmax = ex(3)
+
+  imin = 1
+  jmin = 1
+  kmin = 1
+! lower the minimum index to -1 when Symmetry asks for it and the first coordinate is within one spacing of zero
+  if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -1
+  if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -1
+  if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -1
+
+  call symmetry_bd(2,ex,f,fh,SoA)
+
+!   f(i-2) - 4 f(i-1) + 6 f(i) - 4 f(i+1) + f(i+2)
+! ------------------------------------------------------
+!                       dx^4
+
+!  note the sign (-1)^(r-1), now r=2, hence the minus sign in front of eps below
+  do k=1,ex(3)
+  do j=1,ex(2)
+  do i=1,ex(1)
+! only points whose whole 5-point stencil fits inside [min,max] in all three directions are updated
+  if(i-2 >= imin .and. i+2 <= imax .and. &
+     j-2 >= jmin .and. j+2 <= jmax .and. &
+     k-2 >= kmin .and. k+2 <= kmax) then
+! x direction
+   f_rhs(i,j,k)       = f_rhs(i,j,k) - eps/dX/cof * (     &
+                                (fh(i-2,j,k)+fh(i+2,j,k)) &
+                         - F4 * (fh(i-1,j,k)+fh(i+1,j,k)) &
+                         + F6 *  fh(i,j,k) )
+! y direction
+
+   f_rhs(i,j,k)       = f_rhs(i,j,k) - eps/dY/cof * (     &
+                                (fh(i,j-2,k)+fh(i,j+2,k)) &
+                         - F4 * (fh(i,j-1,k)+fh(i,j+1,k)) &
+                         + F6 *  fh(i,j,k) )
+! z direction
+
+   f_rhs(i,j,k)       = f_rhs(i,j,k) - eps/dZ/cof * (     &
+                                (fh(i,j,k-2)+fh(i,j,k+2)) &
+                         - F4 * (fh(i,j,k-1)+fh(i,j,k+1)) &
+                         + F6 *  fh(i,j,k) )
+
+  endif
+
+  enddo
+  enddo
+  enddo
+
+  return
+
+end subroutine kodis
+
+#elif (ghost_width == 3)
 ! fourth order code

 !---------------------------------------------------------------------------------------------
@@ -61,7 +156,7 @@ integer, parameter :: NO_SYMM=0, OCTANT=2
  if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -2
  if(Symmetry == OCTANT .and. dabs(X(1)) < dX) imin = -2
  if(Symmetry == OCTANT .and. dabs(Y(1)) < dY) jmin = -2
-  !print*,'imin,jmin,kmin=',imin,jmin,kmin
+
  call symmetry_bd(3,ex,f,fh,SoA)

  do k=1,ex(3)
@@ -71,7 +166,28 @@ integer, parameter :: NO_SYMM=0, OCTANT=2
  if(i-3 >= imin .and. i+3 <= imax .and. &
     j-3 >= jmin .and. j+3 <= jmax .and. &
     k-3 >= kmin .and. k+3 <= kmax) then
+#if 0     
+! x direction
+   f_rhs(i,j,k)       = f_rhs(i,j,k) + eps/dX/cof * (     &
+                              (fh(i-3,j,k)+fh(i+3,j,k)) - &
+                          SIX*(fh(i-2,j,k)+fh(i+2,j,k)) + &
+                          FIT*(fh(i-1,j,k)+fh(i+1,j,k)) - &
+                          TWT* fh(i,j,k)            )
+! y direction

+   f_rhs(i,j,k)       = f_rhs(i,j,k) + eps/dY/cof * (     &
+                              (fh(i,j-3,k)+fh(i,j+3,k)) - &
+                          SIX*(fh(i,j-2,k)+fh(i,j+2,k)) + &
+                          FIT*(fh(i,j-1,k)+fh(i,j+1,k)) - &
+                          TWT* fh(i,j,k)            )
+! z direction
+
+   f_rhs(i,j,k)       = f_rhs(i,j,k) + eps/dZ/cof * (     &
+                              (fh(i,j,k-3)+fh(i,j,k+3)) - &
+                          SIX*(fh(i,j,k-2)+fh(i,j,k+2)) + &
+                          FIT*(fh(i,j,k-1)+fh(i,j,k+1)) - &
+                          TWT* fh(i,j,k)            )
+#else
 ! calculation order if important ?
   f_rhs(i,j,k)       = f_rhs(i,j,k) + eps/cof *( (     &
                              (fh(i-3,j,k)+fh(i+3,j,k)) - &
@@ -88,7 +204,7 @@ integer, parameter :: NO_SYMM=0, OCTANT=2
                          SIX*(fh(i,j,k-2)+fh(i,j,k+2)) + &
                          FIT*(fh(i,j,k-1)+fh(i,j,k+1)) - &
                          TWT* fh(i,j,k)            )/dZ )
-
+#endif
  endif

  enddo
@@ -99,6 +215,218 @@ integer, parameter :: NO_SYMM=0, OCTANT=2

  end subroutine kodis

+#elif (ghost_width == 4)
+! sixth order code
+!------------------------------------------------------------------------------------------------------------------------------
+! standard Kreiss-Oliger numerical dissipation
+!We support cell center only
+!  (D_+D_-)^4 =
+!   f(i-4) - 8 f(i-3) + 28 f(i-2) - 56 f(i-1) + 70 f(i) - 56 f(i+1) + 28 f(i+2) - 8 f(i+3) + f(i+4)
+! ----------------------------------------------------------------------------------------------------------
+!                                              dx^8
+!------------------------------------------------------------------------------------------------------------------------------
+! do not add dissipation near boundary
+subroutine kodis(ex,X,Y,Z,f,f_rhs,SoA,Symmetry,eps)

+implicit none
+! argument variables (f_rhs is updated in place; SoA is passed on to symmetry_bd; eps scales the added term)
+integer,intent(in) :: Symmetry
+integer,dimension(3),intent(in)::ex
+real*8, dimension(1:3), intent(in) :: SoA
+double precision,intent(in),dimension(ex(1))::X
+double precision,intent(in),dimension(ex(2))::Y
+double precision,intent(in),dimension(ex(3))::Z
+double precision,intent(in),dimension(ex(1),ex(2),ex(3))::f
+double precision,intent(inout),dimension(ex(1),ex(2),ex(3))::f_rhs
+real*8,intent(in) :: eps

+!~~~~~~ other variables (fh holds f plus four extra low-index layers, indices -3..0, filled by symmetry_bd)

+  real*8 :: dX,dY,dZ
+  real*8,dimension(-3:ex(1),-3:ex(2),-3:ex(3))   :: fh
+  integer :: imin,jmin,kmin,imax,jmax,kmax
+  integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
+  real*8,parameter   :: cof = 2.56d2 ! 2^8
+  real*8,  parameter :: F8=8.d0,F28=2.8d1,F56=5.6d1,F70=7.d1
+  integer::i,j,k
+! spacing is taken from the first two coordinates (presumably a uniform grid -- TODO confirm)
+  dX = X(2)-X(1)
+  dY = Y(2)-Y(1)
+  dZ = Z(2)-Z(1)
+  
+  imax = ex(1)
+  jmax = ex(2)
+  kmax = ex(3)
+
+  imin = 1
+  jmin = 1
+  kmin = 1
+! lower the minimum index to -3 when Symmetry asks for it and the first coordinate is within one spacing of zero
+  if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -3
+  if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -3
+  if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -3
+
+  call symmetry_bd(4,ex,f,fh,SoA)
+
+!   f(i-4) - 8 f(i-3) + 28 f(i-2) - 56 f(i-1) + 70 f(i) - 56 f(i+1) + 28 f(i+2) - 8 f(i+3) + f(i+4)
+! ----------------------------------------------------------------------------------------------------------
+!                                              dx^8
+
+!  note the sign (-1)^(r-1), now r=4, hence the minus sign in front of eps below
+  do k=1,ex(3)
+  do j=1,ex(2)
+  do i=1,ex(1)
+! i > imin+3 and i < imax-3 mean i-4 >= imin and i+4 <= imax: the whole 9-point stencil must fit
+  if(i>imin+3 .and. i < imax-3 .and. &
+     j>jmin+3 .and. j < jmax-3 .and. &
+     k>kmin+3 .and. k < kmax-3) then
+! x direction
+   f_rhs(i,j,k)       = f_rhs(i,j,k) - eps/dX/cof * (     &
+                                (fh(i-4,j,k)+fh(i+4,j,k)) &
+                         - F8 * (fh(i-3,j,k)+fh(i+3,j,k)) &
+                         +F28 * (fh(i-2,j,k)+fh(i+2,j,k)) &
+                         -F56 * (fh(i-1,j,k)+fh(i+1,j,k)) &
+                         +F70 *  fh(i,j,k) )
+! y direction
+
+   f_rhs(i,j,k)       = f_rhs(i,j,k) - eps/dY/cof * (     &
+                                (fh(i,j-4,k)+fh(i,j+4,k)) &
+                         - F8 * (fh(i,j-3,k)+fh(i,j+3,k)) &
+                         +F28 * (fh(i,j-2,k)+fh(i,j+2,k)) &
+                         -F56 * (fh(i,j-1,k)+fh(i,j+1,k)) &
+                         +F70 *  fh(i,j,k) )
+! z direction
+
+   f_rhs(i,j,k)       = f_rhs(i,j,k) - eps/dZ/cof * (     &
+                                (fh(i,j,k-4)+fh(i,j,k+4)) &
+                         - F8 * (fh(i,j,k-3)+fh(i,j,k+3)) &
+                         +F28 * (fh(i,j,k-2)+fh(i,j,k+2)) &
+                         -F56 * (fh(i,j,k-1)+fh(i,j,k+1)) &
+                         +F70 *  fh(i,j,k) )
+
+  endif
+
+  enddo
+  enddo
+  enddo
+
+  return
+
+end subroutine kodis
+
+#elif (ghost_width == 5)
+! eighth order code
+!------------------------------------------------------------------------------------------------------------------------------
+! standard Kreiss-Oliger numerical dissipation
+!We support cell center only
+! Note the notation D_+ and D_- [P240 of B. Gustafsson, H.-O. Kreiss, and J. Oliger, Time
+! Dependent Problems and Difference Methods (Wiley, New York, 1995).]
+! D_+ = (f(i+1) - f(i))/h
+! D_- = (f(i) - f(i-1))/h
+! then we have D_+D_- = D_-D_+ = (f(i+1) - 2f(i) + f(i-1))/h^2
+! for nth order accurate finite difference code, we need r =n/2+1
+!              D_+^rD_-^r = (D_+D_-)^r 
+! following the tradition of PRD 77, 024027 (BB's calibration paper, Eq.(64),
+!  with some typos corrected according to the book cited above):
+! + eps*(-1)^(r-1)*h^(2r-1)/2^(2r)*(D_+D_-)^r 
+!
+!
+! this is for 8th order accurate finite difference scheme
+!  (D_+D_-)^5 =
+!  f(i-5) - 10 f(i-4) + 45 f(i-3) - 120 f(i-2) + 210 f(i-1) - 252 f(i) + 210 f(i+1) - 120 f(i+2) + 45 f(i+3) - 10 f(i+4) + f(i+5)
+! -------------------------------------------------------------------------------------------------------------------------------
+!                                                              dx^10
+!---------------------------------------------------------------------------------------------------------------------------------
+! do not add dissipation near boundary
+subroutine kodis(ex,X,Y,Z,f,f_rhs,SoA,Symmetry,eps)
+! Purpose: per direction, add eps/(cof*h) times the 11-point (D_+D_-)^5 undivided difference of f to f_rhs.
+implicit none
+! argument variables (f_rhs is updated in place; SoA is passed on to symmetry_bd; eps scales the added term)
+integer,intent(in) :: Symmetry
+integer,dimension(3),intent(in)::ex
+real*8, dimension(1:3), intent(in) :: SoA
+double precision,intent(in),dimension(ex(1))::X
+double precision,intent(in),dimension(ex(2))::Y
+double precision,intent(in),dimension(ex(3))::Z
+double precision,intent(in),dimension(ex(1),ex(2),ex(3))::f
+double precision,intent(inout),dimension(ex(1),ex(2),ex(3))::f_rhs
+real*8,intent(in) :: eps
+
+!~~~~~~ other variables
+! fh holds f plus five extra low-index layers (indices -4..0) that symmetry_bd fills
+  real*8 :: dX,dY,dZ
+  real*8,dimension(-4:ex(1),-4:ex(2),-4:ex(3))   :: fh
+  integer :: imin,jmin,kmin,imax,jmax,kmax
+  integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
+  real*8,parameter   :: cof = 1.024d3 ! 2^2r = 2^10
+  real*8,  parameter :: F10=1.d1,F45=4.5d1,F120=1.2d2,F210=2.1d2,F252=2.52d2
+  integer::i,j,k
+! spacing is taken from the first two coordinates (presumably a uniform grid -- TODO confirm)
+  dX = X(2)-X(1)
+  dY = Y(2)-Y(1)
+  dZ = Z(2)-Z(1)
+  
+  imax = ex(1)
+  jmax = ex(2)
+  kmax = ex(3)
+
+  imin = 1
+  jmin = 1
+  kmin = 1
+! lower the minimum index to -4 when Symmetry asks for it and the first coordinate is within one spacing of zero
+  if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -4
+  if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -4
+  if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -4
+
+  call symmetry_bd(5,ex,f,fh,SoA)
+
+!  f(i-5) - 10 f(i-4) + 45 f(i-3) - 120 f(i-2) + 210 f(i-1) - 252 f(i) + 210 f(i+1) - 120 f(i+2) + 45 f(i+3) - 10 f(i+4) + f(i+5)
+! -------------------------------------------------------------------------------------------------------------------------------
+!                                                              dx^10
+
+!  note the sign (-1)^(r-1), now r=5, hence the plus sign in front of eps below
+  do k=1,ex(3)
+  do j=1,ex(2)
+  do i=1,ex(1)
+! i > imin+4 and i < imax-4 mean i-5 >= imin and i+5 <= imax: the whole 11-point stencil must fit
+  if(i>imin+4 .and. i < imax-4 .and. &
+     j>jmin+4 .and. j < jmax-4 .and. &
+     k>kmin+4 .and. k < kmax-4) then
+! x direction
+   f_rhs(i,j,k)       = f_rhs(i,j,k) + eps/dX/cof * (      &
+                                 (fh(i-5,j,k)+fh(i+5,j,k)) &
+                         - F10 * (fh(i-4,j,k)+fh(i+4,j,k)) &
+                         + F45 * (fh(i-3,j,k)+fh(i+3,j,k)) &
+                         - F120* (fh(i-2,j,k)+fh(i+2,j,k)) &
+                         + F210* (fh(i-1,j,k)+fh(i+1,j,k)) &
+                         - F252 * fh(i,j,k) )
+! y direction
+
+   f_rhs(i,j,k)       = f_rhs(i,j,k) + eps/dY/cof * (      &
+                                 (fh(i,j-5,k)+fh(i,j+5,k)) &
+                         - F10 * (fh(i,j-4,k)+fh(i,j+4,k)) &
+                         + F45 * (fh(i,j-3,k)+fh(i,j+3,k)) &
+                         - F120* (fh(i,j-2,k)+fh(i,j+2,k)) &
+                         + F210* (fh(i,j-1,k)+fh(i,j+1,k)) &
+                         - F252 * fh(i,j,k) )
+! z direction
+
+   f_rhs(i,j,k)       = f_rhs(i,j,k) + eps/dZ/cof * (      &
+                                 (fh(i,j,k-5)+fh(i,j,k+5)) &
+                         - F10 * (fh(i,j,k-4)+fh(i,j,k+4)) &
+                         + F45 * (fh(i,j,k-3)+fh(i,j,k+3)) &
+                         - F120* (fh(i,j,k-2)+fh(i,j,k+2)) &
+                         + F210* (fh(i,j,k-1)+fh(i,j,k+1)) &
+                         - F252 * fh(i,j,k) )
+
+  endif
+
+  enddo
+  enddo
+  enddo
+
+  return
+
+end subroutine kodis
+
+#endif  
--- a/AMSS_NCKU_source/lopsidediff.f90
+++ b/AMSS_NCKU_source/lopsidediff.f90
@@ -7,7 +7,163 @@
 ! Vertex or Cell is distinguished in routine symmetry_bd which locates in
 ! file "fmisc.f90"

+#if (ghost_width == 2)
+! second order code

+!-----------------------------------------------------------------------------
+!         v
+! D f = ------[ - 3 f  + 4 f   - f     ]
+!  i     2dx         i      i+v   i+2v
+!
+! where
+!
+!        i
+!      |B |
+! v = -----
+!        i
+!       B
+!
+!-----------------------------------------------------------------------------
+subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
+  implicit none
+! Purpose: add the one-sided (upwind by the sign of Sfx,Sfy,Sfz) advection term Sf*df to f_rhs.
+!~~~~~~> Input parameters:
+! ex = array extents; X,Y,Z = coordinates; f = advected field; Sfx,Sfy,Sfz = advection coefficients
+  integer, intent(in)  :: ex(1:3),Symmetry
+  real*8,  intent(in)  :: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
+  real*8,dimension(ex(1),ex(2),ex(3)),intent(in)   :: f,Sfx,Sfy,Sfz
+! f_rhs is updated in place; SoA is passed on to symmetry_bd
+  real*8,dimension(ex(1),ex(2),ex(3)),intent(inout):: f_rhs
+  real*8,dimension(3),intent(in) ::SoA
+
+!~~~~~~> local variables:
+! note index -1,0, so we have 2 extra points
+  real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3))   :: fh
+  integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
+  real*8 :: dX,dY,dZ
+  real*8 :: d2dx,d2dy,d2dz
+  real*8,  parameter :: ZEO=0.d0,ONE=1.d0,TWO=2.d0,THR=3.d0,FOUR=4.d0
+  integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
+! spacing is taken from the first two coordinates (presumably a uniform grid -- TODO confirm)
+  dX = X(2)-X(1)
+  dY = Y(2)-Y(1)
+  dZ = Z(2)-Z(1)
+! 1/(2h): prefactor of the 3-point one-sided formula only
+  d2dx = ONE/TWO/dX
+  d2dy = ONE/TWO/dY
+  d2dz = ONE/TWO/dZ
+
+  imax = ex(1)
+  jmax = ex(2)
+  kmax = ex(3)
+
+  imin = 1
+  jmin = 1
+  kmin = 1
+  if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -1
+  if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -1
+  if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -1
+
+  call symmetry_bd(2,ex,f,fh,SoA)
+
+! loops stop at ex-1, so the last index plane in each direction
+! receives no advection term from this routine
+  do k=1,ex(3)-1
+  do j=1,ex(2)-1
+  do i=1,ex(1)-1
+! x direction
+    if(Sfx(i,j,k) >= ZEO)then
+       if( i+2 <= imax .and. i >= imin)then
+!         v
+! D f = ------[ - 3 f  + 4 f   - f     ]
+!  i     2dx         i      i+v   i+2v
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                           &
+                  Sfx(i,j,k)*d2dx*(-THR*fh(i,j,k)+FOUR*fh(i+1,j,k)-fh(i+2,j,k))
+       elseif(i+1 <= imax .and. i >= imin)then
+!         v
+! D f = ------[ - f  + f   ]
+!  i      dx       i    i+v
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                           &
+                  Sfx(i,j,k)/dX*(-fh(i,j,k)+fh(i+1,j,k))
+! (2-point formula divides by dX, not 2 dX)
+       endif
+
+    elseif(Sfx(i,j,k) <= ZEO)then
+      if( i-2 >= imin .and. i <= imax)then
+     f_rhs(i,j,k)=f_rhs(i,j,k)-                           &
+                  Sfx(i,j,k)*d2dx*(-THR*fh(i,j,k)+FOUR*fh(i-1,j,k)-fh(i-2,j,k))
+      elseif(i-1 >= imin .and. i <= imax)then
+     f_rhs(i,j,k)=f_rhs(i,j,k)-                           &
+                  Sfx(i,j,k)/dX*(-fh(i,j,k)+fh(i-1,j,k))
+      endif
+
+! if neither stencil fits, no x term is added
+    endif
+
+! y direction
+    if(Sfy(i,j,k) >= ZEO)then
+       if( j+2 <= jmax .and. j >= jmin)then
+!         v
+! D f = ------[ - 3 f  + 4 f   - f     ]
+!  j     2dy         j      j+v   j+2v
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                           &
+                  Sfy(i,j,k)*d2dy*(-THR*fh(i,j,k)+FOUR*fh(i,j+1,k)-fh(i,j+2,k))
+       elseif(j+1 <= jmax .and. j >= jmin)then
+!         v
+! D f = ------[ - f  + f   ]
+!  j      dy       j    j+v
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                           &
+                  Sfy(i,j,k)/dY*(-fh(i,j,k)+fh(i,j+1,k))
+       endif
+
+    elseif(Sfy(i,j,k) <= ZEO)then
+      if( j-2 >= jmin .and. j <= jmax)then
+     f_rhs(i,j,k)=f_rhs(i,j,k)-                           &
+                  Sfy(i,j,k)*d2dy*(-THR*fh(i,j,k)+FOUR*fh(i,j-1,k)-fh(i,j-2,k))
+      elseif(j-1 >= jmin .and. j <= jmax)then
+     f_rhs(i,j,k)=f_rhs(i,j,k)-                           &
+                  Sfy(i,j,k)/dY*(-fh(i,j,k)+fh(i,j-1,k))
+      endif
+
+! if neither stencil fits, no y term is added
+     endif
+! z direction
+    if(Sfz(i,j,k) >= ZEO)then
+      if( k+2 <= kmax .and. k >= kmin)then
+!         v
+! D f = ------[ - 3 f  + 4 f   - f     ]
+!  k     2dz         k      k+v   k+2v
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                           &
+                  Sfz(i,j,k)*d2dz*(-THR*fh(i,j,k)+FOUR*fh(i,j,k+1)-fh(i,j,k+2))
+       elseif(k+1 <= kmax .and. k >= kmin)then
+!         v
+! D f = ------[ - f  + f   ]
+!  k      dz       k    k+v
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                           &
+                  Sfz(i,j,k)/dZ*(-fh(i,j,k)+fh(i,j,k+1))
+       endif
+
+    elseif(Sfz(i,j,k) <= ZEO)then
+      if( k-2 >= kmin .and. k <= kmax)then
+     f_rhs(i,j,k)=f_rhs(i,j,k)-                           &
+                  Sfz(i,j,k)*d2dz*(-THR*fh(i,j,k)+FOUR*fh(i,j,k-1)-fh(i,j,k-2))
+      elseif(k-1 >= kmin .and. k <= kmax)then
+     f_rhs(i,j,k)=f_rhs(i,j,k)-                           &
+                  Sfz(i,j,k)/dZ*(-fh(i,j,k)+fh(i,j,k-1))
+      endif
+
+! if neither stencil fits, no z term is added
+     endif
+
+  enddo
+  enddo
+  enddo
+
+  return
+
+  end subroutine lopsided
+
+#elif (ghost_width == 3)
 ! fourth order code

 !-----------------------------------------------------------------------------
@@ -80,7 +236,89 @@ subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
  do k=1,ex(3)-1
  do j=1,ex(2)-1
  do i=1,ex(1)-1
+#if 0  
+!! old code
+! x direction   
+    if(Sfx(i,j,k) >= ZEO .and. i+3 <= imax .and. i-1 >= imin)then
+!         v
+! D f = ------[ - 3f    - 10f  + 18f    - 6f     + f     ]
+!  i     12dx       i-v      i      i+v     i+2v    i+3v
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                   &
+                  Sfx(i,j,k)*d12dx*(-F3*fh(i-1,j,k)-F10*fh(i,j,k)+F18*fh(i+1,j,k) &
+                                    -F6*fh(i+2,j,k)+    fh(i+3,j,k))

+    elseif(Sfx(i,j,k) <= ZEO .and. i-3 >= imin .and. i+1 <= imax)then
+     f_rhs(i,j,k)=f_rhs(i,j,k)-                                                   &
+                  Sfx(i,j,k)*d12dx*(-F3*fh(i+1,j,k)-F10*fh(i,j,k)+F18*fh(i-1,j,k) &
+                                    -F6*fh(i-2,j,k)+    fh(i-3,j,k))
+
+     elseif(i+2 <= imax .and. i-2 >= imin)then
+!
+!              f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
+!  fx(i) = ---------------------------------------------
+!                             12 dx
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                           &
+                  Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
+
+     elseif(i+1 <= imax .and. i-1 >= imin)then
+!
+!              - f(i-1) + f(i+1)
+!  fx(i) = --------------------------------
+!                     2 dx
+     f_rhs(i,j,k)=f_rhs(i,j,k) + Sfx(i,j,k)*d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
+
+! set imax and imin 0
+    endif
+
+! y direction   
+    if(Sfy(i,j,k) >= ZEO .and. j+3 <= jmax .and. j-1 >= jmin)then
+!         v
+! D f = ------[ - 3f    - 10f  + 18f    - 6f     + f     ]
+!  i     12dx       i-v      i      i+v     i+2v    i+3v
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                   &
+                  Sfy(i,j,k)*d12dy*(-F3*fh(i,j-1,k)-F10*fh(i,j,k)+F18*fh(i,j+1,k) &
+                                    -F6*fh(i,j+2,k)+    fh(i,j+3,k))
+
+    elseif(Sfy(i,j,k) <= ZEO .and. j-3 >= jmin .and. j+1 <= jmax)then
+     f_rhs(i,j,k)=f_rhs(i,j,k)-                                                   &
+                  Sfy(i,j,k)*d12dy*(-F3*fh(i,j+1,k)-F10*fh(i,j,k)+F18*fh(i,j-1,k) &
+                                    -F6*fh(i,j-2,k)+    fh(i,j-3,k))
+
+     elseif(j+2 <= jmax .and. j-2 >= jmin)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                            &
+                  Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
+
+     elseif(j+1 <= jmax .and. j-1 >= jmin)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k) + Sfy(i,j,k)*d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
+! set jmin and jmax 0
+     endif
+!! z direction   
+    if(Sfz(i,j,k) >= ZEO .and. k+3 <= kmax .and. k-1 >= kmin)then
+!         v
+! D f = ------[ - 3f    - 10f  + 18f    - 6f     + f     ]
+!  i     12dx       i-v      i      i+v     i+2v    i+3v
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                   &
+                  Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k-1)-F10*fh(i,j,k)+F18*fh(i,j,k+1) &
+                                    -F6*fh(i,j,k+2)+    fh(i,j,k+3))
+
+    elseif(Sfz(i,j,k) <= ZEO .and. k-3 >= kmin .and. k+1 <= kmax)then
+     f_rhs(i,j,k)=f_rhs(i,j,k)-                                                   &
+                  Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k+1)-F10*fh(i,j,k)+F18*fh(i,j,k-1) &
+                                    -F6*fh(i,j,k-2)+    fh(i,j,k-3))
+
+     elseif(k+2 <= kmax .and. k-2 >= kmin)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                            &
+                  Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
+
+     elseif(k+1 <= kmax .and. k-1 >= kmin)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k)+Sfz(i,j,k)*d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
+! set kmin and kmax 0
+     endif
+#else
 !! new code, 2012dec27, based on bam
 ! x direction   
    if(Sfx(i,j,k) > ZEO)then
@@ -240,6 +478,7 @@ subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
 ! set kmax and kmin 0
     endif
   endif
+#endif
  enddo
  enddo
  enddo
@@ -247,3 +486,417 @@ subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
  return

  end subroutine lopsided
+
+#elif (ghost_width == 4)
+! sixth order code (compiled when ghost_width == 4)
+! Add the advection terms (Sfx d/dx + Sfy d/dy + Sfz d/dz) f to f_rhs; main stencil:
+!         v
+! D f = ------[ 2f     - 24f    - 35f  + 80f    - 30f     + 8f     - f    ]
+!  i     60dx     i-2v      i-v      i      i+v      i+2v     i+3v    i+4v
+!
+! where
+!
+!        i
+!      |B |
+! v = -----
+!        i
+!       B
+! i.e. v = +1 or -1: the stencil leans toward the side the Sf component points to.
+!-----------------------------------------------------------------------------
+subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
+  implicit none
+
+!~~~~~~> Input parameters:
+!   ex = array extents; X,Y,Z = grid coordinates; f = field; Sfx,Sfy,Sfz = coefficients of d/dx,d/dy,d/dz
+  integer, intent(in)  :: ex(1:3),Symmetry
+  real*8,  intent(in)  :: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
+  real*8,dimension(ex(1),ex(2),ex(3)),intent(in)   :: f,Sfx,Sfy,Sfz
+!   f_rhs is updated in place; SoA is only forwarded to symmetry_bd
+  real*8,dimension(ex(1),ex(2),ex(3)),intent(inout):: f_rhs
+  real*8,dimension(3),intent(in) ::SoA
+
+!~~~~~~> local variables:
+!   fh = work array with 4 extra layers (indices -3..0) on the low side of each axis
+  real*8,dimension(-3:ex(1),-3:ex(2),-3:ex(3))   :: fh
+  integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
+  real*8 :: dX,dY,dZ
+  real*8 :: d60dx,d60dy,d60dz,d12dx,d12dy,d12dz,d2dx,d2dy,d2dz
+  real*8,  parameter :: ZEO=0.d0,ONE=1.d0, F60=6.d1
+  real*8,  parameter :: TWO=2.d0,F24=2.4d1,F35=3.5d1,F80=8.d1,F30=3.d1,EIT=8.d0
+  real*8,  parameter ::  F9=9.d0,F45=4.5d1,F12=1.2d1
+  real*8,  parameter ::  F10=1.d1,F77=7.7d1,F150=1.5d2,F100=1.d2,F50=5.d1,F15=1.5d1
+  integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
+! grid spacing, taken from the first two points of each axis
+  dX = X(2)-X(1)
+  dY = Y(2)-Y(1)
+  dZ = Z(2)-Z(1)
+! stencil prefactors 1/(60 h), 1/(12 h) and 1/(2 h)
+  d60dx = ONE/F60/dX
+  d60dy = ONE/F60/dY
+  d60dz = ONE/F60/dZ
+
+  d12dx = ONE/F12/dX
+  d12dy = ONE/F12/dY
+  d12dz = ONE/F12/dZ
+
+  d2dx = ONE/TWO/dX
+  d2dy = ONE/TWO/dY
+  d2dz = ONE/TWO/dZ
+! [imin:imax] etc. = index range the stencils below may read from fh
+  imax = ex(1)
+  jmax = ex(2)
+  kmax = ex(3)
+! lower limit drops to -3 where Symmetry enables it and the first coordinate lies within one spacing of 0
+  imin = 1
+  jmin = 1
+  kmin = 1
+  if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -3
+  if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -3
+  if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -3
+! NOTE(review): symmetry_bd is defined elsewhere; presumably it copies f into fh and fills layers -3..0
+  call symmetry_bd(4,ex,f,fh,SoA)
+
+! Loops stop at ex-1, so points with i=ex(1), j=ex(2) or k=ex(3) are not updated here
+! (original note: done for efficiency).
+  do k=1,ex(3)-1
+  do j=1,ex(2)-1
+  do i=1,ex(1)-1
+! x direction
+    if(Sfx(i,j,k) >= ZEO .and. i+4 <= imax .and. i-2 >= imin)then
+!         v
+! D f = ------[ 2f     - 24f    - 35f  + 80f    - 30f     + 8f     - f    ]
+!  i     60dx     i-2v      i-v      i      i+v      i+2v     i+3v    i+4v
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                             &
+                  Sfx(i,j,k)*d60dx*(TWO*fh(i-2,j,k)-F24*fh(i-1,j,k)-F35*fh(i,j,k)+F80*fh(i+1,j,k) &
+                                   -F30*fh(i+2,j,k)+EIT*fh(i+3,j,k)-    fh(i+4,j,k))
+    elseif(Sfx(i,j,k) >= ZEO .and. i+5 <= imax .and. i-1 >= imin)then
+!         v
+! D f = ------[-10f    - 77f  + 150f    - 100f     + 50f     -15f     + 2f    ]
+!  i     60dx      i-v      i       i+v       i+2v      i+3v     i+4v    i+5v
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                                        &
+                  Sfx(i,j,k)*d60dx*(-F10*fh(i-1,j,k)-F77*fh(i  ,j,k)+F150*fh(i+1,j,k)-F100*fh(i+2,j,k) &
+                                    +F50*fh(i+3,j,k)-F15*fh(i+4,j,k)+ TWO*fh(i+5,j,k))
+! Sfx <= 0: the same two stencils mirrored about i (i+n <-> i-n), subtracted instead of added
+    elseif(Sfx(i,j,k) <= ZEO .and. i-4 >= imin .and. i+2 <= imax)then
+     f_rhs(i,j,k)=f_rhs(i,j,k)-                                                                   &
+                  Sfx(i,j,k)*d60dx*(TWO*fh(i+2,j,k)-F24*fh(i+1,j,k)-F35*fh(i,j,k)+F80*fh(i-1,j,k) &
+                                   -F30*fh(i-2,j,k)+EIT*fh(i-3,j,k)-    fh(i-4,j,k))
+    elseif(Sfx(i,j,k) <= ZEO .and. i-5 >= imin .and. i+1 <= imax)then
+     f_rhs(i,j,k)=f_rhs(i,j,k)-                                                                        &
+                  Sfx(i,j,k)*d60dx*(-F10*fh(i+1,j,k)-F77*fh(i  ,j,k)+F150*fh(i-1,j,k)-F100*fh(i-2,j,k) &
+                                    +F50*fh(i-3,j,k)-F15*fh(i-4,j,k)+ TWO*fh(i-5,j,k))
+! neither lopsided stencil fits: centered differences over +-3, then +-2, then +-1 points
+     elseif(i+3 <= imax .and. i-3 >= imin)then
+!           - f(i-3) + 9 f(i-2) - 45 f(i-1) + 45 f(i+1) - 9 f(i+2) + f(i+3)
+!  fx(i) = -----------------------------------------------------------------
+!                                        60 dx
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                                              &
+                  Sfx(i,j,k)*d60dx*(-fh(i-3,j,k)+F9*fh(i-2,j,k)-F45*fh(i-1,j,k)+F45*fh(i+1,j,k)-F9*fh(i+2,j,k)+fh(i+3,j,k))
+
+     elseif(i+2 <= imax .and. i-2 >= imin)then
+!
+!              f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
+!  fx(i) = ---------------------------------------------
+!                             12 dx
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                           &
+                  Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
+
+     elseif(i+1 <= imax .and. i-1 >= imin)then
+!
+!              - f(i-1) + f(i+1)
+!  fx(i) = --------------------------------
+!                     2 dx
+     f_rhs(i,j,k)=f_rhs(i,j,k) + Sfx(i,j,k)*d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
+
+! if no stencil fits inside [imin:imax], no x term is added
+    endif
+
+! y direction: same sequence of stencils as for x, applied along j with Sfy
+     if(Sfy(i,j,k) >= ZEO .and. j+4 <= jmax .and. j-2 >= jmin)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                                   &
+                  Sfy(i,j,k)*d60dy*(TWO*fh(i,j-2,k)-F24*fh(i,j-1,k)-F35*fh(i,j,k)+F80*fh(i,j+1,k) &
+                                   -F30*fh(i,j+2,k)+EIT*fh(i,j+3,k)-    fh(i,j+4,k))
+     elseif(Sfy(i,j,k) >= ZEO .and. j+5 <= jmax .and. j-1 >= jmin)then
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                                        &
+                  Sfy(i,j,k)*d60dy*(-F10*fh(i,j-1,k)-F77*fh(i,j  ,k)+F150*fh(i,j+1,k)-F100*fh(i,j+2,k) &
+                                    +F50*fh(i,j+3,k)-F15*fh(i,j+4,k)+ TWO*fh(i,j+5,k))
+
+     elseif(Sfy(i,j,k) <= ZEO .and. j-4 >= jmin .and. j+2 <= jmax)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k)-                                                                   &
+                  Sfy(i,j,k)*d60dy*(TWO*fh(i,j+2,k)-F24*fh(i,j+1,k)-F35*fh(i,j,k)+F80*fh(i,j-1,k) &
+                                   -F30*fh(i,j-2,k)+EIT*fh(i,j-3,k)-    fh(i,j-4,k))
+
+     elseif(Sfy(i,j,k) <= ZEO .and. j-5 >= jmin .and. j+1 <= jmax)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k)-                                                                        &
+                  Sfy(i,j,k)*d60dy*(-F10*fh(i,j+1,k)-F77*fh(i,j  ,k)+F150*fh(i,j-1,k)-F100*fh(i,j-2,k) &
+                                    +F50*fh(i,j-3,k)-F15*fh(i,j-4,k)+ TWO*fh(i,j-5,k))
+
+     elseif(j+3 <= jmax .and. j-3 >= jmin)then
+          
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                                                         &
+                  Sfy(i,j,k)*d60dy*(-fh(i,j-3,k)+F9*fh(i,j-2,k)-F45*fh(i,j-1,k)+F45*fh(i,j+1,k)-F9*fh(i,j+2,k)+fh(i,j+3,k))
+
+     elseif(j+2 <= jmax .and. j-2 >= jmin)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                            &
+                  Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
+
+     elseif(j+1 <= jmax .and. j-1 >= jmin)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k) + Sfy(i,j,k)*d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
+! if no stencil fits inside [jmin:jmax], no y term is added
+     endif
+!! z direction: same sequence of stencils as for x, applied along k with Sfz
+     if(Sfz(i,j,k) >= ZEO .and. k+4 <= kmax .and. k-2 >= kmin)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                                   &
+                  Sfz(i,j,k)*d60dz*(TWO*fh(i,j,k-2)-F24*fh(i,j,k-1)-F35*fh(i,j,k)+F80*fh(i,j,k+1) &
+                                   -F30*fh(i,j,k+2)+EIT*fh(i,j,k+3)-    fh(i,j,k+4))
+     elseif(Sfz(i,j,k) >= ZEO .and. k+5 <= kmax .and. k-1 >= kmin)then
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                                        &
+                  Sfz(i,j,k)*d60dz*(-F10*fh(i,j,k-1)-F77*fh(i,j,k  )+F150*fh(i,j,k+1)-F100*fh(i,j,k+2) &
+                                    +F50*fh(i,j,k+3)-F15*fh(i,j,k+4)+ TWO*fh(i,j,k+5))
+
+     elseif(Sfz(i,j,k) <= ZEO .and. k-4 >= kmin .and. k+2 <= kmax)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k)-                                                                   &
+                  Sfz(i,j,k)*d60dz*(TWO*fh(i,j,k+2)-F24*fh(i,j,k+1)-F35*fh(i,j,k)+F80*fh(i,j,k-1) &
+                                   -F30*fh(i,j,k-2)+EIT*fh(i,j,k-3)-    fh(i,j,k-4))
+
+     elseif(Sfz(i,j,k) <= ZEO .and. k-5 >= kmin .and. k+1 <= kmax)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k)-                                                                        &
+                  Sfz(i,j,k)*d60dz*(-F10*fh(i,j,k+1)-F77*fh(i,j,k  )+F150*fh(i,j,k-1)-F100*fh(i,j,k-2) &
+                                    +F50*fh(i,j,k-3)-F15*fh(i,j,k-4)+ TWO*fh(i,j,k-5))
+     
+     elseif(k+3 <= kmax .and. k-3 >= kmin)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                                                         &
+                  Sfz(i,j,k)*d60dz*(-fh(i,j,k-3)+F9*fh(i,j,k-2)-F45*fh(i,j,k-1)+F45*fh(i,j,k+1)-F9*fh(i,j,k+2)+fh(i,j,k+3))
+
+     elseif(k+2 <= kmax .and. k-2 >= kmin)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                            &
+                  Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
+
+     elseif(k+1 <= kmax .and. k-1 >= kmin)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k)+Sfz(i,j,k)*d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
+! if no stencil fits inside [kmin:kmax], no z term is added
+     endif
+
+  enddo
+  enddo
+  enddo
+
+  return
+
+  end subroutine lopsided
+
+#elif (ghost_width == 5)
+! eighth order code (compiled when ghost_width == 5)
+!-----------------------------------------------------------------------------
+! PRD 77, 024034 (2008)
+! Add the advection terms (Sfx d/dx + Sfy d/dy + Sfz d/dz) f to f_rhs; main stencil:
+!        v [ - 5 f(i-3v) + 60 f(i-2v) - 420 f(i-v) - 378 f(i) + 1050 f(i+v) - 420 f(i+2v) + 140 f(i+3v) - 30 f(i+4v) + 3 f(i+5v)]
+! D f = --------------------------------------------------------------------------------------------------------------------------
+!  i                                                             840 dx           
+!
+! where
+!
+!        i
+!      |B |
+! v = -----
+!        i
+!       B
+! i.e. v = +1 or -1: the stencil leans toward the side the Sf component points to.
+!-----------------------------------------------------------------------------
+subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
+  implicit none
+
+!~~~~~~> Input parameters:
+!   ex = array extents; X,Y,Z = grid coordinates; f = field; Sfx,Sfy,Sfz = coefficients of d/dx,d/dy,d/dz
+  integer, intent(in)  :: ex(1:3),Symmetry
+  real*8,  intent(in)  :: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
+  real*8,dimension(ex(1),ex(2),ex(3)),intent(in)   :: f,Sfx,Sfy,Sfz
+!   f_rhs is updated in place; SoA is only forwarded to symmetry_bd
+  real*8,dimension(ex(1),ex(2),ex(3)),intent(inout):: f_rhs
+  real*8,dimension(3),intent(in) ::SoA
+
+!~~~~~~> local variables:
+!   fh = work array with 5 extra layers (indices -4..0) on the low side of each axis
+  real*8,dimension(-4:ex(1),-4:ex(2),-4:ex(3))   :: fh
+  integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
+  real*8 :: dX,dY,dZ
+  real*8 :: d840dx,d840dy,d840dz,d60dx,d60dy,d60dz,d12dx,d12dy,d12dz,d2dx,d2dy,d2dz
+  real*8,  parameter :: ZEO=0.d0,ONE=1.d0, F60=6.d1
+  real*8,  parameter :: TWO=2.d0,F30=3.d1,EIT=8.d0
+  real*8,  parameter ::  F9=9.d0,F45=4.5d1,F12=1.2d1,F140=1.4d2,THR=3.d0
+  real*8,  parameter :: F840=8.4d2,F5=5.d0,F420=4.2d2,F378=3.78d2,F1050=1.05d3
+  real*8,  parameter :: F32=3.2d1,F168=1.68d2,F672=6.72d2
+  integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
+! grid spacing, taken from the first two points of each axis
+  dX = X(2)-X(1)
+  dY = Y(2)-Y(1)
+  dZ = Z(2)-Z(1)
+! stencil prefactors 1/(840 h), 1/(60 h), 1/(12 h) and 1/(2 h)
+  d840dx = ONE/F840/dX
+  d840dy = ONE/F840/dY
+  d840dz = ONE/F840/dZ
+
+  d60dx = ONE/F60/dX
+  d60dy = ONE/F60/dY
+  d60dz = ONE/F60/dZ
+
+  d12dx = ONE/F12/dX
+  d12dy = ONE/F12/dY
+  d12dz = ONE/F12/dZ
+
+  d2dx = ONE/TWO/dX
+  d2dy = ONE/TWO/dY
+  d2dz = ONE/TWO/dZ
+! [imin:imax] etc. = index range the stencils below may read from fh
+  imax = ex(1)
+  jmax = ex(2)
+  kmax = ex(3)
+! lower limit drops to -4 where Symmetry enables it and the first coordinate lies within one spacing of 0
+  imin = 1
+  jmin = 1
+  kmin = 1
+  if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -4
+  if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -4
+  if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -4
+! NOTE(review): symmetry_bd is defined elsewhere; presumably it copies f into fh and fills layers -4..0
+  call symmetry_bd(5,ex,f,fh,SoA)
+
+! Loops stop at ex-1, so points with i=ex(1), j=ex(2) or k=ex(3) are not updated here
+! (original note: done for efficiency).
+  do k=1,ex(3)-1
+  do j=1,ex(2)-1
+  do i=1,ex(1)-1
+! x direction
+    if(Sfx(i,j,k) >= ZEO .and. i+5 <= imax .and. i-3 >= imin)then
+!        v [ - 5 f(i-3v) + 60 f(i-2v) - 420 f(i-v) - 378 f(i) + 1050 f(i+v) - 420 f(i+2v) + 140 f(i+3v) - 30 f(i+4v) + 3 f(i+5v)]
+! D f = --------------------------------------------------------------------------------------------------------------------------
+!  i                                                             840 dx    
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                                         &
+                  Sfx(i,j,k)*d840dx*(-F5*fh(i-3,j,k)+F60 *fh(i-2,j,k)-F420*fh(i-1,j,k)-F378*fh(i  ,j,k) &
+                                  +F1050*fh(i+1,j,k)-F420*fh(i+2,j,k)+F140*fh(i+3,j,k)-F30 *fh(i+4,j,k)+THR*fh(i+5,j,k))
+! Sfx <= 0: the same stencil mirrored about i (i+n <-> i-n), subtracted instead of added
+    elseif(Sfx(i,j,k) <= ZEO .and. i-5 >= imin .and. i+3 <= imax)then
+     f_rhs(i,j,k)=f_rhs(i,j,k)-                                                                          &
+                  Sfx(i,j,k)*d840dx*(-F5*fh(i+3,j,k)+F60 *fh(i+2,j,k)-F420*fh(i+1,j,k)-F378*fh(i   ,j,k) &
+                                  +F1050*fh(i-1,j,k)-F420*fh(i-2,j,k)+F140*fh(i-3,j,k)- F30*fh(i-4,j,k)+THR*fh(i-5,j,k))
+! lopsided stencil does not fit: centered differences over +-4, then +-3, +-2, +-1 points
+    elseif(i+4 <= imax .and. i-4 >= imin)then
+!           3 f(i-4) - 32 f(i-3) + 168 f(i-2) - 672 f(i-1) + 672 f(i+1) - 168 f(i+2) + 32 f(i+3) - 3 f(i+4)
+!  fx(i) = -------------------------------------------------------------------------------------------------
+!                                                        840 dx
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                                              &
+                  Sfx(i,j,k)*d840dx*( THR*fh(i-4,j,k)-F32 *fh(i-3,j,k)+F168*fh(i-2,j,k)-F672*fh(i-1,j,k)+    &
+                                     F672*fh(i+1,j,k)-F168*fh(i+2,j,k)+F32 *fh(i+3,j,k)-THR *fh(i+4,j,k))
+
+     elseif(i+3 <= imax .and. i-3 >= imin)then
+!           - f(i-3) + 9 f(i-2) - 45 f(i-1) + 45 f(i+1) - 9 f(i+2) + f(i+3)
+!  fx(i) = -----------------------------------------------------------------
+!                                        60 dx
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                                              &
+                  Sfx(i,j,k)*d60dx*(-fh(i-3,j,k)+F9*fh(i-2,j,k)-F45*fh(i-1,j,k)+F45*fh(i+1,j,k)-F9*fh(i+2,j,k)+fh(i+3,j,k))
+
+     elseif(i+2 <= imax .and. i-2 >= imin)then
+!
+!              f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
+!  fx(i) = ---------------------------------------------
+!                             12 dx
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                           &
+                  Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
+
+     elseif(i+1 <= imax .and. i-1 >= imin)then
+!
+!              - f(i-1) + f(i+1)
+!  fx(i) = --------------------------------
+!                     2 dx
+     f_rhs(i,j,k)=f_rhs(i,j,k) + Sfx(i,j,k)*d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
+
+! if no stencil fits inside [imin:imax], no x term is added
+    endif
+
+! y direction: same sequence of stencils as for x, applied along j with Sfy
+    if(Sfy(i,j,k) >= ZEO .and. j+5 <= jmax .and. j-3 >= jmin)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                                         &
+                  Sfy(i,j,k)*d840dy*(-F5*fh(i,j-3,k)+F60 *fh(i,j-2,k)-F420*fh(i,j-1,k)-F378*fh(i,j  ,k) &
+                                  +F1050*fh(i,j+1,k)-F420*fh(i,j+2,k)+F140*fh(i,j+3,k)-F30 *fh(i,j+4,k)+THR*fh(i,j+5,k))
+
+    elseif(Sfy(i,j,k) <= ZEO .and. j-5 >= jmin .and. j+3 <= jmax)then
+     f_rhs(i,j,k)=f_rhs(i,j,k)-                                                                         &
+                  Sfy(i,j,k)*d840dy*(-F5*fh(i,j+3,k)+F60 *fh(i,j+2,k)-F420*fh(i,j+1,k)-F378*fh(i,j  ,k) &
+                                  +F1050*fh(i,j-1,k)-F420*fh(i,j-2,k)+F140*fh(i,j-3,k)- F30*fh(i,j-4,k)+THR*fh(i,j-5,k))
+
+    elseif(j+4 <= jmax .and. j-4 >= jmin)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                                              &
+                  Sfy(i,j,k)*d840dy*( THR*fh(i,j-4,k)-F32 *fh(i,j-3,k)+F168*fh(i,j-2,k)-F672*fh(i,j-1,k)+    &
+                                     F672*fh(i,j+1,k)-F168*fh(i,j+2,k)+F32 *fh(i,j+3,k)-THR *fh(i,j+4,k))
+
+     elseif(j+3 <= jmax .and. j-3 >= jmin)then
+          
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                                                         &
+                  Sfy(i,j,k)*d60dy*(-fh(i,j-3,k)+F9*fh(i,j-2,k)-F45*fh(i,j-1,k)+F45*fh(i,j+1,k)-F9*fh(i,j+2,k)+fh(i,j+3,k))
+
+     elseif(j+2 <= jmax .and. j-2 >= jmin)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                            &
+                  Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
+
+     elseif(j+1 <= jmax .and. j-1 >= jmin)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k) + Sfy(i,j,k)*d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
+! if no stencil fits inside [jmin:jmax], no y term is added
+     endif
+!! z direction: same sequence of stencils as for x, applied along k with Sfz
+    if(Sfz(i,j,k) >= ZEO .and. k+5 <= kmax .and. k-3 >= kmin)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                                         &
+                  Sfz(i,j,k)*d840dz*(-F5*fh(i,j,k-3)+F60 *fh(i,j,k-2)-F420*fh(i,j,k-1)-F378*fh(i,j,k  ) &
+                                  +F1050*fh(i,j,k+1)-F420*fh(i,j,k+2)+F140*fh(i,j,k+3)-F30 *fh(i,j,k+4)+THR*fh(i,j,k+5))
+
+    elseif(Sfz(i,j,k) <= ZEO .and. k-5 >= kmin .and. k+3 <= kmax)then
+     f_rhs(i,j,k)=f_rhs(i,j,k)-                                                                         &
+                  Sfz(i,j,k)*d840dz*(-F5*fh(i,j,k+3)+F60 *fh(i,j,k+2)-F420*fh(i,j,k+1)-F378*fh(i,j,k  ) &
+                                  +F1050*fh(i,j,k-1)-F420*fh(i,j,k-2)+F140*fh(i,j,k-3)- F30*fh(i,j,k-4)+THR*fh(i,j,k-5))
+
+    elseif(k+4 <= kmax .and. k-4 >= kmin)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                                              &
+                  Sfz(i,j,k)*d840dz*( THR*fh(i,j,k-4)-F32 *fh(i,j,k-3)+F168*fh(i,j,k-2)-F672*fh(i,j,k-1)+    &
+                                     F672*fh(i,j,k+1)-F168*fh(i,j,k+2)+F32 *fh(i,j,k+3)-THR *fh(i,j,k+4))
+     
+     elseif(k+3 <= kmax .and. k-3 >= kmin)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                                                         &
+                  Sfz(i,j,k)*d60dz*(-fh(i,j,k-3)+F9*fh(i,j,k-2)-F45*fh(i,j,k-1)+F45*fh(i,j,k+1)-F9*fh(i,j,k+2)+fh(i,j,k+3))
+
+     elseif(k+2 <= kmax .and. k-2 >= kmin)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k)+                                                            &
+                  Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
+
+     elseif(k+1 <= kmax .and. k-1 >= kmin)then
+
+     f_rhs(i,j,k)=f_rhs(i,j,k)+Sfz(i,j,k)*d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
+! if no stencil fits inside [kmin:kmax], no z term is added
+     endif
+
+  enddo
+  enddo
+  enddo
+
+  return
+
+  end subroutine lopsided
+
+#endif  
--- a/AMSS_NCKU_source/makefile
+++ b/AMSS_NCKU_source/makefile
@@ -16,12 +16,6 @@ include makefile.inc
 .cu.o:
 	$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)

-TwoPunctures.o: TwoPunctures.C
-	${CXX} $(CXXAPPFLAGS) -qopenmp -c $< -o $@
-
-TwoPunctureABE.o: TwoPunctureABE.C
-	${CXX} $(CXXAPPFLAGS) -qopenmp -c $< -o $@
-
 # Input files
 C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
           cgh.o bssn_class.o surface_integral.o ShellPatch.o\
@@ -40,7 +34,7 @@ C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o

 F90FILES = enforce_algebra.o fmisc.o initial_puncture.o prolongrestrict.o\
 	   prolongrestrict_cell.o prolongrestrict_vertex.o\
-	   rungekutta4_rout.o bssn_rhs.o diff_new.o kodiss.o kodiss_sh.o\
+	   rungekutta4_rout.o bssn_rhs_opt.o bssn_rhs.o bssn_rhs_legacy.o diff_new.o kodiss.o kodiss_sh.o\
 	   lopsidediff.o sommerfeld_rout.o getnp4.o diff_new_sh.o\
 	   shellfunctions.o bssn_rhs_ss.o Set_Rho_ADM.o\
           getnp4EScalar.o bssnEScalar_rhs.o bssn_constraint.o ricci_gamma.o\
@@ -102,7 +96,7 @@ ABEGPU: $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
 	$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)

 TwoPunctureABE: $(TwoPunctureFILES)
-	$(CLINKER) $(CXXAPPFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
+	$(CLINKER) $(CXXAPPFLAGS) -o $@ $(TwoPunctureFILES) $(LDLIBS)

 clean:
 	rm *.o ABE ABEGPU TwoPunctureABE make.log -f
--- a/AMSS_NCKU_source/makefile.inc
+++ b/AMSS_NCKU_source/makefile.inc
@@ -7,18 +7,20 @@
 filein  = -I/usr/include/ -I${MKLROOT}/include

 ## Using sequential MKL (OpenMP disabled for better single-threaded performance)
-## Added -lifcore for Intel Fortran runtime and -limf for Intel math library
-LDLIBS  = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl
+LDLIBS  = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -lifcore -limf -lmpi \
+          -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core \
+          -lpthread -lm -ldl

 ## Aggressive optimization flags:
 ## -O3: Maximum optimization
 ## -xHost: Optimize for the host CPU architecture (Intel/AMD compatible)
 ## -fp-model fast=2: Aggressive floating-point optimizations
 ## -fma: Enable fused multiply-add instructions
-CXXAPPFLAGS  = -O3 -xHost -fp-model fast=2 -fma -ipo \
+## Note: OpenMP has been disabled (-qopenmp removed) due to performance issues
+CXXAPPFLAGS  = -O3 -xHost -fp-model fast=2 -fma \
               -Dfortran3 -Dnewc -I${MKLROOT}/include
-f90appflags  = -O3 -xHost -fp-model fast=2 -fma -ipo \
-               -align array64byte -fpp -I${MKLROOT}/include
+f90appflags  = -O3 -xHost -fp-model fast=2 -fma \
+               -fpp -I${MKLROOT}/include
 f90          = ifx
 f77          = ifx
 CXX          = icpx
--- a/makefile_and_run.py
+++ b/makefile_and_run.py
@@ -10,12 +10,12 @@

 import AMSS_NCKU_Input as input_data
 import subprocess
-import time
+
 ## CPU core binding configuration using taskset
 ## taskset ensures all child processes inherit the CPU affinity mask
 ## This forces make and all compiler processes to use only nohz_full cores (4-55, 60-111)
 ## Format: taskset -c 4-55,60-111 ensures processes only run on these cores
-NUMACTL_CPU_BIND = "taskset -c 0-111"
+NUMACTL_CPU_BIND = "taskset -c 4-55,60-111"

 ## Build parallelism configuration
 ## Use nohz_full cores (4-55, 60-111) for compilation: 52 + 52 = 104 cores
@@ -152,7 +152,7 @@ def run_ABE():
 ## Run the AMSS-NCKU TwoPuncture program TwoPunctureABE

 def run_TwoPunctureABE():
-    tp_time1=time.time()
+
    print(                                                          )
    print( " Running the AMSS-NCKU executable file TwoPunctureABE " ) 
    print(                                                          )
@@ -179,9 +179,7 @@ def run_TwoPunctureABE():
    print(                                               )
    print( " The TwoPunctureABE simulation is finished " ) 
    print(                                               )
-    tp_time2=time.time()
-    et=tp_time2-tp_time1
-    print(f"Used time: {et}")
+    
    return

 ##################################################################
Author	SHA1	Message	Date
CGH0S7	ed89bc029b	Fix potential division by zero in reta_val calculation and enable NaN checks Added a safety check for the denominator in the reta_val calculation to prevent division by zero when chi approaches zero (e.g., at far-field boundaries). Also enabled DEBUG_NAN_CHECK macro to catch invalid inputs early. Initialized output arrays to zero to prevent uninitialized memory access.	2026-01-19 20:29:48 +08:00
CGH0S7	19274e93d1	Fix boundary handling in bssn_rhs_opt.f90 to prevent NaNs Refactored calc_derivs and calc_dderivs to include correct boundary handling logic matching the legacy code. Implemented fallback to 2nd order derivatives when near boundaries where 4th order stencils cannot be used. Added logic to initialize output arrays to zero to avoid uninitialized memory access.	2026-01-19 20:03:22 +08:00
CGH0S7	ae1a474cca	Fix compilation errors and complete logic in BSSN RHS optimization	2026-01-19 19:22:52 +08:00
CGH0S7	cbb8fb3a87	patched last commit	2026-01-19 17:14:28 +08:00
CGH0S7	4472d89a9f	Optimize bssn_rhs calculation with cache blocking and vectorization - Implemented cache blocking (BLK=8) in bssn_rhs_opt.f90 to improve L1/L2 cache hit rate. - Introduced bssn_rhs_opt.f90 module with vectorized derivative and physics kernels. - Renamed original implementation to bssn_rhs_legacy.f90 for fallback. - Updated bssn_rhs.f90 to act as a dispatcher, using the optimized path for ghost_width=3. - Updated makefile to include new source files. - Added DEBUG_NAN_CHECK macro to optionally disable NaN checks in production.	2026-01-19 16:39:24 +08:00