diff --git a/AMSS_NCKU_source/Parallel.C b/AMSS_NCKU_source/Parallel.C
index 1a94d3c..2d550fa 100644
--- a/AMSS_NCKU_source/Parallel.C
+++ b/AMSS_NCKU_source/Parallel.C
@@ -245,11 +245,15 @@ bool cuda_cell_gw3_restrict_params(const Parallel::gridseg *src,
                                    const Parallel::gridseg *dst,
                                    int first_fine[3])
 {
-#if (USE_CUDA_BSSN || (USE_CUDA_Z4C && (ABEtype == 2))) && defined(Cell) && ((ghost_width == 3) || (ghost_width == 4))
-#if ghost_width == 4
+#if (USE_CUDA_BSSN || (USE_CUDA_Z4C && (ABEtype == 2))) && defined(Cell) && (ghost_width >= 2) && (ghost_width <= 5)
+#if ghost_width == 5
+  const int stencil_hi = 5;
+#elif ghost_width == 4
   const int stencil_hi = 4;
-#else
+#elif ghost_width == 3
   const int stencil_hi = 3;
+#else
+  const int stencil_hi = 2;
 #endif
   if (!src || !dst || !src->Bg || !dst->Bg)
     return false;
@@ -291,11 +295,15 @@ bool cuda_cell_gw3_prolong_params(const Parallel::gridseg *src,
                                   int first_fine_ii[3],
                                   int coarse_lb[3])
 {
-#if (USE_CUDA_BSSN || (USE_CUDA_Z4C && (ABEtype == 2))) && defined(Cell) && ((ghost_width == 3) || (ghost_width == 4))
-#if ghost_width == 4
+#if (USE_CUDA_BSSN || (USE_CUDA_Z4C && (ABEtype == 2))) && defined(Cell) && (ghost_width >= 2) && (ghost_width <= 5)
+#if ghost_width == 5
+  const int stencil_hi = 5;
+#elif ghost_width == 4
   const int stencil_hi = 4;
-#else
+#elif ghost_width == 3
   const int stencil_hi = 3;
+#else
+  const int stencil_hi = 2;
 #endif
   if (!src || !dst || !src->Bg || !dst->Bg)
     return false;
diff --git a/AMSS_NCKU_source/bssn_rhs_cuda.cu b/AMSS_NCKU_source/bssn_rhs_cuda.cu
index ed63fae..52b4303 100644
--- a/AMSS_NCKU_source/bssn_rhs_cuda.cu
+++ b/AMSS_NCKU_source/bssn_rhs_cuda.cu
@@ -7622,7 +7622,16 @@ __global__ void kern_restrict_state_region_batch(const double * __restrict__ src
 {
     const int state_index = blockIdx.y;
     if (state_index >= state_count) return;
-#if ghost_width == 4
+#if ghost_width == 5
+    const double c1 = 35.0 / 65536.0;
+    const double c2 = -405.0 / 65536.0;
+    const double c3 = 567.0 / 16384.0;
+    const double c4 = -2205.0 / 16384.0;
+    const double c5 = 19845.0 / 32768.0;
+    const int offs[10] = {-4, -3, -2, -1, 0, 1, 2, 3, 4, 5};
+    const double w[10] = {c1, c2, c3, c4, c5, c5, c4, c3, c2, c1};
+    const int nst = 10;
+#elif ghost_width == 4
     const double c1 = -5.0 / 2048.0;
     const double c2 = 49.0 / 2048.0;
     const double c3 = -245.0 / 2048.0;
@@ -7630,13 +7639,19 @@ __global__ void kern_restrict_state_region_batch(const double * __restrict__ src
     const int offs[8] = {-3, -2, -1, 0, 1, 2, 3, 4};
     const double w[8] = {c1, c2, c3, c4, c4, c3, c2, c1};
     const int nst = 8;
-#else
+#elif ghost_width == 3
     const double c1 = 3.0 / 256.0;
     const double c2 = -25.0 / 256.0;
     const double c3 = 75.0 / 128.0;
     const int offs[6] = {-2, -1, 0, 1, 2, 3};
     const double w[6] = {c1, c2, c3, c3, c2, c1};
     const int nst = 6;
+#else
+    const double c1 = -1.0 / 16.0;
+    const double c2 = 9.0 / 16.0;
+    const int offs[4] = {-1, 0, 1, 2};
+    const double w[4] = {c1, c2, c2, c1};
+    const int nst = 4;
 #endif
 
     for (int local = blockIdx.x * blockDim.x + threadIdx.x;
@@ -7708,7 +7723,16 @@ __global__ void kern_restrict_state_segments_batch(const double * __restrict__ s
     const int offset = m[4];
     const int fi0 = m[5], fj0 = m[6], fk0 = m[7];
     if (state_index >= state_count) return;
-#if ghost_width == 4
+#if ghost_width == 5
+    const double c1 = 35.0 / 65536.0;
+    const double c2 = -405.0 / 65536.0;
+    const double c3 = 567.0 / 16384.0;
+    const double c4 = -2205.0 / 16384.0;
+    const double c5 = 19845.0 / 32768.0;
+    const int offs[10] = {-4, -3, -2, -1, 0, 1, 2, 3, 4, 5};
+    const double w[10] = {c1, c2, c3, c4, c5, c5, c4, c3, c2, c1};
+    const int nst = 10;
+#elif ghost_width == 4
     const double c1 = -5.0 / 2048.0;
     const double c2 = 49.0 / 2048.0;
     const double c3 = -245.0 / 2048.0;
@@ -7716,13 +7740,19 @@ __global__ void kern_restrict_state_segments_batch(const double * __restrict__ s
     const int offs[8] = {-3, -2, -1, 0, 1, 2, 3, 4};
     const double w[8] = {c1, c2, c3, c4, c4, c3, c2, c1};
     const int nst = 8;
-#else
+#elif ghost_width == 3
     const double c1 = 3.0 / 256.0;
     const double c2 = -25.0 / 256.0;
     const double c3 = 75.0 / 128.0;
     const int offs[6] = {-2, -1, 0, 1, 2, 3};
     const double w[6] = {c1, c2, c3, c3, c2, c1};
     const int nst = 6;
+#else
+    const double c1 = -1.0 / 16.0;
+    const double c2 = 9.0 / 16.0;
+    const int offs[4] = {-1, 0, 1, 2};
+    const double w[4] = {c1, c2, c2, c1};
+    const int nst = 4;
 #endif
 
     for (int local = blockIdx.x * blockDim.x + threadIdx.x;
@@ -7768,7 +7798,22 @@ __global__ void kern_prolong_state_region_batch(const double * __restrict__ src_
 {
     const int state_index = blockIdx.y;
     if (state_index >= state_count) return;
-#if ghost_width == 4
+#if ghost_width == 5
+    const double c1 = 13585.0 / 33554432.0;
+    const double c2 = -159885.0 / 33554432.0;
+    const double c3 = 230945.0 / 8388608.0;
+    const double c4 = -969969.0 / 8388608.0;
+    const double c5 = 14549535.0 / 16777216.0;
+    const double c6 = 4849845.0 / 16777216.0;
+    const double c7 = -692835.0 / 8388608.0;
+    const double c8 = 188955.0 / 8388608.0;
+    const double c9 = -138567.0 / 33554432.0;
+    const double c10 = 12155.0 / 33554432.0;
+    const int offs[10] = {-4, -3, -2, -1, 0, 1, 2, 3, 4, 5};
+    const double wl[10] = {c1, c2, c3, c4, c5, c6, c7, c8, c9, c10};
+    const double wr[10] = {c10, c9, c8, c7, c6, c5, c4, c3, c2, c1};
+    const int nst = 10;
+#elif ghost_width == 4
     const double c1 = -495.0 / 262144.0;
     const double c2 = 5005.0 / 262144.0;
     const double c3 = -27027.0 / 262144.0;
@@ -7781,7 +7826,7 @@ __global__ void kern_prolong_state_region_batch(const double * __restrict__ src_
     const double wl[8] = {c1, c2, c3, c4, c5, c6, c7, c8};
     const double wr[8] = {c8, c7, c6, c5, c4, c3, c2, c1};
     const int nst = 8;
-#else
+#elif ghost_width == 3
     const double c1 = 77.0 / 8192.0;
     const double c2 = -693.0 / 8192.0;
     const double c3 = 3465.0 / 4096.0;
@@ -7792,6 +7837,15 @@ __global__ void kern_prolong_state_region_batch(const double * __restrict__ src_
     const double wl[6] = {c1, c2, c3, c4, c5, c6};
     const double wr[6] = {c6, c5, c4, c3, c2, c1};
     const int nst = 6;
+#else
+    const double c1 = -7.0 / 128.0;
+    const double c2 = 105.0 / 128.0;
+    const double c3 = 35.0 / 128.0;
+    const double c4 = -5.0 / 128.0;
+    const int offs[4] = {-1, 0, 1, 2};
+    const double wl[4] = {c1, c2, c3, c4};
+    const double wr[4] = {c4, c3, c2, c1};
+    const int nst = 4;
 #endif
 
     for (int local = blockIdx.x * blockDim.x + threadIdx.x;
@@ -7847,7 +7901,22 @@ __global__ void kern_prolong_state_segments_batch(const double * __restrict__ sr
     const int ii0 = m[5], jj0 = m[6], kk0 = m[7];
     const int lbc_i = m[8], lbc_j = m[9], lbc_k = m[10];
     if (state_index >= state_count) return;
-#if ghost_width == 4
+#if ghost_width == 5
+    const double c1 = 13585.0 / 33554432.0;
+    const double c2 = -159885.0 / 33554432.0;
+    const double c3 = 230945.0 / 8388608.0;
+    const double c4 = -969969.0 / 8388608.0;
+    const double c5 = 14549535.0 / 16777216.0;
+    const double c6 = 4849845.0 / 16777216.0;
+    const double c7 = -692835.0 / 8388608.0;
+    const double c8 = 188955.0 / 8388608.0;
+    const double c9 = -138567.0 / 33554432.0;
+    const double c10 = 12155.0 / 33554432.0;
+    const int offs[10] = {-4, -3, -2, -1, 0, 1, 2, 3, 4, 5};
+    const double wl[10] = {c1, c2, c3, c4, c5, c6, c7, c8, c9, c10};
+    const double wr[10] = {c10, c9, c8, c7, c6, c5, c4, c3, c2, c1};
+    const int nst = 10;
+#elif ghost_width == 4
     const double c1 = -495.0 / 262144.0;
     const double c2 = 5005.0 / 262144.0;
     const double c3 = -27027.0 / 262144.0;
@@ -7860,7 +7929,7 @@ __global__ void kern_prolong_state_segments_batch(const double * __restrict__ sr
     const double wl[8] = {c1, c2, c3, c4, c5, c6, c7, c8};
     const double wr[8] = {c8, c7, c6, c5, c4, c3, c2, c1};
     const int nst = 8;
-#else
+#elif ghost_width == 3
     const double c1 = 77.0 / 8192.0;
     const double c2 = -693.0 / 8192.0;
     const double c3 = 3465.0 / 4096.0;
@@ -7871,6 +7940,15 @@ __global__ void kern_prolong_state_segments_batch(const double * __restrict__ sr
     const double wl[6] = {c1, c2, c3, c4, c5, c6};
     const double wr[6] = {c6, c5, c4, c3, c2, c1};
     const int nst = 6;
+#else
+    const double c1 = -7.0 / 128.0;
+    const double c2 = 105.0 / 128.0;
+    const double c3 = 35.0 / 128.0;
+    const double c4 = -5.0 / 128.0;
+    const int offs[4] = {-1, 0, 1, 2};
+    const double wl[4] = {c1, c2, c3, c4};
+    const double wr[4] = {c4, c3, c2, c1};
+    const int nst = 4;
 #endif
 
     for (int local = blockIdx.x * blockDim.x + threadIdx.x;
diff --git a/AMSS_NCKU_source/z4c_rhs_cuda.cu b/AMSS_NCKU_source/z4c_rhs_cuda.cu
index 36de461..4e5b3c9 100644
--- a/AMSS_NCKU_source/z4c_rhs_cuda.cu
+++ b/AMSS_NCKU_source/z4c_rhs_cuda.cu
@@ -5232,7 +5232,16 @@ __global__ void kern_restrict_state_region_batch(const double * __restrict__ src
 {
     const int state_index = blockIdx.y;
     if (state_index >= state_count) return;
-#if ghost_width == 4
+#if ghost_width == 5
+    const double c1 = 35.0 / 65536.0;
+    const double c2 = -405.0 / 65536.0;
+    const double c3 = 567.0 / 16384.0;
+    const double c4 = -2205.0 / 16384.0;
+    const double c5 = 19845.0 / 32768.0;
+    const int offs[10] = {-4, -3, -2, -1, 0, 1, 2, 3, 4, 5};
+    const double w[10] = {c1, c2, c3, c4, c5, c5, c4, c3, c2, c1};
+    const int nst = 10;
+#elif ghost_width == 4
     const double c1 = -5.0 / 2048.0;
     const double c2 = 49.0 / 2048.0;
     const double c3 = -245.0 / 2048.0;
@@ -5240,13 +5249,19 @@ __global__ void kern_restrict_state_region_batch(const double * __restrict__ src
     const int offs[8] = {-3, -2, -1, 0, 1, 2, 3, 4};
     const double w[8] = {c1, c2, c3, c4, c4, c3, c2, c1};
     const int nst = 8;
-#else
+#elif ghost_width == 3
     const double c1 = 3.0 / 256.0;
     const double c2 = -25.0 / 256.0;
     const double c3 = 75.0 / 128.0;
     const int offs[6] = {-2, -1, 0, 1, 2, 3};
     const double w[6] = {c1, c2, c3, c3, c2, c1};
     const int nst = 6;
+#else
+    const double c1 = -1.0 / 16.0;
+    const double c2 = 9.0 / 16.0;
+    const int offs[4] = {-1, 0, 1, 2};
+    const double w[4] = {c1, c2, c2, c1};
+    const int nst = 4;
 #endif
 
     for (int local = blockIdx.x * blockDim.x + threadIdx.x;
@@ -5289,7 +5304,22 @@ __global__ void kern_prolong_state_region_batch(const double * __restrict__ src_
 {
     const int state_index = blockIdx.y;
     if (state_index >= state_count) return;
-#if ghost_width == 4
+#if ghost_width == 5
+    const double c1 = 13585.0 / 33554432.0;
+    const double c2 = -159885.0 / 33554432.0;
+    const double c3 = 230945.0 / 8388608.0;
+    const double c4 = -969969.0 / 8388608.0;
+    const double c5 = 14549535.0 / 16777216.0;
+    const double c6 = 4849845.0 / 16777216.0;
+    const double c7 = -692835.0 / 8388608.0;
+    const double c8 = 188955.0 / 8388608.0;
+    const double c9 = -138567.0 / 33554432.0;
+    const double c10 = 12155.0 / 33554432.0;
+    const int offs[10] = {-4, -3, -2, -1, 0, 1, 2, 3, 4, 5};
+    const double wl[10] = {c1, c2, c3, c4, c5, c6, c7, c8, c9, c10};
+    const double wr[10] = {c10, c9, c8, c7, c6, c5, c4, c3, c2, c1};
+    const int nst = 10;
+#elif ghost_width == 4
     const double c1 = -495.0 / 262144.0;
     const double c2 = 5005.0 / 262144.0;
     const double c3 = -27027.0 / 262144.0;
@@ -5302,7 +5332,7 @@ __global__ void kern_prolong_state_region_batch(const double * __restrict__ src_
     const double wl[8] = {c1, c2, c3, c4, c5, c6, c7, c8};
     const double wr[8] = {c8, c7, c6, c5, c4, c3, c2, c1};
     const int nst = 8;
-#else
+#elif ghost_width == 3
     const double c1 = 77.0 / 8192.0;
     const double c2 = -693.0 / 8192.0;
     const double c3 = 3465.0 / 4096.0;
@@ -5313,6 +5343,15 @@ __global__ void kern_prolong_state_region_batch(const double * __restrict__ src_
     const double wl[6] = {c1, c2, c3, c4, c5, c6};
     const double wr[6] = {c6, c5, c4, c3, c2, c1};
     const int nst = 6;
+#else
+    const double c1 = -7.0 / 128.0;
+    const double c2 = 105.0 / 128.0;
+    const double c3 = 35.0 / 128.0;
+    const double c4 = -5.0 / 128.0;
+    const int offs[4] = {-1, 0, 1, 2};
+    const double wl[4] = {c1, c2, c3, c4};
+    const double wr[4] = {c4, c3, c2, c1};
+    const int nst = 4;
 #endif
 
     for (int local = blockIdx.x * blockDim.x + threadIdx.x;
diff --git a/makefile_and_run.py b/makefile_and_run.py
index b8971d8..2edd4ef 100755
--- a/makefile_and_run.py
+++ b/makefile_and_run.py
@@ -138,6 +138,7 @@ def _stop_cuda_mps(runtime_env):
 
 def _gpu_runtime_env():
     runtime_env = os.environ.copy()
+    finite_difference = str(getattr(input_data, "Finite_Diffenence_Method", "4th-order")).strip()
 
     defaults = {
         "AMSS_EVOLVE_TIMING": "1",
@@ -160,6 +161,12 @@ def _gpu_runtime_env():
         "AMSS_CUDA_DEVICE_SEGMENT_BATCH": "0",
         "AMSS_CUDA_UNCACHED_DEVICE_BUFFERS": "1",
     }
+    if finite_difference in ("2nd-order", "8th-order"):
+        defaults.update({
+            "AMSS_INTERP_FAST": "0",
+            "AMSS_INTERP_GPU": "0",
+            "AMSS_CUDA_AWARE_MPI": "0",
+        })
     if getattr(input_data, "Equation_Class", "") in ("BSSN", "BSSN-EScalar", "Z4C"):
         defaults["AMSS_CUDA_AMR_RESTRICT_DEVICE"] = "1"
     if getattr(input_data, "Equation_Class", "") == "Z4C":