[TEST]UPSTREAM: Pick some source changes from 48080d0a97

* Sync new folder structure
2026-04-23 20:55:40 +08:00
parent c185f99ee3
commit 17109fde9b
211 changed files with 189504 additions and 189280 deletions
--- a/AMSS_NCKU_source/AHF_Direct/BH_diagnostics.C
+++ b/AMSS_NCKU_source/AHF_Direct/BH_diagnostics.C
@@ -0,0 +1,724 @@
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+
+#include "util_Table.h"
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "cpm_map.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "fd_grid.h"
+#include "patch.h"
+#include "patch_edge.h"
+#include "patch_interp.h"
+#include "ghost_zone.h"
+#include "patch_system.h"
+
+#include "Jacobian.h"
+
+#include "gfns.h"
+#include "gr.h"
+#include "myglobal.h"
+
+#include "horizon_sequence.h"
+#include "BH_diagnostics.h"
+#include "driver.h"
+
+namespace AHFinderDirect
+{
+	using jtutil::error_exit;
+
+	// Zero every diagnostic member.  Px..Sz are included because output()
+	// prints them and copy_from_buffer() never assigns them, so without this
+	// they would be indeterminate whenever compute() has not run on this object.
+	BH_diagnostics::BH_diagnostics()
+		: centroid_x(0.0), centroid_y(0.0), centroid_z(0.0),
+		  quadrupole_xx(0.0), quadrupole_xy(0.0), quadrupole_xz(0.0),
+		  quadrupole_yy(0.0), quadrupole_yz(0.0), quadrupole_zz(0.0),
+		  min_radius(0.0), max_radius(0.0), mean_radius(0.0),
+		  min_x(0.0), max_x(0.0), min_y(0.0), max_y(0.0), min_z(0.0), max_z(0.0),
+		  circumference_xy(0.0), circumference_xz(0.0), circumference_yz(0.0),
+		  area(0.0), irreducible_mass(0.0), areal_radius(0.0),
+		  Px(0.0), Py(0.0), Pz(0.0), Sx(0.0), Sy(0.0), Sz(0.0) // no comma
+	{
+	}
+
+	// Pack the diagnostics into buffer[], one value per posn__* slot (Px..Sz have no slot).
+	void BH_diagnostics::copy_to_buffer(double buffer[N_buffer]) const
+	{
+		buffer[posn__centroid_x] = centroid_x;
+		buffer[posn__centroid_y] = centroid_y;
+		buffer[posn__centroid_z] = centroid_z;
+		// quadrupole moments
+		buffer[posn__quadrupole_xx] = quadrupole_xx;
+		buffer[posn__quadrupole_xy] = quadrupole_xy;
+		buffer[posn__quadrupole_xz] = quadrupole_xz;
+		buffer[posn__quadrupole_yy] = quadrupole_yy;
+		buffer[posn__quadrupole_yz] = quadrupole_yz; // was posn__quadrupole_xz: overwrote xz, left yz unset
+		buffer[posn__quadrupole_zz] = quadrupole_zz;
+		// surface radii
+		buffer[posn__min_radius] = min_radius;
+		buffer[posn__max_radius] = max_radius;
+		buffer[posn__mean_radius] = mean_radius;
+		// xyz bounding box
+		buffer[posn__min_x] = min_x;
+		buffer[posn__max_x] = max_x;
+		buffer[posn__min_y] = min_y;
+		buffer[posn__max_y] = max_y;
+		buffer[posn__min_z] = min_z;
+		buffer[posn__max_z] = max_z;
+		// proper circumferences
+		buffer[posn__circumference_xy] = circumference_xy;
+		buffer[posn__circumference_xz] = circumference_xz;
+		buffer[posn__circumference_yz] = circumference_yz;
+		// area and the quantities derived from it
+		buffer[posn__area] = area;
+		buffer[posn__irreducible_mass] = irreducible_mass;
+		buffer[posn__areal_radius] = areal_radius;
+	}
+
+	void BH_diagnostics::copy_from_buffer(const double buffer[N_buffer])
+	{
+		const double *const src = buffer; // unpack each posn__* slot into its member
+		centroid_x = src[posn__centroid_x];
+		centroid_y = src[posn__centroid_y];
+		centroid_z = src[posn__centroid_z];
+		// quadrupole moments
+		quadrupole_xx = src[posn__quadrupole_xx];
+		quadrupole_xy = src[posn__quadrupole_xy];
+		quadrupole_xz = src[posn__quadrupole_xz];
+		quadrupole_yy = src[posn__quadrupole_yy];
+		quadrupole_yz = src[posn__quadrupole_yz];
+		quadrupole_zz = src[posn__quadrupole_zz];
+		// surface radii, then the xyz bounding box
+		min_radius = src[posn__min_radius];
+		max_radius = src[posn__max_radius];
+		mean_radius = src[posn__mean_radius];
+		min_x = src[posn__min_x];
+		max_x = src[posn__max_x];
+		min_y = src[posn__min_y];
+		max_y = src[posn__max_y];
+		min_z = src[posn__min_z];
+		max_z = src[posn__max_z];
+		// proper circumferences
+		circumference_xy = src[posn__circumference_xy];
+		circumference_xz = src[posn__circumference_xz];
+		circumference_yz = src[posn__circumference_yz];
+		// area and the quantities derived from it
+		area = src[posn__area];
+		irreducible_mass = src[posn__irreducible_mass];
+		areal_radius = src[posn__areal_radius];
+	}
+	void BH_diagnostics::compute(patch_system &ps) // recomputes every diagnostic member from the gridfns held in ps
+	{
+		jtutil::norm<fp> h_norms;
+		ps.ghosted_gridfn_norms(gfns::gfn__h, h_norms);
+		min_radius = h_norms.min_abs_value();
+		max_radius = h_norms.max_abs_value();
+
+		jtutil::norm<fp> x_norms;
+		jtutil::norm<fp> y_norms;
+		jtutil::norm<fp> z_norms;
+
+		ps.gridfn_norms(gfns::gfn__global_x, x_norms);
+		ps.gridfn_norms(gfns::gfn__global_y, y_norms);
+		ps.gridfn_norms(gfns::gfn__global_z, z_norms);
+
+		min_x = x_norms.min_value();
+		max_x = x_norms.max_value();
+		min_y = y_norms.min_value();
+		max_y = y_norms.max_value();
+		min_z = z_norms.min_value();
+		max_z = z_norms.max_value();
+
+// adjust the bounding box for the symmetries: the min is the max mirrored about the patch-system origin
+#define REFLECT(origin_, max_) (origin_ - (max_ - origin_))
+		switch (ps.type())
+		{
+		case patch_system::patch_system__full_sphere:
+			break;
+		case patch_system::patch_system__plus_z_hemisphere:
+			min_z = REFLECT(ps.origin_z(), max_z);
+			break;
+		case patch_system::patch_system__plus_xy_quadrant_mirrored:
+		case patch_system::patch_system__plus_xy_quadrant_rotating:
+			min_x = REFLECT(ps.origin_x(), max_x);
+			min_y = REFLECT(ps.origin_y(), max_y);
+			break;
+		case patch_system::patch_system__plus_xz_quadrant_mirrored:
+		case patch_system::patch_system__plus_xz_quadrant_rotating:
+			min_x = REFLECT(ps.origin_x(), max_x);
+			min_z = REFLECT(ps.origin_z(), max_z);
+			break;
+		case patch_system::patch_system__plus_xyz_octant_mirrored:
+		case patch_system::patch_system__plus_xyz_octant_rotating:
+			min_x = REFLECT(ps.origin_x(), max_x);
+			min_y = REFLECT(ps.origin_y(), max_y);
+			min_z = REFLECT(ps.origin_z(), max_z);
+			break;
+		default:
+			error_exit(PANIC_EXIT,
+					   "***** BH_diagnostics::compute(): unknown patch system type()=(int)%d!\n"
+					   "                                 (this should never happen!)\n",
+					   int(ps.type())); /*NOTREACHED*/
+		}
+
+		//
+		// surface integrals of 1, h, x^i and x^i*x^j; the three bool arguments
+		// say whether the integrand is even across the xy, xz and yz planes
+		const fp integral_one = surface_integral(ps,
+												 gfns::gfn__one, true, true, true,
+												 patch::integration_method__automatic_choice);
+		const fp integral_h = surface_integral(ps,
+											   gfns::gfn__h, true, true, true,
+											   patch::integration_method__automatic_choice);
+		const fp integral_x = surface_integral(ps,
+											   gfns::gfn__global_x, true, true, false,
+											   patch::integration_method__automatic_choice);
+		const fp integral_y = surface_integral(ps,
+											   gfns::gfn__global_y, true, false, true,
+											   patch::integration_method__automatic_choice);
+		const fp integral_z = surface_integral(ps,
+											   gfns::gfn__global_z, false, true, true,
+											   patch::integration_method__automatic_choice);
+		const fp integral_xx = surface_integral(ps,
+												gfns::gfn__global_xx, true, true, true,
+												patch::integration_method__automatic_choice);
+		const fp integral_xy = surface_integral(ps,
+												gfns::gfn__global_xy, true, false, false,
+												patch::integration_method__automatic_choice);
+		const fp integral_xz = surface_integral(ps,
+												gfns::gfn__global_xz, false, true, false,
+												patch::integration_method__automatic_choice);
+		const fp integral_yy = surface_integral(ps,
+												gfns::gfn__global_yy, true, true, true,
+												patch::integration_method__automatic_choice);
+		const fp integral_yz = surface_integral(ps,
+												gfns::gfn__global_yz, false, false, true,
+												patch::integration_method__automatic_choice);
+		const fp integral_zz = surface_integral(ps,
+												gfns::gfn__global_zz, true, true, true,
+												patch::integration_method__automatic_choice);
+
+		//
+		// centroids (area-weighted mean of each global coordinate)
+		//
+		centroid_x = integral_x / integral_one;
+		centroid_y = integral_y / integral_one;
+		centroid_z = integral_z / integral_one;
+
+		//
+		// quadrupoles (taken about centroid position)
+		//
+		quadrupole_xx = integral_xx / integral_one - centroid_x * centroid_x;
+		quadrupole_xy = integral_xy / integral_one - centroid_x * centroid_y;
+		quadrupole_xz = integral_xz / integral_one - centroid_x * centroid_z;
+		quadrupole_yy = integral_yy / integral_one - centroid_y * centroid_y;
+		quadrupole_yz = integral_yz / integral_one - centroid_y * centroid_z;
+		quadrupole_zz = integral_zz / integral_one - centroid_z * centroid_z;
+
+		//
+		// mean radius of horizon
+		//
+		mean_radius = integral_h / integral_one;
+
+		//
+		// surface area and quantities derived from it
+		//
+		area = integral_one;
+		irreducible_mass = sqrt(area / (16.0 * PI));
+		areal_radius = sqrt(area / (4.0 * PI));
+
+		//
+		// proper circumferences
+		//
+		circumference_xy = ps.circumference("xy", gfns::gfn__h,
+											gfns::gfn__g_dd_11, gfns::gfn__g_dd_12, gfns::gfn__g_dd_13,
+											gfns::gfn__g_dd_22, gfns::gfn__g_dd_23,
+											gfns::gfn__g_dd_33,
+											patch::integration_method__automatic_choice);
+		circumference_xz = ps.circumference("xz", gfns::gfn__h,
+											gfns::gfn__g_dd_11, gfns::gfn__g_dd_12, gfns::gfn__g_dd_13,
+											gfns::gfn__g_dd_22, gfns::gfn__g_dd_23,
+											gfns::gfn__g_dd_33,
+											patch::integration_method__automatic_choice);
+		circumference_yz = ps.circumference("yz", gfns::gfn__h,
+											gfns::gfn__g_dd_11, gfns::gfn__g_dd_12, gfns::gfn__g_dd_13,
+											gfns::gfn__g_dd_22, gfns::gfn__g_dd_23,
+											gfns::gfn__g_dd_33,
+											patch::integration_method__automatic_choice);
+
+		// build the P and S integrands point by point; this OVERWRITES gfn__global_xx,xy,xz (P) and gfn__global_yy,yz,zz (S)
+		{
+			for (int pn = 0; pn < ps.N_patches(); ++pn)
+			{
+				patch &p = ps.ith_patch(pn);
+
+				for (int irho = p.min_irho(); irho <= p.max_irho(); ++irho)
+				{
+					for (int isigma = p.min_isigma();
+						 isigma <= p.max_isigma();
+						 ++isigma)
+					{
+						const fp g_xx = p.gridfn(gfns::gfn__g_dd_11, irho, isigma);
+						const fp g_xy = p.gridfn(gfns::gfn__g_dd_12, irho, isigma);
+						const fp g_xz = p.gridfn(gfns::gfn__g_dd_13, irho, isigma);
+						const fp g_yy = p.gridfn(gfns::gfn__g_dd_22, irho, isigma);
+						const fp g_yz = p.gridfn(gfns::gfn__g_dd_23, irho, isigma);
+						const fp g_zz = p.gridfn(gfns::gfn__g_dd_33, irho, isigma);
+
+						const fp k_xx = p.gridfn(gfns::gfn__K_dd_11, irho, isigma);
+						const fp k_xy = p.gridfn(gfns::gfn__K_dd_12, irho, isigma);
+						const fp k_xz = p.gridfn(gfns::gfn__K_dd_13, irho, isigma);
+						const fp k_yy = p.gridfn(gfns::gfn__K_dd_22, irho, isigma);
+						const fp k_yz = p.gridfn(gfns::gfn__K_dd_23, irho, isigma);
+						const fp k_zz = p.gridfn(gfns::gfn__K_dd_33, irho, isigma);
+						const fp trk = p.gridfn(gfns::gfn__trK, irho, isigma);
+
+						const fp r = p.ghosted_gridfn(gfns::gfn__h, irho, isigma);
+						const fp rho = p.rho_of_irho(irho);
+						const fp sigma = p.sigma_of_isigma(isigma);
+						fp xx, yy, zz; // local Cartesian coordinates
+						p.xyz_of_r_rho_sigma(r, rho, sigma, xx, yy, zz);
+						const fp X_ud_11 = p.partial_rho_wrt_x(xx, yy, zz);
+						const fp X_ud_12 = p.partial_rho_wrt_y(xx, yy, zz);
+						const fp X_ud_13 = p.partial_rho_wrt_z(xx, yy, zz);
+						const fp X_ud_21 = p.partial_sigma_wrt_x(xx, yy, zz);
+						const fp X_ud_22 = p.partial_sigma_wrt_y(xx, yy, zz);
+						const fp X_ud_23 = p.partial_sigma_wrt_z(xx, yy, zz);
+#if 0 // for P^i and S^i
+	  // F,i = x^i/r-X_ud_1i(dh/drho)-X_ud_2i(dh/dsigma)
+		  double nx,ny,nz;
+		  nx = xx/r-X_ud_11*p.partial_rho(gfns::gfn__h, irho,isigma)-X_ud_21*p.partial_sigma(gfns::gfn__h, irho,isigma);
+		  ny = yy/r-X_ud_12*p.partial_rho(gfns::gfn__h, irho,isigma)-X_ud_22*p.partial_sigma(gfns::gfn__h, irho,isigma);
+		  nz = zz/r-X_ud_13*p.partial_rho(gfns::gfn__h, irho,isigma)-X_ud_23*p.partial_sigma(gfns::gfn__h, irho,isigma);
+		  double eps; // volume element
+		  fp g_uu_11, g_uu_12, g_uu_13, g_uu_22, g_uu_23, g_uu_33;
+		  double pxx,pxy,pxz,pyy,pyz,pzz;
+  		    {
+		    fp t1, t2, t4, t5, t7, t8, t11, t12, t14, t15;
+		    fp t18, t21;
+	      	    t1 = g_yy;
+	      	    t2 = g_zz;
+	      	    t4 = g_yz;
+	      	    t5 = t4*t4;
+	      	    t7 = g_xx;
+	      	    t8 = t7*t1;
+	      	    t11 = g_xy;
+	      	    t12 = t11*t11;
+	      	    t14 = g_xz;
+	      	    t15 = t11*t14;
+	      	    t18 = t14*t14;
+		    eps = t8*t2-t7*t5-t12*t2+2.0*t15*t4-t18*t1;
+	      	    t21 = 1/eps;
+		    eps = sqrt(eps);
+	      	    g_uu_11 = (t1*t2-t5)*t21;
+	      	    g_uu_12 = -(t11*t2-t14*t4)*t21;
+	      	    g_uu_13 = -(-t11*t4+t14*t1)*t21;
+	      	    g_uu_22 = (t7*t2-t18)*t21;
+	      	    g_uu_23 = -(t7*t4-t15)*t21;
+	      	    g_uu_33 = (t8-t12)*t21;
+
+		    t5 = g_uu_11*nx*nx+g_uu_22*ny*ny+g_uu_33*nz*nz+2*(g_uu_12*nx*ny+g_uu_13*nx*nz+g_uu_23*ny*nz);
+		    t5 = sqrt(t5);
+		    nx = nx/t5;  // lower index
+		    ny = ny/t5;
+		    nz = nz/t5;
+
+		    pxx= g_uu_11*(g_uu_11*k_xx+g_uu_12*k_xy+g_uu_13*k_xz)
+			+g_uu_12*(g_uu_11*k_xy+g_uu_12*k_yy+g_uu_13*k_yz)
+			+g_uu_13*(g_uu_11*k_xz+g_uu_12*k_yz+g_uu_13*k_zz); //k^xx
+		    pxy= g_uu_11*(g_uu_12*k_xx+g_uu_22*k_xy+g_uu_23*k_xz)
+			+g_uu_12*(g_uu_12*k_xy+g_uu_22*k_yy+g_uu_23*k_yz)
+			+g_uu_13*(g_uu_12*k_xz+g_uu_22*k_yz+g_uu_23*k_zz); //k^xy
+		    pxz= g_uu_11*(g_uu_13*k_xx+g_uu_23*k_xy+g_uu_33*k_xz)
+			+g_uu_12*(g_uu_13*k_xy+g_uu_23*k_yy+g_uu_33*k_yz)
+			+g_uu_13*(g_uu_13*k_xz+g_uu_23*k_yz+g_uu_33*k_zz); //k^xz
+		    pyy= g_uu_12*(g_uu_12*k_xx+g_uu_22*k_xy+g_uu_23*k_xz)
+			+g_uu_22*(g_uu_12*k_xy+g_uu_22*k_yy+g_uu_23*k_yz)
+			+g_uu_23*(g_uu_12*k_xz+g_uu_22*k_yz+g_uu_23*k_zz); //k^yy
+		    pyz= g_uu_12*(g_uu_13*k_xx+g_uu_23*k_xy+g_uu_33*k_xz)
+			+g_uu_22*(g_uu_13*k_xy+g_uu_23*k_yy+g_uu_33*k_yz)
+			+g_uu_23*(g_uu_13*k_xz+g_uu_23*k_yz+g_uu_33*k_zz); //k^yz
+		    pzz= g_uu_13*(g_uu_13*k_xx+g_uu_23*k_xy+g_uu_33*k_xz)
+			+g_uu_23*(g_uu_13*k_xy+g_uu_23*k_yy+g_uu_33*k_yz)
+			+g_uu_33*(g_uu_13*k_xz+g_uu_23*k_yz+g_uu_33*k_zz); //k^zz
+		  }
+
+		  pxx = pxx-g_uu_11*trk; // tracefree
+		  pyy = pyy-g_uu_22*trk;
+		  pzz = pzz-g_uu_33*trk;
+		  double tx,ty,tz;
+		  double sxx,sxy,sxz,syx,syy,syz,szx,szy,szz;
+		  tx = nx*pxx + ny*pxy + nz*pxz;
+		  ty = nx*pxy + ny*pyy + nz*pyz;
+		  tz = nx*pxz + ny*pyz + nz*pzz;
+		  sxx = xx*tx;
+		  sxy = xx*ty;
+		  sxz = xx*tz;
+		  syx = yy*tx;
+		  syy = yy*ty;
+		  syz = yy*tz;
+		  szx = zz*tx;
+	          szy = zz*ty;
+		  szz = zz*tz;
+                  p.gridfn(gfns::gfn__global_xx, irho,isigma) = tx; //p^x
+                  p.gridfn(gfns::gfn__global_xy, irho,isigma) = ty; //p^y
+                  p.gridfn(gfns::gfn__global_xz, irho,isigma) = tz; //p^z
+		  tx = eps*(syz-szy); //s_x
+		  ty = eps*(szx-sxz);
+		  tz = eps*(sxy-syx);
+                  p.gridfn(gfns::gfn__global_yy, irho,isigma) = g_uu_11*tx+g_uu_12*ty+g_uu_13*tz; //s^x
+                  p.gridfn(gfns::gfn__global_yz, irho,isigma) = g_uu_12*tx+g_uu_22*ty+g_uu_23*tz; //s^y
+                  p.gridfn(gfns::gfn__global_zz, irho,isigma) = g_uu_13*tx+g_uu_23*ty+g_uu_33*tz; //s^z
+#endif
+#if 1 // for P_i and S_i (the variant actually compiled)
+	  // unnormalised lower-index normal: F,i = x^i/r-X_ud_1i(dh/drho)-X_ud_2i(dh/dsigma)
+						double nx, ny, nz;
+						nx = xx / r - X_ud_11 * p.partial_rho(gfns::gfn__h, irho, isigma) - X_ud_21 * p.partial_sigma(gfns::gfn__h, irho, isigma);
+						ny = yy / r - X_ud_12 * p.partial_rho(gfns::gfn__h, irho, isigma) - X_ud_22 * p.partial_sigma(gfns::gfn__h, irho, isigma);
+						nz = zz / r - X_ud_13 * p.partial_rho(gfns::gfn__h, irho, isigma) - X_ud_23 * p.partial_sigma(gfns::gfn__h, irho, isigma);
+						{
+							fp g_uu_11, g_uu_12, g_uu_13, g_uu_22, g_uu_23, g_uu_33;
+							fp t1, t2, t4, t5, t7, t8, t11, t12, t14, t15;
+							fp t18, t21;
+							t1 = g_yy;
+							t2 = g_zz;
+							t4 = g_yz;
+							t5 = t4 * t4;
+							t7 = g_xx;
+							t8 = t7 * t1;
+							t11 = g_xy;
+							t12 = t11 * t11;
+							t14 = g_xz;
+							t15 = t11 * t14;
+							t18 = t14 * t14;
+							t21 = 1 / (t8 * t2 - t7 * t5 - t12 * t2 + 2.0 * t15 * t4 - t18 * t1); // 1/det(g_dd)
+							g_uu_11 = (t1 * t2 - t5) * t21;
+							g_uu_12 = -(t11 * t2 - t14 * t4) * t21;
+							g_uu_13 = -(-t11 * t4 + t14 * t1) * t21;
+							g_uu_22 = (t7 * t2 - t18) * t21;
+							g_uu_23 = -(t7 * t4 - t15) * t21;
+							g_uu_33 = (t8 - t12) * t21;
+
+							t1 = g_uu_11 * nx + g_uu_12 * ny + g_uu_13 * nz;
+							t2 = g_uu_12 * nx + g_uu_22 * ny + g_uu_23 * nz;
+							t4 = g_uu_13 * nx + g_uu_23 * ny + g_uu_33 * nz;
+							t5 = g_uu_11 * nx * nx + g_uu_22 * ny * ny + g_uu_33 * nz * nz + 2 * (g_uu_12 * nx * ny + g_uu_13 * nx * nz + g_uu_23 * ny * nz);
+							t5 = sqrt(t5);
+							nx = t1 / t5; // upper index, normalised with g_uu
+							ny = t2 / t5;
+							nz = t4 / t5;
+						}
+
+						double pxx, pxy, pxz, pyy, pyz, pzz;
+						double sxx, sxy, sxz, syx, syy, syz, szx, szy, szz;
+						// these tensor components are the same in local Cartesian and global Cartesian coordinates
+						pxx = k_xx - g_xx * trk; // lower index: K_ij - g_ij*trK
+						pxy = k_xy;
+						pxz = k_xz;
+						pyy = k_yy - g_yy * trk;
+						pyz = k_yz;
+						pzz = k_zz - g_zz * trk;
+						/*
+								  sxx = yy*pxy - zz*pxz;
+								  sxy = yy*pyy - zz*pyz;
+								  sxz = yy*pyz - zz*pzz;
+								  syx = zz*pxy - yy*pxz;
+								  syy = zz*pyy - yy*pyz;
+								  syz = zz*pyz - yy*pzz;
+								  szx = xx*pxy - yy*pxx;
+								  szy = xx*pyy - yy*pxy;
+								  szz = xx*pyz - yy*pxz;
+						*/
+						// we need Cartesian coordinates whose origin coincides with the centroid (centroid_x, centroid_y, centroid_z)
+						xx = p.gridfn(gfns::gfn__global_x, irho, isigma) - centroid_x;
+						yy = p.gridfn(gfns::gfn__global_y, irho, isigma) - centroid_y;
+						zz = p.gridfn(gfns::gfn__global_z, irho, isigma) - centroid_z;
+						sxx = yy * pxz - zz * pxy;
+						sxy = zz * pxx - xx * pxz;
+						sxz = xx * pxy - yy * pxx;
+						syx = yy * pyz - zz * pyy;
+						syy = zz * pxy - xx * pyz;
+						syz = xx * pyy - yy * pxy;
+						szx = yy * pzz - zz * pyz;
+						szy = zz * pxz - xx * pzz;
+						szz = xx * pyz - yy * pxz;
+
+						p.gridfn(gfns::gfn__global_xx, irho, isigma) = nx * pxx + ny * pxy + nz * pxz; // p_x
+						p.gridfn(gfns::gfn__global_xy, irho, isigma) = nx * pxy + ny * pyy + nz * pyz; // p_y
+						p.gridfn(gfns::gfn__global_xz, irho, isigma) = nx * pxz + ny * pyz + nz * pzz; // p_z
+						p.gridfn(gfns::gfn__global_yy, irho, isigma) = nx * sxx + ny * syx + nz * szx; // s_x
+						p.gridfn(gfns::gfn__global_yz, irho, isigma) = nx * sxy + ny * syy + nz * szy; // s_y
+						p.gridfn(gfns::gfn__global_zz, irho, isigma) = nx * sxz + ny * syz + nz * szz; // s_z
+#endif
+					}
+				}
+			}
+		}
+
+		Px = surface_integral(ps,
+							  gfns::gfn__global_xx, true, true, false, // flags: even across the xy, xz, yz planes
+							  patch::integration_method__automatic_choice);
+		Py = surface_integral(ps,
+							  gfns::gfn__global_xy, true, false, true,
+							  patch::integration_method__automatic_choice);
+		Pz = surface_integral(ps,
+							  gfns::gfn__global_xz, false, true, true,
+							  patch::integration_method__automatic_choice);
+		Sx = surface_integral(ps,
+							  gfns::gfn__global_yy, false, false, true,
+							  patch::integration_method__automatic_choice);
+		Sy = surface_integral(ps,
+							  gfns::gfn__global_yz, false, true, false,
+							  patch::integration_method__automatic_choice);
+		Sz = surface_integral(ps,
+							  gfns::gfn__global_zz, true, false, false,
+							  patch::integration_method__automatic_choice);
+		const double F1o8pi = 1.0 / 8 / PI; // common 1/(8*pi) factor applied to all six integrals
+		Px = Px * F1o8pi;
+		Py = Py * F1o8pi;
+		Pz = Pz * F1o8pi;
+		Sx = Sx * F1o8pi;
+		Sy = Sy * F1o8pi;
+		Sz = Sz * F1o8pi;
+	}
+
+	//******************************************************************************
+
+	// Integrate the gridfn src_gfn over the horizon surface described by ps.
+	// The three bool flags state whether src_gfn is even across the xy, xz
+	// and yz planes; they and `method` are handed unchanged to
+	// patch_system::integrate_gridfn() together with the h and g_dd gridfns.
+	fp BH_diagnostics::surface_integral(const patch_system &ps,
+										int src_gfn, bool src_gfn_is_even_across_xy_plane,
+										bool src_gfn_is_even_across_xz_plane,
+										bool src_gfn_is_even_across_yz_plane,
+										enum patch::integration_method method)
+	{
+		const fp integral = ps.integrate_gridfn(
+			src_gfn, src_gfn_is_even_across_xy_plane,
+			src_gfn_is_even_across_xz_plane, src_gfn_is_even_across_yz_plane,
+			gfns::gfn__h,
+			gfns::gfn__g_dd_11, gfns::gfn__g_dd_12, gfns::gfn__g_dd_13,
+			gfns::gfn__g_dd_22, gfns::gfn__g_dd_23, gfns::gfn__g_dd_33,
+			method);
+		return integral;
+	}
+	// For every grid point of ps, build a symmetric 3x3 matrix q in angular (theta,phi) coordinates and store its determinant in gfn__global_zz.
+	// The patch grids are vertex centered (Thornburg's convention), so points on the z axis divide by sqrt(x^2+y^2)=0 and yield NaN.
+	void BH_diagnostics::compute_signature(patch_system &ps, const double dT)
+	{
+		for (int pn = 0; pn < ps.N_patches(); ++pn)
+		{
+			patch &p = ps.ith_patch(pn);
+
+			for (int irho = p.min_irho(); irho <= p.max_irho(); ++irho)
+				for (int isigma = p.min_isigma(); isigma <= p.max_isigma(); ++isigma)
+				{
+					const fp r = p.ghosted_gridfn(gfns::gfn__h, irho, isigma);
+					const fp rho = p.rho_of_irho(irho);
+					const fp sigma = p.sigma_of_isigma(isigma);
+					fp xx, yy, zz;
+					p.xyz_of_r_rho_sigma(r, rho, sigma, xx, yy, zz);
+
+					const fp sintheta = sqrt(1 - zz * zz / r / r); // sin of the polar angle measured from the local z axis
+
+					const fp X_ud_11 = xx * zz / r / r / sqrt(xx * xx + yy * yy); // X_ud_1i = d(theta)/d(x^i), assuming r^2 = x^2+y^2+z^2
+					const fp X_ud_12 = yy * zz / r / r / sqrt(xx * xx + yy * yy);
+					const fp X_ud_13 = -sqrt(xx * xx + yy * yy) / r / r;
+					const fp X_ud_21 = -yy / (xx * xx + yy * yy); // X_ud_2i = d(phi)/d(x^i)
+					const fp X_ud_22 = xx / (xx * xx + yy * yy);
+					const fp X_ud_23 = 0;
+
+					const fp g_dd_11 = p.gridfn(gfns::gfn__g_dd_11, irho, isigma);
+					const fp g_dd_12 = p.gridfn(gfns::gfn__g_dd_12, irho, isigma);
+					const fp g_dd_13 = p.gridfn(gfns::gfn__g_dd_13, irho, isigma);
+					const fp g_dd_22 = p.gridfn(gfns::gfn__g_dd_22, irho, isigma);
+					const fp g_dd_23 = p.gridfn(gfns::gfn__g_dd_23, irho, isigma);
+					const fp g_dd_33 = p.gridfn(gfns::gfn__g_dd_33, irho, isigma);
+
+					const fp Lap = 1.0 + p.gridfn(gfns::gfn__global_xx, irho, isigma); // NOTE(review): presumably the caller stored (lapse-1) and the shift in these gfn__global_* gridfns - confirm
+					const fp Sfx = p.gridfn(gfns::gfn__global_xy, irho, isigma);
+					const fp Sfy = p.gridfn(gfns::gfn__global_xz, irho, isigma);
+					const fp Sfz = p.gridfn(gfns::gfn__global_yy, irho, isigma);
+
+					const fp dfdt = (r - p.gridfn(gfns::gfn__oldh, irho, isigma)) / dT; // backward difference of the horizon radius against gfn__oldh
+
+					double Br = Sfx * xx / r + Sfy * yy / r + Sfz * zz / r;
+					double Brho = Sfx * X_ud_11 + Sfy * X_ud_12 + Sfz * X_ud_13;
+					double Bsigma = Sfx * X_ud_21 + Sfy * X_ud_22 + Sfz * X_ud_23;
+
+					double g_uu_11, g_uu_12, g_uu_13, g_uu_22, g_uu_23, g_uu_33;
+					double g11, g12, g13, g22, g23, g33;
+					{
+						// g11..g33 = inverse (upper-index components) of the Cartesian metric g_dd, via cofactors / determinant
+						fp t1, t2, t4, t5, t7, t8, t11, t12, t14, t15;
+						fp t18, t21;
+						t1 = g_dd_22;
+						t2 = g_dd_33;
+						t4 = g_dd_23;
+						t5 = t4 * t4;
+						t7 = g_dd_11;
+						t8 = t7 * t1;
+						t11 = g_dd_12;
+						t12 = t11 * t11;
+						t14 = g_dd_13;
+						t15 = t11 * t14;
+						t18 = t14 * t14;
+						t21 = 1 / (t8 * t2 - t7 * t5 - t12 * t2 + 2.0 * t15 * t4 - t18 * t1);
+						g11 = (t1 * t2 - t5) * t21;
+						g12 = -(t11 * t2 - t14 * t4) * t21;
+						g13 = -(-t11 * t4 + t14 * t1) * t21;
+						g22 = (t7 * t2 - t18) * t21;
+						g23 = -(t7 * t4 - t15) * t21;
+						g33 = (t8 - t12) * t21;
+					}
+					// component indices from here on: 1 = r, 2 = rho, 3 = sigma (rho, sigma being the two angles differentiated in X_ud above)
+					g_uu_22 = (g11 * X_ud_11 + g12 * X_ud_12 + g13 * X_ud_13) * X_ud_11 + (g12 * X_ud_11 + g22 * X_ud_12 + g23 * X_ud_13) * X_ud_12 + (g13 * X_ud_11 + g23 * X_ud_12 + g33 * X_ud_13) * X_ud_13;
+					g_uu_23 = (g11 * X_ud_11 + g12 * X_ud_12 + g13 * X_ud_13) * X_ud_21 + (g12 * X_ud_11 + g22 * X_ud_12 + g23 * X_ud_13) * X_ud_22 + (g13 * X_ud_11 + g23 * X_ud_12 + g33 * X_ud_13) * X_ud_23;
+					g_uu_12 = (g11 * X_ud_11 + g12 * X_ud_12 + g13 * X_ud_13) * xx / r + (g12 * X_ud_11 + g22 * X_ud_12 + g23 * X_ud_13) * yy / r + (g13 * X_ud_11 + g23 * X_ud_12 + g33 * X_ud_13) * zz / r;
+					g_uu_33 = (g11 * X_ud_21 + g12 * X_ud_22 + g13 * X_ud_23) * X_ud_21 + (g12 * X_ud_21 + g22 * X_ud_22 + g23 * X_ud_23) * X_ud_22 + (g13 * X_ud_21 + g23 * X_ud_22 + g33 * X_ud_23) * X_ud_23;
+					g_uu_13 = (g11 * X_ud_21 + g12 * X_ud_22 + g13 * X_ud_23) * xx / r + (g12 * X_ud_21 + g22 * X_ud_22 + g23 * X_ud_23) * yy / r + (g13 * X_ud_21 + g23 * X_ud_22 + g33 * X_ud_23) * zz / r;
+					g_uu_11 = (g11 * xx / r + g12 * yy / r + g13 * zz / r) * xx / r + (g12 * xx / r + g22 * yy / r + g23 * zz / r) * yy / r + (g13 * xx / r + g23 * yy / r + g33 * zz / r) * zz / r;
+					{
+						// invert again: g11..g33 are overwritten with the inverse of g_uu_*, i.e. the lower-index metric in the (r,rho,sigma) components
+						fp t1, t2, t4, t5, t7, t8, t11, t12, t14, t15;
+						fp t18, t21;
+						t1 = g_uu_22;
+						t2 = g_uu_33;
+						t4 = g_uu_23;
+						t5 = t4 * t4;
+						t7 = g_uu_11;
+						t8 = t7 * t1;
+						t11 = g_uu_12;
+						t12 = t11 * t11;
+						t14 = g_uu_13;
+						t15 = t11 * t14;
+						t18 = t14 * t14;
+						t21 = 1 / (t8 * t2 - t7 * t5 - t12 * t2 + 2.0 * t15 * t4 - t18 * t1);
+						g11 = (t1 * t2 - t5) * t21;
+						g12 = -(t11 * t2 - t14 * t4) * t21;
+						g13 = -(-t11 * t4 + t14 * t1) * t21;
+						g22 = (t7 * t2 - t18) * t21;
+						g23 = -(t7 * t4 - t15) * t21;
+						g33 = (t8 - t12) * t21;
+					}
+
+					double q11 = g22, q12 = g23, q13 = Br + dfdt * g12;
+					double q22 = g33, q23 = Bsigma + dfdt * g13;
+					double q33 = (-Lap * Lap + g11 * Br * Br + g22 * Brho * Brho + g33 * Bsigma * Bsigma +
+								  2 * (g12 * Br * Brho + g13 * Br * Bsigma + g23 * Brho * Bsigma)) +
+								 2 * dfdt * Br + dfdt * dfdt * g11;
+					q12 = q12 / sintheta; // rescale the sigma (second-angle) row/column by 1/sin(theta)
+					q22 = q22 / sintheta / sintheta;
+					q23 = q23 / sintheta;
+					// we use gfns::gfn__global_zz to store the determinant of the symmetric matrix q
+					p.gridfn(gfns::gfn__global_zz, irho, isigma) = q11 * q22 * q33 + q12 * q23 * q13 + q13 * q12 * q23 - q13 * q22 * q13 - q12 * q12 * q33 - q11 * q23 * q23;
+				} // end for irho isigma
+		}
+	}
+	// Create (truncating) "infoahNN.dat" for horizon hn, write the column-header line and return the stream.
+	// Returns NULL, after printing a message, if the file cannot be opened.  N_horizons is currently unused.
+	FILE *BH_diagnostics::setup_output_file(int N_horizons, int hn) const
+	{
+		char file_name_buffer[50]; // ample: "infoah" + at most 11 characters from %02d + ".dat"
+		sprintf(file_name_buffer, "infoah%02d.dat", hn);
+		const char *const file_open_mode = "w";
+		FILE *fileptr = fopen(file_name_buffer, file_open_mode);
+		if (fileptr == NULL)
+		{
+			printf("\n"
+				   "   BH_diagnostics::setup_output_file():\n"
+				   "        can't open BH-diagnostics output file\n"
+				   "        \"%s\"!", file_name_buffer);
+			return NULL; // previously fell through and passed NULL to fprintf()/fflush()
+		}
+		/*
+		fprintf(fileptr, "# apparent horizon %d/%d\n", hn, N_horizons);
+		fprintf(fileptr, "#\n");
+		fprintf(fileptr, "# column  1 = cctk_time\n");
+		fprintf(fileptr, "# column  2 = centroid_x\n");
+		fprintf(fileptr, "# column  3 = centroid_y\n");
+		fprintf(fileptr, "# column  4 = centroid_z\n");
+		fprintf(fileptr, "# column  5 = min radius\n");
+		fprintf(fileptr, "# column  6 = max radius\n");
+		fprintf(fileptr, "# column  7 = mean radius\n");
+		fprintf(fileptr, "# column  8 = quadrupole_xx\n");
+		fprintf(fileptr, "# column  9 = quadrupole_xy\n");
+		fprintf(fileptr, "# column 10 = quadrupole_xz\n");
+		fprintf(fileptr, "# column 11 = quadrupole_yy\n");
+		fprintf(fileptr, "# column 12 = quadrupole_yz\n");
+		fprintf(fileptr, "# column 13 = quadrupole_zz\n");
+		fprintf(fileptr, "# column 14 = min x\n");
+		fprintf(fileptr, "# column 15 = max x\n");
+		fprintf(fileptr, "# column 16 = min y\n");
+		fprintf(fileptr, "# column 17 = max y\n");
+		fprintf(fileptr, "# column 18 = min z\n");
+		fprintf(fileptr, "# column 19 = max z\n");
+		fprintf(fileptr, "# column 20 = xy-plane circumference\n");
+		fprintf(fileptr, "# column 21 = xz-plane circumference\n");
+		fprintf(fileptr, "# column 22 = yz-plane circumference\n");
+		fprintf(fileptr, "# column 23 = ratio of xz/xy-plane circumferences\n");
+		fprintf(fileptr, "# column 24 = ratio of yz/xy-plane circumferences\n");
+		fprintf(fileptr, "# column 25 = area\n");
+		fprintf(fileptr, "# column 26 = irreducible mass\n");
+		fprintf(fileptr, "# column 27 = areal radius\n");
+		*/
+		fprintf(fileptr, "#time Mass x y z Px Py Pz Sx Sy Sz\n");
+		fflush(fileptr);
+		return fileptr;
+	}
+	// Append one row (time, irreducible mass, centroid, Px..Pz, Sx..Sz) to fileptr and flush it.
+	void BH_diagnostics::output(FILE *fileptr, double time) const
+	{
+		assert(fileptr != NULL);
+		/*
+		fprintf(fileptr,
+			"%f\t%f\t%f\t%f\t%#.10g\t%#.10g\t%#.10g\t",
+			double(time),
+			double(centroid_x), double(centroid_y), double(centroid_z),
+			double(min_radius), double(max_radius), double(mean_radius));
+
+		fprintf(fileptr,
+			"%#.10g\t%#.10g\t%#.10g\t%#.10g\t%#.10g\t%#.10g\t",
+			double(quadrupole_xx), double(quadrupole_xy), double(quadrupole_xz),
+						   double(quadrupole_yy), double(quadrupole_yz),
+									  double(quadrupole_zz));
+
+		fprintf(fileptr,
+			"%#.10g\t%#.10g\t%#.10g\t%#.10g\t%#.10g\t%#.10g\t",
+			double(min_x), double(max_x),
+			double(min_y), double(max_y),
+			double(min_z), double(max_z));
+
+		fprintf(fileptr,
+			"%#.10g\t%#.10g\t%#.10g\t%#.10g\t%#.10g\t",
+			double(circumference_xy),
+			double(circumference_xz),
+			double(circumference_yz),
+			double(circumference_xz / circumference_xy),
+			double(circumference_yz / circumference_xy));
+
+		fprintf(fileptr,
+			"%#.10g\t%#.10g\t%#.10g\n",
+			double(area), double(irreducible_mass), double(areal_radius));
+		*/
+		// columns in the order of the "#time Mass x y z Px Py Pz Sx Sy Sz" header; tab between values, newline after the last
+		const double row[] = {time, irreducible_mass,
+							  centroid_x, centroid_y, centroid_z,
+							  Px, Py, Pz, Sx, Sy, Sz};
+		const int n_columns = int(sizeof(row) / sizeof(row[0]));
+		for (int col = 0; col < n_columns; ++col)
+			fprintf(fileptr, "%#.10g%c", row[col], (col + 1 < n_columns) ? '\t' : '\n');
+		fflush(fileptr);
+	}
+
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/BH_diagnostics.h
+++ b/AMSS_NCKU_source/AHF_Direct/BH_diagnostics.h
@@ -0,0 +1,101 @@
+#ifndef BH_DIAGNOSTICS_H
+#define BH_DIAGNOSTICS_H
+namespace AHFinderDirect
+{
+
+	struct BH_diagnostics // diagnostics of one horizon: filled by compute(), written by output()
+	{
+	public:
+		// mean x,y,z
+		fp centroid_x, centroid_y, centroid_z;
+
+		// these are quadrupole moments about the centroid, i.e.
+		// mean(xi*xj) - centroid_i*centroid_j
+		fp quadrupole_xx, quadrupole_xy, quadrupole_xz,
+			quadrupole_yy, quadrupole_yz,
+			quadrupole_zz;
+
+		// min,max,mean surface radius about local coordinate origin
+		fp min_radius, max_radius, mean_radius;
+
+		// xyz bounding box
+		fp min_x, max_x,
+			min_y, max_y,
+			min_z, max_z;
+
+		// proper circumference
+		// (computed using induced metric along these local-coordinate planes)
+		fp circumference_xy,
+			circumference_xz,
+			circumference_yz;
+
+		// surface area (computed using induced metric)
+		// and quantities derived from it
+		fp area, irreducible_mass, areal_radius;
+		// surface integrals set by compute() and printed by output(); note they have no posn__* buffer slot
+		double Px, Py, Pz, Sx, Sy, Sz;
+
+	public:
+		// position of diagnostics in buffer and number of diagnostics
+		enum
+		{
+			posn__centroid_x = 0,
+			posn__centroid_y,
+			posn__centroid_z,
+			posn__quadrupole_xx,
+			posn__quadrupole_xy,
+			posn__quadrupole_xz,
+			posn__quadrupole_yy,
+			posn__quadrupole_yz,
+			posn__quadrupole_zz,
+			posn__min_radius,
+			posn__max_radius,
+			posn__mean_radius,
+
+			posn__min_x,
+			posn__max_x,
+			posn__min_y,
+			posn__max_y,
+			posn__min_z,
+			posn__max_z,
+
+			posn__circumference_xy,
+			posn__circumference_xz,
+			posn__circumference_yz,
+
+			posn__area,
+			posn__irreducible_mass,
+			posn__areal_radius,
+
+			N_buffer // no comma	// size of buffer
+		};
+
+		// copy diagnostics to/from buffer
+		void copy_to_buffer(double buffer[N_buffer]) const;
+		void copy_from_buffer(const double buffer[N_buffer]);
+
+	public:
+		void compute(patch_system &ps);
+
+		void compute_signature(patch_system &ps, const double dT);
+
+		FILE *setup_output_file(int N_horizons, int hn)
+			const;
+
+		void output(FILE *fileptr, double time)
+			const;
+
+		BH_diagnostics();
+
+	private: // integral of gridfn src_gfn over the horizon; declared with fp to match its definition
+		static fp surface_integral(const patch_system &ps,
+								   int src_gfn, bool src_gfn_is_even_across_xy_plane,
+								   bool src_gfn_is_even_across_xz_plane,
+								   bool src_gfn_is_even_across_yz_plane,
+								   enum patch::integration_method method);
+	};
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /* BH_DIAGNOSTICS_H */
--- a/AMSS_NCKU_source/AHF_Direct/FFT.f90
+++ b/AMSS_NCKU_source/AHF_Direct/FFT.f90
@@ -0,0 +1,87 @@
+
+
+#if 0
+! Stand-alone driver for four1, compiled out by the enclosing "#if 0".
+! It stores N random samples in the odd-indexed slots of p (and writes them
+! to prime.dat), calls four1(p,N,1), and writes |p|^2 of the first N/2
+! complex bins, divided by their sum, to power.dat.
+program checkFFT
+use dfport
+implicit none
+double precision::x
+integer,parameter::N=256
+double precision,dimension(N*2)::p
+double precision,dimension(N/2)::s
+integer::ncount,j,idum
+character(len=8)::tt
+! seed taken from the last character of the clock string
+tt=clock()
+idum=iachar(tt(8:8))-48
+p=0.0
+open(77,file='prime.dat',status='unknown')
+loop1:do ncount=1,N
+   x=ran(idum)
+   p(2*ncount-1)=x
+   write(77,'(f15.3)')x
+enddo loop1
+close(77)
+call four1(p,N,1)
+! squared magnitude of each of the first N/2 complex bins
+do j=1,N/2
+  s(j)=p(2*j)*p(2*j)+p(2*j-1)*p(2*j-1)
+enddo
+! normalise so the written values sum to one
+x=0.0
+do j=1,N/2
+  x=x+s(j)
+enddo
+s=s/x
+open(77,file='power.dat',status='unknown')
+do j=1,N/2
+  write(77,'(2(1x,f15.3))')dble(j-1)/dble(N),s(j)
+enddo
+close(77)
+end program checkFFT
+#endif
+
+!-------------
+! In-place complex FFT of length nn using Intel oneMKL DFTI
+!   isign == 1 : DftiComputeForward,  X[k] = sum_{n=0}^{N-1} x[n] * exp(-2*pi*i*k*n/N)
+!   otherwise  : DftiComputeBackward, X[k] = sum_{n=0}^{N-1} x[n] * exp(+2*pi*i*k*n/N)
+! No scale factor is set on the descriptor, so the library defaults apply.
+!
+! Arguments:
+!   dataa : interleaved complex array [Re(0),Im(0),Re(1),Im(1),...] of 2*nn
+!           reals; overwritten with the transform
+!   nn    : number of complex points
+!   isign : transform direction, see above
+!
+! The interface has no status argument, so a failing DFTI call is reported
+! on standard output; dataa must then be treated as invalid by the caller.
+!
+! NOTE(review): the classic Numerical Recipes four1 uses the opposite
+! exponent sign for isign=1 - confirm callers expect the convention above.
+!-------------
+SUBROUTINE four1(dataa,nn,isign)
+use MKL_DFTI
+implicit none
+INTEGER, intent(in) :: isign, nn
+DOUBLE PRECISION, dimension(2*nn), intent(inout) :: dataa
+
+type(DFTI_DESCRIPTOR), pointer :: desc
+integer :: status, free_status
+
+! Create DFTI descriptor for 1D complex-to-complex transform
+status = DftiCreateDescriptor(desc, DFTI_DOUBLE, DFTI_COMPLEX, 1, nn)
+if (status /= 0) then
+   write(*,*) 'four1: DftiCreateDescriptor failed, status = ', status
+   return
+endif
+
+! Transform in place: dataa is both input and output
+status = DftiSetValue(desc, DFTI_PLACEMENT, DFTI_INPLACE)
+
+! Commit the descriptor
+if (status == 0) status = DftiCommitDescriptor(desc)
+
+! Execute FFT based on direction
+if (status == 0) then
+   if (isign == 1) then
+      ! Forward FFT: exp(-2*pi*i*k*n/N)
+      status = DftiComputeForward(desc, dataa)
+   else
+      ! Backward FFT: exp(+2*pi*i*k*n/N)
+      status = DftiComputeBackward(desc, dataa)
+   endif
+endif
+if (status /= 0) write(*,*) 'four1: MKL DFTI call failed, status = ', status
+
+! Free descriptor (kept in its own variable so it cannot mask an earlier error)
+free_status = DftiFreeDescriptor(desc)
+if (free_status /= 0) write(*,*) 'four1: DftiFreeDescriptor failed, status = ', free_status
+
+return
+END SUBROUTINE four1
--- a/AMSS_NCKU_source/AHF_Direct/IntPnts.C
+++ b/AMSS_NCKU_source/AHF_Direct/IntPnts.C
@@ -0,0 +1,97 @@
+//$Id: IntPnts.C,v 1.1 2012/04/03 10:49:42 zjcao Exp $
+
+#include "macrodef.h"
+#ifdef With_AHF
+
+#include <math.h>
+#include <stdio.h>
+
+#include <iostream>
+using namespace std;
+
+#include "myglobal.h"
+
+namespace AHFinderDirect
+{
+  extern struct state state;
+  // Interpolate the InList (= 35) grid functions of state.AHList at Ns points.
+  //
+  // X, Y, Z : coordinates of the Ns interpolation points
+  // Ns      : number of points
+  // Data    : output, variable-major: Data[pnt + ii * Ns] holds variable ii at
+  //           point pnt (the interpolator itself returns point-major data)
+  //
+  // Returns 1 on success, 0 when Ns == 0 or when
+  // state.ADM->AH_Interp_Points() reports failure (Data is then left untouched).
+  int globalInterpGFL(double *X, double *Y, double *Z, int Ns,
+                      double *Data)
+  {
+    if (Ns == 0)
+      return 0;
+
+    // private copies of the coordinates, in the double*[3] form the
+    // interpolator is called with
+    double *pox[3];
+    for (int i = 0; i < 3; i++)
+      pox[i] = new double[Ns];
+    for (int n = 0; n < Ns; n++)
+    {
+      pox[0][n] = X[n];
+      pox[1][n] = Y[n];
+      pox[2][n] = Z[n];
+    }
+
+    const int InList = 35;
+
+    double *datap = new double[Ns * InList];
+    const bool ok = state.ADM->AH_Interp_Points(state.AHList, Ns, pox, datap, state.Symmetry);
+    if (ok)
+    {
+      // reform data
+      for (int pnt = 0; pnt < Ns; pnt++)
+        for (int ii = 0; ii < InList; ii++)
+        {
+          // entries 0, 12 and 20 get 1 added back
+          // (presumably stored with 1 subtracted - TODO confirm against AHList)
+          if (ii == 0 || ii == 12 || ii == 20)
+            Data[pnt + ii * Ns] = datap[ii + pnt * InList] + 1;
+          else if (ii == 24) // from chi-1 to psi
+            Data[pnt + ii * Ns] = pow(datap[ii + pnt * InList] + 1, -0.25);
+          else if (ii == 25 || ii == 26 || ii == 27) // from chi,i to psi,i
+            Data[pnt + ii * Ns] = -pow(datap[24 + pnt * InList] + 1, -1.25) / 4 * datap[ii + pnt * InList];
+          else
+            Data[pnt + ii * Ns] = datap[ii + pnt * InList];
+        }
+    }
+
+    // release the scratch buffers on the failure path as well as on success
+    delete[] datap;
+    for (int i = 0; i < 3; i++)
+      delete[] pox[i];
+
+    return ok ? 1 : 0;
+  }
+  // Interpolate lapse and shift: the InList (= 4) grid functions of
+  // state.GaugeList at Ns points.
+  //
+  // X, Y, Z : coordinates of the Ns interpolation points
+  // Ns      : number of points
+  // Data    : output, variable-major: Data[pnt + ii * Ns]
+  //
+  // Returns 1 on success, 0 when Ns == 0 or when
+  // state.ADM->AH_Interp_Points() reports failure (Data is then left
+  // untouched), matching globalInterpGFL().
+  int globalInterpGFLlash(double *X, double *Y, double *Z, int Ns,
+                          double *Data)
+  {
+    if (Ns == 0)
+      return 0;
+
+    // private copies of the coordinates, in the double*[3] form the
+    // interpolator is called with
+    double *pox[3];
+    for (int i = 0; i < 3; i++)
+      pox[i] = new double[Ns];
+    for (int n = 0; n < Ns; n++)
+    {
+      pox[0][n] = X[n];
+      pox[1][n] = Y[n];
+      pox[2][n] = Z[n];
+    }
+
+    const int InList = 4;
+
+    double *datap = new double[Ns * InList];
+    const bool ok = state.ADM->AH_Interp_Points(state.GaugeList, Ns, pox, datap, state.Symmetry);
+    if (ok)
+    {
+      // reform data: point-major -> variable-major
+      for (int pnt = 0; pnt < Ns; pnt++)
+        for (int ii = 0; ii < InList; ii++)
+          Data[pnt + ii * Ns] = datap[ii + pnt * InList];
+    }
+
+    delete[] datap;
+    for (int i = 0; i < 3; i++)
+      delete[] pox[i];
+
+    return ok ? 1 : 0;
+  }
+
+} // namespace AHFinderDirect
+#endif
--- a/AMSS_NCKU_source/AHF_Direct/IntPnts0.C
+++ b/AMSS_NCKU_source/AHF_Direct/IntPnts0.C
@@ -0,0 +1,43 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+
+#include <mpi.h>
+
+#include "myglobal.h"
+
+// Print a printf-style informational message, prefixed with "INFO (<thorn>): "
+// and terminated by a newline, on stdout.  Only MPI rank 0 prints; every
+// other rank does nothing.  Always returns 0.
+int CCTK_VInfo(const char *thorn, const char *format, ...)
+{
+   int rank;
+   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+   if (rank == 0)
+   {
+      va_list args;
+      va_start(args, format);
+      printf("INFO (%s): ", thorn);
+      vprintf(format, args);
+      printf("\n");
+      va_end(args);
+   }
+
+   return 0;
+}
+// Print a printf-style warning, prefixed with "WARN (<thorn>): " and
+// terminated by a newline, on stdout.  Only MPI rank 0 prints.
+// The level, line and file arguments are accepted but not used, and the
+// function always returns 0 to its caller, whatever the level.
+int CCTK_VWarn (int level,
+                int line,
+                const char *file,
+                const char *thorn,
+                const char *format,
+                ...)
+{
+   int rank;
+   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+   if (rank == 0)
+   {
+      va_list args;
+      va_start(args, format);
+      printf("WARN (%s): ", thorn);
+      vprintf(format, args);
+      printf("\n");
+      va_end(args);
+   }
+
+   return 0;
+}
--- a/AMSS_NCKU_source/AHF_Direct/Jacobian.C
+++ b/AMSS_NCKU_source/AHF_Direct/Jacobian.C
@@ -0,0 +1,270 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+
+#include "util_Table.h"
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+
+#include "util.h"
+#include "array.h"
+#include "cpm_map.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "fd_grid.h"
+#include "patch.h"
+#include "patch_edge.h"
+#include "patch_interp.h"
+#include "ghost_zone.h"
+#include "patch_system.h"
+
+#include "Jacobian.h"
+#include "ilucg.h"
+// all the code in this file is inside this namespace
+namespace AHFinderDirect
+{
+	// this represents a single element stored in the matrix; it is the record
+	// type sorted by  Jacobian::sort_each_row_into_column_order()  using the
+	// comparison function  compare_matrix_elements()
+	struct matrix_element
+	{
+		int JA; // column index, copied verbatim from JA_[] (so it includes the index origin)
+		fp A;	// matrix value, copied from A_[]
+	};
+
+	// Construct an empty row-sparse Jacobian with one row per grid point of ps.
+	// Indices are stored with Fortran (1-based) origin; only the row-pointer
+	// array is allocated here, the column/value arrays grow on first insert.
+	Jacobian::Jacobian(patch_system &ps)
+		: ps_(ps),
+		  N_rows_(ps.N_grid_points()),
+		  IO_(Fortran_index_origin),
+		  N_nonzeros_(0),
+		  current_N_rows_(0),
+		  N_nonzeros_allocated_(0),
+		  IA_(new integer[N_rows_ + 1]),
+		  JA_(NULL),
+		  A_(NULL),
+		  itemp_(NULL),
+		  rtemp_(NULL)
+	{
+		zero_matrix();
+	}
+
+	// Release every array owned by the matrix.
+	Jacobian::~Jacobian()
+	{
+		// delete[] on a null pointer is a no-op, so no guards are needed
+		delete[] A_;
+		delete[] JA_;
+		delete[] IA_;
+		delete[] rtemp_;
+		delete[] itemp_;
+	}
+
+	// Value of matrix element (II,JJ); elements that are not stored read as 0.
+	double Jacobian::element(int II, int JJ)
+		const
+	{
+		const int where = find_element(II, JJ);
+		if (where < 0)
+			return 0.0;
+		return A_[where];
+	}
+
+	// Reset the matrix to "no rows, no stored elements".
+	// Allocated storage is kept for reuse.
+	void Jacobian::zero_matrix()
+	{
+		// the first row starts at the index origin
+		IA_[0] = IO_;
+		current_N_rows_ = 0;
+		N_nonzeros_ = 0;
+	}
+
+	// Store value at (II,JJ): overwrite the element if it is already stored,
+	// otherwise insert it as a new element.
+	void Jacobian::set_element(int II, int JJ, fp value)
+	{
+		const int where = find_element(II, JJ);
+		if (where < 0)
+		{
+			insert_element(II, JJ, value);
+			return;
+		}
+		A_[where] = value;
+	}
+
+	// Add value to (II,JJ): accumulate into the element if it is already
+	// stored, otherwise insert it as a new element holding value.
+	void Jacobian::sum_into_element(int II, int JJ, fp value)
+	{
+		const int where = find_element(II, JJ);
+		if (where < 0)
+		{
+			insert_element(II, JJ, value);
+			return;
+		}
+		A_[where] += value;
+	}
+
+	// Linear search of row II for column JJ (both 0-origin).
+	// Returns the 0-origin position in JA_[]/A_[] of the stored element,
+	// or -1 if the row has not been started yet or holds no such column.
+	int Jacobian::find_element(int II, int JJ)
+		const
+	{
+		if (II < current_N_rows_)
+		{
+			// [row_begin, row_end) is this row's slice of JA_[] / A_[]
+			const int row_begin = IA_[II] - IO_;
+			const int row_end = IA_[II + 1] - IO_;
+			int k = row_begin;
+			while (k < row_end)
+			{
+				if (JA_[k] - IO_ == JJ)
+					return k; // found
+				++k;
+			}
+		}
+
+		return -1; // row not defined yet, or element not found
+	}
+
+	// Append a new element (II,JJ) = value to the matrix.
+	// Elements may only be added to the last existing row or to the row right
+	// after it (which is then started); anything else prints a diagnostic and
+	// aborts.  Returns the 0-origin position of the new element in JA_[]/A_[].
+	int Jacobian::insert_element(int II, int JJ, double value)
+	{
+		const bool extends_last_row = (II == current_N_rows_ - 1);
+		const bool starts_next_row = (II == current_N_rows_);
+		if (!extends_last_row && !starts_next_row)
+		{
+			printf(
+				"***** row_sparse_Jacobian::insert_element(II=%d, JJ=%d, value=%g):\n"
+				"        attempt to insert element elsewhere than {last row, last row+1}!\n"
+				"        N_rows_=%d   current_N_rows_=%d   IO_=%d\n"
+				"        N_nonzeros_=%d   N_nonzeros_allocated_=%d\n",
+				II, JJ, double(value),
+				N_rows_, current_N_rows_, IO_,
+				N_nonzeros_, N_nonzeros_allocated_);
+			abort();
+		}
+
+		if (starts_next_row)
+		{
+			// a new row begins (empty) where the previous one ends
+			assert(current_N_rows_ < N_rows_);
+			IA_[current_N_rows_ + 1] = IA_[current_N_rows_];
+			++current_N_rows_;
+		}
+
+		// from here on II is the last row
+		assert(II == current_N_rows_ - 1);
+		int &row_end = IA_[II + 1]; // grow_arrays() does not touch IA_[]
+		if (row_end - IO_ >= N_nonzeros_allocated_)
+			grow_arrays();
+
+		const int slot = row_end - IO_;
+		assert(slot < N_nonzeros_allocated_);
+		JA_[slot] = JJ + IO_;
+		A_[slot] = value;
+		++row_end;
+		++N_nonzeros_;
+
+		return slot;
+	}
+
+	// Enlarge JA_[] and A_[] by base_growth_amount plus half the current
+	// capacity, carrying over the N_nonzeros_ elements stored so far.
+	void Jacobian::grow_arrays()
+	{
+		N_nonzeros_allocated_ += base_growth_amount + (N_nonzeros_allocated_ >> 1);
+
+		int *const bigger_JA = new int[N_nonzeros_allocated_];
+		double *const bigger_A = new double[N_nonzeros_allocated_];
+
+		int k = 0;
+		while (k < N_nonzeros_)
+		{
+			bigger_JA[k] = JA_[k];
+			bigger_A[k] = A_[k];
+			++k;
+		}
+
+		// the old arrays are null on the very first call; delete[] accepts that
+		delete[] A_;
+		delete[] JA_;
+		A_ = bigger_A;
+		JA_ = bigger_JA;
+	}
+
+	// qsort() comparison function: orders two matrix_element records by
+	// ascending column index JA.
+	int compare_matrix_elements(const void *x, const void *y)
+	{
+		const matrix_element &lhs = *static_cast<const matrix_element *>(x);
+		const matrix_element &rhs = *static_cast<const matrix_element *>(y);
+
+		return lhs.JA - rhs.JA;
+	}
+
+	// Sort the stored elements of every row into ascending column order.
+	// Each row's (JA_, A_) pairs are copied to a scratch buffer, sorted with
+	// qsort() on the column index, and copied back in place.
+	// NOTE(review): both loops run over all N_rows_ rows, so this presumably
+	// expects the matrix to be completely filled in (current_N_rows_ == N_rows_)
+	// - confirm against callers.
+	void Jacobian::sort_each_row_into_column_order()
+	{
+		// buffer must be big enough to hold the largest row
+		int max_N_in_row = 0;
+		{
+			for (int II = 0; II < N_rows_; ++II)
+			{
+				max_N_in_row = max(max_N_in_row, IA_[II + 1] - IA_[II]);
+			}
+		}
+
+		// contiguous buffer for sorting
+		struct matrix_element *const buffer = new struct matrix_element[max_N_in_row];
+
+		{
+			for (int II = 0; II < N_rows_; ++II)
+			{
+				// row length is a difference of row pointers, so the index origin cancels
+				const int N_in_row = IA_[II + 1] - IA_[II];
+
+				// copy this row's JA_[] and A_[] values to the buffer
+				const int start = IA_[II] - IO_;
+				for (int p = 0; p < N_in_row; ++p)
+				{
+					const int posn = start + p;
+					buffer[p].JA = JA_[posn];
+					buffer[p].A = A_[posn];
+				}
+
+				// sort the buffer
+				qsort(static_cast<void *>(buffer), N_in_row, sizeof(buffer[0]),
+					  &compare_matrix_elements);
+
+				// copy the buffer values back to this row's JA_[] and A_[]
+				for (int p = 0; p < N_in_row; ++p)
+				{
+					const int posn = start + p;
+					JA_[posn] = buffer[p].JA;
+					A_[posn] = buffer[p].A;
+				}
+			}
+		}
+
+		delete[] buffer;
+	}
+
+	// Solve  J.x = rhs  with the f_ilucg() routine.
+	//
+	// rhs_gfn = gridfn number of the right-hand side (read via ps_.gridfn_data())
+	// x_gfn   = gridfn number that receives the solution; it is zeroed first
+	//           and used as the initial guess
+	// print_msg_flag = not used by this implementation
+	//
+	// Requires Fortran index origin and a completely filled matrix (asserted).
+	// A negative status from f_ilucg() prints a diagnostic and aborts.
+	// Always returns -1.0.
+	double Jacobian::solve_linear_system(int rhs_gfn, int x_gfn, bool print_msg_flag)
+	{
+		assert(IO_ == Fortran_index_origin);
+		assert(current_N_rows_ == N_rows_);
+
+		// work arrays are allocated on the first call only, sized from the
+		// N_nonzeros_ seen at that time
+		// NOTE(review): they are not resized if a later matrix has more
+		// nonzeros - presumably the sparsity pattern is fixed; confirm
+		if (itemp_ == NULL)
+			then
+			{
+				itemp_ = new int[3 * N_rows_ + 3 * N_nonzeros_ + 2];
+				rtemp_ = new double[4 * N_rows_ + N_nonzeros_];
+			}
+
+		// initial guess = all zeros
+		double *x = ps_.gridfn_data(x_gfn);
+		for (int II = 0; II < N_rows_; ++II)
+		{
+			x[II] = 0.0;
+		}
+
+		const int N = N_rows_;
+		const double *rhs = ps_.gridfn_data(rhs_gfn);
+		const double eps = 1e-10;
+		const int max_iterations = N_rows_;
+		int istatus;
+
+		// the actual linear solution
+		f_ilucg(N,
+				IA_, JA_, A_,
+				rhs, x,
+				itemp_, rtemp_,
+				eps, max_iterations,
+				istatus);
+
+		if (istatus < 0)
+		{
+			printf(
+				"***** row_sparse_Jacobian__ILUCG::solve_linear_system(rhs_gfn=%d, x_gfn=%d):\n"
+				"        error return from [sd]ilucg() routine!\n"
+				"        istatus=%d < 0 ==> bad matrix structure, eg. zero diagonal element!\n",
+				rhs_gfn, x_gfn,
+				int(istatus));
+			abort();
+		}
+
+		return -1.0;
+	}
+
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/Jacobian.h
+++ b/AMSS_NCKU_source/AHF_Direct/Jacobian.h
@@ -0,0 +1,90 @@
+#ifndef AHFINDERDIRECT__JACOBIAN_HH
+#define AHFINDERDIRECT__JACOBIAN_HH
+
+namespace AHFinderDirect
+{
+	// Row-sparse Jacobian matrix over the grid points of a patch system.
+	// Elements are kept in three parallel arrays: IA_[] (row pointers),
+	// JA_[] (column indices) and A_[] (values), with indices offset by the
+	// index origin IO_.  Rows must be filled in increasing order.
+	class Jacobian
+	{
+	public:
+		// basic meta-info
+		patch_system &my_patch_system() const { return ps_; }
+		int N_rows() const { return N_rows_; }
+
+		// convert (patch,irho,isigma) <--> row/column index
+		int II_of_patch_irho_isigma(const patch &p, int irho, int isigma)
+			const
+		{
+			return ps_.gpn_of_patch_irho_isigma(p, irho, isigma);
+		}
+		const patch &patch_irho_isigma_of_II(int II, int &irho, int &isigma)
+			const
+		{
+			return ps_.patch_irho_isigma_of_gpn(II, irho, isigma);
+		}
+
+		// value of element (II,JJ); elements not stored read as 0.0
+		double element(int II, int JJ) const;
+
+		// is the matrix element (II,JJ) stored explicitly?
+		// find_element() returns a 0-origin position (>= 0) when the element
+		// is stored and -1 when it is not, so position 0 counts as stored
+		bool is_explicitly_stored(int II, int JJ) const
+		{
+			return find_element(II, JJ) >= 0;
+		}
+
+		// index origin used inside IA_[] and JA_[]
+		int IO() const { return IO_; }
+		enum
+		{
+			C_index_origin = 0,
+			Fortran_index_origin = 1
+		};
+
+		// reset to an empty matrix (storage is kept)
+		void zero_matrix();
+
+		// overwrite element (II,JJ), inserting it if it is not stored yet
+		void set_element(int II, int JJ, fp value);
+
+		// add to element (II,JJ), inserting it if it is not stored yet
+		void sum_into_element(int II, int JJ, fp value);
+
+		// 0-origin position of (II,JJ) in JA_[]/A_[], or -1 if not stored
+		int find_element(int II, int JJ) const;
+
+		// append a new element to the last row or start the next row;
+		// returns its 0-origin position
+		int insert_element(int II, int JJ, fp value);
+
+		// enlarge JA_[] and A_[]
+		void grow_arrays();
+
+		enum
+		{
+			base_growth_amount = 1000
+		};
+
+		// sort each row's stored elements by ascending column index
+		void sort_each_row_into_column_order();
+
+		// solve  J.x = rhs  for the gridfns numbered rhs_gfn and x_gfn
+		double solve_linear_system(int rhs_gfn, int x_gfn,
+								   bool print_msg_flag);
+
+	public:
+		Jacobian(patch_system &ps);
+		~Jacobian();
+
+	protected:
+		patch_system &ps_; // patch system this matrix belongs to
+		int N_rows_;	   // number of rows = number of grid points
+
+		int IO_; // index origin of IA_[] and JA_[]
+
+		int N_nonzeros_;	 // number of elements stored so far
+		int current_N_rows_; // number of rows started so far
+
+		int N_nonzeros_allocated_; // capacity of JA_[] and A_[]
+
+		// row pointers: row II occupies positions
+		// [IA_[II]-IO_, IA_[II+1]-IO_) of JA_[] and A_[]
+		int *IA_;
+
+		// column index (plus IO_) of each stored element
+		int *JA_;
+
+		// value of each stored element
+		double *A_;
+
+		// work arrays for solve_linear_system(), allocated on first use
+		int *itemp_;
+		double *rtemp_;
+	};
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /* AHFINDERDIRECT__JACOBIAN_HH */
--- a/AMSS_NCKU_source/AHF_Direct/Newton.C
+++ b/AMSS_NCKU_source/AHF_Direct/Newton.C
@@ -0,0 +1,555 @@
+//$Id: Newton.C,v 1.1 2012/04/03 10:49:44 zjcao Exp $
+
+#include "macrodef.h"
+#ifdef With_AHF
+
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+#include <float.h>
+#include <math.h>
+#include <mpi.h>
+
+#include "util_Table.h"
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "cpm_map.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "fd_grid.h"
+#include "patch.h"
+#include "patch_edge.h"
+#include "patch_interp.h"
+#include "ghost_zone.h"
+#include "patch_system.h"
+
+#include "Jacobian.h"
+
+#include "gfns.h"
+#include "gr.h"
+
+#include "horizon_sequence.h"
+#include "BH_diagnostics.h"
+#include "driver.h"
+#include "myglobal.h"
+
+namespace AHFinderDirect
+{
+	extern struct state state;
+	using jtutil::error_exit;
+
+	// Move the patch-system origin to the horizon centroid when the horizon
+	// is no longer well centred on the current origin.
+	//
+	// ps               = patch system holding the horizon shape h
+	// max_*/min_*      = extent of the horizon along each axis
+	// centroid_*       = horizon centroid, used as the new origin
+	//
+	// The horizon counts as centred when, on every axis, the midpoint of its
+	// extent lies within CTR_TOLERANCE * extent / 2 of the origin.  Otherwise
+	// h is recomputed at every nominal grid point as the distance from that
+	// point's global (x,y,z) to the centroid, and ps.recentering() is called.
+	void recentering(patch_system &ps, double max_x, double max_y, double max_z,
+					 double min_x, double min_y, double min_z,
+					 double centroid_x, double centroid_y, double centroid_z)
+	{
+		const fp ox = ps.origin_x();
+		const fp oy = ps.origin_y();
+		const fp oz = ps.origin_z();
+
+		const fp CTR_TOLERANCE = .45;
+		// fabs(), not abs(): depending on the headers in scope an unqualified
+		// abs() can bind to int abs(int) and truncate its argument
+		const bool center = (fabs(max_x + min_x - 2.0 * ox) < CTR_TOLERANCE * (max_x - min_x)) &&
+							(fabs(max_y + min_y - 2.0 * oy) < CTR_TOLERANCE * (max_y - min_y)) &&
+							(fabs(max_z + min_z - 2.0 * oz) < CTR_TOLERANCE * (max_z - min_z));
+
+		if (!center)
+		{
+
+			for (int pn = 0; pn < ps.N_patches(); ++pn)
+			{
+				patch &p = ps.ith_patch(pn);
+
+				for (int irho = p.min_irho(); irho <= p.max_irho(); ++irho)
+					for (int isigma = p.min_isigma(); isigma <= p.max_isigma(); ++isigma)
+					{
+						// new h = distance from this surface point to the new origin
+						p.ghosted_gridfn(gfns::gfn__h, irho, isigma) =
+							sqrt(jtutil::pow2(p.gridfn(gfns::gfn__global_x, irho, isigma) - centroid_x) +
+								 jtutil::pow2(p.gridfn(gfns::gfn__global_y, irho, isigma) - centroid_y) +
+								 jtutil::pow2(p.gridfn(gfns::gfn__global_z, irho, isigma) - centroid_z));
+					}
+			}
+
+			ps.recentering(centroid_x, centroid_y, centroid_z);
+		}
+	}
+
+	namespace
+	{
+		bool broadcast_status(int N_procs, int N_active_procs,
+							  int my_proc, bool my_active_flag,
+							  int hn, int iteration,
+							  enum expansion_status expansion_status,
+							  fp mean_horizon_radius, fp infinity_norm,
+							  bool found_this_horizon, bool I_need_more_iterations,
+							  struct iteration_status_buffers &isb);
+
+		void Newton_step(patch_system &ps,
+						 fp mean_horizon_radius, fp max_allowable_Delta_h_over_h);
+
+		void save_oldh(patch_system &ps);
+
+		// Interpolate lapse and shift onto the horizon surface points.
+		//
+		// ps_ptr = this processor's patch system, or NULL if it has none
+		//          (it then contributes zero points but still takes part
+		//          in every collective call)
+		//
+		// For each processor in turn, its point coordinates are broadcast to
+		// all processors, every processor calls globalInterpGFLlash() on them,
+		// and the owning processor copies the result into its gridfns
+		// global_xx, global_xy, global_xz, global_yy (used as scratch storage).
+		// Returns the status of the interpolation of this processor's own
+		// points (1 = success, 0 = failure), or 1 if it has no points.
+		int interpolate_alsh(patch_system *ps_ptr)
+		{
+			int status = 1;
+
+// yields NULL instead of evaluating ptr_ when there is no patch system
+#define CAST_PTR_OR_NULL(type_, ptr_) \
+	(ps_ptr == NULL) ? NULL : static_cast<type_>(ptr_)
+
+			//
+			// ***** interpolation points *****
+			//
+			const int N_interp_points = (ps_ptr == NULL) ? 0 : ps_ptr->N_grid_points();
+			double *interp_coords[3] = {
+				CAST_PTR_OR_NULL(double *, ps_ptr->gridfn_data(gfns::gfn__global_x)),
+				CAST_PTR_OR_NULL(double *, ps_ptr->gridfn_data(gfns::gfn__global_y)),
+				CAST_PTR_OR_NULL(double *, ps_ptr->gridfn_data(gfns::gfn__global_z)),
+			};
+
+			double *const output_arrays[] = {
+				CAST_PTR_OR_NULL(double *, ps_ptr->gridfn_data(gfns::gfn__global_xx)), // Lapse-1
+				CAST_PTR_OR_NULL(double *, ps_ptr->gridfn_data(gfns::gfn__global_xy)), // Sfx
+				CAST_PTR_OR_NULL(double *, ps_ptr->gridfn_data(gfns::gfn__global_xz)), // Sfy
+				CAST_PTR_OR_NULL(double *, ps_ptr->gridfn_data(gfns::gfn__global_yy)), // Sfz
+			};
+
+			const int N_output_arrays_dim = sizeof(output_arrays) / sizeof(output_arrays[0]);
+			const int N_output_arrays_use = N_output_arrays_dim;
+
+			double *Data, *oX, *oY, *oZ;
+
+			int s;
+			int Npts = 0;
+			// one round per processor: ncpu is the owner of the points handled
+			// in that round
+			for (int ncpu = 0; ncpu < state.N_procs; ncpu++)
+			{
+
+				if (state.my_proc == ncpu)
+					Npts = N_interp_points;
+
+				// every processor learns how many points the owner has
+				MPI_Bcast(&Npts, 1, MPI_INT, ncpu, MPI_COMM_WORLD);
+
+				if (Npts != 0)
+				{
+					Data = new double[Npts * N_output_arrays_use];
+
+					oX = new double[Npts];
+					oY = new double[Npts];
+					oZ = new double[Npts];
+					if (state.my_proc == ncpu)
+					{
+						memcpy(oX, interp_coords[0], Npts * sizeof(double));
+						memcpy(oY, interp_coords[1], Npts * sizeof(double));
+						memcpy(oZ, interp_coords[2], Npts * sizeof(double));
+					}
+					MPI_Bcast(oX, Npts, MPI_DOUBLE, ncpu, MPI_COMM_WORLD);
+					MPI_Bcast(oY, Npts, MPI_DOUBLE, ncpu, MPI_COMM_WORLD);
+					MPI_Bcast(oZ, Npts, MPI_DOUBLE, ncpu, MPI_COMM_WORLD);
+
+					// each cpu calls interpolator
+					s = globalInterpGFLlash(
+						oX, oY, oZ, Npts,
+						Data); // 1 success; 0 fail
+
+					// only the owner keeps the result and its status
+					if (state.my_proc == ncpu)
+					{
+						status = s;
+
+						if (status == 1)
+						{
+							// Data is variable-major: variable ngf starts at ngf * Npts
+							for (int ngf = 0; ngf < N_output_arrays_use; ngf++)
+							{
+								memcpy(output_arrays[ngf], Data + ngf * N_interp_points,
+									   sizeof(double) * N_interp_points);
+							}
+						}
+					}
+
+					delete[] oX;
+					delete[] oY;
+					delete[] oZ;
+					delete[] Data;
+				}
+			}
+
+			return status;
+		}
+
+	}
+
+	//******************************************************************************
+	//
+	// Newton-iteration driver for all horizons assigned to this processor.
+	//
+	// For each horizon in hs it repeatedly evaluates the expansion Theta,
+	// tests for convergence (||Theta||_infinity <= 1e-11), and otherwise
+	// computes the Jacobian, solves for Delta_h and takes a (clamped) Newton
+	// step.  A horizon is given up when Theta or the mean radius exceeds 1e10
+	// or after max_iterations (80 for an initial find, 20 otherwise).
+	// When a horizon is found its diagnostics are computed and written, its
+	// shape is optionally dumped, and the patch system is recentred.
+	//
+	// Arguments:
+	// N_procs, N_active_procs, my_proc = processor counts / this processor's
+	//	number, passed on to broadcast_status()
+	// hs = sequence of horizons handled by this processor
+	// AH_data_array = per-horizon data, indexed by horizon number hn
+	// isb = buffers for the status broadcast
+	// dumpid, dT = per-horizon arrays indexed by hn-1; a shape file is written
+	//	when dumpid[hn-1] > 0, and compute_signature() is used when in
+	//	addition dT[hn-1] > 0
+	//
+	// The loop body contains collective MPI operations (broadcast_status(),
+	// MPI_Allreduce, interpolate_alsh()), so every processor keeps iterating
+	// until no processor needs more iterations; the function returns from
+	// inside the loop at that point.
+	//
+	void Newton(int N_procs, int N_active_procs, int my_proc,
+				horizon_sequence &hs, struct AH_data *const AH_data_array[],
+				struct iteration_status_buffers &isb, int *dumpid, double *dT)
+	{
+		const bool my_active_flag = hs.has_genuine_horizons();
+		const int N_horizons = hs.N_horizons();
+
+		for (int hn = hs.init_hn();; hn = hs.next_hn()) // hn always =0 for cpu who has no patch_system
+		{
+			bool horizon_is_genuine = hs.is_genuine();
+			const bool there_is_another_genuine_horizon = hs.is_next_genuine();
+
+			struct AH_data *AH_data_ptr = horizon_is_genuine ? AH_data_array[hn] : NULL;
+
+			// a genuine horizon is treated as a dummy when it is not triggered
+			// or has been told to stop
+			horizon_is_genuine = horizon_is_genuine && AH_data_ptr->find_trigger && !AH_data_ptr->stop_finding;
+			if (horizon_is_genuine)
+				cout << "being finding horizon #" << hn << endl;
+			patch_system *const ps_ptr = horizon_is_genuine ? AH_data_ptr->ps_ptr : NULL;
+			Jacobian *const Jac_ptr = horizon_is_genuine ? AH_data_ptr->Jac_ptr : NULL;
+			const double add_to_expansion = horizon_is_genuine ? -AH_data_ptr->surface_expansion : 0.0;
+			const int max_iterations = horizon_is_genuine
+										   ? (AH_data_ptr->initial_find_flag ? 80 : 20)
+										   : INT_MAX;
+
+			// keep a copy of the starting shape in gfn__oldh
+			if (horizon_is_genuine)
+				save_oldh(*ps_ptr);
+
+			for (int iteration = 1;; ++iteration)
+			{
+				if (horizon_is_genuine && iteration == max_iterations)
+					cout << "AHfinder: fail to find horizon #" << hn
+						 << " with Newton iteration " << iteration << " steps!!!" << endl;
+				jtutil::norm<fp> Theta_norms;
+
+				const enum expansion_status raw_expansion_status = expansion(ps_ptr, add_to_expansion,
+																			 (iteration == 1), true, &Theta_norms);
+
+				const bool Theta_is_ok = (raw_expansion_status == expansion_success);
+				const bool norms_are_ok = horizon_is_genuine && Theta_is_ok;
+
+				//
+				// have we found this horizon?
+				// if so, compute and output BH diagnostics
+				//
+				const bool found_this_horizon = norms_are_ok && (Theta_norms.infinity_norm() <= 1e-11);
+
+				if (horizon_is_genuine)
+					AH_data_ptr->found_flag = found_this_horizon;
+
+				if (horizon_is_genuine && found_this_horizon)
+					cout << "found horizon #" << hn << " with " << iteration << " steps!!!" << endl;
+				//
+				// see if the expansion is too big
+				// (if so, we'll give up on this horizon)
+				//
+				const bool expansion_is_too_large = norms_are_ok && (Theta_norms.infinity_norm() > 1e10);
+
+				//
+				// compute the mean horizon radius, and if it's too large,
+				// then pretend expansion() returned a "surface too large" error status
+				//
+				jtutil::norm<fp> h_norms;
+				if (horizon_is_genuine)
+					then ps_ptr->ghosted_gridfn_norms(gfns::gfn__h, h_norms);
+				const fp mean_horizon_radius = horizon_is_genuine ? h_norms.mean()
+																  : 0.0;
+				const bool horizon_is_too_large = (mean_horizon_radius > 1e10);
+
+				const enum expansion_status effective_expansion_status = horizon_is_too_large ? expansion_failure__surface_too_large
+																							  : raw_expansion_status;
+
+				//
+				// see if we need more iterations (either on this or another horizon)
+				//
+
+				// does *this* horizon need more iterations?
+				// i.e. has this horizon's Newton iteration not yet converged?
+				const bool this_horizon_needs_more_iterations = horizon_is_genuine && Theta_is_ok && !found_this_horizon && !expansion_is_too_large && !horizon_is_too_large && (iteration < max_iterations);
+
+				// do I (this processor) need to do more iterations
+				// on this or a following horizon?
+				const bool I_need_more_iterations = this_horizon_needs_more_iterations || there_is_another_genuine_horizon;
+
+				//
+				// broadcast iteration status from each active processor
+				// to all processors, and inclusive-or the "we need more iterations"
+				// flags to see if *any* (active) processor needs more iterations
+				//
+				const bool any_proc_needs_more_iterations = broadcast_status(N_procs, N_active_procs,
+																			 my_proc, my_active_flag,
+																			 hn, iteration, effective_expansion_status,
+																			 mean_horizon_radius,
+																			 (norms_are_ok ? Theta_norms.infinity_norm() : 0.0),
+																			 found_this_horizon, I_need_more_iterations,
+																			 isb);
+				// set found-this-horizon flags
+				// for all active processors' non-dummy horizons
+				for (int found_proc = 0; found_proc < N_active_procs; ++found_proc)
+				{
+					const int found_hn = isb.hn_buffer[found_proc];
+					if (found_hn > 0)
+						AH_data_array[found_hn]->found_flag = isb.found_horizon_buffer[found_proc];
+				}
+
+				//
+				// prepare lapse and shift
+				{
+					// ff = number of processors that found a horizon needing
+					// lapse/shift this iteration; the interpolation is
+					// collective, so all processors join if any one needs it
+					int ff = 0, fft = 0;
+					if (found_this_horizon && dumpid[hn - 1] > 0 && dT[hn - 1] > 0)
+						fft = 1;
+					MPI_Allreduce(&fft, &ff, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+
+					if (ff)
+					{
+						if ((interpolate_alsh(ps_ptr) == 0) && (state.my_proc == 0))
+							cout << "interpolation of lapse and shift for AH failed." << endl;
+					}
+				}
+
+				if (found_this_horizon)
+				{
+					struct BH_diagnostics &BH_diagnostics = AH_data_ptr->BH_diagnostics;
+					// output data
+					if (dumpid[hn - 1] > 0)
+					{
+						char filename[100];
+						sprintf(filename, "ah%02d_%05d.dat", hn, dumpid[hn - 1]);
+						if (dT[hn - 1] > 0)
+						{
+							// gridfunction xx,xy,xz,yy,yz,zz will be used as temp storage
+							BH_diagnostics.compute_signature(*ps_ptr, dT[hn - 1]);
+							ps_ptr->print_gridfn_with_xyz(gfns::gfn__global_zz, true, gfns::gfn__h, filename);
+						}
+						else
+							ps_ptr->print_ghosted_gridfn_with_xyz(gfns::gfn__h, true, gfns::gfn__h, filename, false);
+					}
+
+					BH_diagnostics.compute(*ps_ptr); // gridfunction xx,xy,xz,yy,yz,zz changed
+
+					// the diagnostics file is opened the first time this horizon is found
+					if (AH_data_ptr->BH_diagnostics_fileptr == NULL)
+						AH_data_ptr->BH_diagnostics_fileptr = BH_diagnostics.setup_output_file(N_horizons, hn);
+					BH_diagnostics.output(AH_data_ptr->BH_diagnostics_fileptr, (*state.PhysTime));
+
+					// recentering
+					recentering(*ps_ptr, (AH_data_ptr->BH_diagnostics).max_x, (AH_data_ptr->BH_diagnostics).max_y, (AH_data_ptr->BH_diagnostics).max_z,
+								(AH_data_ptr->BH_diagnostics).min_x, (AH_data_ptr->BH_diagnostics).min_y, (AH_data_ptr->BH_diagnostics).min_z,
+								(AH_data_ptr->BH_diagnostics).centroid_x, (AH_data_ptr->BH_diagnostics).centroid_y, (AH_data_ptr->BH_diagnostics).centroid_z);
+					AH_data_ptr->recentering_flag = true;
+				}
+
+				//
+				// are all processors done with all their genuine horizons?
+				// or if this is a single-processor run, are we done with this horizon?
+				//
+				if (!any_proc_needs_more_iterations)
+					return; // *** NORMAL RETURN ***
+
+				//
+				// compute the Jacobian matrix
+				// *** this is a synchronous operation across all processors ***
+				//
+
+				const enum expansion_status
+					Jacobian_status = expansion_Jacobian(this_horizon_needs_more_iterations ? ps_ptr : NULL,
+														 this_horizon_needs_more_iterations ? Jac_ptr : NULL,
+														 add_to_expansion,
+														 (iteration == 1),
+														 false);
+				const bool Jacobian_is_ok = (Jacobian_status == expansion_success);
+
+				//
+				// skip to the next horizon unless
+				// this is a genuine Jacobian computation, and it went ok
+				//
+				if (!(this_horizon_needs_more_iterations && Jacobian_is_ok))
+					break; // *** LOOP EXIT ***
+
+				//
+				// compute the Newton step
+				//
+				Jac_ptr->solve_linear_system(gfns::gfn__Theta, gfns::gfn__Delta_h, false);
+
+				// step size is clamped to 10% of the mean horizon radius
+				Newton_step(*ps_ptr, mean_horizon_radius, 0.1);
+
+				// end of this Newton iteration
+			}
+
+			// end of this horizon
+		}
+
+		// we should never get to here
+		assert(false);
+	}
+
+	//******************************************************************************
+	//******************************************************************************
+	//******************************************************************************
+	namespace
+	{
+		//
+		// Share each active processor's iteration status with all processors.
+		//
+		// Every processor fills its own row of a zeroed
+		// N_active_procs x N_buffer_vars array of doubles; an MPI_Allreduce with
+		// MPI_SUM then gives every processor the complete table, which is
+		// unpacked into the buffers in isb (allocated here on first use).
+		//
+		// Arguments:
+		// N_procs, N_active_procs, my_proc, my_active_flag = processor layout;
+		//	inactive processors contribute an all-zero row
+		// hn, iteration = horizon number (>= 0) and iteration number (> 0)
+		// effective_expansion_status, mean_horizon_radius, infinity_norm =
+		//	values to publish for this processor
+		// found_this_horizon = published as the sign of hn
+		// I_need_more_iterations = published as the sign of iteration
+		// isb = receives the unpacked per-processor values
+		//
+		// Returns true if any active processor needs more iterations.
+		//
+		bool broadcast_status(int N_procs, int N_active_procs,
+							  int my_proc, bool my_active_flag,
+							  int hn, int iteration,
+							  enum expansion_status effective_expansion_status,
+							  fp mean_horizon_radius, fp infinity_norm,
+							  bool found_this_horizon, bool I_need_more_iterations,
+							  struct iteration_status_buffers &isb)
+		{
+			assert(my_proc >= 0);
+			assert(my_proc < N_procs);
+
+			enum
+			{
+				buffer_var__hn = 0,	   // also encodes found_this_horizon flag
+									   // in sign: +=true, -=false
+				buffer_var__iteration, // also encodes I_need_more_iterations flag
+									   // in sign: +=true, -=false
+				buffer_var__expansion_status,
+				buffer_var__mean_horizon_radius,
+				buffer_var__Theta_infinity_norm,
+				N_buffer_vars // no comma
+			};
+
+			//
+			// allocate buffers if this is the first use
+			//
+			if (isb.hn_buffer == NULL)
+				then
+				{
+					isb.hn_buffer = new int[N_active_procs];
+					isb.iteration_buffer = new int[N_active_procs];
+					isb.expansion_status_buffer = new enum expansion_status[N_active_procs];
+					isb.mean_horizon_radius_buffer = new fp[N_active_procs];
+					isb.Theta_infinity_norm_buffer = new fp[N_active_procs];
+					isb.found_horizon_buffer = new bool[N_active_procs];
+
+					isb.send_buffer_ptr = new jtutil::array2d<double>(0, N_active_procs - 1,
+																	  0, N_buffer_vars - 1);
+					isb.receive_buffer_ptr = new jtutil::array2d<double>(0, N_active_procs - 1,
+																		 0, N_buffer_vars - 1);
+				}
+			jtutil::array2d<double> &send_buffer = *isb.send_buffer_ptr;
+			jtutil::array2d<double> &receive_buffer = *isb.receive_buffer_ptr;
+
+			//
+			// pack this processor's values into the reduction buffer
+			// (all other rows stay zero, so the sum reproduces each row)
+			//
+			jtutil::zero_C_array(send_buffer.N_array(), send_buffer.data_array());
+			if (my_active_flag)
+				then
+				{
+					assert(send_buffer.is_valid_i(my_proc));
+					assert(hn >= 0);	   // encoding scheme assumes this
+					assert(iteration > 0); // encoding scheme assumes this
+					send_buffer(my_proc, buffer_var__hn) = found_this_horizon ? +hn : -hn;
+					send_buffer(my_proc, buffer_var__iteration) = I_need_more_iterations ? +iteration : -iteration;
+					send_buffer(my_proc, buffer_var__expansion_status) = int(effective_expansion_status);
+					send_buffer(my_proc, buffer_var__mean_horizon_radius) = mean_horizon_radius;
+					send_buffer(my_proc, buffer_var__Theta_infinity_norm) = infinity_norm;
+				}
+
+			// the buffers hold C doubles, so reduce them with the C datatype
+			// MPI_DOUBLE rather than the Fortran MPI_DOUBLE_PRECISION
+			const int reduction_status = MPI_Allreduce(static_cast<void *>(send_buffer.data_array()),
+													   static_cast<void *>(receive_buffer.data_array()),
+													   send_buffer.N_array(),
+													   MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+			if (reduction_status != MPI_SUCCESS)
+				then CCTK_VWarn(0, __LINE__, __FILE__, CCTK_THORNSTRING,
+								"broadcast_status(): error status %d from reduction!",
+								reduction_status);
+
+			//
+			// unpack the reduction buffer back to the high-level result buffers and
+			// compute the inclusive-or of the broadcast I_need_more_iterations flags
+			//
+			bool any_proc_needs_more_iterations = false;
+			for (int proc = 0; proc < N_active_procs; ++proc)
+			{
+				// sign carries the found flag, magnitude the horizon number
+				const int hn_temp = static_cast<int>(
+					receive_buffer(proc, buffer_var__hn));
+				isb.hn_buffer[proc] = jtutil::abs(hn_temp);
+				isb.found_horizon_buffer[proc] = (hn_temp > 0);
+
+				// sign carries the needs-more flag, magnitude the iteration number
+				const int iteration_temp = static_cast<int>(
+					receive_buffer(proc, buffer_var__iteration));
+				isb.iteration_buffer[proc] = jtutil::abs(iteration_temp);
+				const bool proc_needs_more_iterations = (iteration_temp > 0);
+				any_proc_needs_more_iterations |= proc_needs_more_iterations;
+
+				isb.expansion_status_buffer[proc] = static_cast<enum expansion_status>(
+					static_cast<int>(
+						receive_buffer(proc, buffer_var__expansion_status)));
+
+				isb.mean_horizon_radius_buffer[proc] = receive_buffer(proc, buffer_var__mean_horizon_radius);
+				isb.Theta_infinity_norm_buffer[proc] = receive_buffer(proc, buffer_var__Theta_infinity_norm);
+			}
+
+			return any_proc_needs_more_iterations;
+		}
+	}
+	//
+	// This function takes the Newton step, scaling it down if it's too large.
+	//
+	// Arguments:
+	// ps = The patch system containing the gridfns h and Delta_h.
+	// mean_horizon_radius = ||h||_mean
+	// max_allowable_Delta_h_over_h = The maximum allowable
+	//				     ||Delta_h||_infinity / ||h||_mean
+	//				  Any step over this is internally clamped
+	//				  (scaled down) to this size.
+	//
+	namespace
+	{
+		// Apply  h -= scale * Delta_h  at every nominal grid point, where
+		// scale is 1 unless ||Delta_h||_infinity exceeds
+		// max_allowable_Delta_h_over_h * mean_horizon_radius, in which case the
+		// step is shrunk to exactly that size.
+		void Newton_step(patch_system &ps,
+						 fp mean_horizon_radius, fp max_allowable_Delta_h_over_h)
+		{
+			// largest step we are willing to take
+			const fp step_limit = max_allowable_Delta_h_over_h * mean_horizon_radius;
+
+			// size of the step the linear solve asked for
+			jtutil::norm<fp> step_norms;
+			ps.gridfn_norms(gfns::gfn__Delta_h, step_norms);
+			const fp step_size = step_norms.infinity_norm();
+
+			// scale factor: 1 for small steps, <1 for large steps
+			fp scale;
+			if (step_size <= step_limit)
+				scale = 1.0;
+			else
+				scale = step_limit / step_size;
+
+			// take the (possibly scaled) step on every patch
+			const int N_p = ps.N_patches();
+			for (int ip = 0; ip < N_p; ++ip)
+			{
+				patch &pat = ps.ith_patch(ip);
+
+				for (int ir = pat.min_irho(); ir <= pat.max_irho(); ++ir)
+					for (int is = pat.min_isigma(); is <= pat.max_isigma(); ++is)
+					{
+						const fp correction = scale * pat.gridfn(gfns::gfn__Delta_h, ir, is);
+						pat.ghosted_gridfn(gfns::gfn__h, ir, is) -= correction;
+					}
+			}
+		}
+		// Copy the current horizon shape h into the gridfn oldh at every
+		// nominal grid point of every patch.
+		void save_oldh(patch_system &ps)
+		{
+			const int N_p = ps.N_patches();
+			for (int ip = 0; ip < N_p; ++ip)
+			{
+				patch &pat = ps.ith_patch(ip);
+
+				for (int ir = pat.min_irho(); ir <= pat.max_irho(); ++ir)
+					for (int is = pat.min_isigma(); is <= pat.max_isigma(); ++is)
+					{
+						const fp h_now = pat.ghosted_gridfn(gfns::gfn__h, ir, is);
+						pat.gridfn(gfns::gfn__oldh, ir, is) = h_now;
+					}
+			}
+		}
+	}
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif
--- a/AMSS_NCKU_source/AHF_Direct/array.C
+++ b/AMSS_NCKU_source/AHF_Direct/array.C
@@ -0,0 +1,186 @@
+#include <assert.h>
+#include <stddef.h> // NULL
+#include <stdlib.h> // size_t
+
+#include "cctk.h"
+
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+
+		//
+		// This function constructs an  array1d  object.
+		//
+		// Arguments:
+		// min_i_in, max_i_in = Inclusive bounds of the valid index range.
+		// array_in = NULL ==> this object allocates (and its destructor frees)
+		//		       the storage.
+		//	      non-NULL ==> this object uses the caller's buffer and
+		//			   never frees it.
+		// stride_i_in = Storage stride for the index; 0 selects the default
+		//		 stride of 1.
+		//
+		// In both ownership cases every element in [min_i, max_i] is set to
+		// T(0) before the constructor returns, so a caller-supplied buffer
+		// is overwritten.
+		//
+		template <typename T>
+		array1d<T>::array1d(int min_i_in, int max_i_in,
+							T *array_in /* = NULL */,
+							int stride_i_in /* = 0 */)
+			: array_(array_in),
+			  offset_(0), // temp value, changed below
+			  stride_i_(stride_i_in),
+			  min_i_(min_i_in), max_i_(max_i_in),
+			  we_own_array_(array_in == NULL)
+		{
+			// a stride of 0 means "not specified": default to unit stride
+			if (stride_i_ == 0)
+				then stride_i_ = 1;
+
+			// must use unchecked subscripting here since setup isn't done yet
+			// choose offset_ so that index min_i_ maps to storage position 0
+			offset_ = -subscript_unchecked(min_i_); // RHS uses offset_ = 0
+			assert(subscript_unchecked(min_i_) == 0);
+			// largest legal storage position; subscript() asserts against it
+			max_subscript_ = subscript_unchecked(max_i_);
+
+			if (we_own_array_)
+				then
+				{
+					// allocate it
+					const int N_allocate = N_i();
+					array_ = new T[N_allocate];
+				}
+
+			// explicitly initialize array (new[] *doesn't* do this automagically)
+			// n.b. this loop is outside the ownership test above, so it also
+			//      zeros caller-provided storage
+			for (int i = min_i(); i <= max_i(); ++i)
+			{
+				operator()(i) = T(0);
+			}
+		}
+
+		//
+		// Destructor for  array1d : frees the storage only if this object
+		// allocated it; client-provided storage is left untouched.
+		//
+		template <typename T>
+		array1d<T>::~array1d()
+		{
+			if (!we_own_array_)
+				return;
+
+			delete[] array_;
+		}
+
+		//
+		// This function constructs an  array2d  object.
+		//
+		// Arguments:
+		// min_{i,j}_in, max_{i,j}_in = Inclusive bounds of the valid index
+		//				 ranges.
+		// array_in = NULL ==> this object allocates (and its destructor frees)
+		//		       the storage.
+		//	      non-NULL ==> this object uses the caller's buffer and
+		//			   never frees it.
+		// stride_{i,j}_in = Storage strides; a 0 selects the default, which
+		//		     is stride_j = 1 and stride_i = N_j() (j varies
+		//		     fastest).
+		//
+		// In both ownership cases every element is set to T(0) before the
+		// constructor returns, so a caller-supplied buffer is overwritten.
+		//
+		template <typename T>
+		array2d<T>::array2d(int min_i_in, int max_i_in,
+							int min_j_in, int max_j_in,
+							T *array_in /* = NULL */,
+							int stride_i_in /* = 0 */, int stride_j_in /* = 0 */)
+			: array_(array_in),
+			  offset_(0), // temp value, changed below
+			  stride_i_(stride_i_in), stride_j_(stride_j_in),
+			  min_i_(min_i_in), max_i_(max_i_in),
+			  min_j_(min_j_in), max_j_(max_j_in),
+			  we_own_array_(array_in == NULL)
+		{
+			// strides of 0 mean "not specified": fill in the defaults
+			if (stride_j_ == 0)
+				then stride_j_ = 1;
+			if (stride_i_ == 0)
+				then stride_i_ = N_j();
+
+			// must use unchecked subscripting here since setup isn't done yet
+			// choose offset_ so that (min_i_,min_j_) maps to storage position 0
+			offset_ = -subscript_unchecked(min_i_, min_j_); // RHS uses offset_ = 0
+			assert(subscript_unchecked(min_i_, min_j_) == 0);
+			// largest legal storage position; subscript() asserts against it
+			max_subscript_ = subscript_unchecked(max_i_, max_j_);
+
+			if (we_own_array_)
+				then
+				{
+					// allocate it
+					const int N_allocate = N_i() * N_j();
+					array_ = new T[N_allocate];
+				}
+
+			// explicitly initialize array (new[] *doesn't* do this automagically)
+			// n.b. this loop is outside the ownership test above, so it also
+			//      zeros caller-provided storage
+			for (int i = min_i(); i <= max_i(); ++i)
+			{
+				for (int j = min_j(); j <= max_j(); ++j)
+				{
+					operator()(i, j) = T(0);
+				}
+			}
+		}
+
+		//
+		// Destructor for  array2d : frees the storage only if this object
+		// allocated it; client-provided storage is left untouched.
+		//
+		template <typename T>
+		array2d<T>::~array2d()
+		{
+			if (!we_own_array_)
+				return;
+
+			delete[] array_;
+		}
+
+		//
+		// This function constructs an  array3d  object.
+		//
+		// Arguments:
+		// min_{i,j,k}_in, max_{i,j,k}_in = Inclusive bounds of the valid index
+		//				     ranges.
+		// array_in = NULL ==> this object allocates (and its destructor frees)
+		//		       the storage.
+		//	      non-NULL ==> this object uses the caller's buffer and
+		//			   never frees it.
+		// stride_{i,j,k}_in = Storage strides; a 0 selects the default, which
+		//		       is stride_k = 1, stride_j = N_k() and
+		//		       stride_i = N_j()*N_k() (k varies fastest).
+		//
+		// In both ownership cases every element is set to T(0) before the
+		// constructor returns, so a caller-supplied buffer is overwritten.
+		//
+		template <typename T>
+		array3d<T>::array3d(int min_i_in, int max_i_in,
+							int min_j_in, int max_j_in,
+							int min_k_in, int max_k_in,
+							T *array_in /* = NULL */,
+							int stride_i_in /* = 0 */, int stride_j_in /* = 0 */,
+							int stride_k_in /* = 0 */)
+			: array_(array_in),
+			  offset_(0), // temp value, changed below
+			  stride_i_(stride_i_in), stride_j_(stride_j_in),
+			  stride_k_(stride_k_in),
+			  min_i_(min_i_in), max_i_(max_i_in),
+			  min_j_(min_j_in), max_j_(max_j_in),
+			  min_k_(min_k_in), max_k_(max_k_in),
+			  we_own_array_(array_in == NULL)
+		{
+			// strides of 0 mean "not specified": fill in the defaults
+			if (stride_k_ == 0)
+				then stride_k_ = 1;
+			if (stride_j_ == 0)
+				then stride_j_ = N_k();
+			if (stride_i_ == 0)
+				then stride_i_ = N_j() * N_k();
+
+			// must use unchecked subscripting here since setup isn't done yet
+			// choose offset_ so that (min_i_,min_j_,min_k_) maps to storage
+			// position 0
+			offset_ = -subscript_unchecked(min_i_, min_j_, min_k_); // RHS uses offset_ = 0
+			assert(subscript_unchecked(min_i_, min_j_, min_k_) == 0);
+			// largest legal storage position; subscript() asserts against it
+			max_subscript_ = subscript_unchecked(max_i_, max_j_, max_k_);
+
+			if (we_own_array_)
+				then
+				{
+					// allocate it
+					const int N_allocate = N_i() * N_j() * N_k();
+					array_ = new T[N_allocate];
+				}
+
+			// explicitly initialize array (new[] *doesn't* do this automagically)
+			// n.b. this loop is outside the ownership test above, so it also
+			//      zeros caller-provided storage
+			for (int i = min_i(); i <= max_i(); ++i)
+			{
+				for (int j = min_j(); j <= max_j(); ++j)
+				{
+					for (int k = min_k(); k <= max_k(); ++k)
+					{
+						operator()(i, j, k) = T(0);
+					}
+				}
+			}
+		}
+		//
+		// Destructor for  array3d : frees the storage only if this object
+		// allocated it; client-provided storage is left untouched.
+		//
+		template <typename T>
+		array3d<T>::~array3d()
+		{
+			if (!we_own_array_)
+				return;
+
+			delete[] array_;
+		}
+
+		//
+		// Explicit instantiations.  The member functions above are defined in
+		// this source file, so each element type that is used with these
+		// templates is instantiated explicitly here.
+		//
+		template class array1d<int>;
+
+		// FIXME: we shouldn't have to instantiate these both, the const one
+		//	  is actually trivially derivable from the non-const one. :(
+		template class array1d<void *>;
+		template class array1d<const void *>;
+
+		template class array1d<CCTK_REAL>;
+		template class array2d<CCTK_INT>;
+		template class array2d<CCTK_REAL>;
+		template class array3d<CCTK_REAL>;
+
+	} // namespace jtutil
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/array.h
+++ b/AMSS_NCKU_source/AHF_Direct/array.h
@@ -0,0 +1,292 @@
+#ifndef AHFINDERDIRECT__ARRAY_HH
+#define AHFINDERDIRECT__ARRAY_HH
+
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+
+		//******************************************************************************
+
+		//
+		// array1d<T> is a 1-D array of T whose index i runs over the inclusive
+		// range [min_i, max_i].  Index i is stored at position
+		//	offset_ + stride_i_*i
+		// of a flat 0-origin array which is either allocated by this object
+		// or supplied by the client.
+		//
+		// n.b. this header has no #include directives of its own: assert(),
+		//      NULL and jtutil::how_many_in_range() must already be declared
+		//      by whoever includes it.
+		//
+		template <typename T>
+		class array1d
+		{
+		public:
+			// array info
+			int min_i() const { return min_i_; }
+			int max_i() const { return max_i_; }
+			int N_i() const { return jtutil::how_many_in_range(min_i_, max_i_); }
+			bool is_valid_i(int i) const { return (i >= min_i_) && (i <= max_i_); }
+
+			// subscripting: index --> position in the 0-origin storage array
+			// ... no checking at all
+			int subscript_unchecked(int i) const
+			{
+				return offset_ + stride_i_ * i;
+			}
+			// ... assert()s that the index and the resulting position are in range
+			int subscript(int i) const
+			{
+				assert(is_valid_i(i));
+				const int posn = subscript_unchecked(i);
+				assert(posn >= 0);
+				assert(posn <= max_subscript_);
+				return posn;
+			}
+			int subscript_offset() const { return offset_; }
+			int subscript_stride_i() const { return stride_i_; }
+
+			// normal-use access functions
+			// ... rvalue
+			const T &operator()(int i) const { return array_[subscript(i)]; }
+			// ... lvalue
+			T &operator()(int i) { return array_[subscript(i)]; }
+
+			// get access to internal 0-origin 1D storage array
+			// (low-level, dangerous, use with caution!)
+			// ... semantics of N_array() may not be what you want
+			//     if strides specify noncontiguous storage
+			int N_array() const { return max_subscript_ + stride_i_; }
+			const T *data_array() const { return const_cast<const T *>(array_); }
+			T *data_array() { return array_; }
+
+			// constructor, destructor
+			// ... constructor initializes all array elements to T(0.0)
+			//     (this includes caller-provided storage)
+			// ... omitted strides default to C storage order
+			array1d(int min_i_in, int max_i_in,
+					T *array_in = NULL, // caller-provided storage array
+										// if non-NULL
+					int stride_i_in = 0);
+			~array1d();
+
+		private:
+			// we forbid copying and passing by value
+			// by declaring the copy constructor and assignment operator
+			// private, but never defining them
+			array1d(const array1d<T> &rhs);
+			array1d<T> &operator=(const array1d<T> &rhs);
+
+		private:
+			// n.b. we declare the array pointer first in the class
+			// ==> it's probably at 0 offset
+			// ==> we may get slightly faster array access
+			T *array_; // --> new-allocated 1D storage array
+
+			// subscripting info
+			// n.b. put this next in class so it should be in the same
+			//	cpu cache line as  array_  ==> faster array access
+			int offset_, stride_i_;
+
+			// min/max array bounds
+			const int min_i_, max_i_;
+			int max_subscript_; // storage position of index max_i_
+
+			// n.b. put this at end of class since performance doesn't matter
+			bool we_own_array_; // true ==> array_ --> new[] array which we own
+								// false ==> array_ --> client-owned storage
+		};
+
+		//******************************************************************************
+
+		//
+		// array2d<T> is a 2-D array of T whose indices (i,j) run over the
+		// inclusive ranges [min_i, max_i] x [min_j, max_j].  Element (i,j) is
+		// stored at position
+		//	offset_ + stride_i_*i + stride_j_*j
+		// of a flat 0-origin array which is either allocated by this object
+		// or supplied by the client.
+		//
+		// n.b. this header has no #include directives of its own: assert(),
+		//      NULL and jtutil::how_many_in_range() must already be declared
+		//      by whoever includes it.
+		//
+		template <typename T>
+		class array2d
+		{
+		public:
+			// array info
+			int min_i() const { return min_i_; }
+			int max_i() const { return max_i_; }
+			int min_j() const { return min_j_; }
+			int max_j() const { return max_j_; }
+			int N_i() const { return jtutil::how_many_in_range(min_i_, max_i_); }
+			int N_j() const { return jtutil::how_many_in_range(min_j_, max_j_); }
+			bool is_valid_i(int i) const { return (i >= min_i_) && (i <= max_i_); }
+			bool is_valid_j(int j) const { return (j >= min_j_) && (j <= max_j_); }
+			bool is_valid_ij(int i, int j) const
+			{
+				return is_valid_i(i) && is_valid_j(j);
+			}
+
+			// subscripting: indices --> position in the 0-origin storage array
+			// ... no checking at all
+			int subscript_unchecked(int i, int j) const
+			{
+				return offset_ + stride_i_ * i + stride_j_ * j;
+			}
+			// ... assert()s that the indices and the resulting position are in range
+			int subscript(int i, int j) const
+			{
+				// n.b. we want each assert() here to be on a separate
+				//	source line, so an assert() failure message can
+				//	pinpoint *which* index is bad
+				assert(is_valid_i(i));
+				assert(is_valid_j(j));
+				const int posn = subscript_unchecked(i, j);
+				assert(posn >= 0);
+				assert(posn <= max_subscript_);
+				return posn;
+			}
+			int subscript_offset() const { return offset_; }
+			int subscript_stride_i() const { return stride_i_; }
+			int subscript_stride_j() const { return stride_j_; }
+
+			// normal-use access functions
+			// ... rvalue
+			const T &operator()(int i, int j) const
+			{
+				return array_[subscript(i, j)];
+			}
+			// ... lvalue
+			T &operator()(int i, int j)
+			{
+				return array_[subscript(i, j)];
+			}
+
+			// get access to internal 0-origin 1D storage array
+			// (low-level, dangerous, use with caution!)
+			// ... semantics of N_array() may not be what you want
+			//     if strides specify noncontiguous storage
+			int N_array() const { return max_subscript_ + stride_j_; }
+			const T *data_array() const { return const_cast<const T *>(array_); }
+			T *data_array() { return array_; }
+
+			// constructor, destructor
+			// ... constructor initializes all array elements to T(0.0)
+			//     (this includes caller-provided storage)
+			// ... omitted strides default to C storage order
+			array2d(int min_i_in, int max_i_in,
+					int min_j_in, int max_j_in,
+					T *array_in = NULL, // caller-provided storage array
+										// if non-NULL
+					int stride_i_in = 0, int stride_j_in = 0);
+			~array2d();
+
+		private:
+			// we forbid copying and passing by value
+			// by declaring the copy constructor and assignment operator
+			// private, but never defining them
+			array2d(const array2d<T> &rhs);
+			array2d<T> &operator=(const array2d<T> &rhs);
+
+		private:
+			// n.b. we declare the array pointer first in the class
+			// ==> it's probably at 0 offset
+			// ==> we may get slightly faster array access
+			T *array_; // --> new-allocated 1D storage array
+
+			// subscripting info
+			// n.b. put this next in class so it should be in the same
+			//	cpu cache line as  array_  ==> faster array access
+			int offset_, stride_i_, stride_j_;
+
+			// min/max array bounds
+			const int min_i_, max_i_;
+			const int min_j_, max_j_;
+			int max_subscript_; // storage position of element (max_i_,max_j_)
+
+			// n.b. put this at end of class since performance doesn't matter
+			bool we_own_array_; // true ==> array_ --> new[] array which we own
+								// false ==> array_ --> client-owned storage
+		};
+
+		//******************************************************************************
+
+		//
+		// array3d<T> is a 3-D array of T whose indices (i,j,k) run over the
+		// inclusive ranges [min_i, max_i] x [min_j, max_j] x [min_k, max_k].
+		// Element (i,j,k) is stored at position
+		//	offset_ + stride_i_*i + stride_j_*j + stride_k_*k
+		// of a flat 0-origin array which is either allocated by this object
+		// or supplied by the client.
+		//
+		// n.b. this header has no #include directives of its own: assert(),
+		//      NULL and jtutil::how_many_in_range() must already be declared
+		//      by whoever includes it.
+		//
+		template <typename T>
+		class array3d
+		{
+		public:
+			// array info
+			int min_i() const { return min_i_; }
+			int max_i() const { return max_i_; }
+			int min_j() const { return min_j_; }
+			int max_j() const { return max_j_; }
+			int min_k() const { return min_k_; }
+			int max_k() const { return max_k_; }
+			int N_i() const { return jtutil::how_many_in_range(min_i_, max_i_); }
+			int N_j() const { return jtutil::how_many_in_range(min_j_, max_j_); }
+			int N_k() const { return jtutil::how_many_in_range(min_k_, max_k_); }
+			bool is_valid_i(int i) const { return (i >= min_i_) && (i <= max_i_); }
+			bool is_valid_j(int j) const { return (j >= min_j_) && (j <= max_j_); }
+			bool is_valid_k(int k) const { return (k >= min_k_) && (k <= max_k_); }
+			bool is_valid_ijk(int i, int j, int k) const
+			{
+				return is_valid_i(i) && is_valid_j(j) && is_valid_k(k);
+			}
+
+			// subscripting: indices --> position in the 0-origin storage array
+			// ... no checking at all
+			int subscript_unchecked(int i, int j, int k) const
+			{
+				return offset_ + stride_i_ * i + stride_j_ * j + stride_k_ * k;
+			}
+			// ... assert()s that the indices and the resulting position are in range
+			int subscript(int i, int j, int k) const
+			{
+				// n.b. we want each assert() here to be on a separate
+				//	source line, so an assert() failure message can
+				//	pinpoint *which* index is bad
+				assert(is_valid_i(i));
+				assert(is_valid_j(j));
+				assert(is_valid_k(k));
+				const int posn = subscript_unchecked(i, j, k);
+				assert(posn >= 0);
+				assert(posn <= max_subscript_);
+				return posn;
+			}
+			int subscript_offset() const { return offset_; }
+			int subscript_stride_i() const { return stride_i_; }
+			int subscript_stride_j() const { return stride_j_; }
+			int subscript_stride_k() const { return stride_k_; }
+
+			// normal-use access functions
+			// ... rvalue
+			const T &operator()(int i, int j, int k) const
+			{
+				return array_[subscript(i, j, k)];
+			}
+			// ... lvalue
+			T &operator()(int i, int j, int k)
+			{
+				return array_[subscript(i, j, k)];
+			}
+
+			// get access to internal 0-origin 1D storage array
+			// (low-level, dangerous, use with caution!)
+			// ... semantics of N_array() may not be what you want
+			//     if strides specify noncontiguous storage
+			int N_array() const { return max_subscript_ + stride_k_; }
+			const T *data_array() const { return const_cast<const T *>(array_); }
+			T *data_array() { return array_; }
+
+			// constructor, destructor
+			// ... constructor initializes all array elements to T(0.0)
+			//     (this includes caller-provided storage)
+			// ... omitted strides default to C storage order
+			array3d(int min_i_in, int max_i_in,
+					int min_j_in, int max_j_in,
+					int min_k_in, int max_k_in,
+					T *array_in = NULL, // caller-provided storage array
+										// if non-NULL
+					int stride_i_in = 0, int stride_j_in = 0, int stride_k_in = 0);
+			~array3d();
+
+		private:
+			// we forbid copying and passing by value
+			// by declaring the copy constructor and assignment operator
+			// private, but never defining them
+			array3d(const array3d<T> &rhs);
+			array3d<T> &operator=(const array3d<T> &rhs);
+
+		private:
+			// n.b. we declare the array pointer first in the class
+			// ==> it's probably at 0 offset
+			// ==> we may get slightly faster array access
+			T *array_; // --> new-allocated 1D storage array
+
+			// subscripting info
+			// n.b. put this next in class so it should be in the same
+			//	cpu cache line as  array_  ==> faster array access
+			int offset_, stride_i_, stride_j_, stride_k_;
+
+			// min/max array bounds
+			const int min_i_, max_i_;
+			const int min_j_, max_j_;
+			const int min_k_, max_k_;
+			int max_subscript_; // storage position of element (max_i_,max_j_,max_k_)
+
+			// n.b. put this at end of class since performance doesn't matter
+			bool we_own_array_; // true ==> array_ --> new[] array which we own
+								// false ==> array_ --> client-owned storage
+		};
+
+	} // namespace jtutil
+} // namespace AHFinderDirect
+
+#endif /* AHFINDERDIRECT__ARRAY_HH */
--- a/AMSS_NCKU_source/AHF_Direct/cctk.h
+++ b/AMSS_NCKU_source/AHF_Direct/cctk.h
@@ -0,0 +1,58 @@
+#ifndef _CCTK_H_
+#define _CCTK_H_ 1
+
+/* Grab the main configuration info. */
+#include "cctk_Config.h"
+
+#define CCTK_THORNSTRING "AHFinderDirect"
+
+/* Include the constants */
+#include "cctk_Constants.h"
+
+/* get the definition of ptrdiff_t */
+#include <stddef.h>
+int CCTK_VInfo(const char *thorn, const char *format, ...);
+int CCTK_VWarn(int level,
+               int line,
+               const char *file,
+               const char *thorn,
+               const char *format,
+               ...);
+#define CCTK_ERROR_INTERP_GHOST_SIZE_TOO_SMALL (-1001)
+#ifdef __cplusplus
+#define HAVE_INLINE
+#else
+#ifndef inline
+#define HAVE_INLINE
+#endif
+#endif
+
+#define CCTK_PRINTSEPARATOR \
+  printf("--------------------------------------------------------------------------------\n");
+
+#define _DECLARE_CCTK_ARGUMENTS _DECLARE_CCTK_CARGUMENTS
+#define _DECLARE_CCTK_CARGUMENTS          \
+  ptrdiff_t cctki_dummy_int;              \
+  CCTK_REAL cctk_time = cctkGH->PhysTime; \
+  int cctk_iteration = 1;                 \
+  int cctk_dim = 3;
+
+#define CCTK_EQUALS(a, b) (CCTK_Equals((a), (b)))
+
+#define CCTK_PASS_CTOC cctkGH
+
+#define CCTK_ORIGIN_SPACE(x) (cctk_origin_space[x] + cctk_delta_space[x] / cctk_levfac[x] * cctk_levoff[x] / cctk_levoffdenom[x])
+#define CCTK_DELTA_SPACE(x) (cctk_delta_space[x] / cctk_levfac[x])
+#define CCTK_DELTA_TIME (cctk_delta_time / cctk_timefac)
+#define CCTK_LSSH(stag, dim) cctk_lssh[(stag) + CCTK_NSTAGGER * (dim)]
+#define CCTK_LSSH_IDX(stag, dim) ((stag) + CCTK_NSTAGGER * (dim))
+
+#define CCTK_WARN(a, b) CCTK_Warn(a, __LINE__, __FILE__, CCTK_THORNSTRING, b)
+
+#define CCTK_MALLOC(s) CCTKi_Malloc(s, __LINE__, __FILE__)
+#define CCTK_FREE(p) CCTKi_Free(p)
+
+#define CCTK_INFO(a) CCTK_Info(CCTK_THORNSTRING, (a))
+#define CCTK_PARAMWARN(a) CCTK_ParamWarn(CCTK_THORNSTRING, (a))
+
+#endif
--- a/AMSS_NCKU_source/AHF_Direct/cctk_Config.h
+++ b/AMSS_NCKU_source/AHF_Direct/cctk_Config.h
@@ -0,0 +1,168 @@
+#ifndef _CCTK_CONFIG_H_
+#define _CCTK_CONFIG_H_
+
+#define STDC_HEADERS 1
+
+#define CCTK_FCALL 
+
+#define HAVE_GETHOSTBYNAME 1
+#define HAVE_GETOPT_LONG_ONLY 1
+#define HAVE_CRYPT 1
+#define HAVE_FINITE 1
+#define HAVE_ISNAN 1
+#define HAVE_ISINF 1
+#define HAVE_MKSTEMP 1
+#define HAVE_VA_COPY 1
+
+/* Do we have mode_t ? */
+#define HAVE_MODE_T 1
+
+#define HAVE_SOCKLEN_T 1
+#ifdef HAVE_SOCKLEN_T
+#  define CCTK_SOCKLEN_T socklen_t
+#else
+#  define CCTK_SOCKLEN_T int
+#endif
+
+#define HAVE_TIME_H 1
+#define HAVE_SYS_IOCTL_H 1
+#define HAVE_SYS_SOCKET_H 1
+#define HAVE_SYS_TIME_H 1
+#define HAVE_SYS_TYPES_H 1
+#define HAVE_UNISTD_H 1
+#define HAVE_STRING_H 1
+#define HAVE_ASSERT_H 1
+#define HAVE_TGMATH_H 1
+#define HAVE_SYS_STAT_H 1
+#define HAVE_GETOPT_H 1
+#define HAVE_REGEX_H 1
+#define HAVE_NETINET_IN_H 1
+#define HAVE_NETDB_H 1
+#define HAVE_ARPA_INET_H 1
+#define HAVE_CRYPT_H 1
+#define HAVE_DIRENT_H 1
+#define HAVE_SIGNAL_H 1
+#define HAVE_MALLOC_H 1
+#define HAVE_MALLINFO 1
+#define HAVE_MALLOPT 1
+#define HAVE_M_MMAP_THRESHOLD_VALUE 1
+
+#define TIME_WITH_SYS_TIME 1
+
+#define HAVE_VECTOR 1
+#define HAVE_VECTOR_H 1
+
+#define GETTIMEOFDAY_NEEDS_TIMEZONE 1
+
+#define CCTK_CACHELINE_BYTES 64
+/* Cache size in bytes (1 MiB).  The product is parenthesized so that the
+   macro expands to a single operand: without the parentheses an expression
+   such as  n / CCTK_CACHE_SIZE  would expand to  n / 1024 * 1024 . */
+#define CCTK_CACHE_SIZE (1024*1024)
+
+#define NULL_DEVICE "/dev/null"
+
+#define CCTK_BUILD_OS "linux-gnu"
+#define CCTK_BUILD_CPU "x86_64"
+#define CCTK_BUILD_VENDOR "unknown"
+
+#define SIZEOF_SHORT_INT 2
+#define SIZEOF_INT 4
+#define SIZEOF_LONG_INT 8
+#define SIZEOF_LONG_LONG 8
+#define SIZEOF_LONG_DOUBLE 16
+#define SIZEOF_DOUBLE 8
+#define SIZEOF_FLOAT 4
+#define SIZEOF_CHAR_P 8
+
+#define CCTK_REAL_PRECISION_8 1
+
+#define CCTK_INTEGER_PRECISION_4 1
+
+#define HAVE_CCTK_INT8 1
+#define HAVE_CCTK_INT4 1
+#define HAVE_CCTK_INT2 1
+#define HAVE_CCTK_INT1 1
+
+#define HAVE_CCTK_REAL16 1
+#define HAVE_CCTK_REAL8 1
+#define HAVE_CCTK_REAL4 1
+
+#define CCTK_INT8 long int
+#define CCTK_INT4 int
+#define CCTK_INT2 short int
+#define CCTK_INT1 signed char
+
+#define CCTK_REAL16 long double
+#define CCTK_REAL8 double
+#define CCTK_REAL4 float
+
+#ifndef __cplusplus
+
+#ifdef CCTK_C_RESTRICT
+#define restrict CCTK_C_RESTRICT
+#endif
+
+/* Allow the use of CCTK_RESTRICT as a qualifier always. */
+#ifdef CCTK_C_RESTRICT
+#define CCTK_RESTRICT CCTK_C_RESTRICT
+#else
+#define CCTK_RESTRICT restrict
+#endif
+
+#ifdef HAVE_CCTK_C_BOOL
+#define CCTK_HAVE_C_BOOL
+#endif
+
+#endif /* ! defined __cplusplus */
+/****************************************************************************/
+
+/****************************************************************************/
+/* C++ specific stuff */
+/****************************************************************************/
+#ifdef __cplusplus
+
+/* Some C++ compilers don't have bool ! */
+#define HAVE_CCTK_CXX_BOOL 1
+
+#ifndef HAVE_CCTK_CXX_BOOL
+typedef enum {false, true} bool;
+#else
+/* deprecated in beta15 */
+#define CCTK_HAVE_CXX_BOOL
+#endif
+
+/* Some C++ compilers recognise the restrict keyword */
+#define CCTK_CXX_RESTRICT __restrict__
+
+/* Since this is non-standard leave commented out for the moment */
+#if 0
+/* Define to empty if the keyword does not work. */
+#ifdef CCTK_CXX_RESTRICT
+#define restrict CCTK_CXX_RESTRICT
+#endif
+#endif
+
+/* Allow the use of CCTK_RESTRICT as a qualifier always. */
+#ifdef CCTK_CXX_RESTRICT
+#define CCTK_RESTRICT CCTK_CXX_RESTRICT
+#else
+#define CCTK_RESTRICT restrict
+#endif
+
+#endif /* __cplusplus */
+/****************************************************************************/
+
+#ifdef FCODE
+
+#define HAVE_CCTK_FORTRAN_REAL4 1
+#define HAVE_CCTK_FORTRAN_REAL8 1
+#define HAVE_CCTK_FORTRAN_REAL16 1
+
+#define HAVE_CCTK_FORTRAN_COMPLEX8 1
+#define HAVE_CCTK_FORTRAN_COMPLEX16 1
+#define HAVE_CCTK_FORTRAN_COMPLEX32 1
+
+#endif /* FCODE */
+
+/* Now include the code to pick an appropriate precision for reals and ints */
+#include "cctk_Types.h"
+
+#endif /* _CCTK_CONFIG_H_ */
--- a/AMSS_NCKU_source/AHF_Direct/cctk_Constants.h
+++ b/AMSS_NCKU_source/AHF_Direct/cctk_Constants.h
@@ -0,0 +1,57 @@
+#ifndef _CCTK_CONSTANTS_H_
+#define _CCTK_CONSTANTS_H_
+
+#define CCTK_VARIABLE_VOID             100
+#define CCTK_VARIABLE_BYTE             101
+#define CCTK_VARIABLE_INT              102
+#define CCTK_VARIABLE_INT1             103
+#define CCTK_VARIABLE_INT2             104
+#define CCTK_VARIABLE_INT4             105
+#define CCTK_VARIABLE_INT8             106
+#define CCTK_VARIABLE_REAL             107
+#define CCTK_VARIABLE_REAL4            108
+#define CCTK_VARIABLE_REAL8            109
+#define CCTK_VARIABLE_REAL16           110
+#define CCTK_VARIABLE_COMPLEX          111
+#define CCTK_VARIABLE_COMPLEX8         112
+#define CCTK_VARIABLE_COMPLEX16        113
+#define CCTK_VARIABLE_COMPLEX32        114
+#define CCTK_VARIABLE_CHAR             115
+#define CCTK_VARIABLE_STRING           116
+#define CCTK_VARIABLE_POINTER          117
+#define CCTK_VARIABLE_POINTER_TO_CONST 118
+#define CCTK_VARIABLE_FPOINTER         119
+
+/* DEPRECATED IN BETA 12 */
+#define CCTK_VARIABLE_FN_POINTER CCTK_VARIABLE_FPOINTER
+
+/* steerable status of parameters */
+#define CCTK_STEERABLE_NEVER   200
+#define CCTK_STEERABLE_ALWAYS  201
+#define CCTK_STEERABLE_RECOVER 202
+
+/* number of staggerings */
+#define CCTK_NSTAGGER      3
+
+/* group distributions */
+#define CCTK_DISTRIB_CONSTANT 301
+#define CCTK_DISTRIB_DEFAULT  302
+
+/* group types */
+#define CCTK_SCALAR 401
+#define CCTK_GF     402
+#define CCTK_ARRAY  403
+
+/* group scopes */
+#define CCTK_PRIVATE   501
+#define CCTK_PROTECTED 502
+#define CCTK_PUBLIC    503
+
+/* constants for CCTK_TraverseString() */
+#define CCTK_VAR          601
+#define CCTK_GROUP        602
+#define CCTK_GROUP_OR_VAR 603
+
+
+#endif /* _CCTK_CONSTANTS_H_ */
+
--- a/AMSS_NCKU_source/AHF_Direct/cctk_Types.h
+++ b/AMSS_NCKU_source/AHF_Direct/cctk_Types.h
@@ -0,0 +1,180 @@
+#ifndef _CCTK_TYPES_H_
+#define _CCTK_TYPES_H_
+
+#ifndef _CCTK_CONFIG_H_
+#include "cctk_Config.h"
+#endif
+
+typedef void *CCTK_POINTER;
+typedef const void *CCTK_POINTER_TO_CONST;
+typedef void (*CCTK_FPOINTER)(void);
+#define HAVE_CCTK_POINTER 1
+#define HAVE_CCTK_POINTER_TO_CONST 1
+#define HAVE_CCTK_FPOINTER 1
+
+/* Character types */
+typedef char CCTK_CHAR;
+typedef const char * CCTK_STRING;
+#define HAVE_CCTK_CHAR 1
+#define HAVE_CCTK_STRING 1
+
+/* Structures for complex types */
+
+/* Complex number whose real and imaginary parts are each a CCTK_REAL16.
+   Only provided when HAVE_CCTK_REAL16 is defined. */
+#ifdef HAVE_CCTK_REAL16
+#define HAVE_CCTK_COMPLEX32 1
+typedef struct CCTK_COMPLEX32
+{
+  CCTK_REAL16 Re; /* real part */
+  CCTK_REAL16 Im; /* imaginary part */
+#ifdef __cplusplus
+  /* read-only accessors, available to C++ code only */
+  CCTK_REAL16 real() const { return Re; }
+  CCTK_REAL16 imag() const { return Im; }
+#endif
+} CCTK_COMPLEX32;
+#endif
+
+/* Complex number whose real and imaginary parts are each a CCTK_REAL8.
+   Only provided when HAVE_CCTK_REAL8 is defined. */
+#ifdef HAVE_CCTK_REAL8
+#define HAVE_CCTK_COMPLEX16 1
+typedef struct CCTK_COMPLEX16
+{
+  CCTK_REAL8 Re; /* real part */
+  CCTK_REAL8 Im; /* imaginary part */
+#ifdef __cplusplus
+  /* read-only accessors, available to C++ code only */
+  CCTK_REAL8 real() const { return Re; }
+  CCTK_REAL8 imag() const { return Im; }
+#endif
+} CCTK_COMPLEX16;
+#endif
+
+/* Complex number whose real and imaginary parts are each a CCTK_REAL4.
+   Only provided when HAVE_CCTK_REAL4 is defined. */
+#ifdef HAVE_CCTK_REAL4
+#define HAVE_CCTK_COMPLEX8 1
+typedef struct CCTK_COMPLEX8
+{
+  CCTK_REAL4 Re; /* real part */
+  CCTK_REAL4 Im; /* imaginary part */
+#ifdef __cplusplus
+  /* read-only accessors, available to C++ code only */
+  CCTK_REAL4 real() const { return Re; }
+  CCTK_REAL4 imag() const { return Im; }
+#endif
+} CCTK_COMPLEX8;
+#endif
+
+/* Small positive integer type */
+typedef unsigned char CCTK_BYTE;
+#define HAVE_CCTK_BYTE 1
+
+/* Define stuff for fortran. */
+#ifdef FCODE
+
+#define CCTK_POINTER          integer*SIZEOF_CHAR_P
+#define CCTK_POINTER_TO_CONST integer*SIZEOF_CHAR_P
+/* TODO: add autoconf for determining the size of function pointers */
+#define CCTK_FPOINTER         integer*SIZEOF_CHAR_P
+#define HAVE_CCTK_POINTER 1
+#define HAVE_CCTK_POINTER_TO_CONST 1
+#define HAVE_CCTK_FPOINTER 1
+
+/* Character types */
+/* A single character does not exist in Fortran; in Fortran, all
+   character types are strings.  Hence we do not define CCTK_CHAR.  */
+/* #define CCTK_CHAR   CHARACTER */
+/* #define HAVE_CCTK_CHAR 1 */
+/* This is a C-string, i.e., only a pointer */
+#define CCTK_STRING CCTK_POINTER_TO_CONST
+#define HAVE_CCTK_STRING 1
+
+#ifdef HAVE_CCTK_INT8
+#define CCTK_INT8 INTEGER*8
+#endif
+#ifdef HAVE_CCTK_INT4
+#define CCTK_INT4 INTEGER*4
+#endif
+#ifdef HAVE_CCTK_INT2
+#define CCTK_INT2 INTEGER*2
+#endif
+#ifdef HAVE_CCTK_INT1
+#define CCTK_INT1 INTEGER*1
+#endif
+
+#ifdef HAVE_CCTK_REAL16
+#define CCTK_REAL16 REAL*16
+#define HAVE_CCTK_COMPLEX32 1
+#define CCTK_COMPLEX32  COMPLEX*32
+#endif
+
+#ifdef HAVE_CCTK_REAL8
+#define CCTK_REAL8  REAL*8
+#define HAVE_CCTK_COMPLEX16 1
+#define CCTK_COMPLEX16  COMPLEX*16
+#endif
+
+#ifdef HAVE_CCTK_REAL4
+#define CCTK_REAL4  REAL*4
+#define HAVE_CCTK_COMPLEX8 1
+#define CCTK_COMPLEX8   COMPLEX*8
+#endif
+
+/* Should be unsigned, but Fortran doesn't have that */
+#define CCTK_BYTE INTEGER*1
+#define HAVE_CCTK_BYTE 1
+
+#endif /*FCODE */
+
+/* Now pick the types based upon the precision variable. */
+
+/* Floating point precision */
+#ifdef CCTK_REAL_PRECISION_16
+#define CCTK_REAL_PRECISION 16
+#define CCTK_REAL CCTK_REAL16
+#endif
+
+#ifdef CCTK_REAL_PRECISION_8
+#define CCTK_REAL_PRECISION 8
+#define CCTK_REAL CCTK_REAL8
+#endif
+
+#ifdef CCTK_REAL_PRECISION_4
+#define CCTK_REAL_PRECISION 4
+#define CCTK_REAL CCTK_REAL4
+#endif
+
+/* Integer precision */
+
+#ifdef CCTK_INTEGER_PRECISION_8
+#define CCTK_INTEGER_PRECISION 8
+#define CCTK_INT CCTK_INT8
+#endif
+
+#ifdef CCTK_INTEGER_PRECISION_4
+#define CCTK_INTEGER_PRECISION 4
+#define CCTK_INT CCTK_INT4
+#endif
+
+#ifdef CCTK_INTEGER_PRECISION_2
+#define CCTK_INTEGER_PRECISION 2
+#define CCTK_INT CCTK_INT2
+#endif
+
+#ifdef CCTK_INTEGER_PRECISION_1
+#define CCTK_INTEGER_PRECISION 1
+#define CCTK_INT CCTK_INT1
+#endif
+
+/* Complex precision */
+#ifdef CCTK_REAL_PRECISION_16
+#define CCTK_COMPLEX_PRECISION 32
+#define CCTK_COMPLEX CCTK_COMPLEX32
+#endif
+
+#ifdef CCTK_REAL_PRECISION_8
+#define CCTK_COMPLEX_PRECISION 16
+#define CCTK_COMPLEX CCTK_COMPLEX16
+#endif
+
+#ifdef CCTK_REAL_PRECISION_4
+#define CCTK_COMPLEX_PRECISION 8
+#define CCTK_COMPLEX CCTK_COMPLEX8
+#endif
+
+#endif /*_CCTK_TYPES_H_ */
+
--- a/AMSS_NCKU_source/AHF_Direct/config.h
+++ b/AMSS_NCKU_source/AHF_Direct/config.h
@@ -0,0 +1,16 @@
+#ifndef AHFINDERDIRECT__CONFIG_H
+#define AHFINDERDIRECT__CONFIG_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+
+/* Bounded string concatenate/copy helpers; only declared here, the
+   definitions live elsewhere. */
+size_t Util_Strlcat(char* dst, const char* src, size_t dst_size);
+size_t Util_Strlcpy(char* dst, const char* src, size_t dst_size);
+
+/* n.b. this header does not itself define CCTK_REAL or CCTK_INT, so it
+        must be included after a header that does (e.g. "cctk.h") */
+
+/* floating-point type used by this code: an alias for CCTK_REAL */
+typedef CCTK_REAL fp;
+
+/* integer type: an alias for CCTK_INT */
+typedef CCTK_INT integer;
+
+#endif	/* AHFINDERDIRECT__CONFIG_H */
--- a/AMSS_NCKU_source/AHF_Direct/coords.C
+++ b/AMSS_NCKU_source/AHF_Direct/coords.C
@@ -0,0 +1,533 @@
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+#include <limits.h>
+
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+
+#include "coords.h"
+
+namespace AHFinderDirect
+{
+	using jtutil::arctan_xy;
+	using jtutil::error_exit;
+	using jtutil::hypot3;
+	using jtutil::pow2;
+	using jtutil::signum;
+
+	namespace local_coords
+	{
+
+		// Are two angles (in radians) fuzzily equal modulo 2*pi ?
+		bool fuzzy_EQ_ang(fp ang1, fp ang2)
+		{
+			// the angles agree mod 2*pi iff their difference is a whole
+			// number of full turns
+			const fp n_turns = (ang2 - ang1) / (2.0 * PI);
+			return jtutil::fuzzy<fp>::is_integer(n_turns);
+		}
+
+		// Are two angles (in degrees) fuzzily equal modulo 360 degrees?
+		bool fuzzy_EQ_dang(fp dang1, fp dang2)
+		{
+			const fp n_turns = (dang2 - dang1) / 360.0;
+			return jtutil::fuzzy<fp>::is_integer(n_turns);
+		}
+
+	}
+
+	namespace local_coords
+	{
+
+		// Reduce an angle in radians modulo 2*pi into [min_ang, max_ang];
+		// the actual work is done by  jtutil::modulo_reduce() .
+		fp modulo_reduce_ang(fp ang, fp min_ang, fp max_ang)
+		{
+			const fp reduced_ang = jtutil::modulo_reduce(ang, 2.0 * PI, min_ang, max_ang);
+			return reduced_ang;
+		}
+
+		// Same as  modulo_reduce_ang() , but for angles in degrees (period 360).
+		fp modulo_reduce_dang(fp dang, fp min_dang, fp max_dang)
+		{
+			const fp reduced_dang = jtutil::modulo_reduce(dang, 360.0, min_dang, max_dang);
+			return reduced_dang;
+		}
+
+	}
+
+	namespace local_coords
+	{
+		//
+		// This function converts (r,mu,nu) to Cartesian (x,y,z), using
+		//	tan(mu) = y/z ,  tan(nu) = x/z ,  x^2 + y^2 + z^2 = r^2
+		//
+		// Arguments:
+		// r = (in) The radius.
+		// mu,nu = (in) The angular coordinates.
+		// x,y,z = (out) The Cartesian coordinates.
+		//
+		// The conversion is singular where cos(mu) == 0 or cos(nu) == 0;
+		// these cases, and (mu,nu) which disagree about the sign of z,
+		// are caught by assert().
+		//
+		void xyz_of_r_mu_nu(fp r, fp mu, fp nu, fp &x, fp &y, fp &z)
+		{
+			const fp sign_z_via_mu = signum(cos(mu));
+			assert(jtutil::fuzzy<fp>::NE(cos(mu), 0.0));
+			const fp y_over_z = tan(mu);
+
+			const fp sign_z_via_nu = signum(cos(nu));
+			assert(jtutil::fuzzy<fp>::NE(cos(nu), 0.0));
+			const fp x_over_z = tan(nu);
+
+			// failure of next assert() ==> inconsistent input (mu,nu)
+			assert(sign_z_via_mu == sign_z_via_nu);
+			const fp sign_z = sign_z_via_mu;
+
+			// r^2 = z^2 * (1 + (y/z)^2 + (x/z)^2)  ==>  |z| = r * temp
+			const fp temp = 1.0 / sqrt(1.0 + pow2(y_over_z) + pow2(x_over_z));
+
+			z = sign_z * r * temp;
+			x = x_over_z * z;
+			y = y_over_z * z;
+		}
+	}
+
+	namespace local_coords
+	{
+		//
+		// This function converts (r,mu,phi) to Cartesian (x,y,z).  It works
+		// with the complementary angles
+		//	mu_bar = pi/2 - mu ,  phi_bar = pi/2 - phi
+		// for which
+		//	tan(mu_bar) = z/y ,  tan(phi_bar) = x/y ,  x^2 + y^2 + z^2 = r^2
+		//
+		// Arguments:
+		// r = (in) The radius.
+		// mu,phi = (in) The angular coordinates.
+		// x,y,z = (out) The Cartesian coordinates.
+		//
+		// The conversion is singular where cos(mu_bar) == 0 or
+		// cos(phi_bar) == 0; these cases, and (mu,phi) which disagree about
+		// the sign of y, are caught by assert().
+		//
+		void xyz_of_r_mu_phi(fp r, fp mu, fp phi, fp &x, fp &y, fp &z)
+		{
+			const fp mu_bar = 0.5 * PI - mu;
+			const fp phi_bar = 0.5 * PI - phi;
+
+			const fp sign_y_via_mu_bar = signum(cos(mu_bar));
+			assert(jtutil::fuzzy<fp>::NE(cos(mu_bar), 0.0));
+			const fp z_over_y = tan(mu_bar);
+
+			const fp sign_y_via_phi_bar = signum(cos(phi_bar));
+			assert(jtutil::fuzzy<fp>::NE(cos(phi_bar), 0.0));
+			const fp x_over_y = tan(phi_bar);
+
+			// failure of next assert() ==> inconsistent input (mu,phi)
+			assert(sign_y_via_mu_bar == sign_y_via_phi_bar);
+			const fp sign_y = sign_y_via_mu_bar;
+
+			// r^2 = y^2 * (1 + (z/y)^2 + (x/y)^2)  ==>  |y| = r * temp
+			const fp temp = 1.0 / sqrt(1.0 + pow2(z_over_y) + pow2(x_over_y));
+
+			y = sign_y * r * temp;
+			z = z_over_y * y;
+			x = x_over_y * y;
+		}
+	}
+	namespace local_coords
+	{
+		//
+		// This function converts (r,nu,phi) to Cartesian (x,y,z).  It works
+		// with the complementary angle  nu_bar = pi/2 - nu , for which
+		//	tan(nu_bar) = z/x ,  tan(phi) = y/x ,  x^2 + y^2 + z^2 = r^2
+		//
+		// Arguments:
+		// r = (in) The radius.
+		// nu,phi = (in) The angular coordinates.
+		// x,y,z = (out) The Cartesian coordinates.
+		//
+		// The conversion is singular where cos(nu_bar) == 0 or cos(phi) == 0;
+		// these cases, and (nu,phi) which disagree about the sign of x,
+		// are caught by assert().
+		//
+		void xyz_of_r_nu_phi(fp r, fp nu, fp phi, fp &x, fp &y, fp &z)
+		{
+			const fp nu_bar = 0.5 * PI - nu;
+
+			const fp sign_x_via_nu_bar = signum(cos(nu_bar));
+			assert(jtutil::fuzzy<fp>::NE(cos(nu_bar), 0.0));
+			const fp z_over_x = tan(nu_bar);
+
+			const fp sign_x_via_phi = signum(cos(phi));
+			assert(jtutil::fuzzy<fp>::NE(cos(phi), 0.0));
+			const fp y_over_x = tan(phi);
+
+			// failure of next assert() ==> inconsistent input (nu,phi)
+			assert(sign_x_via_nu_bar == sign_x_via_phi);
+			const fp sign_x = sign_x_via_nu_bar;
+
+			// r^2 = x^2 * (1 + (z/x)^2 + (y/x)^2)  ==>  |x| = r * temp
+			const fp temp = 1.0 / sqrt(1.0 + pow2(z_over_x) + pow2(y_over_x));
+
+			x = sign_x * r * temp;
+			z = z_over_x * x;
+			y = y_over_x * x;
+		}
+	}
+	namespace local_coords
+	{
+		// Compute phi from (mu,nu) by going through the Cartesian
+		// coordinates of the corresponding point at unit radius.
+		fp phi_of_mu_nu(fp mu, fp nu)
+		{
+			fp unit_x, unit_y, unit_z;
+			xyz_of_r_mu_nu(1.0, mu, nu, unit_x, unit_y, unit_z);
+
+			const fp phi = phi_of_xy(unit_x, unit_y);
+			return phi;
+		}
+	}
+
+	namespace local_coords
+	{
+		// Compute nu from (mu,phi) by going through the Cartesian
+		// coordinates of the corresponding point at unit radius.
+		fp nu_of_mu_phi(fp mu, fp phi)
+		{
+			fp unit_x, unit_y, unit_z;
+			xyz_of_r_mu_phi(1.0, mu, phi, unit_x, unit_y, unit_z);
+
+			const fp nu = nu_of_xz(unit_x, unit_z);
+			return nu;
+		}
+	}
+
+	//**************************************
+
+	// ill-conditioned near x axis
+	// not valid in yz plane (sin(nu) == 0 || cos(phi) == 0)
+	namespace local_coords
+	{
+		// Compute mu from (nu,phi) by going through the Cartesian
+		// coordinates of the corresponding point at unit radius.
+		fp mu_of_nu_phi(fp nu, fp phi)
+		{
+			fp unit_x, unit_y, unit_z;
+			xyz_of_r_nu_phi(1.0, nu, phi, unit_x, unit_y, unit_z);
+
+			const fp mu = mu_of_yz(unit_y, unit_z);
+			return mu;
+		}
+	}
+
+	//******************************************************************************
+
+	//
+	// these functions convert Cartesian (x,y,z) to the radius r and to the
+	// individual (mu,nu,phi) angles; each angle depends on only two of the
+	// Cartesian coordinates
+	//
+	namespace local_coords
+	{
+		// radius, via  jtutil::hypot3()
+		fp r_of_xyz(fp x, fp y, fp z) { return hypot3(x, y, z); }
+		// each angle is computed by  jtutil::arctan_xy() ; note the
+		// argument order, which is what makes these the inverses of
+		//	tan(mu) = y/z ,  tan(nu) = x/z ,  tan(phi) = y/x
+		// NOTE(review): presumably arctan_xy(a,b) is the angle whose
+		// tangent is b/a, chosen by quadrant -- confirm against util.h
+		fp mu_of_yz(fp y, fp z) { return arctan_xy(z, y); }
+		fp nu_of_xz(fp x, fp z) { return arctan_xy(z, x); }
+		fp phi_of_xy(fp x, fp y) { return arctan_xy(x, y); }
+	}
+
+	namespace local_coords
+	{
+		//
+		// This function computes the partial derivatives
+		//	partial {x,y,z} / partial {r,mu,nu}
+		// at the point (r,mu,nu), returning them via the 9 reference
+		// arguments.
+		//
+		// r must be (fuzzily) nonzero; this is checked by assert().
+		// (x,y,z) are obtained from  xyz_of_r_mu_nu() , so that function's
+		// restrictions on (mu,nu) apply here too.
+		//
+		void partial_xyz_wrt_r_mu_nu(fp r, fp mu, fp nu,
+									 fp &partial_x_wrt_r, fp &partial_x_wrt_mu, fp &partial_x_wrt_nu,
+									 fp &partial_y_wrt_r, fp &partial_y_wrt_mu, fp &partial_y_wrt_nu,
+									 fp &partial_z_wrt_r, fp &partial_z_wrt_mu, fp &partial_z_wrt_nu)
+		{
+			const fp tan_mu = tan(mu);
+			const fp tan_nu = tan(nu);
+			const fp tan2_mu = pow2(tan_mu);
+			const fp tan2_nu = pow2(tan_nu);
+
+			fp x, y, z;
+			xyz_of_r_mu_nu(r, mu, nu, x, y, z);
+
+			// (x,y,z) are each linear in r at fixed (mu,nu),
+			// so  partial {x,y,z} / partial r = {x,y,z} / r
+			assert(jtutil::fuzzy<fp>::NE(r, 0.0));
+			const fp rinv = 1.0 / r;
+			partial_x_wrt_r = x * rinv;
+			partial_y_wrt_r = y * rinv;
+			partial_z_wrt_r = z * rinv;
+
+			// d/d(angle) of tan^2(angle) is 2*tan(angle)*(1 + tan^2(angle))
+			const fp t = 1 + tan2_mu + tan2_nu; // = $r^2/z^2$
+			const fp partial_t_wrt_mu = 2.0 * tan_mu * (1.0 + tan2_mu);
+			const fp partial_t_wrt_nu = 2.0 * tan_nu * (1.0 + tan2_nu);
+
+			// z = +/- r / sqrt(t)  ==>  partial z / partial t = -(1/2) * r^2/(z*t^2)
+			const fp r2_over_zt2 = (r * r) / (z * t * t);
+			partial_z_wrt_mu = -0.5 * r2_over_zt2 * partial_t_wrt_mu;
+			partial_z_wrt_nu = -0.5 * r2_over_zt2 * partial_t_wrt_nu;
+
+			// x = tan(nu) * z  and  y = tan(mu) * z , differentiated by
+			// the product rule
+			partial_x_wrt_mu = tan_nu * partial_z_wrt_mu;
+			partial_x_wrt_nu = tan_nu * partial_z_wrt_nu + z * (1.0 + tan2_nu);
+			partial_y_wrt_mu = tan_mu * partial_z_wrt_mu + z * (1.0 + tan2_mu);
+			partial_y_wrt_nu = tan_mu * partial_z_wrt_nu;
+		}
+	}
+
+	//**************************************
+
+	namespace local_coords
+	{
+		//
+		// This function computes the partial derivatives
+		//	partial {x,y,z} / partial {r,mu,phi}
+		// at the point (r,mu,phi), returning them via the 9 reference
+		// arguments.
+		//
+		// r must be (fuzzily) nonzero; this is checked by assert().
+		// (x,y,z) are obtained from  xyz_of_r_mu_phi() , so that function's
+		// restrictions on (mu,phi) apply here too.
+		//
+		void partial_xyz_wrt_r_mu_phi(fp r, fp mu, fp phi,
+									  fp &partial_x_wrt_r, fp &partial_x_wrt_mu, fp &partial_x_wrt_phi,
+									  fp &partial_y_wrt_r, fp &partial_y_wrt_mu, fp &partial_y_wrt_phi,
+									  fp &partial_z_wrt_r, fp &partial_z_wrt_mu, fp &partial_z_wrt_phi)
+		{
+			// complementary angles:  tan(mu_bar) = z/y ,  tan(phi_bar) = x/y
+			const fp mu_bar = 0.5 * PI - mu;
+			const fp phi_bar = 0.5 * PI - phi;
+
+			const fp tan_mu_bar = tan(mu_bar);
+			const fp tan_phi_bar = tan(phi_bar);
+			const fp tan2_mu_bar = pow2(tan_mu_bar);
+			const fp tan2_phi_bar = pow2(tan_phi_bar);
+
+			fp x, y, z;
+			xyz_of_r_mu_phi(r, mu, phi, x, y, z);
+
+			// (x,y,z) are each linear in r at fixed (mu,phi),
+			// so  partial {x,y,z} / partial r = {x,y,z} / r
+			assert(jtutil::fuzzy<fp>::NE(r, 0.0));
+			const fp rinv = 1.0 / r;
+			partial_x_wrt_r = x * rinv;
+			partial_y_wrt_r = y * rinv;
+			partial_z_wrt_r = z * rinv;
+
+			const fp t = 1 + tan2_mu_bar + tan2_phi_bar; // = $r^2/y^2$
+			const fp partial_t_wrt_mu_bar = 2.0 * tan_mu_bar * (1.0 + tan2_mu_bar);
+			const fp partial_t_wrt_phi_bar = 2.0 * tan_phi_bar * (1.0 + tan2_phi_bar);
+
+			// y = +/- r / sqrt(t)  ==>  partial y / partial t = -(1/2) * r^2/(y*t^2) ;
+			// the sign flips to + here because
+			// d(mu_bar)/d(mu) = d(phi_bar)/d(phi) = -1
+			const fp r2_over_yt2 = (r * r) / (y * t * t);
+			partial_y_wrt_mu = 0.5 * r2_over_yt2 * partial_t_wrt_mu_bar;
+			partial_y_wrt_phi = 0.5 * r2_over_yt2 * partial_t_wrt_phi_bar;
+
+			// x = tan(phi_bar) * y  and  z = tan(mu_bar) * y , differentiated
+			// by the product rule (again with the -1 from the barred angles)
+			partial_x_wrt_mu = tan_phi_bar * partial_y_wrt_mu;
+			partial_x_wrt_phi = tan_phi_bar * partial_y_wrt_phi - y * (1.0 + tan2_phi_bar);
+			partial_z_wrt_mu = tan_mu_bar * partial_y_wrt_mu - y * (1.0 + tan2_mu_bar);
+			partial_z_wrt_phi = tan_mu_bar * partial_y_wrt_phi;
+		}
+	}
+
+	//**************************************
+
+	namespace local_coords
+	{
+		//
+		// This function computes the partial derivatives
+		//	partial {x,y,z} / partial {r,nu,phi}
+		// at the point (r,nu,phi), returning them via the 9 reference
+		// arguments.
+		//
+		// r must be (fuzzily) nonzero; this is checked by assert().
+		// (x,y,z) are obtained from  xyz_of_r_nu_phi() , so that function's
+		// restrictions on (nu,phi) apply here too.
+		//
+		void partial_xyz_wrt_r_nu_phi(fp r, fp nu, fp phi,
+									  fp &partial_x_wrt_r, fp &partial_x_wrt_nu, fp &partial_x_wrt_phi,
+									  fp &partial_y_wrt_r, fp &partial_y_wrt_nu, fp &partial_y_wrt_phi,
+									  fp &partial_z_wrt_r, fp &partial_z_wrt_nu, fp &partial_z_wrt_phi)
+		{
+			// complementary angle:  tan(nu_bar) = z/x  (and  tan(phi) = y/x )
+			const fp nu_bar = 0.5 * PI - nu;
+
+			const fp tan_nu_bar = tan(nu_bar);
+			const fp tan_phi = tan(phi);
+			const fp tan2_nu_bar = pow2(tan_nu_bar);
+			const fp tan2_phi = pow2(tan_phi);
+
+			fp x, y, z;
+			xyz_of_r_nu_phi(r, nu, phi, x, y, z);
+
+			// (x,y,z) are each linear in r at fixed (nu,phi),
+			// so  partial {x,y,z} / partial r = {x,y,z} / r
+			assert(jtutil::fuzzy<fp>::NE(r, 0.0));
+			const fp rinv = 1.0 / r;
+			partial_x_wrt_r = x * rinv;
+			partial_y_wrt_r = y * rinv;
+			partial_z_wrt_r = z * rinv;
+
+			const fp t = 1 + tan2_nu_bar + tan2_phi; // = $r^2/x^2$
+			const fp partial_t_wrt_nu_bar = 2.0 * tan_nu_bar * (1.0 + tan2_nu_bar);
+			const fp partial_t_wrt_phi = 2.0 * tan_phi * (1.0 + tan2_phi);
+
+			// x = +/- r / sqrt(t)  ==>  partial x / partial t = -(1/2) * r^2/(x*t^2) ;
+			// the nu derivative has a + sign because  d(nu_bar)/d(nu) = -1
+			const fp r2_over_xt2 = (r * r) / (x * t * t);
+			partial_x_wrt_nu = 0.5 * r2_over_xt2 * partial_t_wrt_nu_bar;
+			partial_x_wrt_phi = -0.5 * r2_over_xt2 * partial_t_wrt_phi;
+
+			// y = tan(phi) * x  and  z = tan(nu_bar) * x , differentiated
+			// by the product rule
+			partial_y_wrt_nu = tan_phi * partial_x_wrt_nu;
+			partial_y_wrt_phi = tan_phi * partial_x_wrt_phi + x * (1.0 + tan2_phi);
+			partial_z_wrt_nu = tan_nu_bar * partial_x_wrt_nu - x * (1.0 + tan2_nu_bar);
+			partial_z_wrt_phi = tan_nu_bar * partial_x_wrt_phi;
+		}
+	}
+
+	//******************************************************************************
+
+	//
+	// these functions compute the partial derivatives
+	//	partial {mu,nu,phi} / partial {x,y,z}
+	// as computed by the maple file "coord_derivs.{maple,out}" in this directory
+	//
+	namespace local_coords
+	{
+		// mu depends only on (y,z)
+		fp partial_mu_wrt_y(fp y, fp z)
+		{
+			const fp yz_norm2 = y * y + z * z;
+			return z / yz_norm2;
+		}
+		fp partial_mu_wrt_z(fp y, fp z)
+		{
+			const fp yz_norm2 = y * y + z * z;
+			return -y / yz_norm2;
+		}
+
+		// nu depends only on (x,z)
+		fp partial_nu_wrt_x(fp x, fp z)
+		{
+			const fp xz_norm2 = x * x + z * z;
+			return z / xz_norm2;
+		}
+		fp partial_nu_wrt_z(fp x, fp z)
+		{
+			const fp xz_norm2 = x * x + z * z;
+			return -x / xz_norm2;
+		}
+
+		// phi depends only on (x,y)
+		fp partial_phi_wrt_x(fp x, fp y)
+		{
+			const fp xy_norm2 = x * x + y * y;
+			return -y / xy_norm2;
+		}
+		fp partial_phi_wrt_y(fp x, fp y)
+		{
+			const fp xy_norm2 = x * x + y * y;
+			return x / xy_norm2;
+		}
+	}
+
+	//******************************************************************************
+
+	//
+	// these functions compute the 2nd partial derivatives
+	//	partial {mu,nu,phi} / partial {xx,xy,xz,yy,yz,zz}
+	// as computed by the maple file "coord_derivs.{maple,out}" in this directory
+	//
+	namespace local_coords
+	{
+		// 2nd derivatives of mu, which depends only on (y,z)
+		fp partial2_mu_wrt_yy(fp y, fp z)
+		{
+			const fp yz_norm2 = y * y + z * z;
+			return -2.0 * y * z / pow2(yz_norm2);
+		}
+		fp partial2_mu_wrt_yz(fp y, fp z)
+		{
+			const fp yz_norm2 = y * y + z * z;
+			return (y * y - z * z) / pow2(yz_norm2);
+		}
+		fp partial2_mu_wrt_zz(fp y, fp z)
+		{
+			const fp yz_norm2 = y * y + z * z;
+			return 2.0 * y * z / pow2(yz_norm2);
+		}
+
+		// 2nd derivatives of nu, which depends only on (x,z)
+		fp partial2_nu_wrt_xx(fp x, fp z)
+		{
+			const fp xz_norm2 = x * x + z * z;
+			return -2.0 * x * z / pow2(xz_norm2);
+		}
+		fp partial2_nu_wrt_xz(fp x, fp z)
+		{
+			const fp xz_norm2 = x * x + z * z;
+			return (x * x - z * z) / pow2(xz_norm2);
+		}
+		fp partial2_nu_wrt_zz(fp x, fp z)
+		{
+			const fp xz_norm2 = x * x + z * z;
+			return 2.0 * x * z / pow2(xz_norm2);
+		}
+
+		// 2nd derivatives of phi, which depends only on (x,y)
+		fp partial2_phi_wrt_xx(fp x, fp y)
+		{
+			const fp xy_norm2 = x * x + y * y;
+			return 2.0 * x * y / pow2(xy_norm2);
+		}
+		fp partial2_phi_wrt_xy(fp x, fp y)
+		{
+			const fp xy_norm2 = x * x + y * y;
+			return (y * y - x * x) / pow2(xy_norm2);
+		}
+		fp partial2_phi_wrt_yy(fp x, fp y)
+		{
+			const fp xy_norm2 = x * x + y * y;
+			return -2.0 * x * y / pow2(xy_norm2);
+		}
+	}
+
+	namespace local_coords
+	{
+		// Convert usual polar spherical (r,theta,phi) to Cartesian (x,y,z):
+		// theta is measured from the +z axis, phi is the angle in the
+		// xy plane measured from the +x axis.
+		void xyz_of_r_theta_phi(fp r, fp theta, fp phi, fp &x, fp &y, fp &z)
+		{
+			// projection onto the xy plane has length  r*sin(theta)
+			x = r * sin(theta) * cos(phi);
+			y = r * sin(theta) * sin(phi);
+
+			// height above the xy plane
+			z = r * cos(theta);
+		}
+	}
+
+	//**************************************
+
+	namespace local_coords
+	{
+		// Convert Cartesian (x,y,z) to usual polar spherical (r,theta,phi);
+		// each output is computed by its own single-coordinate helper.
+		void r_theta_phi_of_xyz(fp x, fp y, fp z, fp &r, fp &theta, fp &phi)
+		{
+			const fp radius = r_of_xyz(x, y, z);
+			const fp polar_angle = theta_of_xyz(x, y, z);
+			const fp azimuth = phi_of_xy(x, y);
+
+			r = radius;
+			theta = polar_angle;
+			phi = azimuth;
+		}
+	}
+
+	//**************************************
+
+	namespace local_coords
+	{
+		// Compute the polar spherical angle theta of the point (x,y,z)
+		// from z and the point's distance  hypot(x,y)  from the z axis.
+		// NOTE(review): presumably arctan_xy(a,b) is the angle whose
+		// tangent is b/a, chosen by quadrant -- confirm against util.h
+		fp theta_of_xyz(fp x, fp y, fp z)
+		{
+			return arctan_xy(z, hypot(x, y));
+		}
+	}
+
+	//******************************************************************************
+
+	//
+	// these functions convert ((mu,nu,phi)) <--> usual polar spherical (theta,phi)
+	// ... note phi is the same coordinate in both systems
+	//
+
+	namespace local_coords
+	{
+		// Convert (mu,nu) to polar spherical (theta,phi) via the Cartesian
+		// coordinates of the corresponding point at unit radius.
+		void theta_phi_of_mu_nu(fp mu, fp nu, fp &ps_theta, fp &ps_phi)
+		{
+			fp unit_x, unit_y, unit_z;
+			xyz_of_r_mu_nu(1.0, mu, nu, unit_x, unit_y, unit_z);
+
+			ps_theta = theta_of_xyz(unit_x, unit_y, unit_z);
+			ps_phi = phi_of_xy(unit_x, unit_y);
+		}
+	}
+
+	//**************************************
+
+	// Bugs: computes ps_phi via trig, even though it's trivially == phi
+	namespace local_coords
+	{
+		// Convert (mu,phi) to polar spherical (theta,phi) via the Cartesian
+		// coordinates of the corresponding point at unit radius.
+		void theta_phi_of_mu_phi(fp mu, fp phi, fp &ps_theta, fp &ps_phi)
+		{
+			fp unit_x, unit_y, unit_z;
+			xyz_of_r_mu_phi(1.0, mu, phi, unit_x, unit_y, unit_z);
+
+			ps_theta = theta_of_xyz(unit_x, unit_y, unit_z);
+			ps_phi = phi_of_xy(unit_x, unit_y);
+
+			// sanity check: phi is the same coordinate in both systems
+			assert(fuzzy_EQ_ang(ps_phi, phi));
+		}
+	}
+
+	//**************************************
+
+	// Bugs: computes ps_phi via trig, even though it's trivially == phi
+	namespace local_coords
+	{
+		// Convert (nu,phi) to polar spherical (theta,phi) via the Cartesian
+		// coordinates of the corresponding point at unit radius.
+		void theta_phi_of_nu_phi(fp nu, fp phi, fp &ps_theta, fp &ps_phi)
+		{
+			fp unit_x, unit_y, unit_z;
+			xyz_of_r_nu_phi(1.0, nu, phi, unit_x, unit_y, unit_z);
+
+			ps_theta = theta_of_xyz(unit_x, unit_y, unit_z);
+			ps_phi = phi_of_xy(unit_x, unit_y);
+
+			// sanity check: phi is the same coordinate in both systems
+			assert(fuzzy_EQ_ang(ps_phi, phi));
+		}
+	}
+
+	//******************************************************************************
+
+	namespace local_coords
+	{
+		// Convert polar spherical (theta,phi) to (mu,nu) via the Cartesian
+		// coordinates of the corresponding point at unit radius.
+		void mu_nu_of_theta_phi(fp ps_theta, fp ps_phi, fp &mu, fp &nu)
+		{
+			fp unit_x, unit_y, unit_z;
+			xyz_of_r_theta_phi(1.0, ps_theta, ps_phi, unit_x, unit_y, unit_z);
+
+			mu = mu_of_yz(unit_y, unit_z);
+			nu = nu_of_xz(unit_x, unit_z);
+		}
+	}
+
+	//**************************************
+
+	// Bugs: computes phi via trig, even though it's trivially == ps_phi
+	namespace local_coords
+	{
+		// Convert polar spherical (theta,phi) to (mu,phi) via the Cartesian
+		// coordinates of the corresponding point at unit radius.
+		void mu_phi_of_theta_phi(fp ps_theta, fp ps_phi, fp &mu, fp &phi)
+		{
+			fp unit_x, unit_y, unit_z;
+			xyz_of_r_theta_phi(1.0, ps_theta, ps_phi, unit_x, unit_y, unit_z);
+
+			mu = mu_of_yz(unit_y, unit_z);
+			phi = phi_of_xy(unit_x, unit_y);
+
+			// sanity check: phi is the same coordinate in both systems
+			assert(fuzzy_EQ_ang(phi, ps_phi));
+		}
+	}
+
+	//**************************************
+
+	// Bugs: computes phi via trig, even though it's trivially == ps_phi
+	namespace local_coords
+	{
+		// Convert polar spherical (theta,phi) to (nu,phi) via the Cartesian
+		// coordinates of the corresponding point at unit radius.
+		void nu_phi_of_theta_phi(fp ps_theta, fp ps_phi, fp &nu, fp &phi)
+		{
+			fp unit_x, unit_y, unit_z;
+			xyz_of_r_theta_phi(1.0, ps_theta, ps_phi, unit_x, unit_y, unit_z);
+
+			nu = nu_of_xz(unit_x, unit_z);
+			phi = phi_of_xy(unit_x, unit_y);
+
+			// sanity check: phi is the same coordinate in both systems
+			assert(fuzzy_EQ_ang(phi, ps_phi));
+		}
+	}
+
+	//******************************************************************************
+
+	//
+	// these functions convert ((mu,nu,phi)) to the direction cosines
+	// (xcos,ycos,zcos)
+	//
+
+	namespace local_coords
+	{
+		// The direction cosines are just the Cartesian coordinates of the
+		// corresponding point at unit radius, so each of these functions
+		// forwards to the matching  xyz_of_r_*()  function with r = 1.
+
+		void xyzcos_of_mu_nu(fp mu, fp nu, fp &xcos, fp &ycos, fp &zcos)
+		{
+			const fp unit_radius = 1.0;
+			xyz_of_r_mu_nu(unit_radius, mu, nu, xcos, ycos, zcos);
+		}
+
+		void xyzcos_of_mu_phi(fp mu, fp phi, fp &xcos, fp &ycos, fp &zcos)
+		{
+			const fp unit_radius = 1.0;
+			xyz_of_r_mu_phi(unit_radius, mu, phi, xcos, ycos, zcos);
+		}
+
+		void xyzcos_of_nu_phi(fp nu, fp phi, fp &xcos, fp &ycos, fp &zcos)
+		{
+			const fp unit_radius = 1.0;
+			xyz_of_r_nu_phi(unit_radius, nu, phi, xcos, ycos, zcos);
+		}
+	}
+
+	//******************************************************************************
+	//******************************************************************************
+	//******************************************************************************
+
+	//
+	// This function computes a human-readable name from a (mu,nu,phi)
+	// coordinates set.
+	//
+	const char *local_coords::name_of_coords_set(coords_set S)
+	{
+		//
+		// Arguments:
+		// S = (in) A  coords_set  bit vector (bitwise OR of the
+		//	    coords_set_{mu,nu,phi}  masks).
+		//
+		// Results:
+		// This function returns a pointer to a string literal naming the set.
+		// If S isn't a valid bit vector, it reports the error via
+		//  error_exit()  (which does not return).
+		//
+		// The  coords_set_*  masks are enumerators (see coords.h), so they
+		// and their bitwise-OR combinations are integral constant expressions
+		// and can be used as case labels.  Each case compares S against a
+		// complete bit vector; note that an expression like
+		//	S == coords_set_mu | coords_set_nu
+		// would instead parse as  (S == coords_set_mu) | coords_set_nu ,
+		// which is always nonzero.
+		//
+		switch (S)
+		{
+		case coords_set_empty:
+			return "{}";
+		case coords_set_mu:
+			return "mu";
+		case coords_set_nu:
+			return "nu";
+		case coords_set_phi:
+			return "phi";
+		case coords_set_mu | coords_set_nu:
+			return "{mu,nu}";
+		case coords_set_mu | coords_set_phi:
+			return "{mu,phi}";
+		case coords_set_nu | coords_set_phi:
+			return "{nu,phi}";
+		case coords_set_mu | coords_set_nu | coords_set_phi:
+			return "{mu,nu,phi}";
+		default:
+			break;
+		}
+
+		error_exit(PANIC_EXIT,
+				   "***** local_coords::mu_nu_phi::name_of_coords_set:\n"
+				   "        S=0x%x isn't a valid  coords_set  bit vector!\n",
+				   int(S)); /*NOTREACHED*/
+		return 0;			// keeps every control path returning a value
+	}
+
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/coords.h
+++ b/AMSS_NCKU_source/AHF_Direct/coords.h
@@ -0,0 +1,173 @@
+#ifndef COORDS_H
+#define COORDS_H
+namespace AHFinderDirect
+{
+	//
+	// ***** angle comparison and reduction *****
+	// ... the implementations (coords.C) forward to
+	//     jtutil::fuzzy<fp>::is_integer()  and  jtutil::modulo_reduce()
+	//
+	namespace local_coords
+	{
+
+		// compare if two angles are fuzzily equal mod 2*pi radians (360 degrees)
+		bool fuzzy_EQ_ang(fp ang1, fp ang2);	// radians
+		bool fuzzy_EQ_dang(fp dang1, fp dang2); // degrees
+
+		// modulo-reduce  {ang,dang}  to be (fuzzily) within the range
+		// [min,max]_{ang,dang}, or error_exit() if no such value exists
+		fp modulo_reduce_ang(fp ang, fp min_ang, fp max_ang);
+		fp modulo_reduce_dang(fp dang, fp min_dang, fp max_dang);
+
+	} // close namespace local_coords::
+
+	//
+	// ***** coordinate conversions and their derivatives *****
+	//
+	// The (mu,nu,phi) angles are related to Cartesian (x,y,z) by
+	//	tan(mu) = y/z ,  tan(nu) = x/z ,  tan(phi) = y/x
+	// Any two of the three angles determine a direction; each conversion
+	// below is named for the pair of angles it takes.
+	//
+	namespace local_coords
+	{
+		// (r,(mu,nu,phi)) <--> (x,y,z)
+		// ... the  xyz_of_r_*()  functions assert() that the given pair of
+		//     angles is away from that pair's coordinate singularity and
+		//     is self-consistent
+		void xyz_of_r_mu_nu(fp r, fp mu, fp nu, fp &x, fp &y, fp &z);
+		void xyz_of_r_mu_phi(fp r, fp mu, fp phi, fp &x, fp &y, fp &z);
+		void xyz_of_r_nu_phi(fp r, fp nu, fp phi, fp &x, fp &y, fp &z);
+		fp r_of_xyz(fp x, fp y, fp z);
+		fp mu_of_yz(fp y, fp z);
+		fp nu_of_xz(fp x, fp z);
+		fp phi_of_xy(fp x, fp y);
+
+		// ((mu,nu,phi)) --> the 3rd
+		// ... computed via the (x,y,z) of the unit-radius point
+		fp phi_of_mu_nu(fp mu, fp nu);
+		fp nu_of_mu_phi(fp mu, fp phi);
+		fp mu_of_nu_phi(fp nu, fp phi);
+
+		// partial {x,y,z} / partial {mu,nu,phi}
+		// ... results are returned via the 9 reference arguments
+		// ... these assert() that r is (fuzzily) nonzero
+		void partial_xyz_wrt_r_mu_nu(fp r, fp mu, fp nu,
+									 fp &partial_x_wrt_r, fp &partial_x_wrt_mu, fp &partial_x_wrt_nu,
+									 fp &partial_y_wrt_r, fp &partial_y_wrt_mu, fp &partial_y_wrt_nu,
+									 fp &partial_z_wrt_r, fp &partial_z_wrt_mu, fp &partial_z_wrt_nu);
+		void partial_xyz_wrt_r_mu_phi(fp r, fp mu, fp phi,
+									  fp &partial_x_wrt_r, fp &partial_x_wrt_mu, fp &partial_x_wrt_phi,
+									  fp &partial_y_wrt_r, fp &partial_y_wrt_mu, fp &partial_y_wrt_phi,
+									  fp &partial_z_wrt_r, fp &partial_z_wrt_mu, fp &partial_z_wrt_phi);
+		void partial_xyz_wrt_r_nu_phi(fp r, fp nu, fp phi,
+									  fp &partial_x_wrt_r, fp &partial_x_wrt_nu, fp &partial_x_wrt_phi,
+									  fp &partial_y_wrt_r, fp &partial_y_wrt_nu, fp &partial_y_wrt_phi,
+									  fp &partial_z_wrt_r, fp &partial_z_wrt_nu, fp &partial_z_wrt_phi);
+
+		// partial {mu,nu,phi} / partial {x,y,z}
+		// ... each angle depends on only two of (x,y,z), so only the
+		//     (possibly) nonzero derivatives are provided
+		fp partial_mu_wrt_y(fp y, fp z);
+		fp partial_mu_wrt_z(fp y, fp z);
+		fp partial_nu_wrt_x(fp x, fp z);
+		fp partial_nu_wrt_z(fp x, fp z);
+		fp partial_phi_wrt_x(fp x, fp y);
+		fp partial_phi_wrt_y(fp x, fp y);
+
+		// partial^2 {mu,nu,phi} / partial {x,y,z}{x,y,z}
+		fp partial2_mu_wrt_yy(fp y, fp z);
+		fp partial2_mu_wrt_yz(fp y, fp z);
+		fp partial2_mu_wrt_zz(fp y, fp z);
+		fp partial2_nu_wrt_xx(fp x, fp z);
+		fp partial2_nu_wrt_xz(fp x, fp z);
+		fp partial2_nu_wrt_zz(fp x, fp z);
+		fp partial2_phi_wrt_xx(fp x, fp y);
+		fp partial2_phi_wrt_xy(fp x, fp y);
+		fp partial2_phi_wrt_yy(fp x, fp y);
+
+		// usual polar spherical (r,theta,phi) <--> (x,y,z)
+		// ... theta is measured from the +z axis ( z = r*cos(theta) )
+		void xyz_of_r_theta_phi(fp r, fp theta, fp phi, fp &x, fp &y, fp &z);
+		void r_theta_phi_of_xyz(fp x, fp y, fp z, fp &r, fp &theta, fp &phi);
+		// ... already have r_of_xyz()
+		// ... already have phi_of_xy()
+		fp theta_of_xyz(fp x, fp y, fp z);
+
+		// ((mu,nu,phi)) <--> usual polar spherical (theta,phi)
+		// ... note phi is the same coordinate in both systems
+		// ... the functions which take phi as an input assert() that
+		//     the phi they compute is fuzzily equal to it
+		void theta_phi_of_mu_nu(fp mu, fp nu, fp &ps_theta, fp &ps_phi);
+		void theta_phi_of_mu_phi(fp mu, fp phi, fp &ps_theta, fp &ps_phi);
+		void theta_phi_of_nu_phi(fp nu, fp phi, fp &ps_theta, fp &ps_phi);
+		void mu_nu_of_theta_phi(fp ps_theta, fp ps_phi, fp &mu, fp &nu);
+		void mu_phi_of_theta_phi(fp ps_theta, fp ps_phi, fp &mu, fp &phi);
+		void nu_phi_of_theta_phi(fp ps_theta, fp ps_phi, fp &nu, fp &phi);
+
+		// ((mu,nu,phi)) --> direction cosines (xcos,ycos,zcos)
+		// ... these are the (x,y,z) of the unit-radius point
+		void xyzcos_of_mu_nu(fp mu, fp nu, fp &xcos, fp &ycos, fp &zcos);
+		void xyzcos_of_mu_phi(fp mu, fp phi, fp &xcos, fp &ycos, fp &zcos);
+		void xyzcos_of_nu_phi(fp nu, fp phi, fp &xcos, fp &ycos, fp &zcos);
+	} // close namespace local_coords::
+
+	//*****************************************************************************
+
+	//
+	// ***** bit masks for coordinates ****
+	//
+
+	//
+	// We need to manipulate coordinates to do calculations like "which
+	// coordinate do these two patches have in common".  We do these by
+	// Boolean operations on integers using the following bit masks:
+	//
+
+	namespace local_coords
+	{
+
+		// a set of (mu,nu,phi) coordinates, stored as a bit vector
+		// (the bitwise OR of the single-coordinate masks below)
+		typedef int coords_set;
+
+		enum
+		{
+			// the empty set
+			coords_set_empty = 0x0,
+
+			// one bit per coordinate
+			coords_set_mu = 0x1,
+			coords_set_nu = 0x2,
+			coords_set_phi = 0x4,
+
+			// the set of all three coordinates
+			coords_set_all = coords_set_mu | coords_set_nu | coords_set_phi // no comma
+		};
+
+		// human-readable name of a coordinate set, for debugging etc
+		const char *name_of_coords_set(coords_set S);
+
+		// set complement: those coordinates which are *not* in S
+		inline coords_set coords_set_not(coords_set S)
+		{
+			// flip every bit, then keep only the 3 coordinate bits
+			const coords_set all_bits_flipped = ~S;
+			return all_bits_flipped & coords_set_all;
+		}
+
+	} // close namespace local_coords::
+
+	//******************************************************************************
+
+	//
+	// This class stores the origin point of our local coordinates, and
+	// provides conversions between local and global coordinates.
+	//
+	class global_coords
+	{
+	public:
+		// constructor: the arguments are the global (x,y,z) coordinates
+		// of the local origin point
+		// ... no explicit destructor is needed
+		global_coords(fp origin_x_in, fp origin_y_in, fp origin_z_in)
+			: origin_x_(origin_x_in),
+			  origin_y_(origin_y_in),
+			  origin_z_(origin_z_in)
+		{
+		}
+
+		// read back the global (x,y,z) coordinates of the local origin point
+		fp origin_x() const
+		{
+			return origin_x_;
+		}
+		fp origin_y() const
+		{
+			return origin_y_;
+		}
+		fp origin_z() const
+		{
+			return origin_z_;
+		}
+
+		// move the local origin point to the global coordinates (x,y,z)
+		void recentering(fp x, fp y, fp z)
+		{
+			origin_x_ = x;
+			origin_y_ = y;
+			origin_z_ = z;
+		}
+
+	private:
+		// objects of this class can't be copied or passed by value:
+		// the copy constructor and assignment operator are declared
+		// private and are never defined
+		global_coords(const global_coords &rhs);
+		global_coords &operator=(const global_coords &rhs);
+
+	private:
+		// the stored origin: global (x,y,z) coordinates of local origin point
+		fp origin_x_, origin_y_, origin_z_;
+	};
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /*  COORDS_H  */
--- a/AMSS_NCKU_source/AHF_Direct/cpm_map.C
+++ b/AMSS_NCKU_source/AHF_Direct/cpm_map.C
@@ -0,0 +1,93 @@
+#include <assert.h>
+#include <stdio.h>
+
+#include "stdc.h"
+#include "util.h"
+#include "cpm_map.h"
+
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+
+		//
+		// This function constructs a "mirror"  cpm_map  object, i.e. a minus
+		// map  i --> offset_ - i , with the mapping specified by its fixed
+		// point.  The fixed point must be (fuzzily) integral or half-integral,
+		// otherwise we  error_exit() .
+		//
+		template <typename fp_t>
+		cpm_map<fp_t>::cpm_map(int min_i_in, int max_i_in,
+							   fp_t fixed_point)
+			: min_i_(min_i_in),
+			  max_i_(max_i_in),
+			  map_is_plus_(false)
+		{
+			// a fixed point satisfies  i = offset_ - i , so  offset_ = 2*i
+			const fp_t twice_fixed_point = 2.0 * fixed_point;
+
+			const bool offset_is_integral = fuzzy<fp_t>::is_integer(twice_fixed_point);
+			if (!offset_is_integral)
+			{
+				error_exit(ERROR_EXIT,
+						   "***** cpm_map::cpm_map (mirror):\n"
+						   "        fixed_point=%g isn't (fuzzily) integral or half-integral!\n",
+						   double(fixed_point)); /*NOTREACHED*/
+			}
+
+			offset_ = round<fp_t>::to_integer(twice_fixed_point);
+
+			// check the resulting map against the fixed point
+			assert(
+				map_unchecked(fuzzy<fp_t>::floor(fixed_point)) ==
+				fuzzy<fp_t>::ceiling(fixed_point));
+		}
+
+		//******************************************************************************
+
+		//
+		// This function constructs a generic  cpm_map  object, with the mapping
+		// specified by a sample point  sample_i --> sample_j  and by sign.
+		// The sample point need not be in the map's domain/range.
+		//
+		template <typename fp_t>
+		cpm_map<fp_t>::cpm_map(int min_i_in, int max_i_in,
+							   int sample_i, int sample_j,
+							   bool map_is_plus_in)
+			: min_i_(min_i_in),
+			  max_i_(max_i_in),
+			  // plus map:   sample_j = offset_ + sample_i
+			  // minus map:  sample_j = offset_ - sample_i
+			  offset_(map_is_plus_in ? (sample_j - sample_i)
+									 : (sample_j + sample_i)),
+			  map_is_plus_(map_is_plus_in)
+		{
+			// the map we just set up must reproduce the sample point
+			assert(sample_j == map_unchecked(sample_i));
+		}
+
+		//******************************************************************************
+
+		//
+		// This function constructs a generic  cpm_map  object, with the mapping
+		// specified by a *fp* sample point  sample_i --> sample_j  (which
+		// must specify an  integer --> integer  mapping, i.e.  4.2 --> 4.2  is
+		// ok for a + map, and 4.5 --> 4.5 is ok for a minus map, but  4.2 --> 4.7
+		// is never ok) and by sign.  The sample point need not be in the map's
+		// domain/range.
+		//
+		template <typename fp_t>
+		cpm_map<fp_t>::cpm_map(int min_i_in, int max_i_in,
+							   fp_t sample_i, fp_t sample_j,
+							   bool map_is_plus_in)
+			: min_i_(min_i_in),
+			  max_i_(max_i_in),
+			  map_is_plus_(map_is_plus_in)
+		{
+			// plus map:   sample_j = offset + sample_i
+			// minus map:  sample_j = offset - sample_i
+			fp_t fp_offset;
+			if (map_is_plus_in)
+			{
+				fp_offset = sample_j - sample_i;
+			}
+			else
+			{
+				fp_offset = sample_j + sample_i;
+			}
+
+			const bool offset_is_integral = fuzzy<fp_t>::is_integer(fp_offset);
+			if (!offset_is_integral)
+			{
+				error_exit(ERROR_EXIT,
+						   "***** cpm_map::cpm_map (generic via fp sample point):\n"
+						   "        fp_offset=%g isn't fuzzily integral!\n"
+						   "        ==> sample_i=%g --> sample_j=%g\n"
+						   "            doesn't fuzzily specify an  integer --> integer  mapping!\n",
+						   double(fp_offset),
+						   double(sample_i), double(sample_j)); /*NOTREACHED*/
+			}
+
+			offset_ = round<fp_t>::to_integer(fp_offset);
+
+			// verify that we have set the map up correctly
+			assert(
+				map_unchecked(fuzzy<fp_t>::floor(sample_i)) ==
+				(map_is_plus_in ? fuzzy<fp_t>::floor(sample_j)
+								: fuzzy<fp_t>::ceiling(sample_j)));
+		}
+
+		// explicitly instantiate the class template for the floating-point
+		// types we use; the out-of-line constructors above are only defined
+		// in this file
+		template class cpm_map<float>;
+		template class cpm_map<double>;
+
+	} // namespace jtutil
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/cpm_map.h
+++ b/AMSS_NCKU_source/AHF_Direct/cpm_map.h
@@ -0,0 +1,120 @@
+#ifndef AHFINDERDIRECT__CPM_MAP_HH
+#define AHFINDERDIRECT__CPM_MAP_HH
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+
+		//
+		// This class template represents an integer mapping of the form
+		//	i --> j = offset + i	("plus" map)
+		//	i --> j = offset - i	("minus" map)
+		// defined on the inclusive domain  [min_i, max_i] .
+		//
+		template <typename fp_t>
+		class cpm_map
+		{
+		public:
+			//
+			// bounds info -- domain
+			//
+			int min_i() const
+			{
+				return min_i_;
+			}
+			int max_i() const
+			{
+				return max_i_;
+			}
+			int N_points() const
+			{
+				return jtutil::how_many_in_range(min_i_, max_i_);
+			}
+			bool in_domain(int i) const
+			{
+				if (i < min_i_)
+					return false;
+				return i <= max_i_;
+			}
+
+			//
+			// sign of the mapping
+			//
+			bool is_plus() const
+			{
+				return map_is_plus_;
+			}
+			bool is_minus() const
+			{
+				return !map_is_plus_;
+			}
+			int sign() const
+			{
+				if (map_is_plus_)
+					return +1;
+				return -1;
+			}
+			fp_t fp_sign() const
+			{
+				if (map_is_plus_)
+					return +1.0;
+				return -1.0;
+			}
+
+			//
+			// the mapping itself
+			// ... the  *_unchecked()  functions don't check the domain,
+			//     the others  assert()  that i is in the domain
+			//
+			int map_unchecked(int i) const
+			{
+				if (map_is_plus_)
+					return offset_ + i;
+				return offset_ - i;
+			}
+			int inv_map_unchecked(int j) const
+			{
+				if (map_is_plus_)
+					return j - offset_;
+				return offset_ - j;
+			}
+			int map(int i) const
+			{
+				assert(in_domain(i));
+				return map_unchecked(i);
+			}
+			int inv_map(int j) const
+			{
+				const int i = inv_map_unchecked(j);
+				assert(in_domain(i));
+				return i;
+			}
+
+			//
+			// bounds info -- range
+			// ... a minus map reverses the order, so its smallest j comes
+			//     from the largest i and vice versa
+			// ... we use the unchecked map here in case the domain is empty
+			//
+			int min_j() const
+			{
+				return map_unchecked(map_is_plus_ ? min_i_ : max_i_);
+			}
+			int max_j() const
+			{
+				return map_unchecked(map_is_plus_ ? max_i_ : min_i_);
+			}
+			bool in_range(int j) const
+			{
+				const int i = inv_map_unchecked(j);
+				return in_domain(i);
+			}
+
+			//
+			// constructors
+			//
+
+			// "mirror" map: i --> const - i
+			// ... the map is given by its fixed point, which must be
+			//     an integer or a half-integer
+			// ... the fixed point need not be in the domain/range
+			cpm_map(int min_i_in, int max_i_in,
+					fp_t fixed_point);
+
+			// "shift" map: i --> const + i
+			// ... the map is given by the shift amount
+			// ... the default shift of 0 gives the identity map
+			cpm_map(int min_i_in, int max_i_in,
+					int shift_amount = 0)
+				: min_i_(min_i_in),
+				  max_i_(max_i_in),
+				  offset_(shift_amount),
+				  map_is_plus_(true)
+			{
+			}
+
+			// generic map: i --> const +/- i
+			// ... the map is given by a sample point sample_i --> sample_j
+			//     and by its sign (one of  {plus,minus}_map )
+			// ... the sample point need not be in the domain/range
+			cpm_map(int min_i_in, int max_i_in,
+					int sample_i, int sample_j,
+					bool map_is_plus_in);
+
+			// generic map: i --> const +/- i
+			// ... the map is given by a *fp* sample point sample_i --> sample_j
+			//     (which must specify an integer --> integer mapping)
+			//     and by its sign (one of  {plus,minus}_map )
+			// ... hence if the sign is -1, then sample_i and sample_j
+			//     must both be half-integral
+			// ... the sample point need *not* be in the domain/range
+			cpm_map(int min_i_in, int max_i_in,
+					fp_t sample_i, fp_t sample_j,
+					bool map_is_plus_in);
+
+			// the compiler-generated destructor, copy constructor,
+			// and assignment operator are all ok
+
+		private:
+			// domain bounds (inclusive)
+			int min_i_, max_i_;
+
+			// the mapping is  i --> offset_ + i  if  map_is_plus_  is true,
+			//                 i --> offset_ - i  otherwise
+			int offset_;
+			bool map_is_plus_;
+		};
+
+		//******************************************************************************
+
+	} // namespace jtutil
+} // namespace AHFinderDirect
+
+#endif /* AHFINDERDIRECT__CPM_MAP_HH */
--- a/AMSS_NCKU_source/AHF_Direct/driver.h
+++ b/AMSS_NCKU_source/AHF_Direct/driver.h
@@ -0,0 +1,108 @@
+#ifndef DRIVER_H
+#define DRIVER_H
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+
+#include "util_Table.h"
+#include "cctk.h"
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "cpm_map.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "fd_grid.h"
+#include "patch.h"
+#include "patch_edge.h"
+#include "patch_interp.h"
+#include "ghost_zone.h"
+#include "patch_system.h"
+
+#include "Jacobian.h"
+
+#include "gfns.h"
+#include "gr.h"
+
+#include "horizon_sequence.h"
+#include "BH_diagnostics.h"
+
+namespace AHFinderDirect
+{
+	//
+	// This struct holds pointers to the buffers used for the iteration
+	// status; all the pointers start out NULL.
+	//
+	struct iteration_status_buffers
+	{
+		// per-item status buffers
+		int *hn_buffer;
+		int *iteration_buffer;
+		enum expansion_status *expansion_status_buffer;
+		fp *mean_horizon_radius_buffer;
+		fp *Theta_infinity_norm_buffer;
+		bool *found_horizon_buffer;
+
+		// 2-D send/receive buffers
+		jtutil::array2d<CCTK_REAL> *send_buffer_ptr;
+		jtutil::array2d<CCTK_REAL> *receive_buffer_ptr;
+
+		// constructor: no buffers are allocated here
+		iteration_status_buffers()
+			: hn_buffer(NULL),
+			  iteration_buffer(NULL),
+			  expansion_status_buffer(NULL),
+			  mean_horizon_radius_buffer(NULL),
+			  Theta_infinity_norm_buffer(NULL),
+			  found_horizon_buffer(NULL),
+			  send_buffer_ptr(NULL),
+			  receive_buffer_ptr(NULL)
+		{
+		}
+	};
+
+	//
+	// This struct holds interprocessor-communication buffers for broadcasting
+	// the BH diagnostics and horizon shape from the processor which finds a
+	// given horizon, to all processors.
+	//
+	struct horizon_buffers
+	{
+		// buffer length
+		// NOTE(review): presumably the number of doubles in each buffer -- confirm
+		int N_buffer;
+
+		// the send and receive buffers themselves
+		double *send_buffer;
+		double *receive_buffer;
+
+		// constructor: start with no buffers (length 0, NULL pointers)
+		horizon_buffers()
+			: N_buffer(0),
+			  send_buffer(NULL),
+			  receive_buffer(NULL)
+		{
+		}
+	};
+	//
+	//
+	// This struct holds the per-horizon data: the patch system and Jacobian,
+	// status flags, BH diagnostics, and communication buffers.
+	//
+	struct AH_data
+	{
+		// patch system and Jacobian for this horizon
+		patch_system *ps_ptr;
+		Jacobian *Jac_ptr;
+
+		double surface_expansion;
+
+		// status/control flags
+		bool initial_find_flag;
+		bool recentering_flag;
+		bool stop_finding;
+		bool find_trigger;
+
+		// did we find this horizon (successfully)
+		bool found_flag;
+
+		// BH diagnostics for this horizon, and the file they're written to
+		struct BH_diagnostics BH_diagnostics;
+		FILE *BH_diagnostics_fileptr;
+
+		// interprocessor-communication buffers
+		// for this horizon's BH diagnostics and (optionally) horizon shape
+		struct horizon_buffers horizon_buffers;
+	};
+
+	//
+	// prototypes for functions defined in other source files
+	//
+
+	// initial_guess.cc
+	// ... sets up the initial guess in the patch system  ps  from a center
+	//     position and a radius along each of the x, y, and z axes
+	//     NOTE(review): presumably the guess is an ellipsoid -- confirm
+	void setup_initial_guess(patch_system &ps,
+							 fp x_center, fp y_center, fp z_center,
+							 fp x_radius, fp y_radius, fp z_radius);
+
+	// Newton.cc
+	// ... AH_data_array[]  is an array of pointers to the per-horizon data
+	// ... the meaning of the final (unnamed)  double *  argument isn't
+	//     visible from this header
+	void Newton(int N_procs, int N_active_procs, int my_proc,
+				horizon_sequence &hs, struct AH_data *const AH_data_array[],
+				struct iteration_status_buffers &isb, int *dumpid, double *);
+
+} // namespace AHFinderDirect
+#endif /*     DRIVER_H    */
--- a/AMSS_NCKU_source/AHF_Direct/error_exit.C
+++ b/AMSS_NCKU_source/AHF_Direct/error_exit.C
@@ -0,0 +1,38 @@
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+		//
+		// This function formats a printf()-style error message, reports it
+		// via  CCTK_VWarn()  at the specified message level, and then (if
+		//  CCTK_VWarn()  returns)  abort()s.  It never returns to its caller.
+		//
+		// Arguments:
+		// msg_level = (in) The message level to pass to  CCTK_VWarn() .
+		// format = (in) A printf()-style format string for the message.
+		// ... = (in) The arguments for the format string.
+		//
+		int error_exit(int msg_level, const char *format, ...)
+		{
+			// format the message into a fixed-size buffer,
+			// truncating it if it's too long
+			char message[2000];
+
+			va_list args;
+			va_start(args, format);
+			vsnprintf(message, sizeof(message), format, args);
+			va_end(args);
+
+			// drop a single trailing newline, if there is one
+			const size_t message_length = strlen(message);
+			if (message_length != 0)
+			{
+				char &last_char = message[message_length - 1];
+				if (last_char == '\n')
+					last_char = '\0';
+			}
+
+			CCTK_VWarn(msg_level, __LINE__, __FILE__, CCTK_THORNSTRING, "%s", message);
+
+			// if we got here, evidently  msg_level  wasn't drastic enough
+			abort(); /*NOTREACHED*/
+		}
+
+		//******************************************************************************
+
+	} // namespace jtutil
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/expansion.C
+++ b/AMSS_NCKU_source/AHF_Direct/expansion.C
--- a/AMSS_NCKU_source/AHF_Direct/expansion_Jacobian.C
+++ b/AMSS_NCKU_source/AHF_Direct/expansion_Jacobian.C
@@ -0,0 +1,386 @@
+
+
+#include "macrodef.h"
+#ifdef With_AHF
+
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+
+#include "util_Table.h"
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "cpm_map.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "fd_grid.h"
+#include "patch.h"
+#include "patch_edge.h"
+#include "patch_interp.h"
+#include "ghost_zone.h"
+#include "patch_system.h"
+
+#include "Jacobian.h"
+
+#include "gfns.h"
+#include "gr.h"
+
+namespace AHFinderDirect
+{
+	using jtutil::error_exit;
+
+	namespace
+	{
+
+		// Forward declarations of the file-local helpers defined further
+		// down in this file.
+
+		// fills Jac with the partial-derivative terms obtained from the
+		// Jacobian-coefficient gridfns
+		void expansion_Jacobian_partial_SD(patch_system &ps, Jacobian &Jac,
+										   bool print_msg_flag);
+
+		// sums into Jac the contributions of one molecule point that lies
+		// outside the nominal grid (i.e. in a ghost zone)
+		void add_ghost_zone_Jacobian(const patch_system &ps,
+									 Jacobian &Jac,
+									 fp mol,
+									 const patch &xp, const ghost_zone &xmgz,
+									 int x_II,
+									 int xm_irho, int xm_isigma);
+
+		// sums the finite-differenced d/dr terms into the diagonal of the
+		// Jacobian; returns a status code
+		enum expansion_status
+		expansion_Jacobian_dr_FD(patch_system *ps_ptr, Jacobian *Jac_ptr, fp add_to_expansion,
+								 bool initial_flag,
+								 bool print_msg_flag);
+	}
+
+	//******************************************************************************
+
+	//
+	// If ps_ptr != NULL and Jac_ptr != NULL, this function computes the
+	// Jacobian matrix J[Theta(h)] of the expansion Theta(h).  We assume
+	// that Theta(h) has already been computed.
+	//
+	// If ps_ptr == NULL and Jac_ptr == NULL, this function does a dummy
+	// computation, in which only any expansion() (and hence geometry
+	// interpolator) calls are done, these with the number of interpolation
+	// points set to 0 and all the output array pointers set to NULL.
+	//
+	// It's illegal for one but not both of ps_ptr and Jac_ptr to be NULL.
+	//
+	// Arguments:
+	// ps_ptr --> The patch system, or == NULL to do (only) a dummy computation.
+	// Jac_ptr --> The Jacobian, or == NULL to do (only) a dummy computation.
+	// add_to_expansion = A real number to add to the expansion.
+	//
+	// Results:
+	// This function returns a status code indicating whether the computation
+	// succeeded or failed, and if the latter, what caused the failure.
+	//
+	enum expansion_status
+	expansion_Jacobian(patch_system *ps_ptr, Jacobian *Jac_ptr,
+					   fp add_to_expansion,
+					   bool initial_flag,
+					   bool print_msg_flag /* = false */)
+	{
+		// a real (non-dummy) computation requires both objects
+		const bool do_real_computation = (ps_ptr != NULL) && (Jac_ptr != NULL);
+
+		// stage 1: partial-derivative terms by symbolic differentiation;
+		//          there is nothing to do here for a dummy computation
+		if (do_real_computation)
+		{
+			expansion_Jacobian_partial_SD(*ps_ptr, *Jac_ptr, print_msg_flag);
+		}
+
+		// stage 2: d/dr terms by finite differencing; this is always called,
+		//          since the callee itself inspects ps_ptr and Jac_ptr
+		//          (non-NULL vs NULL) to choose a normal vs dummy computation
+		const enum expansion_status dr_FD_status = expansion_Jacobian_dr_FD(ps_ptr, Jac_ptr, add_to_expansion,
+																			initial_flag,
+																			print_msg_flag);
+		if (dr_FD_status != expansion_success)
+			then return dr_FD_status; // *** ERROR RETURN ***
+
+		return expansion_success; // *** NORMAL RETURN ***
+	}
+	//
+	// This function computes the partial derivative terms in the Jacobian
+	// matrix of the expansion Theta(h), by symbolic differentiation from
+	// the Jacobian coefficient (angular) gridfns.  The Jacobian is traversed
+	// by rows, using equation (25) of my 1996 apparent horizon finding paper.
+	//
+	// Inputs (angular gridfns, on ghosted grid):
+	//	h			# shape of trial surface
+	//	Theta			# Theta(h) assumed to already be computed
+	//	partial_Theta_wrt_partial_d_h	# Jacobian coefficients
+	//	partial_Theta_wrt_partial_dd_h	# (also assumed to already be computed)
+	//
+	// Outputs:
+	//	The Jacobian matrix is stored in the Jacobian object Jac.
+	//
+	namespace
+	{
+		// Builds the partial-derivative part of the Jacobian from scratch:
+		// the matrix is first zeroed, then for every grid point of every patch
+		// the five Jacobian-coefficient gridfns are combined with the finite
+		// difference molecule coefficients and summed into row II.
+		// Molecule points inside the nominal grid are summed in directly;
+		// all others are routed through add_ghost_zone_Jacobian().
+		//
+		// Arguments:
+		// ps = The patch system.
+		// Jac = (out) The Jacobian matrix; any previous contents are discarded.
+		// print_msg_flag = Not referenced anywhere in this function body.
+		void expansion_Jacobian_partial_SD(patch_system &ps, Jacobian &Jac,
+										   bool print_msg_flag)
+		{
+			// start from an all-zero matrix: everything below is  sum_into
+			Jac.zero_matrix();
+			// must precede the ps.synchronize_Jacobian() queries made (indirectly)
+			// via add_ghost_zone_Jacobian() -- presumably this precomputes the
+			// data those queries return; TODO confirm in patch_system
+			ps.compute_synchronize_Jacobian();
+
+			for (int xpn = 0; xpn < ps.N_patches(); ++xpn)
+			{
+				patch &xp = ps.ith_patch(xpn);
+
+				for (int x_irho = xp.min_irho(); x_irho <= xp.max_irho(); ++x_irho)
+				{
+					for (int x_isigma = xp.min_isigma(); x_isigma <= xp.max_isigma(); ++x_isigma)
+					{
+						//
+						// compute the main Jacobian terms for this grid point, i.e.
+						//	partial Theta(this point x, Jacobian row II)
+						//	---------------------------------------------
+						//	partial h(other points y, Jacobian column JJ)
+						//
+
+						// Jacobian row index
+						// NOTE(review): the row index comes from the patch system
+						// (gpn_of_...) while the column indices below come from
+						// the Jacobian (II_of_...); presumably both numberings
+						// agree -- confirm
+						const int II = ps.gpn_of_patch_irho_isigma(xp, x_irho, x_isigma);
+
+						// Jacobian coefficients for this point
+						const fp Jacobian_coeff_rho = xp.gridfn(gfns::gfn__partial_Theta_wrt_partial_d_h_1,
+																x_irho, x_isigma);
+						const fp Jacobian_coeff_sigma = xp.gridfn(gfns::gfn__partial_Theta_wrt_partial_d_h_2,
+																  x_irho, x_isigma);
+						const fp Jacobian_coeff_rho_rho = xp.gridfn(gfns::gfn__partial_Theta_wrt_partial_dd_h_11,
+																	x_irho, x_isigma);
+						const fp Jacobian_coeff_rho_sigma = xp.gridfn(gfns::gfn__partial_Theta_wrt_partial_dd_h_12,
+																	  x_irho, x_isigma);
+						const fp Jacobian_coeff_sigma_sigma = xp.gridfn(gfns::gfn__partial_Theta_wrt_partial_dd_h_22,
+																		x_irho, x_isigma);
+
+						// partial_rho, partial_rho_rho
+						// (both molecules run along the rho axis, so their
+						// contributions to a given column are combined first)
+						{
+							for (int m_irho = xp.molecule_min_m();
+								 m_irho <= xp.molecule_max_m();
+								 ++m_irho)
+							{
+								const int xm_irho = x_irho + m_irho;
+								const fp Jac_rho = Jacobian_coeff_rho * xp.partial_rho_coeff(m_irho);
+								const fp Jac_rho_rho = Jacobian_coeff_rho_rho * xp.partial_rho_rho_coeff(m_irho);
+								const fp Jac_sum = Jac_rho + Jac_rho_rho;
+								if (xp.is_in_nominal_grid(xm_irho, x_isigma))
+									then
+									{
+										const int xm_JJ = Jac.II_of_patch_irho_isigma(xp, xm_irho, x_isigma);
+										Jac.sum_into_element(II, xm_JJ, Jac_sum);
+									}
+								else
+									// off the nominal grid: the sign of m selects
+									// which of the two rho ghost zones is used
+									add_ghost_zone_Jacobian(ps, Jac,
+															Jac_sum,
+															xp, xp.minmax_rho_ghost_zone(m_irho < 0),
+															II, xm_irho, x_isigma);
+							}
+						}
+
+						// partial_sigma, partial_sigma_sigma
+						// (same scheme as above, along the sigma axis)
+						{
+							for (int m_isigma = xp.molecule_min_m();
+								 m_isigma <= xp.molecule_max_m();
+								 ++m_isigma)
+							{
+								const int xm_isigma = x_isigma + m_isigma;
+								const fp Jac_sigma = Jacobian_coeff_sigma * xp.partial_sigma_coeff(m_isigma);
+								const fp Jac_sigma_sigma = Jacobian_coeff_sigma_sigma * xp.partial_sigma_sigma_coeff(m_isigma);
+								const fp Jac_sum = Jac_sigma + Jac_sigma_sigma;
+								if (xp.is_in_nominal_grid(x_irho, xm_isigma))
+									then
+									{
+										const int xm_JJ = Jac.II_of_patch_irho_isigma(xp, x_irho, xm_isigma);
+										Jac.sum_into_element(II, xm_JJ, Jac_sum);
+									}
+								else
+									add_ghost_zone_Jacobian(ps, Jac,
+															Jac_sum,
+															xp, xp.minmax_sigma_ghost_zone(m_isigma < 0),
+															II, x_irho, xm_isigma);
+							}
+						}
+
+						// partial_rho_sigma
+						// (mixed derivative: a full 2-D molecule over both axes)
+						{
+							for (int m_irho = xp.molecule_min_m();
+								 m_irho <= xp.molecule_max_m();
+								 ++m_irho)
+							{
+								for (int m_isigma = xp.molecule_min_m();
+									 m_isigma <= xp.molecule_max_m();
+									 ++m_isigma)
+								{
+									const int xm_irho = x_irho + m_irho;
+									const int xm_isigma = x_isigma + m_isigma;
+									const fp Jac_rho_sigma = Jacobian_coeff_rho_sigma * xp.partial_rho_sigma_coeff(m_irho, m_isigma);
+									if (xp.is_in_nominal_grid(xm_irho, xm_isigma))
+										then
+										{
+											const int xm_JJ = Jac.II_of_patch_irho_isigma(xp, xm_irho, xm_isigma);
+											Jac.sum_into_element(II, xm_JJ, Jac_rho_sigma);
+										}
+									else
+									{
+										// the point may be off-grid in either or both
+										// directions, so ask the patch which ghost
+										// zone actually contains it
+										const ghost_zone &xmgz = xp.corner_ghost_zone_containing_point(m_irho < 0, m_isigma < 0,
+																									   xm_irho, xm_isigma);
+										add_ghost_zone_Jacobian(ps, Jac,
+																Jac_rho_sigma,
+																xp, xmgz,
+																II, xm_irho, xm_isigma);
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+
+	//******************************************************************************
+
+	//
+	// This function adds the ghost-zone Jacobian dependency contributions
+	// for a single ghost-zone point, to a Jacobian matrix.
+	//
+	// Arguments:
+	// ps = The patch system.
+	// Jac = (out) The Jacobian matrix.
+	// mol = The molecule coefficient.
+	// xp = The patch containing the center point of the molecule.
+	// xmgz = If the x+m point is in a ghost zone, this must be that ghost zone.
+	//	  If the x+m point is not in a ghost zone, this argument is ignored.
+	// x_II = The Jacobian row of the x point.
+	// xm_(irho,isigma) = The coordinates (in xp) of the x+m point of the molecule.
+
+	namespace
+	{
+		void add_ghost_zone_Jacobian(const patch_system &ps,
+									 Jacobian &Jac,
+									 fp mol,
+									 const patch &xp, const ghost_zone &xmgz,
+									 int x_II,
+									 int xm_irho, int xm_isigma)
+		{
+			// re-express the x+m point in the (perp,par) coordinates of the
+			// edge that owns the ghost zone
+			const patch_edge &xm_edge = xmgz.my_edge();
+			const int xm_iperp = xm_edge.iperp_of_irho_isigma(xm_irho, xm_isigma);
+			const int xm_ipar = xm_edge.ipar_of_irho_isigma(xm_irho, xm_isigma);
+
+			// scratch buffer spanning the global [min,max] range of ym
+			// FIXME: this range won't change from one call to another
+			//        ==> it would be more efficient to reuse the same buffer
+			//            across multiple calls on this function
+			int buffer_min_ym, buffer_max_ym;
+			ps.synchronize_Jacobian_global_minmax_ym(buffer_min_ym, buffer_max_ym);
+			jtutil::array1d<fp> sync_Jacobian(buffer_min_ym, buffer_max_ym);
+
+			// ask the patch system on which other points y the molecule point xm
+			// depends via the patch_system::synchronize() operation; the answer
+			// is the edge containing them, their position along that edge,
+			// and the corresponding coefficients in the buffer
+			int y_iperp, y_posn;
+			int first_ym, last_ym;
+			const patch_edge &y_edge = ps.synchronize_Jacobian(xmgz,
+															   xm_iperp, xm_ipar,
+															   y_iperp,
+															   y_posn, first_ym, last_ym,
+															   sync_Jacobian);
+			patch &y_patch = y_edge.my_patch();
+
+			// chain rule: each y point contributes (molecule coefficient)
+			// * (synchronize coefficient) to row x_II
+			int ym = first_ym;
+			while (ym <= last_ym)
+			{
+				const int y_ipar = y_posn + ym;
+				const int y_JJ = Jac.II_of_patch_irho_isigma(y_patch,
+															 y_edge.irho_of_iperp_ipar(y_iperp, y_ipar),
+															 y_edge.isigma_of_iperp_ipar(y_iperp, y_ipar));
+				Jac.sum_into_element(x_II, y_JJ, mol * sync_Jacobian(ym));
+				++ym;
+			}
+		}
+	}
+
+	//******************************************************************************
+
+	//
+	// If ps_ptr != NULL and Jac_ptr != NULL, this function sums the d/dr
+	// terms into the Jacobian matrix of the expansion Theta(h), computing
+	// those terms by finite differencing.
+	//
+	// If ps_ptr == NULL and Jac_ptr == NULL, this function does a dummy
+	// computation, in which only any expansion() (and hence geometry
+	// interpolator) calls are done, these with the number of interpolation
+	// points set to 0 and all the output array pointers set to NULL.
+	//
+	// It's illegal for one but not both of ps_ptr and Jac_ptr to be NULL.
+	//
+	// The basic algorithm is that
+	//	Jac += diag[ (Theta(h+epsilon) - Theta(h)) / epsilon ]
+	//
+	// Inputs (angular gridfns, on ghosted grid):
+	//	h			# shape of trial surface
+	//	Theta			# Theta(h) assumed to already be computed
+	//
+	// Outputs:
+	//	Jac += d/dr terms
+	//
+	// Results:
+	// This function returns a status code indicating whether the computation
+	// succeeded or failed, and if the latter, what caused the failure.
+	//
+	namespace
+	{
+		enum expansion_status
+		expansion_Jacobian_dr_FD(patch_system *ps_ptr, Jacobian *Jac_ptr, fp add_to_expansion,
+								 bool initial_flag,
+								 bool print_msg_flag)
+		{
+			// size of the perturbation of h used for the finite difference
+			const double epsilon = 1e-6;
+
+			// real computation only if both objects were supplied,
+			// otherwise this is a dummy computation
+			const bool active_flag = (ps_ptr != NULL) && (Jac_ptr != NULL);
+
+			// save Theta(h), then perturb h --> h+epsilon
+			if (active_flag)
+				then
+				{
+					ps_ptr->gridfn_copy(gfns::gfn__Theta, gfns::gfn__save_Theta);
+					ps_ptr->add_to_ghosted_gridfn(epsilon, gfns::gfn__h);
+				}
+
+			// compute Theta(h+epsilon); this call is made for a dummy
+			// computation too (ps_ptr is then NULL)
+			// NOTE(review): on the error return below the perturbation of h
+			//               is not undone in this function -- confirm that
+			//               callers do not rely on h after a failure
+			const enum expansion_status expansion_result = expansion(ps_ptr, add_to_expansion,
+																	 initial_flag);
+			if (expansion_result != expansion_success)
+				then return expansion_result; // *** ERROR RETURN ***
+
+			// a dummy computation has nothing more to do
+			if (!active_flag)
+				then return expansion_success; // *** NORMAL RETURN ***
+
+			patch_system &ps = *ps_ptr;
+			Jacobian &Jac = *Jac_ptr;
+
+			// Jac += diag[ (Theta(h+epsilon) - Theta(h)) / epsilon ]
+			for (int pn = 0; pn < ps.N_patches(); ++pn)
+			{
+				patch &p = ps.ith_patch(pn);
+				for (int irho = p.min_irho(); irho <= p.max_irho(); ++irho)
+				{
+					for (int isigma = p.min_isigma(); isigma <= p.max_isigma(); ++isigma)
+					{
+						const int diag_II = ps.gpn_of_patch_irho_isigma(p, irho, isigma);
+						const fp Theta_at_h = p.gridfn(gfns::gfn__save_Theta, irho, isigma);
+						const fp Theta_at_h_plus_epsilon = p.gridfn(gfns::gfn__Theta, irho, isigma);
+						const fp d_dr_term = (Theta_at_h_plus_epsilon - Theta_at_h) / epsilon;
+						Jac.sum_into_element(diag_II, diag_II, d_dr_term);
+					}
+				}
+			}
+
+			// undo the perturbation of h and put the saved Theta(h) back
+			ps.add_to_ghosted_gridfn(-epsilon, gfns::gfn__h);
+			ps.gridfn_copy(gfns::gfn__save_Theta, gfns::gfn__Theta);
+
+			return expansion_success; // *** NORMAL RETURN ***
+		}
+	}
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif
--- a/AMSS_NCKU_source/AHF_Direct/fd_grid.C
+++ b/AMSS_NCKU_source/AHF_Direct/fd_grid.C
@@ -0,0 +1,79 @@
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "fd_grid.h"
+
+namespace AHFinderDirect
+{
+  using jtutil::error_exit;
+
+  //*****************************************************************************
+
+  //
+  // This function computes a single coefficient of a 1st derivative
+  // molecule, for unit grid spacing.
+  //
+  // static
+  fp fd_grid::dx_coeff(int m)
+  {
+    // 4th order 1st-derivative molecule coefficients,
+    // stored at index m+2 for m = -2, -1, 0, +1, +2
+    static const fp coeff_of_m_plus_2[] = {
+        FD_GRID__ORDER4__DX__COEFF_M2,
+        FD_GRID__ORDER4__DX__COEFF_M1,
+        FD_GRID__ORDER4__DX__COEFF_0,
+        FD_GRID__ORDER4__DX__COEFF_P1,
+        FD_GRID__ORDER4__DX__COEFF_P2,
+    };
+
+    if ((m >= -2) && (m <= +2))
+      return coeff_of_m_plus_2[m + 2];
+
+    // m is not a valid molecule index: report it and terminate
+    cout << "***** fd_grid::dx_coeff(): m=" << m << " is outside order=4 molecule radius=" << FD_GRID__MOL_RADIUS << endl;
+    abort();
+  }
+
+  //*****************************************************************************
+
+  //
+  // This function computes a single coefficient of a 2nd derivative
+  // molecule, for unit grid spacing.
+  //
+  // static
+  fp fd_grid::dxx_coeff(int m)
+  {
+    // Arguments:
+    // m = relative grid coordinate within the molecule; only
+    //     -2, -1, 0, +1, +2 are valid for this 4th order molecule.
+    //
+    // Results:
+    // This function returns the corresponding 2nd-derivative molecule
+    // coefficient.  Any other m prints a diagnostic and aborts the program.
+    switch (m)
+    {
+    case -2:
+      return FD_GRID__ORDER4__DXX__COEFF_M2;
+    case -1:
+      return FD_GRID__ORDER4__DXX__COEFF_M1;
+    case 0:
+      return FD_GRID__ORDER4__DXX__COEFF_0;
+    case +1:
+      return FD_GRID__ORDER4__DXX__COEFF_P1;
+    case +2:
+      return FD_GRID__ORDER4__DXX__COEFF_P2;
+
+    default:
+      // the diagnostic names *this* function, so that an out-of-range m
+      // is not misattributed to dx_coeff()
+      cout << "***** fd_grid::dxx_coeff(): m=" << m << " is outside order=4 molecule radius=" << FD_GRID__MOL_RADIUS << endl;
+      abort();
+    }
+  }
+
+  //******************************************************************************
+
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/fd_grid.h
+++ b/AMSS_NCKU_source/AHF_Direct/fd_grid.h
@@ -0,0 +1,459 @@
+#ifndef FD_GRID_H
+#define FD_GRID_H
+namespace AHFinderDirect
+{
+
+	//******************************************************************************
+
+	//
+	// *** Implementation Notes -- Overview ***
+	//
+
+	//
+	// The key design problem for our finite differencing is how to
+	// implement an entire family of 5(9) finite difference operations in
+	// 2D(3D)
+	//
+	//	partial_rho		partial_sigma
+	//	partial_{rho,rho}	partial_{rho,sigma}
+	//				partial_{sigma,sigma}
+	//
+	//	partial_x		partial_y		partial_z
+	//	partial_xx		partial_xy		partial_xz
+	//				partial_yy		partial_yz
+	//							partial_zz
+	//
+	// without having to write out the finite differencing molecules multiple
+	// times, and while still preserving maximum inline-function efficiency.
+	// In particular, mixed 2nd-order derivative operations like partial_xy
+	// should be automatically composed from the two individual 1st derivative
+	// operations (partial_x and partial_y).
+	//
+
+	//
+	// Our basic approach is to define each finite difference molecule in
+	// a generic 1-dimensional form using an abstract "data(m)" interface.
+	// Here we use the terminology that a finite difference molecule is
+	// defined as
+	//	out[k] = sum(m) c[m] * in[k+m]
+	// where c[] is the vector/matrix of molecule coefficients, and m is
+	// the (integer) relative grid coordinate within a molecule.
+	//
+	// That is, for example, we define the usual 2nd order centered 1st
+	// derivative operator as
+	//	diff = 0.5*inv_delta_x*(data(+1) - data(-1))
+	// leaving unspecified just what the data source is.  We then use this
+	// with an appropriate data source (indexing along that gridfn array axis)
+	// for each directional derivative operation, and we compose two of
+	// these, using the first along x as the data source for the second
+	// along y, for the mixed 2nd-order derivative operation.
+	//
+
+	//******************************************************************************
+
+	//
+	// *** Implementation Notes -- Techniques using C++ Templates ***
+	//
+
+	//
+	// There are two plausible ways to use C++ templates
+	//	[C++ templates are described in detail in chapter 13 of
+	//	Stroustrup's "The C++ Programming Language" (3rd Edition),
+	//	hereinafter "C++PL", and chapter 15 of Stroustrup's
+	//	"The Design and Evolution of C++", hereinafter "D&EC++".]
+	// to write the sort of generic-at-compile-time code we want:
+	// - Template specializations for each axis, as discussed in D&EC++
+	//   section 15.10.3.
+	// - Overloaded functions for each axis, with an argument type
+	//   (possibly that of an extra unused argument) selecting the
+	//   appropriate axis and hence the appropriate function.  This
+	//   technique is discussed in D&EC++ section 15.6.3.1.
+	//
+	// Quoting from D&EC++ (section 15.6.3.1),
+	//
+	//	The fundamental observation is that every property
+	//	of a type or an algorithm can be represented by a
+	//	type (possibly defined specifically to do exactly
+	//	that).  That done, such a type can be used to guide
+	//	the overload resolution to select a function that
+	//	depends on the desired property.  [...]
+	//
+	//	Please note that thanks to inlining this resolution
+	//	is done at compile-time, so the appropriate [...]
+	//	function will be called directly without any run-time
+	//	overhead.
+	//
+	// Quoting from C++PL3 (section 13.4),
+	//
+	//	Passing [...] operations as a template parameter has two
+	//	significant benefits compared to alternatives such as
+	//	passing pointers to functions.  Several operations can
+	//	be passed as a single argument with no run-time cost.
+	//	In addition, the [...] operators [passed this way] are
+	//	trivial to inline, whereas inlining a call through a
+	//	pointer to function requires exceptional attention from
+	//	a compiler.
+	//
+
+	//
+	// In my opinion the template-specialization design is cleaner, and it
+	// clearly has no run-time cost (whereas the overloaded-function design
+	// may have a run-time cost for constructing and passing unused objects),
+	// so we use it here.
+	//
+	// There are, however, two (non-fatal) problems with this approach:
+	// - Unfortunately, it appears C++ (or at least gcc 2.95.1) forbids
+	//   template specialization within a class, so some of the functions
+	//   which would logically be class members, must instead be defined
+	//   outside any class.  We use the namespace  fd_stuff::  to hide
+	//   these from the outside world.
+	// - C++PL3, section C.13.3, states that
+	//	Only class templates can be template arguments.
+	//   so we have to use dummy classes around some of our template
+	//   functions.  To avoid extra constructor/destructor overhead, we
+	//   make these template functions static.
+	//
+
+	//******************************************************************************
+
+	//
+	// *** Implementation Notes -- Techniques using the C/C++ Preprocessor ***
+	//
+
+	//
+	// The fundamental problem with the template approaches is portability:
+	// Although the C++ standard describes powerful template facilities, not
+	// all C++ compilers yet fully support these.  As an alternative, we can
+	// use the C/C++ preprocessor.  This is ugly and dangerous (global names!),
+	// but is probably simpler than any of the template approaches.  It can
+	// provide the same finite differencing functionality and efficiency as
+	// the template-based approaches.
+	//
+	// Because of its greater portability, we use the preprocessor-based
+	// approach here.
+	//
+
+	//******************************************************************************
+
+	//
+	// *** Implementation Notes -- Run-Time Choice of Molecules ***
+	//
+	// *If* we want to allow the finite differencing scheme to be changed
+	// at run-time (e.g. from a parameter file), there are three plausible
+	// ways to do this:
+	// - Using  switch(molecule_type) , as is standard in C.  This is
+	//   simple, and for this particular application quite well-structured
+	//   and maintainable (there are only a few different molecule types,
+	//   all centralized in this file).
+	// - Using virtual functions, with  molecule  a virtual base class
+	//   and individual molecules derived from it.  This is elegant, but
+	//   may have some performance problems (below).  It also requires some
+	//   sort of switch-based "object factory" to interface with the
+	//   molecule-choice parameters.
+	// - Write all the finite differencing code multiple times, once for
+	//   each finite differencing scheme.
+	//
+	// The typical use of these functions will be from within a loop over
+	// a whole grid.  In both cases we can expect excellent accuracy from
+	// modern hardware branch prediction (and thus minimal performance loss
+	// from the branching).  It's reasonable to expect a compiler to fully
+	// inline the switch-based code, exposing all the gridfn array subscriptings
+	// to strength reduction etc, but this is much trickier for the
+	// virtual-function--based code.  For this reason, the switch-based
+	// design seems superior to the virtual-function--based one.
+	//
+	// However, at present we don't implement any run-time selection: we
+	// "just" fix the finite differencing scheme at compile time via the
+	// preprocessor.
+	//
+
+	//******************************************************************************
+
+	//
+	// *** finite difference molecules ***
+	//
+
+	//**************************************
+
+	//
+	// define the actual molecules
+	//
+	// In the following macros, we first define all the distinct floating-point
+	// numbers appearing in a molecule as "K" constants (all > 0),
+	// then define the actual derivative and its molecule coefficients
+	// using +/- the "K" constants, with multiplies by 1.0 elided and 0
+	// terms skipped in computing the derivative.  This (hopefully) gives
+	// maximum efficiency by avoiding the generated code loading the same
+	// constants multiple times.
+	//
+
+	//
+	// The molecule macros all take the following arguments:
+	// inv_delta_x_ = inverse of grid spacing in the finite differencing
+	//		  direction
+	// data_= a data-fetching function or macro: data_(ghosted_gfn, irho, isigma)
+	//	  is the data to be finite differenced
+	// irho_plus_m_ = a function or macro: irho_plus_m_(irho,m) returns the
+	//		  rho coordinate to be passed to data_() for the [m]
+	//		  molecule coefficient
+	// isigma_plus_m_ = same thing, for the sigma coordinate
+	//
+	// n.b. We grab the variables ghosted_gfn, irho, and isigma from the calling
+	//      environment, and we define assorted local variables as needed!
+	//
+
+	//**************************************
+
+	//
+	// 2nd order
+	//
+
+// 2nd order molecules: radius 1, i.e. 3 points (m = -1, 0, +1)
+#define FD_GRID__ORDER2__MOL_RADIUS 1
+#define FD_GRID__ORDER2__MOL_DIAMETER 3
+
+// 2nd order centered 1st derivative.
+// FD_GRID__ORDER2__DX() expands to a sequence of statements ending in a
+//  return : it fetches the data at m=+1 and m=-1 and returns
+//	inv_delta_x_ * 0.5 * (data_p1 - data_m1)
+// It must therefore be the (whole) body of a function which has
+// ghosted_gfn, irho, and isigma in scope.
+#define FD_GRID__ORDER2__DX__KPM1 0.5
+#define FD_GRID__ORDER2__DX(inv_delta_x_, data_,                    \
+							irho_plus_m_, isigma_plus_m_)           \
+	const fp data_p1 = data_(ghosted_gfn,                           \
+							 irho_plus_m_(irho, +1),                \
+							 isigma_plus_m_(isigma, +1));           \
+	const fp data_m1 = data_(ghosted_gfn,                           \
+							 irho_plus_m_(irho, -1),                \
+							 isigma_plus_m_(isigma, -1));           \
+	const fp sum = FD_GRID__ORDER2__DX__KPM1 * (data_p1 - data_m1); \
+	return inv_delta_x_ * sum; /* end macro */
+// the same molecule as individual coefficients of data(m), unit grid spacing
+#define FD_GRID__ORDER2__DX__COEFF_M1 (-FD_GRID__ORDER2__DX__KPM1)
+#define FD_GRID__ORDER2__DX__COEFF_0 0.0
+#define FD_GRID__ORDER2__DX__COEFF_P1 (+FD_GRID__ORDER2__DX__KPM1)
+
+// 2nd order centered 2nd derivative.
+// FD_GRID__ORDER2__DXX() expands to a sequence of statements ending in a
+//  return : it fetches the data at m=+1, 0, -1 and returns
+//	inv_delta_x_^2 * (data_m1 - 2.0*data_0 + data_p1)
+// It must therefore be the (whole) body of a function which has
+// ghosted_gfn, irho, and isigma in scope.
+#define FD_GRID__ORDER2__DXX__K0 2.0
+#define FD_GRID__ORDER2__DXX(inv_delta_x_, data_,                         \
+							 irho_plus_m_, isigma_plus_m_)                \
+	const fp data_p1 = data_(ghosted_gfn,                                 \
+							 irho_plus_m_(irho, +1),                      \
+							 isigma_plus_m_(isigma, +1));                 \
+	const fp data_0 = data_(ghosted_gfn,                                  \
+							irho_plus_m_(irho, 0),                        \
+							isigma_plus_m_(isigma, 0));                   \
+	const fp data_m1 = data_(ghosted_gfn,                                 \
+							 irho_plus_m_(irho, -1),                      \
+							 isigma_plus_m_(isigma, -1));                 \
+	const fp sum = data_m1 - FD_GRID__ORDER2__DXX__K0 * data_0 + data_p1; \
+	return jtutil::pow2(inv_delta_x_) * sum; /* end macro */
+// the same molecule as individual coefficients of data(m), unit grid spacing
+#define FD_GRID__ORDER2__DXX__COEFF_M1 1.0
+#define FD_GRID__ORDER2__DXX__COEFF_0 (-FD_GRID__ORDER2__DXX__K0)
+#define FD_GRID__ORDER2__DXX__COEFF_P1 1.0
+
+	//**************************************
+
+	//
+	// 4th order
+	//
+
+// 4th order molecules: radius 2, i.e. 5 points (m = -2, -1, 0, +1, +2)
+#define FD_GRID__ORDER4__MOL_RADIUS 2
+#define FD_GRID__ORDER4__MOL_DIAMETER 5
+
+// 4th order centered 1st derivative.
+// FD_GRID__ORDER4__DX() expands to a sequence of statements ending in a
+//  return : it fetches the data at m=+2, +1, -1, -2 and returns
+//	inv_delta_x_ * ( (8/12)*(data_p1 - data_m1) + (1/12)*(data_m2 - data_p2) )
+// It must therefore be the (whole) body of a function which has
+// ghosted_gfn, irho, and isigma in scope.
+#define FD_GRID__ORDER4__DX__KPM2 (1.0 / 12.0)
+#define FD_GRID__ORDER4__DX__KPM1 (8.0 / 12.0)
+#define FD_GRID__ORDER4__DX(inv_delta_x_, data_,                                                                      \
+							irho_plus_m_, isigma_plus_m_)                                                             \
+	const fp data_p2 = data_(ghosted_gfn,                                                                             \
+							 irho_plus_m_(irho, +2),                                                                  \
+							 isigma_plus_m_(isigma, +2));                                                             \
+	const fp data_p1 = data_(ghosted_gfn,                                                                             \
+							 irho_plus_m_(irho, +1),                                                                  \
+							 isigma_plus_m_(isigma, +1));                                                             \
+	const fp data_m1 = data_(ghosted_gfn,                                                                             \
+							 irho_plus_m_(irho, -1),                                                                  \
+							 isigma_plus_m_(isigma, -1));                                                             \
+	const fp data_m2 = data_(ghosted_gfn,                                                                             \
+							 irho_plus_m_(irho, -2),                                                                  \
+							 isigma_plus_m_(isigma, -2));                                                             \
+	const fp sum = FD_GRID__ORDER4__DX__KPM1 * (data_p1 - data_m1) + FD_GRID__ORDER4__DX__KPM2 * (data_m2 - data_p2); \
+	/*  printf("(%2d %2d) %f %f %f %f\n",irho, isigma,data_m2, data_m1,data_p1, data_p2);*/                           \
+	return inv_delta_x_ * sum; /* end macro */
+// the same molecule as individual coefficients of data(m), unit grid spacing
+#define FD_GRID__ORDER4__DX__COEFF_M2 (+FD_GRID__ORDER4__DX__KPM2)
+#define FD_GRID__ORDER4__DX__COEFF_M1 (-FD_GRID__ORDER4__DX__KPM1)
+#define FD_GRID__ORDER4__DX__COEFF_0 0.0
+#define FD_GRID__ORDER4__DX__COEFF_P1 (+FD_GRID__ORDER4__DX__KPM1)
+#define FD_GRID__ORDER4__DX__COEFF_P2 (-FD_GRID__ORDER4__DX__KPM2)
+
+	//**************************************
+
+// 4th order centered 2nd derivative.
+// FD_GRID__ORDER4__DXX() expands to a sequence of statements ending in a
+//  return : it fetches the data at m=+2, +1, 0, -1, -2 and returns
+//	inv_delta_x_^2 * ( -(30/12)*data_0 + (16/12)*(data_m1 + data_p1)
+//			   - (1/12)*(data_m2 + data_p2) )
+// It must therefore be the (whole) body of a function which has
+// ghosted_gfn, irho, and isigma in scope.
+#define FD_GRID__ORDER4__DXX__KPM2 (1.0 / 12.0)
+#define FD_GRID__ORDER4__DXX__KPM1 (16.0 / 12.0)
+#define FD_GRID__ORDER4__DXX__K0 (30.0 / 12.0)
+#define FD_GRID__ORDER4__DXX(inv_delta_x_, data_,                                                                                                            \
+							 irho_plus_m_, isigma_plus_m_)                                                                                                   \
+	const fp data_p2 = data_(ghosted_gfn,                                                                                                                    \
+							 irho_plus_m_(irho, +2),                                                                                                         \
+							 isigma_plus_m_(isigma, +2));                                                                                                    \
+	const fp data_p1 = data_(ghosted_gfn,                                                                                                                    \
+							 irho_plus_m_(irho, +1),                                                                                                         \
+							 isigma_plus_m_(isigma, +1));                                                                                                    \
+	const fp data_0 = data_(ghosted_gfn,                                                                                                                     \
+							irho_plus_m_(irho, 0),                                                                                                           \
+							isigma_plus_m_(isigma, 0));                                                                                                      \
+	const fp data_m1 = data_(ghosted_gfn,                                                                                                                    \
+							 irho_plus_m_(irho, -1),                                                                                                         \
+							 isigma_plus_m_(isigma, -1));                                                                                                    \
+	const fp data_m2 = data_(ghosted_gfn,                                                                                                                    \
+							 irho_plus_m_(irho, -2),                                                                                                         \
+							 isigma_plus_m_(isigma, -2));                                                                                                    \
+	const fp sum = -FD_GRID__ORDER4__DXX__K0 * data_0 + FD_GRID__ORDER4__DXX__KPM1 * (data_m1 + data_p1) - FD_GRID__ORDER4__DXX__KPM2 * (data_m2 + data_p2); \
+	return jtutil::pow2(inv_delta_x_) * sum; /* end macro */
+// the same molecule as individual coefficients of data(m), unit grid spacing
+#define FD_GRID__ORDER4__DXX__COEFF_M2 (-FD_GRID__ORDER4__DXX__KPM2)
+#define FD_GRID__ORDER4__DXX__COEFF_M1 (+FD_GRID__ORDER4__DXX__KPM1)
+#define FD_GRID__ORDER4__DXX__COEFF_0 (-FD_GRID__ORDER4__DXX__K0)
+#define FD_GRID__ORDER4__DXX__COEFF_P1 (+FD_GRID__ORDER4__DXX__KPM1)
+#define FD_GRID__ORDER4__DXX__COEFF_P2 (-FD_GRID__ORDER4__DXX__KPM2)
+
+	//******************************************************************************
+// compile-time choice of finite differencing scheme: the generic
+// FD_GRID__* names are aliased to the 4th order molecules defined above
+#define FD_GRID__MOL_RADIUS FD_GRID__ORDER4__MOL_RADIUS
+#define FD_GRID__MOL_DIAMETER FD_GRID__ORDER4__MOL_DIAMETER
+#define FD_GRID__DX FD_GRID__ORDER4__DX
+#define FD_GRID__DXX FD_GRID__ORDER4__DXX
+
+// number of points in a (diameter x diameter) square molecule
+#define FD_GRID__MOL_AREA (FD_GRID__MOL_DIAMETER * FD_GRID__MOL_DIAMETER)
+
+	//******************************************************************************
+
+	//
+	// ***** fd_grid - grid with finite differencing operations *****
+	//
+	// An  fd_grid  is identical to a  grid  except that it also defines
+	// (rho,sigma)-coordinate finite differencing operations on gridfns.
+	//
+
+	class fd_grid
+		: public grid
+	{
+		//
+		// molecule sizes
+		//
+	public:
+		// n.b. this interface implicitly assumes that all molecules
+		//      are centered and are the same order and size
+		// finite differencing order; hard-wired to 4, matching the
+		// FD_GRID__ORDER4__* macros that FD_GRID__DX/DXX are bound to
+		static int finite_diff_order() { return 4; }
+		// molecule extent, taken from the FD_GRID__MOL_* macros
+		static int molecule_radius() { return FD_GRID__MOL_RADIUS; }
+		static int molecule_diameter() { return FD_GRID__MOL_DIAMETER; }
+		// smallest/largest offset m of a molecule point: -radius ... +radius
+		static int molecule_min_m() { return -FD_GRID__MOL_RADIUS; }
+		static int molecule_max_m() { return FD_GRID__MOL_RADIUS; }
+
+		//
+		// helper functions to compute (irho,isigma) + [m]
+		// along each axis
+		//
+		// These are handed to the FD_GRID__DX/DXX macros as the "index plus
+		// offset" helpers.  Along the rho axis only irho is shifted by m,
+		// along the sigma axis only isigma is shifted; the other index is
+		// returned unchanged (m is deliberately ignored there).
+		//
+	private:
+		static int rho_axis__irho_plus_m(int irho, int m) { return irho + m; }
+		static int rho_axis__isigma_plus_m(int isigma, int m) { return isigma; }
+		static int sigma_axis__irho_plus_m(int irho, int m) { return irho; }
+		static int sigma_axis__isigma_plus_m(int isigma, int m) { return isigma + m; }
+
+		//
+		// ***** finite differencing *****
+		//
+		// Each function body below is a single FD_GRID__DX / FD_GRID__DXX
+		// macro invocation.  The macro arguments are
+		//   (inverse grid spacing, data accessor, irho+m helper, isigma+m helper).
+		// NOTE: the 2nd-derivative macro body visible above this class refers
+		// to `ghosted_gfn`, `irho` and `isigma` by name and ends in a `return`
+		// statement, so these parameter names must not be changed and no
+		// explicit return is written here (the 1st-derivative macro presumably
+		// follows the same pattern -- confirm against its definition).
+		// `ghosted_gridfn` is not declared in this class; presumably it is
+		// inherited from  grid .
+		//
+	public:
+		// 1st derivatives
+		fp partial_rho(int ghosted_gfn, int irho, int isigma)
+			const
+		{
+			FD_GRID__DX(inverse_delta_rho(),
+						ghosted_gridfn,
+						rho_axis__irho_plus_m,
+						rho_axis__isigma_plus_m);
+		}
+		fp partial_sigma(int ghosted_gfn, int irho, int isigma)
+			const
+		{
+			FD_GRID__DX(inverse_delta_sigma(),
+						ghosted_gridfn,
+						sigma_axis__irho_plus_m,
+						sigma_axis__isigma_plus_m);
+		}
+
+		// "pure" 2nd derivatives
+		fp partial_rho_rho(int ghosted_gfn, int irho, int isigma)
+			const
+		{
+			FD_GRID__DXX(inverse_delta_rho(),
+						 ghosted_gridfn,
+						 rho_axis__irho_plus_m,
+						 rho_axis__isigma_plus_m);
+		}
+		fp partial_sigma_sigma(int ghosted_gfn, int irho, int isigma)
+			const
+		{
+			FD_GRID__DXX(inverse_delta_sigma(),
+						 ghosted_gridfn,
+						 sigma_axis__irho_plus_m,
+						 sigma_axis__isigma_plus_m);
+		}
+
+		// mixed 2nd partial derivative
+		// ... computed as the rho 1st derivative of  partial_sigma :
+		//     the data accessor passed to the macro is  partial_sigma
+		//     itself rather than  ghosted_gridfn
+		fp partial_rho_sigma(int ghosted_gfn, int irho, int isigma)
+			const
+		{
+			FD_GRID__DX(inverse_delta_rho(),
+						partial_sigma,
+						rho_axis__irho_plus_m,
+						rho_axis__isigma_plus_m);
+		}
+
+		//
+		// ***** molecule coefficients *****
+		//
+	public:
+		// molecule coefficients
+		// n.b. this interface implicitly assumes that all molecules
+		//      are position-independent
+		// ... 1st derivatives scale the unit-spacing coefficient by
+		//     1/delta, 2nd derivatives by (1/delta)^2
+		fp partial_rho_coeff(int m) const
+		{
+			return inverse_delta_rho() * dx_coeff(m);
+		}
+		fp partial_sigma_coeff(int m) const
+		{
+			return inverse_delta_sigma() * dx_coeff(m);
+		}
+		fp partial_rho_rho_coeff(int m) const
+		{
+			return jtutil::pow2(inverse_delta_rho()) * dxx_coeff(m);
+		}
+		fp partial_sigma_sigma_coeff(int m) const
+		{
+			return jtutil::pow2(inverse_delta_sigma()) * dxx_coeff(m);
+		}
+		// ... mixed derivative: product of the two 1st-derivative coefficients
+		fp partial_rho_sigma_coeff(int m_rho, int m_sigma) const
+		{
+			return partial_rho_coeff(m_rho) * partial_sigma_coeff(m_sigma);
+		}
+
+		// worker functions: molecule coefficients for unit grid spacing
+		// (declared only; the definitions are not in this header)
+	private:
+		static fp dx_coeff(int m);
+		static fp dxx_coeff(int m);
+
+		//
+		// ***** constructor, destructor *****
+		//
+	public:
+		// constructor: pass through to grid:: constructor
+		fd_grid(const grid_array_pars &grid_array_pars_in,
+				const grid_pars &grid_pars_in)
+			: grid(grid_array_pars_in, grid_pars_in)
+		{
+		}
+		// compiler-generated default destructor is ok
+
+	private:
+		// we forbid copying and passing by value
+		// by declaring the copy constructor and assignment operator
+		// private, but never defining them
+		fd_grid(const fd_grid &rhs);
+		fd_grid &operator=(const fd_grid &rhs);
+	};
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /* FD_GRID_H  */
--- a/AMSS_NCKU_source/AHF_Direct/find_horizons.C
+++ b/AMSS_NCKU_source/AHF_Direct/find_horizons.C
@@ -0,0 +1,137 @@
+
+
+#include "macrodef.h"
+#ifdef With_AHF
+
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+#include <mpi.h>
+
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "cpm_map.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "fd_grid.h"
+#include "patch.h"
+#include "patch_edge.h"
+#include "patch_interp.h"
+#include "ghost_zone.h"
+#include "patch_system.h"
+
+#include "Jacobian.h"
+
+#include "gfns.h"
+#include "gr.h"
+
+#include "horizon_sequence.h"
+#include "BH_diagnostics.h"
+#include "myglobal.h"
+
+namespace AHFinderDirect
+{
+	// Forward declaration; the definition is not in this file.  It is called
+	// below with the corners (max, then min) and the centre of the region
+	// a horizon is to be searched for in.
+	void recentering(patch_system &ps, double max_x, double max_y, double max_z,
+					 double min_x, double min_y, double min_z,
+					 double centroid_x, double centroid_y, double centroid_z);
+	// Global finder state, defined in another translation unit.  The
+	// functions in this file read my_proc, my_hs, ADM, AH_data_array,
+	// N_procs, N_active_procs and isb from it.
+	extern struct state state;
+
+	//
+	// This function runs one horizon-finding pass over the horizons in this
+	// processor's horizon sequence and then calls  Newton() .
+	//
+	// Arguments:
+	// HN = number of horizons the caller supplies data for; processor 0
+	//      aborts the whole MPI job if this differs from hs.N_horizons()
+	// dumpid, dT = passed through unchanged to Newton()
+	// xc,yc,zc = per-horizon centre of the search region
+	// xr,yr,zr = per-horizon size of the search region; recentering() is
+	//            given (centre + size/2) as max and (centre - size/2) as min,
+	//            setup_initial_guess() is given the size unchanged
+	// trigger = per-horizon flag, copied into AH_data.find_trigger
+	//
+	// The per-horizon arrays are indexed by (horizon number - 1).
+	//
+	// For each triggered horizon:
+	// - already found               ==> clear initial_find_flag
+	// - not found, not yet recentred ==> recentre the patch system, install
+	//                                    a fresh initial guess and set
+	//                                    initial_find_flag
+	// - otherwise                   ==> set stop_finding
+	//
+	void AHFinderDirect_find_horizons(int HN, int *dumpid,
+									  double *xc, double *yc, double *zc, double *xr, double *yr, double *zr,
+									  bool *trigger, double *dT)
+	{
+		const int my_proc = state.my_proc;
+		horizon_sequence &hs = *state.my_hs;
+		if (my_proc == 0 && hs.N_horizons() != HN)
+		{
+			cout << "input number " << HN << " != " << "number of wanted horizons " << hs.N_horizons() << endl;
+			MPI_Abort(MPI_COMM_WORLD, 1);
+		}
+
+		state.ADM->AH_Prepare_derivatives();
+
+		for (int hn = hs.init_hn(); hs.is_genuine(); hn = hs.next_hn())
+		{
+			// horizon numbers are 1-based, the caller's arrays are 0-based
+			int ihn = hs.get_hn();
+			assert(ihn > 0 && ihn <= HN);
+			ihn = ihn - 1;
+
+			struct AH_data &AH_data = *state.AH_data_array[hn];
+
+			AH_data.find_trigger = trigger[ihn];
+			if (AH_data.find_trigger)
+			{
+				if (AH_data.found_flag)
+					AH_data.initial_find_flag = false;
+				else if (AH_data.recentering_flag == false)
+				{
+					patch_system &ps = *AH_data.ps_ptr;
+					recentering(ps, xc[ihn] + xr[ihn] / 2, yc[ihn] + yr[ihn] / 2, zc[ihn] + zr[ihn] / 2,
+								xc[ihn] - xr[ihn] / 2, yc[ihn] - yr[ihn] / 2, zc[ihn] - zr[ihn] / 2,
+								xc[ihn], yc[ihn], zc[ihn]);
+					setup_initial_guess(ps, xc[ihn], yc[ihn], zc[ihn], xr[ihn], yr[ihn], zr[ihn]);
+					AH_data.initial_find_flag = true;
+				}
+				else
+				{
+					// This used to read `stop_finding == true`, i.e. a
+					// comparison whose result was thrown away, so the flag
+					// was never actually set.
+					AH_data.stop_finding = true;
+				}
+			}
+
+		} // end for hn
+
+		Newton(state.N_procs, state.N_active_procs, my_proc,
+			   *state.my_hs, state.AH_data_array,
+			   state.isb, dumpid, dT);
+	}
+
+	//
+	// This function forces a fresh search for every horizon in this
+	// processor's horizon sequence: it resets the per-horizon search flags
+	// and then calls  AHFinderDirect_find_horizons()  with every trigger
+	// set and with dumpid / dT all equal to 1.
+	//
+	// Arguments:
+	// HN = number of horizons; processor 0 aborts the MPI job if this
+	//      differs from hs.N_horizons()
+	// xc,yc,zc,xr,yr,zr = passed through to AHFinderDirect_find_horizons()
+	//
+	void AHFinderDirect_enforcefind(int HN,
+									double *xc, double *yc, double *zc, double *xr, double *yr, double *zr)
+	{
+		const int my_proc = state.my_proc;
+		horizon_sequence &hs = *state.my_hs;
+		if (my_proc == 0 && hs.N_horizons() != HN)
+		{
+			cout << "input number " << HN << " != " << "number of wanted horizons " << hs.N_horizons() << endl;
+			MPI_Abort(MPI_COMM_WORLD, 1);
+		}
+
+		// scratch arguments for the find call: everything switched on
+		bool *const all_triggers = new bool[HN];
+		int *const all_dumpids = new int[HN];
+		double *const all_dT = new double[HN];
+		for (int k = 0; k < HN; ++k)
+		{
+			all_triggers[k] = true;
+			all_dumpids[k] = 1;
+			all_dT[k] = 1;
+		}
+
+		// put every horizon back into the "never searched" state
+		for (int hn = hs.init_hn(); hs.is_genuine(); hn = hs.next_hn())
+		{
+			const int horizon_number = hs.get_hn();
+			assert(horizon_number > 0 && horizon_number <= HN);
+
+			struct AH_data &ah = *state.AH_data_array[hn];
+			ah.find_trigger = true;
+			ah.stop_finding = false;
+			ah.found_flag = false;
+			ah.recentering_flag = false;
+			ah.initial_find_flag = true;
+		}
+
+		AHFinderDirect_find_horizons(HN, all_dumpids, xc, yc, zc, xr, yr, zr, all_triggers, all_dT);
+
+		delete[] all_triggers;
+		delete[] all_dumpids;
+		delete[] all_dT;
+	}
+} // namespace AHFinderDirect
+#endif
--- a/AMSS_NCKU_source/AHF_Direct/fuzzy.C
+++ b/AMSS_NCKU_source/AHF_Direct/fuzzy.C
@@ -0,0 +1,63 @@
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "stdc.h"
+#include "util.h"
+
+namespace AHFinderDirect
+{
+  namespace jtutil
+  {
+    //
+    // Fuzzy equality: true iff |x - y| is no larger than
+    // tolerance_ * max(1, |x|, |y|), i.e. an absolute test for small
+    // arguments and a relative test for large ones.
+    //
+    template <typename fp_t>
+    bool fuzzy<fp_t>::EQ(fp_t x, fp_t y)
+    {
+      const fp_t larger_magnitude = jtutil::tmax(jtutil::abs(x), jtutil::abs(y));
+      const fp_t relative_bound = tolerance_ * larger_magnitude;
+      const fp_t allowed_gap = jtutil::tmax(tolerance_, relative_bound);
+      const fp_t actual_gap = jtutil::abs(x - y);
+
+      return actual_gap <= allowed_gap;
+    }
+
+    //******************************************************************************
+
+    //
+    // True iff x is fuzzily EQ() to the integer that
+    // round<fp_t>::to_integer() maps it to.
+    //
+    template <typename fp_t>
+    bool fuzzy<fp_t>::is_integer(fp_t x)
+    {
+      const int nearest = round<fp_t>::to_integer(x);
+      const fp_t nearest_as_fp = static_cast<fp_t>(nearest);
+      return EQ(x, nearest_as_fp);
+    }
+
+    //******************************************************************************
+
+    //
+    // Fuzzy floor: if x is fuzzily an integer return that integer,
+    // otherwise return round<fp_t>::floor(x).
+    //
+    template <typename fp_t>
+    int fuzzy<fp_t>::floor(fp_t x)
+    {
+      if (fuzzy<fp_t>::is_integer(x))
+      {
+        return round<fp_t>::to_integer(x);
+      }
+      return round<fp_t>::floor(x);
+    }
+
+    //******************************************************************************
+
+    //
+    // Fuzzy ceiling: if x is fuzzily an integer return that integer,
+    // otherwise return round<fp_t>::ceiling(x).
+    //
+    template <typename fp_t>
+    int fuzzy<fp_t>::ceiling(fp_t x)
+    {
+      if (fuzzy<fp_t>::is_integer(x))
+      {
+        return round<fp_t>::to_integer(x);
+      }
+      return round<fp_t>::ceiling(x);
+    }
+    //
+    // Per-type definitions of the tolerance used by EQ() above.  These
+    // explicit specializations of the static data member come before the
+    // explicit class instantiations below, which are what instantiate the
+    // member functions that read  tolerance_ .
+    //
+    template <>
+    float fuzzy<float>::tolerance_ = 1.0e-5; // about 100 * FLT_EPSILON
+
+    template <>
+    double fuzzy<double>::tolerance_ = 1.0e-12; // about 1e4 * DBL_EPSILON
+
+    // template instantiations
+    // ... only these two floating-point types are instantiated in this file
+    template class fuzzy<float>;
+    template class fuzzy<double>;
+
+    //******************************************************************************
+    //******************************************************************************
+    //******************************************************************************
+
+  } // namespace jtutil
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/gfns.h
+++ b/AMSS_NCKU_source/AHF_Direct/gfns.h
@@ -0,0 +1,98 @@
+#ifndef GFNS_H
+#define GFNS_H
+namespace AHFinderDirect
+{
+
+	//
+	// Symbolic grid-function numbers (gfns).  Ghosted gfns are negative,
+	// nominal gfns start at 1, so the two ranges never overlap.
+	// The order of the enumerators fixes their numeric values: do not
+	// reorder or insert entries without checking every user.
+	//
+	namespace gfns
+	{
+
+		// ghosted gridfns
+		// ... there is exactly one ( h ), so min == max == -1
+		enum
+		{
+			ghosted_min_gfn = -1, // must set this by hand so
+								  // ghosted_max_gfn is still < 0
+			gfn__h = ghosted_min_gfn,
+			ghosted_max_gfn = gfn__h
+		};
+
+		// nominal gridfns
+		// ... numbered consecutively from nominal_min_gfn (= 1) up to
+		//     nominal_max_gfn (= gfn__one)
+		enum
+		{
+			nominal_min_gfn = 1,
+
+			//
+			// for a skeletal patch system we don't need any nominal gridfns
+			// ... max = min - 1 denotes an empty gfn range
+			//
+			skeletal_nominal_max_gfn = nominal_min_gfn - 1,
+
+			//
+			// most of these gridfns have access macros in "cg.hh";
+			// the ones that don't are marked explicitly
+			//
+			gfn__global_x = nominal_min_gfn, // no access macro
+			gfn__global_y,					 // no access macro
+			gfn__global_z,					 // no access macro
+
+			gfn__global_xx, // no access macro
+			gfn__global_xy, // no access macro
+			gfn__global_xz, // no access macro
+			gfn__global_yy, // no access macro
+			gfn__global_yz, // no access macro
+			gfn__global_zz, // no access macro
+
+			// 6 independent components of a symmetric g_dd
+			gfn__g_dd_11,
+			gfn__g_dd_12,
+			gfn__g_dd_13,
+			gfn__g_dd_22,
+			gfn__g_dd_23,
+			gfn__g_dd_33,
+			// 3 x 6 components of partial_d_g_dd
+			// (NOTE(review): presumably first digit = derivative direction,
+			//  last two = g_dd component -- confirm against the code that
+			//  fills these gridfns)
+			gfn__partial_d_g_dd_111,
+			gfn__partial_d_g_dd_112,
+			gfn__partial_d_g_dd_113,
+			gfn__partial_d_g_dd_122,
+			gfn__partial_d_g_dd_123,
+			gfn__partial_d_g_dd_133,
+			gfn__partial_d_g_dd_211,
+			gfn__partial_d_g_dd_212,
+			gfn__partial_d_g_dd_213,
+			gfn__partial_d_g_dd_222,
+			gfn__partial_d_g_dd_223,
+			gfn__partial_d_g_dd_233,
+			gfn__partial_d_g_dd_311,
+			gfn__partial_d_g_dd_312,
+			gfn__partial_d_g_dd_313,
+			gfn__partial_d_g_dd_322,
+			gfn__partial_d_g_dd_323,
+			gfn__partial_d_g_dd_333,
+			// 6 independent components of a symmetric K_dd, then trK
+			gfn__K_dd_11,
+			gfn__K_dd_12,
+			gfn__K_dd_13,
+			gfn__K_dd_22,
+			gfn__K_dd_23,
+			gfn__K_dd_33,
+			gfn__trK,
+
+			gfn__psi,			  // no access macro
+			gfn__partial_d_psi_1, // no access macro
+			gfn__partial_d_psi_2, // no access macro
+			gfn__partial_d_psi_3, // no access macro
+
+			gfn__Theta,
+			gfn__partial_Theta_wrt_partial_d_h_1,
+			gfn__partial_Theta_wrt_partial_d_h_2,
+			gfn__partial_Theta_wrt_partial_dd_h_11,
+			gfn__partial_Theta_wrt_partial_dd_h_12,
+			gfn__partial_Theta_wrt_partial_dd_h_22,
+			gfn__Delta_h,
+			gfn__save_Theta,
+			gfn__oldh, // used for dh/dt
+			gfn__one,
+			// must stay equal to the last enumerator above
+			nominal_max_gfn = gfn__one // no comma
+		};
+
+	} // namespace gfns::
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /* GFNS_H  */
--- a/AMSS_NCKU_source/AHF_Direct/ghost_zone.C
+++ b/AMSS_NCKU_source/AHF_Direct/ghost_zone.C
@@ -0,0 +1,604 @@
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <math.h>
+
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+
+#include "util.h"
+#include "array.h"
+#include "cpm_map.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "fd_grid.h"
+#include "patch.h"
+#include "patch_edge.h"
+#include "patch_interp.h"
+#include "ghost_zone.h"
+
+namespace AHFinderDirect
+{
+	using jtutil::error_exit;
+
+	//******************************************************************************
+	//******************************************************************************
+	//******************************************************************************
+
+	//
+	// These functions verify (assert()) that a ghost zone is indeed of
+	// the specified type, then static_cast to the appropriate derived class.
+	//
+
+	// const flavour: assert that we really are a symmetry ghost zone,
+	// then downcast
+	const symmetry_ghost_zone &ghost_zone::cast_to_symmetry_ghost_zone()
+		const
+	{
+		assert(is_symmetry());
+		const symmetry_ghost_zone *const as_derived = static_cast<const symmetry_ghost_zone *>(this);
+		return *as_derived;
+	}
+
+	// non-const flavour: assert that we really are a symmetry ghost zone,
+	// then downcast
+	symmetry_ghost_zone &ghost_zone::cast_to_symmetry_ghost_zone()
+	{
+		assert(is_symmetry());
+		symmetry_ghost_zone *const as_derived = static_cast<symmetry_ghost_zone *>(this);
+		return *as_derived;
+	}
+
+	//**************************************
+
+	// const flavour: assert that we really are an interpatch ghost zone,
+	// then downcast
+	const interpatch_ghost_zone &ghost_zone::cast_to_interpatch_ghost_zone()
+		const
+	{
+		assert(is_interpatch());
+		const interpatch_ghost_zone *const as_derived = static_cast<const interpatch_ghost_zone *>(this);
+		return *as_derived;
+	}
+
+	// non-const flavour: assert that we really are an interpatch ghost zone,
+	// then downcast
+	interpatch_ghost_zone &ghost_zone::cast_to_interpatch_ghost_zone()
+	{
+		assert(is_interpatch());
+		interpatch_ghost_zone *const as_derived = static_cast<interpatch_ghost_zone *>(this);
+		return *as_derived;
+	}
+
+	//******************************************************************************
+	//******************************************************************************
+	//******************************************************************************
+
+	//
+	// This function constructs a mirror-symmetry ghost zone object
+	//
+	symmetry_ghost_zone::symmetry_ghost_zone(const patch_edge &my_edge_in)
+		: ghost_zone(my_edge_in,
+					 my_edge_in, // a mirror maps the patch onto itself
+					 ghost_zone_is_symmetry)
+	{
+		// perpendicular direction: reflect iperp about the grid's outer edge
+		const fp mirror_plane_iperp = my_edge_in.fp_grid_outer_iperp();
+		iperp_map_ = new jtutil::cpm_map<fp>(min_iperp(), max_iperp(),
+											 mirror_plane_iperp);
+
+		// parallel direction: ipar is left unchanged (identity map)
+		ipar_map_ = new jtutil::cpm_map<fp>(extreme_min_ipar(),
+											extreme_max_ipar());
+	}
+
+	//******************************************************************************
+
+	//
+	// This function constructs a periodic-symmetry ghost zone object.
+	//
+	symmetry_ghost_zone::symmetry_ghost_zone(const patch_edge &my_edge_in, const patch_edge &other_edge_in,
+											 int my_edge_sample_ipar, int other_edge_sample_ipar,
+											 bool ipar_map_is_plus)
+		: ghost_zone(my_edge_in,
+					 other_edge_in,
+					 ghost_zone_is_symmetry)
+	{
+		//
+		// perpendicular map: our period plane --> the other edge's period plane
+		//
+		// The map has to take points outside our grid to points inside the
+		// other grid.  If both edges are the same kind (both min or both max)
+		// that means increasing iperp maps to decreasing iperp, i.e. the
+		// map's sign is -1; otherwise it is +1.
+		//
+		const bool edges_are_same_kind = (my_edge().is_min() == other_edge().is_min());
+		const fp my_plane_iperp = my_edge().fp_grid_outer_iperp();
+		const fp other_plane_iperp = other_edge().fp_grid_outer_iperp();
+		iperp_map_ = new jtutil::cpm_map<fp>(min_iperp(), max_iperp(),
+											 my_plane_iperp,
+											 other_plane_iperp,
+											 !edges_are_same_kind);
+
+		//
+		// parallel map: fixed by one pair of corresponding sample points
+		// plus the caller-supplied sign
+		//
+		ipar_map_ = new jtutil::cpm_map<fp>(extreme_min_ipar(), extreme_max_ipar(),
+											my_edge_sample_ipar, other_edge_sample_ipar,
+											ipar_map_is_plus);
+	}
+
+	//******************************************************************************
+
+	//
+	// This function destroys a  symmetry_ghost_zone  object.
+	//
+	symmetry_ghost_zone::~symmetry_ghost_zone()
+	{
+		// both maps are allocated with (non-array)  new  in each of the
+		// constructors above, so plain  delete  is the matching release
+		delete ipar_map_;
+		delete iperp_map_;
+	}
+
+	//******************************************************************************
+
+	//
+	// This function "synchronizes" a ghost zone, i.e. it updates the
+	// ghost-zone values of the specified gridfns via the appropriate
+	// symmetry operations.  The flags specify which part(s) of the ghost zone
+	// we want.
+	//
+	void symmetry_ghost_zone::synchronize(int ghosted_min_gfn, int ghosted_max_gfn,
+										  bool want_corners /* = true */,
+										  bool want_noncorner /* = true */)
+	{
+		for (int gfn = ghosted_min_gfn; gfn <= ghosted_max_gfn; ++gfn)
+		{
+			for (int iperp = min_iperp(); iperp <= max_iperp(); ++iperp)
+			{
+				for (int ipar = min_ipar(iperp); ipar <= max_ipar(iperp); ++ipar)
+				{
+					// only touch points in the part of the ghost zone
+					// the caller asked for
+					if (my_edge().ipar_is_in_selected_part(want_corners, want_noncorner,
+														   ipar))
+					{
+						// source point: image of (iperp,ipar) under the
+						// symmetry maps, located in the other patch
+						const int src_iperp = iperp_map_of_iperp(iperp);
+						const int src_ipar = ipar_map_of_ipar(ipar);
+						const int src_irho = other_edge().irho_of_iperp_ipar(src_iperp, src_ipar);
+						const int src_isigma = other_edge().isigma_of_iperp_ipar(src_iperp, src_ipar);
+						const fp src_value = other_patch().ghosted_gridfn(gfn, src_irho, src_isigma);
+
+						// destination point: (iperp,ipar) itself, in our patch
+						const int dst_irho = my_edge().irho_of_iperp_ipar(iperp, ipar);
+						const int dst_isigma = my_edge().isigma_of_iperp_ipar(iperp, ipar);
+						my_patch().ghosted_gridfn(gfn, dst_irho, dst_isigma) = src_value;
+					}
+				}
+			}
+		}
+	}
+
+	//******************************************************************************
+	//******************************************************************************
+	//******************************************************************************
+
+	//
+	// This function constructs an  interpatch_ghost_zone  object.
+	//
+	//
+	// Arguments:
+	// my_edge_in = the edge of our patch that this ghost zone lies on
+	// other_edge_in = the edge of the other patch that we get data from
+	// patch_overlap_width = overlap of the two patches in grid points along
+	//                       the perp direction (cf. the diagram below)
+	//
+	// The constructor only checks that the two edges are compatible and
+	// sets up  other_iperp_ ; every other pointer member is left NULL
+	// until  finish_setup()  is called.
+	//
+	interpatch_ghost_zone::interpatch_ghost_zone(const patch_edge &my_edge_in,
+												 const patch_edge &other_edge_in,
+												 int patch_overlap_width)
+		: ghost_zone(my_edge_in,
+					 other_edge_in,
+					 ghost_zone_is_interpatch),
+		  // remaining pointers are all set up properly by finish_setup()
+		  other_patch_interp_(NULL),
+		  other_iperp_(NULL),
+		  min_ipar_used_(NULL), max_ipar_used_(NULL),
+		  other_par_(NULL),
+		  interp_result_buffer_(NULL),
+		  Jacobian_y_ipar_posn_(NULL), Jacobian_buffer_(NULL) // no comma
+	{
+		//
+		// verify that we have the expected relationships between
+		// this and the other patch's (mu,nu,phi) coordinates:
+		//
+
+		// perp coordinate is common to us and the other patch, so
+		// ghost zone must be min in one patch, max in the other
+		if (my_edge().is_min() == other_edge().is_min())
+			then error_exit(ERROR_EXIT,
+							"***** interpatch_ghost_zone::interpatch_ghost_zone:\n"
+							"        my_patch().name()=\"%s\" my_edge().name()=%s\n"
+							"        other_patch().name()=\"%s\" other_edge().name()=%s\n"
+							"        ghost zone must be min in one patch, max in the other!\n",
+							my_patch().name(), my_edge().name(),
+							other_patch().name(), other_edge().name()); /*NOTREACHED*/
+
+		// coord in common between the two patches must be perp coord in both patches
+		// and this patch's tau coordinate must be other edge's parallel coordinate
+		// ... the common coordinate is found as the complement of the XOR
+		//     of the two patches' (rho,sigma) coordinate sets
+		const local_coords::coords_set common_coords_set = local_coords::coords_set_not(my_patch().coords_set_rho_sigma() ^
+																						other_patch().coords_set_rho_sigma());
+		if (!((common_coords_set == my_edge().coords_set_perp()) && (common_coords_set == other_edge().coords_set_perp()) && (my_patch().coords_set_tau() == other_edge().coords_set_par())))
+			then error_exit(PANIC_EXIT,
+							"***** interpatch_ghost_zone::interpatch_ghost_zone:\n"
+							"        (rho,sigma,tau) coordinates don't match up properly\n"
+							"        between this patch/edge and the other patch/edge!\n"
+							"        my_patch().name()=\"%s\" my_edge().name()=%s\n"
+							"        other_patch().name()=\"%s\" other_edge().name()=%s\n"
+							"        my_patch().coords_set_{rho,sigma,tau}={%s,%s,%s}\n"
+							"        my_edge().coords_set_{perp,par}={%s,%s}\n"
+							"        other_patch().coords_set_{rho,sigma,tau}={%s,%s,%s}\n"
+							"        other_edge().coords_set_{perp,par}={%s,%s}\n",
+							my_patch().name(), my_edge().name(),
+							other_patch().name(), other_edge().name(),
+							local_coords::name_of_coords_set(my_patch().coords_set_rho()),
+							local_coords::name_of_coords_set(my_patch().coords_set_sigma()),
+							local_coords::name_of_coords_set(my_patch().coords_set_tau()),
+							local_coords::name_of_coords_set(my_edge().coords_set_perp()),
+							local_coords::name_of_coords_set(my_edge().coords_set_par()),
+							local_coords::name_of_coords_set(other_patch().coords_set_rho()),
+							local_coords::name_of_coords_set(other_patch().coords_set_sigma()),
+							local_coords::name_of_coords_set(other_patch().coords_set_tau()),
+							local_coords::name_of_coords_set(other_edge().coords_set_perp()),
+							local_coords::name_of_coords_set(other_edge().coords_set_par()));
+		/*NOTREACHED*/
+
+		// perp coordinate must match (mod 2*pi) across the two patches
+		// after taking into account any overlap
+		// ... eg patch_overlap_width = 3 would be
+		//	p   p   p   p   p
+		//		q   q   q   q   q
+		//     so the overlap would be (patch_overlap_width-1) * delta
+		const fp other_overlap = (patch_overlap_width - 1) * other_edge().perp_map().delta_fp();
+		const fp other_outer_perp_minus_overlap // move back inwards into other patch
+												// by overlap distance, to get a value
+												// that should match our own
+												// grid_outer_perp() value
+			= other_edge().grid_outer_perp() + (other_edge().is_min() ? +other_overlap : -other_overlap);
+		if (!local_coords::fuzzy_EQ_ang(my_edge().grid_outer_perp(),
+										other_outer_perp_minus_overlap))
+			then error_exit(ERROR_EXIT,
+							"***** interpatch_ghost_zone::interpatch_ghost_zone:\n"
+							"        my_patch().name()=\"%s\" my_edge().name()=%s\n"
+							"        other_patch().name()=\"%s\" other_edge().name()=%s\n"
+							"        perp coordinate doesn't match (mod 2*pi) across the two patches!\n"
+							"        my_edge().grid_outer_perp()=%g   <--(compare this)\n"
+							"        patch_overlap_width=%d other_overlap=%g\n"
+							"        other_edge.grid_outer_perp()=%g\n"
+							"        other_outer_perp_minus_overlap=%g   <--(against this)\n",
+							my_patch().name(), my_edge().name(),
+							other_patch().name(), other_edge().name(),
+							double(my_edge().grid_outer_perp()),
+							patch_overlap_width, double(other_overlap),
+							double(other_edge().grid_outer_perp()),
+							double(other_outer_perp_minus_overlap)); /*NOTREACHED*/
+
+		//
+		// set up the iperp interpatch coordinate mapping
+		// (gives other patch's iperp coordinate for interpolation)
+		//
+
+		// compute the iperp --> other_iperp mapping for a sample point;
+		// ... if the ghost zone is empty, then the sample point will necessarily
+		//     be out-of-range in the ghost zone, so we use the *unchecked*
+		//     conversions to avoid errors in this case
+		// ... we do the computation using the fact that  perp  is the same
+		//     coordinate in both patches (modulo 2*pi radians = 360 degrees)
+		const int sample_iperp = outer_iperp();
+		const fp sample_perp = my_edge().perp_map().fp_of_int_unchecked(sample_iperp);
+		// unchecked conversion here!
+		const fp other_sample_perp = other_patch()
+										 .modulo_reduce_ang(other_edge().perp_is_rho(),
+															sample_perp);
+		const fp fp_other_sample_iperp = other_edge()
+											 .fp_iperp_of_perp(other_sample_perp);
+
+		// verify that this is fuzzily a grid point
+		if (!jtutil::fuzzy<fp>::is_integer(fp_other_sample_iperp))
+			then error_exit(ERROR_EXIT,
+							"***** interpatch_ghost_zone::interpatch_ghost_zone:\n"
+							"        my_patch().name()=\"%s\" my_edge().name()=%s\n"
+							"        other_patch().name()=\"%s\" other_edge().name()=%s\n"
+							"        sample_iperp=%d sample_perp=%g\n"
+							"        other_sample_perp=%g fp_other_sample_iperp=%g\n"
+							"        ==> fp_other_sample_iperp isn't fuzzily an integer!\n"
+							"        ==> patches aren't commensurate in the perpendicular coordinate!\n",
+							my_patch().name(), my_edge().name(),
+							other_patch().name(), other_edge().name(),
+							sample_iperp, double(sample_perp),
+							double(other_sample_perp),
+							double(fp_other_sample_iperp)); /*NOTREACHED*/
+		const int other_sample_iperp = jtutil::round<fp>::to_integer(fp_other_sample_iperp);
+
+		// compute the +/- sign (direction) of the iperp --> other_iperp mapping
+		//
+		// Since perp is the same in both patches (mod 2*pi radians = 360 degrees),
+		// the overall +/- sign is just the product of the signs of the two individual
+		// iperp <--> perp mappings.
+		//
+		// ... signs encoded as (floating-point) +/- 1.0
+		const double iperp_map_sign_pm1 = jtutil::signum(my_edge().perp_map().delta_fp()) * jtutil::signum(other_edge().perp_map().delta_fp());
+		// ... signs encoded as is_plus bool flag
+		const bool is_iperp_map_plus = (iperp_map_sign_pm1 > 0.0);
+
+		// now we finally know enough to set up the other_iperp(iperp)
+		// coordinate mapping
+		// ... this is the only pointer member allocated in the constructor
+		other_iperp_ = new jtutil::cpm_map<fp>(min_iperp(), max_iperp(),
+											   sample_iperp, other_sample_iperp,
+											   is_iperp_map_plus);
+	}
+
+	//******************************************************************************
+
+	//
+	// this function destroys an  interpatch_ghost_zone  object.
+	//
+	interpatch_ghost_zone::~interpatch_ghost_zone()
+	{
+		// The constructor initialises all of these pointers to NULL (and then
+		// allocates only  other_iperp_ ), and deleting a NULL pointer is a
+		// no-op, so this is safe even if  finish_setup()  was never called.
+		// NOTE(review): the two Jacobian_* buffers are allocated outside the
+		// code visible here -- confirm they come from non-array  new .
+		delete Jacobian_buffer_;
+		delete Jacobian_y_ipar_posn_;
+		delete interp_result_buffer_;
+		delete other_par_;
+		delete max_ipar_used_;
+		delete min_ipar_used_;
+		delete other_iperp_;
+		delete other_patch_interp_;
+	}
+
+	//******************************************************************************
+
+	//
+	// These functions compute the [min,max] ipar of the ghost zone for
+	// a given iperp, taking into account how we treat the corners
+	// (cf. the example in the header comments in "ghost_zone.hh"):
+	//
+	// If an adjacent ghost zone is symmetry,
+	//    we do not include that corner;
+	// If an adjacent ghost zone is interpatch,
+	//    we include up to the diagonal line, and if we are a rho ghost zone,
+	//    then also the diagonal line itself.  E.g. For the example in the
+	//    header comments "ghost_zone.hh", the +x ghost zone includes (6,6),
+	//    (7,6), and (7,7), while the +y ghost zone includes (6,7)
+	//
+	// ... in the following 2 functions,
+	//     the  iabs()  term includes the diagonal,
+	//     so we must remove the diagonal for !is_rho,
+	//     i.e. add 1 to min_ipar and subtract 1 from max_ipar
+	//
+	int interpatch_ghost_zone::min_ipar(int iperp) const
+	{
+		// symmetry neighbour on the min-par side: the corner is not ours
+		if (min_par_adjacent_ghost_zone().is_symmetry())
+			return my_edge().min_ipar_without_corners();
+
+		// interpatch neighbour: widen by one point per step away from the
+		// nominal grid's outer edge, which reaches the diagonal ...
+		const int steps_from_edge = iabs(iperp - my_edge().nominal_grid_outer_iperp());
+		// ... and only a rho ghost zone keeps the diagonal itself
+		const int skip_diagonal = is_rho() ? 0 : 1;
+		return my_edge().min_ipar_without_corners() - steps_from_edge + skip_diagonal;
+	}
+
+	int interpatch_ghost_zone::max_ipar(int iperp) const
+	{
+		// symmetry neighbour on the max-par side: the corner is not ours
+		if (max_par_adjacent_ghost_zone().is_symmetry())
+			return my_edge().max_ipar_without_corners();
+
+		// interpatch neighbour: widen by one point per step away from the
+		// nominal grid's outer edge, which reaches the diagonal ...
+		const int steps_from_edge = iabs(iperp - my_edge().nominal_grid_outer_iperp());
+		// ... and only a rho ghost zone keeps the diagonal itself
+		const int skip_diagonal = is_rho() ? 0 : 1;
+		return my_edge().max_ipar_without_corners() + steps_from_edge - skip_diagonal;
+	}
+
+	//******************************************************************************
+
+	//
+	// This function finishes the construction/setup of an  interpatch_ghost_zone
+	// object.  It
+	// - sets up the par coordinate mapping information
+	// - sets up the interpatch interpolator data pointer and result arrays
+	// - constructs the patch_interp object to interpolate from the *other* patch
+	//
+	// We use our ipar as the patch_interp's parindex.
+	//
+	void interpatch_ghost_zone::finish_setup(int interp_handle,
+											 int interp_par_table_handle)
+	{
+		// range of other-patch iperp covered by this ghost zone: take the
+		// min/max over the images of our two end points
+		min_other_iperp_ = min(other_iperp(min_iperp()),
+							   other_iperp(max_iperp()));
+		max_other_iperp_ = max(other_iperp(min_iperp()),
+							   other_iperp(max_iperp()));
+
+		//
+		// [min,max] ipar actually used at each other_iperp
+		// (these arrays are later handed to the other patch's  patch_interp
+		// object, where our ipar plays the role of parindex)
+		//
+		min_ipar_used_ = new jtutil::array1d<int>(min_other_iperp_, max_other_iperp_);
+		max_ipar_used_ = new jtutil::array1d<int>(min_other_iperp_, max_other_iperp_);
+		for (int iperp = min_iperp(); iperp <= max_iperp(); ++iperp)
+		{
+			const int their_iperp = other_iperp(iperp);
+			(*min_ipar_used_)(their_iperp) = min_ipar(iperp);
+			(*max_ipar_used_)(their_iperp) = max_ipar(iperp);
+		}
+
+		//
+		// other patch's par coordinate at each of our ghost-zone points
+		//
+		other_par_ = new jtutil::array2d<fp>(min_other_iperp_, max_other_iperp_,
+											 extreme_min_ipar(), extreme_max_ipar());
+		for (int iperp = min_iperp(); iperp <= max_iperp(); ++iperp)
+		{
+			const int their_iperp = other_iperp(iperp);
+			for (int ipar = min_ipar(iperp); ipar <= max_ipar(iperp); ++ipar)
+			{
+				// (iperp,ipar) --> (perp,par) --> (rho,sigma) --> tau;
+				// the constructor verified that our tau coordinate is the
+				// other edge's parallel coordinate (modulo 2*pi radians),
+				// so reducing tau into the other patch's range gives other_par
+				const fp my_perp = my_edge().perp_of_iperp(iperp);
+				const fp my_par = my_edge().par_of_ipar(ipar);
+
+				const fp my_rho = my_edge().rho_of_perp_par(my_perp, my_par);
+				const fp my_sigma = my_edge().sigma_of_perp_par(my_perp, my_par);
+
+				const fp my_tau = my_patch().tau_of_rho_sigma(my_rho, my_sigma);
+				const fp their_par = other_patch().modulo_reduce_ang(other_edge().par_is_rho(), my_tau);
+
+				(*other_par_)(their_iperp, ipar) = their_par;
+			}
+		}
+
+		//
+		// buffer receiving the interpolation results, one slab per ghosted gfn
+		//
+		interp_result_buffer_ = new jtutil::array3d<fp>(my_patch().ghosted_min_gfn(),
+														my_patch().ghosted_max_gfn(),
+														min_other_iperp_, max_other_iperp_,
+														extreme_min_ipar(), extreme_max_ipar());
+
+		//
+		// construct the patch_interp object to interpolate from the *other* patch
+		// ... the patch_interp may use gridfn data from the other patch's
+		//     min/max par ghost zones if those adjacent ghost zones are
+		//     symmetry, but not if they are interpatch,
+		//     cf the header comments in "ghost_zone.hh"
+		//
+		const ghost_zone &other_ghost_zone = other_patch().ghost_zone_on_edge(other_edge());
+		const bool ok_to_use_min_par_ghost_zone = other_ghost_zone.min_par_adjacent_ghost_zone().is_symmetry();
+		const bool ok_to_use_max_par_ghost_zone = other_ghost_zone.max_par_adjacent_ghost_zone().is_symmetry();
+		other_patch_interp_ = new patch_interp(other_edge(),
+											   min_other_iperp_, max_other_iperp_,
+											   *min_ipar_used_, *max_ipar_used_,
+											   *other_par_,
+											   ok_to_use_min_par_ghost_zone,
+											   ok_to_use_max_par_ghost_zone,
+											   interp_handle, interp_par_table_handle);
+	}
+
+	//******************************************************************************
+
+	//
+	// This function asserts() that
+	// - we have a patch_interp object
+	// - our and the patch_interp object's notions of the "other patch" agree
+	// - the other patch has an interpatch ghost zone on this edge
+	// - the other patch's interpatch ghost zone on this edge,
+	//   points back to our patch
+	//
+	void interpatch_ghost_zone::assert_fully_setup() const
+	{
+		// n.b. every check below is an  assert() , so this whole function
+		//      compiles to a no-op when NDEBUG is defined
+
+		// we must already hold a patch_interp object
+		assert(other_patch_interp_ != NULL);
+		// ... and that patch_interp must belong to the patch we call "other"
+		assert(other_patch() == other_patch_interp_->my_patch());
+		// the other patch's ghost zone on the other edge must itself be
+		// an interpatch ghost zone
+		assert(other_patch()
+				   .ghost_zone_on_edge(other_edge())
+				   .is_interpatch());
+		// ... and that ghost zone must name our patch as *its* other patch
+		assert(my_patch() == other_patch()
+								 .ghost_zone_on_edge(other_edge())
+								 .other_patch());
+	}
+
+	//******************************************************************************
+
+	//
+	// This function "synchronizes" a ghost zone, i.e. it updates the
+	// ghost-zone values of the specified gridfns via the appropriate
+	// interpatch interpolations.
+	//
+	// The flags specify which part(s) of the ghost zone we want, but
+	// the present implementation only supports the case where all the
+	// flags are  true , i.e. we want the entire ghost zone.
+	//
+	void interpatch_ghost_zone::synchronize(int ghosted_min_gfn, int ghosted_max_gfn,
+											bool want_corners /* = true */,
+											bool want_noncorner /* = true */)
+	{
+#ifdef DEBUG_AHFD
+		printf("*Sync interpatch ghost zone in %s\n", my_patch().name());
+#endif
+
+		// partial updates are not implemented: the caller must ask for
+		// both the corners and the non-corner part of the ghost zone
+		const bool want_entire_ghost_zone = want_corners && want_noncorner;
+		if (!want_entire_ghost_zone)
+			then error_exit(ERROR_EXIT,
+							"***** interpatch_ghost_zone::synchronize():\n"
+							"        we only support operating on the *entire* ghost zone,\n"
+							"        but we were passed flags specifying a proper subset!\n"
+							"        want_corners=(int)%d want_noncorner=(int)%d\n",
+							want_corners, want_noncorner); /*NOTREACHED*/
+
+		//
+		// Jacobian bookkeeping (moved here from  compute_Jacobian()  below):
+		// ask the other patch's interpolator for the [min,max] range of
+		// the molecule index  m , and record it in our member variables
+		//
+		assert(other_patch_interp_ != NULL);
+		other_patch_interp_->molecule_minmax_ipar_m(Jacobian_min_y_ipar_m_,
+													Jacobian_max_y_ipar_m_);
+#ifdef DEBUG_AHFD
+		printf("%d %d %d %d %d %d \n", Jacobian_min_y_ipar_m_, Jacobian_max_y_ipar_m_,
+			   min_other_iperp_, max_other_iperp_, extreme_min_ipar(), extreme_max_ipar());
+		getchar();
+#endif
+
+		//
+		// allocate the Jacobian buffers the first time we get here
+		// (n.b. they keep the sizes chosen on that first call)
+		//
+		if (Jacobian_y_ipar_posn_ == NULL)
+		{
+			Jacobian_y_ipar_posn_ = new jtutil::array2d<CCTK_INT>(min_other_iperp_, max_other_iperp_,
+																  extreme_min_ipar(), extreme_max_ipar());
+		}
+		if (Jacobian_buffer_ == NULL)
+		{
+			Jacobian_buffer_ = new jtutil::array3d<fp>(min_other_iperp_, max_other_iperp_,
+													   extreme_min_ipar(), extreme_max_ipar(),
+													   Jacobian_min_y_ipar_m_, Jacobian_max_y_ipar_m_);
+		}
+
+		//
+		// interpolate from the other patch into our result buffer;
+		// the same call is also handed the two Jacobian buffers
+		//
+		other_patch_interp_->interpolate(ghosted_min_gfn, ghosted_max_gfn,
+										 *interp_result_buffer_,
+										 *Jacobian_y_ipar_posn_,
+										 *Jacobian_buffer_);
+
+		//
+		// copy the interpolated values out of the result buffer
+		// (indexed by the *other* patch's iperp) into our own ghosted gridfns
+		// (indexed by our patch's (irho,isigma))
+		//
+		patch &dest_patch = my_patch();
+		const patch_edge &dest_edge = my_edge();
+		for (int ghosted_gfn = ghosted_min_gfn; ghosted_gfn <= ghosted_max_gfn; ++ghosted_gfn)
+		{
+			for (int my_iperp = min_iperp(); my_iperp <= max_iperp(); ++my_iperp)
+			{
+				const int their_iperp = other_iperp(my_iperp);
+
+				for (int my_ipar = min_ipar(my_iperp); my_ipar <= max_ipar(my_iperp); ++my_ipar)
+				{
+					const int irho = dest_edge.irho_of_iperp_ipar(my_iperp, my_ipar);
+					const int isigma = dest_edge.isigma_of_iperp_ipar(my_iperp, my_ipar);
+					dest_patch.ghosted_gridfn(ghosted_gfn, irho, isigma) =
+						(*interp_result_buffer_)(ghosted_gfn, their_iperp, my_ipar);
+				}
+			}
+		}
+	}
+
+	//******************************************************************************
+
+	//
+	// This function allocates the internal buffers for the Jacobian, and
+	// computes that Jacobian
+	//	    partial synchronize gridfn(ghosted_gfn, iperp, ipar)
+	//	------------------------------------------------------------
+	//	partial other patch gridfn(ghosted_gfn, oiperp, posn+ipar_m)
+	// where
+	//	oiperp = Jacobian_oiperp(iperp)
+	//	posn = Jacobian_oipar_posn(iperp, ipar)
+	// into the internal buffers.
+	//
+	void interpatch_ghost_zone::compute_Jacobian(int ghosted_min_gfn, int ghosted_max_gfn,
+												 bool want_corners /* = true */,
+												 bool want_noncorner /* = true */)
+		const
+	{
+		//
+		// n.b. the only work left in this function is the argument check
+		//      below: the Jacobian buffers are allocated in  synchronize()
+		//      and passed to the interpatch interpolator there
+		//
+
+		// partial Jacobians are not implemented: the caller must ask for
+		// both the corners and the non-corner part of the ghost zone
+		if (!want_corners || !want_noncorner)
+			then error_exit(ERROR_EXIT,
+							"***** interpatch_ghost_zone::compute_Jacobian():\n"
+							"        we only support operating on the *entire* ghost zone,\n"
+							"        but we were passed flags specifying a proper subset!\n"
+							"        want_corners=(int)%d want_noncorner=(int)%d\n",
+							want_corners, want_noncorner); /*NOTREACHED*/
+	}
+
+	//******************************************************************************
+	//******************************************************************************
+	//******************************************************************************
+
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/ghost_zone.h
+++ b/AMSS_NCKU_source/AHF_Direct/ghost_zone.h
@@ -0,0 +1,796 @@
+#ifndef GHOST_ZONE_H
+#define GHOST_ZONE_H
+namespace AHFinderDirect
+{
+
+	//*****************************************************************************
+
+	//
+	// ***** design notes for ghost zones *****
+	//
+
+	//
+	// A  ghost_zone  object describes a patch's ghost zone, and knows how
+	// to compute gridfns there (we usually speak of "synchronizing" the
+	// ghost zone or zones) based on either the patch system's symmetry
+	// or interpolation from a neighboring patch.  ghost_zone is an abstract
+	// base class, from which we derive two concrete classes:
+	// * A  symmetry_ghost_zone  object describes a ghost zone which is a
+	//   (discrete) symmetry of spacetime, either mirror-image or periodic.
+	//   Such an object knows how to fill in ghost-zone gridfn data from
+	//   the "other side" of the symmetry.
+	// * An  interpatch_ghost_zone  object describes a ghost zone which
+	//   overlaps another patch.  Such an object knows how to get ghost
+	//   zone gridfn data from the other patch.  More accurately, it gets
+	//   the data by asking (calling) the appropriate one of the other
+	//   patch's  patch_interp  objects.
+	// Every patch has (points to) 4  ghost_zone  objects, one for each of
+	// the patch's sides.  See the comments in "patch.h" for a "big picture"
+	// discussion of patches, patch edges, ghost zones, and patch interpolators.
+	//
+
+	//
+	// There are some unobvious complications involved in synchronizing
+	// the ghost zone "corners", i.e. in ghost zone points that are outside
+	// the nominal grid in *both* coordinates.  There are 3 basic cases here:
+	// * A corner between two symmetry ghost zones, for example the -x/-y
+	//   corner in the example below.  In this case it takes *two* sequential
+	//   symmetry operations to get gridfn data in the corner from the
+	//   nominal grid.  Symmetry operations commute, so at each point we'll
+	//   always get the same results independently of in which order we do
+	//   the symmetry operations.  Computationally, we actually do the operations
+	//   in both orders, one order's results overwriting the other's, but
+	//   this doesn't matter (because the results are the same).
+	// * A corner between two interpatch ghost zones, for example the +x/+y
+	//   corner in the example below.  In this case we could get the gridfn
+	//   data by either of two distinct interpolation operations (presumably
+	//   from two distinct patches), which would in general give slightly
+	//   different results.  In some ideal world we might do a centered
+	//   interpolation using data from both patches, but this would be
+	//   complicated:
+	//   - it would require a 2-D interpolation
+	//   - it would require bookkeeping for interpolating from multiple
+	//     patches within the same ghost zone, indeed for the same ghost
+	//     zone point
+	//   At present, we follow a simpler approach: we split the corner down
+	//   its diagonal,
+	//	[for the points on the diagonal we make an arbitrary choice;
+	//	at present this is that they belong to (and get their data via)
+	//	the rho ghost zone.]
+	//   and off-center the interpolation as necessary so each ghost-zone
+	//   point gets data solely from the neighboring patch on its own side.
+	// * A corner between a symmetry and an interpatch ghost zone, for
+	//   example the +x/-y or -x/+y corners in the example below.  In this
+	//   case we first do a symmetry operation in the neighboring patch,
+	//   then a fully centered interpolation (using the data just obtained
+	//   from a symmetry operation) to get data in the non-corner part of
+	//   the interpatch ghost zone.  After the interpatch interpolation,
+	//   we do a final symmetry operation to get gridfn data in the corner.
+	//
+	// In general, then, a ghost zone is rhomboid-shaped: iperp lies in a
+	// fixed interval, while ipar lies in an interval which may depend on
+	// iperp.  In general, this shape depends on the type (symmetry vs interpatch)
+	// of the adjacent ghost zones.
+	//
+
+	//
+	// To properly handle all the symmetry/interpatch cases described above,
+	// we use a 3-phase algorithm to synchronize ghost zones:
+	// Phase 1: Fill in gridfn data at all the non-corner points of symmetry
+	//	    ghost zones, by using the symmetries to get this data from
+	//	    its "home patch" nominal grids.
+	// Phase 2: Fill in gridfn data in all the interpatch ghost zones, by
+	//	    interpatch interpolating from neighboring patches as described
+	//	    above.
+	// Phase 3: Fill in gridfn data at all the corner points of symmetry
+	//	    ghost zones, by using the symmetries to get this data from
+	//	    its "home patch" nominal grids or ghost zones.
+	// Here a given ghost zone corner may be either a full rectangle (so any
+	// given point is a member of both adjacent corners), or split down its
+	// diagonal (so any given point is a member of only one corner).  This
+	// 3-phase algorithm is actually implemented by
+	//    patch_system::synchronize()
+	// which in turn calls
+	//    symmetry_ghost_zone::synchronize()
+	//    interpatch_ghost_zone::synchronize()
+	//
+
+	//
+	// For example, consider the +z patch in an octant patch system, with
+	// the ghost zones being 2 points wide.  The following illustration is
+	// looking down the z axis, and uses (x,y) for the patch coordinates
+	// for simplicity:
+	//
+	//                    #                                                   //
+	//                   i+y    i+y    i+y    i+y    i+y    i+y    i+y      //
+	//   (-2,7) (-1,7)  (0,7)  (1,7)  (2,7)  (3,7)  (4,7)  (5,7)  (6,7)  (7,7)
+	//    <s-x>  <s-x>    #                                              /i+x
+	//                    #                                            //
+	//                   i+y    i+y    i+y    i+y    i+y    i+y      //
+	//   (-2,6) (-1,6)  (0,6)  (1,6)  (2,6)  (3,6)  (4,6)  (5,6)  (6,6)  (7,6)
+	//    <s-x>  <s-x>    #                                       /i+x    i+x
+	//                    #                                     //
+	//                    #                                   //
+	//   (-2,5) (-1,5)  (0,5)--(1,5)--(2,5)--(3,5)--(4,5)--(5,5)  (6,5)  (7,5)
+	//     s-x    s-x     #                                  |     i+x    i+x
+	//                    #                                  |
+	//                    #                                  |
+	//   (-2,4) (-1,4)  (0,4)  (1,4)  (2,4)  (3,4)  (4,4)  (5,4)  (6,4)  (7,4)
+	//     s-x    s-x     #                                  |     i+x    i+x
+	//                    #                                  |
+	//                    #                                  |
+	//   (-2,3) (-1,3)  (0,3)  (1,3)  (2,3)  (3,3)  (4,3)  (5,3)  (6,3)  (7,3)
+	//     s-x    s-x     #                                  |     i+x    i+x
+	//                    #                                  |
+	//                    #                                  |
+	//   (-2,2) (-1,2)  (0,2)  (1,2)  (2,2)  (3,2)  (4,2)  (5,2)  (6,2)  (7,2)
+	//     s-x    s-x     #                                  |     i+x    i+x
+	//                    #                                  |
+	//                    #                                  |
+	//   (-2,1) (-1,1)  (0,1)  (1,1)  (2,1)  (3,1)  (4,1)  (5,1)  (6,1)  (7,1)
+	//     s-x    s-x     #                                  |     i+x    i+x
+	//                    #                                  |
+	//                    #                                  |
+	//  #(-2,0)#(-1,0)##(0,0)##(1,0)##(2,0)##(3,0)##(4,0)##(5,0)##(6,0)##(7,0)
+	//     s-x    s-x     #                                        i+x    i+x
+	//                    #
+	//    <s-y>  <s-y>   s-y    s-y    s-y    s-y    s-y    s-y   <s-y>  <s-y>
+	//   (-2,-1)(-1,-1) (0,-1) (1,-1) (2,-1) (3,-1) (4,-1) (5,-1) (6,-1) (7,-1)
+	//    <s-x>  <s-x>    #
+	//                    #
+	//    <s-y>  <s-y>   s-y    s-y    s-y    s-y    s-y    s-y   <s-y>  <s-y>
+	//   (-2,-2)(-1,-2) (0,-2) (1,-2) (2,-2) (3,-2) (4,-2) (5,-2) (6,-2) (7,-2)
+	//    <s-x>  <s-x>    #
+	//                    #
+	//
+	// For this example,
+	// * The xz plane and yz plane are marked with ### lines
+	// * The +z patch's nominal grid is ([0,5],[0,5]), i.e. 0 <= x,y <= 5;
+	//   its boundary lines are shown with single lines --- and | .
+	// * The diagonals where we've split corners are marked with // lines.
+	// * The +z patch's ghost zones are
+	//	-x: (-1,[-1,7]), (-2,[-2,7])
+	//	+x: (6,[-2,6]), (7,[-2,7])
+	//	-y: ([-2, 7],[-2,-1])
+	//	+y: ([-2,5],6), ([-2,6],7)
+	// * The regions where we will interpolate data from the +z patch are
+	//	+x: ([ 3,4],[-2,7])
+	//	+y: ([-2,7],[ 3,4])
+	//   Note that in both cases the interpolation region includes the points
+	//   computed by symmetry (in phase 1 of our 3-phase algorithm) on the
+	//   adjacent edges! There are no interpolation regions inside the -x or
+	//   -y boundaries, since no interpolation is needed across those boundaries
+	//   of this patch.
+	// The diagonal // line shows the boundary between the +x and +y ghost
+	// zones.
+	//
+	// Our 3-phase algorithm described above thus becomes:
+	// Phase 1: Fill in gridfn values at points marked with "s-x" below or
+	//	    "s-y" above via symmetry mirroring across the -x boundary
+	//	    (yz plane) or -y boundary (xz plane), as described by the
+	//	    +z patch's -x or -y  symmetry_ghost_zone  object respectively.
+	// Phase 2: Fill in gridfn values at points marked with "i+x" below or
+	//	    "i+y" above via interpatch interpolation from the neighboring
+	//	    patch across the +z patch's +x or +y boundary, as described
+	//	    by the +z patch's +x or +y  interpatch_ghost_zone  object
+	//	    respectively.
+	// Phase 3: Fill in gridfn values at points marked with "<s-x>" below or
+	//	    "<s-y>" above via symmetry mirroring across the -x boundary
+	//	    (yz plane) or -y boundary (xz plane), as described by the
+	//	    +z patch's -x or -y  symmetry_ghost_zone  object respectively.
+	//
+
+	//*****************************************************************************
+
+	//
+	// ghost_zone - abstract base class to describe ghost zone of patch
+	//
+	// This is an abstract base class describing a generic patch ghost zone.
+	// This might represent either of:
+	// - a discrete symmetry of spacetime (derived class symmetry_ghost_zone)
+	// - an overlap with another patch (derived class interpatch_ghost_zone)
+	//
+
+	//
+	// N.b. const qualifiers in ghost_zone and its derived classes refer to
+	//      the underlying gridfn data.
+	//
+
+	// forward declarations
+	class symmetry_ghost_zone;
+	class interpatch_ghost_zone;
+	class patch_system;
+
+	class ghost_zone
+	{
+	public:
+		//
+		// ***** main high-level client interface *****
+		//
+		// Synchronize this ghost zone: update the ghost-zone values of the
+		// specified gridfns by whatever sequence of symmetry operations
+		// and/or interpatch interpolations the concrete class implements.
+		// The two flags select which part(s) of the ghost zone to update.
+		//
+		virtual void synchronize(int ghosted_min_gfn, int ghosted_max_gfn,
+								 bool want_corners = true,
+								 bool want_noncorner = true) = 0;
+
+	public:
+		//
+		// ***** Jacobian of synchronize() *****
+		//
+		//  compute_Jacobian()  evaluates the Jacobian of the  synchronize()
+		// operation into internal buffers; the query functions declared
+		// after it give access to that Jacobian.
+		//
+		// FIXME: should these be moved out into a separate Jacobian
+		//        object/class?
+		//
+		// Only this one ghost zone's  synchronize()  operation is covered
+		// here -- the 3-phase synchronization algorithm described in the
+		// header comments for this file is *NOT* taken into account.
+		// (That's done by  patch_system::synchronize_Jacobian()  and its
+		// subfunctions.)
+		//
+		// n.b. terminology is
+		//	partial gridfn at x
+		//	-------------------
+		//	partial gridfn at y
+		//
+		virtual void compute_Jacobian(int ghosted_min_gfn, int ghosted_max_gfn,
+									  bool want_corners = true,
+									  bool want_noncorner = true) const = 0;
+
+		//
+		// The query API below implicitly assumes that the Jacobian does
+		// not depend on  ghosted_gfn , and that the set of y points on
+		// which any single ghost-zone point depends
+		// - shares a single yiperp value (a function of our iperp), and
+		// - forms a contiguous interval of yipar (a function of our iperp
+		//   and ipar) whose size is
+		//	[or can be taken to be without an unreasonable
+		//	amount of zero-padding]
+		//   independent of our iperp and ipar.
+		// That interval is parameterized as  yipar = posn+m , where  posn
+		// is determined by our iperp and ipar, and  m  runs over a fixed
+		// range independent of our iperp and ipar.
+		//
+
+		// [min,max] range of  m  for this ghost zone
+		virtual int Jacobian_min_y_ipar_m() const = 0;
+		virtual int Jacobian_max_y_ipar_m() const = 0;
+
+		// iperp (in the y patch) of the Jacobian's y points
+		virtual int Jacobian_y_iperp(int x_iperp) const = 0;
+
+		//  posn  value of the y points in this Jacobian row
+		virtual int Jacobian_y_ipar_posn(int x_iperp, int x_ipar) const = 0;
+
+		// the Jacobian itself, i.e.
+		//	partial synchronize() px.gridfn(ghosted_gfn, x_iperp, x_ipar)
+		//	-------------------------------------------------------------
+		//	   partial py.gridfn(ghosted_gfn, y_iperp, y_posn+y_ipar_m)
+		// with
+		//	y_iperp = Jacobian_y_iperp(x_iperp)
+		//	y_posn = Jacobian_y_ipar_posn(x_iperp, x_ipar)
+		virtual fp Jacobian(int x_iperp, int x_ipar, int y_ipar_m) const = 0;
+
+	public:
+		//
+		// ***** low-level client interface *****
+		//
+
+		// the patch/edge this ghost zone belongs to
+		patch &my_patch() const
+		{
+			return my_patch_;
+		}
+		const patch_edge &my_edge() const
+		{
+			return my_edge_;
+		}
+
+		// the patch/edge this ghost zone gets its data from
+		patch &other_patch() const
+		{
+			return other_patch_;
+		}
+		const patch_edge &other_edge() const
+		{
+			return other_edge_;
+		}
+
+		// ghost zone type: exactly one of these two is true
+		bool is_interpatch() const
+		{
+			return is_interpatch_;
+		}
+		bool is_symmetry() const
+		{
+			return !is_interpatch();
+		}
+
+		// convenience forwarders to our patch_edge
+		bool is_min() const
+		{
+			return my_edge_.is_min();
+		}
+		bool is_rho() const
+		{
+			return my_edge_.is_rho();
+		}
+
+		// [min,max] iperp covered by this ghost zone
+		int min_iperp() const
+		{
+			return my_patch_.minmax_ang_ghost_zone__min_iperp(is_min(), is_rho());
+		}
+		int max_iperp() const
+		{
+			return my_patch_.minmax_ang_ghost_zone__max_iperp(is_min(), is_rho());
+		}
+
+		// iperp of the ghost-zone row nearest to / farthest from our patch:
+		// on a min edge the row nearest the patch is the one with the
+		// largest iperp, on a max edge it's the one with the smallest
+		int inner_iperp() const
+		{
+			if (is_min())
+				return max_iperp();
+			return min_iperp();
+		}
+		int outer_iperp() const
+		{
+			if (is_min())
+				return min_iperp();
+			return max_iperp();
+		}
+
+		// widest possible [min,max] ipar range for this ghost zone,
+		// corners included (derived classes may actually use a subset)
+		int extreme_min_ipar() const
+		{
+			return my_edge_.min_ipar_with_corners();
+		}
+		int extreme_max_ipar() const
+		{
+			return my_edge_.max_ipar_with_corners();
+		}
+
+		// actual [min,max] ipar range of the ghost zone at a given iperp
+		// (may depend on the type of the adjacent ghost zones)
+		virtual int min_ipar(int iperp) const = 0;
+		virtual int max_ipar(int iperp) const = 0;
+
+		// is the point (iperp,ipar) a member of this ghost zone?
+		bool is_in_ghost_zone(int iperp, int ipar) const
+		{
+			// n.b. iperp must be known to be in range before we may ask
+			//      min_ipar()/max_ipar() about it, so reject bad iperp first
+			if ((iperp < min_iperp()) || (iperp > max_iperp()))
+				return false;
+
+			return (ipar >= min_ipar(iperp)) && (ipar <= max_ipar(iperp));
+		}
+
+		// the ghost zones (of our own patch) adjacent to our min/max par corners
+		const ghost_zone &min_par_adjacent_ghost_zone() const
+		{
+			return my_patch_.ghost_zone_on_edge(my_edge_.min_par_adjacent_edge());
+		}
+		const ghost_zone &max_par_adjacent_ghost_zone() const
+		{
+			return my_patch_.ghost_zone_on_edge(my_edge_.max_par_adjacent_edge());
+		}
+
+		//
+		// ***** safely cast to derived classes *****
+		//
+
+		// assert that this ghost zone is of the specified type,
+		// then static_cast to the derived type
+		const symmetry_ghost_zone &cast_to_symmetry_ghost_zone() const;
+		symmetry_ghost_zone &cast_to_symmetry_ghost_zone();
+		const interpatch_ghost_zone &cast_to_interpatch_ghost_zone() const;
+		interpatch_ghost_zone &cast_to_interpatch_ghost_zone();
+
+		//
+		// ***** constructor, finish setup, destructor *****
+		//
+	protected:
+		// named values for the constructor's  is_interpatch_in  argument
+		// FIXME: these should really be bool, but then we couldn't
+		//        use the "enum hack" for in-class constants
+		enum
+		{
+			ghost_zone_is_symmetry = false,
+			ghost_zone_is_interpatch = true // no comma
+		};
+
+		// constructor
+		// ... protected because only our derived classes use it;
+		//     the rest of the world constructs those derived classes instead
+		ghost_zone(const patch_edge &my_edge_in,
+				   const patch_edge &other_edge_in,
+				   bool is_interpatch_in)
+			: my_patch_(my_edge_in.my_patch()),
+			  my_edge_(my_edge_in),
+			  other_patch_(other_edge_in.my_patch()),
+			  other_edge_(other_edge_in),
+			  is_interpatch_(is_interpatch_in)
+		{
+		}
+
+	public:
+		// assert() that the ghost zone is fully set up:
+		// this class ==> no-op
+		// symmetry ghost zone ==> not overridden ==> no-op
+		// interpatch ghost zone ==> check consistency of this and the
+		//			     other patch's ghost zones and
+		//			     patch_interp objects
+		virtual void assert_fully_setup() const
+		{
+		}
+
+		// virtual destructor, so derived-class objects can be destroyed
+		// via a ptr/ref to this class
+		virtual ~ghost_zone()
+		{
+		}
+
+	private:
+		// copying and passing by value are forbidden:
+		// the copy constructor and assignment operator are declared
+		// private and never defined (either here or in derived classes)
+		ghost_zone(const ghost_zone &rhs);
+		ghost_zone &operator=(const ghost_zone &rhs);
+
+	private:
+		// n.b. the constructor's initializer list relies on this order
+		patch &my_patch_;
+		const patch_edge &my_edge_;
+		patch &other_patch_;
+		const patch_edge &other_edge_;
+		const bool is_interpatch_;
+	};
+
+	//*****************************************************************************
+
+	//
+	// symmetry_ghost_zone - derived class for spacetime-symmetry ghost zone
+	//
+	// In practice, there are two types of spacetime symmetry ghost zone:
+	// mirror symmetry and periodic symmetry.  However, it turns out that the
+	// code needed to handle periodic BCs is basically a superset of that
+	// needed to handle mirror symmetries, so this class represents a generic
+	// symmetry ghost zone which may be of either type, and once constructed
+	// doesn't distinguish between the two.
+	//
+	// In general, a symmetry ghost zone implies that there's a 1-1 mapping
+	// between ghost zone points of this patch, and (a subset of the) interior
+	// points of this or another patch.  If tensors are involved (this isn't
+	// used at present in the horizon finder), there's also a corresponding
+	// 1-1 mapping between (angular) tensor components.
+	//
+	// A mirror-symmetry ghost zone is specified by (the constructor arguments)
+	// - a patch edge
+	// - the (fp) perp coordinate of the mirror plane
+	// The mapping of ghost zone points is thus "just" the mirror imaging of
+	// iperp across the symmetry plane within this same patch.  (The mapping
+	// leaves ipar invariant.)
+	//
+	// A periodic-symmetry ghost zone is specified by (the constructor arguments)
+	// - a patch edge (specifies the ghost zone)
+	// - the patch edge to which the ghost zone is to be mapped
+	// - a pair of ipar coordinates, one on this edge and one on the other edge,
+	//   which map into each other
+	// - the sign of the ipar mapping (does increasing ipar on this edge map to
+	//   increasing or decreasing ipar on the other edge?)
+	// The mapping of ghost zone points is the periodic mapping; this may map
+	// the ghost zone points to interior points of either this same patch or a
+	// different one.
+	//
+	// In general, the symmetry mapping of ghost zone points is of the form
+	//	(iperp, ipar) --> (const +/- iperp, const +/- ipar)
+	// The iperp mapping is always in the direction
+	//	outside the patch --> inside the patch
+	// while the ipar mapping might have either sign.
+	// If there are tensors, the corresponding mapping of tensor components is
+	//	(index_perp, index_par) --> (+/-) (+/-) (index_perp, index_par)
+	// (that is, the two +/- signs are multiplied).
+
+	//
+	// Since all the member functions are  const , a  symmetry_ghost_zone
+	// object is effectively always  const .
+	//
+	class symmetry_ghost_zone
+		: public ghost_zone
+	{
+	public:
+		//
+		// ***** main high-level client interface *****
+		//
+		// Synchronize this ghost zone: update the ghost-zone values of
+		// the specified gridfns via the appropriate symmetry operations.
+		// The two flags select which part(s) of the ghost zone to update.
+		//
+		void synchronize(int ghosted_min_gfn, int ghosted_max_gfn,
+						 bool want_corners = true,
+						 bool want_noncorner = true);
+
+		//
+		// ***** Jacobian of synchronize() *****
+		//
+		// n.b. terminology is
+		//      partial gridfn at x
+		//      -------------------
+		//      partial gridfn at y
+		//
+
+		// allocate internal buffers, compute Jacobian
+		// ... nothing to do in this class: the Jacobian query functions
+		//     below answer directly from the symmetry maps
+		void compute_Jacobian(int ghosted_min_gfn, int ghosted_max_gfn,
+							  bool want_corners = true,
+							  bool want_noncorner = true) const
+		{
+		}
+
+		// [min,max] range of  m  for this ghost zone: just the single value 0
+		int Jacobian_min_y_ipar_m() const
+		{
+			return 0;
+		}
+		int Jacobian_max_y_ipar_m() const
+		{
+			return 0;
+		}
+
+		// oiperp of the Jacobian points (= iperp in their patch):
+		// the image of  x_iperp  under the iperp symmetry map
+		virtual int Jacobian_y_iperp(int x_iperp) const
+		{
+			return iperp_map_->map(x_iperp);
+		}
+
+		//  posn  value of the points in this Jacobian row:
+		// the image of  x_ipar  under the ipar symmetry map
+		int Jacobian_y_ipar_posn(int x_iperp, int x_ipar) const
+		{
+			return ipar_map_->map(x_ipar);
+		}
+
+		// the Jacobian itself, i.e.
+		//	partial synchronize() px.gridfn(ghosted_gfn, x_iperp, x_ipar)
+		//	-------------------------------------------------------------
+		//	   partial py.gridfn(ghosted_gfn, y_iperp, y_posn+y_ipar_m)
+		// with
+		//	y_iperp = Jacobian_y_iperp(x_iperp)
+		//	y_posn = Jacobian_y_ipar_posn(x_iperp, x_ipar)
+		// ... this is 1 at  y_ipar_m == 0  and 0 everywhere else
+		fp Jacobian(int x_iperp, int x_ipar, int y_ipar_m) const
+		{
+			if (y_ipar_m == 0)
+				return 1.0;
+			return 0.0;
+		}
+
+		//
+		// ***** low-level client interface *****
+		//
+
+		// apply the symmetry maps to a coordinate
+		int iperp_map_of_iperp(int iperp) const
+		{
+			return iperp_map_->map(iperp);
+		}
+		int ipar_map_of_ipar(int ipar) const
+		{
+			return ipar_map_->map(ipar);
+		}
+
+		// signs of the symmetry maps, as reported by  cpm_map::fp_sign()
+		fp fp_sign_of_iperp_map() const
+		{
+			return iperp_map_->fp_sign();
+		}
+		fp fp_sign_of_ipar_map() const
+		{
+			return ipar_map_->fp_sign();
+		}
+
+		// [min,max] ipar of the ghost zone
+		// ... independent of iperp, because we always include the corners
+		//     (cf. the example at the start of this file)
+		int min_ipar(int iperp) const
+		{
+			return extreme_min_ipar();
+		}
+		int max_ipar(int iperp) const
+		{
+			return extreme_max_ipar();
+		}
+
+		//
+		// ***** constructors, destructor *****
+		//
+	public:
+		// construct a mirror-symmetry ghost zone
+		symmetry_ghost_zone(const patch_edge &my_edge_in);
+
+		// construct a periodic-symmetry ghost zone
+		// ... the ipar mapping is specified by a sample point on each edge
+		//     together with the sign of the mapping
+		symmetry_ghost_zone(const patch_edge &my_edge_in, const patch_edge &other_edge_in,
+							int my_edge_sample_ipar, int other_edge_sample_ipar,
+							bool ipar_map_is_plus);
+
+		~symmetry_ghost_zone();
+
+	private:
+		// copying and passing by value are forbidden:
+		// the copy constructor and assignment operator are declared
+		// private and never defined
+		symmetry_ghost_zone(const symmetry_ghost_zone &rhs);
+		symmetry_ghost_zone &operator=(const symmetry_ghost_zone &rhs);
+
+	private:
+		// symmetry mappings for (iperp,ipar)
+		// ... we own these objects
+		const jtutil::cpm_map<fp> *iperp_map_;
+		const jtutil::cpm_map<fp> *ipar_map_;
+	};
+
+	//*****************************************************************************
+
+	//
+	// interpatch_ghost_zone - derived class for interpatch ghost zone of a patch
+	//
+	// A ghost_zone object maps (my_iperp,my_ipar) coordinates to the other
+	// patch's (other_iperp,other_par) coordinates, then calls the other patch's
+	// patch_interp object to interpolate the other patch's data to those
+	// coordinates.
+	//
+	// Note that as described in the "design notes for ghost zones"
+	// comments above,  interpatch_ghost_zone  objects are constructed in
+	// the 2nd and 3rd phase of the overall construction process described
+	// in the comments at the start of "patch.h"
+	// [done by our constructor]
+	// - set up the object itself and its links to/from the patches and
+	//   their edges
+	// [done by  finish_setup()]
+	// - set up the interpatch mapping information, data pointers, and
+	//   interpolation result buffer
+	// - construct the  patch_interp  object to interpolate from the other
+	//   patch, and save a pointer to it
+	//
+
+	class patch_interp;
+
+	// A  ghost_zone  whose values are obtained by interpolating from
+	// another patch.  The declaration below exposes
+	// - synchronize()      : update the ghost-zone gridfn values
+	// - compute_Jacobian() : compute/store the Jacobian of synchronize()
+	// - accessors for that Jacobian and for the coordinate mapping
+	// Copying is forbidden (private, undefined copy ctor/assignment).
+	class interpatch_ghost_zone
+		: public ghost_zone
+	{
+	public:
+		//
+		// ***** main high-level client interface *****
+		//
+		// "synchronize" a ghost zone, i.e. update the ghost-zone
+		// values of the specified gridfns via the appropriate
+		// interpatch interpolations
+		// (flags specify which part(s) of the ghost zone we want)
+		//
+		// ... the present implementation only supports the case where
+		//     both flags are set
+		//
+		void synchronize(int ghosted_min_gfn, int ghosted_max_gfn,
+						 bool want_corners = true,
+						 bool want_noncorner = true);
+
+		//
+		// ***** Jacobian of synchronize() *****
+		//
+		// n.b. terminology is
+		//      partial gridfn at x
+		//      -------------------
+		//      partial gridfn at y
+		//
+
+		// allocate internal buffers, compute Jacobian
+		//
+		// ... the present implementation only supports the case where
+		//     both flags are set
+		//
+		// n.b. this is a  const  member function; the Jacobian state it
+		//      fills in is declared  mutable  at the end of this class
+		//
+		void compute_Jacobian(int ghosted_min_gfn, int ghosted_max_gfn,
+							  bool want_corners = true,
+							  bool want_noncorner = true)
+			const;
+
+		// what is the [min,max] range of m for this ghost zone?
+		int Jacobian_min_y_ipar_m() const { return Jacobian_min_y_ipar_m_; }
+		int Jacobian_max_y_ipar_m() const { return Jacobian_max_y_ipar_m_; }
+
+		// what is the iperp of the Jacobian y points in their (y) patch?
+		// ... the ipar row of grid points is actually the same, so
+		//     we just have to translate x_iperp to the y patch's coordinates
+		int Jacobian_y_iperp(int x_iperp) const { return other_iperp(x_iperp); }
+
+		// what is the  posn  value of the y points in this Jacobian row?
+		// ... asserts that the posn array has been allocated
+		//     (it starts out as a NULL pointer, see the data members below)
+		int Jacobian_y_ipar_posn(int x_iperp, int x_ipar) const
+		{
+			assert(Jacobian_y_ipar_posn_ != NULL);
+			const int y_iperp = Jacobian_y_iperp(x_iperp);
+			return (*Jacobian_y_ipar_posn_)(y_iperp, x_ipar);
+		}
+
+		// what is the Jacobian
+		//	partial synchronize() px.gridfn(ghosted_gfn, x_iperp, x_ipar)
+		//	-------------------------------------------------------------
+		//	   partial py.gridfn(ghosted_gfn, y_iperp, y_posn+y_ipar_m)
+		// where
+		//	y_iperp = Jacobian_y_iperp(x_iperp)
+		//	y_posn = Jacobian_y_ipar_posn(x_iperp, x_ipar)
+		// ... asserts that the Jacobian buffer exists and that  y_ipar_m
+		//     lies in [Jacobian_min_y_ipar_m(), Jacobian_max_y_ipar_m()]
+		fp Jacobian(int x_iperp, int x_ipar, int y_ipar_m) const
+		{
+			assert(Jacobian_buffer_ != NULL);
+			assert(y_ipar_m >= Jacobian_min_y_ipar_m_);
+			assert(y_ipar_m <= Jacobian_max_y_ipar_m_);
+			const int y_iperp = Jacobian_y_iperp(x_iperp);
+			return (*Jacobian_buffer_)(y_iperp, x_ipar, y_ipar_m);
+		}
+
+		//
+		// ***** low-level client interface *****
+		//
+
+	public:
+		// check consistency of this and the other patch's ghost zones
+		// and patch_interp objects
+		void assert_fully_setup() const;
+
+		// min/max ipar of the ghost zone for specified iperp
+		// with possibly "triangular" corners depending on the type
+		// (symmetry vs interpatch) of the adjacent ghost zones
+		// (cf. comments & example at the start of this file)
+		int min_ipar(int iperp) const;
+		int max_ipar(int iperp) const;
+
+		// convert our iperp --> other patch's iperp
+		// ... asserts that the iperp map has been set up
+		int other_iperp(int iperp) const
+		{
+			assert(other_iperp_ != NULL);
+			return other_iperp_->map(iperp);
+		}
+
+		//
+		// ***** constructor, finish setup, destructor *****
+		//
+	public:
+		interpatch_ghost_zone(const patch_edge &my_edge_in,
+							  const patch_edge &other_edge_in,
+							  int patch_overlap_width);
+
+		// finish setup (requires adjacent-side ghost_zone objects
+		// to exist, though not to have finish_setup() called):
+		// - setup par coordinate mapping information
+		// - setup interpatch interpolator data pointers & result buffer
+		// - create patch_interp object to interpolate from *other* patch
+		void finish_setup(int interp_handle, int interp_par_table_handle);
+
+		~interpatch_ghost_zone();
+
+	private:
+		// we forbid copying and passing by value
+		// by declaring the copy constructor and assignment operator
+		// private, but never defining them
+		interpatch_ghost_zone(const interpatch_ghost_zone &rhs);
+		interpatch_ghost_zone &operator=(const interpatch_ghost_zone &rhs);
+
+	private:
+		//
+		// all the remaining pointers are initialized to NULL pointers
+		// in our constructor, then finally allocated and set up by
+		// finish_setup() or compute_Jacobian() as appropriate
+		//
+		// FIXME: should these be moved out into a separate object/class
+		//        for the interp stuff and/or another one for the Jacobian?
+		//
+
+		// see comment in "patch_interp.h" for why this is "const"
+		const patch_interp *other_patch_interp_;
+
+		// other patch's iperp coordinates of our ghost zone points
+		// ... maps my_iperp --> other_iperp
+		jtutil::cpm_map<fp> *other_iperp_;
+
+		// min/max values of other patch's iperp coordinates
+		// of our ghost zone points
+		int min_other_iperp_, max_other_iperp_;
+
+		// [min,max]_ipar used at each other_iperp
+		// ... we will pass these arrays by reference
+		//     to the other patch's patch_interp object
+		// ... index is (other_iperp)
+		jtutil::array1d<int> *min_ipar_used_;
+		jtutil::array1d<int> *max_ipar_used_;
+
+		// other patch's (fp) parallel coordinates of our ghost zone points
+		// ... we will pass this array by reference
+		//     to the other patch's patch_interp object
+		//     using my_ipar as the patch_interp's parindex
+		// ... subscripts are (other_iperp, my_ipar)
+		jtutil::array2d<fp> *other_par_;
+
+		// buffer into which the other patch's patch_interp object
+		// will store the interpolated gridfn values
+		// ... we will pass this array by reference
+		//     to the other patch's patch_interp object
+		//     using my_ipar as the patch_interp's parindex
+		// ... subscripts are (gfn, other_iperp,my_ipar)
+		jtutil::array3d<fp> *interp_result_buffer_;
+
+		//
+		// stuff computed by  compute_Jacobian()
+		// (all  mutable  because compute_Jacobian() is a const member)
+		//
+		// n.b. terminology is
+		//      partial gridfn at x
+		//      -------------------
+		//      partial gridfn at y
+		//
+		mutable int Jacobian_min_y_ipar_m_, Jacobian_max_y_ipar_m_;
+
+		// other patch's y ipar posn for a Jacobian row
+		// ... subscripts are (oiperp, ipar)
+		mutable jtutil::array2d<CCTK_INT> *Jacobian_y_ipar_posn_;
+
+		// Jacobian values
+		// ... subscripts are (y_iperp, x_ipar, y_ipar_m)
+		mutable jtutil::array3d<fp> *Jacobian_buffer_;
+	};
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /* GHOST_ZONE_H*/
--- a/AMSS_NCKU_source/AHF_Direct/gr.h
+++ b/AMSS_NCKU_source/AHF_Direct/gr.h
@@ -0,0 +1,40 @@
+#ifndef GR_H
+#define GR_H
+namespace AHFinderDirect
+{
+
+	// Status code returned by  expansion()  and  expansion_Jacobian()
+	// (both declared later in this header).
+	// NOTE(review): the per-enumerator remarks below are inferred from the
+	// enumerator names only; confirm against the code that returns them.
+	enum expansion_status
+	{
+		// no failure reported
+		expansion_success,
+
+		// presumably: the trial surface contains non-finite values
+		expansion_failure__surface_nonfinite,
+
+		// presumably: the trial surface exceeds some size limit
+		expansion_failure__surface_too_large,
+
+		// presumably: the trial surface leaves the numerical grid
+		expansion_failure__surface_outside_grid,
+
+		// presumably: the trial surface enters an excised region
+		expansion_failure__surface_in_excised_region,
+
+		// presumably: interpolated geometry contains non-finite values
+		expansion_failure__geometry_nonfinite,
+
+		// presumably: the 3-metric g_ij is not positive definite
+		expansion_failure__gij_not_positive_definite // no comma
+	};
+
+	// expansion.cc
+	//
+	// Returns an  expansion_status  code.
+	// Defaults: no Jacobian-related work is requested (Jacobian_flag = false)
+	// and no norms object is supplied (H_norms_ptr = NULL).
+	// NOTE(review): the meaning of  add_to_expansion  and  initial_flag
+	// is not visible from this header; see the definition.
+	enum expansion_status
+	expansion(patch_system *ps_ptr, fp add_to_expansion,
+			  bool initial_flag,
+			  bool Jacobian_flag = false,
+			  jtutil::norm<fp> *H_norms_ptr = NULL);
+
+	// expansion_Jacobian.cc
+	//
+	// Returns an  expansion_status  code; takes the patch system and the
+	// Jacobian object by pointer.  print_msg_flag  defaults to false.
+	// NOTE(review): the meaning of  add_to_expansion  and  initial_flag
+	// is not visible from this header; see the definition.
+	enum expansion_status
+	expansion_Jacobian(patch_system *ps_ptr, Jacobian *Jac_ptr,
+					   fp add_to_expansion,
+					   bool initial_flag,
+					   bool print_msg_flag = false);
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /* GR_H  */
--- a/AMSS_NCKU_source/AHF_Direct/horizon_sequence.C
+++ b/AMSS_NCKU_source/AHF_Direct/horizon_sequence.C
@@ -0,0 +1,76 @@
+#include <stdio.h>
+#include <assert.h>
+
+#include "stdc.h"
+#include "util.h"
+
+#include "horizon_sequence.h"
+
+namespace AHFinderDirect
+{
+
+	// Construct an empty sequence with room for  N_horizons_in  entries.
+	// The position starts at -1 (i.e. not a genuine position) and the
+	// storage array is heap-allocated here and released by the destructor.
+	horizon_sequence::horizon_sequence(int N_horizons_in)
+		: N_horizons_(N_horizons_in),
+		  my_N_horizons_(0), // sequence starts out empty
+		  posn_(-1),
+		  my_hn_(new int[N_horizons_in])
+	{
+	}
+
+	// Release the array allocated with  new[]  in the constructor.
+	horizon_sequence::~horizon_sequence()
+	{
+		delete[] my_hn_;
+	}
+	//
+	// Append the genuine horizon number  hn  at the end of the sequence,
+	// reset the current position to the first entry, and return the
+	// updated value of my_N_horizons().
+	//
+	int horizon_sequence::append_hn(int hn)
+	{
+		// only genuine (positive) horizon numbers may be stored
+		assert(hn > 0);
+		// the array was sized for N_horizons_ entries in the constructor
+		assert(my_N_horizons_ < N_horizons_);
+
+		const int slot = my_N_horizons_;
+		my_hn_[slot] = hn;
+		my_N_horizons_ = slot + 1;
+
+		posn_ = 0;
+		return my_N_horizons_;
+	}
+
+	//******************************************************************************
+
+	//
+	// Compute the internal position that immediately follows a given
+	// internal position in the sequence.
+	//
+	// Arguments:
+	// pos = (in) The current internal position, with posn_ semantics
+	//
+	// Results:
+	// - a negative position steps one further down (pos - 1)
+	// - a non-negative position steps to pos + 1 while that is still
+	//   below my_N_horizons_
+	// - otherwise the result is -1
+	//
+	int horizon_sequence::next_posn(int pos)
+		const
+	{
+		if (pos < 0)
+			return pos - 1;
+
+		const int following = pos + 1;
+		if (following < my_N_horizons_)
+			return following;
+
+		return -1;
+	}
+
+	//******************************************************************************
+
+	//
+	// Report whether  hn  is one of the horizon numbers stored in this
+	// sequence (linear search over the first my_N_horizons_ entries).
+	//
+	bool horizon_sequence::is_hn_genuine(int hn)
+		const
+	{
+		int pos = 0;
+		while ((pos < my_N_horizons_) && (my_hn_[pos] != hn))
+			++pos;
+
+		// we stopped early if and only if a matching entry was found
+		return pos < my_N_horizons_;
+	}
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/horizon_sequence.h
+++ b/AMSS_NCKU_source/AHF_Direct/horizon_sequence.h
@@ -0,0 +1,72 @@
+#ifndef HORIZON_SEQUENCE_H
+#define HORIZON_SEQUENCE_H
+namespace AHFinderDirect
+{
+	//
+	// A sequence of horizon numbers ("hn") with a current position.
+	//
+	// Positions in [0, my_N_horizons_) are "genuine" and index the stored
+	// horizon numbers; any other position is a "dummy" position, for
+	// which get_hn() returns 0.
+	//
+	// This class owns a heap-allocated array (allocated in the
+	// constructor, released in the destructor), so copying is forbidden:
+	// an implicit member-wise copy would make two objects  delete[]  the
+	// same array.
+	//
+	class horizon_sequence
+	{
+	public:
+		// capacity given to the constructor
+		int N_horizons() const { return N_horizons_; }
+
+		// number of horizon numbers appended so far
+		int my_N_horizons() const { return my_N_horizons_; }
+
+		// has at least one horizon number been appended?
+		bool has_genuine_horizons() const { return my_N_horizons_ > 0; }
+
+		// is the current position a dummy / a genuine one?
+		bool is_dummy() const { return posn_is_dummy(posn_); }
+		bool is_genuine() const { return posn_is_genuine(posn_); }
+
+		// would next_hn() move to a genuine position?
+		bool is_next_genuine() const
+		{
+			return posn_is_genuine(next_posn(posn_));
+		}
+
+		// 0 at a genuine position, otherwise the negated position
+		int dummy_number() const { return is_genuine() ? 0 : -posn_; }
+
+		// horizon number at the current position, or 0 at a dummy position
+		int get_hn() const
+		{
+			return posn_is_genuine(posn_) ? my_hn_[posn_] : 0;
+		}
+
+		// is  hn  one of the stored horizon numbers?
+		bool is_hn_genuine(int hn) const;
+
+		// move to the start of the sequence (position -1 if the sequence
+		// is empty, else 0) and return get_hn() there
+		int init_hn()
+		{
+			posn_ = (my_N_horizons_ == 0) ? -1 : 0;
+			return get_hn();
+		}
+
+		// advance to the next position and return get_hn() there
+		int next_hn()
+		{
+			posn_ = next_posn(posn_);
+			return get_hn();
+		}
+
+		horizon_sequence(int N_horizons);
+		~horizon_sequence();
+
+		// append  hn  to the sequence; returns the new my_N_horizons()
+		int append_hn(int hn);
+
+	private:
+		// we forbid copying and passing by value
+		// by declaring the copy constructor and assignment operator
+		// private, but never defining them
+		horizon_sequence(const horizon_sequence &rhs);
+		horizon_sequence &operator=(const horizon_sequence &rhs);
+
+	private:
+		bool posn_is_genuine(int pos) const
+		{
+			return (pos >= 0) && (pos < my_N_horizons_);
+		}
+		bool posn_is_dummy(int pos) const
+		{
+			return !posn_is_genuine(pos);
+		}
+
+		// position following  pos  (defined in horizon_sequence.C)
+		int next_posn(int pos) const;
+
+	private:
+		const int N_horizons_; // capacity of my_hn_[]
+		int my_N_horizons_;	   // number of entries of my_hn_[] in use
+
+		int posn_; // current position (genuine or dummy)
+
+		int *my_hn_; // owned array of horizon numbers
+	};
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /* HORIZON_SEQUENCE_H */
--- a/AMSS_NCKU_source/AHF_Direct/ilucg.f90
+++ b/AMSS_NCKU_source/AHF_Direct/ilucg.f90
@@ -0,0 +1,521 @@
+
+! adopted from J. THORNBURG's code dilucg.f
+
+      subroutine ILUCG(N,IA,JA,A,B,X,ITEMP,RTEMP,EPS,MAXITER,ISTATUS)
+
+      IMPLICIT DOUBLE PRECISION (A-H,O-Z)
+      DIMENSION IA(*),JA(*),A(*),B(*),X(*),ITEMP(*),RTEMP(*)
+!
+!     INCOMPLETE LU DECOMPOSITION-CONJUGATE GRADIENT
+!     -          --               -         -
+! WHERE:
+!     |N| IS THE NUMBER OF EQUATIONS.  IF N < 0, ITEMP AND
+!       RTEMP CONTAIN THE ILU FROM A PREVIOUS CALL AND
+!       B AND X ARE THE NEW RHS AND INITIAL GUESS.
+!       IF N = 0 NOTHING IS DONE AND ISTATUS IS RETURNED AS 0.
+!     IA IS AN INTEGER ARRAY DIMENSIONED |N|+1.  IA(I) IS THE
+!       INDEX INTO ARRAYS JA AND A OF THE FIRST NON-ZERO
+!       ELEMENT IN ROW I.  LET MAX=IA(|N|+1)-IA(1).
+!     JA IS AN INTEGER ARRAY DIMENSIONED MAX.  JA(K) GIVES
+!       THE COLUMN NUMBER OF A(K).
+!     A IS A DOUBLE PRECISION ARRAY DIMENSIONED MAX.  IT CONTAINS THE
+!       NONZERO ELEMENTS OF THE MATRIX STORED BY ROW.
+!     B CONTAINS THE RHS VECTOR.
+!     X IS A DOUBLE PRECISION ARRAY DIMENSIONED |N|.  ON ENTRY, IT CONTAINS
+!       AN INITIAL ESTIMATE; ON EXIT, THE SOLUTION.
+!     ITEMP IS AN INTEGER SCRATCH ARRAY DIMENSIONED 3*(|N|+MAX)+2.
+!     RTEMP IS A DOUBLE PRECISION SCRATCH ARRAY DIMENSIONED 4*|N|+MAX.
+!     EPS IS THE CONVERGENCE CRITERIA.  IT SPECIFIES THE RELATIVE
+!       ERROR ALLOWED IN THE SOLUTION.  TO BE PRECISE, CONVERGENCE
+!       IS DEEMED TO HAVE OCCURRED WHEN THE INFINITY-NORM OF THE
+!       CHANGE IN THE SOLUTION IN ONE ITERATION IS .LE. EPS * THE
+!       INFINITY-NORM OF THE CURRENT SOLUTION.  HOWEVER, IF EPS
+!       .LT. 0.0D0, IT IS INTERNALLY SCALED BY THE MACHINE PRECISION,
+!       SO THAT, FOR EXAMPLE, EPS = -256.0D0 WILL ALLOW THE LAST 8 BITS
+!       OF THE SOLUTION TO BE IN ERROR.
+!     MAXITER GIVES THE REQUESTED NUMBER OF ITERATIONS,
+!       OR IS 0 FOR "NO LIMIT".
+!     ISTATUS IS AN INTEGER VARIABLE, WHICH IS SET TO:
+!       -I IF THERE IS AN ERROR IN THE MATRIX STRUCTURE IN ROW I
+!          (SUCH AS A ZERO ELEMENT ON THE DIAGONAL).
+!        0 IF THE ITERATION FAILED TO REACH THE CONVERGENCE CRITERION
+!          IN MAXITER ITERATIONS.
+!       +I IF THE ITERATION CONVERGED IN I ITERATIONS.
+! REFERENCE:
+!     D.S.KERSHAW,"THE INCOMPLETE CHOLESKY-CONJUGATE GRADIENT
+!       METHOD FOR ITERATIVE SOLUTION OF LINEAR EQUATIONS",
+!       J.COMPUT.PHYS. JAN 1978 PP 43-65
+!
+      LOGICAL DLU0
+      NP=IABS(N)
+      ISTATUS=0
+! IERROR IS ONLY SET BY DLU0 WHEN IT REPORTS AN ERROR; GIVE IT A
+! DEFINED VALUE SO THE N = 0 EARLY EXIT THROUGH LABEL 20 DOES NOT
+! READ AN UNDEFINED VARIABLE.
+      IERROR=0
+      IF (NP.EQ.0) GO TO 20
+! CALCULATE INDICES FOR BREAKING UP TEMPORARY ARRAYS.
+! ... ILU,JLU,ID,IC,JC,JCI ARE OFFSETS INTO ITEMP
+! ... IR,IP,IS1,IS2,IALU ARE OFFSETS INTO RTEMP
+      N1=NP+1
+      MAX=IA(N1)-IA(1)
+      ILU=1
+      JLU=ILU+N1
+      ID=JLU+MAX
+      IC=ID+NP
+      JC=IC+N1
+      JCI=JC+MAX
+      IR=1
+      IP=IR+NP
+      IS1=IP+NP
+      IS2=IS1+NP
+      IALU=IS2+NP
+! N < 0 MEANS "REUSE THE DECOMPOSITION ALREADY STORED IN ITEMP/RTEMP"
+      IF (N.LT.0) GO TO 10
+! DO INCOMPLETE LU DECOMPOSITION
+      IF (DLU0(NP,IA,JA,A,ITEMP(IC),ITEMP(JC),ITEMP(JCI),RTEMP(IALU), &
+          ITEMP(ILU),ITEMP(JLU),ITEMP(ID),RTEMP(IR),IERROR)) GOTO 20
+! AND DO CONJUGATE GRADIENT ITERATIONS
+10    CALL DNCG0(NP,IA,JA,A,B,X,ITEMP(ILU),ITEMP(JLU),ITEMP(ID), &
+        RTEMP(IALU),RTEMP(IR),RTEMP(IP),RTEMP(IS1),RTEMP(IS2),   &
+        EPS,MAXITER,ITER)
+! ITER IS ACTUAL NUMBER OF ITERATIONS (NEGATIVE IF NO CONVERGENCE)
+      ISTATUS = ITER
+      IF (ITER .LT. 0) ISTATUS = 0
+      RETURN
+! ERROR RETURN FROM INCOMPLETE LU DECOMPOSITION
+! (ALSO REACHED FOR N = 0, WHERE IERROR = 0 GIVES ISTATUS = 0)
+20    ISTATUS = -IERROR
+      RETURN
+      END
+!------------------------------------------------------------------------------
+      LOGICAL FUNCTION DLU0(N,IA,JA,A,IC,JC,JCI,ALU,ILU,JLU,ID,V,IE)
+      IMPLICIT DOUBLE PRECISION (A-H,O-Z)
+      DIMENSION IA(*),JA(*),A(*),IC(*),JC(*),JCI(*),ALU(*),ILU(*),JLU(*),ID(N),V(N)
+      LOGICAL NODIAG
+      COMMON /ICBD00/ ICBAD
+!     INCOMPLETE LU DECOMPOSITION
+! WHERE:
+!     N,IA,JA, AND A ARE DESCRIBED IN SUBROUTINE ILUCG
+!     IC IS AN INTEGER ARRAY DIMENSIONED N+1, IC(J) GIVES THE
+!       INDEX OF THE FIRST NONZERO ELEMENT IN COLUMN J IN
+!       ARRAY JC.
+!     JC IS AN INTEGER ARRAY WITH THE SAME DIMENSION AS A.
+!       JC(K) GIVES THE ROW NUMBER OF THE K'TH ELEMENT IN
+!       THE COLUMN STRUCTURE.
+!     JCI IS AN INTEGER ARRAY WITH THE SAME DIMENSION AS A.
+!       JCI(K) GIVES THE INDEX INTO ARRAY A OF THE K'TH ELEMENT
+!       OF THE COLUMN STRUCTURE.
+!     ALU HAS THE SAME DIMENSION AS A.  ON EXIT, IT WILL
+!       CONTAIN THE INCOMPLETE LU DECOMPOSITION OF A WITH THE
+!       RECIPROCALS OF THE DIAGONAL ELEMENTS OF U.
+!     ILU AND JLU CORRESPONDS TO IA AND JA BUT FOR ALU.
+!     ID IS AN INTEGER ARRAY DIMENSIONED N.  IT CONTAINS
+!       INDICES TO THE DIAGONAL ELEMENTS OF U.
+!     V IS A REAL SCRATCH VECTOR OF LENGTH N.
+!     IE GIVES THE ROW NUMBER IN ERROR IF AN ERROR OCCURRED
+!       (RETURN VALUE .TRUE.), OR IS UNUSED IF ALL IS WELL
+!       (RETURN VALUE .FALSE.).
+!     ICBAD (IN COMMON BLOCK /ICBD00/) COUNTS THE ZERO PIVOTS
+!       THAT WERE REPLACED DURING THE ELIMINATION.
+!
+!     RETURN VALUE = .FALSE. IF ALL IS WELL, .TRUE. IF ERROR.
+!
+! NOTE: DLU0 SETS ARGUMENTS IC THROUGH V.
+!
+      ICBAD=0
+! ZERO COUNT OF ZERO DIAGONAL ELEMENTS IN U.
+!
+! FIRST CHECK STRUCTURE OF A AND BUILD COLUMN STRUCTURE
+! ... IC(J) FIRST ACCUMULATES THE NUMBER OF ENTRIES IN COLUMN J
+      DO 10 I=1,N
+        IC(I)=0
+10    CONTINUE
+      DO 30 I=1,N
+        KS=IA(I)
+        KE=IA(I+1)-1
+        NODIAG=.TRUE.
+        DO 20 K=KS,KE
+          J=JA(K)
+! COLUMN NUMBER OUT OF RANGE ==> ERROR IN ROW I
+          IF (J.LT.1.OR.J.GT.N) GO TO 210
+          IC(J)=IC(J)+1
+          IF (J.EQ.I) NODIAG=.FALSE.
+20      CONTINUE
+! ROW I HAS NO STORED DIAGONAL ELEMENT ==> ERROR IN ROW I
+        IF (NODIAG) GO TO 210
+30    CONTINUE
+! MAKE IC INTO INDICES
+! ... RUNNING SUM OF THE COUNTS; AN EMPTY COLUMN (COUNT 0) IS AN ERROR
+      KOLD=IC(1)
+      IC(1)=1
+      DO 40 I=1,N
+        KNEW=IC(I+1)
+        IF (KOLD.EQ.0) GO TO 210
+        IC(I+1)=IC(I)+KOLD
+        KOLD=KNEW
+40    CONTINUE
+! SET JC AND JCI FOR COLUMN STRUCTURE
+! ... IC(J) IS USED AS A "NEXT FREE SLOT" CURSOR HERE, SO IT ENDS UP
+!     SHIFTED BY ONE COLUMN; THIS IS UNDONE BY "FIX UP IC" BELOW
+      DO 60 I=1,N
+        KS=IA(I)
+        KE=IA(I+1)-1
+        DO 50 K=KS,KE
+          J=JA(K)
+          L=IC(J)
+          IC(J)=L+1
+          JC(L)=I
+          JCI(L)=K
+50      CONTINUE
+60    CONTINUE
+! FIX UP IC
+      KOLD=IC(1)
+      IC(1)=1
+      DO 70 I=1,N
+        KNEW=IC(I+1)
+        IC(I+1)=KOLD
+        KOLD=KNEW
+70    CONTINUE
+! FIND SORTED ROW STRUCTURE FROM SORTED COLUMN STRUCTURE
+      NP=N+1
+      DO 80 I=1,NP
+        ILU(I)=IA(I)
+80    CONTINUE
+! MOVE ELEMENTS, SET JLU AND ID
+! ... ILU(I) IS USED AS A "NEXT FREE SLOT" CURSOR FOR ROW I
+      DO 100 J=1,N
+        KS=IC(J)
+        KE=IC(J+1)-1
+        DO 90 K=KS,KE
+          I=JC(K)
+          L=ILU(I)
+          ILU(I)=L+1
+          JLU(L)=J
+          KK=JCI(K)
+          ALU(L)=A(KK)
+          IF (I.EQ.J) ID(J)=L
+90      CONTINUE
+100   CONTINUE
+! RESET ILU (COULD JUST USE IA)
+      DO 110 I=1,NP
+        ILU(I)=IA(I)
+110   CONTINUE
+! FINISHED WITH SORTED COLUMN AND ROW STRUCTURE
+!
+! DO LU DECOMPOSITION USING GAUSSIAN ELIMINATION
+! ... V HOLDS THE PART OF THE PIVOT ROW TO THE RIGHT OF THE DIAGONAL,
+!     SCATTERED BY COLUMN NUMBER; IT IS ZERO OUTSIDE EACH IROW PASS
+      DO 120 I=1,N
+        V(I)=0.0D0
+120   CONTINUE
+      DO 200 IROW=1,N
+        I=ID(IROW)
+        PIVOT=ALU(I)
+        IF (PIVOT.NE.0.0D0) GO TO 140
+! THIS CASE MAKES THE ILU LESS ACCURATE
+! ... A ZERO PIVOT IS REPLACED BY THE SUM OF ABSOLUTE VALUES OF ROW IROW;
+!     IF THAT IS ALSO ZERO THE ROW IS REPORTED AS AN ERROR
+        ICBAD=ICBAD+1
+        KS=ILU(IROW)
+        KE=ILU(IROW+1)-1
+        DO 130 K=KS,KE
+          PIVOT=PIVOT+DABS(ALU(K))
+130     CONTINUE
+        IF (PIVOT.EQ.0.0D0) GO TO 220
+! STORE THE RECIPROCAL OF THE PIVOT ON THE DIAGONAL
+140     PIVOT=1.0D0/PIVOT
+        ALU(I)=PIVOT
+        KKS=I+1
+        KKE=ILU(IROW+1)-1
+        IF (KKS.GT.KKE) GO TO 160
+        DO 150 K=KKS,KKE
+          J=JLU(K)
+          V(J)=ALU(K)
+150     CONTINUE
+! FIX L IN COLUMN IROW AND DO PARTIAL LU IN SUBMATRIX
+160     KS=IC(IROW)
+        KE=IC(IROW+1)-1
+        DO 190 K=KS,KE
+          I=JC(K)
+! ONLY ROWS BELOW THE PIVOT ROW ARE UPDATED
+          IF (I.LE.IROW) GO TO 190
+          LS=ILU(I)
+          LE=ILU(I+1)-1
+          DO 180 L=LS,LE
+            J=JLU(L)
+            IF (J.LT.IROW) GO TO 180
+            IF (J.GT.IROW) GO TO 170
+! J .EQ. IROW: THIS IS THE MULTIPLIER (ENTRY OF L) FOR ROW I
+            AMULT=ALU(L)*PIVOT
+            ALU(L)=AMULT
+! A ZERO MULTIPLIER LEAVES THE REST OF ROW I UNCHANGED
+            IF (AMULT.EQ.0.0) GO TO 190
+            GO TO 180
+! J .GT. IROW: UPDATE ONLY WHERE THE PIVOT ROW HAS A STORED ENTRY
+170         IF (V(J).EQ.0.0D0) GO TO 180
+            ALU(L)=ALU(L)-AMULT*V(J)
+180       CONTINUE
+190     CONTINUE
+! RESET V
+        IF (KKS.GT.KKE) GO TO 200
+        DO 195 K=KKS,KKE
+          J=JLU(K)
+          V(J)=0.0D0
+195     CONTINUE
+200   CONTINUE
+! NORMAL RETURN
+      DLU0 = .FALSE.
+      RETURN
+! ERROR RETURNS
+! ... 210: STRUCTURE ERROR, IE = VALUE OF LOOP INDEX I AT THE FAILURE
+210   IE=I
+      DLU0 = .TRUE.
+      RETURN
+! ... 220: ROW IROW IS ENTIRELY ZERO, NO USABLE PIVOT
+220   IE=IROW
+      DLU0 = .TRUE.
+      RETURN
+      END
+!-------------------------------------------------------------------------------------
+      SUBROUTINE DNCG0(N,IA,JA,A,B,X,ILU,JLU,ID,ALU,R,P,S1,S2,EPS,ITER,IE)
+      IMPLICIT DOUBLE PRECISION (A-H,O-Z)
+      DIMENSION IA(*),JA(*),A(*),B(N),X(N),ILU(*),JLU(*),ALU(*),ID(N),R(N),P(N),S1(N),S2(N)
+!     NONSYMMETRIC CONJUGATE GRADIENT
+! WHERE:
+!     N,IA,JA,A,B, AND X ARE DESCRIBED IN SUBROUTINE ILUCG.
+!     ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU.
+!     JLU GIVES COLUMN NUMBER.
+!     ID GIVES INDEX OF DIAGONAL ELEMENT OF U.
+!     ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW
+!       WITH RECIPROCALS OF DIAGONAL ELEMENTS OF U.
+!     R,P,S1, AND S2 ARE VECTORS OF LENGTH N USED IN THE
+!       ITERATIONS.
+!     EPS IS CONVERGENCE CRITERIA.  (DESCRIBED IN SUBROUTINE
+!       ILUCG).
+!     ITER IS MAX NUMBER OF ITERATIONS, OR 0 FOR "NO LIMIT".
+!       (N.B. ILUCG PASSES ITS MAXITER ARGUMENT HERE)
+!     IE GIVES ACTUAL NUMBER OF ITERATIONS, NEGATIVE IF
+!       NO CONVERGENCE.
+!       (N.B. ILUCG RECEIVES THIS IN ITS LOCAL VARIABLE ITER)
+!
+! R0=B-A*X0
+      CALL DMUL10(N,IA,JA,A,X,R)
+      DO 10 I=1,N
+        R(I)=B(I)-R(I)
+10    CONTINUE
+! P0=(UT*U)(-1)*AT*(L*LT)(-1)*R0
+! FIRST SOLVE L*LT*S1=R0
+      CALL DSUBL0(N,ILU,JLU,ID,ALU,R,S1)
+! TIMES TRANSPOSE OF A
+      CALL DMUL20(N,IA,JA,A,S1,S2)
+! THEN SOLVE UT*U*P=S2
+      CALL DSUBU0(N,ILU,JLU,ID,ALU,S2,P)
+      IE=0
+      RDOT = DGVV(R,S1,N)
+! LOOP BEGINS HERE
+20    CALL DMUL30(N,ILU,JLU,ID,ALU,P,S2)
+
+      PDOT = DGVV(P,S2,N)
+
+! AVOID DIVIDING BY ZERO IN ALPHA BELOW.  IE IS RETURNED AS THE
+! (NON-NEGATIVE) NUMBER OF ITERATIONS COMPLETED SO FAR.
+      IF (PDOT.EQ.0.0D0) RETURN
+
+      ALPHA=RDOT/PDOT
+! UPDATE X AND TRACK THE INFINITY-NORMS OF THE CHANGE (XDIF)
+! AND OF THE NEW SOLUTION (XMAX)
+      XMAX=0.0D0
+      XDIF=0.0D0
+      DO 30 I=1,N
+        AP=ALPHA*P(I)
+        X(I)=X(I)+AP
+        AP=DABS(AP)
+        XX=DABS(X(I))
+        IF (AP.GT.XDIF) XDIF=AP
+        IF (XX.GT.XMAX) XMAX=XX
+30    CONTINUE
+      IE=IE+1
+! CONVERGENCE TESTS
+! ... EPS > 0: RELATIVE CHANGE .LE. EPS
+! ... EPS < 0: CHANGE/|EPS| IS LOST WHEN ADDED TO XMAX IN FLOATING POINT
+! ... EPS = 0: NEITHER TEST CAN SUCCEED
+      IF ((EPS .GT. 0.0D0) .AND. (XDIF .LE. EPS * XMAX)) RETURN
+      IF ((EPS .LT. 0.0D0) .AND. (XMAX + XDIF/DABS(EPS) .EQ. XMAX))   RETURN
+!
+! EXCEEDED ITERATION LIMIT?
+!
+      IF ((ITER .NE. 0) .AND. (IE .GE. ITER)) GO TO 60
+! UPDATE THE RESIDUAL AND THE SEARCH DIRECTION FOR THE NEXT ITERATION
+      CALL DMUL10(N,IA,JA,A,P,S2)
+      DO 40 I=1,N
+        R(I)=R(I)-ALPHA*S2(I)
+40    CONTINUE
+      CALL DSUBL0(N,ILU,JLU,ID,ALU,R,S1)
+      RRDOT = DGVV(R,S1,N)
+      BETA=RRDOT/RDOT
+      RDOT=RRDOT
+      CALL DMUL20(N,IA,JA,A,S1,S2)
+      CALL DSUBU0(N,ILU,JLU,ID,ALU,S2,S1)
+      DO 50 I=1,N
+        P(I)=S1(I)+BETA*P(I)
+50    CONTINUE
+      GO TO 20
+! NO CONVERGENCE WITHIN THE LIMIT: REPORT THE COUNT AS A NEGATIVE NUMBER
+60    IE=-IE
+      RETURN
+      END
+!------------------------------------------------------------------------------------------------------
+      SUBROUTINE DMUL10(N,IA,JA,A,B,X)
+      IMPLICIT DOUBLE PRECISION (A-H,O-Z)
+      DIMENSION IA(*),JA(*),A(*),B(N),X(N)
+! SPARSE MATRIX-VECTOR PRODUCT  X = A * B
+! WHERE:
+!     N IS THE ORDER OF THE MATRIX
+!     IA(I) IS THE INDEX IN JA/A OF THE FIRST STORED ELEMENT OF ROW I
+!       (SO ROW I OCCUPIES IA(I) ... IA(I+1)-1)
+!     JA(K) IS THE COLUMN NUMBER OF A(K)
+!     A HOLDS THE STORED ELEMENTS OF THE NONSYMMETRIC MATRIX, ROW BY ROW
+!     B IS THE INPUT VECTOR
+!     X RECEIVES THE PRODUCT (MUST NOT BE THE SAME ARRAY AS B)
+
+      DO IROW=1,N
+! ACCUMULATE THE DOT PRODUCT OF ROW IROW WITH B, IN STORAGE ORDER
+        ROWSUM=0.0D0
+        DO K=IA(IROW),IA(IROW+1)-1
+          ROWSUM=ROWSUM+A(K)*B(JA(K))
+        END DO
+        X(IROW)=ROWSUM
+      END DO
+      RETURN
+      END
+!--------------------------------------------------------------------------------------------------------
+      SUBROUTINE DMUL20(N,IA,JA,A,B,X)
+      IMPLICIT DOUBLE PRECISION (A-H,O-Z)
+      DIMENSION IA(*),JA(*),A(*),B(N),X(N)
+! SPARSE MATRIX-VECTOR PRODUCT  X = TRANSPOSE(A) * B
+! WHERE:
+!     N IS THE ORDER OF THE MATRIX
+!     IA(I) IS THE INDEX IN JA/A OF THE FIRST STORED ELEMENT OF ROW I
+!       (SO ROW I OCCUPIES IA(I) ... IA(I+1)-1)
+!     JA(K) IS THE COLUMN NUMBER OF A(K)
+!     A HOLDS THE STORED ELEMENTS OF THE NONSYMMETRIC MATRIX, ROW BY ROW
+!     B IS THE INPUT VECTOR
+!     X RECEIVES THE PRODUCT (MUST NOT BE THE SAME ARRAY AS B)
+
+! START FROM ZERO, THEN SCATTER EACH ROW'S CONTRIBUTION BY COLUMN
+      DO J=1,N
+        X(J)=0.0D0
+      END DO
+      DO IROW=1,N
+        BROW=B(IROW)
+        DO K=IA(IROW),IA(IROW+1)-1
+          X(JA(K))=X(JA(K))+A(K)*BROW
+        END DO
+      END DO
+      RETURN
+      END
+!---------------------------------------------------------------------------------------------------------
+      SUBROUTINE DMUL30(N,ILU,JLU,ID,ALU,B,X)
+      IMPLICIT DOUBLE PRECISION (A-H,O-Z)
+      DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N)
+! MULTIPLY TRANSPOSE OF U TIMES U TIMES B TO GET X
+! WHERE:
+!     N IS THE ORDER OF THE MATRIX
+!     ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU
+!     JLU GIVES COLUMN NUMBER
+!     ID GIVES INDEX OF DIAGONAL ELEMENT OF U
+!     ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW
+!       WITH RECIPROCALS OF DIAGONAL ELEMENTS
+!     B IS THE VECTOR
+!     X IS THE PRODUCT UT*U*B (X MUST BE DIFFERENT FROM B)
+
+      DO 10 I=1,N
+        X(I)=0.0D0
+10    CONTINUE
+      DO 50 I=1,N
+! KS..KE ARE THE ENTRIES OF ROW I TO THE RIGHT OF THE DIAGONAL
+        KS=ID(I)+1
+        KE=ILU(I+1)-1
+! ALU STORES THE RECIPROCAL OF THE DIAGONAL, SO INVERT IT BACK
+        DIAG=1.0D0/ALU(KS-1)
+! XX = ROW I OF U TIMES B
+        XX=DIAG*B(I)
+        IF (KS.GT.KE) GO TO 30
+        DO 20 K=KS,KE
+          J=JLU(K)
+          XX=XX+ALU(K)*B(J)
+20      CONTINUE
+! ADD XX TIMES (ROW I OF U, TRANSPOSED) INTO X
+30      X(I)=X(I)+DIAG*XX
+        IF (KS.GT.KE) GO TO 50
+        DO 40 K=KS,KE
+          J=JLU(K)
+          X(J)=X(J)+ALU(K)*XX
+40      CONTINUE
+50    CONTINUE
+      RETURN
+      END
+!----------------------------------------------------------------------------------------------------------
+      SUBROUTINE DSUBU0(N,ILU,JLU,ID,ALU,B,X)
+      IMPLICIT DOUBLE PRECISION (A-H,O-Z)
+      DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N)
+! DO FORWARD AND BACK SUBSTITUTION TO SOLVE UT*U*X=B
+! WHERE:
+!     N IS THE ORDER OF THE MATRIX
+!     ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU
+!     JLU GIVES COLUMN NUMBER
+!     ID GIVES INDEX OF DIAGONAL ELEMENT OF U
+!     ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW
+!       WITH RECIPROCALS OF DIAGONAL ELEMENTS OF U
+!     B IS THE RHS VECTOR
+!     X IS THE SOLUTION VECTOR
+
+      NP=N+1
+! WORK IN PLACE ON A COPY OF B
+      DO 10 I=1,N
+        X(I)=B(I)
+10    CONTINUE
+! FORWARD SUBSTITUTION
+! ... MULTIPLYING BY ALU(KS-1) DIVIDES BY THE DIAGONAL ELEMENT,
+!     SINCE ALU STORES ITS RECIPROCAL
+      DO 30 I=1,N
+        KS=ID(I)+1
+        KE=ILU(I+1)-1
+        XX=X(I)*ALU(KS-1)
+        X(I)=XX
+        IF (KS.GT.KE) GO TO 30
+        DO 20 K=KS,KE
+          J=JLU(K)
+          X(J)=X(J)-ALU(K)*XX
+20      CONTINUE
+30    CONTINUE
+! BACK SUBSTITUTION
+! ... I RUNS FROM N DOWN TO 1
+      DO 60 II=1,N
+        I=NP-II
+        KS=ID(I)+1
+        KE=ILU(I+1)-1
+        SUM=0.0D0
+        IF (KS.GT.KE) GO TO 50
+        DO 40 K=KS,KE
+          J=JLU(K)
+          SUM=SUM+ALU(K)*X(J)
+40      CONTINUE
+50      X(I)=(X(I)-SUM)*ALU(KS-1)
+60    CONTINUE
+      RETURN
+      END
+!--------------------------------------------------------------------------------------------------------------
+      SUBROUTINE DSUBL0(N,ILU,JLU,ID,ALU,B,X)
+      IMPLICIT DOUBLE PRECISION (A-H,O-Z)
+      DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N)
+! DO FORWARD AND BACK SUBSTITUTION TO SOLVE L*LT*X=B
+! WHERE:
+!     N IS THE ORDER OF THE MATRIX
+!     ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW LU
+!     JLU GIVES THE COLUMN NUMBER
+!     ID GIVES INDEX OF DIAGONAL ELEMENT OF U
+!     ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW
+!       DIAGONAL ELEMENTS OF L ARE 1.0 AND NOT STORED
+!     B IS THE RHS VECTOR
+!     X IS THE SOLUTION VECTOR
+
+      NP=N+1
+! WORK IN PLACE ON A COPY OF B
+      DO 10 I=1,N
+        X(I)=B(I)
+10    CONTINUE
+! FORWARD SUBSTITUTION
+! ... KS..KE ARE THE ENTRIES OF ROW I TO THE LEFT OF THE DIAGONAL
+      DO 30 I=1,N
+        KS=ILU(I)
+        KE=ID(I)-1
+        IF (KS.GT.KE) GO TO 30
+        SUM=0.0D0
+        DO 20 K=KS,KE
+          J=JLU(K)
+          SUM=SUM+ALU(K)*X(J)
+20      CONTINUE
+        X(I)=X(I)-SUM
+30    CONTINUE
+! BACK SUBSTITUTION
+! ... I RUNS FROM N DOWN TO 1; A ZERO X(I) CONTRIBUTES NOTHING,
+!     SO ITS ROW IS SKIPPED
+      DO 50 II=1,N
+        I=NP-II
+        KS=ILU(I)
+        KE=ID(I)-1
+        IF (KS.GT.KE) GO TO 50
+        XX=X(I)
+        IF (XX.EQ.0.0) GO TO 50
+        DO 40 K=KS,KE
+          J=JLU(K)
+          X(J)=X(J)-ALU(K)*XX
+40      CONTINUE
+50    CONTINUE
+      RETURN
+      END
+!------------------------------------------------------------------------------------------------------------------
+      DOUBLE PRECISION FUNCTION DGVV(V,W,N)
+      IMPLICIT DOUBLE PRECISION (A-H,O-Z)
+      DIMENSION V(N),W(N)
+!     FUNCTION TO COMPUTE DOUBLE PRECISION VECTOR DOT PRODUCT.
+!     Optimized using Intel oneMKL BLAS ddot
+!     Mathematical equivalence: DGVV = sum_{i=1}^{N} V(i)*W(i)
+!     V AND W ARE THE TWO VECTORS, N IS THEIR LENGTH.
+!     NOTE: DDOT IS AN EXTERNAL ROUTINE, SO A BLAS LIBRARY MUST BE
+!     LINKED; BOTH VECTORS ARE PASSED WITH UNIT STRIDE.
+
+      DOUBLE PRECISION, EXTERNAL :: DDOT
+      DGVV = DDOT(N, V, 1, W, 1)
+      RETURN
+      END
--- a/AMSS_NCKU_source/AHF_Direct/ilucg.h
+++ b/AMSS_NCKU_source/AHF_Direct/ilucg.h
@@ -0,0 +1,24 @@
+
+#ifndef ILUCG_H
+#define ILUCG_H
+
+// Select the linker-level name of the Fortran subroutine ILUCG according
+// to which of the macros fortran1 / fortran2 / fortran3 is defined:
+//   fortran1 -> ilucg    fortran2 -> ILUCG    fortran3 -> ilucg_
+// NOTE(review): if none of the three is defined,  f_ilucg  is not
+// remapped and the declaration below refers to a symbol literally
+// named "f_ilucg" -- confirm the build always defines one of them.
+#ifdef fortran1
+#define f_ilucg ilucg
+#endif
+#ifdef fortran2
+#define f_ilucg ILUCG
+#endif
+#ifdef fortran3
+#define f_ilucg ilucg_
+#endif
+
+// C++ prototype for the Fortran subroutine
+//   ILUCG(N,IA,JA,A,B,X,ITEMP,RTEMP,EPS,MAXITER,ISTATUS)
+// Scalars are passed by reference and arrays by pointer; the  ITER
+// parameter here corresponds to the Fortran argument  MAXITER.
+// See the comments in ilucg.f90 for the meaning and required sizes
+// of each argument.
+extern "C"
+{
+	void f_ilucg(const int &N,
+				 const int *IA, const int *JA, const double *A,
+				 const double *B, double *X,
+				 int *ITEMP, double *RTEMP,
+				 const double &EPS, const int &ITER, int &ISTATUS);
+}
+
+#endif /* ILUCG_H */
--- a/AMSS_NCKU_source/AHF_Direct/initial_guess.C
+++ b/AMSS_NCKU_source/AHF_Direct/initial_guess.C
@@ -0,0 +1,132 @@
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+
+#include "util_Table.h"
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "cpm_map.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "fd_grid.h"
+#include "patch.h"
+#include "patch_edge.h"
+#include "patch_interp.h"
+#include "ghost_zone.h"
+#include "patch_system.h"
+
+#include "Jacobian.h"
+
+#include "gfns.h"
+#include "gr.h"
+
+#include "horizon_sequence.h"
+#include "BH_diagnostics.h"
+#include "myglobal.h"
+
+namespace AHFinderDirect
+{
+	extern struct state state;
+	//******************************************************************************
+
+	// ellipsoid has global-coordinates center (A,B,C), radius (a,b,c)
+	// angular coordinate system has center (U,V,W)
+	//
+	// direction cosines wrt angular coordinate center are (xcos,ycos,zcos)
+	// i.e. a point has coordinates (U+xcos*r, V+ycos*r, W+zcos*r)
+	//
+	// then the equation of the ellipsoid is
+	//	(U+xcos*r - A)^2     (V+ycos*r - B)^2     (W+zcos*r - C)^2
+	//	-----------------  +  ----------------  +  -----------------  =  1
+	//	        a^2                  b^2                   c^2
+	//
+	// to solve this, we introduce intermediate variables
+	//	AU = A - U
+	//	BV = B - V
+	//	CW = C - W
+	//
+	//
+	// Set the ghosted gridfn  gfns::gfn__h  at every nominal-grid point
+	// of every patch to the radius at which the ray from the patch
+	// system's origin, in that point's (rho,sigma) direction, meets the
+	// coordinate ellipsoid.
+	//
+	// Arguments:
+	// ps = (in/out) patch system whose  h  gridfn is to be set
+	// [xyz]_center = (in) global coordinates of the ellipsoid's center
+	// [xyz]_radius = (in) the ellipsoid's semi-axes along x, y, z
+	//
+	// Exactly one of the two roots of the quadratic should be positive.
+	// If that is not the case, a diagnostic is printed on process 0 and
+	// r_plus  is stored so that the value written to the grid is
+	// well-defined and the same on every process.
+	//
+	void setup_initial_guess(patch_system &ps,
+							 fp x_center, fp y_center, fp z_center,
+							 fp x_radius, fp y_radius, fp z_radius)
+	{
+		// set up variables used by Maple-generated code
+		// ... these depend only on the arguments, so compute them once
+		const fp AU = x_center - ps.origin_x();
+		const fp BV = y_center - ps.origin_y();
+		const fp CW = z_center - ps.origin_z();
+		const fp a = x_radius;
+		const fp b = y_radius;
+		const fp c = z_radius;
+
+		for (int pn = 0; pn < ps.N_patches(); ++pn)
+		{
+			patch &p = ps.ith_patch(pn);
+
+			for (int irho = p.min_irho(); irho <= p.max_irho(); ++irho)
+			{
+				for (int isigma = p.min_isigma();
+					 isigma <= p.max_isigma();
+					 ++isigma)
+				{
+					const fp rho = p.rho_of_irho(irho);
+					const fp sigma = p.sigma_of_isigma(isigma);
+					fp xcos, ycos, zcos;
+					p.xyzcos_of_rho_sigma(rho, sigma, xcos, ycos, zcos);
+
+					// compute the solutions r_plus and r_minus
+					fp r_plus, r_minus;
+					{
+						fp t1, t2, t3, t5, t6, t7, t9, t10, t12, t28;
+						fp t30, t33, t35, t36, t40, t42, t43, t48, t49, t52;
+						fp t55;
+						t1 = a * a;
+						t2 = b * b;
+						t3 = t1 * t2;
+						t5 = t3 * zcos * CW;
+						t6 = c * c;
+						t7 = t1 * t6;
+						t9 = t7 * ycos * BV;
+						t10 = t2 * t6;
+						t12 = t10 * xcos * AU;
+						t28 = xcos * xcos;
+						t30 = CW * CW;
+						t33 = BV * BV;
+						t35 = t10 * t28;
+						t36 = ycos * ycos;
+						t40 = AU * AU;
+						t42 = t7 * t36;
+						t43 = zcos * zcos;
+						t48 = t3 * t43;
+						t49 = -2.0 * t1 * zcos * CW * ycos * BV - 2.0 * t2 * zcos * CW * xcos * AU - 2.0 * t6 * ycos * BV * xcos * AU + t2 * t28 * t30 + t6 * t28 * t33 - t35 + t1 * t36 * t30 + t6 * t36 * t40 - t42 + t1 * t43 * t33 + t2 * t43 * t40 -
+							  t48;
+						t52 = sqrt(-t3 * t6 * t49);
+						t55 = 1 / (t35 + t42 + t48);
+						r_plus = (t5 + t9 + t12 + t52) * t55;
+						r_minus = (t5 + t9 + t12 - t52) * t55;
+					}
+
+					// exactly one of the solutions (call it r) should be positive
+					fp r;
+					if ((r_plus > 0.0) && (r_minus < 0.0))
+					{
+						r = r_plus;
+					}
+					else if ((r_plus < 0.0) && (r_minus > 0.0))
+					{
+						r = r_minus;
+					}
+					else
+					{
+						if (state.my_proc == 0)
+							printf("\nsetup_coord_ellipsoid():\nexpected exactly one r>0 solution to quadratic, got 0 or 2!\n%s patch (irho,isigma)=(%d,%d) ==> (rho,sigma)=(%g,%g)\ndirection cosines (xcos,ycos,zcos)=(%g,%g,%g)\nr_plus=%g r_minus=%g\n==> this probably means the initial guess surface doesn't contain\nthe local origin point, or more generally that the initial\nguess surface isn't a Strahlkoerper (\"star-shaped region\")\nwith respect to the local origin point\n", p.name(), irho, isigma, double(rho), double(sigma), double(xcos), double(ycos), double(zcos), double(r_plus), double(r_minus));
+
+						// never store an uninitialized value: fall back to
+						// r_plus on *all* processes, not only on the one
+						// that prints the diagnostic
+						r = r_plus;
+					}
+
+					// r = horizon radius at this grid point
+					p.ghosted_gridfn(gfns::gfn__h, irho, isigma) = r;
+				}
+			}
+		}
+	}
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/linear_map.C
+++ b/AMSS_NCKU_source/AHF_Direct/linear_map.C
@@ -0,0 +1,244 @@
+#include <assert.h>
+#include <stdio.h>
+
+#include "stdc.h"
+#include "util.h"
+#include "linear_map.h"
+
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+		//
+		// This constructor builds a  linear_map  from explicit integer bounds
+		// [min_int_in,max_int_in] and fp parameters min_fp_in(delta_fp_in)max_fp_in.
+		// It caches 1.0/delta_fp_in, then calls  constructor_common()  to set
+		// the origin and to check that the int and fp ranges are consistent.
+		//
+		template <typename fp_t>
+		linear_map<fp_t>::linear_map(int min_int_in, int max_int_in,
+									 fp_t min_fp_in, fp_t delta_fp_in, fp_t max_fp_in)
+			: delta_(delta_fp_in), inverse_delta_(1.0 / delta_fp_in),
+			  min_int_(min_int_in), max_int_(max_int_in)
+		{
+			constructor_common(min_fp_in, max_fp_in);
+		}
+
+		//
+		// This constructor builds a  linear_map  covering the integer subrange
+		// [min_int_in,max_int_in] of an existing  linear_map  lm_in , reusing
+		// lm_in's grid spacing (and cached inverse spacing).
+		//
+		// Both subrange endpoints must lie within lm_in's integer range;
+		// otherwise  error_exit()  is called.
+		//
+		template <typename fp_t>
+		linear_map<fp_t>::linear_map(const linear_map<fp_t> &lm_in,
+									 int min_int_in, int max_int_in) // subrange
+			: delta_(lm_in.delta_fp()), inverse_delta_(lm_in.inverse_delta_fp()),
+			  min_int_(min_int_in), max_int_(max_int_in)
+		{
+			// n.b. the range check must be made against  lm_in , not  *this :
+			//      our own [min_int_,max_int_] has just been initialized from
+			//      these same arguments, so a check against ourselves could
+			//      never detect an out-of-range subrange
+			if (!(lm_in.is_in_range(min_int_in) && lm_in.is_in_range(max_int_in)))
+				then error_exit(ERROR_EXIT,
+								"***** linear_map<fp_t>::linear_map:\n"
+								"        min_int_in=%d and/or max_int_in=%d\n"
+								"        aren't in integer range [%d,%d] of existing linear_map!\n",
+								min_int_, max_int_,
+								lm_in.min_int(), lm_in.max_int()); /*NOTREACHED*/
+
+			// endpoints were validated above ==> ok to use the unchecked conversion
+			constructor_common(lm_in.fp_of_int_unchecked(min_int_in),
+							   lm_in.fp_of_int_unchecked(max_int_in));
+		}
+
+		//******************************************************************************
+
+		//
+		// Shared tail of every  linear_map<fp_t>  constructor: it fixes the
+		// origin so that  min_int()  maps to  min_fp_in , then verifies that
+		// max_int()  maps (fuzzily) to  max_fp_in .
+		//
+		// Preconditions:
+		//	min_int_, max_int_, delta_, inverse_delta_
+		// have already been set by the constructor's initializer list, so
+		// min_int(), max_int(), delta_fp(), inverse_delta_fp() are usable here;
+		// no other class member has been initialized yet.
+		//
+		template <typename fp_t>
+		void linear_map<fp_t>::constructor_common(fp_t min_fp_in, fp_t max_fp_in)
+		{
+			// fp_of_int_unchecked() reads origin_, so give it a defined (zero)
+			// value before using that function to work out the real origin
+			origin_ = 0.0;
+			origin_ = min_fp_in - fp_of_int_unchecked(min_int());
+
+			// by construction the low end of the range must now match
+			assert(fuzzy<fp_t>::EQ(fp_of_int_unchecked(min_int()), min_fp_in));
+
+			// the high end matches only if the caller's arguments were consistent
+			const bool high_end_mismatch = fuzzy<fp_t>::NE(fp_of_int_unchecked(max_int()), max_fp_in);
+			if (!high_end_mismatch)
+				return;
+
+			error_exit(ERROR_EXIT,
+					   "***** linear_map<fp_t>::linear_map:\n"
+					   "        int range [%d,%d]\n"
+					   "        and fp range [%g(%g)%g]\n"
+					   "        are (fuzzily) inconsistent!\n",
+					   min_int(), max_int(),
+					   double(min_fp_in), double(delta_fp()), double(max_fp_in));
+			/*NOTREACHED*/
+		}
+
+		//******************************************************************************
+
+		//
+		// This function maps an fp coordinate  x  to the corresponding position
+		// in the integer coordinate, returned as an fp value (so it need not
+		// be fuzzily integral).  If  x  is (fuzzily) outside the grid,
+		// error_exit()  is called.
+		//
+		template <typename fp_t>
+		fp_t linear_map<fp_t>::fp_int_of_fp(fp_t x)
+			const
+		{
+			const bool x_is_on_grid = is_in_range(x);
+			if (!x_is_on_grid)
+			{
+				error_exit(ERROR_EXIT,
+						   "***** linear_map<fp_t>::fp_int_of_fp:\n"
+						   "        fp value x=%g is (fuzzily) outside the grid!\n"
+						   "        {min(delta)max}_fp = %g(%g)%g\n",
+						   double(x),
+						   double(min_fp()), double(delta_fp()), double(max_fp()));
+				/*NOTREACHED*/
+			}
+
+			// invert  fp = origin_ + delta_*int  using the cached 1/delta_
+			const fp_t offset_from_origin = x - origin_;
+			return inverse_delta_ * offset_from_origin;
+		}
+
+		//******************************************************************************
+
+		//
+		// This function maps an fp coordinate  x  to its integer coordinate.
+		// If  x  is (fuzzily) a grid point, that grid point is returned.
+		// Otherwise the  nia  argument selects what happens:
+		//	nia_error	call  error_exit()
+		//	nia_warning	print a warning, then round to nearest
+		//	nia_round	round to nearest
+		//	nia_floor	round down
+		//	nia_ceiling	round up
+		//
+		// FIXME:
+		// The explicit  jtutil::  qualification on  round<fp_t>::  is ugly.
+		//
+		template <typename fp_t>
+		int linear_map<fp_t>::int_of_fp(fp_t x, noninteger_action nia /* = nia_error */)
+			const
+		{
+			const fp_t grid_index_fp = fp_int_of_fp(x);
+
+			// common case: x is (fuzzily) a grid point ==> return that point
+			if (fuzzy<fp_t>::is_integer(grid_index_fp))
+				return jtutil::round<fp_t>::to_integer(grid_index_fp); // *** EARLY RETURN ***
+
+			// get to here ==> x isn't (fuzzily) a grid point
+			// the leading %s is the severity marker ("*****" or "---")
+			static const char *const off_grid_format =
+				"%s linear_map<fp_t>::int_of_fp:\n"
+				"        x=%g isn't (fuzzily) a grid point!\n"
+				"        {min(delta)max}_fp() = %g(%g)%g\n";
+
+			switch (nia)
+			{
+			case nia_error:
+				error_exit(ERROR_EXIT,
+						   off_grid_format,
+						   "*****",
+						   double(x),
+						   double(min_fp()), double(delta_fp()), double(max_fp()));
+				/*NOTREACHED*/
+
+			case nia_warning:
+				printf(off_grid_format,
+					   "---",
+					   double(x),
+					   double(min_fp()), double(delta_fp()), double(max_fp()));
+				// deliberate fall through: after warning we round to nearest
+
+			case nia_round:
+				return jtutil::round<fp_t>::to_integer(grid_index_fp); // *** EARLY RETURN ***
+
+			case nia_floor:
+				return jtutil::round<fp_t>::floor(grid_index_fp); // *** EARLY RETURN ***
+
+			case nia_ceiling:
+				return jtutil::round<fp_t>::ceiling(grid_index_fp); // *** EARLY RETURN ***
+
+			default:
+				error_exit(PANIC_EXIT,
+						   "***** linear_map<fp_t>::int_of_fp: illegal nia=(int)%d\n"
+						   "                                   (this should never happen!)\n",
+						   int(nia)); /*NOTREACHED*/
+			}
+
+			// dummy return: the compiler isn't told that error_exit() never returns
+			return 0;
+		}
+
+		//******************************************************************************
+
+		//
+		// This function converts a spacing  delta_x  in the fp coordinate into
+		// the corresponding spacing in the integer coordinate.  If  delta_x  is
+		// (fuzzily) an integer number of grid spacings, that integer is returned.
+		// Otherwise the  nia  argument selects what happens:
+		//	nia_error	call  error_exit()
+		//	nia_warning	print a warning, then round to nearest
+		//	nia_round	round to nearest
+		//	nia_floor	round down
+		//	nia_ceiling	round up
+		//
+		// FIXME:
+		// The explicit  jtutil::  qualification on  round<fp_t>::  is ugly.
+		//
+		template <typename fp_t>
+		int linear_map<fp_t>::delta_int_of_delta_fp(fp_t delta_x, noninteger_action nia /* = nia_error */)
+			const
+		{
+			const fp_t spacing_count_fp = inverse_delta_ * delta_x;
+
+			// common case: delta_x is (fuzzily) a whole number of grid spacings
+			if (fuzzy<fp_t>::is_integer(spacing_count_fp))
+				return jtutil::round<fp_t>::to_integer(spacing_count_fp); // *** EARLY RETURN ***
+
+			// get to here ==> delta_x isn't (fuzzily) an integer number of grid spacings
+			// the leading %s is the severity marker ("*****" or "---")
+			static const char *const off_grid_format =
+				"%s linear_map<fp_t>::delta_int_of_delta_fp:\n"
+				"        delta_x=%g isn't (fuzzily) an integer number of grid spacings!\n"
+				"        {min(delta)max}_fp() = %g(%g)%g\n";
+
+			switch (nia)
+			{
+			case nia_error:
+				error_exit(ERROR_EXIT,
+						   off_grid_format,
+						   "*****",
+						   double(delta_x),
+						   double(min_fp()), double(delta_fp()), double(max_fp()));
+				/*NOTREACHED*/
+
+			case nia_warning:
+				printf(off_grid_format,
+					   "---",
+					   double(delta_x),
+					   double(min_fp()), double(delta_fp()), double(max_fp()));
+				// deliberate fall through: after warning we round to nearest
+
+			case nia_round:
+				return jtutil::round<fp_t>::to_integer(spacing_count_fp); // *** EARLY RETURN ***
+
+			case nia_floor:
+				return jtutil::round<fp_t>::floor(spacing_count_fp); // *** EARLY RETURN ***
+
+			case nia_ceiling:
+				return jtutil::round<fp_t>::ceiling(spacing_count_fp); // *** EARLY RETURN ***
+
+			default:
+				error_exit(PANIC_EXIT,
+						   "***** linear_map<fp_t>::delta_int_of_delta_fp: illegal nia=(int)%d\n"
+						   "                                               (this should never happen!)\n",
+						   int(nia)); /*NOTREACHED*/
+			}
+
+			// dummy return: the compiler isn't told that error_exit() never returns
+			return 0;
+		}
+
+		//******************************************************************************
+		//******************************************************************************
+		//******************************************************************************
+
+		//
+		// ***** template instantiation *****
+		//
+		// The member functions above are defined in this source file, so the
+		// class template is explicitly instantiated here for  float  and  double .
+		//
+
+		template class linear_map<float>;
+		template class linear_map<double>;
+
+		//******************************************************************************
+		//******************************************************************************
+		//******************************************************************************
+
+	} // namespace jtutil
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/linear_map.h
+++ b/AMSS_NCKU_source/AHF_Direct/linear_map.h
@@ -0,0 +1,131 @@
+#ifndef AHFINDERDIRECT__LINEAR_MAP_HH
+#define AHFINDERDIRECT__LINEAR_MAP_HH
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+
+		//
+		// linear_map<fp_t>  represents a linear mapping between a contiguous
+		// range of integers [min_int(),max_int()] and floating-point values
+		//	fp = origin() + delta_fp()*int
+		// together with the (fuzzy) inverse conversions  fp --> int .
+		//
+		template <typename fp_t>
+		class linear_map
+		{
+		public:
+			// integer bounds info
+			int min_int() const { return min_int_; }
+			int max_int() const { return max_int_; }
+			int N_points() const
+			{
+				return jtutil::how_many_in_range(min_int_, max_int_);
+			}
+			bool is_in_range(int i) const
+			{
+				return (i >= min_int()) && (i <= max_int());
+			}
+			// force  i  into [min_int(),max_int()]
+			int clamp(int i) const
+			{
+				if (i < min_int())
+					then return min_int();
+				else if (i > max_int())
+					then return max_int();
+				else
+					return i;
+			}
+
+			// convert int --> fp
+			// ... no range check on  i
+			fp_t fp_of_int_unchecked(int i) const
+			{
+				return origin_ + delta_ * i;
+			}
+			// ... asserts that  i  is in [min_int(),max_int()]
+			fp_t fp_of_int(int i) const
+			{
+				assert(is_in_range(i));
+				return fp_of_int_unchecked(i);
+			}
+
+			// convert delta_int --> delta_fp
+			fp_t delta_fp_of_delta_int(int delta_i) const
+			{
+				return delta_ * delta_i;
+			}
+
+			// fp bounds info
+			fp_t origin() const { return origin_; }
+			fp_t delta_fp() const { return delta_; }
+			fp_t inverse_delta_fp() const { return inverse_delta_; }
+			fp_t min_fp() const { return fp_of_int_unchecked(min_int_); }
+			fp_t max_fp() const { return fp_of_int_unchecked(max_int_); }
+			// ... fuzzy comparison against [min_fp(),max_fp()]
+			bool is_in_range(fp_t x) const
+			{
+				return fuzzy<fp_t>::GE(x, min_fp()) && fuzzy<fp_t>::LE(x, max_fp());
+			}
+			// ... exact (non-fuzzy) comparison against [min_fp(),max_fp()]
+			fp_t clamp(fp_t x) const
+			{
+				if (x < min_fp())
+					then return min_fp();
+				else if (x > max_fp())
+					then return max_fp();
+				else
+					return x;
+			}
+
+			// convert linear map indices <--> C-style 0-origin indices
+			// (both are  const : neither modifies the map, so both must be
+			//  callable on a  const linear_map )
+			int zero_origin_int(int i) const { return i - min_int(); }
+			int map_int(int zero_origin_i) const { return zero_origin_i + min_int(); }
+
+			// convert fp --> int coordinate, but return result as fp
+			// (which need not be fuzzily integral)
+			fp_t fp_int_of_fp(fp_t x) const;
+
+			// convert fp --> int, check being fuzzily integral
+			enum noninteger_action // what to do if "int"
+								   // isn't fuzzily integral?
+			{
+				nia_error,	 // jtutil::error_exit(...)
+				nia_warning, // print warning msg,
+							 // then round to nearest
+				nia_round,	 // (silently) round to nearest
+				nia_floor,	 // (silently) round to -infinity
+				nia_ceiling	 // (silently) round to +infinity
+			};
+			int int_of_fp(fp_t x, noninteger_action nia = nia_error) const;
+
+			// convert delta_fp --> delta_int, check being fuzzily integral
+			int delta_int_of_delta_fp(fp_t delta_x,
+									  noninteger_action nia = nia_error)
+				const;
+
+			// constructors
+			linear_map(int min_int_in, int max_int_in,
+					   fp_t min_fp_in, fp_t delta_fp_in, fp_t max_fp_in);
+			// ... construct with subrange of existing linear_map
+			linear_map(const linear_map<fp_t> &lm_in,
+					   int min_int_in, int max_int_in);
+
+			// no need for explicit destructor, compiler-generated no-op is ok
+
+			// no need for explicit copy constructor,
+			// compiler-generated default is ok
+			// (n.b. there is no usable assignment operator: the  const
+			//  data members below prevent the compiler from generating one)
+
+		private:
+			// common code (argument validation & setup) for all constructors
+			// assumes min_int_, max_int_, delta_, inverse_delta_
+			//         are already initialized,
+			//         other class members *not* initialized
+			void constructor_common(fp_t min_fp_in, fp_t max_fp_in);
+
+			// these define the actual mapping
+			// via the  fp_of_int()  function (above)
+			fp_t origin_, delta_;
+
+			// cache of 1.0/delta_
+			// ==> avoids fp divide in inverse_delta_fp()
+			// ==> also makes fp --> int conversions slightly faster
+			fp_t inverse_delta_;
+
+			// bounds of the integer range (fixed for the life of the object)
+			const int min_int_, max_int_;
+		};
+
+		//******************************************************************************
+
+	} // namespace jtutil
+} // namespace AHFinderDirect
+
+#endif /* AHFINDERDIRECT__LINEAR_MAP_HH */
--- a/AMSS_NCKU_source/AHF_Direct/miscfp.C
+++ b/AMSS_NCKU_source/AHF_Direct/miscfp.C
@@ -0,0 +1,66 @@
+#include <math.h>
+#include <stdlib.h>
+
+#include "cctk.h"
+
+#include "stdc.h"
+#include "util.h"
+
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+		// Sign of  x  as a double: +1.0 for x > 0, 0.0 for x == 0,
+		// and -1.0 for everything else.
+		double signum(double x)
+		{
+			if (x > 0.0)
+				return 1.0;
+			if (x == 0.0)
+				return 0.0;
+			return -1.0;
+		}
+		// Euclidean length of the 3-vector (x,y,z), computed directly as
+		// sqrt(x^2 + y^2 + z^2).
+		double hypot3(double x, double y, double z)
+		{
+			const double sum_of_squares = x * x + y * y + z * z;
+			return sqrt(sum_of_squares);
+		}
+		// Angle atan2(y,x) of the point (x,y), except that the origin
+		// x == y == 0 is explicitly mapped to 0.0 rather than being passed
+		// to  atan2() .
+		double arctan_xy(double x, double y)
+		{
+			if ((x == 0.0) && (y == 0.0))
+				return 0.0;
+			return atan2(y, x);
+		}
+		// Shifts  x  by multiples of  xmod  until it lies (fuzzily) within
+		// [xmin,xmax], and returns the shifted value.  If the shifted value
+		// is still not fuzzily within the range,  error_exit()  is called.
+		double modulo_reduce(double x, double xmod, double xmin, double xmax)
+		{
+			double reduced = x;
+
+			// too small ==> step up
+			for (; fuzzy<double>::LT(reduced, xmin); reduced += xmod)
+			{
+			}
+
+			// too large ==> step down
+			for (; fuzzy<double>::GT(reduced, xmax); reduced -= xmod)
+			{
+			}
+
+			const bool is_within_range = fuzzy<double>::GE(reduced, xmin) && fuzzy<double>::LE(reduced, xmax);
+			if (is_within_range)
+				return reduced;
+
+			error_exit(ERROR_EXIT,
+					   "***** modulo_reduce(): no modulo value is fuzzily within specified range!\n"
+					   "                       x = %g   xmod = %g\n"
+					   "                       [xmin,xmax] = [%g,%g]\n"
+					   "                       ==> xx = %g\n",
+					   x, xmod,
+					   xmin, xmax,
+					   reduced); /*NOTREACHED*/
+
+			return reduced;
+		}
+		// Sets the first  N  elements of  array[]  to zero.
+		// Does nothing if  N <= 0 .
+		template <typename fp_t>
+		void zero_C_array(int N, fp_t array[])
+		{
+			int remaining = N;
+			while (remaining > 0)
+			{
+				--remaining;
+				array[remaining] = 0;
+			}
+		}
+
+		// explicit instantiation of  zero_C_array()  for  CCTK_REAL  arrays
+		template void zero_C_array<CCTK_REAL>(int, CCTK_REAL[]);
+
+	} // namespace jtutil
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/myglobal.h
+++ b/AMSS_NCKU_source/AHF_Direct/myglobal.h
@@ -0,0 +1,65 @@
+#ifndef MYGLOBAL_H
+#define MYGLOBAL_H
+
+#include "var.h"
+#include "MyList.h"
+
+#ifdef USE_GPU
+#include "bssn_gpu_class.h"
+#else
+#include "bssn_class.h"
+#endif
+
+#include "driver.h"
+
+namespace AHFinderDirect
+{
+
+	//
+	// Prototypes for the horizon finder's entry points; only the
+	// declarations are visible in this header.
+	// NOTE(review): the argument descriptions below are inferred from the
+	// parameter names alone -- confirm against the definitions.
+	//
+
+	// presumably interpolates at the  Ns  points (X[],Y[],Z[]), storing
+	// results in  Data[]  -- TODO confirm
+	int globalInterpGFL(double *X, double *Y, double *Z, int Ns,
+						double *Data);
+
+	// same argument list as  globalInterpGFL() ; how it differs is not
+	// visible from this header -- TODO confirm
+	int globalInterpGFLlash(double *X, double *Y, double *Z, int Ns,
+							double *Data);
+
+	// setup; presumably  HN  is the number of horizons -- TODO confirm
+	void AHFinderDirect_setup(MyList<var> *AHList, MyList<var> *GaugeList, bssn_class *ADM,
+							  int Symmetry, int HN, double *PhysTime);
+
+	// cleanup counterpart of  AHFinderDirect_setup()  (by name)
+	void AHFinderDirect_cleanup();
+
+	// presumably (xc,yc,zc) and (xr,yr,zr) are per-horizon centers and
+	// radii; the final (unnamed) argument is undocumented -- TODO confirm
+	void AHFinderDirect_find_horizons(int HN, int *dumpid,
+									  double *xc, double *yc, double *zc, double *xr, double *yr, double *zr,
+									  bool *trigger, double *);
+
+	// takes the same center/radius arrays as  AHFinderDirect_find_horizons()
+	void AHFinderDirect_enforcefind(int HN,
+									double *xc, double *yc, double *zc, double *xr, double *yr, double *zr);
+	//
+	// Aggregate of the horizon finder's state.
+	// NOTE(review): members without a comment in the original are described
+	// here from their names and types only -- confirm against their users.
+	struct state
+	{
+		int N_procs; // total number of processors
+		int my_proc; // processor number of this processor
+					 // (0 to N_procs-1)
+
+		int Symmetry;	  // presumably the value passed to AHFinderDirect_setup()
+		double *PhysTime; // presumably the pointer passed to AHFinderDirect_setup()
+
+		// presumably the lists passed to AHFinderDirect_setup()
+		MyList<var> *AHList;
+		MyList<var> *GaugeList;
+
+		// presumably the object passed to AHFinderDirect_setup()
+		bssn_class *ADM;
+
+		int N_horizons; // total number of genuine horizons
+						// being searched for
+		int N_active_procs; // total number of active processors
+							// (the active processors are processor
+							//  numbers 0 to N_active_procs-1)
+
+		struct iteration_status_buffers isb;
+
+		horizon_sequence *my_hs;
+
+		struct AH_data **AH_data_array;
+
+		// NOTE(review): purpose of these buffers is not visible here
+		double *Data, *oX, *oY, *oZ;
+	};
+}
+#endif /* MYGLOBAL_H */
--- a/AMSS_NCKU_source/AHF_Direct/norm.C
+++ b/AMSS_NCKU_source/AHF_Direct/norm.C
@@ -0,0 +1,68 @@
+#include <math.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#include "util.h"
+
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+
+		// This constructor creates an empty  norm  accumulator: the sample
+		// count, both running sums, and all min/max trackers start at zero.
+		template <typename fp_t>
+		norm<fp_t>::norm()
+			: N_(0L),
+			  sum_(0.0), sum2_(0.0),
+			  max_abs_value_(0.0), min_abs_value_(0.0),
+			  max_value_(0.0), min_value_(0.0)
+		{
+		}
+
+		// This function returns the accumulator to its freshly-constructed
+		// (empty) state: zero sample count, zero sums, zero min/max trackers.
+		template <typename fp_t>
+		void norm<fp_t>::reset()
+		{
+			// min/max trackers
+			min_value_ = 0.0;
+			max_value_ = 0.0;
+			min_abs_value_ = 0.0;
+			max_abs_value_ = 0.0;
+
+			// running sums and sample count
+			sum2_ = 0.0;
+			sum_ = 0.0;
+			N_ = 0L;
+		}
+
+		// This function accumulates one sample  x : it updates the min/max
+		// of  x  and of  |x| , adds  x  and  x*x  to the running sums, and
+		// increments the sample count.
+		template <typename fp_t>
+		void norm<fp_t>::data(fp_t x)
+		{
+			const fp_t magnitude = jtutil::abs<fp_t>(x);
+
+			if (N_ == 0)
+			{
+				// first sample ==> it defines these extrema by itself
+				min_abs_value_ = magnitude;
+				min_value_ = x;
+				max_value_ = x;
+			}
+			else
+			{
+				min_abs_value_ = jtutil::tmin(min_abs_value_, magnitude);
+				min_value_ = jtutil::tmin(min_value_, x);
+				max_value_ = jtutil::tmax(max_value_, x);
+			}
+
+			// n.b. this one is compared against the stored value even for
+			//      the first sample
+			max_abs_value_ = jtutil::tmax(max_abs_value_, magnitude);
+
+			sum_ += x;
+			sum2_ += x * x;
+
+			++N_;
+		}
+
+		// This function returns the arithmetic mean of the samples so far
+		// (running sum divided by sample count; no check for an empty set).
+		template <typename fp_t>
+		fp_t norm<fp_t>::mean() const
+		{
+			const fp_t sample_count = fp_t(N_);
+			return sum_ / sample_count;
+		}
+		// This function returns the 2-norm of the samples so far,
+		// i.e. the square root of the running sum of squares.
+		template <typename fp_t>
+		fp_t norm<fp_t>::two_norm() const
+		{
+			return sqrt(sum2_);
+		}
+		// This function returns the root-mean-square of the samples so far.
+		// It asserts that at least one sample has been accumulated.
+		template <typename fp_t>
+		fp_t norm<fp_t>::rms_norm() const
+		{
+			assert(is_nonempty());
+
+			const fp_t mean_square = sum2_ / fp_t(N_);
+			return sqrt(mean_square);
+		}
+
+		// explicit instantiation of the  norm  class template
+		// for  float  and  double
+		template class jtutil::norm<float>;
+		template class jtutil::norm<double>;
+
+		//******************************************************************************
+		//******************************************************************************
+		//******************************************************************************
+
+	} // namespace jtutil
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/patch.C
+++ b/AMSS_NCKU_source/AHF_Direct/patch.C
@@ -0,0 +1,955 @@
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+#include <assert.h>
+
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "cpm_map.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "fd_grid.h"
+#include "patch.h"
+#include "patch_edge.h"
+#include "patch_interp.h"
+#include "ghost_zone.h"
+
+namespace AHFinderDirect
+{
+	using jtutil::error_exit;
+
+	//******************************************************************************
+	//******************************************************************************
+	//******************************************************************************
+
+	//
+	// This function constructs a  patch  object.
+	//
+	// The  fd_grid  base class is constructed from the grid parameters, and
+	// the identification data (owning patch system, patch number, name,
+	// +/- flag, ctype) and the three coordinate sets are stored.
+	//
+	// The four  patch_edge  objects are allocated with  new  and bound to
+	// reference members; the destructor releases them via  delete & .
+	// The four ghost zone pointers start out NULL.
+	//
+	patch::patch(patch_system &my_patch_system_in, int patch_number_in,
+				 const char name_in[], bool is_plus_in, char ctype_in,
+				 local_coords::coords_set coords_set_rho_in,
+				 local_coords::coords_set coords_set_sigma_in,
+				 local_coords::coords_set coords_set_tau_in,
+				 const grid_arrays::grid_array_pars &grid_array_pars_in,
+				 const grid::grid_pars &grid_pars_in)
+
+		: fd_grid(grid_array_pars_in, grid_pars_in),
+
+		  my_patch_system_(my_patch_system_in),
+		  patch_number_(patch_number_in),
+		  name_(name_in),
+		  is_plus_(is_plus_in), ctype_(ctype_in),
+
+		  coords_set_rho_(coords_set_rho_in),
+		  coords_set_sigma_(coords_set_sigma_in),
+		  coords_set_tau_(coords_set_tau_in),
+
+		  // one edge object per (min/max) x (rho/sigma) side of this patch
+		  min_rho_patch_edge_(*new patch_edge(*this, side_is_min, side_is_rho)),
+		  max_rho_patch_edge_(*new patch_edge(*this, side_is_max, side_is_rho)),
+		  min_sigma_patch_edge_(*new patch_edge(*this, side_is_min, side_is_sigma)),
+		  max_sigma_patch_edge_(*new patch_edge(*this, side_is_max, side_is_sigma)),
+
+		  // no ghost zones yet
+		  min_rho_ghost_zone_(NULL),
+		  max_rho_ghost_zone_(NULL),
+		  min_sigma_ghost_zone_(NULL),
+		  max_sigma_ghost_zone_(NULL) // no comma
+
+	{
+	}
+
+	//******************************************************************************
+
+	//
+	// This function destroys a  patch  object.
+	//
+	// It deletes the four ghost zones (if any) and then the four patch
+	// edges that the constructor allocated with  new .
+	//
+	patch::~patch()
+	{
+		// no need to check for null pointers, since  delete NULL  is a silent no-op
+
+		delete max_sigma_ghost_zone_;
+		delete min_sigma_ghost_zone_;
+		delete max_rho_ghost_zone_;
+		delete min_rho_ghost_zone_;
+
+		// the edges are reference members bound to heap objects,
+		// so they are deleted through their addresses
+		delete &max_sigma_patch_edge_;
+		delete &min_sigma_patch_edge_;
+		delete &max_rho_patch_edge_;
+		delete &min_rho_patch_edge_;
+	}
+
+	//******************************************************************************
+
+	//
+	// This function constructs a  z_patch  object.
+	//
+	// It forwards to the  patch  constructor with ctype 'z' and the
+	// coordinate sets (rho,sigma,tau) = (mu,nu,phi).
+	//
+	z_patch::z_patch(patch_system &my_patch_system_in, int patch_number_in,
+					 const char *name_in, bool is_plus_in,
+					 const grid_arrays::grid_array_pars &grid_array_pars_in,
+					 const grid::grid_pars &grid_pars_in)
+		: patch(my_patch_system_in, patch_number_in,
+				name_in, is_plus_in, 'z',
+				local_coords::coords_set_mu, local_coords::coords_set_nu,
+				local_coords::coords_set_phi,
+				grid_array_pars_in, grid_pars_in)
+	{
+	}
+
+	//******************************************************************************
+
+	//
+	// This function constructs an  x_patch  object.
+	//
+	// It forwards to the  patch  constructor with ctype 'x' and the
+	// coordinate sets (rho,sigma,tau) = (nu,phi,mu).
+	//
+	x_patch::x_patch(patch_system &my_patch_system_in, int patch_number_in,
+					 const char *name_in, bool is_plus_in,
+					 const grid_arrays::grid_array_pars &grid_array_pars_in,
+					 const grid::grid_pars &grid_pars_in)
+		: patch(my_patch_system_in, patch_number_in,
+				name_in, is_plus_in, 'x',
+				local_coords::coords_set_nu, local_coords::coords_set_phi,
+				local_coords::coords_set_mu,
+				grid_array_pars_in, grid_pars_in)
+	{
+	}
+
+	//******************************************************************************
+
+	//
+	// This function constructs a  y_patch  object.
+	//
+	// It forwards to the  patch  constructor with ctype 'y' and the
+	// coordinate sets (rho,sigma,tau) = (mu,phi,nu).
+	//
+	y_patch::y_patch(patch_system &my_patch_system_in, int patch_number_in,
+					 const char *name_in, bool is_plus_in,
+					 const grid_arrays::grid_array_pars &grid_array_pars_in,
+					 const grid::grid_pars &grid_pars_in)
+		: patch(my_patch_system_in, patch_number_in,
+				name_in, is_plus_in, 'y',
+				local_coords::coords_set_mu, local_coords::coords_set_phi,
+				local_coords::coords_set_nu,
+				grid_array_pars_in, grid_pars_in)
+	{
+	}
+
+	//******************************************************************************
+	//******************************************************************************
+	//******************************************************************************
+
+	//
+	// This function computes the 2-D metric induced on the surface
+	// r = r(rho,sigma) by the 3-D (x,y,z) metric of the space containing
+	// the patch.
+	//
+	// Arguments:
+	// (r,rho,sigma) = (in) The coordinates where the induced metric is wanted.
+	// partial_surface_r_wrt_(rho,sigma)
+	//	= (in) The partial derivatives of the surface radius with respect
+	//	       to the (rho,sigma) coordinates.
+	// g_{xx,xy,xz,yy,yz,zz} = (in) The xyz 3-metric components $g_{ij}$.
+	// g_{rho_rho,rho_sigma,sigma_sigma} = (out) The (rho,sigma) induced 2-D metric.
+	//
+	// Results:
+	// This function returns the determinant of the induced 2-D metric,
+	//	g_rho_rho*g_sigma_sigma - g_rho_sigma^2
+	//
+	fp patch::rho_sigma_metric(fp r, fp rho, fp sigma,
+							   fp partial_surface_r_wrt_rho,
+							   fp partial_surface_r_wrt_sigma,
+							   fp g_xx, fp g_xy, fp g_xz,
+							   fp g_yy, fp g_yz,
+							   fp g_zz,
+							   fp &g_rho_rho, fp &g_rho_sigma,
+							   fp &g_sigma_sigma)
+		const
+	{
+		// partial derivatives of (x,y,z) with respect to (r,rho,sigma)
+		fp x_r, x_rho, x_sigma;
+		fp y_r, y_rho, y_sigma;
+		fp z_r, z_rho, z_sigma;
+		partial_xyz_wrt_r_rho_sigma(r, rho, sigma,
+									x_r, x_rho, x_sigma,
+									y_r, y_rho, y_sigma,
+									z_r, z_rho, z_sigma);
+
+		// xyz components of the two surface tangent vectors: total
+		// derivatives with respect to rho and sigma along r = r(rho,sigma)
+		const fp tan_rho_x = x_rho + x_r * partial_surface_r_wrt_rho;
+		const fp tan_rho_y = y_rho + y_r * partial_surface_r_wrt_rho;
+		const fp tan_rho_z = z_rho + z_r * partial_surface_r_wrt_rho;
+		const fp tan_sigma_x = x_sigma + x_r * partial_surface_r_wrt_sigma;
+		const fp tan_sigma_y = y_sigma + y_r * partial_surface_r_wrt_sigma;
+		const fp tan_sigma_z = z_sigma + z_r * partial_surface_r_wrt_sigma;
+
+		// contract the 3-metric with the tangent vectors
+		g_rho_rho = tan_rho_x * tan_rho_x * g_xx
+					+ 2.0 * tan_rho_x * tan_rho_y * g_xy
+					+ 2.0 * tan_rho_x * tan_rho_z * g_xz
+					+ tan_rho_y * tan_rho_y * g_yy
+					+ 2.0 * tan_rho_y * tan_rho_z * g_yz
+					+ tan_rho_z * tan_rho_z * g_zz;
+		g_rho_sigma = tan_rho_x * tan_sigma_x * g_xx
+					  + (tan_rho_x * tan_sigma_y + tan_rho_y * tan_sigma_x) * g_xy
+					  + (tan_rho_x * tan_sigma_z + tan_rho_z * tan_sigma_x) * g_xz
+					  + tan_rho_y * tan_sigma_y * g_yy
+					  + (tan_rho_y * tan_sigma_z + tan_rho_z * tan_sigma_y) * g_yz
+					  + tan_rho_z * tan_sigma_z * g_zz;
+		g_sigma_sigma = tan_sigma_x * tan_sigma_x * g_xx
+						+ 2.0 * tan_sigma_x * tan_sigma_y * g_xy
+						+ 2.0 * tan_sigma_x * tan_sigma_z * g_xz
+						+ tan_sigma_y * tan_sigma_y * g_yy
+						+ 2.0 * tan_sigma_y * tan_sigma_z * g_yz
+						+ tan_sigma_z * tan_sigma_z * g_zz;
+
+		return g_rho_rho * g_sigma_sigma - jtutil::pow2(g_rho_sigma);
+	}
+
+	//******************************************************************************
+	//******************************************************************************
+	//******************************************************************************
+
+	//
+	// This function decodes the character-string name of an integration
+	// method into an  enum integration_method .  The names accepted are
+	//	"trapezoid", "trapezoid rule"
+	//	"Simpson", "Simpson's rule"
+	//	"Simpson (variant)", "Simpson's rule (variant)"
+	//	"automatic choice"
+	// Any other string causes  error_exit()  to be called.
+	//
+	// static
+	enum patch::integration_method
+	patch::decode_integration_method(const char method_string[])
+	{
+		if ((strcmp(method_string, "trapezoid") == 0) || (strcmp(method_string, "trapezoid rule") == 0))
+			return integration_method__trapezoid;
+
+		if ((strcmp(method_string, "Simpson") == 0) || (strcmp(method_string, "Simpson's rule") == 0))
+			return integration_method__Simpson;
+
+		if ((strcmp(method_string, "Simpson (variant)") == 0) || (strcmp(method_string, "Simpson's rule (variant)") == 0))
+			return integration_method__Simpson_variant;
+
+		if (strcmp(method_string, "automatic choice") == 0)
+			return integration_method__automatic_choice;
+
+		// get to here ==> no known name matched
+		error_exit(ERROR_EXIT,
+				   "***** patch::decode_integration_method():\n"
+				   "        unknown method_string=\"%s\"!\n",
+				   method_string); /*NOTREACHED*/
+	}
+
+	//******************************************************************************
+
+	//
+	// This function computes an approximation to the arc length of a surface
+	// over the patch's nominal bounds along the rho direction, on a line of
+	// constant sigma.  The line used is the first of dsigma = 0, 90, 180, -90
+	// (degrees) which  is_valid_dsigma()  accepts; if none is accepted,
+	// error_exit()  is called.
+	//
+	// Arguments:
+	// ghosted_radius_gfn = (in) The surface radius.
+	// g_{xx,xy,xz,yy,yz,zz}_gfn = (in) The xyz 3-metric components.
+	// method = (in) Selects the integration scheme.
+	//
+	// Results:
+	// This function returns  delta_rho()  times the sum over the rho grid
+	// points of (integration coefficient) * sqrt(g_rho_rho).
+	//
+	fp patch::rho_arc_length(int ghosted_radius_gfn,
+							 int g_xx_gfn, int g_xy_gfn, int g_xz_gfn,
+							 int g_yy_gfn, int g_yz_gfn,
+							 int g_zz_gfn,
+							 enum integration_method method)
+		const
+	{
+		// choose the sigma = constant line to integrate along
+		fp dsigma;
+		if (is_valid_dsigma(0.0))
+			dsigma = 0.0;
+		else if (is_valid_dsigma(90.0))
+			dsigma = 90.0;
+		else if (is_valid_dsigma(180.0))
+			dsigma = 180.0;
+		else if (is_valid_dsigma(-90.0))
+			dsigma = -90.0;
+		else
+			error_exit(PANIC_EXIT,
+					   "***** patch::rho_arc_length(): can't find valid dsigma\n"
+					   "                               which is a multiple of 90 degrees!\n"
+					   "                               %s patch: [min,max]_dsigma()=[%g,%g]\n",
+					   name(), min_dsigma(), max_dsigma());
+		const fp sigma = sigma_of_dsigma(dsigma);
+		const int isigma = isigma_of_sigma(sigma);
+
+		const int first_irho = min_irho();
+		const int N_intervals = max_irho() - first_irho;
+
+		fp weighted_sum = 0.0;
+		for (int irho = first_irho; irho <= max_irho(); ++irho)
+		{
+			// surface position and slope at this grid point
+			const fp rho = rho_of_irho(irho);
+			const fp r = ghosted_gridfn(ghosted_radius_gfn, irho, isigma);
+			const fp dr_drho = partial_rho(ghosted_radius_gfn, irho, isigma);
+			const fp dr_dsigma = partial_sigma(ghosted_radius_gfn, irho, isigma);
+
+			// xyz 3-metric at this grid point
+			const fp g_xx = gridfn(g_xx_gfn, irho, isigma);
+			const fp g_xy = gridfn(g_xy_gfn, irho, isigma);
+			const fp g_xz = gridfn(g_xz_gfn, irho, isigma);
+			const fp g_yy = gridfn(g_yy_gfn, irho, isigma);
+			const fp g_yz = gridfn(g_yz_gfn, irho, isigma);
+			const fp g_zz = gridfn(g_zz_gfn, irho, isigma);
+
+			// induced 2-D metric (only g_rho_rho is needed below)
+			fp g_rho_rho, g_rho_sigma, g_sigma_sigma;
+			rho_sigma_metric(r, rho, sigma,
+							 dr_drho, dr_dsigma,
+							 g_xx, g_xy, g_xz,
+							 g_yy, g_yz,
+							 g_zz,
+							 g_rho_rho, g_rho_sigma, g_sigma_sigma);
+
+			const fp weight = integration_coeff(method,
+												N_intervals,
+												irho - first_irho);
+
+			weighted_sum += weight * sqrt(g_rho_rho);
+		}
+
+		return delta_rho() * weighted_sum;
+	}
+
+	//******************************************************************************
+
+	//
+	// This function computes an approximation to the arc length of a surface
+	// over the patch's nominal bounds along the sigma direction, on a line of
+	// constant rho.  The line used is the first of drho = 0, 90, 180, -90
+	// (degrees) which  is_valid_drho()  accepts; if none is accepted,
+	// error_exit()  is called.
+	//
+	// Arguments:
+	// ghosted_radius_gfn = (in) The surface radius.
+	// g_{xx,xy,xz,yy,yz,zz}_gfn = (in) The xyz 3-metric components.
+	// method = (in) Selects the integration scheme.
+	//
+	// Results:
+	// This function returns  delta_sigma()  times the sum over the sigma grid
+	// points of (integration coefficient) * sqrt(g_sigma_sigma).
+	//
+	fp patch::sigma_arc_length(int ghosted_radius_gfn,
+							   int g_xx_gfn, int g_xy_gfn, int g_xz_gfn,
+							   int g_yy_gfn, int g_yz_gfn,
+							   int g_zz_gfn,
+							   enum integration_method method)
+		const
+	{
+		// choose the rho = constant line to integrate along
+		fp drho;
+		if (is_valid_drho(0.0))
+			drho = 0.0;
+		else if (is_valid_drho(90.0))
+			drho = 90.0;
+		else if (is_valid_drho(180.0))
+			drho = 180.0;
+		else if (is_valid_drho(-90.0))
+			drho = -90.0;
+		else
+			error_exit(PANIC_EXIT,
+					   "***** patch::sigma_arc_length(): can't find valid drho\n"
+					   "                                 which is a multiple of 90 degrees!\n"
+					   "                                 %s patch: [min,max]_drho()=[%g,%g]\n",
+					   name(), min_drho(), max_drho());
+		const fp rho = rho_of_drho(drho);
+		const int irho = irho_of_rho(rho);
+
+		const int first_isigma = min_isigma();
+		const int N_intervals = max_isigma() - first_isigma;
+
+		fp weighted_sum = 0.0;
+		for (int isigma = first_isigma; isigma <= max_isigma(); ++isigma)
+		{
+			// surface position and slope at this grid point
+			const fp sigma = sigma_of_isigma(isigma);
+			const fp r = ghosted_gridfn(ghosted_radius_gfn, irho, isigma);
+			const fp dr_drho = partial_rho(ghosted_radius_gfn, irho, isigma);
+			const fp dr_dsigma = partial_sigma(ghosted_radius_gfn, irho, isigma);
+
+			// xyz 3-metric at this grid point
+			const fp g_xx = gridfn(g_xx_gfn, irho, isigma);
+			const fp g_xy = gridfn(g_xy_gfn, irho, isigma);
+			const fp g_xz = gridfn(g_xz_gfn, irho, isigma);
+			const fp g_yy = gridfn(g_yy_gfn, irho, isigma);
+			const fp g_yz = gridfn(g_yz_gfn, irho, isigma);
+			const fp g_zz = gridfn(g_zz_gfn, irho, isigma);
+
+			// induced 2-D metric (only g_sigma_sigma is needed below)
+			fp g_rho_rho, g_rho_sigma, g_sigma_sigma;
+			rho_sigma_metric(r, rho, sigma,
+							 dr_drho, dr_dsigma,
+							 g_xx, g_xy, g_xz,
+							 g_yy, g_yz,
+							 g_zz,
+							 g_rho_rho, g_rho_sigma, g_sigma_sigma);
+
+			const fp weight = integration_coeff(method,
+												N_intervals,
+												isigma - first_isigma);
+
+			weighted_sum += weight * sqrt(g_sigma_sigma);
+		}
+
+		return delta_sigma() * weighted_sum;
+	}
+
+	//******************************************************************************
+
+	//
+	// This function computes the arc length of a surface in the specified
+	// plane ("xz" or "yz") over the patch's nominal bounds, by dispatching
+	// to  sigma_arc_length()  or  rho_arc_length()  respectively.  Only the
+	// first two characters of  plane[]  are examined.  Any other plane
+	// causes  error_exit()  to be called.
+	//
+	// Arguments:
+	// plane[] = (in) "xz" or "yz" to specify the plane.
+	// ghosted_radius_gfn = (in) The surface radius.
+	// g_{xx,xy,xz,yy,yz,zz}_gfn = (in) The xyz 3-metric components.
+	// method = (in) Selects the integration scheme.
+	//
+	fp z_patch::plane_arc_length(const char plane[],
+								 int ghosted_radius_gfn,
+								 int g_xx_gfn, int g_xy_gfn, int g_xz_gfn,
+								 int g_yy_gfn, int g_yz_gfn,
+								 int g_zz_gfn,
+								 enum integration_method method)
+		const
+	{
+		// xz-plane = rotation about y = nu arc = sigma arc
+		if ((plane[0] == 'x') && (plane[1] == 'z'))
+		{
+			return sigma_arc_length(ghosted_radius_gfn,
+									g_xx_gfn, g_xy_gfn, g_xz_gfn,
+									g_yy_gfn, g_yz_gfn,
+									g_zz_gfn,
+									method);
+		}
+
+		// yz-plane = rotation about x = mu arc = rho arc
+		if ((plane[0] == 'y') && (plane[1] == 'z'))
+		{
+			return rho_arc_length(ghosted_radius_gfn,
+								  g_xx_gfn, g_xy_gfn, g_xz_gfn,
+								  g_yy_gfn, g_yz_gfn,
+								  g_zz_gfn,
+								  method);
+		}
+
+		// get to here ==> this patch doesn't contain the requested plane
+		error_exit(ERROR_EXIT,
+				   "***** z_patch::plane_arc_length(): %s patch, plane=\"%s\", but\n"
+				   "                                   this patch doesn't contain that plane!\n",
+				   name(), plane); /*NOTREACHED*/
+	}
+
+	//******************************************************************************
+
+	//
+	// This function computes the arc length of a surface in the specified
+	// plane ("xy" or "xz") over the patch's nominal bounds.
+	//
+	// Arguments:
+	// plane[] = (in) "xy" or "xz" to specify the plane.
+	// ghosted_radius_gfn = (in) The surface radius.
+	// g_{xx,xy,xz,yy,yz,zz}_gfn = (in) The xyz 3-metric components.
+	// method = (in) Selects the integration scheme.
+	//
+	fp x_patch::plane_arc_length(const char plane[],
+								 int ghosted_radius_gfn,
+								 int g_xx_gfn, int g_xy_gfn, int g_xz_gfn,
+								 int g_yy_gfn, int g_yz_gfn,
+								 int g_zz_gfn,
+								 enum integration_method method)
+		const
+	{
+		// Only plane[0] and plane[1] are examined; each recognised plane
+		// name is dispatched to the arc-length routine along the matching
+		// patch coordinate.
+
+		// xy-plane = rotation about z = phi arc = sigma arc
+		if ((plane[0] == 'x') && (plane[1] == 'y'))
+		{
+			return sigma_arc_length(ghosted_radius_gfn,
+									g_xx_gfn, g_xy_gfn, g_xz_gfn,
+									g_yy_gfn, g_yz_gfn, g_zz_gfn,
+									method);
+		}
+
+		// xz-plane = rotation about y = nu arc = rho arc
+		if ((plane[0] == 'x') && (plane[1] == 'z'))
+		{
+			return rho_arc_length(ghosted_radius_gfn,
+								  g_xx_gfn, g_xy_gfn, g_xz_gfn,
+								  g_yy_gfn, g_yz_gfn, g_zz_gfn,
+								  method);
+		}
+
+		// any other plane name is reported via error_exit()
+		error_exit(ERROR_EXIT,
+				   "***** x_patch::plane_arc_length(): %s patch, plane=\"%s\", but\n"
+				   "                                   this patch doesn't contain that plane!\n",
+				   name(), plane); /*NOTREACHED*/
+	}
+
+	//******************************************************************************
+
+	//
+	// This function computes the arc length of a surface in the specified
+	// plane ("xy" or "yz") over the patch's nominal bounds.
+	//
+	// Arguments:
+	// plane[] = (in) "xy" or "yz" to specify the plane.
+	// ghosted_radius_gfn = (in) The surface radius.
+	// g_{xx,xy,xz,yy,yz,zz}_gfn = (in) The xyz 3-metric components.
+	// method = (in) Selects the integration scheme.
+	//
+	fp y_patch::plane_arc_length(const char plane[],
+								 int ghosted_radius_gfn,
+								 int g_xx_gfn, int g_xy_gfn, int g_xz_gfn,
+								 int g_yy_gfn, int g_yz_gfn,
+								 int g_zz_gfn,
+								 enum integration_method method)
+		const
+	{
+		// Only plane[0] and plane[1] are examined; each recognised plane
+		// name is dispatched to the arc-length routine along the matching
+		// patch coordinate.
+
+		// xy-plane = rotation about z = phi arc = sigma arc
+		if ((plane[0] == 'x') && (plane[1] == 'y'))
+		{
+			return sigma_arc_length(ghosted_radius_gfn,
+									g_xx_gfn, g_xy_gfn, g_xz_gfn,
+									g_yy_gfn, g_yz_gfn, g_zz_gfn,
+									method);
+		}
+
+		// yz-plane = rotation about x = mu arc = rho arc
+		if ((plane[0] == 'y') && (plane[1] == 'z'))
+		{
+			return rho_arc_length(ghosted_radius_gfn,
+								  g_xx_gfn, g_xy_gfn, g_xz_gfn,
+								  g_yy_gfn, g_yz_gfn, g_zz_gfn,
+								  method);
+		}
+
+		// any other plane name is reported via error_exit()
+		error_exit(ERROR_EXIT,
+				   "***** y_patch::plane_arc_length(): %s patch, plane=\"%s\", but\n"
+				   "                                   this patch doesn't contain that plane!\n",
+				   name(), plane); /*NOTREACHED*/
+	}
+
+	//******************************************************************************
+
+	//
+	// This function computes an approximation to the (surface) integral of
+	// a gridfn over the patch's nominal area,
+	//	$\int f(\rho,\sigma) \, dA
+	//		= \int f(\rho,\sigma) \sqrt{|J|} \, d\rho \, d\sigma$
+	// where $J$ is the Jacobian of $(x,y,z)$ with respect to $(\rho,\sigma)$.
+	//
+	// Arguments:
+	// unknown_src_gfn = (in) The gridfn to be integrated.  This may be
+	//			  either nominal-grid or ghosted-grid; n.b. in
+	//			  the latter case the integral is still done only
+	//			  over the patch's nominal area.
+	// ghosted_radius_gfn = (in) The surface radius.
+	// g_{xx,xy,xz,yy,yz,zz}_gfn = (in) The xyz 3-metric components.
+	// method = (in) Selects the integration scheme.
+	//
+	fp patch::integrate_gridfn(int unknown_src_gfn,
+							   int ghosted_radius_gfn,
+							   int g_xx_gfn, int g_xy_gfn, int g_xz_gfn,
+							   int g_yy_gfn, int g_yz_gfn,
+							   int g_zz_gfn,
+							   enum integration_method method)
+		const
+	{
+		// The integrand may be a nominal-grid or a ghosted-grid gridfn;
+		// decide once which kind unknown_gridfn() has to read.
+		const bool src_is_ghosted = is_valid_ghosted_gfn(unknown_src_gfn);
+
+		// number of integration *intervals* in each direction
+		const int N_rho = max_irho() - min_irho();
+		const int N_sigma = max_isigma() - min_isigma();
+
+		fp sum = 0.0;
+		for (int irho = min_irho(); irho <= max_irho(); ++irho)
+		{
+			// rho and its quadrature weight depend only on irho, so they
+			// are computed once per irho rather than once per grid point
+			const fp rho = rho_of_irho(irho);
+			const fp coeff_rho = integration_coeff(method,
+												   N_rho,
+												   irho - min_irho());
+
+			for (int isigma = min_isigma(); isigma <= max_isigma(); ++isigma)
+			{
+				const fp fn = unknown_gridfn(src_is_ghosted,
+											 unknown_src_gfn, irho, isigma);
+
+				const fp sigma = sigma_of_isigma(isigma);
+				const fp r = ghosted_gridfn(ghosted_radius_gfn, irho, isigma);
+				const fp partial_surface_r_wrt_rho = partial_rho(ghosted_radius_gfn, irho, isigma);
+				const fp partial_surface_r_wrt_sigma = partial_sigma(ghosted_radius_gfn, irho, isigma);
+
+				const fp g_xx = gridfn(g_xx_gfn, irho, isigma);
+				const fp g_xy = gridfn(g_xy_gfn, irho, isigma);
+				const fp g_xz = gridfn(g_xz_gfn, irho, isigma);
+				const fp g_yy = gridfn(g_yy_gfn, irho, isigma);
+				const fp g_yz = gridfn(g_yz_gfn, irho, isigma);
+				const fp g_zz = gridfn(g_zz_gfn, irho, isigma);
+
+				// rho_sigma_metric() also fills in the (rho,sigma) metric
+				// components; only its return value Jac is used here
+				fp g_rho_rho, g_rho_sigma, g_sigma_sigma;
+				const fp Jac = rho_sigma_metric(r, rho, sigma,
+												partial_surface_r_wrt_rho,
+												partial_surface_r_wrt_sigma,
+												g_xx, g_xy, g_xz,
+												g_yy, g_yz,
+												g_zz,
+												g_rho_rho, g_rho_sigma,
+												g_sigma_sigma);
+
+				const fp coeff_sigma = integration_coeff(method,
+														 N_sigma,
+														 isigma - min_isigma());
+
+				// area element is sqrt(|Jac|)
+				sum += coeff_rho * coeff_sigma * fn * sqrt(jtutil::abs(Jac));
+			}
+		}
+
+		// the coefficients are in units of the grid spacings
+		return delta_rho() * delta_sigma() * sum;
+	}
+
+	//******************************************************************************
+
+	//
+	// This function computes the integration coefficients for
+	//  integrate_gridfn() .  That is, if we write
+	//	$\int_{x_0}^{x_N} f(x) \, dx
+	//		\approx \Delta x \, \sum_{i=0}^N c_i f(x_i)$
+	// then this function computes $c_i$.
+	//
+	// For method == integration_method__automatic_choice the choices are
+	//	N=1		trapezoid
+	//	N=2		Simpson
+	//	N=3		trapezoid
+	//	N=4		Simpson
+	//	N=5		trapezoid
+	//	N=6		Simpson
+	//	N=7 and up	Simpson variant
+	//
+	// Arguments:
+	// method = Specifies the integration method.
+	// N = The number of integration *intervals*.  (The number of integration
+	//     *points* is N+1.)
+	// i = Specifies the point at which the coefficient is desired.
+	//
+	// static
+	fp patch::integration_coeff(enum integration_method method, int N, int i)
+	{
+		assert(i >= 0);
+		assert(i <= N);
+
+		// Replace "automatic" by a concrete scheme chosen from N alone.
+		if (method == integration_method__automatic_choice)
+		{
+			method = (N >= 7)		? integration_method__Simpson_variant
+					 : ((N % 2) == 0) ? integration_method__Simpson
+									  : integration_method__trapezoid;
+		}
+
+		// every scheme treats the two end points specially
+		const bool at_endpoint = (i == 0) || (i == N);
+
+		switch (method)
+		{
+		case integration_method__trapezoid:
+			return at_endpoint ? 0.5 : 1.0;
+
+		case integration_method__Simpson:
+		{
+			if ((N % 2) != 0)
+			{
+				error_exit(ERROR_EXIT,
+						   "***** patch::integration_coeff():\n"
+						   "        Simpson's rule requires N to be even, but N=%d!\n",
+						   N); /*NOTREACHED*/
+			}
+			if (at_endpoint)
+				return 1.0 / 3.0;
+			// interior points alternate 4/3 (odd i) and 2/3 (even i)
+			return ((i % 2) == 0) ? 2.0 / 3.0 : 4.0 / 3.0;
+		}
+
+		case integration_method__Simpson_variant:
+		{
+			if (N < 7)
+			{
+				error_exit(ERROR_EXIT,
+						   "***** patch::integration_coeff():\n"
+						   "        Simpson's rule (variant) requires N >= 7, but N=%d!\n",
+						   N); /*NOTREACHED*/
+			}
+			// the coefficient depends only on the distance to the
+			// nearer end point; from the 4th point inwards it is 1
+			const int from_end = (i <= N - i) ? i : (N - i);
+			switch (from_end)
+			{
+			case 0:
+				return 17.0 / 48.0;
+			case 1:
+				return 59.0 / 48.0;
+			case 2:
+				return 43.0 / 48.0;
+			case 3:
+				return 49.0 / 48.0;
+			default:
+				return 1.0;
+			}
+		}
+
+		default:
+			error_exit(ERROR_EXIT,
+					   "***** patch::integration_coeff(): unknown method=(int)%d!\n"
+					   "                                  (this should never happen!)\n",
+					   int(method)); /*NOTREACHED*/
+		}
+	}
+
+	//******************************************************************************
+	//******************************************************************************
+	//******************************************************************************
+
+	//
+	// This function returns a reference to the ghost zone on a specified
+	// edge, after first assert()ing that the edge belongs to this patch.
+	//
+	// N.b. This function can't be inline in "patch.h" because it needs
+	//	member functions of class patch_edge, which comes after class patch
+	//	in our #include order.
+	//
+	ghost_zone &patch::ghost_zone_on_edge(const patch_edge &e)
+		const
+	{
+		// the edge must be one of ours
+		assert(e.my_patch() == *this);
+
+		// an edge is identified by its (min/max, rho/sigma) flags, and so
+		// is the ghost zone sitting on it
+		const bool edge_is_min = e.is_min();
+		const bool edge_is_rho = e.is_rho();
+		return minmax_ang_ghost_zone(edge_is_min, edge_is_rho);
+	}
+
+	//******************************************************************************
+
+	//
+	// This function determines which of the two adjacent ghost zones meeting
+	// at a specified corner, contains a specified point.  If the point isn't
+	// in either ghost zone, an error_exit() is done.  If the point is in both
+	// ghost zones, it's arbitrary which one will be chosen.
+	//
+	// Arguments:
+	// {rho,sigma}_is_min = Specify the corner (and implicitly the ghost zones).
+	// irho,isigma = Specify the point.
+	//
+	// Results:
+	// This function returns (a reference to) the desired ghost zone.
+	ghost_zone &patch::corner_ghost_zone_containing_point(bool rho_is_min, bool sigma_is_min,
+														  int irho, int isigma)
+		const
+	{
+		// the two candidate ghost zones that meet at the requested corner
+		ghost_zone &gz_rho = minmax_rho_ghost_zone(rho_is_min);
+		ghost_zone &gz_sigma = minmax_sigma_ghost_zone(sigma_is_min);
+
+		// membership is tested in each zone's own (iperp,ipar) coordinates
+		const patch_edge &edge_rho = gz_rho.my_edge();
+		const patch_edge &edge_sigma = gz_sigma.my_edge();
+
+		const int iperp_wrt_rho = edge_rho.iperp_of_irho_isigma(irho, isigma);
+		const int ipar_wrt_rho = edge_rho.ipar_of_irho_isigma(irho, isigma);
+		const int iperp_wrt_sigma = edge_sigma.iperp_of_irho_isigma(irho, isigma);
+		const int ipar_wrt_sigma = edge_sigma.ipar_of_irho_isigma(irho, isigma);
+
+		const bool in_rho_gz = gz_rho.is_in_ghost_zone(iperp_wrt_rho, ipar_wrt_rho);
+		const bool in_sigma_gz = gz_sigma.is_in_ghost_zone(iperp_wrt_sigma, ipar_wrt_sigma);
+
+		// the point has to lie in at least one of the two
+		if (!(in_rho_gz || in_sigma_gz))
+		{
+			error_exit(ERROR_EXIT,
+					   "***** patch::corner_ghost_zone_containing_point():\n"
+					   "        neither ghost zone contains point (this should never happen)!\n"
+					   "        patch=%s rho_is_min=(int)%d sigma_is_min=(int)%d\n"
+					   "        irho=%d isigma=%d\n",
+					   name(), int(rho_is_min), int(sigma_is_min),
+					   irho, isigma); /*NOTREACHED*/
+		}
+
+		// when both zones contain the point the rho one is returned
+		if (in_rho_gz)
+			return gz_rho;
+		return gz_sigma;
+	}
+
+	//******************************************************************************
+
+	//
+	// This function determines which ghost zone contains a specified
+	// noncorner point.
+	//
+	// If the point isn't in any ghost zone of this patch, or if the point
+	// is in the corner of a ghost zone, an error_exit() is done.
+	//
+	// Arguments:
+	// irho,isigma = Specify the point.
+	//
+	// Results:
+	// This function returns (a reference to) the desired ghost zone.
+	ghost_zone &patch::ghost_zone_containing_noncorner_point(int irho, int isigma)
+		const
+	{
+		// Visit all four (want_min, want_rho) combinations, false before
+		// true in each.  The counters are ints (not bools) so that the
+		// loops can run past "true" and terminate.
+		for (int min_flag = 0; min_flag <= 1; ++min_flag)
+		{
+			const bool want_min = (min_flag != 0);
+
+			for (int rho_flag = 0; rho_flag <= 1; ++rho_flag)
+			{
+				const bool want_rho = (rho_flag != 0);
+
+				// express the point in this edge's (iperp,ipar) coordinates
+				const patch_edge &edge = minmax_ang_patch_edge(want_min, want_rho);
+				const int iperp = edge.iperp_of_irho_isigma(irho, isigma);
+				const int ipar = edge.ipar_of_irho_isigma(irho, isigma);
+
+				ghost_zone &candidate = minmax_ang_ghost_zone(want_min, want_rho);
+				if (!candidate.is_in_ghost_zone(iperp, ipar))
+					continue;
+
+				// accept only the non-corner part of the ghost zone
+				if (candidate.my_edge().ipar_is_in_noncorner(ipar))
+					return candidate;
+			}
+		}
+
+		// none of the four ghost zones matched
+		error_exit(ERROR_EXIT,
+				   "***** patch::ghost_zone_containing_noncorner_point():\n"
+				   "        no ghost zone contains point (this should never happen)!\n"
+				   "        patch=%s irho=%d isigma=%d\n",
+				   name(), irho, isigma); /*NOTREACHED*/
+	}
+
+	//******************************************************************************
+
+	//
+	// This function assert()s that a specified ghost zone of this patch
+	// hasn't already been set up, then constructs it as a mirror-symmetry
+	// ghost zone and properly links this to/from the patch.
+	//
+	void patch::create_mirror_symmetry_ghost_zone(const patch_edge &my_edge)
+	{
+		// make sure we belong to the right patch
+		assert(my_edge.my_patch() == *this);
+
+		// build the mirror-symmetry ghost zone and install it on this edge;
+		// set_ghost_zone() checks that the slot was still empty
+		set_ghost_zone(my_edge, new symmetry_ghost_zone(my_edge));
+	}
+
+	//******************************************************************************
+
+	//
+	// This function assert()s that a specified ghost zone of this patch
+	// hasn't already been set up, then creates it as a periodic-symmetry
+	// ghost zone and properly links this to/from the patch.
+	//
+	void patch::create_periodic_symmetry_ghost_zone(const patch_edge &my_edge, const patch_edge &other_edge,
+													bool is_ipar_map_plus)
+	{
+		// make sure we belong to the right patch
+		assert(my_edge.my_patch() == *this);
+
+		// Pick one pair of corresponding ipar values on the two edges: our
+		// minimum non-corner ipar, paired with the other edge's minimum for
+		// a "plus" ipar map or its maximum otherwise.
+		const int sample_ipar_here = my_edge.min_ipar_without_corners();
+		int sample_ipar_there;
+		if (is_ipar_map_plus)
+			sample_ipar_there = other_edge.min_ipar_without_corners();
+		else
+			sample_ipar_there = other_edge.max_ipar_without_corners();
+
+		// build the periodic-symmetry ghost zone and install it on this
+		// edge; set_ghost_zone() checks that the slot was still empty
+		set_ghost_zone(my_edge,
+					   new symmetry_ghost_zone(my_edge, other_edge,
+											   sample_ipar_here, sample_ipar_there,
+											   is_ipar_map_plus));
+	}
+
+	//******************************************************************************
+
+	//
+	// This function assert()s that a specified ghost zone of this patch
+	// hasn't already been set up, then creates it as an interpatch ghost
+	// zone (with lots of NULL pointers for info we can't compute yet)
+	// and properly links this to/from the patch.
+	//
+	void patch::create_interpatch_ghost_zone(const patch_edge &my_edge, const patch_edge &other_edge,
+											 int patch_overlap_width)
+	{
+		// make sure we belong to the right patch
+		assert(my_edge.my_patch() == *this);
+
+		// build the interpatch ghost zone and install it on this edge;
+		// set_ghost_zone() checks that the slot was still empty
+		set_ghost_zone(my_edge,
+					   new interpatch_ghost_zone(my_edge, other_edge,
+												 patch_overlap_width));
+	}
+
+	//******************************************************************************
+
+	//
+	// This is a helper function for  create_*_ghost_zone().  This function
+	// assert()s that one of the ghost zone pointers (which one is selected
+	// by  edge ) is NULL, then stores a value in it.
+	//
+	void patch::set_ghost_zone(const patch_edge &edge, ghost_zone *gzp)
+	{
+		// locate the member pointer that corresponds to this edge
+		ghost_zone **slot;
+		if (edge.is_min())
+			slot = edge.is_rho() ? &min_rho_ghost_zone_ : &min_sigma_ghost_zone_;
+		else
+			slot = edge.is_rho() ? &max_rho_ghost_zone_ : &max_sigma_ghost_zone_;
+		ghost_zone *&ghost_zone_ptr_to_set = *slot;
+
+		// a ghost zone may be installed on a given edge only once
+		assert(ghost_zone_ptr_to_set == NULL);
+		ghost_zone_ptr_to_set = gzp;
+	}
+
+	//******************************************************************************
+
+	//
+	// This function finds which patch edge is adjacent to a neighboring
+	// patch q, or does an error_exit() if q isn't actually a neighboring patch.
+	// The computation is done using only (rho,sigma) coordinate sets and
+	// min/max dang bounds ==> it's ok to use this function in setting up
+	// interpatch ghost zones.
+	//
+	// Arguments:
+	// q = The (supposedly) neighboring patch.
+	// patch_overlap_width = The number of grid points these patches overlap.
+	//		      If this is nonzero, then these patches must have the
+	//		      same grid spacing in the perpendicular direction.
+	//
+	// Returns the edge of this patch (p) that borders patch q.  Each of the
+	// three consistency checks below reports failure via error_exit().
+	const patch_edge &patch::edge_adjacent_to_patch(const patch &q,
+													int patch_overlap_width /* = 0 */)
+		const
+	{
+		const patch &p = *this;
+
+		// which (rho,sigma) coordinate do the patches have in common?
+		// ... this is the perp coordinate for the border
+		const local_coords::coords_set common_coord_set = p.coords_set_rho_sigma() & q.coords_set_rho_sigma();
+
+		// is this coordinate rho or sigma in each patch?
+		// ... it must be exactly one of the two, hence the xor tests
+		const bool common_is_p_rho = (common_coord_set == p.coords_set_rho());
+		const bool common_is_p_sigma = (common_coord_set == p.coords_set_sigma());
+		if ((common_is_p_rho ^ common_is_p_sigma) != 0x1)
+			then error_exit(ERROR_EXIT,
+							"***** patch::edge_adjacent_to_patch():\n"
+							"        common coordinate isn't exactly one of p.{rho,sigma}!\n"
+							"        p.name()=\"%s\" q.name()=\"%s\"\n"
+							"        common_coord_set=%s\n"
+							"        common_is_p_rho=%d common_is_p_sigma=%d\n",
+							p.name(), q.name(),
+							local_coords::name_of_coords_set(common_coord_set),
+							int(common_is_p_rho), int(common_is_p_sigma));
+		/*NOTREACHED*/ // (refers to the error_exit() call just above)
+		const bool common_is_q_rho = (common_coord_set == q.coords_set_rho());
+		const bool common_is_q_sigma = (common_coord_set == q.coords_set_sigma());
+		if ((common_is_q_rho ^ common_is_q_sigma) != 0x1)
+			then error_exit(ERROR_EXIT,
+							"***** patch::edge_adjacent_to_patch():\n"
+							"        common coordinate isn't exactly one of q.{rho,sigma}!\n"
+							"        p.name()=\"%s\" q.name()=\"%s\"\n"
+							"        common_coord_set=%s\n"
+							"        common_is_q_rho=%d common_is_q_sigma=%d\n",
+							p.name(), q.name(),
+							local_coords::name_of_coords_set(common_coord_set),
+							int(common_is_q_rho), int(common_is_q_sigma));
+		/*NOTREACHED*/ // (refers to the error_exit() call just above)
+
+		// how much do the patches overlap?
+		// ... eg patch_overlap_width = 3 would be
+		//	p   p   p   p   p
+		//		q   q   q   q   q
+		//     so the overlap would be (patch_overlap_width-1) * delta = 2 * delta
+		// NOTE(review): the spacing check is skipped only when
+		//     patch_overlap_width == 1, whereas the error message below and
+		//     the /* = 0 */ default speak of "!= 0" -- confirm which is
+		//     intended.
+		if ((patch_overlap_width - 1 != 0) && jtutil::fuzzy<fp>::NE(p.delta_dang(common_is_p_rho),
+																	q.delta_dang(common_is_q_rho)))
+			then error_exit(ERROR_EXIT,
+							"***** patch::edge_adjacent_to_patch():\n"
+							"        patch_overlap_width != 0 must have same perp grid spacing in both patches!\n"
+							"        p.name()=\"%s\" q.name()=\"%s\"\n"
+							"        common_coord_set=%s\n"
+							"        common_is_p_rho=%d common_is_q_rho=%d\n"
+							"        p.delta_dang(common_is_p_rho)=%g\n"
+							"        q.delta_dang(common_is_q_rho)=%g\n",
+							p.name(), q.name(),
+							local_coords::name_of_coords_set(common_coord_set),
+							int(common_is_p_rho), int(common_is_q_rho),
+							double(p.delta_dang(common_is_p_rho)),
+							double(q.delta_dang(common_is_q_rho))); /*NOTREACHED*/
+
+		// overlap expressed in the common (perp) coordinate, using p's spacing
+		const fp doverlap = fp(patch_overlap_width - 1) * p.delta_dang(common_is_p_rho);
+
+		// where is the common boundary relative to the min/max sides of each patch?
+		// ... exactly one of the two pairings must match (xor test again)
+		const bool common_is_p_min_q_max = local_coords::fuzzy_EQ_dang(p.min_dang(common_is_p_rho),
+																	   q.max_dang(common_is_q_rho) - doverlap);
+		const bool common_is_p_max_q_min = local_coords::fuzzy_EQ_dang(p.max_dang(common_is_p_rho),
+																	   q.min_dang(common_is_q_rho) + doverlap);
+		if ((common_is_p_min_q_max ^ common_is_p_max_q_min) != 0x1)
+			then error_exit(ERROR_EXIT,
+							"***** patch::edge_adjacent_to_patch():\n"
+							"        common coordinate isn't exactly one of {pmax/qmin, pmin/qmax}!\n"
+							"        p.name()=\"%s\" q.name()=\"%s\"\n"
+							"        common_coord_set=%s\n"
+							"        common_is_p_rho=%d common_is_q_rho=%d\n"
+							"        p.delta_dang(common_is_p_rho)=%g\n"
+							"        q.delta_dang(common_is_q_rho)=%g\n"
+							"        patch_overlap_width=%d doverlap=%g\n"
+							"        common_is_p_min_q_max=%d common_is_p_max_q_min=%d\n",
+							p.name(), q.name(),
+							local_coords::name_of_coords_set(common_coord_set),
+							int(common_is_p_rho), int(common_is_q_rho),
+							double(p.delta_dang(common_is_p_rho)),
+							double(q.delta_dang(common_is_q_rho)),
+							patch_overlap_width, double(doverlap),
+							int(common_is_p_min_q_max), int(common_is_p_max_q_min));
+		/*NOTREACHED*/ // (refers to the error_exit() call just above)
+
+		// p's edge is selected by (is it p's min side?, is the perp coordinate p's rho?)
+		return p.minmax_ang_patch_edge(common_is_p_min_q_max, common_is_p_rho);
+	}
+
+	//******************************************************************************
+
+	//
+	// This function verifies (via assert()) that all ghost zones of this
+	// patch have been fully set up.
+	//
+	// Two-stage check: first that all four ghost zone pointers have been
+	// set, then that each ghost zone itself reports being fully set up.
+	void patch::assert_all_ghost_zones_fully_setup() const
+	{
+		// stage 1: every edge has had a ghost zone installed
+		assert(min_rho_ghost_zone_ != NULL);
+		assert(max_rho_ghost_zone_ != NULL);
+		assert(min_sigma_ghost_zone_ != NULL);
+		assert(max_sigma_ghost_zone_ != NULL);
+
+		// stage 2: each ghost zone checks its own setup
+		// these calls are no-ops for non-interpatch ghost zones
+		min_rho_ghost_zone().assert_fully_setup();
+		max_rho_ghost_zone().assert_fully_setup();
+		min_sigma_ghost_zone().assert_fully_setup();
+		max_sigma_ghost_zone().assert_fully_setup();
+	}
+
+	//******************************************************************************
+	//******************************************************************************
+	//******************************************************************************
+
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/patch.h
+++ b/AMSS_NCKU_source/AHF_Direct/patch.h
--- a/AMSS_NCKU_source/AHF_Direct/patch_edge.h
+++ b/AMSS_NCKU_source/AHF_Direct/patch_edge.h
@@ -0,0 +1,320 @@
+#ifndef TPATCH_EDGE_H
+#define TPATCH_EDGE_H
+namespace AHFinderDirect
+{
+
+	//*****************************************************************************
+
+	//
+	// patch_edge -- perpendicular/parallel geometry of one side of a patch
+	//
+	// A  patch_edge  object is a very light-weight object which represents
+	// the basic geometry of a min/max rho/sigma side of a patch, i.e. it
+	// provides which-side-am-I predicates, coordinate conversions between
+	// (perp,par) and (rho,sigma), etc.  Every patch has (points to) 4  patch_edge
+	//  objects, one for each of the patch's sides.  See the comments in
+	// "patch.h" for a "big picture" discussion of patches, patch edges,
+	// ghost zones, and patch interpolation regions.
+	//
+	// Note that since  patch_edge  has only  const  member functions
+	// (and members!), a  patch_edge  object is effectively always  const .
+	// This means there's no harm in always declaring  patch_edge  objects
+	// to be  const .
+	//
+
+	class patch_edge
+	{
+	public:
+		//
+		// ***** meta-info *****
+		//
+
+		// meta-info about patch: the patch this edge belongs to
+		patch &my_patch() const { return my_patch_; }
+
+		// meta-info about edge
+		// ... is_rho() says whether the coordinate *perpendicular* to the
+		//     edge is rho; the parallel coordinate is then the other one
+		bool is_rho() const { return is_rho_; }
+		bool is_min() const { return is_min_; }
+		bool perp_is_rho() const { return is_rho(); }
+		bool par_is_rho() const { return !is_rho(); }
+
+		// human-readable {min,max}_{rho,sigma} name (for debugging etc)
+		const char *name() const
+		{
+			return is_min()
+					   ? (is_rho() ? "min_rho" : "min_sigma")
+					   : (is_rho() ? "max_rho" : "max_sigma");
+		}
+
+		// are two edges really the same edge?
+		// ... i.e. same patch, same rho/sigma direction, same min/max side
+		bool operator==(const patch_edge &other_edge) const
+		{
+			return (my_patch() == other_edge.my_patch()) && (is_rho() == other_edge.is_rho()) && (is_min() == other_edge.is_min());
+		}
+		bool operator!=(const patch_edge &other_edge) const
+		{
+			return !operator==(other_edge);
+		}
+
+		//
+		// ***** adjacent edges *****
+		//
+
+		// get adjacent edges to our min/max par corners
+		// ... an adjacent edge's perp direction is our par direction
+		const patch_edge &min_par_adjacent_edge() const
+		{
+			return my_patch()
+				.minmax_ang_patch_edge(grid::side_is_min, par_is_rho());
+		}
+		const patch_edge &max_par_adjacent_edge() const
+		{
+			return my_patch()
+				.minmax_ang_patch_edge(grid::side_is_max, par_is_rho());
+		}
+		const patch_edge &minmax_par_adjacent_edge(bool want_min) const
+		{
+			return want_min ? min_par_adjacent_edge()
+							: max_par_adjacent_edge();
+		}
+
+		//
+		// ***** gridfn subscripting and coordinate maps *****
+		//
+
+		// gridfn strides perpendicular/parallel to the edge
+		int perp_stride() const
+		{
+			return my_patch().iang_stride(perp_is_rho());
+		}
+		int par_stride() const
+		{
+			return my_patch().iang_stride(par_is_rho());
+		}
+		int ghosted_perp_stride() const
+		{
+			return my_patch().ghosted_iang_stride(perp_is_rho());
+		}
+		int ghosted_par_stride() const
+		{
+			return my_patch().ghosted_iang_stride(par_is_rho());
+		}
+
+		// coordinate maps perpendicular/parallel to the edge
+		// ... range is that of the grid *including* ghost zones
+		const jtutil::linear_map<fp> &perp_map() const
+		{
+			return my_patch().ang_map(perp_is_rho());
+		}
+		const jtutil::linear_map<fp> &par_map() const
+		{
+			return my_patch().ang_map(par_is_rho());
+		}
+
+		// meta-info about perp/par coordinates
+		// ... as (mu,nu,phi) tensor indices
+		local_coords::coords_set coords_set_perp() const
+		{
+			return perp_is_rho() ? my_patch().coords_set_rho()
+								 : my_patch().coords_set_sigma();
+		}
+		local_coords::coords_set coords_set_par() const
+		{
+			return par_is_rho() ? my_patch().coords_set_rho()
+								: my_patch().coords_set_sigma();
+		}
+
+		//
+		// ***** coordinate conversions *****
+		//
+
+		// coordinate conversions based on ghost zone direction
+		// ... (iperp,ipar) <--> (perp,par)
+		fp perp_of_iperp(int iperp) const
+		{
+			return my_patch().ang_of_iang(perp_is_rho(), iperp);
+		}
+		fp par_of_ipar(int ipar) const
+		{
+			return my_patch().ang_of_iang(par_is_rho(), ipar);
+		}
+		fp fp_iperp_of_perp(fp perp) const
+		{
+			return my_patch().fp_iang_of_ang(perp_is_rho(), perp);
+		}
+		fp fp_ipar_of_par(fp par) const
+		{
+			return my_patch().fp_iang_of_ang(par_is_rho(), par);
+		}
+		// ... integer-valued versions;  nia  is passed through to the
+		//     patch's iang_of_ang()
+		// ... these are  const  like every other member function, so they
+		//     can be called through the usual  const patch_edge &
+		int iperp_of_perp(fp perp, jtutil::linear_map<fp>::noninteger_action
+									   nia = jtutil::linear_map<fp>::nia_error)
+			const
+		{
+			return my_patch().iang_of_ang(perp_is_rho(), perp, nia);
+		}
+		int ipar_of_par(fp par, jtutil::linear_map<fp>::noninteger_action
+									nia = jtutil::linear_map<fp>::nia_error)
+			const
+		{
+			return my_patch().iang_of_ang(par_is_rho(), par, nia);
+		}
+
+		// ... (perp,par) --> (rho,sigma)
+		int irho_of_iperp_ipar(int iperp, int ipar) const
+		{
+			return perp_is_rho() ? iperp : ipar;
+		}
+		int isigma_of_iperp_ipar(int iperp, int ipar) const
+		{
+			return perp_is_rho() ? ipar : iperp;
+		}
+		fp rho_of_perp_par(fp perp, fp par) const
+		{
+			return perp_is_rho() ? perp : par;
+		}
+		fp sigma_of_perp_par(fp perp, fp par) const
+		{
+			return perp_is_rho() ? par : perp;
+		}
+		// ... (rho,sigma) --> (perp,par)
+		int iperp_of_irho_isigma(int irho, int isigma) const
+		{
+			return perp_is_rho() ? irho : isigma;
+		}
+		int ipar_of_irho_isigma(int irho, int isigma) const
+		{
+			return par_is_rho() ? irho : isigma;
+		}
+		fp perp_of_rho_sigma(fp rho, fp sigma) const
+		{
+			return perp_is_rho() ? rho : sigma;
+		}
+		fp par_of_rho_sigma(fp rho, fp sigma) const
+		{
+			return par_is_rho() ? rho : sigma;
+		}
+
+		// outer perp of nominal grid on this edge
+		// ... this is outermost *grid point*
+		//     (same value as nominal_grid_outer_iperp() below, but
+		//     returned as  fp )
+		fp grid_outer_iperp() const
+		{
+			return my_patch().minmax_iang(is_min(), is_rho());
+		}
+		// ... this is actual outer edge of grid
+		//     (might be halfway between two grid points)
+		fp grid_outer_perp() const
+		{
+			return my_patch().minmax_ang(is_min(), is_rho());
+		}
+		// ... this is grid_outer_perp() converted back to the iperp
+		//     coordinate, but still returned as floating-point;
+		//     it will be either integer or half-integer
+		fp fp_grid_outer_iperp() const
+		{
+			return fp_iperp_of_perp(grid_outer_perp());
+		}
+
+		//
+		// ***** min/max/outer coordinates of edge *****
+		//
+
+		// min/max/size ipar of the edge
+		// (these are extreme limits for any iperp, a given ghost zone
+		//  or interpolation region may have tighter and/or iperp-dependent
+		// limits)
+		// ... not including corners
+		int min_ipar_without_corners() const
+		{
+			return my_patch().min_iang(par_is_rho());
+		}
+		int max_ipar_without_corners() const
+		{
+			return my_patch().max_iang(par_is_rho());
+		}
+		// ... including corners
+		int min_ipar_with_corners() const
+		{
+			return my_patch().ghosted_min_iang(par_is_rho());
+		}
+		int max_ipar_with_corners() const
+		{
+			return my_patch().ghosted_max_iang(par_is_rho());
+		}
+		// ... of the corners themselves
+		//     (each corner is the ipar range lying strictly outside the
+		//     non-corner range, out to the with-corners limit)
+		int min_ipar_corner__min_ipar() const
+		{
+			return min_ipar_with_corners();
+		}
+		int min_ipar_corner__max_ipar() const
+		{
+			return min_ipar_without_corners() - 1;
+		}
+		int max_ipar_corner__min_ipar() const
+		{
+			return max_ipar_without_corners() + 1;
+		}
+		int max_ipar_corner__max_ipar() const
+		{
+			return max_ipar_with_corners();
+		}
+
+		// membership predicates for ipar corners, non-corners
+		bool ipar_is_in_min_ipar_corner(int ipar) const
+		{
+			return (ipar >= min_ipar_corner__min_ipar()) && (ipar <= min_ipar_corner__max_ipar());
+		}
+		bool ipar_is_in_max_ipar_corner(int ipar) const
+		{
+			return (ipar >= max_ipar_corner__min_ipar()) && (ipar <= max_ipar_corner__max_ipar());
+		}
+		bool ipar_is_in_corner(int ipar) const
+		{
+			return ipar_is_in_min_ipar_corner(ipar) || ipar_is_in_max_ipar_corner(ipar);
+		}
+		bool ipar_is_in_noncorner(int ipar) const
+		{
+			return (ipar >= min_ipar_without_corners()) && (ipar <= max_ipar_without_corners());
+		}
+
+		// convenience function selecting amongst the above
+		// membership predicates
+		bool ipar_is_in_selected_part(bool want_corners,
+									  bool want_noncorner,
+									  int ipar)
+			const
+		{
+			return (want_corners && ipar_is_in_corner(ipar)) || (want_noncorner && ipar_is_in_noncorner(ipar));
+		}
+
+		// outer (farthest from patch center) iperp of nominal grid
+		int nominal_grid_outer_iperp() const
+		{
+			return my_patch()
+				.minmax_iang(is_min(), is_rho());
+		}
+
+		//
+		// ***** constructor, destructor *****
+		//
+
+		// my_patch_in = the patch this edge belongs to (stored by reference)
+		// is_min_in, is_rho_in = which of the patch's four sides this is
+		patch_edge(patch &my_patch_in,
+				   bool is_min_in, bool is_rho_in)
+			: my_patch_(my_patch_in),
+			  is_min_(is_min_in), is_rho_(is_rho_in)
+		{
+		}
+		// compiler-synthesized (no-op) destructor is fine
+
+	private:
+		// we forbid copying and passing by value
+		// by declaring the copy constructor and assignment operator
+		// private, but never defining them
+		patch_edge(const patch_edge &rhs);
+		patch_edge &operator=(const patch_edge &rhs);
+
+	private:
+		patch &my_patch_;
+		const bool is_min_, is_rho_;
+	};
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /* TPATCH_EDGE_H */
--- a/AMSS_NCKU_source/AHF_Direct/patch_info.C
+++ b/AMSS_NCKU_source/AHF_Direct/patch_info.C
@@ -0,0 +1,187 @@
+#include <stdio.h>
+#include <math.h>
+#include <assert.h>
+
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "cpm_map.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "patch_info.h"
+
+namespace AHFinderDirect
+	  {
+using jtutil::error_exit;
+
+//******************************************************************************
+//******************************************************************************
+//******************************************************************************
+
+//
+// This function computes, and returns a reference to, a
+//  struct grid_arrays::grid_array_pars  from the info in a
+//  struct patch_info  and the additional information in the arguments.
+//
+// The result refers to an internal static buffer in this function; the
+// usual caveats about lifetimes/overwriting apply.
+//
+// Arguments:
+// ghost_zone_width = Width in grid points of all ghost zones.
+// patch_extend_width = Number of grid points to extend each patch past
+//		     "just touching" so as to overlap neighboring patches.
+//		     Thus patches overlap by
+//			patch_overlap_width = 2*patch_extend_width + 1
+//		     grid points.  For example, with patch_extend_width == 2,
+//		     here are the grid points of two neighboring patches:
+//			x   x   x   x   x   X   X
+//                                      |
+//			        O   O   o   o   o   o   o
+//		     Here | marks the "just touching" boundary,
+//		     x and o the grid points before this extension,
+//		     and X and O the extra grid points added by this
+//		     extension.
+// N_zones_per_right_angle = This sets the grid spacing (same in both
+//			     directions) to 90.0 / N_zones_per_right_angle.
+//			     It's a fatal error (error_exit()) if this
+//			     doesn't evenly divide the grid sizes in both
+//			     directions.
+//
+const grid_arrays::grid_array_pars&
+  patch_info::grid_array_pars(int ghost_zone_width, int patch_extend_width,
+			      int N_zones_per_right_angle)
+	const
+{
+// the single static result buffer: every call overwrites it
+static
+  struct grid_arrays::grid_array_pars grid_array_pars_buffer;
+struct grid_arrays::grid_array_pars& pars = grid_array_pars_buffer;
+
+// angular grid spacing (degrees), the same in both directions
+const fp spacing = 90.0 / fp(N_zones_per_right_angle);
+
+//
+// lower index bounds of the un-extended patch:
+// the values are actually arbitrary, but choosing them this way makes
+// (irho,isigma) map one-to-one onto (rho,sigma) across all patches,
+// which is convenient for debugging
+//
+const int nominal_min_irho
+	= jtutil::round<fp>::to_integer(min_drho  /spacing);
+const int nominal_min_isigma
+	= jtutil::round<fp>::to_integer(min_dsigma/spacing);
+
+// error_exit() unless the spacing evenly divides the patch sizes
+verify_grid_spacing_ok(N_zones_per_right_angle);
+
+// number of grid zones spanned by the un-extended patch
+const int N_irho_zones
+	= jtutil::round<fp>::to_integer(
+		   fp(N_zones_per_right_angle) * (max_drho  -min_drho  ) / 90.0
+				       );
+const int N_isigma_zones
+	= jtutil::round<fp>::to_integer(
+		   fp(N_zones_per_right_angle) * (max_dsigma-min_dsigma) / 90.0
+				       );
+
+// index bounds of the patch, widened by  patch_extend_width  on each side
+pars.min_irho   = nominal_min_irho   - patch_extend_width;
+pars.max_irho   = nominal_min_irho   + N_irho_zones   + patch_extend_width;
+pars.min_isigma = nominal_min_isigma - patch_extend_width;
+pars.max_isigma = nominal_min_isigma + N_isigma_zones + patch_extend_width;
+
+// all four ghost zones get the same width
+pars.min_rho_ghost_zone_width   = ghost_zone_width;
+pars.max_rho_ghost_zone_width   = ghost_zone_width;
+pars.min_sigma_ghost_zone_width = ghost_zone_width;
+pars.max_sigma_ghost_zone_width = ghost_zone_width;
+
+return grid_array_pars_buffer;
+}
+
+//******************************************************************************
+//
+//
+// This function computes, and returns a reference to, a
+//  struct grid_arrays::grid_pars  from the info in a  struct patch_info
+// and the additional information in the arguments.
+//
+// The result refers to an internal static buffer in this function; the
+// usual caveats about lifetimes/overwriting apply.
+//
+// Arguments:
+// patch_extend_width = Number of grid points to extend each patch past
+//		     "just touching" so as to overlap neighboring patches.
+//		     Thus patches overlap by  2*patch_extend_width + 1  grid
+//		     points.  For example, with patch_extend_width == 2, here
+//		     are the grid points of two neighboring patches:
+//			x   x   x   x   x   X   X
+//                                      |
+//			        O   O   o   o   o   o   o
+//		     Here | marks the "just touching" boundary,
+//		     x and o the grid points before this extension,
+//		     and X and O the extra grid points added by this
+//		     extension.
+// N_zones_per_right_angle = This sets the grid spacing (same in both
+//			     directions) to 90.0 / N_zones_per_right_angle.
+//			     It's a fatal error (error_exit()) if this
+//			     doesn't evenly divide the grid sizes in both
+//			     directions.
+//
+const grid::grid_pars& patch_info::grid_pars(int patch_extend_width,
+					     int N_zones_per_right_angle)
+	const
+{
+// the single static result buffer: every call overwrites it
+static
+  struct grid::grid_pars grid_pars_buffer;
+struct grid::grid_pars& out = grid_pars_buffer;
+
+// error_exit() unless the spacing evenly divides the patch sizes
+verify_grid_spacing_ok(N_zones_per_right_angle);
+
+// grid spacing (degrees) and the amount by which each side of the
+// patch is pushed outwards
+const fp spacing = 90.0 / fp(N_zones_per_right_angle);
+const fp margin = fp(patch_extend_width) * spacing;
+
+// rho direction
+out.min_drho   = min_drho - margin;
+out.max_drho   = max_drho + margin;
+out.delta_drho = spacing;
+
+// sigma direction
+out.min_dsigma   = min_dsigma - margin;
+out.max_dsigma   = max_dsigma + margin;
+out.delta_dsigma = spacing;
+
+return grid_pars_buffer;
+}
+
+//******************************************************************************
+
+//
+// This function verifies that the grid spacing evenly divides the
+// grid sizes in both directions, and does an  error_exit()  if not.
+//
+// Arguments:
+// N_zones_per_right_angle = This sets the grid spacing (same in both
+//			     directions) to 90.0 / N_zones_per_right_angle.
+//
+void patch_info::verify_grid_spacing_ok(int N_zones_per_right_angle)
+	const
+{
+// (floating-point) number of grid zones the patch spans in each direction
+const fp rho_zone_count
+	= fp(N_zones_per_right_angle) * (max_drho  -min_drho  ) / 90.0;
+const fp sigma_zone_count
+	= fp(N_zones_per_right_angle) * (max_dsigma-min_dsigma) / 90.0;
+
+// the spacing is ok iff both counts are integers (up to fuzz);
+// the sigma count is only tested if the rho count passed
+if (    jtutil::fuzzy<fp>::is_integer(rho_zone_count)
+     && jtutil::fuzzy<fp>::is_integer(sigma_zone_count)    )
+   then return;
+
+error_exit(ERROR_EXIT,
+"***** patch_info::verify_grid_spacing_ok():\n"
+"        N_zones_per_right_angle=%d gives grid spacing which\n"
+"        doesn't evenly divide grid sizes!\n"
+"        [min,max]_drho=[%g,%g] [min,max]_dsigma=[%g,%g]\n"
+"        ==> N_irho_zones_fp=%g N_isigma_zones_fp=%g\n"
+	   ,
+	   N_zones_per_right_angle,
+	   double(min_drho), double(max_drho),
+	   double(min_dsigma), double(max_dsigma),
+	   double(rho_zone_count), double(sigma_zone_count));
+								/*NOTREACHED*/
+}
+
+	  }	// namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/patch_info.h
+++ b/AMSS_NCKU_source/AHF_Direct/patch_info.h
@@ -0,0 +1,70 @@
+// include guard: without it a second inclusion of this header in one
+// translation unit would redefine  struct patch_info
+#ifndef TPATCH_INFO_H
+#define TPATCH_INFO_H
+namespace AHFinderDirect
+{
+
+	//*****************************************************************************
+
+	//
+	// This (POD, and hence static-initializable) struct gives a minimal
+	// set of information which varies from one patch to another.
+	//
+	// The member functions allow computing all the grid:: constructor
+	// arguments; with these in hand it's fairly easy to construct the
+	// patch itself.  This scheme doesn't allow the most general possible
+	// type of patch (eg it constrains all ghost zones to have the same width,
+	// and it requires the grid spacing to evenly divide 90 degrees), but
+	// it does cover all the cases that seem to come up in practice.
+	//
+	// Arguments for member functions:
+	// ghost_zone_width = Width in grid points of all ghost zones.
+	// patch_extend_width = Number of grid points to extend each patch past
+	//		     "just touching" so as to overlap neighboring patches.
+	//		     Thus patches overlap by
+	//			patch_overlap_width = 2*patch_extend_width + 1
+	//		     grid points.  For example, with patch_extend_width == 2,
+	//		     here are the grid points of two neighboring patches:
+	//			x   x   x   x   x   X   X
+	//                                      |
+	//			        O   O   o   o   o   o   o
+	//		     Here | marks the "just touching" boundary,
+	//		     x and o the grid points before this extension,
+	//		     and X and O the extra grid points added by this
+	//		     extension.
+	// N_zones_per_right_angle = This sets the grid spacing (same in both
+	//			     directions) to 90.0 / N_zones_per_right_angle.
+	//			     It's a fatal error (error_exit()) if this
+	//			     doesn't evenly divide the grid sizes in both
+	//			     directions.
+	//
+	struct patch_info
+	{
+		// n.b. the order of these data members must not change:
+		//      the struct is meant to be statically (aggregate) initialized
+		const char *name;
+		bool is_plus;
+		char ctype;
+		// patch extent in the two angular coordinates; the member
+		// functions treat these as degrees (they divide by 90.0)
+		fp min_drho, max_drho;
+		fp min_dsigma, max_dsigma;
+
+		// compute and return reference to  struct grid_arrays::grid_array_pars
+		// ... result refers to internal static buffer;
+		//     the usual caveats about lifetimes/overwriting apply
+		const grid_arrays::grid_array_pars &
+		grid_array_pars(int ghost_zone_width, int patch_extend_width,
+						int N_zones_per_right_angle)
+			const;
+
+		// compute and return reference to  struct grid::grid_pars
+		// ... result refers to internal static buffer;
+		//     the usual caveats about lifetimes/overwriting apply
+		const grid::grid_pars &grid_pars(int patch_extend_width,
+										 int N_zones_per_right_angle)
+			const;
+
+	private:
+		// verify that grid spacing evenly divides grid sizes
+		// in both directions; no-op if ok, error_exit() if not ok
+		void verify_grid_spacing_ok(int N_zones_per_right_angle)
+			const;
+	};
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /* TPATCH_INFO_H */
--- a/AMSS_NCKU_source/AHF_Direct/patch_interp.C
+++ b/AMSS_NCKU_source/AHF_Direct/patch_interp.C
@@ -0,0 +1,360 @@
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+
+#include <vector>
+
+#include "util_Table.h"
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "cpm_map.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "fd_grid.h"
+#include "patch.h"
+#include "patch_edge.h"
+#include "patch_interp.h"
+#include "ghost_zone.h"
+
+namespace AHFinderDirect
+{
+	//
+	// 1-D Lagrange interpolation of uniformly-spaced data.
+	//
+	// Arguments:
+	// coor_orin = coordinate of data point 0
+	// dx        = spacing between successive data points
+	// gf        = the data: gf[j] is the value at  coor_orin + j*dx
+	// PTS       = number of data points in gf[]; must be >= ORD
+	// ipx       = coordinate at which to interpolate
+	// out       = (out) interpolated value
+	// mposn     = (out) index in gf[] of the first point of the molecule
+	// Jac       = (out) array of ORD interpolation weights:
+	//             *out = sum over m of  Jac[m] * gf[*mposn + m]
+	// ORD       = number of points in the molecule
+	//
+	// The molecule is the run of ORD consecutive data points starting
+	// ORD/2 points below  ceiling((ipx-coor_orin)/dx) , shifted as needed
+	// to stay inside [0,PTS).
+	//
+	// Results: always returns 0.
+	//
+	int lagrange_interp(double coor_orin, double dx, double *gf,
+						int PTS, double ipx, double *out, int *mposn, double *Jac,
+						int ORD) // ORD-1 order lagrange interpolation
+	{
+		assert(PTS >= ORD);
+
+		//-- Determine molecular range
+		//   for odd points, say 5, the molecular is
+		//             |
+		//   +-----+---x-+-----+-----+
+		//
+		int mi = jtutil::round<double>::ceiling((ipx - coor_orin) / dx) - ORD / 2;
+		if (mi < 0)
+			mi = 0;
+		else if (mi + ORD > PTS)
+			mi = PTS - ORD;
+		const int mf = mi + ORD;
+
+		//-- Lagrange basis function
+		//   node coordinates are computed on the fly and the weights go
+		//   straight into Jac[], so no scratch storage has to be allocated
+		//   (this function is called once per interpolation point)
+		*out = 0;
+		for (int i = mi; i < mf; i++)
+		{
+			const double xi = coor_orin + i * dx;
+			double weight = 1.0;
+			for (int k = mi; k < mf; k++)
+				if (k != i)
+				{
+					const double xk = coor_orin + k * dx;
+					weight *= (ipx - xk) / (xi - xk);
+				}
+			*out += *(gf + i) * weight;
+			Jac[i - mi] = weight;
+		}
+
+		*mposn = mi;
+
+		return 0; // Normal return
+	}
+
+	using jtutil::error_exit;
+
+	//
+	// This function constructs a  patch_interp  object.
+	//
+	// The patch, edge, parindex arrays and  interp_par_in  are all held
+	// by reference, so they must outlive this object.  The range of ipar
+	// from which gridfn data is taken includes the edge's corners on the
+	// min/max side iff the corresponding  ok_to_use_*_par_ghost_zone
+	// flag is set.
+	//
+	patch_interp::patch_interp(const patch_edge &my_edge_in,
+							   int min_iperp_in, int max_iperp_in,
+							   const jtutil::array1d<int> &min_parindex_array_in,
+							   const jtutil::array1d<int> &max_parindex_array_in,
+							   const jtutil::array2d<fp> &interp_par_in,
+							   bool ok_to_use_min_par_ghost_zone,
+							   bool ok_to_use_max_par_ghost_zone,
+							   int interp_handle_in, int interp_par_table_handle_in)
+		: my_patch_(my_edge_in.my_patch()),
+		  my_edge_(my_edge_in),
+		  min_gfn_(my_patch().ghosted_min_gfn()),
+		  max_gfn_(my_patch().ghosted_max_gfn()),
+		  ok_to_use_min_par_ghost_zone_(ok_to_use_min_par_ghost_zone),
+		  ok_to_use_max_par_ghost_zone_(ok_to_use_max_par_ghost_zone),
+		  min_iperp_(min_iperp_in), max_iperp_(max_iperp_in),
+		  min_ipar_(ok_to_use_min_par_ghost_zone
+						? my_edge_in.min_ipar_with_corners()
+						: my_edge_in.min_ipar_without_corners()),
+		  max_ipar_(ok_to_use_max_par_ghost_zone
+						? my_edge_in.max_ipar_with_corners()
+						: my_edge_in.max_ipar_without_corners()),
+		  min_parindex_array_(min_parindex_array_in),
+		  max_parindex_array_(max_parindex_array_in),
+		  interp_par_(interp_par_in),
+		  interp_handle_(interp_handle_in),
+		  // NOTE(review): interp_par_table_handle_in is ignored and the
+		  // handle is set to the literal 1; nothing in this file reads the
+		  // handle -- confirm this is intended
+		  interp_par_table_handle_(1),
+		  // data point 0 of each line of gridfn data is at ipar = min_ipar_
+		  gridfn_coord_origin_(my_edge().par_map().fp_of_int(min_ipar_)),
+		  gridfn_coord_delta_(my_edge().par_map().delta_fp()),
+		  gridfn_data_ptrs_(min_gfn_, max_gfn_),
+		  interp_data_buffer_ptrs_(min_gfn_, max_gfn_) // no comma
+	{
+		int status;
+
+		// n.b. not used below
+		const CCTK_INT stride = my_edge().ghosted_par_stride();
+
+		// status is always 0 here, so this error check can never fire
+		status = 0;
+		if (status < 0)
+			then error_exit(ERROR_EXIT,
+							"***** patch_interp::patch_interp():\n"
+							"        can't set gridfn stride in interpolator parmameter table!\n"
+							"        error status=%d\n",
+							status); /*NOTREACHED*/
+	}
+
+	//
+	// This function destroys a  patch_interp  object.  There is nothing
+	// to release here: the constructor only binds references and stores
+	// values passed to it.
+	//
+	patch_interp::~patch_interp()
+	{
+	}
+
+	//
+	// This function interpolates each of the specified ghosted gridfns
+	// at all the interpolation points given to our constructor, using
+	// 6-point Lagrange interpolation in the par direction at each iperp.
+	//
+	// Arguments:
+	// ghosted_[min,max]_gfn_to_interp = (in) Inclusive range of ghosted
+	//				       gridfns to interpolate.
+	// data_buffer(ghosted_gfn, iperp, parindex)
+	//	= (out) The interpolated values.
+	// posn_buffer(iperp, parindex)
+	//	= (out) The ipar position of each interpolation molecule.
+	// Jacobian_buffer(iperp, parindex, ipar_m)
+	//	= (out) The interpolation weights of each molecule, stored
+	//	  starting at  ipar_m = Jacobian_buffer.min_k() .  The weights
+	//	  depend only on the coordinates, so they are the same for
+	//	  every gridfn and are stored once.
+	//
+	void patch_interp::interpolate(int ghosted_min_gfn_to_interp,
+								   int ghosted_max_gfn_to_interp,
+								   jtutil::array3d<fp> &data_buffer,
+								   jtutil::array2d<CCTK_INT> &posn_buffer,
+								   jtutil::array3d<fp> &Jacobian_buffer)
+		const
+
+	{
+		// number of points in each interpolation molecule, and the offset
+		// from a molecule's first point to the point reported as its
+		// position; these must agree with  molecule_minmax_ipar_m()
+		const int ORD = 6;
+		const int posn_offset = 2;
+
+		const int N_gridfn_data_points = jtutil::how_many_in_range(min_ipar(), max_ipar());
+		const CCTK_INT stride = my_edge().ghosted_par_stride();
+		const int Jacobian_min_k = Jacobian_buffer.min_k();
+
+		// scratch buffer holding one line of gridfn data contiguously
+		// (the gridfn itself is strided in the par direction);
+		// allocated once and reused for every iperp and gridfn
+		std::vector<double> y(N_gridfn_data_points > 0 ? N_gridfn_data_points : 0);
+		double *const y_ptr = y.empty() ? NULL : &y[0];
+
+		//
+		// do the interpolations at each iperp
+		//
+		for (int iperp = min_iperp(); iperp <= max_iperp(); ++iperp)
+		{
+			const int min_parindex = min_parindex_array_(iperp);
+			const int max_parindex = max_parindex_array_(iperp);
+
+			// (irho,isigma) of the (iperp,min_ipar) grid point
+			const int start_irho = my_edge().irho_of_iperp_ipar(iperp, min_ipar());
+			const int start_isigma = my_edge().isigma_of_iperp_ipar(iperp, min_ipar());
+
+			for (int ghosted_gfn = ghosted_min_gfn_to_interp;
+				 ghosted_gfn <= ghosted_max_gfn_to_interp;
+				 ++ghosted_gfn)
+			{
+				// pointers to gridfn data to interpolate, and to result buffer
+				// (also recorded in our per-gridfn pointer arrays)
+				const fp *const gridfn_ptr = &my_patch()
+												  .ghosted_gridfn(ghosted_gfn,
+																  start_irho, start_isigma);
+				fp *const result_ptr = &data_buffer(ghosted_gfn, iperp, min_parindex);
+				gridfn_data_ptrs_(ghosted_gfn) = static_cast<const void *>(gridfn_ptr);
+				interp_data_buffer_ptrs_(ghosted_gfn) = static_cast<void *>(result_ptr);
+
+				// gather this gridfn's data along the par direction
+				for (int i = 0; i < N_gridfn_data_points; i++)
+				{
+					y[i] = gridfn_ptr[stride * i];
+				}
+
+				// molecule positions and weights are the same for every
+				// gridfn, so only store them while doing the first one
+				const bool store_molecule_info = (ghosted_gfn == ghosted_min_gfn_to_interp);
+
+				for (int parindex = min_parindex;
+					 parindex <= max_parindex;
+					 ++parindex)
+				{
+					double value;
+					double Jac[ORD];
+					int posn; // of molecular, starting from 0
+					const int status = lagrange_interp(gridfn_coord_origin_, gridfn_coord_delta_,
+													   y_ptr, N_gridfn_data_points,
+													   interp_par_(iperp, parindex), &value,
+													   &posn, Jac, ORD);
+					if (status < 0)
+						then error_exit(ERROR_EXIT,
+										"***** patch_interp::interpolate():\n"
+										"        error return %d from interpolator at iperp=%d of [%d,%d]!\n"
+										"        my_patch()=\"%s\" my_edge()=\"%s\"\n",
+										status, iperp, min_iperp(), max_iperp(),
+										my_patch().name(), my_edge().name()); /*NOTREACHED*/
+
+					data_buffer(ghosted_gfn, iperp, parindex) = value;
+
+					if (store_molecule_info)
+					{
+						// the interpolator's position is relative to
+						// min_ipar; convert it to an ipar value
+						posn_buffer(iperp, parindex) = posn + posn_offset + min_ipar();
+
+						for (int m = 0; m < ORD; m++)
+						{
+							Jacobian_buffer(iperp, parindex, Jacobian_min_k + m) = Jac[m];
+						}
+					}
+				}
+			}
+		} // end for iperp
+	}
+
+	//
+	// This function checks the three flags describing the Jacobian
+	// sparsity pattern and does an  error_exit()  if any of them is set.
+	// All three flags are fixed at 0 here, so the check always passes.
+	//
+	void patch_interp::verify_Jacobian_sparsity_pattern_ok()
+		const
+	{
+		// does the molecule size/shape depend on the interpolation
+		// coordinates or on the data values?
+		const CCTK_INT size_depends_on_coords = 0;
+		const CCTK_INT size_depends_on_data = 0;
+		// do the Jacobian values depend on the data values?
+		const CCTK_INT Jacobian_depends_on_data = 0;
+
+		const bool pattern_is_ok = !(size_depends_on_coords || size_depends_on_data || Jacobian_depends_on_data);
+		if (pattern_is_ok)
+			then return;
+
+		error_exit(ERROR_EXIT,
+				   "***** patch_interp::verify_Jacobian_sparsity_pattern_ok():\n"
+				   "        implementation restriction: we only grok Jacobians with\n"
+				   "        fixed-sized hypercube-shaped molecules, independent of\n"
+				   "        the interpolation coordinates and the floating-point values!\n"
+				   "        MSS_is_fn_of_interp_coords=(int)%d (we only grok 0)\n"
+				   "        MSS_is_fn_of_input_array_values=(int)%d (we only grok 0)\n"
+				   "        Jacobian_is_fn_of_input_array_values=(int)%d (we only grok 0)\n",
+				   size_depends_on_coords,
+				   size_depends_on_data,
+				   Jacobian_depends_on_data);
+	}
+
+	//******************************************************************************
+
+	//
+	// This function reports the [min,max] ipar m coordinates of the
+	// interpolation molecules.  The range is fixed: it is the same for
+	// every interpolation point.
+	//
+	// (This API implicitly assumes that the Jacobian sparsity is one which
+	// is "ok" as verified by  verify_Jacobian_sparsity_pattern_ok() .)
+	//
+	void patch_interp::molecule_minmax_ipar_m(int &min_ipar_m, int &max_ipar_m)
+		const
+	{
+		const int first_m = -2;
+		const int last_m = 3;
+
+		min_ipar_m = first_m;
+		max_ipar_m = last_m;
+	}
+
+	//******************************************************************************
+
+	//
+	// This function computes the molecule ipar position of every
+	// interpolation point (these positions are independent of
+	// ghosted_gfn), and stores them in  posn_buffer(iperp, parindex) .
+	// The positions are computed by the same rule  interpolate()  uses.
+	//
+	// (This API implicitly assumes that the Jacobian sparsity is one which
+	// is "ok" as verified by  verify_Jacobian_sparsity_pattern_ok() .)
+	//
+	void patch_interp::molecule_posn(jtutil::array2d<CCTK_INT> &posn_buffer)
+		const
+	{
+		// number of points in each interpolation molecule, and the offset
+		// from a molecule's first point to the point reported as its
+		// position; these must agree with  molecule_minmax_ipar_m()
+		const int ORD = 6;
+		const int posn_offset = 2;
+
+		const int N_gridfn_data_points = jtutil::how_many_in_range(min_ipar(), max_ipar());
+
+		// molecule positions don't depend on the data values,
+		// so we can ask the interpolator about all-zero dummy data
+		std::vector<double> dummy_data(N_gridfn_data_points > 0 ? N_gridfn_data_points : 0, 0.0);
+		double *const dummy_ptr = dummy_data.empty() ? NULL : &dummy_data[0];
+
+		for (int iperp = min_iperp(); iperp <= max_iperp(); ++iperp)
+		{
+			const int min_parindex = min_parindex_array_(iperp);
+			const int max_parindex = max_parindex_array_(iperp);
+
+			for (int parindex = min_parindex;
+				 parindex <= max_parindex;
+				 ++parindex)
+			{
+				double dummy_value;
+				double Jac[ORD];
+				int posn; // relative to min_ipar
+				const int status = lagrange_interp(gridfn_coord_origin_, gridfn_coord_delta_,
+												   dummy_ptr, N_gridfn_data_points,
+												   interp_par_(iperp, parindex), &dummy_value,
+												   &posn, Jac, ORD);
+				if (status < 0)
+					then error_exit(ERROR_EXIT,
+									"***** patch_interp::molecule_posn():\n"
+									"        error return %d from interpolator at iperp=%d of [%d,%d]!\n",
+									status, iperp, min_iperp(), max_iperp()); /*NOTREACHED*/
+
+				// convert from a position relative to min_ipar
+				// to an ipar value
+				posn_buffer(iperp, parindex) = posn + posn_offset + min_ipar();
+			}
+		}
+	}
+
+	//
+	// This function computes the Jacobian of the interpolated data with
+	// respect to this patch's ghosted gridfns, i.e. the interpolation
+	// weights of each molecule, and stores it in
+	//	Jacobian_buffer(iperp, parindex, ipar_m)
+	// starting at  ipar_m = Jacobian_buffer.min_k() , the same layout
+	//  interpolate()  uses.  The weights depend only on the coordinates,
+	// so they are the same for every ghosted gridfn.
+	//
+	void patch_interp::Jacobian(jtutil::array3d<fp> &Jacobian_buffer)
+		const
+	{
+		// number of points in each interpolation molecule;
+		// this must agree with  molecule_minmax_ipar_m()
+		const int ORD = 6;
+
+		const int N_gridfn_data_points = jtutil::how_many_in_range(min_ipar(), max_ipar());
+		const int Jacobian_min_k = Jacobian_buffer.min_k();
+
+		// the weights don't depend on the data values,
+		// so we can ask the interpolator about all-zero dummy data
+		std::vector<double> dummy_data(N_gridfn_data_points > 0 ? N_gridfn_data_points : 0, 0.0);
+		double *const dummy_ptr = dummy_data.empty() ? NULL : &dummy_data[0];
+
+		//
+		// compute the Jacobians at each iperp
+		//
+		for (int iperp = min_iperp(); iperp <= max_iperp(); ++iperp)
+		{
+			const int min_parindex = min_parindex_array_(iperp);
+			const int max_parindex = max_parindex_array_(iperp);
+
+			for (int parindex = min_parindex;
+				 parindex <= max_parindex;
+				 ++parindex)
+			{
+				double dummy_value;
+				double Jac[ORD];
+				int posn;
+				const int status = lagrange_interp(gridfn_coord_origin_, gridfn_coord_delta_,
+												   dummy_ptr, N_gridfn_data_points,
+												   interp_par_(iperp, parindex), &dummy_value,
+												   &posn, Jac, ORD);
+				if (status < 0)
+					then error_exit(ERROR_EXIT,
+									"***** patch_interp::Jacobian():\n"
+									"        error return %d from interpolator at iperp=%d of [%d,%d]!\n",
+									status, iperp, min_iperp(), max_iperp()); /*NOTREACHED*/
+
+				for (int m = 0; m < ORD; m++)
+				{
+					Jacobian_buffer(iperp, parindex, Jacobian_min_k + m) = Jac[m];
+				}
+			}
+		}
+	}
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/patch_interp.h
+++ b/AMSS_NCKU_source/AHF_Direct/patch_interp.h
@@ -0,0 +1,293 @@
+#ifndef TPATCH_INTERP_H
+#define TPATCH_INTERP_H
+namespace AHFinderDirect
+	  {
+
+//
+// patch_interp - interpolation from a patch
+//
+
+//
+// A patch_interp object is responsible for interpolating gridfn data
+// from its owning patch for use by another patch's ghost_zone object
+// (in setting up the gridfn in the other ghost zone).  A patch_interp
+// object deals only in its own patch's coordinates; other code elsewhere
+// (in practice in interpatch_ghost_zone::) is responsible for translating
+// other patch's coordinates into our coordinates.
+//
+
+//
+// A patch_interp defines a "patch interpolation region", the region of
+// its owning patch from which this interpolation will use gridfn data.
+//
+
+//
+// The way the patch coordinates are constructed, any two adjacent patches
+// share a common (perpendicular) coordinate.  Thus we only have to do
+// 1-dimensional interpolation here (in the parallel direction).  In
+// other words, for each iperp we interpolate in par.
+//
+// In general we interpolate each gridfn at a number of distinct par
+// for each iperp; the integer "parindex" indexes these points.  We
+// attach no particular semantics to parindex, and it need not be
+// 0-origin or have the same range for each iperp.  [In practice,
+// parindex will be the other patch's ipar coordinate.]  However,
+// we assume that the range of parindex is roughly similar for each
+// iperp, so it's ok to use (iperp,parindex) as a 2-D rectangular
+// index space.
+//
+// For example, we might interpolate at the points
+//            ipar ipar ipar ipar ipar ipar ipar ipar ipar
+//              1    2    3    4    5    6    7    8    9
+// iperp=10           (2a)   (3b)   (4c)
+// iperp=11          (2d)   (3e)  (4f)   (5g)
+// where the (2a)-(5g) are the interpolation points, with 2-5 being the
+// parindex values and a-g being unique identifiers used in our description
+// below.  In terms of our member data, this interpolation region would
+// be described by
+//	[min,max]_iperp_=[10,11]
+//	[min,max]_ipar_=[1,9]
+//	[min,max]_parindex_array_(10)=[2,4]
+//	[min,max]_parindex_array_(11)=[2,5]
+//	interp_par_(10,2) = x[a]
+//	interp_par_(10,3) = x[b]
+//	interp_par_(10,4) = x[c]
+//	interp_par_(11,2) = x[d]
+//	interp_par_(11,3) = x[e]
+//	interp_par_(11,4) = x[f]
+//	interp_par_(11,5) = x[g]
+//
+
+//
+// We use the Cactus local interpolator CCTK_InterpLocalUniform()
+// to do the interpolation.  To minimize interpolator overheads, we
+// interpolate all the gridfns at each iperp in a single interpolator
+// call.  [Different iperp values involve different sets of (1-D)
+// gridfn data, and so inherently require distinct interpolator calls.]
+//
+// Setting up the array subscripting for the interpolator to access
+// the gridfn data is a bit tricky:  The interpolator accesses the
+// gridfn data using the generic (1-D) subscripting expression
+//	data[offset + i*stride]
+// where  i  is the data array index.  However, we'd rather not use
+//  offset , because it has to be supplied in the parameter table as
+// an array subscripted by  gfn , and so would require changing the
+// parameter table for each call on  interpolate()  (with potentially
+// different numbers of gridfns being interpolated).  Instead, at each
+//  iperp  we use  i = ipar-min_ipar , so the default  offset=0  makes
+// the subscripting expression zero for  ipar = min_ipar .  This also
+// makes the interpolator's  min_i = 0  and  max_i  be  dims-1  (both
+// the defaults), so those also don't have to be set in the parameter
+// table either.  We set the interpolator's data coordinate origin to
+// the  par  coordinate for  min_ipar , so it correctly maps  i --> par .
+// With this strategy we can share the interpolator parameter table
+// across all the  iperp  values, and we don't need to modify the
+// parameter table at all after the initial setup in our constructor.
+// However, we do have to adjust the molecule positions in
+//  patch_interp::molecule_posn() , since the interpolator will return
+//  i  values, while  molecule_posn()  needs  ipar  values.
+//
+
+class	patch_interp
+	{
+public:
+	// to which patch/edge do we belong?
+	const patch& my_patch() const { return my_patch_; }
+	const patch_edge& my_edge() const { return my_edge_; }
+
+
+public:
+	//
+	// ***** main client interface *****
+	//
+	// interpolate specified range of ghosted gridfns
+	// at all the coordinates specified when we were constructed,
+	// store interpolated data in
+	//	data_buffer(ghosted_gfn, iperp, parindex)
+	// NOTE(review): no definition of this 3-argument overload is
+	//               visible in "patch_interp.C" -- confirm whether
+	//               it is still needed
+	void interpolate(int ghosted_min_gfn_to_interp,
+			 int ghosted_max_gfn_to_interp,
+			 jtutil::array3d<fp>& data_buffer)
+		const;
+	// same, but also store the molecule positions in
+	//	posn_buffer(iperp, parindex)
+	// and the interpolation weights in
+	//	Jacobian_buffer(iperp, parindex, ipar_m)
+	void interpolate(int ghosted_min_gfn_to_interp,
+			 int ghosted_max_gfn_to_interp,
+			 jtutil::array3d<fp>& data_buffer,
+			 jtutil::array2d<CCTK_INT>& posn_buffer,
+                         jtutil::array3d<fp>& Jacobian_buffe)
+		const;
+
+public:
+	//
+	// ***** Jacobian of interpolate() *****
+	//
+
+	// verify (no-op if ok, error_exit() if not) that interpolator
+	// has a Jacobian sparsity pattern which we grok: at present this
+	// means molecules are fixed-sized hypercubes, with size/shape
+	// independent of interpolation coordinates and the floating-point
+	// values in the input arrays
+	void verify_Jacobian_sparsity_pattern_ok() const;
+
+	//
+	// The API for the remaining Jacobian functions implicitly
+	// assumes that the Jacobian sparsity pattern is "ok" as
+	// verified by  verify_Jacobian_sparsity_pattern_ok() ,
+	// and in particular that  [min,max]_ipar_m  are independent
+	// of iperp and parindex.
+	//
+
+	// get [min,max] ipar m coordinates of interpolation molecules
+	void molecule_minmax_ipar_m(int& min_ipar_m, int& max_ipar_m) const;
+
+	// get interpolation molecule ipar positions in
+	//  molecule_posn_buffer(iperp, parindex)
+	// ... array type is CCTK_INT so we can pass by reference
+	//     to interpolator
+	void molecule_posn(jtutil::array2d<CCTK_INT>& posn_buffer) const;
+
+	// get Jacobian of interpolated data with respect to this patch's
+	// ghosted gridfns,
+	//	partial interpolate() data_buffer(ghosted_gfn, iperp, parindex)
+	//	---------------------------------------------------------------
+	//	    partial ghosted_gridfn(ghosted_gfn, iperp, posn+ipar_m)
+	// store Jacobian in
+	//	Jacobian_buffer(iperp, parindex, ipar_m)
+	// where we implicitly assume the Jacobian to be independent of
+	// ghosted_gfn, and where
+	//	posn = posn_buffer(iperp, parindex)
+	// as returned by  molecule_posn()
+	void Jacobian(jtutil::array3d<fp>& Jacobian_buffer) const;
+
+	//
+	// ***** internal functions *****
+	//
+private:
+	// [min,max] iperp for interpolation and gridfn data
+	int min_iperp() const { return min_iperp_; }
+	int max_iperp() const { return max_iperp_; }
+
+	// min/max (iperp,ipar) of the gridfn data to use for interpolation
+	int min_ipar() const { return min_ipar_; }
+	int max_ipar() const { return max_ipar_; }
+
+	//
+	// ***** constructor, destructor, et al *****
+	//
+public:
+	//
+	// Constructor arguments:
+	// my_edge_in = Identifies the patch/edge to which this
+	//		interpolation region is to belong.
+	// [min,max]_iperp_in = The range of iperp for this interpolation
+	//			region
+	// [min,max]_parindex_array_in(iperp)
+	//	= [min,max] range of parindex actually used at each iperp.
+	//	  We keep references to these arrays, so they should have
+	//	  lifetimes at least as long as that of this object.
+	// interp_par_in(iperp,parindex)
+	//	= Gives the par coordinates at which we will interpolate;
+	//	  array entries outside the range [min,max]_parindex_in
+	//	  are unused.  We keep a reference to this array, so it
+	//	  should have a lifetime at least as long as that of this
+	//	  object.
+	// ok_to_use_[min,max]_par_ghost_zone
+	//	= Boolean flags saying whether or not we should use gridfn
+	//	  data from the [min,max]_par ghost zones in the interpolation.
+	// interp_handle_in = Cactus handle to the interpatch interpolation
+	//		      operator.
+	// interp_par_table_handle_in
+	//	= Cactus handle to a Cactus key/value table giving
+	//	  parameters (eg order) for the interpatch interpolation
+	//	  operator.  This class internally clones this table and
+	//	  modifies the clone, so the original table is not modified
+	//	  by any actions of this class.
+	//	  NOTE(review): the constructor in "patch_interp.C" does not
+	//	  read this argument or clone any table -- confirm.
+	//
+	// This constructor requires that this patch's gridfns already
+	// exist, since we size various arrays based on the patch's min/max
+	// ghosted gfn.
+	//
+	patch_interp(const patch_edge& my_edge_in,
+		     int min_iperp_in, int max_iperp_in,
+		     const jtutil::array1d<int>& min_parindex_array_in,
+		     const jtutil::array1d<int>& max_parindex_array_in,
+		     const jtutil::array2d<fp>& interp_par_in,
+		     bool ok_to_use_min_par_ghost_zone,
+		     bool ok_to_use_max_par_ghost_zone,
+		     int interp_handle_in, int interp_par_table_handle_in);
+	~patch_interp();
+
+private:
+        // we forbid copying and passing by value
+        // by declaring the copy constructor and assignment operator
+        // private, but never defining them
+	patch_interp(const patch_interp& rhs);
+	patch_interp& operator=(const patch_interp& rhs);
+
+
+	//
+	// ***** data members *****
+	//
+	// n.b. the constructor's initializer list uses some members
+	//      (eg  min_ipar_ ) to initialize later ones, so the
+	//      declaration order below matters
+	//
+private:
+	const patch& my_patch_;
+	const patch_edge& my_edge_;
+
+	// range of gfn we can handle
+	// (any given interpolate() call may specify a subrange)
+	const int min_gfn_, max_gfn_;
+
+	// these are strictly speaking redundant
+	// but we keep them for use in debugging
+	bool ok_to_use_min_par_ghost_zone_, ok_to_use_max_par_ghost_zone_;
+
+	// patch interpolation region,
+	// i.e. range of (iperp,ipar) in this patch from which
+	// we will use gridfn data in interpolation
+	const int min_iperp_, max_iperp_;
+	const int min_ipar_, max_ipar_;
+
+	// [min,max] parindex at each iperp
+	// ... these are references to arrays passed in to our constructor
+	//     ==> we do *not* own them!
+	// ... indices are (iperp)
+	const jtutil::array1d<int>& min_parindex_array_;
+	const jtutil::array1d<int>& max_parindex_array_;
+
+	// interp_par_(iperp,parindex)
+	//	= Gives the par coordinates at which we will interpolate;
+	//	  array entries outside the range [min,max]_parindex_in
+	//	  are unused (n.b. this interface implicitly takes the
+	//	  par coordinates to be independent of ghosted_gfn).
+	// ... this is a reference to an array passed in to our constructor
+	//     ==> we do *not* own this!
+	const jtutil::array2d<fp>& interp_par_;	// indices (iperp,parindex)
+
+	// Cactus handle to the interpolation operator
+	int interp_handle_;
+
+	// Cactus handle to our private Cactus key/value table
+	// giving parameters for the interpolation operator
+	// ... this starts out as a copy of the passed-in table,
+	//     then gets extra stuff added to it specific to this
+	//     interpolation region; it's shared across all iperp
+	// ... we own this table
+	// NOTE(review): the constructor in "patch_interp.C" sets this to
+	//               the literal 1 and the destructor is empty, so the
+	//               description above looks stale -- confirm
+	const int interp_par_table_handle_;
+
+	// (par) origin and delta values of the gridfn data
+	const fp gridfn_coord_origin_, gridfn_coord_delta_;
+
+	// --> start of gridfn data to use for interpolation
+	//     (reset for each iperp)
+	// ... we do *not* own the pointed-to data!
+	// ... index is (gfn)
+	mutable jtutil::array1d<const void*> gridfn_data_ptrs_;
+
+	// --> start of interpolation data buffer for each gridfn
+	//     (reset for each iperp)
+	// ... we do *not* own the pointed-to data!
+	// ... index is (gfn)
+	mutable jtutil::array1d<void*> interp_data_buffer_ptrs_;
+	};
+
+//******************************************************************************
+
+	  }	// namespace AHFinderDirect
+#endif  /* TPATCH_INTERP_H */
--- a/AMSS_NCKU_source/AHF_Direct/patch_system.C
+++ b/AMSS_NCKU_source/AHF_Direct/patch_system.C
--- a/AMSS_NCKU_source/AHF_Direct/patch_system.h
+++ b/AMSS_NCKU_source/AHF_Direct/patch_system.h
@@ -0,0 +1,595 @@
+#ifndef TPATCH_SYSTEM_H
+#define TPATCH_SYSTEM_H
+namespace AHFinderDirect
+{
+
+	//******************************************************************************
+
+	//
+	// A  patch_system  object describes a system of interlinked patches.
+	//
+	// Its  const  qualifiers refer (only) to the gridfn data.  Notably, this
+	// means that  synchronize()  is a non-const function (it modifies gridfn
+	// data), while  synchronize_Jacobian()  et al are const functions (they
+	// don't modify gridfn data) even though they may update other internal
+	// state in the  patch_system  object and its subobjects.
+	//
+
+	class patch_system
+	{
+		//
+		// ***** static data & functions describing patch systems *****
+		//
+	public:
+		// which patch-system types are supported?
+		// (see "patch_system_info.h" for detailed descriptions of these)
+		enum patch_system_type
+		{
+			patch_system__full_sphere,
+			patch_system__plus_z_hemisphere,
+			patch_system__plus_xy_quadrant_mirrored,
+			patch_system__plus_xy_quadrant_rotating,
+			patch_system__plus_xz_quadrant_mirrored,
+			patch_system__plus_xz_quadrant_rotating,
+			patch_system__plus_xyz_octant_mirrored,
+			patch_system__plus_xyz_octant_rotating
+		};
+
+		// maximum number of patches in any patch-system type
+		static const int max_N_patches = 6;
+
+		// decode patch system type into N_patches
+		static int N_patches_of_type(enum patch_system_type type_in);
+
+		// patch system type <--> human-readable character-string name
+		static const char *name_of_type(enum patch_system_type type_in);
+		static enum patch_system_type type_of_name(const char *name_in);
+
+		//
+		// ***** coordinates *****
+		//
+	public:
+#ifdef NOT_USED
+		// global (x,y,z) --> local (x,y,z)
+		fp local_x_of_global_x(fp global_x) const
+		{
+			return global_coords_.local_x_of_global_x(global_x);
+		}
+		fp local_y_of_global_y(fp global_y) const
+		{
+			return global_coords_.local_y_of_global_y(global_y);
+		}
+		fp local_z_of_global_z(fp global_z) const
+		{
+			return global_coords_.local_z_of_global_z(global_z);
+		}
+#endif /* NOT_USED */
+
+#ifdef NOT_USED
+		// local (x,y,z) --> global (x,y,z)
+		fp global_x_of_local_x(fp local_x) const
+		{
+			return global_coords_.global_x_of_local_x(local_x);
+		}
+		fp global_y_of_local_y(fp local_y) const
+		{
+			return global_coords_.global_y_of_local_y(local_y);
+		}
+		fp global_z_of_local_z(fp local_z) const
+		{
+			return global_coords_.global_z_of_local_z(local_z);
+		}
+#endif /* NOT_USED */
+
+		// get global (x,y,z) coordinates of local origin point
+		fp origin_x() const { return global_coords_.origin_x(); }
+		fp origin_y() const { return global_coords_.origin_y(); }
+		fp origin_z() const { return global_coords_.origin_z(); }
+
+		//
+		// ***** meta-info about the entire patch system *****
+		//
+	public:
+		// patch-system type
+		enum patch_system_type type() const { return type_; }
+
+		// total number of patches
+		int N_patches() const { return N_patches_; }
+
+		// get patches by patch number
+		const patch &ith_patch(int pn) const
+		{
+			return *all_patches_[pn];
+		}
+		patch &ith_patch(int pn)
+		{
+			return *all_patches_[pn];
+		}
+
+		// find a patch by +/- xyz "ctype"
+		// FIXME: the present implementation of this function is quite slow
+		const patch &plus_or_minus_xyz_patch(bool is_plus, char ctype)
+			const;
+
+		// find a patch by name, return patch number; error_exit() if not found
+		int patch_number_of_name(const char *name) const;
+
+		// total number of grid points
+		int N_grid_points() const { return N_grid_points_; }
+		int ghosted_N_grid_points() const { return ghosted_N_grid_points_; }
+
+		//
+		// ***** meta-info about gridfns *****
+		//
+	public:
+		int min_gfn() const { return ith_patch(0).min_gfn(); }
+		int max_gfn() const { return ith_patch(0).max_gfn(); }
+		int N_gridfns() const { return ith_patch(0).N_gridfns(); }
+		bool is_valid_gfn(int gfn) const
+		{
+			return ith_patch(0).is_valid_gfn(gfn);
+		}
+		int ghosted_min_gfn() const { return ith_patch(0).ghosted_min_gfn(); }
+		int ghosted_max_gfn() const { return ith_patch(0).ghosted_max_gfn(); }
+		int ghosted_N_gridfns() const
+		{
+			return ith_patch(0).ghosted_N_gridfns();
+		}
+		bool is_valid_ghosted_gfn(int ghosted_gfn) const
+		{
+			return ith_patch(0).is_valid_ghosted_gfn(ghosted_gfn);
+		}
+
+		//
+		// ***** synchronize() and its Jacobian *****
+		//
+	public:
+		// "synchronize" all ghost zones of all patches,
+		// i.e. update the ghost-zone values of the specified gridfns
+		// via the appropriate sequence of symmetry operations
+		// and interpatch interpolations
+		void synchronize(int ghosted_min_gfn_to_sync,
+						 int ghosted_max_gfn_to_sync);
+
+		// ... convenience overload: synchronize every ghosted gridfn,
+		//     i.e. the full range [ghosted_min_gfn(), ghosted_max_gfn()]
+		void synchronize()
+		{
+			const int first_ghosted_gfn = ghosted_min_gfn();
+			const int last_ghosted_gfn = ghosted_max_gfn();
+			synchronize(first_ghosted_gfn, last_ghosted_gfn);
+		}
+
+		//
+		// do any precomputation necessary to compute Jacobian of
+		//  synchronize() , taking into account synchronize()'s
+		// full 3-phase algorithm
+		//
+		void compute_synchronize_Jacobian(int ghosted_min_gfn_to_sync,
+										  int ghosted_max_gfn_to_sync)
+			const;
+
+		// ... convenience overload: do the Jacobian precomputation for
+		//     the full range [ghosted_min_gfn(), ghosted_max_gfn()]
+		void compute_synchronize_Jacobian()
+			const
+		{
+			const int first_ghosted_gfn = ghosted_min_gfn();
+			const int last_ghosted_gfn = ghosted_max_gfn();
+			compute_synchronize_Jacobian(first_ghosted_gfn, last_ghosted_gfn);
+		}
+
+		//
+		// The following functions access the Jacobian computed by
+		//  compute_synchronize_Jacobian() .  Note this API is rather
+		// different than that of ghost_zone::compute_Jacobian()  et al:
+		// here we must take into account synchronize()'s full 3-phase
+		// algorithm, and this may lead to a more general Jacobian
+		// structure.
+		//
+		// This API still implicitly assumes that the Jacobian is
+		// independent of  ghosted_gfn , and that the set of y points
+		// (with nonzero Jacobian values) in a single row of the Jacobian
+		// matrix (i.e. the set of points on which a single ghost-zone
+		// point depends),
+		// - lies entirely within a single y patch
+		// - has a single yiperp value
+		// - spans a contiguous interval of yipar; we parameterize this
+		//   interval as  yipar = posn+m
+		//
+
+		// what are the global min/max  m  over all ghost zone points?
+		// (this is useful for sizing the buffer for synchronize_Jacobian())
+		void synchronize_Jacobian_global_minmax_ym(int &min_ym, int &max_ym)
+			const;
+
+		// compute a single row of the Jacobian:
+		// - return value is edge to which y point belongs
+		//   (caller can get patch from this edge)
+		// - store y_iperp and y_posn and min/max ym in named arguments
+		// - stores the Jacobian elements
+		//	partial synchronize() gridfn(ghosted_gfn, px, x_iperp, x_ipar)
+		//	-------------------------------------------------------------
+		//	     partial gridfn(ghosted_gfn, py, y_iperp, y_posn+ym)
+		//   (taking into account synchronize()'s full 3-phase algorithm)
+		//   in the caller-supplied buffer
+		//	Jacobian_buffer(ym)
+		//   for each  ym  in the min/max ym range
+		const patch_edge &
+		synchronize_Jacobian(const ghost_zone &xgz,
+							 int x_iperp, int x_ipar,
+							 int &y_iperp,
+							 int &y_posn, int &min_ym, int &max_ym,
+							 jtutil::array1d<fp> &Jacobian_buffer)
+			const;
+
+		// helper functions for synchronize_Jacobian():
+	private:
+		// "fold" (part of) a Jacobian row
+		// to take a symmetry operation into account
+		// e_Jac = edge which the Jacobian lies along
+		// e_fold = edge about which to fold
+		// [min,max]_m = range of m in the Jacobian
+		// [min,max]_fold_m = range of m to fold
+		//		      (must be a subrange of {min,max}_m)
+		void fold_Jacobian(const patch_edge &e_Jac, const patch_edge &e_fold,
+						   int iperp,
+						   int posn, int min_m, int max_m,
+						   int min_fold_m, int max_fold_m,
+						   jtutil::array1d<fp> &Jacobian_buffer)
+			const;
+
+		// compute the Jacobian of ghost zone's synchronize()
+		// *without* taking into account 3-phase algorithm
+		const patch_edge &
+		ghost_zone_Jacobian(const ghost_zone &xgz,
+							int x_iperp, int x_ipar,
+							int &y_iperp,
+							int &y_posn, int &min_ym, int &max_ym,
+							jtutil::array1d<fp> &Jacobian_buffer)
+			const;
+
+		//
+		// ***** gridfn operations *****
+		//
+	public:
+		// dst = a
+		void set_gridfn_to_constant(fp a, int dst_gfn);
+
+		// dst = src
+		void gridfn_copy(int src_gfn, int dst_gfn);
+
+		// dst += delta
+		void add_to_ghosted_gridfn(fp delta, int ghosted_dst_gfn);
+
+		void recentering(fp x, fp y, fp z);
+
+		// compute norms of gridfn (only over nominal grid)
+		void gridfn_norms(int src_gfn, jtutil::norm<fp> &norms)
+			const;
+		void ghosted_gridfn_norms(int ghosted_src_gfn, jtutil::norm<fp> &norms)
+			const;
+
+		//
+		// ***** testing (x,y,z) point position versus a surface *****
+		//
+
+		// find patch containing (ray from origin to) given local (x,y,z)
+		// ... if there are multiple patches containing the position,
+		//     we return the one which would still contain it if patches
+		//     didn't overlap; if multiple patches satisfy this criterion
+		//     then it's arbitrary which one we return
+		// ... if no patch contains the position (for a non--full-sphere
+		//     patch system), or the position is at the origin, then
+		//     we return a NULL pointer
+		const patch *patch_containing_local_xyz(fp x, fp y, fp z)
+			const;
+
+		// radius of surface in direction of an (x,y,z) point,
+		// taking into account any patch-system symmetries;
+		// or dummy value 1.0 if point is identical to local origin
+		//
+		// FIXME:
+		// We should provide another API to compute this for a whole
+		// batch of points at once, since this would be more efficient
+		// (the interpolator overhead would be amortized over the whole batch)
+		fp radius_in_local_xyz_direction(int ghosted_radius_gfn,
+										 fp x, fp y, fp z)
+			const;
+
+		//
+		// ***** line/surface operations *****
+		//
+
+		// compute the circumference of a surface in the {xy, xz, yz} plane
+		// ... note this is the full circumference all around the sphere,
+		//     even if the patch system only covers a proper subset of this
+		// ... the implementation assumes adjacent patches are butt-joined
+		// ... plane must be one of "xy", "xz", or "yz"
+		fp circumference(const char plane[],
+						 int ghosted_radius_gfn,
+						 int g_xx_gfn, int g_xy_gfn, int g_xz_gfn,
+						 int g_yy_gfn, int g_yz_gfn,
+						 int g_zz_gfn,
+						 enum patch::integration_method method)
+			const;
+
+		// compute the surface integral of a gridfn over the 2-sphere
+		//	$\int f(\rho,\sigma) \, dA
+		//		= \int f(\rho,\sigma) \sqrt{|J|} \, d\rho \, d\sigma$
+		// where $J$ is the Jacobian of $(x,y,z)$ with respect to $(\rho,\sigma)$
+		// ... integration method selected by  method  argument
+		// ... src gridfn may be either nominal-grid or ghosted-grid
+		// ... Boolean flags  src_gfn_is_even_across_{xy,xz,yz}_planes
+		//     specify whether the gridfn to be integrated is even (true)
+		//     or odd (false) across the corresponding planes.  Only the
+		//     flags corresponding to boundaries of the patch system are
+		//     used.  For example, for a  plus_z_hemisphere  patch system,
+		//     only the  src_gfn_is_even_across_xy_plane  flag is used.
+		// ... note integral is over the full 2-sphere,
+		//     even if the patch system only covers a proper subset of this
+		// ... the implementation assumes adjacent patches are butt-joined
+		fp integrate_gridfn(int unknown_src_gfn,
+							bool src_gfn_is_even_across_xy_plane,
+							bool src_gfn_is_even_across_xz_plane,
+							bool src_gfn_is_even_across_yz_plane,
+							int ghosted_radius_gfn,
+							int g_xx_gfn, int g_xy_gfn, int g_xz_gfn,
+							int g_yy_gfn, int g_yz_gfn,
+							int g_zz_gfn,
+							enum patch::integration_method method)
+			const;
+
+		//
+		// ***** I/O *****
+		//
+	public:
+		// print to a named file (newly (re)created)
+		// output format is
+		//	dpx	dpy	gridfn
+		void print_gridfn(int gfn, const char output_file_name[]) const
+		{
+			print_unknown_gridfn(false, gfn,
+								 false, false, 0,
+								 output_file_name, false);
+		}
+		void print_ghosted_gridfn(int ghosted_gfn,
+								  const char output_file_name[],
+								  bool want_ghost_zones = true)
+			const
+		{
+			print_unknown_gridfn(true, ghosted_gfn,
+								 false, false, 0,
+								 output_file_name, want_ghost_zones);
+		}
+
+		// print to a named file (newly (re)created)
+		// output format is
+		//	dpx	dpy	gridfn   global_x   global_y   global_z
+		// where global_[xyz] are derived from the angular position
+		// and a specified (unknown-grid) radius gridfn
+		void print_gridfn_with_xyz(int gfn,
+								   bool radius_is_ghosted_flag, int unknown_radius_gfn,
+								   const char output_file_name[])
+			const
+		{
+			print_unknown_gridfn(false, gfn,
+								 true, radius_is_ghosted_flag,
+								 unknown_radius_gfn,
+								 output_file_name, false);
+		}
+		void print_ghosted_gridfn_with_xyz(int ghosted_gfn,
+										   bool radius_is_ghosted_flag, int unknown_radius_gfn,
+										   const char output_file_name[],
+										   bool want_ghost_zones = true)
+			const
+		{
+			print_unknown_gridfn(true, ghosted_gfn,
+								 true, radius_is_ghosted_flag,
+								 unknown_radius_gfn,
+								 output_file_name, want_ghost_zones);
+		}
+
+	public:
+		// read from a named file
+		void read_gridfn(int gfn, const char input_file_name[])
+		{
+			read_unknown_gridfn(false, gfn, input_file_name, false);
+		}
+		void read_ghosted_gridfn(int ghosted_gfn,
+								 const char input_file_name[],
+								 bool want_ghost_zones = true)
+		{
+			read_unknown_gridfn(true, ghosted_gfn,
+								input_file_name, want_ghost_zones);
+		}
+
+	private:
+		// ... internal worker functions
+		void print_unknown_gridfn(bool ghosted_flag, int unknown_gfn,
+								  bool print_xyz_flag, bool radius_is_ghosted_flag,
+								  int unknown_radius_gfn,
+								  const char output_file_name[], bool want_ghost_zones)
+			const;
+		void read_unknown_gridfn(bool ghosted_flag, int unknown_gfn,
+								 const char input_file_name[],
+								 bool want_ghost_zones);
+
+		//
+		// ***** access to gridfns as 1-D arrays *****
+		//
+		// ... n.b. this interface implicitly assumes that gridfn data
+		//     arrays are contiguous across patches; this is ensured by
+		//     setup_gridfn_storage() (called by our constructor)
+		//
+	public:
+		// convert (patch,irho,isigma) <--> 1-D 0-origin grid point number (gpn)
+		int gpn_of_patch_irho_isigma(const patch &p, int irho, int isigma)
+			const
+		{
+#ifdef DEBUG_AHFD
+			printf(" <%d> ", isigma);
+#endif
+			// starting grid point number of this patch, plus the
+			// point's number within the patch
+			const int patch_base_gpn = starting_gpn_[p.patch_number()];
+			const int gpn_within_patch = p.gpn_of_irho_isigma(irho, isigma);
+			return patch_base_gpn + gpn_within_patch;
+		}
+		int ghosted_gpn_of_patch_irho_isigma(const patch &p,
+											 int irho, int isigma)
+			const
+		{
+			// ghosted starting grid point number of this patch, plus the
+			// point's ghosted number within the patch
+			const int patch_base_gpn = ghosted_starting_gpn_[p.patch_number()];
+			const int gpn_within_patch = p.ghosted_gpn_of_irho_isigma(irho, isigma);
+			return patch_base_gpn + gpn_within_patch;
+		}
+		// ... n.b. we return patch as a reference via the function result;
+		//     an alternative would be to have a patch*& argument
+		const patch &
+		patch_irho_isigma_of_gpn(int gpn, int &irho, int &isigma)
+			const;
+		const patch &
+		ghosted_patch_irho_isigma_of_gpn(int gpn, int &irho, int &isigma)
+			const;
+
+		// access actual gridfn data arrays
+		// (low-level, dangerous, use with caution)
+		const fp *gridfn_data(int gfn) const
+		{
+			return ith_patch(0).gridfn_data_array(gfn);
+		}
+		fp *gridfn_data(int gfn)
+		{
+			return ith_patch(0).gridfn_data_array(gfn);
+		}
+		const fp *ghosted_gridfn_data(int ghosted_gfn) const
+		{
+			return ith_patch(0).ghosted_gridfn_data_array(ghosted_gfn);
+		}
+		fp *ghosted_gridfn_data(int ghosted_gfn)
+		{
+			return ith_patch(0).ghosted_gridfn_data_array(ghosted_gfn);
+		}
+
+		//
+		// ***** constructor, destructor *****
+		//
+		// This constructor doesn't support the full generality of the
+		// patch data structures (which would, eg, allow ghost_zone_width
+		// and patch_extend_width and the interpolator parameters to vary
+		// from ghost zone to ghost zone, and the grid spacings to vary
+		// from patch to patch).  But in practice we'd probably never
+		// use that generality...
+		//
+	public:
+		patch_system(fp origin_x_in, fp origin_y_in, fp origin_z_in,
+					 enum patch_system_type type_in,
+					 int ghost_zone_width, int patch_overlap_width,
+					 int N_zones_per_right_angle,
+					 int min_gfn_in, int max_gfn_in,
+					 int ghosted_min_gfn_in, int ghosted_max_gfn_in,
+					 int ip_interp_handle_in, int ip_interp_par_table_handle_in,
+					 int surface_interp_handle_in,
+					 int surface_interp_par_table_handle_in,
+					 bool print_summary_msg_flag, bool print_detailed_msg_flag);
+		~patch_system();
+
+		//
+		// ***** helper functions for constructor *****
+		//
+	private:
+		// construct patches as described by patch_info[] array,
+		// and link them into the patch system
+		// does *NOT* create ghost zones
+		// does *NOT* set up gridfns
+		void create_patches(const struct patch_info patch_info_in[],
+							int ghost_zone_width, int patch_extend_width,
+							int N_zones_per_right_angle,
+							bool print_msg_flag);
+
+		// setup all gridfns with contiguous-across-patches storage
+		void setup_gridfn_storage(int min_gfn_in, int max_gfn_in,
+								  int ghosted_min_gfn_in, int ghosted_max_gfn_in,
+								  bool print_msg_flag);
+
+		// setup (create/interlink) all ghost zones
+		void setup_ghost_zones__full_sphere(int patch_overlap_width,
+											int ip_interp_handle, int ip_interp_par_table_handle,
+											bool print_msg_flag);
+		void setup_ghost_zones__plus_z_hemisphere(int patch_overlap_width,
+												  int ip_interp_handle, int ip_interp_par_table_handle,
+												  bool print_msg_flag);
+		void setup_ghost_zones__plus_xy_quadrant_mirrored(int patch_overlap_width,
+														  int ip_interp_handle, int ip_interp_par_table_handle,
+														  bool print_msg_flag);
+		void setup_ghost_zones__plus_xy_quadrant_rotating(int patch_overlap_width,
+														  int ip_interp_handle, int ip_interp_par_table_handle,
+														  bool print_msg_flag);
+		void setup_ghost_zones__plus_xz_quadrant_mirrored(int patch_overlap_width,
+														  int ip_interp_handle, int ip_interp_par_table_handle,
+														  bool print_msg_flag);
+		void setup_ghost_zones__plus_xz_quadrant_rotating(int patch_overlap_width,
+														  int ip_interp_handle, int ip_interp_par_table_handle,
+														  bool print_msg_flag);
+		void setup_ghost_zones__plus_xyz_octant_mirrored(int patch_overlap_width,
+														 int ip_interp_handle, int ip_interp_par_table_handle,
+														 bool print_msg_flag);
+		void setup_ghost_zones__plus_xyz_octant_rotating(int patch_overlap_width,
+														 int ip_interp_handle, int ip_interp_par_table_handle,
+														 bool print_msg_flag);
+
+		// create/interlink a pair of periodic-symmetry ghost zones
+		static void create_periodic_symmetry_ghost_zones(const patch_edge &ex, const patch_edge &ey,
+														 bool ipar_map_is_plus);
+
+		// construct a pair of interpatch ghost zones
+		// ... automagically figures out which edges are adjacent
+		static void create_interpatch_ghost_zones(patch &px, patch &py,
+												  int patch_overlap_width);
+
+		// finish setup of a pair of interpatch ghost zones
+		// ... automagically figures out which edges are adjacent
+		static void finish_interpatch_setup(patch &px, patch &py,
+											int patch_overlap_width,
+											int ip_interp_handle, int ip_interp_par_table_handle);
+
+		// assert() that all ghost zones of all patches are fully setup
+		void assert_all_ghost_zones_fully_setup() const;
+
+	private:
+		// we forbid copying and passing by value
+		// by declaring the copy constructor and assignment operator
+		// private, but never defining them
+		patch_system(const patch_system &rhs);
+		patch_system &operator=(const patch_system &rhs);
+
+	private:
+		// local <--> global coordinate mapping
+		global_coords global_coords_;
+
+		// meta-info about patch system
+		enum patch_system_type type_;
+		int N_patches_;
+		int N_grid_points_, ghosted_N_grid_points_;
+
+		// [pn] = --> individual patches
+		// *** constructor initialization list ordering:
+		// *** this must be declared after  N_patches_
+		patch **all_patches_;
+
+		// [pn] = starting grid point number of individual patches
+		// ... arrays are actually of size N_patches_+1, the [N_patches_]
+		//     entries are == N_grid_points_ and ghosted_N_grid_points_
+		// *** constructor initialization list ordering:
+		// *** these must be declared after  N_patches_
+		int *starting_gpn_;
+		int *ghosted_starting_gpn_;
+
+		// pointers to storage blocks for all gridfns
+		// ... patches point into these, but we own the storage blocks
+		fp *gridfn_storage_;
+		fp *ghosted_gridfn_storage_;
+
+		// min/max m over all ghost zone points
+		mutable int global_min_ym_, global_max_ym_;
+
+		// info about the surface interpolator
+		// ... used only by radius_in_local_xyz_direction()
+		int surface_interp_handle_, surface_interp_par_table_handle_;
+	};
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /*  TPATCH_SYSTEM_H  */
--- a/AMSS_NCKU_source/AHF_Direct/patch_system_info.h
+++ b/AMSS_NCKU_source/AHF_Direct/patch_system_info.h
@@ -0,0 +1,183 @@
+#ifndef TPATCH_SYSTEM_INFO_H
+#define TPATCH_SYSTEM_INFO_H
+namespace AHFinderDirect
+{
+
+	//******************************************************************************
+
+	//
+	// This namespace contains static data describing the patch sizes and
+	// shapes for each type of patch system.  Since this data only describes
+	// the patch sizes/shapes, we don't distinguish between the different
+	// boundary conditions.
+	//
+
+	namespace patch_system_info
+	{
+		//
+		// full-sphere patch system
+		// ... covers all 4pi steradians
+		// ... each  patch_number__*  is the index of that patch's entry in  patch_info_array[]
+		namespace full_sphere
+		{
+			enum
+			{
+				patch_number__pz = 0,
+				patch_number__px,
+				patch_number__py,
+				patch_number__mx,
+				patch_number__my,
+				patch_number__mz,
+				N_patches // = 6, the number of patches; no comma
+			};
+			static const struct patch_info patch_info_array[N_patches] = { // angle ranges are [min,max] pairs in degrees, as listed above each entry
+				// +z patch (90 x 90 degrees): dmu [ -45,    45], dnu  [ -45,    45]
+				{"+z", patch::patch_is_plus, 'z', -45.0, 45.0, -45.0, 45.0},
+
+				// +x patch (90 x 90 degrees): dnu [  45,   135], dphi [ -45,    45]
+				{"+x", patch::patch_is_plus, 'x', 45.0, 135.0, -45.0, 45.0},
+
+				// +y patch (90 x 90 degrees): dmu [  45,   135], dphi [  45,   135]
+				{"+y", patch::patch_is_plus, 'y', 45.0, 135.0, 45.0, 135.0},
+
+				// -x patch (90 x 90 degrees): dnu [-135,   -45], dphi [ 135,   225]
+				{"-x", patch::patch_is_minus, 'x', -135.0, -45.0, 135.0, 225.0},
+
+				// -y patch (90 x 90 degrees): dmu [-135,   -45], dphi [-135,   -45]
+				{"-y", patch::patch_is_minus, 'y', -135.0, -45.0, -135.0, -45.0},
+
+				// -z patch (90 x 90 degrees): dmu [ 135,   225], dnu  [ 135,   225]
+				{"-z", patch::patch_is_minus, 'z', 135.0, 225.0, 135.0, 225.0},
+			};
+		} // namespace patch_system_info::full_sphere
+
+		//
+		// +z hemisphere (half) patch system
+		// ... mirror symmetry across z=0 plane
+		// ... each  patch_number__*  is the index of that patch's entry in  patch_info_array[]
+		namespace plus_z_hemisphere
+		{
+			enum
+			{
+				patch_number__pz = 0,
+				patch_number__px,
+				patch_number__py,
+				patch_number__mx,
+				patch_number__my,
+				N_patches // = 5, the number of patches; no comma
+			};
+			static const struct patch_info patch_info_array[N_patches] = { // angle ranges are [min,max] pairs in degrees, as listed above each entry
+				// +z patch (90 x 90 degrees): dmu [ -45,    45], dnu  [ -45,    45]
+				{"+z", patch::patch_is_plus, 'z', -45.0, 45.0, -45.0, 45.0},
+
+				// +x patch (45 x 90 degrees): dnu [  45,    90], dphi [ -45,    45]
+				{"+x", patch::patch_is_plus, 'x', 45.0, 90.0, -45.0, 45.0},
+
+				// +y patch (45 x 90 degrees): dmu [  45,    90], dphi [  45,   135]
+				{"+y", patch::patch_is_plus, 'y', 45.0, 90.0, 45.0, 135.0},
+
+				// -x patch (45 x 90 degrees): dnu [ -90,   -45], dphi [ 135,   225]
+				{"-x", patch::patch_is_minus, 'x', -90.0, -45.0, 135.0, 225.0},
+
+				// -y patch (45 x 90 degrees): dmu [ -90,   -45], dphi [-135,   -45]
+				{"-y", patch::patch_is_minus, 'y', -90.0, -45.0, -135.0, -45.0},
+			};
+		} // namespace patch_system_info::plus_z_hemisphere
+
+		//
+		// +[xy] "vertical" quarter-grid (quadrant) patch system
+		// two types of boundary conditions:
+		// ... mirror symmetry across x=0 and y=0 planes
+		// ... 90 degree periodic rotation symmetry about z axis
+		// each  patch_number__*  is the index of that patch's entry in  patch_info_array[]
+		namespace plus_xy_quadrant
+		{
+			enum
+			{
+				patch_number__pz = 0,
+				patch_number__px,
+				patch_number__py,
+				patch_number__mz,
+				N_patches // = 4, the number of patches; no comma
+			};
+			static const struct patch_info patch_info_array[N_patches] = { // angle ranges are [min,max] pairs in degrees, as listed above each entry
+				// +z patch (45 x 45 degrees): dmu [   0,    45], dnu  [   0,    45]
+				{"+z", patch::patch_is_plus, 'z', 0.0, 45.0, 0.0, 45.0},
+
+				// +x patch (90 x 45 degrees): dnu [  45,   135], dphi [   0,    45]
+				{"+x", patch::patch_is_plus, 'x', 45.0, 135.0, 0.0, 45.0},
+
+				// +y patch (90 x 45 degrees): dmu [  45,   135], dphi [  45,    90]
+				{"+y", patch::patch_is_plus, 'y', 45.0, 135.0, 45.0, 90.0},
+
+				// -z patch (45 x 45 degrees): dmu [ 135,   180], dnu  [ 135,   180]
+				{"-z", patch::patch_is_minus, 'z', 135.0, 180.0, 135.0, 180.0},
+			};
+		} // namespace patch_system_info::plus_xy_quadrant
+
+		//
+		// +[xz] "horizontal" quarter-grid (quadrant) patch system
+		// two types of boundary conditions:
+		// ... mirror symmetry across x=0 plane, z=0 plane
+		// ... 180 degree periodic rotation symmetry about z axis,
+		//     mirror symmetry across z=0 plane
+		// each  patch_number__*  is the index of that patch's entry in  patch_info_array[]
+		namespace plus_xz_quadrant
+		{
+			enum
+			{
+				patch_number__pz = 0,
+				patch_number__px,
+				patch_number__py,
+				patch_number__my,
+				N_patches // = 4, the number of patches; no comma
+			};
+			static const struct patch_info patch_info_array[N_patches] = { // angle ranges are [min,max] pairs in degrees, as listed above each entry
+				// +z patch (90 x 45 degrees): dmu [ -45,    45], dnu  [   0,    45]
+				{"+z", patch::patch_is_plus, 'z', -45.0, 45.0, 0.0, 45.0},
+
+				// +x patch (45 x 90 degrees): dnu [  45,    90], dphi [ -45,    45]
+				{"+x", patch::patch_is_plus, 'x', 45.0, 90.0, -45.0, 45.0},
+
+				// +y patch (45 x 45 degrees): dmu [  45,    90], dphi [  45,    90]
+				{"+y", patch::patch_is_plus, 'y', 45.0, 90.0, 45.0, 90.0},
+
+				// -y patch (45 x 45 degrees): dmu [ -90,   -45], dphi [ -90,   -45]
+				{"-y", patch::patch_is_minus, 'y', -90.0, -45.0, -90.0, -45.0},
+			};
+		} // namespace patch_system_info::plus_xz_quadrant
+
+		//
+		// +[xyz] (octant) patch system
+		// two types of boundary conditions:
+		// ... mirror symmetry across x=0 plane, y=0 plane, z=0 plane
+		// ... 90 degree periodic rotation symmetry about z axis,
+		//     mirror symmetry across z=0 plane
+		// each  patch_number__*  is the index of that patch's entry in  patch_info_array[]
+		namespace plus_xyz_octant
+		{
+			enum
+			{
+				patch_number__pz = 0,
+				patch_number__px,
+				patch_number__py,
+				N_patches // = 3, the number of patches; no comma
+			};
+			static const struct patch_info patch_info_array[N_patches] = { // angle ranges are [min,max] pairs in degrees, as listed above each entry
+				// +z patch (45 x 45 degrees): dmu [   0,    45], dnu  [   0,    45]
+				{"+z", patch::patch_is_plus, 'z', 0.0, 45.0, 0.0, 45.0},
+
+				// +x patch (45 x 45 degrees): dnu [  45,    90], dphi [   0,    45]
+				{"+x", patch::patch_is_plus, 'x', 45.0, 90.0, 0.0, 45.0},
+
+				// +y patch (45 x 45 degrees): dmu [  45,    90], dphi [  45,    90]
+				{"+y", patch::patch_is_plus, 'y', 45.0, 90.0, 45.0, 90.0},
+			};
+		} // namespace patch_system_info::plus_xyz_octant
+
+	} // namespace patch_system_info
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /*  TPATCH_SYSTEM_INFO_H */
--- a/AMSS_NCKU_source/AHF_Direct/round.C
+++ b/AMSS_NCKU_source/AHF_Direct/round.C
@@ -0,0 +1,38 @@
+#include <stdlib.h>
+
+#include "stdc.h"
+#include "util.h"
+
+namespace AHFinderDirect
+{
+       namespace jtutil
+       {
+              // round to the nearest integer, with halfway cases going away from zero
+              template <typename fp_t>
+              int round<fp_t>::to_integer(fp_t x)
+              {
+                     if (x >= 0.0)
+                            return int(x + 0.5); // eg 3.6 --> int(4.1) = 4
+
+                     return -int((-x) + 0.5); // eg -3.6 --> - int(4.1) = -4
+              }
+
+              // largest integer <= x
+              // ... int() truncates toward zero, so the truncated value is
+              //     already the answer unless x is a negative non-integer,
+              //     in which case it lies above x and we step down by one
+              // ... this is deliberately non-recursive: bouncing a negated
+              //     argument between floor() and ceiling() never terminates
+              //     for a NaN, since (NaN >= 0.0) is false at every level
+              // ... x must be representable as an int (as before)
+              template <typename fp_t>
+              int round<fp_t>::floor(fp_t x)
+              {
+                     const int truncated = int(x);
+                     return (fp_t(truncated) > x)
+                                ? truncated - 1 // eg -3.6 --> int(-3.6) - 1 = -4
+                                : truncated;    // eg 3.6 --> 3, -3.0 --> -3
+              }
+
+              // smallest integer >= x
+              // ... int() truncates toward zero, so the truncated value is
+              //     already the answer unless x is a positive non-integer,
+              //     in which case it lies below x and we step up by one
+              // ... this is deliberately non-recursive: bouncing a negated
+              //     argument between ceiling() and floor() never terminates
+              //     for a NaN, since (NaN >= 0.0) is false at every level
+              // ... x must be representable as an int (as before)
+              template <typename fp_t>
+              int round<fp_t>::ceiling(fp_t x)
+              {
+                     const int truncated = int(x);
+                     return (fp_t(truncated) < x)
+                                ? truncated + 1 // eg 3.6 --> int(3.6) + 1 = 4
+                                : truncated;    // eg -3.6 --> -3, 3.0 --> 3
+              }
+
+              template class round<float>;
+              template class round<double>;
+
+       } // namespace jtutil
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/setup.C
+++ b/AMSS_NCKU_source/AHF_Direct/setup.C
@@ -0,0 +1,188 @@
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+
+#include <mpi.h>
+
+#include "util_Table.h"
+#include "cctk.h"
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "cpm_map.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "fd_grid.h"
+#include "patch.h"
+#include "patch_edge.h"
+#include "patch_interp.h"
+#include "ghost_zone.h"
+#include "patch_system.h"
+
+#include "Jacobian.h"
+
+#include "gfns.h"
+#include "gr.h"
+
+#include "horizon_sequence.h"
+#include "BH_diagnostics.h"
+#include "driver.h"
+using namespace std;
+
+#include "myglobal.h"
+#include "bssn_class.h"
+
+namespace AHFinderDirect
+{
+	struct state state;
+
+	using jtutil::error_exit;
+
+	namespace
+	{
+		// decide which horizons this processor handles
+		// ... horizons 1..N_horizons are dealt out in round-robin order
+		//     to the active processors 0..N_active_procs-1; those which
+		//     fall to  my_proc  are appended to  my_hs
+		// ... return value is the number of active processors
+		int allocate_horizons_to_processor(int N_procs, int my_proc,
+										   int N_horizons, bool multiproc_flag,
+										   horizon_sequence &my_hs)
+		{
+			int N_active_procs = 1;
+			if (multiproc_flag)
+				N_active_procs = Mymin(N_procs, N_horizons);
+
+			int owner = 0; // processor which gets the current horizon
+			int hn = 1;
+			while (hn <= N_horizons)
+			{
+				if (owner == my_proc)
+					my_hs.append_hn(hn);
+
+				// move on to the next active processor, wrapping around
+				++owner;
+				if (owner >= N_active_procs)
+					owner = 0;
+				++hn;
+			}
+
+			return N_active_procs;
+		}
+	}
+
+	extern struct state state;
+
+	void AHFinderDirect_setup(MyList<var> *AHList, MyList<var> *GaugeList, bssn_class *ADM,
+							  int Symmetry, int HN, double *PhysTime)
+	{ // one-time setup: fills in the global  state  and creates one patch system per horizon
+		enum patch_system::patch_system_type ps_type; // chosen from  Symmetry  below
+
+		switch (Symmetry) // 0 = full sphere, 1 = +z hemisphere, 2 = +xyz octant (mirrored)
+		{
+		case 2:
+			ps_type = patch_system::patch_system__plus_xyz_octant_mirrored;
+			break;
+		case 1:
+			ps_type = patch_system::patch_system__plus_z_hemisphere;
+			break;
+		case 0:
+			ps_type = patch_system::patch_system__full_sphere;
+			break;
+		default: // NOTE(review): ps_type stays unset on this path, so this relies on error_exit() not returning -- confirm
+			jtutil::error_exit(ERROR_EXIT, "** Symmetry=%d is not support by AHFD yet.", Symmetry);
+		}
+
+		int nprocs = 1, myrank = 0; // overwritten by the MPI queries below
+		MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+		MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+
+		state.PhysTime = PhysTime; // keep the caller's pointer (the value is not copied)
+		state.Symmetry = Symmetry;
+		state.AHList = AHList;
+		state.GaugeList = GaugeList;
+		state.ADM = ADM;
+		state.N_procs = nprocs;
+		state.my_proc = myrank;
+
+		state.N_horizons = HN;
+
+		//
+		// (genuine) horizon sequence for this processor
+		//
+		state.my_hs = new horizon_sequence(state.N_horizons);
+		horizon_sequence &hs = *state.my_hs;
+
+		const bool multiproc_flag = true; // i.e. N_active_procs = min(N_procs, N_horizons)
+		state.N_active_procs = allocate_horizons_to_processor(state.N_procs, state.my_proc,
+															  state.N_horizons, multiproc_flag,
+															  hs);
+
+		// ... horizon numbers run from 1 to N_horizons inclusive
+		//     so the array size is N_horizons+1
+		state.AH_data_array = new AH_data *[HN + 1];
+		for (int hn = 0; hn <= HN; ++hn)
+		{
+			state.AH_data_array[hn] = NULL;
+		}
+
+		int NNP = 0, NNP_out; // max N_grid_points(): over this processor's patch systems / over all processors
+		for (int hn = 1; hn <= hs.N_horizons(); ++hn)
+		{
+			const bool genuine_flag = hs.is_hn_genuine(hn);
+			state.AH_data_array[hn] = new AH_data;
+			struct AH_data &AH_data = *state.AH_data_array[hn];
+
+			AH_data.recentering_flag = false;
+			AH_data.stop_finding = false;
+
+			// create the patch system (arguments annotated in the order of the patch_system constructor's parameters)
+			AH_data.ps_ptr = new patch_system(0, 0, 0, // origin (x,y,z): dummy values, recentered later when the initial guess is set
+											  ps_type, 2, 1, // type, ghost_zone_width, patch_overlap_width
+											  20, 1, // N_zones_per_right_angle, min_gfn
+											  //			      (genuine_flag ? 53 : 0),
+											  (genuine_flag ? gfns::nominal_max_gfn
+															: gfns::skeletal_nominal_max_gfn), // max_gfn
+											  -1, -1, // ghosted_min_gfn, ghosted_max_gfn
+											  1, 1, // ip_interp_handle, ip_interp_par_table_handle
+											  1, 1, // surface_interp_handle, surface_interp_par_table_handle
+											  true, false); // print_summary_msg_flag, print_detailed_msg_flag
+			patch_system &ps = *AH_data.ps_ptr;
+
+			if (genuine_flag)
+				ps.set_gridfn_to_constant(1.0, gfns::gfn__one);
+
+			AH_data.Jac_ptr = genuine_flag ? new Jacobian(ps) : NULL; // no Jacobian for non-genuine horizons
+
+			AH_data.surface_expansion = 0;
+
+			AH_data.initial_find_flag = genuine_flag;
+
+			AH_data.found_flag = false;
+			AH_data.BH_diagnostics_fileptr = NULL;
+
+			NNP = Mymax(NNP, AH_data.ps_ptr->N_grid_points());
+		} // end of for hn
+
+		MPI_Allreduce(&NNP, &NNP_out, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); // global maximum, delivered to every processor
+
+		state.Data = new double[NNP_out * 35]; // NOTE(review): 35 doubles per grid point -- the meaning of 35 is not visible here, confirm
+		state.oX = new double[NNP_out];
+		state.oY = new double[NNP_out];
+		state.oZ = new double[NNP_out];
+	}
+	//
+	// This function releases everything allocated by AHFinderDirect_setup().
+	// ... it is safe to call if setup never ran ( state  has static storage,
+	//     so its pointers start out NULL) and safe to call more than once:
+	//     every pointer freed here is reset to NULL
+	// ... BH_diagnostics_fileptr  is not touched here
+	//
+	void AHFinderDirect_cleanup()
+	{
+		if (state.my_hs != NULL && state.AH_data_array != NULL)
+		{
+			// ... horizon numbers run from 1 to N_horizons inclusive
+			const int N_horizons = state.my_hs->N_horizons();
+			for (int hn = 1; hn <= N_horizons; ++hn)
+			{
+				struct AH_data *const AH_data_ptr = state.AH_data_array[hn];
+				if (AH_data_ptr == NULL)
+					continue;
+
+				// the Jacobian was constructed from the patch system,
+				// so destroy it before the patch system itself
+				// (delete of a NULL pointer is a no-op)
+				delete AH_data_ptr->Jac_ptr;
+				delete AH_data_ptr->ps_ptr;
+				delete AH_data_ptr;
+				state.AH_data_array[hn] = NULL;
+			} // end of for hn
+		}
+
+		delete[] state.AH_data_array;
+		state.AH_data_array = NULL;
+		delete state.my_hs;
+		state.my_hs = NULL;
+
+		delete[] state.oX;
+		state.oX = NULL;
+		delete[] state.oY;
+		state.oY = NULL;
+		delete[] state.oZ;
+		state.oZ = NULL;
+		delete[] state.Data;
+		state.Data = NULL;
+	}
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/stdc.h
+++ b/AMSS_NCKU_source/AHF_Direct/stdc.h
@@ -0,0 +1,24 @@
+#ifndef AHFINDERDIRECT__STDC_H
+#define AHFINDERDIRECT__STDC_H
+// Small project-wide conveniences: a few macros plus the error_exit() declaration.
+#define then /* empty */
+// PI becomes an alias for M_PI, but only if M_PI is already defined when this header is read.
+#ifdef M_PI
+#define PI M_PI
+#endif
+// iabs(x) expands to a plain abs(x) call; which abs() that resolves to depends on the point of use.
+#define iabs(x_) abs(x_)
+
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+		// takes a message level, a format string and variadic arguments; only declared here
+		int error_exit(int msg_level, const char *format, ...);
+		// NOTE(review): macros are not scoped by the namespace; presumably these are msg_level values -- confirm
+#define ERROR_EXIT (-1)
+#define PANIC_EXIT (-2)
+	}
+}
+
+#endif /* AHFINDERDIRECT__STDC_H */
--- a/AMSS_NCKU_source/AHF_Direct/tgrid.C
+++ b/AMSS_NCKU_source/AHF_Direct/tgrid.C
@@ -0,0 +1,128 @@
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+
+namespace AHFinderDirect
+{
+
+	//*****************************************************************************
+	//*****************************************************************************
+	//*****************************************************************************
+
+	//
+	// This function constructs a  grid_arrays  object from the integer index
+	// bounds and ghost-zone widths in  grid_array_pars_in ; no gridfn storage yet.
+	grid_arrays::grid_arrays(const grid_array_pars &grid_array_pars_in)
+		// storage pointers start out NULL; setup_gridfn_storage() allocates them later
+		: gridfn_data_(NULL),
+		  ghosted_gridfn_data_(NULL),
+
+		  // const members, fixed at 0 here; min_gfn() etc. read the array3d objects instead
+		  min_gfn_(0), max_gfn_(0),
+		  ghosted_min_gfn_(0), ghosted_max_gfn_(0),
+		  // nominal-grid index bounds, copied straight from the parameter struct
+		  min_irho_(grid_array_pars_in.min_irho),
+		  max_irho_(grid_array_pars_in.max_irho),
+		  min_isigma_(grid_array_pars_in.min_isigma),
+		  max_isigma_(grid_array_pars_in.max_isigma),
+		  // ghosted bounds = nominal bounds widened by the per-side ghost zone widths
+		  ghosted_min_irho_(grid_array_pars_in.min_irho - grid_array_pars_in.min_rho_ghost_zone_width),
+		  ghosted_max_irho_(grid_array_pars_in.max_irho + grid_array_pars_in.max_rho_ghost_zone_width),
+		  ghosted_min_isigma_(grid_array_pars_in.min_isigma - grid_array_pars_in.min_sigma_ghost_zone_width),
+		  ghosted_max_isigma_(grid_array_pars_in.max_isigma + grid_array_pars_in.max_sigma_ghost_zone_width)
+	// no comma
+	{
+	}
+
+	//*****************************************************************************
+
+	//
+	// Create the two array3d<fp> gridfn containers (nominal first, then ghosted)
+	// for this  grid_arrays  object; each one may be created only once.
+	void grid_arrays::setup_gridfn_storage(const gridfn_pars &gridfn_pars_in,
+										   const gridfn_pars &ghosted_gridfn_pars_in)
+	{
+		typedef jtutil::array3d<fp> gridfn_array;
+		const gridfn_pars &nominal = gridfn_pars_in;
+		const gridfn_pars &ghosted = ghosted_gridfn_pars_in;
+
+		// nominal grid: indices are (gfn, irho, isigma)
+		assert(gridfn_data_ == NULL);
+		gridfn_data_ = new gridfn_array(nominal.min_gfn, nominal.max_gfn,
+										min_irho_, max_irho_,
+										min_isigma_, max_isigma_,
+										nominal.storage_array, nominal.gfn_stride,
+										nominal.irho_stride, nominal.isigma_stride);
+
+		// ghosted grid: same layout over the ghost-zone-extended index range
+		assert(ghosted_gridfn_data_ == NULL);
+		ghosted_gridfn_data_ = new gridfn_array(ghosted.min_gfn, ghosted.max_gfn,
+												ghosted_min_irho_, ghosted_max_irho_,
+												ghosted_min_isigma_, ghosted_max_isigma_,
+												ghosted.storage_array, ghosted.gfn_stride,
+												ghosted.irho_stride, ghosted.isigma_stride);
+	}
+
+	//******************************************************************************
+
+	//
+	// This function destroys a  grid_arrays  object: it deletes the two array3d
+	// objects (ghosted first); a pointer that is still NULL is harmless to delete.
+	grid_arrays::~grid_arrays()
+	{
+		delete ghosted_gridfn_data_;
+		delete gridfn_data_;
+	}
+
+	//*****************************************************************************
+	//*****************************************************************************
+	//*****************************************************************************
+
+	//
+	// Build a  grid : the base class fixes the integer index ranges, then the two
+	// linear maps tie the ghosted index range to angles (degrees in, radians stored).
+	grid::grid(const grid_array_pars &grid_array_pars_in,
+			   const grid_pars &grid_pars_in)
+
+		: grid_arrays(grid_array_pars_in),
+
+		  // the base class is fully constructed by now, so its ghosted bounds can be reused
+		  rho_map_(ghosted_min_irho(), ghosted_max_irho(),
+				   jtutil::radians_of_degrees(
+					   grid_pars_in.min_drho - grid_array_pars_in.min_rho_ghost_zone_width * grid_pars_in.delta_drho),
+				   jtutil::radians_of_degrees(grid_pars_in.delta_drho),
+				   jtutil::radians_of_degrees(
+					   grid_pars_in.max_drho + grid_array_pars_in.max_rho_ghost_zone_width * grid_pars_in.delta_drho)),
+
+		  // sigma axis: same construction as for rho
+		  sigma_map_(ghosted_min_isigma(), ghosted_max_isigma(),
+					 jtutil::radians_of_degrees(
+						 grid_pars_in.min_dsigma - grid_array_pars_in.min_sigma_ghost_zone_width * grid_pars_in.delta_dsigma),
+					 jtutil::radians_of_degrees(grid_pars_in.delta_dsigma),
+					 jtutil::radians_of_degrees(
+						 grid_pars_in.max_dsigma + grid_array_pars_in.max_sigma_ghost_zone_width * grid_pars_in.delta_dsigma)),
+
+		  min_rho_(jtutil::radians_of_degrees(grid_pars_in.min_drho)),
+		  max_rho_(jtutil::radians_of_degrees(grid_pars_in.max_drho)),
+		  min_sigma_(jtutil::radians_of_degrees(grid_pars_in.min_dsigma)),
+		  max_sigma_(jtutil::radians_of_degrees(grid_pars_in.max_dsigma))
+	// nominal-grid angular bounds above are stored in radians; no comma after the last one
+	{
+	}
+
+	//******************************************************************************
+	//******************************************************************************
+	//******************************************************************************
+
+} // namespace AHFinderDirect
--- a/AMSS_NCKU_source/AHF_Direct/tgrid.h
+++ b/AMSS_NCKU_source/AHF_Direct/tgrid.h
@@ -0,0 +1,907 @@
+#ifndef TGRID_H
+#define TGRID_H
+namespace AHFinderDirect
+{
+
+	//*****************************************************************************
+
+	//
+	// grid_arrays - data arrays for a 2D tensor-product grid
+	//
+	// This is a helper class for class grid (below).  This class stores
+	// most of the actual grid function (gridfn) data arrays for a uniform
+	// tensor-product 2D grid.
+	//
+	// The integer grid coordinates are (irho,isigma).  This class deals
+	// with the grid solely at the level of arrays with integer subscripts;
+	// the derived class  grid  deals with the floating-point coordinates
+	// related to those subscripts.
+	//
+	// The grid has a nominal extent, surrounded by "ghost zones" on each
+	// side for finite differencing purposes.
+	//
+	// There are separate sets of nominal-grid and ghosted-grid gridfns.
+	// We identify a gridfn by a small-integer "grid function number", a.k.a.
+	// "gfn".  There are separate gfns for nominal and ghosted gridfns.
+	// In a very few places we refer to "unknown-grid" gridfns; these might
+	// be either nominal-grid or ghosted-grid.
+	//
+	// For our application (apparent horizon finding), it's useful for the
+	// storage for a single gridfn to be contiguous *across all patches*.
+	// (Note this means that the set of all our gridfns is *not* contiguous!)
+	// To accomplish this, we don't allocate the gridfns when we're created,
+	// but rather later, with a separate call  setup_gridfn_storage() .
+	// This way higher-level code can first create all patches, then count
+	// the total amount of storage used, allocate it, then finally call each
+	// patch again to set up its gridfns appropriately.
+	//
+
+	class grid_arrays
+	{
+	public:
+		//
+		// ***** {min,max}_{rho,sigma} "sides" of grid *****
+		//
+
+		//
+		// A grid has 4 (angular) "sides", which we identify as
+		// {min,max}_{rho,sigma}.  Given a side, we define coordinates
+		// (perpendicular,parallel) to it, normally abbreviated to
+		// (perp,par).
+		//
+		// As well as functions directly referring to a specific side,
+		// we also support referring to one of these chosen at run-time,
+		// via Boolean flags:
+		//
+		//	// generic (irho,isigma) coordinate
+		//	iang = want_rho ? irho : isigma
+		//
+		//	// opposite (irho,isigma) coordinate
+		//	ixang = want_rho ? isigma : irho
+		//
+		//	// generic (min,max) direction
+		//	minmax = want_min ? min : max
+		//
+		// FIXME: This system of Boolean flags works ok, but it requires
+		//	  a lot of repetitive code conditional-expression functions
+		//	  in this class.  Is there a cleaner solution?
+
+		// there are precisely this many possible sides
+		enum
+		{
+			N_sides = 4
+		};
+
+		// we specify {min,max} with a Boolean  want_min
+		// ... values for want_min
+		//     FIXME: these should really be bool, but then we couldn't
+		//            use the "enum hack" for in-class constants
+		enum
+		{
+			side_is_min = true,
+			side_is_max = false
+		};
+
+		// we specify {rho,sigma} with a Boolean  want_rho
+		// ... values for want_rho
+		//     FIXME: these should really be bool, but then we couldn't
+		//            use the "enum hack" for in-class constants
+		enum
+		{
+			side_is_rho = true,
+			side_is_sigma = false
+		};
+
+		// human-readable names for the sides (for debugging)
+		static const char *minmax_name(bool minmax)
+		{
+			return minmax ? "min" : "max";
+		}
+		static const char *iang_name(bool want_rho)
+		{
+			return want_rho ? "irho" : "isigma";
+		}
+
+		//
+		// ***** array info *****
+		//
+	public:
+		// nominal-grid min/max/sizes
+		int min_irho() const { return min_irho_; }
+		int max_irho() const { return max_irho_; }
+		int min_isigma() const { return min_isigma_; }
+		int max_isigma() const { return max_isigma_; }
+		int min_iang(bool want_rho) const
+		{
+			return want_rho ? min_irho() : min_isigma();
+		}
+		int max_iang(bool want_rho) const
+		{
+			return want_rho ? max_irho() : max_isigma();
+		}
+		int minmax_iang(bool want_min, bool want_rho) const
+		{
+			return want_min ? min_iang(want_rho) : max_iang(want_rho);
+		}
+		int N_irho() const
+		{
+			return jtutil::how_many_in_range(min_irho(), max_irho());
+		}
+		int N_isigma() const
+		{
+			return jtutil::how_many_in_range(min_isigma(), max_isigma());
+		}
+		int N_grid_points() const
+		{
+			return N_irho() * N_isigma();
+		}
+
+		// ghosted-grid min/max/sizes
+		int ghosted_min_irho() const { return ghosted_min_irho_; }
+		int ghosted_max_irho() const { return ghosted_max_irho_; }
+		int ghosted_min_isigma() const
+		{
+			return ghosted_min_isigma_;
+		}
+		int ghosted_max_isigma() const
+		{
+			return ghosted_max_isigma_;
+		}
+		int ghosted_min_iang(bool want_rho) const
+		{
+			return want_rho ? ghosted_min_irho()
+							: ghosted_min_isigma();
+		}
+		int ghosted_max_iang(bool want_rho) const
+		{
+			return want_rho ? ghosted_max_irho()
+							: ghosted_max_isigma();
+		}
+		int ghosted_minmax_iang(bool want_min, bool want_rho) const
+		{
+			return want_min ? ghosted_min_iang(want_rho)
+							: ghosted_max_iang(want_rho);
+		}
+		int ghosted_N_irho() const
+		{
+			return jtutil::how_many_in_range(ghosted_min_irho(),
+											 ghosted_max_irho());
+		}
+		int ghosted_N_isigma() const
+		{
+			return jtutil::how_many_in_range(ghosted_min_isigma(),
+											 ghosted_max_isigma());
+		}
+		int ghosted_N_grid_points() const
+		{
+			return ghosted_N_irho() * ghosted_N_isigma();
+		}
+
+		// "effective" grid min/max/sizes
+		// (= dynamic select between nominal and full grids)
+		int effective_min_irho(bool want_ghost_zones) const
+		{
+			return want_ghost_zones ? ghosted_min_irho() : min_irho();
+		}
+		int effective_max_irho(bool want_ghost_zones) const
+		{
+			return want_ghost_zones ? ghosted_max_irho() : max_irho();
+		}
+		int effective_min_isigma(bool want_ghost_zones) const
+		{
+			return want_ghost_zones ? ghosted_min_isigma() : min_isigma();
+		}
+		int effective_max_isigma(bool want_ghost_zones) const
+		{
+			return want_ghost_zones ? ghosted_max_isigma() : max_isigma();
+		}
+		int effective_N_irho(bool want_ghost_zones) const
+		{
+			return want_ghost_zones ? ghosted_N_irho() : N_irho();
+		}
+		int effective_N_isigma(bool want_ghost_zones) const
+		{
+			return want_ghost_zones ? ghosted_N_isigma() : N_isigma();
+		}
+
+		//
+		// ***** ghost zones *****
+		//
+	public:
+		// ghost zone min/max perpendicular coordinates
+		int min_rho_ghost_zone__min_iperp() const
+		{
+			return ghosted_min_irho();
+		}
+		int min_rho_ghost_zone__max_iperp() const
+		{
+			return min_irho() - 1;
+		}
+		int max_rho_ghost_zone__min_iperp() const
+		{
+			return max_irho() + 1;
+		}
+		int max_rho_ghost_zone__max_iperp() const
+		{
+			return ghosted_max_irho();
+		}
+		int min_sigma_ghost_zone__min_iperp() const
+		{
+			return ghosted_min_isigma();
+		}
+		int min_sigma_ghost_zone__max_iperp() const
+		{
+			return min_isigma() - 1;
+		}
+		int max_sigma_ghost_zone__min_iperp() const
+		{
+			return max_isigma() + 1;
+		}
+		int max_sigma_ghost_zone__max_iperp() const
+		{
+			return ghosted_max_isigma();
+		}
+		int minmax_ang_ghost_zone__min_iperp(bool want_min, bool want_rho) const
+		{
+			return want_min
+					   ? (want_rho ? min_rho_ghost_zone__min_iperp()
+								   : min_sigma_ghost_zone__min_iperp())
+					   : (want_rho ? max_rho_ghost_zone__min_iperp()
+								   : max_sigma_ghost_zone__min_iperp());
+		}
+		int minmax_ang_ghost_zone__max_iperp(bool want_min, bool want_rho) const
+		{
+			return want_min
+					   ? (want_rho ? min_rho_ghost_zone__max_iperp()
+								   : min_sigma_ghost_zone__max_iperp())
+					   : (want_rho ? max_rho_ghost_zone__max_iperp()
+								   : max_sigma_ghost_zone__max_iperp());
+		}
+
+		// ghost zone min/max parallel coordinates
+		// ... not including corners
+		int rho_ghost_zone_without_corners__min_ipar() const
+		{
+			return min_isigma();
+		}
+		int rho_ghost_zone_without_corners__max_ipar() const
+		{
+			return max_isigma();
+		}
+		int sigma_ghost_zone_without_corners__min_ipar() const
+		{
+			return min_irho();
+		}
+		int sigma_ghost_zone_without_corners__max_ipar() const
+		{
+			return max_irho();
+		}
+		int ang_ghost_zone_without_corners__min_ipar(bool want_rho) const
+		{
+			return want_rho ? rho_ghost_zone_without_corners__min_ipar()
+							: sigma_ghost_zone_without_corners__min_ipar();
+		}
+		int ang_ghost_zone_without_corners__max_ipar(bool want_rho) const
+		{
+			return want_rho ? rho_ghost_zone_without_corners__max_ipar()
+							: sigma_ghost_zone_without_corners__max_ipar();
+		}
+		// ... including corners
+		int rho_ghost_zone_with_corners__min_ipar() const
+		{
+			return ghosted_min_isigma();
+		}
+		int rho_ghost_zone_with_corners__max_ipar() const
+		{
+			return ghosted_max_isigma();
+		}
+		int sigma_ghost_zone_with_corners__min_ipar() const
+		{
+			return ghosted_min_irho();
+		}
+		int sigma_ghost_zone_with_corners__max_ipar() const
+		{
+			return ghosted_max_irho();
+		}
+		int ang_ghost_zone_with_corners__min_ipar(bool want_rho) const
+		{
+			return want_rho ? rho_ghost_zone_with_corners__min_ipar()
+							: sigma_ghost_zone_with_corners__min_ipar();
+		}
+		int ang_ghost_zone_with_corners__max_ipar(bool want_rho) const
+		{
+			return want_rho ? rho_ghost_zone_with_corners__max_ipar()
+							: sigma_ghost_zone_with_corners__max_ipar();
+		}
+
+		//
+		// ***** grid-point validity and membership predicates *****
+		//
+	public:
+		bool is_valid_irho(int irho) const
+		{
+			return (irho >= min_irho()) && (irho <= max_irho());
+		}
+		bool is_valid_isigma(int isigma) const
+		{
+			return (isigma >= min_isigma()) && (isigma <= max_isigma());
+		}
+		bool is_in_nominal_grid(int irho, int isigma) const
+		{
+			return is_valid_irho(irho) && is_valid_isigma(isigma);
+		}
+
+		bool is_valid_ghosted_irho(int irho) const
+		{
+			return (irho >= ghosted_min_irho()) && (irho <= ghosted_max_irho());
+		}
+		bool is_valid_ghosted_isigma(int isigma) const
+		{
+			return (isigma >= ghosted_min_isigma()) && (isigma <= ghosted_max_isigma());
+		}
+		bool is_in_ghosted_grid(int irho, int isigma) const
+		{
+			return is_valid_ghosted_irho(irho) && is_valid_ghosted_isigma(isigma);
+		}
+
+		bool is_in_ghost_zone(int irho, int isigma) const
+		{
+			return is_in_ghosted_grid(irho, isigma) && !is_in_nominal_grid(irho, isigma);
+		}
+
+		//
+		// ***** gfn ranges and validity predicates *****
+		//
+	public:
+		// gfn ranges (read from the array3d objects, so only valid after setup_gridfn_storage())
+		int min_gfn() const
+		{
+			assert(gridfn_data_ != NULL);
+			return (*gridfn_data_).min_i();
+		}
+		int max_gfn() const
+		{
+			assert(gridfn_data_ != NULL);
+			return (*gridfn_data_).max_i();
+		}
+		int N_gridfns() const
+		{
+			return jtutil::how_many_in_range(min_gfn(), max_gfn());
+		}
+		int ghosted_min_gfn() const
+		{
+			assert(ghosted_gridfn_data_ != NULL);
+			return (*ghosted_gridfn_data_).min_i();
+		}
+		int ghosted_max_gfn() const
+		{
+			assert(ghosted_gridfn_data_ != NULL);
+			return (*ghosted_gridfn_data_).max_i();
+		}
+		int ghosted_N_gridfns() const
+		{
+			return jtutil::how_many_in_range(ghosted_min_gfn(),
+											 ghosted_max_gfn());
+		}
+
+		// gfn validity predicates
+		bool is_valid_gfn(int gfn) const
+		{
+			return (gfn >= min_gfn()) && (gfn <= max_gfn());
+		}
+		bool is_valid_ghosted_gfn(int gfn) const
+		{
+			return (gfn >= ghosted_min_gfn()) && (gfn <= ghosted_max_gfn());
+		}
+
+		//
+		// ***** gridfns *****
+		//
+		// n.b. access to rvalue gridfn data must be via references
+		//	in order to allow using  gridfn(...)  as the operand
+		//	of a unary & (address-of) operator
+		//
+	public:
+		// access to nominal-grid gridfn data
+		// ... rvalue
+		const fp &gridfn(int gfn, int irho, int isigma) const
+		{
+			assert(gridfn_data_ != NULL);
+			return (*gridfn_data_)(gfn, irho, isigma);
+		}
+		// ... lvalue
+		fp &gridfn(int gfn, int irho, int isigma)
+		{
+			assert(gridfn_data_ != NULL);
+			return (*gridfn_data_)(gfn, irho, isigma);
+		}
+
+		// access to ghosted-grid gridfn data (guarded by the *ghosted* storage pointer)
+		// ... rvalue
+		const fp &ghosted_gridfn(int gfn, int irho, int isigma) const
+		{
+			assert(ghosted_gridfn_data_ != NULL);
+			return (*ghosted_gridfn_data_)(gfn, irho, isigma);
+		}
+		// ... lvalue
+		fp &ghosted_gridfn(int gfn, int irho, int isigma)
+		{
+			assert(ghosted_gridfn_data_ != NULL);
+			return (*ghosted_gridfn_data_)(gfn, irho, isigma);
+		}
+
+		// access to unknown-grid gridfn data
+		// (either nominal or ghosted, depending on Boolean flag)
+		// ... rvalue
+		const fp &unknown_gridfn(bool ghosted_flag,
+								 int unknown_gfn, int irho, int isigma)
+			const
+		{
+			return ghosted_flag ? ghosted_gridfn(unknown_gfn, irho, isigma)
+								: gridfn(unknown_gfn, irho, isigma);
+		}
+		// ... lvalue
+		fp &unknown_gridfn(bool ghosted_flag,
+						   int unknown_gfn, int irho, int isigma)
+		{
+			return ghosted_flag ? ghosted_gridfn(unknown_gfn, irho, isigma)
+								: gridfn(unknown_gfn, irho, isigma);
+		}
+
+		// subscripting info
+		int gfn_stride() const
+		{
+			assert(gridfn_data_ != NULL);
+			return gridfn_data_->subscript_stride_i();
+		}
+		int irho_stride() const
+		{
+			assert(gridfn_data_ != NULL);
+			return gridfn_data_->subscript_stride_j();
+		}
+		int isigma_stride() const
+		{
+			assert(gridfn_data_ != NULL);
+			return gridfn_data_->subscript_stride_k();
+		}
+		int iang_stride(bool want_rho) const
+		{
+			return want_rho ? irho_stride() : isigma_stride();
+		}
+		int ghosted_gfn_stride() const
+		{
+			assert(ghosted_gridfn_data_ != NULL);
+			return ghosted_gridfn_data_->subscript_stride_i();
+		}
+		int ghosted_irho_stride() const
+		{
+			assert(ghosted_gridfn_data_ != NULL);
+			return ghosted_gridfn_data_->subscript_stride_j();
+		}
+		int ghosted_isigma_stride() const
+		{
+			assert(ghosted_gridfn_data_ != NULL);
+			return ghosted_gridfn_data_->subscript_stride_k();
+		}
+		int ghosted_iang_stride(bool want_rho) const
+		{
+			return want_rho ? ghosted_irho_stride()
+							: ghosted_isigma_stride();
+		}
+
+		// validity predicates for 1-D 0-origin grid point number (gpn)
+		bool is_valid_gpn(int gpn) const
+		{
+			return (gpn >= 0) && (gpn < N_grid_points());
+		}
+		bool is_valid_ghosted_gpn(int gpn) const
+		{
+			return (gpn >= 0) && (gpn < ghosted_N_grid_points());
+		}
+
+		// convert (irho,isigma) <--> 1-D 0-origin grid point number (gpn)
+		int gpn_of_irho_isigma(int irho, int isigma) const
+		{
+			assert(is_valid_irho(irho));
+			assert(is_valid_isigma(isigma));
+
+			return (irho - min_irho()) * irho_stride() + (isigma - min_isigma()) * isigma_stride();
+		}
+		int ghosted_gpn_of_irho_isigma(int irho, int isigma) const
+		{
+			assert(is_valid_ghosted_irho(irho));
+			assert(is_valid_ghosted_isigma(isigma));
+			return (irho - ghosted_min_irho()) * ghosted_irho_stride() + (isigma - ghosted_min_isigma()) * ghosted_isigma_stride();
+		}
+		// ... assumes (& verifies) isigma is contiguous; NOTE: also relies, unverified, on irho_stride() == N_isigma()
+		void irho_isigma_of_gpn(int gpn, int &irho, int &isigma) const
+		{
+			assert(is_valid_gpn(gpn));
+			assert(isigma_stride() == 1); // implementation restriction
+			irho = min_irho() + gpn / N_isigma();
+			isigma = min_isigma() + gpn % N_isigma();
+			assert(is_valid_irho(irho));
+			assert(is_valid_isigma(isigma));
+		}
+		// ... same contiguity check; NOTE: also relies, unverified, on ghosted_irho_stride() == ghosted_N_isigma()
+		void ghosted_irho_isigma_of_gpn(int gpn, int &irho, int &isigma) const
+		{
+			assert(is_valid_ghosted_gpn(gpn));
+			assert(ghosted_isigma_stride() == 1); // implementation
+												  // restriction
+			irho = ghosted_min_irho() + gpn / ghosted_N_isigma();
+			isigma = ghosted_min_isigma() + gpn % ghosted_N_isigma();
+			assert(is_valid_ghosted_irho(irho));
+			assert(is_valid_ghosted_isigma(isigma));
+		}
+
+		// low-level access to data arrays (!!dangerous!!)
+		const fp *gridfn_data_array(int gfn) const
+		{
+			return &gridfn(gfn, min_irho(), min_isigma());
+		}
+		fp *gridfn_data_array(int gfn)
+		{
+			return &gridfn(gfn, min_irho(), min_isigma());
+		}
+		const fp *ghosted_gridfn_data_array(int ghosted_gfn) const
+		{
+			return &ghosted_gridfn(ghosted_gfn, ghosted_min_irho(),
+								   ghosted_min_isigma());
+		}
+		fp *ghosted_gridfn_data_array(int ghosted_gfn)
+		{
+			return &ghosted_gridfn(ghosted_gfn, ghosted_min_irho(),
+								   ghosted_min_isigma());
+		}
+
+		//
+		// ***** argument structures for constructor et al *****
+		//
+	public:
+		// these structures bundle related arguments together so we don't
+		// have 20+ (!) separate arguments to our top-level constructors
+		struct grid_array_pars
+		{
+			int min_irho, max_irho;
+			int min_isigma, max_isigma;
+			int min_rho_ghost_zone_width, max_rho_ghost_zone_width;
+			int min_sigma_ghost_zone_width, max_sigma_ghost_zone_width;
+		};
+		struct gridfn_pars
+		{
+			int min_gfn, max_gfn;
+
+			// gridfn storage will be automatically allocated
+			// if pointer is NULL; any 0 strides are automatically
+			// set to C-style row-major subscripting
+			fp *storage_array;
+			int gfn_stride, irho_stride, isigma_stride;
+		};
+
+		//
+		// ***** constructor, gridfn setup, destructor *****
+		//
+	public:
+		// construct with no gridfns
+		grid_arrays(const grid_array_pars &grid_array_pars_in);
+
+		// set up storage for gridfns
+		void setup_gridfn_storage(const gridfn_pars &gridfn_pars_in,
+								  const gridfn_pars &ghosted_gridfn_pars_in);
+
+		~grid_arrays();
+
+	private:
+		// we forbid copying and passing by value
+		// by declaring the copy constructor and assignment operator
+		// private, but never defining them
+		grid_arrays(const grid_arrays &rhs);
+		grid_arrays &operator=(const grid_arrays &rhs);
+
+	private:
+		//
+		// ***** the actual gridfn storage arrays *****
+		//
+		// n.b. these pointers are *first* data member in this class
+		// ==> possibly slightly faster access (0 offset from pointer)
+		// ... indices are (gfn, irho, isigma)
+		jtutil::array3d<fp> *gridfn_data_;
+		jtutil::array3d<fp> *ghosted_gridfn_data_;
+
+		// gfn bounds (const and not read by any accessor in this class; the accessors query the array3d objects)
+		const int min_gfn_, max_gfn_;
+		const int ghosted_min_gfn_, ghosted_max_gfn_;
+
+		// nominal grid min/max bounds
+		const int min_irho_, max_irho_;
+		const int min_isigma_, max_isigma_;
+
+		// full grid min/max bounds
+		const int ghosted_min_irho_, ghosted_max_irho_;
+		const int ghosted_min_isigma_, ghosted_max_isigma_;
+	};
+
+	//******************************************************************************
+
+	//
+	// grid - uniform 2D tensor-product grid
+	//
+	// The grid is uniform in the floating point grid coordinates (rho,sigma).
+	// There is also some (limited) support for expressing these coordinates
+	// in degrees (drho,dsigma); this is useful for humans trying to specify
+	// things in parameter files.
+	//
+	// The nominal (not including the ghost zones) angular grid boundaries
+	// may coincide with grid points, or they may be at "half-integer" grid
+	// coordinates.  That is, suppose we have a unit grid spacing, and a boundary
+	// at an angular coordinate of 0; then the grid may be either 0, 1, 2, ...,
+	// or 0.5, 1.5, 2.5, ... .
+	//
+
+	class grid
+		: public grid_arrays
+	{
+		//
+		// ***** the underlying index <--> angle maps *****
+		//
+	public:
+		// read-only references to the two linear_map objects themselves
+		// ... handy when a caller (eg an interpolator) wants the whole map
+		const jtutil::linear_map<fp> &rho_map() const { return this->rho_map_; }
+		const jtutil::linear_map<fp> &sigma_map() const { return this->sigma_map_; }
+		const jtutil::linear_map<fp> &ang_map(bool want_rho) const
+		{
+			return want_rho ? rho_map_ : sigma_map_;
+		}
+
+		//
+		// ***** conversions along one axis *****
+		//
+	public:
+		// ... with angles measured in radians
+		fp rho_of_irho(int irho) const { return rho_map_.fp_of_int(irho); }
+		fp sigma_of_isigma(int isigma) const
+		{
+			return sigma_map_.fp_of_int(isigma);
+		}
+		fp ang_of_iang(bool want_rho, int iang) const
+		{
+			const jtutil::linear_map<fp> &axis_map = ang_map(want_rho);
+			return axis_map.fp_of_int(iang);
+		}
+
+		fp fp_irho_of_rho(fp rho) const
+		{
+			return rho_map_.fp_int_of_fp(rho);
+		}
+		int irho_of_rho(fp rho, jtutil::linear_map<fp>::noninteger_action
+									nia = jtutil::linear_map<fp>::nia_error)
+			const
+		{
+			return rho_map_.int_of_fp(rho, nia);
+		}
+		fp fp_isigma_of_sigma(fp sigma) const
+		{
+			return sigma_map_.fp_int_of_fp(sigma);
+		}
+		int isigma_of_sigma(fp sigma, jtutil::linear_map<fp>::noninteger_action
+										  nia = jtutil::linear_map<fp>::nia_error)
+			const
+		{
+			return sigma_map_.int_of_fp(sigma, nia);
+		}
+		fp fp_iang_of_ang(bool want_rho, fp ang)
+			const
+		{
+			const jtutil::linear_map<fp> &axis_map = ang_map(want_rho);
+			return axis_map.fp_int_of_fp(ang);
+		}
+		int iang_of_ang(bool want_rho,
+						fp ang, jtutil::linear_map<fp>::noninteger_action nia = jtutil::linear_map<fp>::nia_error)
+			const
+		{
+			const jtutil::linear_map<fp> &axis_map = ang_map(want_rho);
+			return axis_map.int_of_fp(ang, nia);
+		}
+
+		// ... with angles measured in degrees
+		fp rho_of_drho(fp drho) const
+		{
+			return jtutil::radians_of_degrees(drho);
+		}
+		fp sigma_of_dsigma(fp dsigma) const
+		{
+			return jtutil::radians_of_degrees(dsigma);
+		}
+		fp drho_of_rho(fp rho) const
+		{
+			return jtutil::degrees_of_radians(rho);
+		}
+		fp dsigma_of_sigma(fp sigma) const
+		{
+			return jtutil::degrees_of_radians(sigma);
+		}
+		fp drho_of_irho(int irho) const
+		{
+			return drho_of_rho(rho_map_.fp_of_int(irho));
+		}
+		fp dsigma_of_isigma(int isigma) const
+		{
+			return dsigma_of_sigma(sigma_map_.fp_of_int(isigma));
+		}
+
+		int irho_of_drho(fp drho, jtutil::linear_map<fp>::noninteger_action
+									  nia = jtutil::linear_map<fp>::nia_error)
+			const
+		{
+			return rho_map_.int_of_fp(rho_of_drho(drho), nia);
+		}
+		int isigma_of_dsigma(fp dsigma,
+							 jtutil::linear_map<fp>::noninteger_action
+								 nia = jtutil::linear_map<fp>::nia_error)
+			const
+		{
+			return sigma_map_.int_of_fp(sigma_of_dsigma(dsigma), nia);
+		}
+
+		//
+		// ***** properties of the grid as a whole *****
+		//
+	public:
+		// spacing between adjacent grid points (radians, or degrees for the d* forms)
+		fp delta_rho() const { return rho_map_.delta_fp(); }
+		fp delta_sigma() const { return sigma_map_.delta_fp(); }
+		fp delta_drho() const
+		{
+			return drho_of_rho(rho_map_.delta_fp());
+		}
+		fp delta_dsigma() const
+		{
+			return dsigma_of_sigma(sigma_map_.delta_fp());
+		}
+		fp delta_ang(bool want_rho) const
+		{
+			return ang_map(want_rho).delta_fp();
+		}
+		fp delta_dang(bool want_rho) const
+		{
+			return jtutil::degrees_of_radians(delta_ang(want_rho));
+		}
+
+		// reciprocals of the (radian) grid spacings, as reported by the maps
+		fp inverse_delta_rho() const { return rho_map_.inverse_delta_fp(); }
+		fp inverse_delta_sigma() const
+		{
+			return sigma_map_.inverse_delta_fp();
+		}
+
+		// angular extent of the nominal grid
+		fp min_rho() const { return this->min_rho_; }
+		fp max_rho() const { return this->max_rho_; }
+		fp min_sigma() const { return this->min_sigma_; }
+		fp max_sigma() const { return this->max_sigma_; }
+		fp minmax_ang(bool want_min, bool want_rho) const
+		{
+			if (want_min) return want_rho ? min_rho_ : min_sigma_;
+			return want_rho ? max_rho_ : max_sigma_;
+		}
+		fp min_drho() const { return drho_of_rho(min_rho_); }
+		fp max_drho() const { return drho_of_rho(max_rho_); }
+		fp min_dsigma() const
+		{
+			return dsigma_of_sigma(min_sigma_);
+		}
+		fp max_dsigma() const
+		{
+			return dsigma_of_sigma(max_sigma_);
+		}
+		fp min_dang(bool want_rho) const
+		{
+			return jtutil::degrees_of_radians(want_rho ? min_rho_ : min_sigma_);
+		}
+		fp max_dang(bool want_rho) const
+		{
+			return jtutil::degrees_of_radians(want_rho ? max_rho_ : max_sigma_);
+		}
+
+		// angular extent of the ghosted grid (angles of the outermost ghosted indices)
+		fp ghosted_min_rho() const
+		{
+			return rho_map_.fp_of_int(ghosted_min_irho());
+		}
+		fp ghosted_max_rho() const
+		{
+			return rho_map_.fp_of_int(ghosted_max_irho());
+		}
+		fp ghosted_min_sigma() const
+		{
+			return sigma_map_.fp_of_int(ghosted_min_isigma());
+		}
+		fp ghosted_max_sigma() const
+		{
+			return sigma_map_.fp_of_int(ghosted_max_isigma());
+		}
+
+		// does a (drho,dsigma) in degrees lie inside the nominal grid (fuzzy comparison)?
+		bool is_valid_drho(fp drho) const
+		{
+			return jtutil::fuzzy<fp>::GE(drho, min_drho()) && jtutil::fuzzy<fp>::LE(drho, max_drho());
+		}
+		bool is_valid_dsigma(fp dsigma) const
+		{
+			return jtutil::fuzzy<fp>::GE(dsigma, min_dsigma()) && jtutil::fuzzy<fp>::LE(dsigma, max_dsigma());
+		}
+
+		// bring a rho/sigma angle into the ghosted grid's range by reducing it
+		// modulo 2*pi radians (360 degrees);
+		// error_exit() results if no equivalent angle lies in that range
+		fp modulo_reduce_rho(fp rho_in) const
+		{
+			return local_coords::modulo_reduce_ang(rho_in,
+												   ghosted_min_rho(), ghosted_max_rho());
+		}
+		fp modulo_reduce_sigma(fp sigma_in) const
+		{
+			return local_coords::modulo_reduce_ang(sigma_in,
+												   ghosted_min_sigma(), ghosted_max_sigma());
+		}
+		fp modulo_reduce_ang(bool want_rho, fp ang_in) const
+		{
+			if (want_rho) return modulo_reduce_rho(ang_in);
+			return modulo_reduce_sigma(ang_in);
+		}
+
+		//
+		// ***** miscellaneous *****
+		//
+	public:
+		// printable axis names, intended for debugging output
+		static const char *ang_name(bool want_rho)
+		{
+			return !want_rho ? "sigma" : "rho";
+		}
+		static const char *dang_name(bool want_rho)
+		{
+			return !want_rho ? "dsigma" : "drho";
+		}
+
+		//
+		// ***** parameter bundle taken by the constructor *****
+		//
+
+		// grouping the angular parameters into one structure keeps the
+		// top-level constructors from needing 20+ (!) separate arguments
+		struct grid_pars // *** note angles in degrees ***
+		{
+			fp min_drho, delta_drho, max_drho;
+			fp min_dsigma, delta_dsigma, max_dsigma;
+		};
+
+		//
+		// ***** construction / destruction *****
+		//
+		grid(const grid_array_pars &grid_array_pars_in,
+			 const grid_pars &grid_pars_in);
+		// no user-written destructor: the compiler-generated one suffices
+
+	private:
+		// copying and pass-by-value are disallowed:
+		// the copy constructor and assignment operator are declared
+		// private and deliberately left undefined
+		grid(const grid &rhs);
+		grid &operator=(const grid &rhs);
+
+	private:
+		// both maps span the full (ghost zones included) index range
+		const jtutil::linear_map<fp> rho_map_, sigma_map_;
+
+		// nominal-grid angular boundaries
+		const fp min_rho_, max_rho_;
+		const fp min_sigma_, max_sigma_;
+	};
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif /* TGRID_H  */
--- a/AMSS_NCKU_source/AHF_Direct/util.h
+++ b/AMSS_NCKU_source/AHF_Direct/util.h
@@ -0,0 +1,157 @@
+#ifndef AHFINDERDIRECT__UTIL_HH
+#define AHFINDERDIRECT__UTIL_HH
+#ifdef newc
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <strstream>
+#include <cmath>
+using namespace std;
+#else
+#include <iostream.h>
+#include <iomanip.h>
+#include <fstream.h>
+#include <string.h>
+#include <math.h>
+#endif
+
+#define PI M_PI
+
+namespace AHFinderDirect
+{
+	namespace jtutil
+	{
+		inline int how_many_in_range(int low, int high) { return high - low + 1; } // # of integers in the closed range [low, high]
+
+		inline int is_even(int i) { return !(i & 0x1); } // 1 if the low bit of i is clear, else 0
+		inline int is_odd(int i) { return (i & 0x1); } // the low bit of i: 1 if set, else 0
+
+		template <typename T>
+		inline T tmin(T x, T y) { return (x < y) ? x : y; } // smaller argument; y whenever x < y is false (e.g. a tie)
+		template <typename T>
+		inline T tmax(T x, T y) { return (x > y) ? x : y; } // larger argument; y whenever x > y is false (e.g. a tie)
+		template <typename T>
+		inline T abs(T x) { return (x > 0) ? x : -x; } // x itself if x > 0, otherwise -x (so a zero argument is negated)
+
+		template <typename T>
+		inline T pow2(T x) { return x * x; } // x squared
+		template <typename T>
+		inline T pow3(T x) { return x * x * x; } // x cubed
+		template <typename T>
+		inline T pow4(T x) { return pow2(pow2(x)); } // x^4 formed as (x^2)^2, i.e. two multiplications
+
+		template <typename fp_t>
+		inline fp_t degrees_of_radians(fp_t radians) { return (180.0 / PI) * radians; } // radians -> degrees; PI is this header's macro for M_PI
+		template <typename fp_t>
+		inline fp_t radians_of_degrees(fp_t degrees) { return (PI / 180.0) * degrees; } // degrees -> radians; PI is this header's macro for M_PI
+
+		// in miscfp.cc
+		//-----------------------------------------------------
+		double signum(double x);
+		double hypot3(double x, double y, double z);
+		double arctan_xy(double x, double y);
+
+		double modulo_reduce(double x, double xmod, double xmin, double xmax);
+
+		template <typename fp_t>
+		void zero_C_array(int N, fp_t array[]);
+
+		// in error_exit.cc
+		// ------------------------------------------------------
+		int error_exit(int msg_level, const char *format, ...);
+
+		// in norm.cc
+		// ... takes data points one at a time via data() and reports summary statistics
+		template <typename fp_t>
+		class norm
+		{
+		public:
+			// get norms etc
+			fp_t mean() const;
+			fp_t two_norm() const; // sqrt(sum x_i^2)
+			fp_t rms_norm() const; // sqrt(average of x_i^2)
+			fp_t infinity_norm() const { return max_abs_value_; } // same value as max_abs_value()
+
+			fp_t max_abs_value() const { return max_abs_value_; }
+			fp_t min_abs_value() const { return min_abs_value_; }
+
+			fp_t max_value() const { return max_value_; }
+			fp_t min_value() const { return min_value_; }
+
+			// specify data point
+			void data(fp_t x);
+
+			// have any data points been specified?
+			bool is_empty() const { return N_ == 0; }
+			bool is_nonempty() const { return N_ > 0; }
+
+			// reset ==> just like newly-constructed object
+			void reset();
+
+			// constructor, destructor
+			// ... compiler-generated no-op destructor is ok
+			norm();
+
+		private:
+			// we forbid copying and passing by value
+			// by declaring the copy constructor and assignment operator
+			// private, but never defining them
+			norm(const norm &rhs);
+			norm &operator=(const norm &rhs);
+
+		private:
+			long N_;			 // # of data points
+			fp_t sum_;			 // sum(data)
+			fp_t sum2_;			 // sum(data^2)
+			fp_t max_abs_value_; // max |data|
+			fp_t min_abs_value_; // min |data|
+			fp_t max_value_;	 // max data
+			fp_t min_value_;	 // min data
+		};
+
+		// in fuzzy.cc
+		template <typename fp_t>
+		class fuzzy
+		{
+		public:
+			// comparison tolerance (may be modified by user code if needed)
+			static fp_t get_tolerance() { return tolerance_; }
+			static void set_tolerance(fp_t new_tolerance)
+			{
+				tolerance_ = new_tolerance;
+			}
+
+			// fuzzy comparisons: EQ() decides equality, the exact operator is consulted only when EQ() is false
+			static bool EQ(fp_t x, fp_t y);
+			static bool NE(fp_t x, fp_t y) { return !EQ(x, y); }
+			static bool LT(fp_t x, fp_t y) { return EQ(x, y) ? false : (x < y); }
+			static bool LE(fp_t x, fp_t y) { return EQ(x, y) ? true : (x < y); }
+			static bool GT(fp_t x, fp_t y) { return EQ(x, y) ? false : (x > y); }
+			static bool GE(fp_t x, fp_t y) { return EQ(x, y) ? true : (x > y); }
+
+			static bool is_integer(fp_t x); // is x fuzzily an integer?
+			static int floor(fp_t x);		// round x fuzzily down to integer
+			static int ceiling(fp_t x);		// round x fuzzily up to integer
+
+		private:
+			// comparison tolerance (one static value per <fp_t> instantiation)
+			// ... must be explicitly initialized when instantiating
+			//     for a new <fp_t> type, see "fuzzy.cc" for details/examples
+			static fp_t tolerance_;
+		};
+
+		// in round.cc: static-only helpers converting an fp_t value to int
+		template <typename fp_t>
+		class round
+		{
+		public:
+			static int to_integer(fp_t x); // round to nearest integer
+
+			static int floor(fp_t x);	// round down to integer
+			static int ceiling(fp_t x); // round up to integer
+		};
+
+	} // namespace jtutil
+} // namespace AHFinderDirect
+
+#endif /* AHFINDERDIRECT__UTIL_HH */
--- a/AMSS_NCKU_source/AHF_Direct/util_String.h
+++ b/AMSS_NCKU_source/AHF_Direct/util_String.h
@@ -0,0 +1,45 @@
+#ifndef _UTIL_STRING_H_
+#define _UTIL_STRING_H_ 1
+
+#include <stdarg.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+    const char *Util_StrSep(const char **stringp,
+                            const char *delim);
+
+    int Util_SplitString(char **before,
+                         char **after,
+                         const char *string,
+                         const char *sep);
+
+    int Util_SplitFilename(char **dir,
+                           char **file,
+                           const char *string);
+
+    char *Util_Strdup(const char *s);
+
+    size_t Util_Strlcpy(char *dst, const char *src, size_t dst_size);
+    size_t Util_Strlcat(char *dst, const char *src, size_t dst_size);
+
+    int Util_StrCmpi(const char *string1,
+                     const char *string2);
+    int Util_StrMemCmpi(const char *string1,
+                        const char *string2,
+                        size_t len2);
+
+    int Util_vsnprintf(char *str, size_t count, const char *fmt, va_list args);
+    int Util_snprintf(char *str, size_t count, const char *fmt, ...);
+
+    int Util_asprintf(char **buffer, const char *fmt, ...);
+    int Util_asnprintf(char **buffer, size_t size, const char *fmt, ...);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _UTIL_STRING_H_ */
--- a/AMSS_NCKU_source/AHF_Direct/util_Table.h
+++ b/AMSS_NCKU_source/AHF_Direct/util_Table.h
@@ -0,0 +1,496 @@
+#ifndef _UTIL_TABLE_H_
+#define _UTIL_TABLE_H_  1
+
+#include "cctk_Types.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/******************************************************************************/
+/***** Macros for Flags Word **************************************************/
+/******************************************************************************/
+
+/*
+ * The hexadecimal forms are more convenient for thinking about
+ * bitwise-oring, but alas Fortran 77 doesn't seem to support
+ * hexadecimal constants, so we give the actual values in decimal.
+ */
+
+/*@@
+  @defines      UTIL_TABLE_FLAGS_DEFAULT
+  @desc         flags-word macro: no flags set (default)
+  @@*/
+#define UTIL_TABLE_FLAGS_DEFAULT                0
+
+/*@@
+  @defines      UTIL_TABLE_FLAGS_CASE_INSENSITIVE
+  @desc         flags-word macro: key comparisons are case-insensitive
+  @@*/
+#define UTIL_TABLE_FLAGS_CASE_INSENSITIVE       1       /* 0x1 */
+
+/*@@
+  @defines      UTIL_TABLE_FLAGS_USER_DEFINED_BASE
+  @desc         flags-word macro: user-defined flags word bit masks
+                should use only this and higher bit positions (i.e.
+                all bit positions below this one are reserved for
+                current or future Cactus use)
+  @@*/
+#define UTIL_TABLE_FLAGS_USER_DEFINED_BASE      65536   /* 0x10000 */
+
+/******************************************************************************/
+/***** Error Codes ************************************************************/
+/******************************************************************************/
+
+/*
+ * error codes specific to the table routines (between -100 and -199)
+ */
+
+/*@@
+  @defines      UTIL_ERROR_TABLE_BAD_FLAGS
+  @desc         error return code: flags word is invalid
+  @@*/
+#define UTIL_ERROR_TABLE_BAD_FLAGS              (-100)
+
+/*@@
+  @defines      UTIL_ERROR_TABLE_BAD_KEY
+  @desc         error return code: key contains '/' character
+                                   or is otherwise invalid
+  @@*/
+#define UTIL_ERROR_TABLE_BAD_KEY                (-101)
+
+/*@@
+  @defines      UTIL_ERROR_TABLE_STRING_TRUNCATED
+  @desc         error return code: string was truncated to fit in buffer
+  @@*/
+#define UTIL_ERROR_TABLE_STRING_TRUNCATED       (-102)
+
+/*@@
+  @defines      UTIL_ERROR_TABLE_NO_SUCH_KEY
+  @desc         error return code: no such key in table
+  @@*/
+#define UTIL_ERROR_TABLE_NO_SUCH_KEY            (-103)
+
+/*@@
+  @defines      UTIL_ERROR_TABLE_WRONG_DATA_TYPE
+  @desc         error return code: value associated with this key
+                has the wrong data type for this function
+  @@*/
+#define UTIL_ERROR_TABLE_WRONG_DATA_TYPE        (-104)
+
+/*@@
+  @defines      UTIL_ERROR_TABLE_VALUE_IS_EMPTY
+  @desc         error return code: value associated with this key
+                is an empty (0-element) array
+  @@*/
+#define UTIL_ERROR_TABLE_VALUE_IS_EMPTY         (-105)
+
+/*@@
+  @defines      UTIL_ERROR_TABLE_ITERATOR_IS_NULL
+  @desc         error return code: table iterator is in "null-pointer" state
+  @@*/
+#define UTIL_ERROR_TABLE_ITERATOR_IS_NULL       (-106)
+
+/*@@
+  @defines      UTIL_ERROR_TABLE_NO_MIXED_TYPE_ARRAY
+  @desc         error return code: different array values have different
+                datatypes
+  @@*/
+#define UTIL_ERROR_TABLE_NO_MIXED_TYPE_ARRAY    (-107)
+
+
+/******************************************************************************/
+/***** Main Table API *********************************************************/
+/******************************************************************************/
+
+/* create/destroy */
+int Util_TableCreate(int flags);
+int Util_TableClone(int handle);
+int Util_TableDestroy(int handle);
+
+/* query */
+int Util_TableQueryFlags(int handle);
+int Util_TableQueryNKeys(int handle);
+int Util_TableQueryMaxKeyLength(int handle);
+int Util_TableQueryValueInfo(int handle,
+                             CCTK_INT *type_code, CCTK_INT *N_elements,
+                             const char *key);
+
+/* misc stuff */
+int Util_TableDeleteKey(int handle, const char *key);
+
+/* convenience routines to create and/or set from a "parameter-file" string */
+int Util_TableCreateFromString(const char string[]);
+int Util_TableSetFromString(int handle, const char string[]);
+
+/* set/get a C-style null-terminated character string */
+int Util_TableSetString(int handle,
+                        const char *string,
+                        const char *key);
+int Util_TableGetString(int handle,
+                        int buffer_length, char buffer[],
+                        const char *key);
+
+/* set/get generic types described by CCTK_VARIABLE_* type codes */
+int Util_TableSetGeneric(int handle,
+                         int type_code, const void *value_ptr,
+                         const char *key);
+int Util_TableSetGenericArray(int handle,
+                              int type_code, int N_elements, const void *array,
+                              const char *key);
+int Util_TableGetGeneric(int handle,
+                         int type_code, void *value_ptr,
+                         const char *key);
+int Util_TableGetGenericArray(int handle,
+                              int type_code, int N_elements, void *array,
+                              const char *key);
+
+/**************************************/
+
+/*
+ * set routines
+ */
+
+/* pointers */
+int Util_TableSetPointer(int handle, CCTK_POINTER value, const char *key);
+int Util_TableSetPointerToConst(int handle,
+                                CCTK_POINTER_TO_CONST value,
+                                const char *key);
+int Util_TableSetFPointer(int handle, CCTK_FPOINTER value, const char *key);
+/*
+ * ... the following function (an alias for the previous one) is for
+ *     backwards compatibility only, and is deprecated as of 4.0beta13
+ */
+int Util_TableSetFnPointer(int handle, CCTK_FPOINTER value, const char *key);
+
+/* a single character */
+int Util_TableSetChar(int handle, CCTK_CHAR value, const char *key);
+
+/* integers */
+int Util_TableSetByte(int handle, CCTK_BYTE value, const char *key);
+int Util_TableSetInt(int handle, CCTK_INT value, const char *key);
+#ifdef HAVE_CCTK_INT1
+int Util_TableSetInt1(int handle, CCTK_INT1 value, const char *key);
+#endif
+#ifdef HAVE_CCTK_INT2
+int Util_TableSetInt2(int handle, CCTK_INT2 value, const char *key);
+#endif
+#ifdef HAVE_CCTK_INT4
+int Util_TableSetInt4(int handle, CCTK_INT4 value, const char *key);
+#endif
+#ifdef HAVE_CCTK_INT8
+int Util_TableSetInt8(int handle, CCTK_INT8 value, const char *key);
+#endif
+
+/* real numbers */
+int Util_TableSetReal(int handle, CCTK_REAL value, const char *key);
+#ifdef HAVE_CCTK_REAL4
+int Util_TableSetReal4(int handle, CCTK_REAL4 value, const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL8
+int Util_TableSetReal8(int handle, CCTK_REAL8 value, const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL16
+int Util_TableSetReal16(int handle, CCTK_REAL16 value, const char *key);
+#endif
+
+/* complex numbers */
+int Util_TableSetComplex(int handle, CCTK_COMPLEX value, const char *key);
+#ifdef HAVE_CCTK_REAL4
+int Util_TableSetComplex8(int handle, CCTK_COMPLEX8 value, const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL8
+int Util_TableSetComplex16(int handle, CCTK_COMPLEX16 value, const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL16
+int Util_TableSetComplex32(int handle, CCTK_COMPLEX32 value, const char *key);
+#endif
+
+/**************************************/
+
+/* arrays of pointers */
+int Util_TableSetPointerArray(int handle,
+                              int N_elements, const CCTK_POINTER array[],
+                              const char *key);
+int Util_TableSetPointerToConstArray(int handle,
+                                     int N_elements,
+                                     const CCTK_POINTER_TO_CONST array[],
+                                     const char *key);
+int Util_TableSetFPointerArray(int handle,
+                               int N_elements, const CCTK_FPOINTER array[],
+                               const char *key);
+/*
+ * ... the following function (an alias for the previous one) is for
+ *     backwards compatibility only, and is deprecated as of 4.0beta13
+ */
+int Util_TableSetFnPointerArray(int handle,
+                                int N_elements, const CCTK_FPOINTER array[],
+                                const char *key);
+
+/* arrays of characters (i.e. character strings with known length) */
+/* note null termination is *not* required or enforced */
+int Util_TableSetCharArray(int handle,
+                           int N_elements, const CCTK_CHAR array[],
+                           const char *key);
+
+/* arrays of integers */
+int Util_TableSetByteArray(int handle,
+                           int N_elements, const CCTK_BYTE array[],
+                           const char *key);
+int Util_TableSetIntArray(int handle,
+                          int N_elements, const CCTK_INT array[],
+                          const char *key);
+#ifdef HAVE_CCTK_INT1
+int Util_TableSetInt1Array(int handle,
+                           int N_elements, const CCTK_INT1 array[],
+                           const char *key);
+#endif
+#ifdef HAVE_CCTK_INT2
+int Util_TableSetInt2Array(int handle,
+                           int N_elements, const CCTK_INT2 array[],
+                           const char *key);
+#endif
+#ifdef HAVE_CCTK_INT4
+int Util_TableSetInt4Array(int handle,
+                           int N_elements, const CCTK_INT4 array[],
+                           const char *key);
+#endif
+#ifdef HAVE_CCTK_INT8
+int Util_TableSetInt8Array(int handle,
+                           int N_elements, const CCTK_INT8 array[],
+                           const char *key);
+#endif
+
+/* arrays of real numbers */
+int Util_TableSetRealArray(int handle,
+                           int N_elements, const CCTK_REAL array[],
+                           const char *key);
+#ifdef HAVE_CCTK_REAL4
+int Util_TableSetReal4Array(int handle,
+                            int N_elements, const CCTK_REAL4 array[],
+                            const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL8
+int Util_TableSetReal8Array(int handle,
+                            int N_elements, const CCTK_REAL8 array[],
+                            const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL16
+int Util_TableSetReal16Array(int handle,
+                             int N_elements, const CCTK_REAL16 array[],
+                             const char *key);
+#endif
+
+/* arrays of complex numbers */
+int Util_TableSetComplexArray(int handle,
+                              int N_elements, const CCTK_COMPLEX array[],
+                              const char *key);
+#ifdef HAVE_CCTK_REAL4
+int Util_TableSetComplex8Array(int handle,
+                               int N_elements, const CCTK_COMPLEX8 array[],
+                               const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL8
+int Util_TableSetComplex16Array(int handle,
+                                int N_elements, const CCTK_COMPLEX16 array[],
+                                const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL16
+int Util_TableSetComplex32Array(int handle,
+                                int N_elements, const CCTK_COMPLEX32 array[],
+                                const char *key);
+#endif
+
+/**************************************/
+
+/*
+ * get routines
+ */
+
+/* pointers */
+int Util_TableGetPointer(int handle, CCTK_POINTER *value, const char *key);
+int Util_TableGetPointerToConst(int handle,
+                                CCTK_POINTER_TO_CONST *value,
+                                const char *key);
+
+int Util_TableGetFPointer(int handle, CCTK_FPOINTER *value, const char *key);
+/*
+ * ... the following function (an alias for the previous one) is for
+ *     backwards compatibility only, and is deprecated as of 4.0beta13
+ */
+int Util_TableGetFnPointer(int handle, CCTK_FPOINTER *value, const char *key);
+
+/* a single character */
+int Util_TableGetChar(int handle, CCTK_CHAR *value, const char *key);
+
+/* integers */
+int Util_TableGetByte(int handle, CCTK_BYTE *value, const char *key);
+int Util_TableGetInt(int handle, CCTK_INT *value, const char *key);
+#ifdef HAVE_CCTK_INT1
+int Util_TableGetInt1(int handle, CCTK_INT1 *value, const char *key);
+#endif
+#ifdef HAVE_CCTK_INT2
+int Util_TableGetInt2(int handle, CCTK_INT2 *value, const char *key);
+#endif
+#ifdef HAVE_CCTK_INT4
+int Util_TableGetInt4(int handle, CCTK_INT4 *value, const char *key);
+#endif
+#ifdef HAVE_CCTK_INT8
+int Util_TableGetInt8(int handle, CCTK_INT8 *value, const char *key);
+#endif
+
+/* real numbers */
+int Util_TableGetReal(int handle, CCTK_REAL *value, const char *key);
+#ifdef HAVE_CCTK_REAL4
+int Util_TableGetReal4(int handle, CCTK_REAL4 *value, const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL8
+int Util_TableGetReal8(int handle, CCTK_REAL8 *value, const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL16
+int Util_TableGetReal16(int handle, CCTK_REAL16 *value, const char *key);
+#endif
+
+/* complex numbers */
+int Util_TableGetComplex(int handle, CCTK_COMPLEX *value, const char *key);
+#ifdef HAVE_CCTK_REAL4
+int Util_TableGetComplex8(int handle, CCTK_COMPLEX8 *value, const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL8
+int Util_TableGetComplex16(int handle, CCTK_COMPLEX16 *value, const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL16
+int Util_TableGetComplex32(int handle, CCTK_COMPLEX32 *value, const char *key);
+#endif
+
+/**************************************/
+
+/* arrays of pointers */
+int Util_TableGetPointerArray(int handle,
+                              int N_elements, CCTK_POINTER array[],
+                              const char *key);
+int Util_TableGetPointerToConstArray(int handle,
+                                     int N_elements,
+                                     CCTK_POINTER_TO_CONST array[],
+                                     const char *key);
+
+int Util_TableGetFPointerArray(int handle,
+                               int N_elements, CCTK_FPOINTER array[],
+                               const char *key);
+/*
+ * ... the following function (an alias for the previous one) is for
+ *     backwards compatibility only, and is deprecated as of 4.0beta13
+ */
+int Util_TableGetFnPointerArray(int handle,
+                                int N_elements, CCTK_FPOINTER array[],
+                                const char *key);
+
+/* arrays of characters (i.e. character strings of known length) */
+/* note null termination is *not* required or enforced */
+int Util_TableGetCharArray(int handle,
+                           int N_elements, CCTK_CHAR array[],
+                           const char *key);
+
+/* arrays of integers */
+int Util_TableGetByteArray(int handle,
+                           int N_elements, CCTK_BYTE array[],
+                           const char *key);
+int Util_TableGetIntArray(int handle,
+                          int N_elements, CCTK_INT array[],
+                          const char *key);
+#ifdef HAVE_CCTK_INT1
+int Util_TableGetInt1Array(int handle,
+                           int N_elements, CCTK_INT1 array[],
+                           const char *key);
+#endif
+#ifdef HAVE_CCTK_INT2
+int Util_TableGetInt2Array(int handle,
+                           int N_elements, CCTK_INT2 array[],
+                           const char *key);
+#endif
+#ifdef HAVE_CCTK_INT4
+int Util_TableGetInt4Array(int handle,
+                           int N_elements, CCTK_INT4 array[],
+                           const char *key);
+#endif
+#ifdef HAVE_CCTK_INT8
+int Util_TableGetInt8Array(int handle,
+                           int N_elements, CCTK_INT8 array[],
+                           const char *key);
+#endif
+
+/* arrays of real numbers */
+int Util_TableGetRealArray(int handle,
+                           int N_elements, CCTK_REAL array[],
+                           const char *key);
+#ifdef HAVE_CCTK_REAL4
+int Util_TableGetReal4Array(int handle,
+                            int N_elements, CCTK_REAL4 array[],
+                            const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL8
+int Util_TableGetReal8Array(int handle,
+                            int N_elements, CCTK_REAL8 array[],
+                            const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL16
+int Util_TableGetReal16Array(int handle,
+                             int N_elements, CCTK_REAL16 array[],
+                             const char *key);
+#endif
+
+/* arrays of complex numbers */
+int Util_TableGetComplexArray(int handle,
+                              int N_elements, CCTK_COMPLEX array[],
+                              const char *key);
+#ifdef HAVE_CCTK_REAL4
+int Util_TableGetComplex8Array(int handle,
+                               int N_elements, CCTK_COMPLEX8 array[],
+                               const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL8
+int Util_TableGetComplex16Array(int handle,
+                                int N_elements, CCTK_COMPLEX16 array[],
+                                const char *key);
+#endif
+#ifdef HAVE_CCTK_REAL16
+int Util_TableGetComplex32Array(int handle,
+                                int N_elements, CCTK_COMPLEX32 array[],
+                                const char *key);
+#endif
+
+/******************************************************************************/
+/***** Table Iterator API *****************************************************/
+/******************************************************************************/
+
+/* create/destroy */
+int Util_TableItCreate(int handle);
+int Util_TableItClone(int ihandle);
+int Util_TableItDestroy(int ihandle);
+
+/* test for "null-pointer" state */
+int Util_TableItQueryIsNull(int ihandle);
+int Util_TableItQueryIsNonNull(int ihandle);
+
+/* query what the iterator points to */
+int Util_TableItQueryTableHandle(int ihandle);
+int Util_TableItQueryKeyValueInfo(int ihandle,
+                                  int key_buffer_length, char key_buffer[],
+                                  CCTK_INT *type_code, CCTK_INT *N_elements);
+
+/* change value of iterator */
+int Util_TableItAdvance(int ihandle);
+int Util_TableItResetToStart(int ihandle);
+int Util_TableItSetToNull(int ihandle);
+int Util_TableItSetToKey(int ihandle, const char *key);
+
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* _UTIL_TABLE_H_ */