asc26 amss-ncku initialized

2026-01-13 15:01:15 +08:00
commit f2fc9af70e
272 changed files with 262274 additions and 0 deletions
--- a/AMSS_NCKU_source/Newton.C
+++ b/AMSS_NCKU_source/Newton.C
@@ -0,0 +1,555 @@
+//$Id: Newton.C,v 1.1 2012/04/03 10:49:44 zjcao Exp $
+
+#include "macrodef.h"
+#ifdef With_AHF
+
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+#include <float.h>
+#include <math.h>
+#include <mpi.h>
+
+#include "util_Table.h"
+#include "cctk.h"
+
+#include "config.h"
+#include "stdc.h"
+#include "util.h"
+#include "array.h"
+#include "cpm_map.h"
+#include "linear_map.h"
+
+#include "coords.h"
+#include "tgrid.h"
+#include "fd_grid.h"
+#include "patch.h"
+#include "patch_edge.h"
+#include "patch_interp.h"
+#include "ghost_zone.h"
+#include "patch_system.h"
+
+#include "Jacobian.h"
+
+#include "gfns.h"
+#include "gr.h"
+
+#include "horizon_sequence.h"
+#include "BH_diagnostics.h"
+#include "driver.h"
+#include "myglobal.h"
+
+namespace AHFinderDirect
+{
+	// Shared finder state object, declared here and not defined in this file.
+	// This file reads state.N_procs, state.my_proc and state.PhysTime from it.
+	extern struct state state;
+	// Make jtutil::error_exit usable unqualified inside this namespace.
+	using jtutil::error_exit;
+
+	//
+	// This function moves the patch system's origin to the horizon centroid
+	// if the current origin is too far off-center in the horizon's extent.
+	//
+	// Arguments:
+	// ps = The patch system; its origin is read, and (only if recentering
+	//      happens) its ghosted gridfn h is overwritten and ps.recentering()
+	//      is called.
+	// {max,min}_{x,y,z} = The horizon's coordinate extent along each axis.
+	// centroid_{x,y,z} = The new origin to use if recentering happens.
+	//
+	// The origin counts as centered along an axis if
+	//	|max + min - 2*origin| < 0.45 * (max - min)
+	// and nothing at all is done if that holds along all three axes.
+	//
+	void recentering(patch_system &ps, double max_x, double max_y, double max_z,
+					 double min_x, double min_y, double min_z,
+					 double centroid_x, double centroid_y, double centroid_z)
+	{
+		const fp ox = ps.origin_x();
+		const fp oy = ps.origin_y();
+		const fp oz = ps.origin_z();
+
+		const fp center_tolerance = .45;
+
+		// fabs() rather than unqualified abs(): depending on which headers are
+		// in scope, abs() can resolve to the int overload, which would truncate
+		// the floating-point offset before it is compared with the tolerance.
+		const bool x_is_centered = fabs(max_x + min_x - 2.0 * ox) < center_tolerance * (max_x - min_x);
+		const bool y_is_centered = fabs(max_y + min_y - 2.0 * oy) < center_tolerance * (max_y - min_y);
+		const bool z_is_centered = fabs(max_z + min_z - 2.0 * oz) < center_tolerance * (max_z - min_z);
+
+		if (x_is_centered && y_is_centered && z_is_centered)
+			return;
+
+		// overwrite h at each (irho,isigma) with the distance of that point's
+		// global (x,y,z) from the centroid
+		// NOTE(review): this relies on the global_x/y/z gridfns being up to
+		// date on entry -- confirm against the caller.
+		for (int pn = 0; pn < ps.N_patches(); ++pn)
+		{
+			patch &p = ps.ith_patch(pn);
+
+			for (int irho = p.min_irho(); irho <= p.max_irho(); ++irho)
+			{
+				for (int isigma = p.min_isigma(); isigma <= p.max_isigma(); ++isigma)
+				{
+					const fp dx = p.gridfn(gfns::gfn__global_x, irho, isigma) - centroid_x;
+					const fp dy = p.gridfn(gfns::gfn__global_y, irho, isigma) - centroid_y;
+					const fp dz = p.gridfn(gfns::gfn__global_z, irho, isigma) - centroid_z;
+
+					p.ghosted_gridfn(gfns::gfn__h, irho, isigma) =
+						sqrt(jtutil::pow2(dx) + jtutil::pow2(dy) + jtutil::pow2(dz));
+				}
+			}
+		}
+
+		ps.recentering(centroid_x, centroid_y, centroid_z);
+	}
+
+	namespace
+	{
+		// Forward declarations of file-local helpers defined further down.
+
+		// Exchanges each active processor's iteration status through isb and
+		// returns whether any processor still needs more iterations.
+		bool broadcast_status(int N_procs, int N_active_procs,
+							  int my_proc, bool my_active_flag,
+							  int hn, int iteration,
+							  enum expansion_status expansion_status,
+							  fp mean_horizon_radius, fp infinity_norm,
+							  bool found_this_horizon, bool I_need_more_iterations,
+							  struct iteration_status_buffers &isb);
+
+		// Applies the (possibly scaled-down) Newton update Delta_h to h.
+		void Newton_step(patch_system &ps,
+						 fp mean_horizon_radius, fp max_allowable_Delta_h_over_h);
+
+		// Copies the ghosted gridfn h into the gridfn oldh.
+		void save_oldh(patch_system &ps);
+
+		//
+		// This function fills four gridfns of this processor's patch system
+		// with data interpolated by globalInterpGFLlash() at the patch system's
+		// global (x,y,z) grid-point coordinates.
+		//
+		// Each rank 0 .. state.N_procs-1 in turn broadcasts its point count and
+		// coordinates over MPI_COMM_WORLD; every processor then calls
+		// globalInterpGFLlash() on those points, and only the owning rank
+		// copies the result into its gridfns.  Hence every processor must call
+		// this function, including those with no patch system.
+		//
+		// Arguments:
+		// ps_ptr = This processor's patch system, or NULL if it has none (it
+		//	    then contributes 0 points but still serves the other ranks).
+		//
+		// Results:
+		// On success the gridfns global_xx, global_xy, global_xz, global_yy
+		// receive the interpolated data (labelled Lapse-1, Sfx, Sfy, Sfz below).
+		// The return value is the globalInterpGFLlash() status for this
+		// processor's own points (1 = success, 0 = failure), or 1 if this
+		// processor had no points to interpolate.
+		//
+		int interpolate_alsh(patch_system *ps_ptr)
+		{
+			const bool have_patch_system = (ps_ptr != NULL);
+
+			//
+			// ***** interpolation points *****
+			//
+			const int N_interp_points = have_patch_system ? ps_ptr->N_grid_points() : 0;
+			double *const interp_coords[3] = {
+				have_patch_system ? static_cast<double *>(ps_ptr->gridfn_data(gfns::gfn__global_x)) : NULL,
+				have_patch_system ? static_cast<double *>(ps_ptr->gridfn_data(gfns::gfn__global_y)) : NULL,
+				have_patch_system ? static_cast<double *>(ps_ptr->gridfn_data(gfns::gfn__global_z)) : NULL,
+			};
+
+			//
+			// ***** where the interpolated data goes *****
+			//
+			double *const output_arrays[] = {
+				have_patch_system ? static_cast<double *>(ps_ptr->gridfn_data(gfns::gfn__global_xx)) : NULL, // Lapse-1
+				have_patch_system ? static_cast<double *>(ps_ptr->gridfn_data(gfns::gfn__global_xy)) : NULL, // Sfx
+				have_patch_system ? static_cast<double *>(ps_ptr->gridfn_data(gfns::gfn__global_xz)) : NULL, // Sfy
+				have_patch_system ? static_cast<double *>(ps_ptr->gridfn_data(gfns::gfn__global_yy)) : NULL, // Sfz
+			};
+			const int N_output_arrays = sizeof(output_arrays) / sizeof(output_arrays[0]);
+
+			int status = 1;
+
+			for (int ncpu = 0; ncpu < state.N_procs; ncpu++)
+			{
+				const bool points_are_mine = (state.my_proc == ncpu);
+
+				// the owning rank supplies the count; the broadcast overwrites
+				// the placeholder value on every other rank
+				int Npts = points_are_mine ? N_interp_points : 0;
+				MPI_Bcast(&Npts, 1, MPI_INT, ncpu, MPI_COMM_WORLD);
+
+				// all ranks agree on Npts, so all of them skip this rank together
+				if (Npts == 0)
+					continue;
+
+				double *const Data = new double[Npts * N_output_arrays];
+				double *const oX = new double[Npts];
+				double *const oY = new double[Npts];
+				double *const oZ = new double[Npts];
+
+				if (points_are_mine)
+				{
+					memcpy(oX, interp_coords[0], Npts * sizeof(double));
+					memcpy(oY, interp_coords[1], Npts * sizeof(double));
+					memcpy(oZ, interp_coords[2], Npts * sizeof(double));
+				}
+				MPI_Bcast(oX, Npts, MPI_DOUBLE, ncpu, MPI_COMM_WORLD);
+				MPI_Bcast(oY, Npts, MPI_DOUBLE, ncpu, MPI_COMM_WORLD);
+				MPI_Bcast(oZ, Npts, MPI_DOUBLE, ncpu, MPI_COMM_WORLD);
+
+				// each cpu calls interpolator
+				const int s = globalInterpGFLlash(
+					oX, oY, oZ, Npts,
+					Data); // 1 success; 0 fail
+
+				if (points_are_mine)
+				{
+					status = s;
+
+					if (status == 1)
+					{
+						// Data holds the output arrays back to back, Npts values
+						// each (on the owning rank Npts == N_interp_points)
+						for (int ngf = 0; ngf < N_output_arrays; ngf++)
+						{
+							memcpy(output_arrays[ngf], Data + ngf * Npts,
+								   sizeof(double) * Npts);
+						}
+					}
+				}
+
+				delete[] oX;
+				delete[] oY;
+				delete[] oZ;
+				delete[] Data;
+			}
+
+			return status;
+		}
+
+	}
+
+	//******************************************************************************
+	//
+	// This function runs the Newton iteration for each horizon in this
+	// processor's horizon sequence, in lock-step with all other processors.
+	//
+	// On every pass of the inner loop each processor calls expansion(),
+	// broadcast_status(), MPI_Allreduce() and (unless everybody is finished)
+	// expansion_Jacobian(), whether or not it has a genuine horizon to work
+	// on; a processor with nothing to do passes NULL patch-system/Jacobian
+	// pointers.  The function returns as soon as broadcast_status() reports
+	// that no processor needs more iterations.
+	//
+	// Arguments:
+	// N_procs, N_active_procs, my_proc = Passed through to broadcast_status().
+	// hs = This processor's horizon sequence.
+	// AH_data_array[] = Per-horizon data, indexed by horizon number hn.
+	// isb = Status buffers filled in by broadcast_status().
+	// dumpid[] = Indexed by hn-1; a value > 0 makes a found horizon's shape be
+	//	      written to the file "ah<hn>_<dumpid>.dat".
+	// dT[] = Indexed by hn-1; a value > 0 selects the compute_signature()
+	//	  output path (which needs interpolate_alsh() data) for that file.
+	//
+	void Newton(int N_procs, int N_active_procs, int my_proc,
+				horizon_sequence &hs, struct AH_data *const AH_data_array[],
+				struct iteration_status_buffers &isb, int *dumpid, double *dT)
+	{
+		const bool my_active_flag = hs.has_genuine_horizons();
+		const int N_horizons = hs.N_horizons();
+
+		for (int hn = hs.init_hn();; hn = hs.next_hn()) // hn always =0 for cpu who has no patch_system
+		{
+			bool horizon_is_genuine = hs.is_genuine();
+			const bool there_is_another_genuine_horizon = hs.is_next_genuine();
+
+			struct AH_data *AH_data_ptr = horizon_is_genuine ? AH_data_array[hn] : NULL;
+
+			// a genuine horizon is still handled like a dummy one unless its
+			// find_trigger is set and its stop_finding is clear
+			horizon_is_genuine = horizon_is_genuine && AH_data_ptr->find_trigger && !AH_data_ptr->stop_finding;
+			if (horizon_is_genuine)
+				cout << "being finding horizon #" << hn << endl;
+			patch_system *const ps_ptr = horizon_is_genuine ? AH_data_ptr->ps_ptr : NULL;
+			Jacobian *const Jac_ptr = horizon_is_genuine ? AH_data_ptr->Jac_ptr : NULL;
+			const double add_to_expansion = horizon_is_genuine ? -AH_data_ptr->surface_expansion : 0.0;
+			const int max_iterations = horizon_is_genuine
+										   ? (AH_data_ptr->initial_find_flag ? 80 : 20)
+										   : INT_MAX;
+
+			if (horizon_is_genuine)
+				save_oldh(*ps_ptr);
+
+			for (int iteration = 1;; ++iteration)
+			{
+				jtutil::norm<fp> Theta_norms;
+
+				const enum expansion_status raw_expansion_status = expansion(ps_ptr, add_to_expansion,
+																			 (iteration == 1), true, &Theta_norms);
+
+				const bool Theta_is_ok = (raw_expansion_status == expansion_success);
+				const bool norms_are_ok = horizon_is_genuine && Theta_is_ok;
+
+				//
+				// have we found this horizon?
+				// if so, compute and output BH diagnostics
+				//
+				const bool found_this_horizon = norms_are_ok && (Theta_norms.infinity_norm() <= 1e-11);
+
+				if (horizon_is_genuine)
+				{
+					AH_data_ptr->found_flag = found_this_horizon;
+
+					// The failure message is only issued once we know the last
+					// allowed iteration did not converge; issuing it before the
+					// convergence test could report the same horizon as both
+					// failed and found.
+					if (found_this_horizon)
+						cout << "found horizon #" << hn << " with " << iteration << " steps!!!" << endl;
+					else if (iteration == max_iterations)
+						cout << "AHfinder: fail to find horizon #" << hn
+							 << " with Newton iteration " << iteration << " steps!!!" << endl;
+				}
+
+				//
+				// see if the expansion is too big
+				// (if so, we'll give up on this horizon)
+				//
+				const bool expansion_is_too_large = norms_are_ok && (Theta_norms.infinity_norm() > 1e10);
+
+				//
+				// compute the mean horizon radius, and if it's too large,
+				// then pretend expansion() returned a "surface too large" error status
+				//
+				jtutil::norm<fp> h_norms;
+				if (horizon_is_genuine)
+					ps_ptr->ghosted_gridfn_norms(gfns::gfn__h, h_norms);
+				const fp mean_horizon_radius = horizon_is_genuine ? h_norms.mean()
+																  : 0.0;
+				const bool horizon_is_too_large = (mean_horizon_radius > 1e10);
+
+				const enum expansion_status effective_expansion_status = horizon_is_too_large ? expansion_failure__surface_too_large
+																							  : raw_expansion_status;
+
+				//
+				// see if we need more iterations (either on this or another horizon)
+				//
+
+				// does *this* horizon need more iterations?
+				// i.e. has this horizon's Newton iteration not yet converged?
+				const bool this_horizon_needs_more_iterations = horizon_is_genuine && Theta_is_ok && !found_this_horizon && !expansion_is_too_large && !horizon_is_too_large && (iteration < max_iterations);
+
+				// do I (this processor) need to do more iterations
+				// on this or a following horizon?
+				const bool I_need_more_iterations = this_horizon_needs_more_iterations || there_is_another_genuine_horizon;
+
+				//
+				// broadcast iteration status from each active processor
+				// to all processors, and inclusive-or the "we need more iterations"
+				// flags to see if *any* (active) processor needs more iterations
+				//
+				const bool any_proc_needs_more_iterations = broadcast_status(N_procs, N_active_procs,
+																			 my_proc, my_active_flag,
+																			 hn, iteration, effective_expansion_status,
+																			 mean_horizon_radius,
+																			 (norms_are_ok ? Theta_norms.infinity_norm() : 0.0),
+																			 found_this_horizon, I_need_more_iterations,
+																			 isb);
+				// set found-this-horizon flags
+				// for all active processors' non-dummy horizons
+				for (int found_proc = 0; found_proc < N_active_procs; ++found_proc)
+				{
+					const int found_hn = isb.hn_buffer[found_proc];
+					if (found_hn > 0)
+						AH_data_array[found_hn]->found_flag = isb.found_horizon_buffer[found_proc];
+				}
+
+				//
+				// prepare lapse and shift
+				//
+				// interpolate_alsh() needs every processor to take part, so the
+				// decision to call it is agreed on with an all-reduce: it runs
+				// if at least one processor has just found a horizon whose
+				// dumpid and dT are both > 0
+				{
+					int ff = 0, fft = 0;
+					if (found_this_horizon && dumpid[hn - 1] > 0 && dT[hn - 1] > 0)
+						fft = 1;
+					MPI_Allreduce(&fft, &ff, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+
+					if (ff)
+					{
+						// NOTE(review): only rank 0 prints, and only for the status
+						// of its own points -- a failure seen by another rank
+						// looks like it goes unreported; confirm this is intended.
+						if ((interpolate_alsh(ps_ptr) == 0) && (state.my_proc == 0))
+							cout << "interpolation of lapse and shift for AH failed." << endl;
+					}
+				}
+
+				if (found_this_horizon)
+				{
+					struct BH_diagnostics &BH_diagnostics = AH_data_ptr->BH_diagnostics;
+					// output data
+					if (dumpid[hn - 1] > 0)
+					{
+						// at most 30 characters are written for any int values,
+						// so the 100-byte buffer cannot overflow
+						char filename[100];
+						sprintf(filename, "ah%02d_%05d.dat", hn, dumpid[hn - 1]);
+						if (dT[hn - 1] > 0)
+						{
+							// gridfunction xx,xy,xz,yy,yz,zz will be used as temp storage
+							BH_diagnostics.compute_signature(*ps_ptr, dT[hn - 1]);
+							ps_ptr->print_gridfn_with_xyz(gfns::gfn__global_zz, true, gfns::gfn__h, filename);
+						}
+						else
+							ps_ptr->print_ghosted_gridfn_with_xyz(gfns::gfn__h, true, gfns::gfn__h, filename, false);
+					}
+
+					BH_diagnostics.compute(*ps_ptr); // gridfunction xx,xy,xz,yy,yz,zz changed
+
+					if (AH_data_ptr->BH_diagnostics_fileptr == NULL)
+						AH_data_ptr->BH_diagnostics_fileptr = BH_diagnostics.setup_output_file(N_horizons, hn);
+					BH_diagnostics.output(AH_data_ptr->BH_diagnostics_fileptr, (*state.PhysTime));
+
+					// recentering
+					recentering(*ps_ptr, BH_diagnostics.max_x, BH_diagnostics.max_y, BH_diagnostics.max_z,
+								BH_diagnostics.min_x, BH_diagnostics.min_y, BH_diagnostics.min_z,
+								BH_diagnostics.centroid_x, BH_diagnostics.centroid_y, BH_diagnostics.centroid_z);
+					AH_data_ptr->recentering_flag = true;
+				}
+
+				//
+				// are all processors done with all their genuine horizons?
+				// or if this is a single-processor run, are we done with this horizon?
+				//
+				if (!any_proc_needs_more_iterations)
+					return; // *** NORMAL RETURN ***
+
+				//
+				// compute the Jacobian matrix
+				// *** this is a synchronous operation across all processors ***
+				//
+
+				const enum expansion_status
+					Jacobian_status = expansion_Jacobian(this_horizon_needs_more_iterations ? ps_ptr : NULL,
+														 this_horizon_needs_more_iterations ? Jac_ptr : NULL,
+														 add_to_expansion,
+														 (iteration == 1),
+														 false);
+				const bool Jacobian_is_ok = (Jacobian_status == expansion_success);
+
+				//
+				// skip to the next horizon unless
+				// this is a genuine Jacobian computation, and it went ok
+				//
+				if (!(this_horizon_needs_more_iterations && Jacobian_is_ok))
+					break; // *** LOOP EXIT ***
+
+				//
+				// compute the Newton step
+				//
+				Jac_ptr->solve_linear_system(gfns::gfn__Theta, gfns::gfn__Delta_h, false);
+
+				// no single step may change h by more than 10% of the mean radius
+				Newton_step(*ps_ptr, mean_horizon_radius, 0.1);
+
+				// end of this Newton iteration
+			}
+
+			// end of this horizon
+		}
+
+		// we should never get to here
+		assert(false);
+	}
+
+	//******************************************************************************
+	//******************************************************************************
+	//******************************************************************************
+	namespace
+	{
+		//
+		// This function shares each active processor's iteration status with
+		// all processors and reports whether anybody needs more iterations.
+		//
+		// Each active processor writes its own values into one row of an
+		// otherwise all-zero buffer; summing the buffers of all processors
+		// with MPI_Allreduce() therefore gives everybody every row.
+		//
+		// Arguments:
+		// N_procs = Only used to sanity-check my_proc.
+		// N_active_procs = Number of rows in the buffers.
+		// my_proc = This processor's row; must be a valid row if my_active_flag.
+		// my_active_flag = If false, this processor contributes only zeros.
+		// hn (>= 0), iteration (> 0), effective_expansion_status,
+		// mean_horizon_radius, infinity_norm, found_this_horizon,
+		// I_need_more_iterations = This processor's status to be shared.
+		// isb = Receives the status of processors 0 .. N_active_procs-1; its
+		//       buffers are allocated here on first use (isb.hn_buffer == NULL).
+		//
+		// Results:
+		// Returns true iff at least one processor sent I_need_more_iterations.
+		//
+		bool broadcast_status(int N_procs, int N_active_procs,
+							  int my_proc, bool my_active_flag,
+							  int hn, int iteration,
+							  enum expansion_status effective_expansion_status,
+							  fp mean_horizon_radius, fp infinity_norm,
+							  bool found_this_horizon, bool I_need_more_iterations,
+							  struct iteration_status_buffers &isb)
+		{
+			assert(my_proc >= 0);
+			assert(my_proc < N_procs);
+
+			enum
+			{
+				buffer_var__hn = 0,	   // also encodes found_this_horizon flag
+									   // in sign: +=true, -=false
+				buffer_var__iteration, // also encodes I_need_more_iterations flag
+									   // in sign: +=true, -=false
+				buffer_var__expansion_status,
+				buffer_var__mean_horizon_radius,
+				buffer_var__Theta_infinity_norm,
+				N_buffer_vars // no comma
+			};
+
+			//
+			// allocate buffers if this is the first use
+			//
+			if (isb.hn_buffer == NULL)
+			{
+				isb.hn_buffer = new int[N_active_procs];
+				isb.iteration_buffer = new int[N_active_procs];
+				isb.expansion_status_buffer = new enum expansion_status[N_active_procs];
+				isb.mean_horizon_radius_buffer = new fp[N_active_procs];
+				isb.Theta_infinity_norm_buffer = new fp[N_active_procs];
+				isb.found_horizon_buffer = new bool[N_active_procs];
+
+				isb.send_buffer_ptr = new jtutil::array2d<double>(0, N_active_procs - 1,
+																  0, N_buffer_vars - 1);
+				isb.receive_buffer_ptr = new jtutil::array2d<double>(0, N_active_procs - 1,
+																	 0, N_buffer_vars - 1);
+			}
+			jtutil::array2d<double> &send_buffer = *isb.send_buffer_ptr;
+			jtutil::array2d<double> &receive_buffer = *isb.receive_buffer_ptr;
+
+			//
+			// pack this processor's values into the reduction buffer
+			//
+			jtutil::zero_C_array(send_buffer.N_array(), send_buffer.data_array());
+			if (my_active_flag)
+			{
+				assert(send_buffer.is_valid_i(my_proc));
+				assert(hn >= 0);	   // encoding scheme assumes this
+				assert(iteration > 0); // encoding scheme assumes this
+				send_buffer(my_proc, buffer_var__hn) = found_this_horizon ? +hn : -hn;
+				send_buffer(my_proc, buffer_var__iteration) = I_need_more_iterations ? +iteration : -iteration;
+				send_buffer(my_proc, buffer_var__expansion_status) = int(effective_expansion_status);
+				send_buffer(my_proc, buffer_var__mean_horizon_radius) = mean_horizon_radius;
+				send_buffer(my_proc, buffer_var__Theta_infinity_norm) = infinity_norm;
+			}
+
+			// The buffers hold C doubles, so the matching datatype handle is
+			// MPI_DOUBLE; MPI_DOUBLE_PRECISION is the Fortran handle.
+			const int reduction_status = MPI_Allreduce(static_cast<void *>(send_buffer.data_array()),
+													   static_cast<void *>(receive_buffer.data_array()),
+													   send_buffer.N_array(),
+													   MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+			if (reduction_status != MPI_SUCCESS)
+				CCTK_VWarn(0, __LINE__, __FILE__, CCTK_THORNSTRING,
+						   "broadcast_status(): error status %d from reduction!",
+						   reduction_status); /*NOTREACHED*/
+
+			//
+			// unpack the reduction buffer back to the high-level result buffers and
+			// compute the inclusive-or of the broadcast I_need_more_iterations flags
+			//
+			bool any_proc_needs_more_iterations = false;
+			for (int proc = 0; proc < N_active_procs; ++proc)
+			{
+				// sign carries the found-this-horizon flag, magnitude carries hn
+				const int hn_temp = static_cast<int>(
+					receive_buffer(proc, buffer_var__hn));
+				isb.hn_buffer[proc] = jtutil::abs(hn_temp);
+				isb.found_horizon_buffer[proc] = (hn_temp > 0);
+
+				// sign carries the need-more-iterations flag, magnitude the count
+				const int iteration_temp = static_cast<int>(
+					receive_buffer(proc, buffer_var__iteration));
+				isb.iteration_buffer[proc] = jtutil::abs(iteration_temp);
+				const bool proc_needs_more_iterations = (iteration_temp > 0);
+				any_proc_needs_more_iterations |= proc_needs_more_iterations;
+
+				isb.expansion_status_buffer[proc] = static_cast<enum expansion_status>(
+					static_cast<int>(
+						receive_buffer(proc, buffer_var__expansion_status)));
+
+				isb.mean_horizon_radius_buffer[proc] = receive_buffer(proc, buffer_var__mean_horizon_radius);
+				isb.Theta_infinity_norm_buffer[proc] = receive_buffer(proc, buffer_var__Theta_infinity_norm);
+			}
+
+			return any_proc_needs_more_iterations;
+		}
+	}
+	//
+	// This function takes the Newton step, scaling it down if it's too large.
+	//
+	// Arguments:
+	// ps = The patch system containing the gridfns h and Delta_h.
+	// mean_horizon_radius = ||h||_mean
+	// max_allowable_Delta_h_over_h = The maximum allowable
+	//				     ||Delta_h||_infinity / ||h||_mean
+	//				  Any step over this is internally clamped
+	//				  (scaled down) to this size.
+	//
+	namespace
+	{
+		// Subtracts the Newton update Delta_h from h at every grid point, after
+		// shrinking the whole update uniformly if its infinity norm exceeds
+		// max_allowable_Delta_h_over_h * mean_horizon_radius.
+		void Newton_step(patch_system &ps,
+						 fp mean_horizon_radius, fp max_allowable_Delta_h_over_h)
+		{
+			// largest step we are willing to take at any single point
+			const fp step_limit = max_allowable_Delta_h_over_h * mean_horizon_radius;
+
+			// largest step the linear solve is actually asking for
+			jtutil::norm<fp> step_norms;
+			ps.gridfn_norms(gfns::gfn__Delta_h, step_norms);
+			const fp largest_step = step_norms.infinity_norm();
+
+			// damping factor: 1 if the step is within the limit, else just
+			// small enough to bring the largest step down to the limit
+			fp damping;
+			if (largest_step <= step_limit)
+				damping = 1.0;
+			else
+				damping = step_limit / largest_step;
+
+			// h -= damping * Delta_h on every patch
+			const int patch_count = ps.N_patches();
+			for (int ipatch = 0; ipatch < patch_count; ++ipatch)
+			{
+				patch &cur = ps.ith_patch(ipatch);
+				const int rho_first = cur.min_irho(), rho_last = cur.max_irho();
+				const int sigma_first = cur.min_isigma(), sigma_last = cur.max_isigma();
+
+				for (int ir = rho_first; ir <= rho_last; ++ir)
+					for (int is = sigma_first; is <= sigma_last; ++is)
+						cur.ghosted_gridfn(gfns::gfn__h, ir, is) -= damping * cur.gridfn(gfns::gfn__Delta_h, ir, is);
+			}
+		}
+		// Snapshots the current horizon shape: copies the ghosted gridfn h into
+		// the gridfn oldh at every (irho,isigma) of every patch of ps.
+		void save_oldh(patch_system &ps)
+		{
+			const int patch_count = ps.N_patches();
+			for (int ipatch = 0; ipatch < patch_count; ++ipatch)
+			{
+				patch &cur = ps.ith_patch(ipatch);
+				const int rho_first = cur.min_irho(), rho_last = cur.max_irho();
+				const int sigma_first = cur.min_isigma(), sigma_last = cur.max_isigma();
+
+				for (int ir = rho_first; ir <= rho_last; ++ir)
+					for (int is = sigma_first; is <= sigma_last; ++is)
+						cur.gridfn(gfns::gfn__oldh, ir, is) = cur.ghosted_gridfn(gfns::gfn__h, ir, is);
+			}
+		}
+	}
+
+	//******************************************************************************
+
+} // namespace AHFinderDirect
+#endif