//$Id: Newton.C,v 1.1 2012/04/03 10:49:44 zjcao Exp $ #include "macrodef.h" #ifdef With_AHF #include #include #include #include #include #ifdef MPI_STUB #include "mpi_stub.h" #else #include #endif #include "util_Table.h" #include "cctk.h" #include "config.h" #include "stdc.h" #include "util.h" #include "array.h" #include "cpm_map.h" #include "linear_map.h" #include "coords.h" #include "tgrid.h" #include "fd_grid.h" #include "patch.h" #include "patch_edge.h" #include "patch_interp.h" #include "ghost_zone.h" #include "patch_system.h" #include "Jacobian.h" #include "gfns.h" #include "gr.h" #include "horizon_sequence.h" #include "BH_diagnostics.h" #include "driver.h" #include "myglobal.h" namespace AHFinderDirect { extern struct state state; using jtutil::error_exit; void recentering(patch_system &ps, double max_x, double max_y, double max_z, double min_x, double min_y, double min_z, double centroid_x, double centroid_y, double centroid_z) { fp ox = ps.origin_x(); fp oy = ps.origin_y(); fp oz = ps.origin_z(); const fp CTR_TOLERENCE = .45; bool center = (abs(max_x + min_x - 2.0 * ox) < CTR_TOLERENCE * (max_x - min_x)) && (abs(max_y + min_y - 2.0 * oy) < CTR_TOLERENCE * (max_y - min_y)) && (abs(max_z + min_z - 2.0 * oz) < CTR_TOLERENCE * (max_z - min_z)); if (!center) { for (int pn = 0; pn < ps.N_patches(); ++pn) { patch &p = ps.ith_patch(pn); for (int irho = p.min_irho(); irho <= p.max_irho(); ++irho) for (int isigma = p.min_isigma(); isigma <= p.max_isigma(); ++isigma) { p.ghosted_gridfn(gfns::gfn__h, irho, isigma) = sqrt(jtutil::pow2(p.gridfn(gfns::gfn__global_x, irho, isigma) - centroid_x) + jtutil::pow2(p.gridfn(gfns::gfn__global_y, irho, isigma) - centroid_y) + jtutil::pow2(p.gridfn(gfns::gfn__global_z, irho, isigma) - centroid_z)); } } ps.recentering(centroid_x, centroid_y, centroid_z); } } namespace { bool broadcast_status(int N_procs, int N_active_procs, int my_proc, bool my_active_flag, int hn, int iteration, enum expansion_status expansion_status, fp mean_horizon_radius, fp infinity_norm, bool found_this_horizon, bool I_need_more_iterations, struct iteration_status_buffers &isb); void Newton_step(patch_system &ps, fp mean_horizon_radius, fp max_allowable_Delta_h_over_h); void save_oldh(patch_system &ps); int interpolate_alsh(patch_system *ps_ptr) { int status = 1; #define CAST_PTR_OR_NULL(type_, ptr_) \ (ps_ptr == NULL) ? NULL : static_cast(ptr_) // // ***** interpolation points ***** // const int N_interp_points = (ps_ptr == NULL) ? 0 : ps_ptr->N_grid_points(); double *interp_coords[3] = { CAST_PTR_OR_NULL(double *, ps_ptr->gridfn_data(gfns::gfn__global_x)), CAST_PTR_OR_NULL(double *, ps_ptr->gridfn_data(gfns::gfn__global_y)), CAST_PTR_OR_NULL(double *, ps_ptr->gridfn_data(gfns::gfn__global_z)), }; double *const output_arrays[] = { CAST_PTR_OR_NULL(double *, ps_ptr->gridfn_data(gfns::gfn__global_xx)), // Lapse-1 CAST_PTR_OR_NULL(double *, ps_ptr->gridfn_data(gfns::gfn__global_xy)), // Sfx CAST_PTR_OR_NULL(double *, ps_ptr->gridfn_data(gfns::gfn__global_xz)), // Sfy CAST_PTR_OR_NULL(double *, ps_ptr->gridfn_data(gfns::gfn__global_yy)), // Sfz }; const int N_output_arrays_dim = sizeof(output_arrays) / sizeof(output_arrays[0]); const int N_output_arrays_use = N_output_arrays_dim; double *Data, *oX, *oY, *oZ; int s; int Npts = 0; for (int ncpu = 0; ncpu < state.N_procs; ncpu++) { if (state.my_proc == ncpu) Npts = N_interp_points; MPI_Bcast(&Npts, 1, MPI_INT, ncpu, MPI_COMM_WORLD); if (Npts != 0) { Data = new double[Npts * N_output_arrays_use]; oX = new double[Npts]; oY = new double[Npts]; oZ = new double[Npts]; if (state.my_proc == ncpu) { memcpy(oX, interp_coords[0], Npts * sizeof(double)); memcpy(oY, interp_coords[1], Npts * sizeof(double)); memcpy(oZ, interp_coords[2], Npts * sizeof(double)); } MPI_Bcast(oX, Npts, MPI_DOUBLE, ncpu, MPI_COMM_WORLD); MPI_Bcast(oY, Npts, MPI_DOUBLE, ncpu, MPI_COMM_WORLD); MPI_Bcast(oZ, Npts, MPI_DOUBLE, ncpu, MPI_COMM_WORLD); // each cpu calls interpolator s = globalInterpGFLlash( oX, oY, oZ, Npts, Data); // 1 succuss; 0 fail if (state.my_proc == ncpu) { status = s; if (status == 1) { for (int ngf = 0; ngf < N_output_arrays_use; ngf++) { memcpy(output_arrays[ngf], Data + ngf * N_interp_points, sizeof(double) * N_interp_points); } } } delete[] oX; delete[] oY; delete[] oZ; delete[] Data; } } return status; } } //****************************************************************************** void Newton(int N_procs, int N_active_procs, int my_proc, horizon_sequence &hs, struct AH_data *const AH_data_array[], struct iteration_status_buffers &isb, int *dumpid, double *dT) { const bool my_active_flag = hs.has_genuine_horizons(); const int N_horizons = hs.N_horizons(); for (int hn = hs.init_hn();; hn = hs.next_hn()) // hn always =0 for cpu who has no patch_system { bool horizon_is_genuine = hs.is_genuine(); const bool there_is_another_genuine_horizon = hs.is_next_genuine(); struct AH_data *AH_data_ptr = horizon_is_genuine ? AH_data_array[hn] : NULL; horizon_is_genuine = horizon_is_genuine && AH_data_ptr->find_trigger && !AH_data_ptr->stop_finding; if (horizon_is_genuine) cout << "being finding horizon #" << hn << endl; patch_system *const ps_ptr = horizon_is_genuine ? AH_data_ptr->ps_ptr : NULL; Jacobian *const Jac_ptr = horizon_is_genuine ? AH_data_ptr->Jac_ptr : NULL; const double add_to_expansion = horizon_is_genuine ? -AH_data_ptr->surface_expansion : 0.0; const int max_iterations = horizon_is_genuine ? (AH_data_ptr->initial_find_flag ? 80 : 20) : INT_MAX; if (horizon_is_genuine) save_oldh(*ps_ptr); for (int iteration = 1;; ++iteration) { if (horizon_is_genuine && iteration == max_iterations) cout << "AHfinder: fail to find horizon #" << hn << " with Newton iteration " << iteration << " steps!!!" << endl; jtutil::norm Theta_norms; const enum expansion_status raw_expansion_status = expansion(ps_ptr, add_to_expansion, (iteration == 1), true, &Theta_norms); const bool Theta_is_ok = (raw_expansion_status == expansion_success); const bool norms_are_ok = horizon_is_genuine && Theta_is_ok; // // have we found this horizon? // if so, compute and output BH diagnostics // const bool found_this_horizon = norms_are_ok && (Theta_norms.infinity_norm() <= 1e-11); if (horizon_is_genuine) AH_data_ptr->found_flag = found_this_horizon; if (horizon_is_genuine && found_this_horizon) cout << "found horizon #" << hn << " with " << iteration << " steps!!!" << endl; // // see if the expansion is too big // (if so, we'll give up on this horizon) // const bool expansion_is_too_large = norms_are_ok && (Theta_norms.infinity_norm() > 1e10); // // compute the mean horizon radius, and if it's too large, // then pretend expansion() returned a "surface too large" error status // jtutil::norm h_norms; if (horizon_is_genuine) then ps_ptr->ghosted_gridfn_norms(gfns::gfn__h, h_norms); const fp mean_horizon_radius = horizon_is_genuine ? h_norms.mean() : 0.0; const bool horizon_is_too_large = (mean_horizon_radius > 1e10); const enum expansion_status effective_expansion_status = horizon_is_too_large ? expansion_failure__surface_too_large : raw_expansion_status; // // see if we need more iterations (either on this or another horizon) // // does *this* horizon need more iterations? // i.e. has this horizon's Newton iteration not yet converged? const bool this_horizon_needs_more_iterations = horizon_is_genuine && Theta_is_ok && !found_this_horizon && !expansion_is_too_large && !horizon_is_too_large && (iteration < max_iterations); // do I (this processor) need to do more iterations // on this or a following horizon? const bool I_need_more_iterations = this_horizon_needs_more_iterations || there_is_another_genuine_horizon; // // broadcast iteration status from each active processor // to all processors, and inclusive-or the "we need more iterations" // flags to see if *any* (active) processor needs more iterations // const bool any_proc_needs_more_iterations = broadcast_status(N_procs, N_active_procs, my_proc, my_active_flag, hn, iteration, effective_expansion_status, mean_horizon_radius, (norms_are_ok ? Theta_norms.infinity_norm() : 0.0), found_this_horizon, I_need_more_iterations, isb); // set found-this-horizon flags // for all active processors' non-dummy horizons for (int found_proc = 0; found_proc < N_active_procs; ++found_proc) { const int found_hn = isb.hn_buffer[found_proc]; if (found_hn > 0) AH_data_array[found_hn]->found_flag = isb.found_horizon_buffer[found_proc]; } // // prepare lapse and shift { int ff = 0, fft = 0; if (found_this_horizon && dumpid[hn - 1] > 0 && dT[hn - 1] > 0) fft = 1; MPI_Allreduce(&fft, &ff, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); if (ff) { if ((interpolate_alsh(ps_ptr) == 0) && (state.my_proc == 0)) cout << "interpolation of lapse and shift for AH failed." << endl; } } if (found_this_horizon) { struct BH_diagnostics &BH_diagnostics = AH_data_ptr->BH_diagnostics; // output data if (dumpid[hn - 1] > 0) { char filename[100]; sprintf(filename, "ah%02d_%05d.dat", hn, dumpid[hn - 1]); if (dT[hn - 1] > 0) { // gridfunction xx,xy,xz,yy,yz,zz will be used as temp storage BH_diagnostics.compute_signature(*ps_ptr, dT[hn - 1]); ps_ptr->print_gridfn_with_xyz(gfns::gfn__global_zz, true, gfns::gfn__h, filename); } else ps_ptr->print_ghosted_gridfn_with_xyz(gfns::gfn__h, true, gfns::gfn__h, filename, false); } BH_diagnostics.compute(*ps_ptr); // gridfunction xx,xy,xz,yy,yz,zz changed if (AH_data_ptr->BH_diagnostics_fileptr == NULL) AH_data_ptr->BH_diagnostics_fileptr = BH_diagnostics.setup_output_file(N_horizons, hn); BH_diagnostics.output(AH_data_ptr->BH_diagnostics_fileptr, (*state.PhysTime)); // recentering recentering(*ps_ptr, (AH_data_ptr->BH_diagnostics).max_x, (AH_data_ptr->BH_diagnostics).max_y, (AH_data_ptr->BH_diagnostics).max_z, (AH_data_ptr->BH_diagnostics).min_x, (AH_data_ptr->BH_diagnostics).min_y, (AH_data_ptr->BH_diagnostics).min_z, (AH_data_ptr->BH_diagnostics).centroid_x, (AH_data_ptr->BH_diagnostics).centroid_y, (AH_data_ptr->BH_diagnostics).centroid_z); AH_data_ptr->recentering_flag = true; } // // are all processors done with all their genuine horizons? // or if this is a single-processor run, are we done with this horizon? // if (!any_proc_needs_more_iterations) return; // *** NORMAL RETURN *** // // compute the Jacobian matrix // *** this is a synchronous operation across all processors *** // const enum expansion_status Jacobian_status = expansion_Jacobian(this_horizon_needs_more_iterations ? ps_ptr : NULL, this_horizon_needs_more_iterations ? Jac_ptr : NULL, add_to_expansion, (iteration == 1), false); const bool Jacobian_is_ok = (Jacobian_status == expansion_success); // // skip to the next horizon unless // this is a genuine Jacobian computation, and it went ok // if (!(this_horizon_needs_more_iterations && Jacobian_is_ok)) break; // *** LOOP EXIT *** // // compute the Newton step // Jac_ptr->solve_linear_system(gfns::gfn__Theta, gfns::gfn__Delta_h, false); Newton_step(*ps_ptr, mean_horizon_radius, 0.1); // end of this Newton iteration } // end of this horizon } // we should never get to here assert(false); } //****************************************************************************** //****************************************************************************** //****************************************************************************** namespace { bool broadcast_status(int N_procs, int N_active_procs, int my_proc, bool my_active_flag, int hn, int iteration, enum expansion_status effective_expansion_status, fp mean_horizon_radius, fp infinity_norm, bool found_this_horizon, bool I_need_more_iterations, struct iteration_status_buffers &isb) { assert(my_proc >= 0); assert(my_proc < N_procs); enum { buffer_var__hn = 0, // also encodes found_this_horizon flag // in sign: +=true, -=false buffer_var__iteration, // also encodes I_need_more_iterations flag // in sign: +=true, -=false buffer_var__expansion_status, buffer_var__mean_horizon_radius, buffer_var__Theta_infinity_norm, N_buffer_vars // no comma }; // // allocate buffers if this is the first use // if (isb.hn_buffer == NULL) then { isb.hn_buffer = new int[N_active_procs]; isb.iteration_buffer = new int[N_active_procs]; isb.expansion_status_buffer = new enum expansion_status[N_active_procs]; isb.mean_horizon_radius_buffer = new fp[N_active_procs]; isb.Theta_infinity_norm_buffer = new fp[N_active_procs]; isb.found_horizon_buffer = new bool[N_active_procs]; isb.send_buffer_ptr = new jtutil::array2d(0, N_active_procs - 1, 0, N_buffer_vars - 1); isb.receive_buffer_ptr = new jtutil::array2d(0, N_active_procs - 1, 0, N_buffer_vars - 1); } jtutil::array2d &send_buffer = *isb.send_buffer_ptr; jtutil::array2d &receive_buffer = *isb.receive_buffer_ptr; // // pack this processor's values into the reduction buffer // jtutil::zero_C_array(send_buffer.N_array(), send_buffer.data_array()); if (my_active_flag) then { assert(send_buffer.is_valid_i(my_proc)); assert(hn >= 0); // encoding scheme assumes this assert(iteration > 0); // encoding scheme assumes this send_buffer(my_proc, buffer_var__hn) = found_this_horizon ? +hn : -hn; send_buffer(my_proc, buffer_var__iteration) = I_need_more_iterations ? +iteration : -iteration; send_buffer(my_proc, buffer_var__expansion_status) = int(effective_expansion_status); send_buffer(my_proc, buffer_var__mean_horizon_radius) = mean_horizon_radius; send_buffer(my_proc, buffer_var__Theta_infinity_norm) = infinity_norm; } const int reduction_status = MPI_Allreduce(static_cast(send_buffer.data_array()), static_cast(receive_buffer.data_array()), send_buffer.N_array(), MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD); // if (reduction_status < 0) if (reduction_status != MPI_SUCCESS) then CCTK_VWarn(0, __LINE__, __FILE__, CCTK_THORNSTRING, "broadcast_status(): error status %d from reduction!", reduction_status); /*NOTREACHED*/ // // unpack the reduction buffer back to the high-level result buffers and // compute the inclusive-or of the broadcast I_need_more_iterations flags // bool any_proc_needs_more_iterations = false; for (int proc = 0; proc < N_active_procs; ++proc) { const int hn_temp = static_cast( receive_buffer(proc, buffer_var__hn)); isb.hn_buffer[proc] = jtutil::abs(hn_temp); isb.found_horizon_buffer[proc] = (hn_temp > 0); const int iteration_temp = static_cast( receive_buffer(proc, buffer_var__iteration)); isb.iteration_buffer[proc] = jtutil::abs(iteration_temp); const bool proc_needs_more_iterations = (iteration_temp > 0); any_proc_needs_more_iterations |= proc_needs_more_iterations; isb.expansion_status_buffer[proc] = static_cast( static_cast( receive_buffer(proc, buffer_var__expansion_status))); isb.mean_horizon_radius_buffer[proc] = receive_buffer(proc, buffer_var__mean_horizon_radius); isb.Theta_infinity_norm_buffer[proc] = receive_buffer(proc, buffer_var__Theta_infinity_norm); } return any_proc_needs_more_iterations; } } // // This function takes the Newton step, scaling it down if it's too large. // // Arguments: // ps = The patch system containing the gridfns h and Delta_h. // mean_horizon_radius = ||h||_mean // max_allowable_Delta_h_over_h = The maximum allowable // ||Delta_h||_infinity / ||h||_mean // Any step over this is internally clamped // (scaled down) to this size. // namespace { void Newton_step(patch_system &ps, fp mean_horizon_radius, fp max_allowable_Delta_h_over_h) { // // compute scale factor (1 for small steps, <1 for large steps) // const fp max_allowable_Delta_h = max_allowable_Delta_h_over_h * mean_horizon_radius; jtutil::norm Delta_h_norms; ps.gridfn_norms(gfns::gfn__Delta_h, Delta_h_norms); const fp max_Delta_h = Delta_h_norms.infinity_norm(); const fp scale = (max_Delta_h <= max_allowable_Delta_h) ? 1.0 : max_allowable_Delta_h / max_Delta_h; // // take the Newton step (scaled if necessary) // for (int pn = 0; pn < ps.N_patches(); ++pn) { patch &p = ps.ith_patch(pn); for (int irho = p.min_irho(); irho <= p.max_irho(); ++irho) { for (int isigma = p.min_isigma(); isigma <= p.max_isigma(); ++isigma) { p.ghosted_gridfn(gfns::gfn__h, irho, isigma) -= scale * p.gridfn(gfns::gfn__Delta_h, irho, isigma); } } } } void save_oldh(patch_system &ps) { for (int pn = 0; pn < ps.N_patches(); ++pn) { patch &p = ps.ith_patch(pn); for (int irho = p.min_irho(); irho <= p.max_irho(); ++irho) { for (int isigma = p.min_isigma(); isigma <= p.max_isigma(); ++isigma) { p.gridfn(gfns::gfn__oldh, irho, isigma) = p.ghosted_gridfn(gfns::gfn__h, irho, isigma); } } } } } //****************************************************************************** } // namespace AHFinderDirect #endif