Compare commits
2 Commits
chb-copilo
...
cjy-oneapi
| Author | SHA1 | Date | |
|---|---|---|---|
| 6796384bf4 | |||
| c974a88d6d |
@@ -8,14 +8,6 @@
|
|||||||
##
|
##
|
||||||
##################################################################
|
##################################################################
|
||||||
|
|
||||||
## Guard against re-execution by multiprocessing child processes.
|
|
||||||
## Without this, using 'spawn' or 'forkserver' context would cause every
|
|
||||||
## worker to re-run the entire script, spawning exponentially more
|
|
||||||
## workers (fork bomb).
|
|
||||||
if __name__ != '__main__':
|
|
||||||
import sys as _sys
|
|
||||||
_sys.exit(0)
|
|
||||||
|
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
|
|
||||||
@@ -432,31 +424,26 @@ print(
|
|||||||
|
|
||||||
import plot_xiaoqu
|
import plot_xiaoqu
|
||||||
import plot_GW_strain_amplitude_xiaoqu
|
import plot_GW_strain_amplitude_xiaoqu
|
||||||
from parallel_plot_helper import run_plot_tasks_parallel
|
|
||||||
|
|
||||||
plot_tasks = []
|
|
||||||
|
|
||||||
## Plot black hole trajectory
|
## Plot black hole trajectory
|
||||||
plot_tasks.append( ( plot_xiaoqu.generate_puncture_orbit_plot, (binary_results_directory, figure_directory) ) )
|
plot_xiaoqu.generate_puncture_orbit_plot( binary_results_directory, figure_directory )
|
||||||
plot_tasks.append( ( plot_xiaoqu.generate_puncture_orbit_plot3D, (binary_results_directory, figure_directory) ) )
|
plot_xiaoqu.generate_puncture_orbit_plot3D( binary_results_directory, figure_directory )
|
||||||
|
|
||||||
## Plot black hole separation vs. time
|
## Plot black hole separation vs. time
|
||||||
plot_tasks.append( ( plot_xiaoqu.generate_puncture_distence_plot, (binary_results_directory, figure_directory) ) )
|
plot_xiaoqu.generate_puncture_distence_plot( binary_results_directory, figure_directory )
|
||||||
|
|
||||||
## Plot gravitational waveforms (psi4 and strain amplitude)
|
## Plot gravitational waveforms (psi4 and strain amplitude)
|
||||||
for i in range(input_data.Detector_Number):
|
for i in range(input_data.Detector_Number):
|
||||||
plot_tasks.append( ( plot_xiaoqu.generate_gravitational_wave_psi4_plot, (binary_results_directory, figure_directory, i) ) )
|
plot_xiaoqu.generate_gravitational_wave_psi4_plot( binary_results_directory, figure_directory, i )
|
||||||
plot_tasks.append( ( plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot, (binary_results_directory, figure_directory, i) ) )
|
plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot( binary_results_directory, figure_directory, i )
|
||||||
|
|
||||||
## Plot ADM mass evolution
|
## Plot ADM mass evolution
|
||||||
for i in range(input_data.Detector_Number):
|
for i in range(input_data.Detector_Number):
|
||||||
plot_tasks.append( ( plot_xiaoqu.generate_ADMmass_plot, (binary_results_directory, figure_directory, i) ) )
|
plot_xiaoqu.generate_ADMmass_plot( binary_results_directory, figure_directory, i )
|
||||||
|
|
||||||
## Plot Hamiltonian constraint violation over time
|
## Plot Hamiltonian constraint violation over time
|
||||||
for i in range(input_data.grid_level):
|
for i in range(input_data.grid_level):
|
||||||
plot_tasks.append( ( plot_xiaoqu.generate_constraint_check_plot, (binary_results_directory, figure_directory, i) ) )
|
plot_xiaoqu.generate_constraint_check_plot( binary_results_directory, figure_directory, i )
|
||||||
|
|
||||||
run_plot_tasks_parallel(plot_tasks)
|
|
||||||
|
|
||||||
## Plot stored binary data
|
## Plot stored binary data
|
||||||
plot_xiaoqu.generate_binary_data_plot( binary_results_directory, figure_directory )
|
plot_xiaoqu.generate_binary_data_plot( binary_results_directory, figure_directory )
|
||||||
|
|||||||
@@ -3756,358 +3756,6 @@ void Parallel::Sync(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry)
|
|||||||
delete[] transfer_src;
|
delete[] transfer_src;
|
||||||
delete[] transfer_dst;
|
delete[] transfer_dst;
|
||||||
}
|
}
|
||||||
//
|
|
||||||
// Async Sync: split into SyncBegin (initiate MPI) and SyncEnd (wait + unpack)
|
|
||||||
// This allows overlapping MPI communication with computation.
|
|
||||||
//
|
|
||||||
static void transfer_begin(Parallel::TransferState *ts)
|
|
||||||
{
|
|
||||||
int myrank;
|
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
|
||||||
int cpusize = ts->cpusize;
|
|
||||||
|
|
||||||
ts->reqs = new MPI_Request[2 * cpusize];
|
|
||||||
ts->stats = new MPI_Status[2 * cpusize];
|
|
||||||
ts->req_no = 0;
|
|
||||||
ts->send_data = new double *[cpusize];
|
|
||||||
ts->rec_data = new double *[cpusize];
|
|
||||||
int length;
|
|
||||||
|
|
||||||
for (int node = 0; node < cpusize; node++)
|
|
||||||
{
|
|
||||||
ts->send_data[node] = ts->rec_data[node] = 0;
|
|
||||||
if (node == myrank)
|
|
||||||
{
|
|
||||||
// Local copy: pack then immediately unpack (no MPI needed)
|
|
||||||
if ((length = Parallel::data_packer(0, ts->transfer_src[myrank], ts->transfer_dst[myrank],
|
|
||||||
node, PACK, ts->VarList1, ts->VarList2, ts->Symmetry)))
|
|
||||||
{
|
|
||||||
double *local_data = new double[length];
|
|
||||||
if (!local_data)
|
|
||||||
{
|
|
||||||
cout << "out of memory in transfer_begin, local copy" << endl;
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
Parallel::data_packer(local_data, ts->transfer_src[myrank], ts->transfer_dst[myrank],
|
|
||||||
node, PACK, ts->VarList1, ts->VarList2, ts->Symmetry);
|
|
||||||
Parallel::data_packer(local_data, ts->transfer_src[node], ts->transfer_dst[node],
|
|
||||||
node, UNPACK, ts->VarList1, ts->VarList2, ts->Symmetry);
|
|
||||||
delete[] local_data;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// send from this cpu to cpu#node
|
|
||||||
if ((length = Parallel::data_packer(0, ts->transfer_src[myrank], ts->transfer_dst[myrank],
|
|
||||||
node, PACK, ts->VarList1, ts->VarList2, ts->Symmetry)))
|
|
||||||
{
|
|
||||||
ts->send_data[node] = new double[length];
|
|
||||||
if (!ts->send_data[node])
|
|
||||||
{
|
|
||||||
cout << "out of memory in transfer_begin, send" << endl;
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
Parallel::data_packer(ts->send_data[node], ts->transfer_src[myrank], ts->transfer_dst[myrank],
|
|
||||||
node, PACK, ts->VarList1, ts->VarList2, ts->Symmetry);
|
|
||||||
MPI_Isend((void *)ts->send_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD,
|
|
||||||
ts->reqs + ts->req_no++);
|
|
||||||
}
|
|
||||||
// receive from cpu#node to this cpu
|
|
||||||
if ((length = Parallel::data_packer(0, ts->transfer_src[node], ts->transfer_dst[node],
|
|
||||||
node, UNPACK, ts->VarList1, ts->VarList2, ts->Symmetry)))
|
|
||||||
{
|
|
||||||
ts->rec_data[node] = new double[length];
|
|
||||||
if (!ts->rec_data[node])
|
|
||||||
{
|
|
||||||
cout << "out of memory in transfer_begin, recv" << endl;
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
MPI_Irecv((void *)ts->rec_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD,
|
|
||||||
ts->reqs + ts->req_no++);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// NOTE: MPI_Waitall is NOT called here - that happens in transfer_end
|
|
||||||
}
|
|
||||||
//
|
|
||||||
static void transfer_end(Parallel::TransferState *ts)
|
|
||||||
{
|
|
||||||
// Wait for all pending MPI operations
|
|
||||||
MPI_Waitall(ts->req_no, ts->reqs, ts->stats);
|
|
||||||
|
|
||||||
// Unpack received data from remote ranks
|
|
||||||
for (int node = 0; node < ts->cpusize; node++)
|
|
||||||
if (ts->rec_data[node])
|
|
||||||
Parallel::data_packer(ts->rec_data[node], ts->transfer_src[node], ts->transfer_dst[node],
|
|
||||||
node, UNPACK, ts->VarList1, ts->VarList2, ts->Symmetry);
|
|
||||||
|
|
||||||
// Cleanup MPI buffers
|
|
||||||
for (int node = 0; node < ts->cpusize; node++)
|
|
||||||
{
|
|
||||||
if (ts->send_data[node])
|
|
||||||
delete[] ts->send_data[node];
|
|
||||||
if (ts->rec_data[node])
|
|
||||||
delete[] ts->rec_data[node];
|
|
||||||
}
|
|
||||||
delete[] ts->reqs;
|
|
||||||
delete[] ts->stats;
|
|
||||||
delete[] ts->send_data;
|
|
||||||
delete[] ts->rec_data;
|
|
||||||
}
|
|
||||||
//
|
|
||||||
Parallel::SyncHandle *Parallel::SyncBegin(Patch *Pat, MyList<var> *VarList, int Symmetry)
|
|
||||||
{
|
|
||||||
int cpusize;
|
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
|
||||||
|
|
||||||
SyncHandle *handle = new SyncHandle;
|
|
||||||
handle->num_states = 1;
|
|
||||||
handle->states = new TransferState[1];
|
|
||||||
|
|
||||||
TransferState *ts = &handle->states[0];
|
|
||||||
ts->cpusize = cpusize;
|
|
||||||
ts->VarList1 = VarList;
|
|
||||||
ts->VarList2 = VarList;
|
|
||||||
ts->Symmetry = Symmetry;
|
|
||||||
ts->owns_gsl = true;
|
|
||||||
|
|
||||||
ts->dst = build_ghost_gsl(Pat);
|
|
||||||
ts->src = new MyList<Parallel::gridseg> *[cpusize];
|
|
||||||
ts->transfer_src = new MyList<Parallel::gridseg> *[cpusize];
|
|
||||||
ts->transfer_dst = new MyList<Parallel::gridseg> *[cpusize];
|
|
||||||
for (int node = 0; node < cpusize; node++)
|
|
||||||
{
|
|
||||||
ts->src[node] = build_owned_gsl0(Pat, node);
|
|
||||||
build_gstl(ts->src[node], ts->dst, &ts->transfer_src[node], &ts->transfer_dst[node]);
|
|
||||||
}
|
|
||||||
|
|
||||||
transfer_begin(ts);
|
|
||||||
|
|
||||||
return handle;
|
|
||||||
}
|
|
||||||
//
|
|
||||||
Parallel::SyncHandle *Parallel::SyncBegin(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry)
|
|
||||||
{
|
|
||||||
int cpusize;
|
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
|
||||||
|
|
||||||
// Count patches
|
|
||||||
int num_patches = 0;
|
|
||||||
MyList<Patch> *Pp = PatL;
|
|
||||||
while (Pp) { num_patches++; Pp = Pp->next; }
|
|
||||||
|
|
||||||
SyncHandle *handle = new SyncHandle;
|
|
||||||
handle->num_states = num_patches + 1; // intra-patch transfers + 1 inter-patch transfer
|
|
||||||
handle->states = new TransferState[handle->num_states];
|
|
||||||
|
|
||||||
// Intra-patch sync: for each patch, build ghost lists and initiate transfer
|
|
||||||
int idx = 0;
|
|
||||||
Pp = PatL;
|
|
||||||
while (Pp)
|
|
||||||
{
|
|
||||||
TransferState *ts = &handle->states[idx];
|
|
||||||
ts->cpusize = cpusize;
|
|
||||||
ts->VarList1 = VarList;
|
|
||||||
ts->VarList2 = VarList;
|
|
||||||
ts->Symmetry = Symmetry;
|
|
||||||
ts->owns_gsl = true;
|
|
||||||
|
|
||||||
ts->dst = build_ghost_gsl(Pp->data);
|
|
||||||
ts->src = new MyList<Parallel::gridseg> *[cpusize];
|
|
||||||
ts->transfer_src = new MyList<Parallel::gridseg> *[cpusize];
|
|
||||||
ts->transfer_dst = new MyList<Parallel::gridseg> *[cpusize];
|
|
||||||
for (int node = 0; node < cpusize; node++)
|
|
||||||
{
|
|
||||||
ts->src[node] = build_owned_gsl0(Pp->data, node);
|
|
||||||
build_gstl(ts->src[node], ts->dst, &ts->transfer_src[node], &ts->transfer_dst[node]);
|
|
||||||
}
|
|
||||||
|
|
||||||
transfer_begin(ts);
|
|
||||||
|
|
||||||
idx++;
|
|
||||||
Pp = Pp->next;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Inter-patch sync: buffer zone exchange between patches
|
|
||||||
{
|
|
||||||
TransferState *ts = &handle->states[idx];
|
|
||||||
ts->cpusize = cpusize;
|
|
||||||
ts->VarList1 = VarList;
|
|
||||||
ts->VarList2 = VarList;
|
|
||||||
ts->Symmetry = Symmetry;
|
|
||||||
ts->owns_gsl = true;
|
|
||||||
|
|
||||||
ts->dst = build_buffer_gsl(PatL);
|
|
||||||
ts->src = new MyList<Parallel::gridseg> *[cpusize];
|
|
||||||
ts->transfer_src = new MyList<Parallel::gridseg> *[cpusize];
|
|
||||||
ts->transfer_dst = new MyList<Parallel::gridseg> *[cpusize];
|
|
||||||
for (int node = 0; node < cpusize; node++)
|
|
||||||
{
|
|
||||||
ts->src[node] = build_owned_gsl(PatL, node, 5, Symmetry);
|
|
||||||
build_gstl(ts->src[node], ts->dst, &ts->transfer_src[node], &ts->transfer_dst[node]);
|
|
||||||
}
|
|
||||||
|
|
||||||
transfer_begin(ts);
|
|
||||||
}
|
|
||||||
|
|
||||||
return handle;
|
|
||||||
}
|
|
||||||
//
|
|
||||||
void Parallel::SyncEnd(SyncHandle *handle)
|
|
||||||
{
|
|
||||||
if (!handle)
|
|
||||||
return;
|
|
||||||
|
|
||||||
// Wait for all pending transfers and unpack
|
|
||||||
for (int i = 0; i < handle->num_states; i++)
|
|
||||||
{
|
|
||||||
TransferState *ts = &handle->states[i];
|
|
||||||
transfer_end(ts);
|
|
||||||
|
|
||||||
// Cleanup grid segment lists only if this state owns them
|
|
||||||
if (ts->owns_gsl)
|
|
||||||
{
|
|
||||||
if (ts->dst)
|
|
||||||
ts->dst->destroyList();
|
|
||||||
for (int node = 0; node < ts->cpusize; node++)
|
|
||||||
{
|
|
||||||
if (ts->src[node])
|
|
||||||
ts->src[node]->destroyList();
|
|
||||||
if (ts->transfer_src[node])
|
|
||||||
ts->transfer_src[node]->destroyList();
|
|
||||||
if (ts->transfer_dst[node])
|
|
||||||
ts->transfer_dst[node]->destroyList();
|
|
||||||
}
|
|
||||||
delete[] ts->src;
|
|
||||||
delete[] ts->transfer_src;
|
|
||||||
delete[] ts->transfer_dst;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
delete[] handle->states;
|
|
||||||
delete handle;
|
|
||||||
}
|
|
||||||
//
|
|
||||||
// SyncPreparePlan: Pre-build grid segment lists for a patch list.
|
|
||||||
// The plan can be reused across multiple SyncBeginWithPlan calls
|
|
||||||
// as long as the mesh topology does not change (no regridding).
|
|
||||||
//
|
|
||||||
Parallel::SyncPlan *Parallel::SyncPreparePlan(MyList<Patch> *PatL, int Symmetry)
|
|
||||||
{
|
|
||||||
int cpusize;
|
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
|
||||||
|
|
||||||
// Count patches
|
|
||||||
int num_patches = 0;
|
|
||||||
MyList<Patch> *Pp = PatL;
|
|
||||||
while (Pp) { num_patches++; Pp = Pp->next; }
|
|
||||||
|
|
||||||
SyncPlan *plan = new SyncPlan;
|
|
||||||
plan->num_entries = num_patches + 1; // intra-patch + 1 inter-patch
|
|
||||||
plan->Symmetry = Symmetry;
|
|
||||||
plan->entries = new SyncPlanEntry[plan->num_entries];
|
|
||||||
|
|
||||||
// Intra-patch entries: ghost zone exchange within each patch
|
|
||||||
int idx = 0;
|
|
||||||
Pp = PatL;
|
|
||||||
while (Pp)
|
|
||||||
{
|
|
||||||
SyncPlanEntry *pe = &plan->entries[idx];
|
|
||||||
pe->cpusize = cpusize;
|
|
||||||
pe->dst = build_ghost_gsl(Pp->data);
|
|
||||||
pe->src = new MyList<Parallel::gridseg> *[cpusize];
|
|
||||||
pe->transfer_src = new MyList<Parallel::gridseg> *[cpusize];
|
|
||||||
pe->transfer_dst = new MyList<Parallel::gridseg> *[cpusize];
|
|
||||||
for (int node = 0; node < cpusize; node++)
|
|
||||||
{
|
|
||||||
pe->src[node] = build_owned_gsl0(Pp->data, node);
|
|
||||||
build_gstl(pe->src[node], pe->dst, &pe->transfer_src[node], &pe->transfer_dst[node]);
|
|
||||||
}
|
|
||||||
idx++;
|
|
||||||
Pp = Pp->next;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Inter-patch entry: buffer zone exchange between patches
|
|
||||||
{
|
|
||||||
SyncPlanEntry *pe = &plan->entries[idx];
|
|
||||||
pe->cpusize = cpusize;
|
|
||||||
pe->dst = build_buffer_gsl(PatL);
|
|
||||||
pe->src = new MyList<Parallel::gridseg> *[cpusize];
|
|
||||||
pe->transfer_src = new MyList<Parallel::gridseg> *[cpusize];
|
|
||||||
pe->transfer_dst = new MyList<Parallel::gridseg> *[cpusize];
|
|
||||||
for (int node = 0; node < cpusize; node++)
|
|
||||||
{
|
|
||||||
pe->src[node] = build_owned_gsl(PatL, node, 5, Symmetry);
|
|
||||||
build_gstl(pe->src[node], pe->dst, &pe->transfer_src[node], &pe->transfer_dst[node]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return plan;
|
|
||||||
}
|
|
||||||
//
|
|
||||||
void Parallel::SyncFreePlan(SyncPlan *plan)
|
|
||||||
{
|
|
||||||
if (!plan)
|
|
||||||
return;
|
|
||||||
|
|
||||||
for (int i = 0; i < plan->num_entries; i++)
|
|
||||||
{
|
|
||||||
SyncPlanEntry *pe = &plan->entries[i];
|
|
||||||
if (pe->dst)
|
|
||||||
pe->dst->destroyList();
|
|
||||||
for (int node = 0; node < pe->cpusize; node++)
|
|
||||||
{
|
|
||||||
if (pe->src[node])
|
|
||||||
pe->src[node]->destroyList();
|
|
||||||
if (pe->transfer_src[node])
|
|
||||||
pe->transfer_src[node]->destroyList();
|
|
||||||
if (pe->transfer_dst[node])
|
|
||||||
pe->transfer_dst[node]->destroyList();
|
|
||||||
}
|
|
||||||
delete[] pe->src;
|
|
||||||
delete[] pe->transfer_src;
|
|
||||||
delete[] pe->transfer_dst;
|
|
||||||
}
|
|
||||||
delete[] plan->entries;
|
|
||||||
delete plan;
|
|
||||||
}
|
|
||||||
//
|
|
||||||
// SyncBeginWithPlan: Use pre-built GSLs from a SyncPlan to initiate async transfer.
|
|
||||||
// This avoids the O(cpusize * blocks^2) cost of rebuilding GSLs on every call.
|
|
||||||
//
|
|
||||||
Parallel::SyncHandle *Parallel::SyncBeginWithPlan(SyncPlan *plan, MyList<var> *VarList)
|
|
||||||
{
|
|
||||||
return SyncBeginWithPlan(plan, VarList, VarList);
|
|
||||||
}
|
|
||||||
//
|
|
||||||
Parallel::SyncHandle *Parallel::SyncBeginWithPlan(SyncPlan *plan, MyList<var> *VarList1, MyList<var> *VarList2)
|
|
||||||
{
|
|
||||||
SyncHandle *handle = new SyncHandle;
|
|
||||||
handle->num_states = plan->num_entries;
|
|
||||||
handle->states = new TransferState[handle->num_states];
|
|
||||||
|
|
||||||
for (int i = 0; i < plan->num_entries; i++)
|
|
||||||
{
|
|
||||||
SyncPlanEntry *pe = &plan->entries[i];
|
|
||||||
TransferState *ts = &handle->states[i];
|
|
||||||
|
|
||||||
ts->cpusize = pe->cpusize;
|
|
||||||
ts->VarList1 = VarList1;
|
|
||||||
ts->VarList2 = VarList2;
|
|
||||||
ts->Symmetry = plan->Symmetry;
|
|
||||||
ts->owns_gsl = false; // GSLs are owned by the plan, not this handle
|
|
||||||
|
|
||||||
// Borrow GSL pointers from the plan (do NOT free them in SyncEnd)
|
|
||||||
ts->transfer_src = pe->transfer_src;
|
|
||||||
ts->transfer_dst = pe->transfer_dst;
|
|
||||||
ts->src = pe->src;
|
|
||||||
ts->dst = pe->dst;
|
|
||||||
|
|
||||||
transfer_begin(ts);
|
|
||||||
}
|
|
||||||
|
|
||||||
return handle;
|
|
||||||
}
|
|
||||||
// collect buffer grid segments or blocks for the periodic boundary condition of given patch
|
// collect buffer grid segments or blocks for the periodic boundary condition of given patch
|
||||||
// ---------------------------------------------------
|
// ---------------------------------------------------
|
||||||
// |con | |con |
|
// |con | |con |
|
||||||
|
|||||||
@@ -81,53 +81,6 @@ namespace Parallel
|
|||||||
int Symmetry);
|
int Symmetry);
|
||||||
void Sync(Patch *Pat, MyList<var> *VarList, int Symmetry);
|
void Sync(Patch *Pat, MyList<var> *VarList, int Symmetry);
|
||||||
void Sync(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry);
|
void Sync(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry);
|
||||||
|
|
||||||
// Async Sync: overlap MPI communication with computation
|
|
||||||
struct TransferState
|
|
||||||
{
|
|
||||||
MPI_Request *reqs;
|
|
||||||
MPI_Status *stats;
|
|
||||||
int req_no;
|
|
||||||
double **send_data;
|
|
||||||
double **rec_data;
|
|
||||||
int cpusize;
|
|
||||||
MyList<gridseg> **transfer_src;
|
|
||||||
MyList<gridseg> **transfer_dst;
|
|
||||||
MyList<gridseg> **src;
|
|
||||||
MyList<gridseg> *dst;
|
|
||||||
MyList<var> *VarList1;
|
|
||||||
MyList<var> *VarList2;
|
|
||||||
int Symmetry;
|
|
||||||
bool owns_gsl; // true if this state owns and should free the GSLs
|
|
||||||
};
|
|
||||||
struct SyncHandle
|
|
||||||
{
|
|
||||||
TransferState *states;
|
|
||||||
int num_states;
|
|
||||||
};
|
|
||||||
SyncHandle *SyncBegin(Patch *Pat, MyList<var> *VarList, int Symmetry);
|
|
||||||
SyncHandle *SyncBegin(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry);
|
|
||||||
void SyncEnd(SyncHandle *handle);
|
|
||||||
|
|
||||||
// Cached GSL plan: pre-build grid segment lists once, reuse across multiple Sync calls
|
|
||||||
struct SyncPlanEntry
|
|
||||||
{
|
|
||||||
int cpusize;
|
|
||||||
MyList<gridseg> **transfer_src;
|
|
||||||
MyList<gridseg> **transfer_dst;
|
|
||||||
MyList<gridseg> **src;
|
|
||||||
MyList<gridseg> *dst;
|
|
||||||
};
|
|
||||||
struct SyncPlan
|
|
||||||
{
|
|
||||||
SyncPlanEntry *entries;
|
|
||||||
int num_entries;
|
|
||||||
int Symmetry;
|
|
||||||
};
|
|
||||||
SyncPlan *SyncPreparePlan(MyList<Patch> *PatL, int Symmetry);
|
|
||||||
void SyncFreePlan(SyncPlan *plan);
|
|
||||||
SyncHandle *SyncBeginWithPlan(SyncPlan *plan, MyList<var> *VarList);
|
|
||||||
SyncHandle *SyncBeginWithPlan(SyncPlan *plan, MyList<var> *VarList1, MyList<var> *VarList2);
|
|
||||||
void OutBdLow2Hi(Patch *Patc, Patch *Patf,
|
void OutBdLow2Hi(Patch *Patc, Patch *Patf,
|
||||||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||||||
int Symmetry);
|
int Symmetry);
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1,8 +1,7 @@
|
|||||||
|
|
||||||
#ifndef TWO_PUNCTURES_H
|
#ifndef TWO_PUNCTURES_H
|
||||||
#define TWO_PUNCTURES_H
|
#define TWO_PUNCTURES_H
|
||||||
|
|
||||||
#include <omp.h>
|
|
||||||
|
|
||||||
#define StencilSize 19
|
#define StencilSize 19
|
||||||
#define N_PlaneRelax 1
|
#define N_PlaneRelax 1
|
||||||
#define NRELAX 200
|
#define NRELAX 200
|
||||||
@@ -33,7 +32,7 @@ private:
|
|||||||
int npoints_A, npoints_B, npoints_phi;
|
int npoints_A, npoints_B, npoints_phi;
|
||||||
|
|
||||||
double target_M_plus, target_M_minus;
|
double target_M_plus, target_M_minus;
|
||||||
|
|
||||||
double admMass;
|
double admMass;
|
||||||
|
|
||||||
double adm_tol;
|
double adm_tol;
|
||||||
@@ -43,17 +42,32 @@ private:
|
|||||||
|
|
||||||
int ntotal;
|
int ntotal;
|
||||||
|
|
||||||
// ===== Precomputed spectral derivative matrices =====
|
// Pre-allocated workspace buffers for hot-path allocation elimination
|
||||||
double *D1_A, *D2_A;
|
// LineRelax_be workspace (sized for n2)
|
||||||
double *D1_B, *D2_B;
|
double *ws_diag_be, *ws_e_be, *ws_f_be, *ws_b_be, *ws_x_be;
|
||||||
double *DF1_phi, *DF2_phi;
|
// LineRelax_al workspace (sized for n1)
|
||||||
|
double *ws_diag_al, *ws_e_al, *ws_f_al, *ws_b_al, *ws_x_al;
|
||||||
// ===== Pre-allocated workspace for LineRelax (per-thread) =====
|
// ThomasAlgorithm workspace (sized for max(n1,n2))
|
||||||
int max_threads;
|
double *ws_thomas_y;
|
||||||
double **ws_diag_be, **ws_e_be, **ws_f_be, **ws_b_be, **ws_x_be;
|
// JFD_times_dv workspace (sized for nvar)
|
||||||
double **ws_l_be, **ws_u_be, **ws_d_be, **ws_y_be;
|
double *ws_jfd_values;
|
||||||
double **ws_diag_al, **ws_e_al, **ws_f_al, **ws_b_al, **ws_x_al;
|
derivs ws_jfd_dU, ws_jfd_U;
|
||||||
double **ws_l_al, **ws_u_al, **ws_d_al, **ws_y_al;
|
// chebft_Zeros workspace (sized for max(n1,n2,n3)+1)
|
||||||
|
double *ws_cheb_c;
|
||||||
|
// fourft workspace (sized for max(n1,n2,n3)/2+1 each)
|
||||||
|
double *ws_four_a, *ws_four_b;
|
||||||
|
// Derivatives_AB3 workspace
|
||||||
|
double *ws_deriv_p, *ws_deriv_dp, *ws_deriv_d2p;
|
||||||
|
double *ws_deriv_q, *ws_deriv_dq;
|
||||||
|
double *ws_deriv_r, *ws_deriv_dr;
|
||||||
|
int *ws_deriv_indx;
|
||||||
|
// F_of_v workspace
|
||||||
|
double *ws_fov_sources;
|
||||||
|
double *ws_fov_values;
|
||||||
|
derivs ws_fov_U;
|
||||||
|
// J_times_dv workspace
|
||||||
|
double *ws_jtdv_values;
|
||||||
|
derivs ws_jtdv_dU, ws_jtdv_U;
|
||||||
|
|
||||||
struct parameters
|
struct parameters
|
||||||
{
|
{
|
||||||
@@ -71,28 +85,6 @@ public:
|
|||||||
int Newtonmaxit);
|
int Newtonmaxit);
|
||||||
~TwoPunctures();
|
~TwoPunctures();
|
||||||
|
|
||||||
// 02/07: New/modified methods
|
|
||||||
void allocate_workspace();
|
|
||||||
void free_workspace();
|
|
||||||
void precompute_derivative_matrices();
|
|
||||||
void build_cheb_deriv_matrices(int n, double *D1, double *D2);
|
|
||||||
void build_fourier_deriv_matrices(int N, double *DF1, double *DF2);
|
|
||||||
void Derivatives_AB3_MatMul(int nvar, int n1, int n2, int n3, derivs v);
|
|
||||||
void ThomasAlgorithm_ws(int N, double *b, double *a, double *c, double *x, double *q,
|
|
||||||
double *l, double *u_ws, double *d, double *y);
|
|
||||||
void LineRelax_be_omp(double *dv,
|
|
||||||
int const i, int const k, int const nvar,
|
|
||||||
int const n1, int const n2, int const n3,
|
|
||||||
double const *rhs, int const *ncols, int **cols,
|
|
||||||
double **JFD, int tid);
|
|
||||||
void LineRelax_al_omp(double *dv,
|
|
||||||
int const j, int const k, int const nvar,
|
|
||||||
int const n1, int const n2, int const n3,
|
|
||||||
double const *rhs, int const *ncols,
|
|
||||||
int **cols, double **JFD, int tid);
|
|
||||||
void relax_omp(double *dv, int const nvar, int const n1, int const n2, int const n3,
|
|
||||||
double const *rhs, int const *ncols, int **cols, double **JFD);
|
|
||||||
|
|
||||||
void Solve();
|
void Solve();
|
||||||
void set_initial_guess(derivs v);
|
void set_initial_guess(derivs v);
|
||||||
int index(int i, int j, int k, int l, int a, int b, int c, int d);
|
int index(int i, int j, int k, int l, int a, int b, int c, int d);
|
||||||
@@ -151,11 +143,23 @@ public:
|
|||||||
double BY_KKofxyz(double x, double y, double z);
|
double BY_KKofxyz(double x, double y, double z);
|
||||||
void SetMatrix_JFD(int nvar, int n1, int n2, int n3, derivs u, int *ncols, int **cols, double **Matrix);
|
void SetMatrix_JFD(int nvar, int n1, int n2, int n3, derivs u, int *ncols, int **cols, double **Matrix);
|
||||||
void J_times_dv(int nvar, int n1, int n2, int n3, derivs dv, double *Jdv, derivs u);
|
void J_times_dv(int nvar, int n1, int n2, int n3, derivs dv, double *Jdv, derivs u);
|
||||||
|
void relax(double *dv, int const nvar, int const n1, int const n2, int const n3,
|
||||||
|
double const *rhs, int const *ncols, int **cols, double **JFD);
|
||||||
|
void LineRelax_be(double *dv,
|
||||||
|
int const i, int const k, int const nvar,
|
||||||
|
int const n1, int const n2, int const n3,
|
||||||
|
double const *rhs, int const *ncols, int **cols,
|
||||||
|
double **JFD);
|
||||||
void JFD_times_dv(int i, int j, int k, int nvar, int n1, int n2,
|
void JFD_times_dv(int i, int j, int k, int nvar, int n1, int n2,
|
||||||
int n3, derivs dv, derivs u, double *values);
|
int n3, derivs dv, derivs u, double *values);
|
||||||
void LinEquations(double A, double B, double X, double R,
|
void LinEquations(double A, double B, double X, double R,
|
||||||
double x, double r, double phi,
|
double x, double r, double phi,
|
||||||
double y, double z, derivs dU, derivs U, double *values);
|
double y, double z, derivs dU, derivs U, double *values);
|
||||||
|
void LineRelax_al(double *dv,
|
||||||
|
int const j, int const k, int const nvar,
|
||||||
|
int const n1, int const n2, int const n3,
|
||||||
|
double const *rhs, int const *ncols,
|
||||||
|
int **cols, double **JFD);
|
||||||
void ThomasAlgorithm(int N, double *b, double *a, double *c, double *x, double *q);
|
void ThomasAlgorithm(int N, double *b, double *a, double *c, double *x, double *q);
|
||||||
void Save(char *fname);
|
void Save(char *fname);
|
||||||
// provided by Vasileios Paschalidis (vpaschal@illinois.edu)
|
// provided by Vasileios Paschalidis (vpaschal@illinois.edu)
|
||||||
@@ -164,4 +168,4 @@ public:
|
|||||||
void SpecCoef(parameters par, int ivar, double *v, double *cf);
|
void SpecCoef(parameters par, int ivar, double *v, double *cf);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* TWO_PUNCTURES_H */
|
#endif /* TWO_PUNCTURES_H */
|
||||||
|
|||||||
@@ -186,12 +186,6 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
int ERROR = 0;
|
int ERROR = 0;
|
||||||
|
|
||||||
MyList<ss_patch> *sPp;
|
MyList<ss_patch> *sPp;
|
||||||
|
|
||||||
// Pre-build grid segment lists once for this level's patches.
|
|
||||||
// These are reused across predictor + 3 corrector SyncBegin calls,
|
|
||||||
// avoiding O(cpusize * blocks^2) rebuild each time.
|
|
||||||
Parallel::SyncPlan *sync_plan = Parallel::SyncPreparePlan(GH->PatL[lev], Symmetry);
|
|
||||||
|
|
||||||
// Predictor
|
// Predictor
|
||||||
MyList<Patch> *Pp = GH->PatL[lev];
|
MyList<Patch> *Pp = GH->PatL[lev];
|
||||||
while (Pp)
|
while (Pp)
|
||||||
@@ -327,17 +321,13 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
Pp = Pp->next;
|
Pp = Pp->next;
|
||||||
}
|
}
|
||||||
// Start async ghost zone exchange - overlaps with error check and Shell computation
|
// check error information
|
||||||
Parallel::SyncHandle *sync_pre = Parallel::SyncBeginWithPlan(sync_plan, SynchList_pre);
|
|
||||||
|
|
||||||
// check error information (overlaps with MPI transfer)
|
|
||||||
{
|
{
|
||||||
int erh = ERROR;
|
int erh = ERROR;
|
||||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||||
}
|
}
|
||||||
if (ERROR)
|
if (ERROR)
|
||||||
{
|
{
|
||||||
Parallel::SyncEnd(sync_pre); sync_pre = 0;
|
|
||||||
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
|
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
@@ -485,7 +475,6 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
if (ERROR)
|
if (ERROR)
|
||||||
{
|
{
|
||||||
Parallel::SyncEnd(sync_pre); sync_pre = 0;
|
|
||||||
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
|
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
@@ -496,8 +485,7 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Complete async ghost zone exchange
|
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
|
||||||
if (sync_pre) Parallel::SyncEnd(sync_pre);
|
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
if (lev == 0)
|
if (lev == 0)
|
||||||
@@ -705,17 +693,13 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
Pp = Pp->next;
|
Pp = Pp->next;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start async ghost zone exchange - overlaps with error check and Shell computation
|
// check error information
|
||||||
Parallel::SyncHandle *sync_cor = Parallel::SyncBeginWithPlan(sync_plan, SynchList_cor);
|
|
||||||
|
|
||||||
// check error information (overlaps with MPI transfer)
|
|
||||||
{
|
{
|
||||||
int erh = ERROR;
|
int erh = ERROR;
|
||||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||||
}
|
}
|
||||||
if (ERROR)
|
if (ERROR)
|
||||||
{
|
{
|
||||||
Parallel::SyncEnd(sync_cor); sync_cor = 0;
|
|
||||||
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
|
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
@@ -873,7 +857,6 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
if (ERROR)
|
if (ERROR)
|
||||||
{
|
{
|
||||||
Parallel::SyncEnd(sync_cor); sync_cor = 0;
|
|
||||||
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
|
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
@@ -885,8 +868,7 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Complete async ghost zone exchange
|
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
||||||
if (sync_cor) Parallel::SyncEnd(sync_cor);
|
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
if (lev == 0)
|
if (lev == 0)
|
||||||
@@ -1060,8 +1042,6 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
Porg0[ithBH][2] = Porg1[ithBH][2];
|
Porg0[ithBH][2] = Porg1[ithBH][2];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Parallel::SyncFreePlan(sync_plan);
|
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
// for constraint preserving boundary (CPBC)
|
// for constraint preserving boundary (CPBC)
|
||||||
@@ -1095,10 +1075,6 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
int ERROR = 0;
|
int ERROR = 0;
|
||||||
|
|
||||||
MyList<ss_patch> *sPp;
|
MyList<ss_patch> *sPp;
|
||||||
|
|
||||||
// Pre-build grid segment lists once for this level's patches.
|
|
||||||
Parallel::SyncPlan *sync_plan = Parallel::SyncPreparePlan(GH->PatL[lev], Symmetry);
|
|
||||||
|
|
||||||
// Predictor
|
// Predictor
|
||||||
MyList<Patch> *Pp = GH->PatL[lev];
|
MyList<Patch> *Pp = GH->PatL[lev];
|
||||||
while (Pp)
|
while (Pp)
|
||||||
@@ -1566,17 +1542,13 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
// Start async ghost zone exchange - overlaps with error check
|
// check error information
|
||||||
Parallel::SyncHandle *sync_pre = Parallel::SyncBeginWithPlan(sync_plan, SynchList_pre);
|
|
||||||
|
|
||||||
// check error information (overlaps with MPI transfer)
|
|
||||||
{
|
{
|
||||||
int erh = ERROR;
|
int erh = ERROR;
|
||||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||||
}
|
}
|
||||||
if (ERROR)
|
if (ERROR)
|
||||||
{
|
{
|
||||||
Parallel::SyncEnd(sync_pre); sync_pre = 0;
|
|
||||||
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
|
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
@@ -1586,8 +1558,7 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Complete async ghost zone exchange
|
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
|
||||||
if (sync_pre) Parallel::SyncEnd(sync_pre);
|
|
||||||
|
|
||||||
if (lev == 0)
|
if (lev == 0)
|
||||||
{
|
{
|
||||||
@@ -2132,17 +2103,13 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
sPp = sPp->next;
|
sPp = sPp->next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Start async ghost zone exchange - overlaps with error check
|
// check error information
|
||||||
Parallel::SyncHandle *sync_cor = Parallel::SyncBeginWithPlan(sync_plan, SynchList_cor);
|
|
||||||
|
|
||||||
// check error information (overlaps with MPI transfer)
|
|
||||||
{
|
{
|
||||||
int erh = ERROR;
|
int erh = ERROR;
|
||||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||||
}
|
}
|
||||||
if (ERROR)
|
if (ERROR)
|
||||||
{
|
{
|
||||||
Parallel::SyncEnd(sync_cor); sync_cor = 0;
|
|
||||||
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
|
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
@@ -2153,8 +2120,7 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Complete async ghost zone exchange
|
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
||||||
if (sync_cor) Parallel::SyncEnd(sync_cor);
|
|
||||||
|
|
||||||
if (lev == 0)
|
if (lev == 0)
|
||||||
{
|
{
|
||||||
@@ -2380,8 +2346,6 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
DG_List->clearList();
|
DG_List->clearList();
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::SyncFreePlan(sync_plan);
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#undef MRBD
|
#undef MRBD
|
||||||
|
|||||||
@@ -3035,12 +3035,6 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
int ERROR = 0;
|
int ERROR = 0;
|
||||||
|
|
||||||
MyList<ss_patch> *sPp;
|
MyList<ss_patch> *sPp;
|
||||||
|
|
||||||
// Pre-build grid segment lists once for this level's patches.
|
|
||||||
// These are reused across predictor + 3 corrector SyncBegin calls,
|
|
||||||
// avoiding O(cpusize * blocks^2) rebuild each time.
|
|
||||||
Parallel::SyncPlan *sync_plan = Parallel::SyncPreparePlan(GH->PatL[lev], Symmetry);
|
|
||||||
|
|
||||||
// Predictor
|
// Predictor
|
||||||
MyList<Patch> *Pp = GH->PatL[lev];
|
MyList<Patch> *Pp = GH->PatL[lev];
|
||||||
while (Pp)
|
while (Pp)
|
||||||
@@ -3164,18 +3158,13 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
Pp = Pp->next;
|
Pp = Pp->next;
|
||||||
}
|
}
|
||||||
|
// check error information
|
||||||
// Start async ghost zone exchange - overlaps with error check and Shell computation
|
|
||||||
Parallel::SyncHandle *sync_pre = Parallel::SyncBeginWithPlan(sync_plan, SynchList_pre);
|
|
||||||
|
|
||||||
// check error information (overlaps with MPI transfer)
|
|
||||||
{
|
{
|
||||||
int erh = ERROR;
|
int erh = ERROR;
|
||||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||||
}
|
}
|
||||||
if (ERROR)
|
if (ERROR)
|
||||||
{
|
{
|
||||||
Parallel::SyncEnd(sync_pre); sync_pre = 0;
|
|
||||||
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
|
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
@@ -3335,7 +3324,6 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
|
|
||||||
if (ERROR)
|
if (ERROR)
|
||||||
{
|
{
|
||||||
Parallel::SyncEnd(sync_pre); sync_pre = 0;
|
|
||||||
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
|
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
@@ -3346,8 +3334,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Complete async ghost zone exchange
|
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
|
||||||
if (sync_pre) Parallel::SyncEnd(sync_pre);
|
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
if (lev == 0)
|
if (lev == 0)
|
||||||
@@ -3541,10 +3528,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
Pp = Pp->next;
|
Pp = Pp->next;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start async ghost zone exchange - overlaps with error check and Shell computation
|
// check error information
|
||||||
Parallel::SyncHandle *sync_cor = Parallel::SyncBeginWithPlan(sync_plan, SynchList_cor);
|
|
||||||
|
|
||||||
// check error information (overlaps with MPI transfer)
|
|
||||||
{
|
{
|
||||||
int erh = ERROR;
|
int erh = ERROR;
|
||||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||||
@@ -3552,7 +3536,6 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
|
|
||||||
if (ERROR)
|
if (ERROR)
|
||||||
{
|
{
|
||||||
Parallel::SyncEnd(sync_cor); sync_cor = 0;
|
|
||||||
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
|
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
@@ -3709,7 +3692,6 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
if (ERROR)
|
if (ERROR)
|
||||||
{
|
{
|
||||||
Parallel::SyncEnd(sync_cor); sync_cor = 0;
|
|
||||||
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
|
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
@@ -3722,8 +3704,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Complete async ghost zone exchange
|
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
||||||
if (sync_cor) Parallel::SyncEnd(sync_cor);
|
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
if (lev == 0)
|
if (lev == 0)
|
||||||
@@ -3914,8 +3895,6 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
Porg0[ithBH][2] = Porg1[ithBH][2];
|
Porg0[ithBH][2] = Porg1[ithBH][2];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Parallel::SyncFreePlan(sync_plan);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//================================================================================================
|
//================================================================================================
|
||||||
@@ -4838,12 +4817,6 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
int ERROR = 0;
|
int ERROR = 0;
|
||||||
|
|
||||||
MyList<ss_patch> *sPp;
|
MyList<ss_patch> *sPp;
|
||||||
|
|
||||||
// Pre-build grid segment lists once for this level's patches.
|
|
||||||
// These are reused across predictor + 3 corrector SyncBegin calls,
|
|
||||||
// avoiding O(cpusize * blocks^2) rebuild each time.
|
|
||||||
Parallel::SyncPlan *sync_plan = Parallel::SyncPreparePlan(GH->PatL[lev], Symmetry);
|
|
||||||
|
|
||||||
// Predictor
|
// Predictor
|
||||||
MyList<Patch> *Pp = GH->PatL[lev];
|
MyList<Patch> *Pp = GH->PatL[lev];
|
||||||
while (Pp)
|
while (Pp)
|
||||||
@@ -4970,17 +4943,13 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
|
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Predictor rhs calculation");
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Predictor rhs calculation");
|
||||||
|
|
||||||
// Start async ghost zone exchange - overlaps with error check and BH position
|
// check error information
|
||||||
Parallel::SyncHandle *sync_pre = Parallel::SyncBeginWithPlan(sync_plan, SynchList_pre);
|
|
||||||
|
|
||||||
// check error information (overlaps with MPI transfer)
|
|
||||||
{
|
{
|
||||||
int erh = ERROR;
|
int erh = ERROR;
|
||||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, GH->Commlev[lev]);
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, GH->Commlev[lev]);
|
||||||
}
|
}
|
||||||
if (ERROR)
|
if (ERROR)
|
||||||
{
|
{
|
||||||
Parallel::SyncEnd(sync_pre); sync_pre = 0;
|
|
||||||
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
|
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
@@ -4992,8 +4961,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
|
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor sync");
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor sync");
|
||||||
|
|
||||||
// Complete async ghost zone exchange
|
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
|
||||||
if (sync_pre) Parallel::SyncEnd(sync_pre);
|
|
||||||
|
|
||||||
#if (MAPBH == 0)
|
#if (MAPBH == 0)
|
||||||
// for black hole position
|
// for black hole position
|
||||||
@@ -5172,17 +5140,13 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
|
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector error check");
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector error check");
|
||||||
|
|
||||||
// Start async ghost zone exchange - overlaps with error check and BH position
|
// check error information
|
||||||
Parallel::SyncHandle *sync_cor = Parallel::SyncBeginWithPlan(sync_plan, SynchList_cor);
|
|
||||||
|
|
||||||
// check error information (overlaps with MPI transfer)
|
|
||||||
{
|
{
|
||||||
int erh = ERROR;
|
int erh = ERROR;
|
||||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, GH->Commlev[lev]);
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, GH->Commlev[lev]);
|
||||||
}
|
}
|
||||||
if (ERROR)
|
if (ERROR)
|
||||||
{
|
{
|
||||||
Parallel::SyncEnd(sync_cor); sync_cor = 0;
|
|
||||||
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
|
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
@@ -5196,8 +5160,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
|
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector sync");
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector sync");
|
||||||
|
|
||||||
// Complete async ghost zone exchange
|
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
||||||
if (sync_cor) Parallel::SyncEnd(sync_cor);
|
|
||||||
|
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Corrector sync");
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Corrector sync");
|
||||||
|
|
||||||
@@ -5313,8 +5276,6 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
|
|
||||||
// if(myrank==GH->start_rank[lev]) cout<<GH->mylev<<endl;
|
// if(myrank==GH->start_rank[lev]) cout<<GH->mylev<<endl;
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"complet GH Step");
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"complet GH Step");
|
||||||
|
|
||||||
Parallel::SyncFreePlan(sync_plan);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//================================================================================================
|
//================================================================================================
|
||||||
|
|||||||
@@ -83,6 +83,10 @@
|
|||||||
real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz
|
real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz
|
||||||
real*8, dimension(ex(1),ex(2),ex(3)) :: gupyy,gupyz,gupzz
|
real*8, dimension(ex(1),ex(2),ex(3)) :: gupyy,gupyz,gupzz
|
||||||
|
|
||||||
|
! shared work arrays (memory pool) to avoid repeated allocation in subroutines
|
||||||
|
real*8, dimension(-1:ex(1),-1:ex(2),-1:ex(3)) :: fh_work2 ! for fderivs_fh/fdderivs_fh (ghost_width=2)
|
||||||
|
real*8, dimension(-2:ex(1),-2:ex(2),-2:ex(3)) :: fh_work3 ! for kodis_fh/lopsided_fh (ghost_width=3)
|
||||||
|
|
||||||
real*8,dimension(3) ::SSS,AAS,ASA,SAA,ASS,SAS,SSA
|
real*8,dimension(3) ::SSS,AAS,ASA,SAA,ASS,SAS,SSA
|
||||||
real*8 :: dX, dY, dZ, PI
|
real*8 :: dX, dY, dZ, PI
|
||||||
real*8, parameter :: ZEO = 0.d0,ONE = 1.D0, TWO = 2.D0, FOUR = 4.D0
|
real*8, parameter :: ZEO = 0.d0,ONE = 1.D0, TWO = 2.D0, FOUR = 4.D0
|
||||||
@@ -151,22 +155,22 @@
|
|||||||
gyy = dyy + ONE
|
gyy = dyy + ONE
|
||||||
gzz = dzz + ONE
|
gzz = dzz + ONE
|
||||||
|
|
||||||
call fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev)
|
call fderivs_fh(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev,fh_work2)
|
||||||
call fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev)
|
call fderivs_fh(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev,fh_work2)
|
||||||
call fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev)
|
call fderivs_fh(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev,fh_work2)
|
||||||
|
|
||||||
div_beta = betaxx + betayy + betazz
|
div_beta = betaxx + betayy + betazz
|
||||||
|
|
||||||
call fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev)
|
call fderivs_fh(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev,fh_work2)
|
||||||
|
|
||||||
chi_rhs = F2o3 *chin1*( alpn1 * trK - div_beta ) !rhs for chi
|
chi_rhs = F2o3 *chin1*( alpn1 * trK - div_beta ) !rhs for chi
|
||||||
|
|
||||||
call fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
|
call fderivs_fh(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev,fh_work2)
|
||||||
call fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev)
|
call fderivs_fh(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev,fh_work2)
|
||||||
call fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev)
|
call fderivs_fh(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev,fh_work2)
|
||||||
call fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
|
call fderivs_fh(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev,fh_work2)
|
||||||
call fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev)
|
call fderivs_fh(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev,fh_work2)
|
||||||
call fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
|
call fderivs_fh(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev,fh_work2)
|
||||||
|
|
||||||
gxx_rhs = - TWO * alpn1 * Axx - F2o3 * gxx * div_beta + &
|
gxx_rhs = - TWO * alpn1 * Axx - F2o3 * gxx * div_beta + &
|
||||||
TWO *( gxx * betaxx + gxy * betayx + gxz * betazx)
|
TWO *( gxx * betaxx + gxy * betayx + gxz * betazx)
|
||||||
@@ -284,8 +288,8 @@
|
|||||||
(gupyy * gupzz + gupyz * gupyz)* Ayz
|
(gupyy * gupzz + gupyz * gupyz)* Ayz
|
||||||
|
|
||||||
! Right hand side for Gam^i without shift terms...
|
! Right hand side for Gam^i without shift terms...
|
||||||
call fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
|
call fderivs_fh(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev,fh_work2)
|
||||||
call fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev)
|
call fderivs_fh(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev,fh_work2)
|
||||||
|
|
||||||
Gamx_rhs = - TWO * ( Lapx * Rxx + Lapy * Rxy + Lapz * Rxz ) + &
|
Gamx_rhs = - TWO * ( Lapx * Rxx + Lapy * Rxy + Lapz * Rxz ) + &
|
||||||
TWO * alpn1 * ( &
|
TWO * alpn1 * ( &
|
||||||
@@ -314,12 +318,12 @@
|
|||||||
Gamzxx * Rxx + Gamzyy * Ryy + Gamzzz * Rzz + &
|
Gamzxx * Rxx + Gamzyy * Ryy + Gamzzz * Rzz + &
|
||||||
TWO * ( Gamzxy * Rxy + Gamzxz * Rxz + Gamzyz * Ryz ) )
|
TWO * ( Gamzxy * Rxy + Gamzxz * Rxz + Gamzyz * Ryz ) )
|
||||||
|
|
||||||
call fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,&
|
call fdderivs_fh(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,&
|
||||||
X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev)
|
X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev,fh_work2)
|
||||||
call fdderivs(ex,betay,gxxy,gxyy,gxzy,gyyy,gyzy,gzzy,&
|
call fdderivs_fh(ex,betay,gxxy,gxyy,gxzy,gyyy,gyzy,gzzy,&
|
||||||
X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev)
|
X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev,fh_work2)
|
||||||
call fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,&
|
call fdderivs_fh(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,&
|
||||||
X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev)
|
X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev,fh_work2)
|
||||||
|
|
||||||
fxx = gxxx + gxyy + gxzz
|
fxx = gxxx + gxyy + gxzz
|
||||||
fxy = gxyx + gyyy + gyzz
|
fxy = gxyx + gyyy + gyzz
|
||||||
@@ -332,9 +336,9 @@
|
|||||||
Gamza = gupxx * Gamzxx + gupyy * Gamzyy + gupzz * Gamzzz + &
|
Gamza = gupxx * Gamzxx + gupyy * Gamzyy + gupzz * Gamzzz + &
|
||||||
TWO*( gupxy * Gamzxy + gupxz * Gamzxz + gupyz * Gamzyz )
|
TWO*( gupxy * Gamzxy + gupxz * Gamzxz + gupyz * Gamzyz )
|
||||||
|
|
||||||
call fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev)
|
call fderivs_fh(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev,fh_work2)
|
||||||
call fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev)
|
call fderivs_fh(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev,fh_work2)
|
||||||
call fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev)
|
call fderivs_fh(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev,fh_work2)
|
||||||
|
|
||||||
Gamx_rhs = Gamx_rhs + F2o3 * Gamxa * div_beta - &
|
Gamx_rhs = Gamx_rhs + F2o3 * Gamxa * div_beta - &
|
||||||
Gamxa * betaxx - Gamya * betaxy - Gamza * betaxz + &
|
Gamxa * betaxx - Gamya * betaxy - Gamza * betaxz + &
|
||||||
@@ -377,27 +381,27 @@
|
|||||||
gzzz = gxz * Gamxzz + gyz * Gamyzz + gzz * Gamzzz
|
gzzz = gxz * Gamxzz + gyz * Gamyzz + gzz * Gamzzz
|
||||||
|
|
||||||
!compute Ricci tensor for tilted metric
|
!compute Ricci tensor for tilted metric
|
||||||
call fdderivs(ex,dxx,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM ,SYM ,SYM ,symmetry,Lev)
|
call fdderivs_fh(ex,dxx,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM ,SYM ,SYM ,symmetry,Lev,fh_work2)
|
||||||
Rxx = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
|
Rxx = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
|
||||||
( gupxy * fxy + gupxz * fxz + gupyz * fyz ) * TWO
|
( gupxy * fxy + gupxz * fxz + gupyz * fyz ) * TWO
|
||||||
|
|
||||||
call fdderivs(ex,dyy,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM ,SYM ,SYM ,symmetry,Lev)
|
call fdderivs_fh(ex,dyy,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM ,SYM ,SYM ,symmetry,Lev,fh_work2)
|
||||||
Ryy = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
|
Ryy = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
|
||||||
( gupxy * fxy + gupxz * fxz + gupyz * fyz ) * TWO
|
( gupxy * fxy + gupxz * fxz + gupyz * fyz ) * TWO
|
||||||
|
|
||||||
call fdderivs(ex,dzz,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM ,SYM ,SYM ,symmetry,Lev)
|
call fdderivs_fh(ex,dzz,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM ,SYM ,SYM ,symmetry,Lev,fh_work2)
|
||||||
Rzz = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
|
Rzz = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
|
||||||
( gupxy * fxy + gupxz * fxz + gupyz * fyz ) * TWO
|
( gupxy * fxy + gupxz * fxz + gupyz * fyz ) * TWO
|
||||||
|
|
||||||
call fdderivs(ex,gxy,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,ANTI, ANTI,SYM ,symmetry,Lev)
|
call fdderivs_fh(ex,gxy,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,ANTI, ANTI,SYM ,symmetry,Lev,fh_work2)
|
||||||
Rxy = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
|
Rxy = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
|
||||||
( gupxy * fxy + gupxz * fxz + gupyz * fyz ) * TWO
|
( gupxy * fxy + gupxz * fxz + gupyz * fyz ) * TWO
|
||||||
|
|
||||||
call fdderivs(ex,gxz,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,ANTI ,SYM ,ANTI,symmetry,Lev)
|
call fdderivs_fh(ex,gxz,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,ANTI ,SYM ,ANTI,symmetry,Lev,fh_work2)
|
||||||
Rxz = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
|
Rxz = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
|
||||||
( gupxy * fxy + gupxz * fxz + gupyz * fyz ) * TWO
|
( gupxy * fxy + gupxz * fxz + gupyz * fyz ) * TWO
|
||||||
|
|
||||||
call fdderivs(ex,gyz,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM ,ANTI ,ANTI,symmetry,Lev)
|
call fdderivs_fh(ex,gyz,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM ,ANTI ,ANTI,symmetry,Lev,fh_work2)
|
||||||
Ryz = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
|
Ryz = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
|
||||||
( gupxy * fxy + gupxz * fxz + gupyz * fyz ) * TWO
|
( gupxy * fxy + gupxz * fxz + gupyz * fyz ) * TWO
|
||||||
|
|
||||||
@@ -602,7 +606,7 @@
|
|||||||
Gamxzz * gxzy + Gamyzz * gyzy + Gamzzz * gzzy + &
|
Gamxzz * gxzy + Gamyzz * gyzy + Gamzzz * gzzy + &
|
||||||
Gamxyz * gzzx + Gamyyz * gzzy + Gamzyz * gzzz )
|
Gamxyz * gzzx + Gamyyz * gzzy + Gamzyz * gzzz )
|
||||||
!covariant second derivative of chi respect to tilted metric
|
!covariant second derivative of chi respect to tilted metric
|
||||||
call fdderivs(ex,chi,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
|
call fdderivs_fh(ex,chi,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev,fh_work2)
|
||||||
|
|
||||||
fxx = fxx - Gamxxx * chix - Gamyxx * chiy - Gamzxx * chiz
|
fxx = fxx - Gamxxx * chix - Gamyxx * chiy - Gamzxx * chiz
|
||||||
fxy = fxy - Gamxxy * chix - Gamyxy * chiy - Gamzxy * chiz
|
fxy = fxy - Gamxxy * chix - Gamyxy * chiy - Gamzxy * chiz
|
||||||
@@ -628,8 +632,8 @@
|
|||||||
Ryz = Ryz + (fyz - chiy*chiz/chin1/TWO + gyz * f)/chin1/TWO
|
Ryz = Ryz + (fyz - chiy*chiz/chin1/TWO + gyz * f)/chin1/TWO
|
||||||
|
|
||||||
! covariant second derivatives of the lapse respect to physical metric
|
! covariant second derivatives of the lapse respect to physical metric
|
||||||
call fdderivs(ex,Lap,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z, &
|
call fdderivs_fh(ex,Lap,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z, &
|
||||||
SYM,SYM,SYM,symmetry,Lev)
|
SYM,SYM,SYM,symmetry,Lev,fh_work2)
|
||||||
|
|
||||||
gxxx = (gupxx * chix + gupxy * chiy + gupxz * chiz)/chin1
|
gxxx = (gupxx * chix + gupxy * chiy + gupxz * chiz)/chin1
|
||||||
gxxy = (gupxy * chix + gupyy * chiy + gupyz * chiz)/chin1
|
gxxy = (gupxy * chix + gupyy * chiy + gupyz * chiz)/chin1
|
||||||
@@ -812,7 +816,7 @@
|
|||||||
betay_rhs = FF*dtSfy
|
betay_rhs = FF*dtSfy
|
||||||
betaz_rhs = FF*dtSfz
|
betaz_rhs = FF*dtSfz
|
||||||
|
|
||||||
call fderivs(ex,chi,dtSfx_rhs,dtSfy_rhs,dtSfz_rhs,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
|
call fderivs_fh(ex,chi,dtSfx_rhs,dtSfy_rhs,dtSfz_rhs,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev,fh_work2)
|
||||||
reta = gupxx * dtSfx_rhs * dtSfx_rhs + gupyy * dtSfy_rhs * dtSfy_rhs + gupzz * dtSfz_rhs * dtSfz_rhs + &
|
reta = gupxx * dtSfx_rhs * dtSfx_rhs + gupyy * dtSfy_rhs * dtSfy_rhs + gupzz * dtSfz_rhs * dtSfz_rhs + &
|
||||||
TWO * (gupxy * dtSfx_rhs * dtSfy_rhs + gupxz * dtSfx_rhs * dtSfz_rhs + gupyz * dtSfy_rhs * dtSfz_rhs)
|
TWO * (gupxy * dtSfx_rhs * dtSfy_rhs + gupxz * dtSfx_rhs * dtSfz_rhs + gupyz * dtSfy_rhs * dtSfz_rhs)
|
||||||
reta = 1.31d0/2*dsqrt(reta/chin1)/(1-dsqrt(chin1))**2
|
reta = 1.31d0/2*dsqrt(reta/chin1)/(1-dsqrt(chin1))**2
|
||||||
@@ -824,7 +828,7 @@
|
|||||||
betay_rhs = FF*dtSfy
|
betay_rhs = FF*dtSfy
|
||||||
betaz_rhs = FF*dtSfz
|
betaz_rhs = FF*dtSfz
|
||||||
|
|
||||||
call fderivs(ex,chi,dtSfx_rhs,dtSfy_rhs,dtSfz_rhs,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
|
call fderivs_fh(ex,chi,dtSfx_rhs,dtSfy_rhs,dtSfz_rhs,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev,fh_work2)
|
||||||
reta = gupxx * dtSfx_rhs * dtSfx_rhs + gupyy * dtSfy_rhs * dtSfy_rhs + gupzz * dtSfz_rhs * dtSfz_rhs + &
|
reta = gupxx * dtSfx_rhs * dtSfx_rhs + gupyy * dtSfy_rhs * dtSfy_rhs + gupzz * dtSfz_rhs * dtSfz_rhs + &
|
||||||
TWO * (gupxy * dtSfx_rhs * dtSfy_rhs + gupxz * dtSfx_rhs * dtSfz_rhs + gupyz * dtSfy_rhs * dtSfz_rhs)
|
TWO * (gupxy * dtSfx_rhs * dtSfy_rhs + gupxz * dtSfx_rhs * dtSfz_rhs + gupyz * dtSfy_rhs * dtSfz_rhs)
|
||||||
reta = 1.31d0/2*dsqrt(reta/chin1)/(1-chin1)**2
|
reta = 1.31d0/2*dsqrt(reta/chin1)/(1-chin1)**2
|
||||||
@@ -832,7 +836,7 @@
|
|||||||
dtSfy_rhs = Gamy_rhs - reta*dtSfy
|
dtSfy_rhs = Gamy_rhs - reta*dtSfy
|
||||||
dtSfz_rhs = Gamz_rhs - reta*dtSfz
|
dtSfz_rhs = Gamz_rhs - reta*dtSfz
|
||||||
#elif (GAUGE == 4)
|
#elif (GAUGE == 4)
|
||||||
call fderivs(ex,chi,dtSfx_rhs,dtSfy_rhs,dtSfz_rhs,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
|
call fderivs_fh(ex,chi,dtSfx_rhs,dtSfy_rhs,dtSfz_rhs,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev,fh_work2)
|
||||||
reta = gupxx * dtSfx_rhs * dtSfx_rhs + gupyy * dtSfy_rhs * dtSfy_rhs + gupzz * dtSfz_rhs * dtSfz_rhs + &
|
reta = gupxx * dtSfx_rhs * dtSfx_rhs + gupyy * dtSfy_rhs * dtSfy_rhs + gupzz * dtSfz_rhs * dtSfz_rhs + &
|
||||||
TWO * (gupxy * dtSfx_rhs * dtSfy_rhs + gupxz * dtSfx_rhs * dtSfz_rhs + gupyz * dtSfy_rhs * dtSfz_rhs)
|
TWO * (gupxy * dtSfx_rhs * dtSfy_rhs + gupxz * dtSfx_rhs * dtSfz_rhs + gupyz * dtSfy_rhs * dtSfz_rhs)
|
||||||
reta = 1.31d0/2*dsqrt(reta/chin1)/(1-dsqrt(chin1))**2
|
reta = 1.31d0/2*dsqrt(reta/chin1)/(1-dsqrt(chin1))**2
|
||||||
@@ -844,7 +848,7 @@
|
|||||||
dtSfy_rhs = ZEO
|
dtSfy_rhs = ZEO
|
||||||
dtSfz_rhs = ZEO
|
dtSfz_rhs = ZEO
|
||||||
#elif (GAUGE == 5)
|
#elif (GAUGE == 5)
|
||||||
call fderivs(ex,chi,dtSfx_rhs,dtSfy_rhs,dtSfz_rhs,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
|
call fderivs_fh(ex,chi,dtSfx_rhs,dtSfy_rhs,dtSfz_rhs,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev,fh_work2)
|
||||||
reta = gupxx * dtSfx_rhs * dtSfx_rhs + gupyy * dtSfy_rhs * dtSfy_rhs + gupzz * dtSfz_rhs * dtSfz_rhs + &
|
reta = gupxx * dtSfx_rhs * dtSfx_rhs + gupyy * dtSfy_rhs * dtSfy_rhs + gupzz * dtSfz_rhs * dtSfz_rhs + &
|
||||||
TWO * (gupxy * dtSfx_rhs * dtSfy_rhs + gupxz * dtSfx_rhs * dtSfz_rhs + gupyz * dtSfy_rhs * dtSfz_rhs)
|
TWO * (gupxy * dtSfx_rhs * dtSfy_rhs + gupxz * dtSfx_rhs * dtSfz_rhs + gupyz * dtSfy_rhs * dtSfz_rhs)
|
||||||
reta = 1.31d0/2*dsqrt(reta/chin1)/(1-chin1)**2
|
reta = 1.31d0/2*dsqrt(reta/chin1)/(1-chin1)**2
|
||||||
@@ -945,61 +949,104 @@
|
|||||||
SSA(2)=SYM
|
SSA(2)=SYM
|
||||||
SSA(3)=ANTI
|
SSA(3)=ANTI
|
||||||
|
|
||||||
!!!!!!!!!advection term + Kreiss-Oliger dissipation (merged for cache efficiency)
|
!!!!!!!!!advection term part
|
||||||
! lopsided_kodis shares the symmetry_bd buffer between advection and
|
|
||||||
! dissipation, eliminating redundant full-grid copies. For metric variables
|
|
||||||
! gxx/gyy/gzz (=dxx/dyy/dzz+1): kodis stencil coefficients sum to zero,
|
|
||||||
! so the constant offset has no effect on dissipation.
|
|
||||||
|
|
||||||
call lopsided_kodis(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
call lopsided_fh(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,fh_work3)
|
||||||
call lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
|
call lopsided_fh(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,fh_work3)
|
||||||
call lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps)
|
call lopsided_fh(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,fh_work3)
|
||||||
call lopsided_kodis(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
call lopsided_fh(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,fh_work3)
|
||||||
call lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps)
|
call lopsided_fh(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,fh_work3)
|
||||||
call lopsided_kodis(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
call lopsided_fh(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,fh_work3)
|
||||||
|
|
||||||
call lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
call lopsided_fh(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,fh_work3)
|
||||||
call lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
|
call lopsided_fh(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,fh_work3)
|
||||||
call lopsided_kodis(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA,eps)
|
call lopsided_fh(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA,fh_work3)
|
||||||
call lopsided_kodis(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
call lopsided_fh(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS,fh_work3)
|
||||||
call lopsided_kodis(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA,eps)
|
call lopsided_fh(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA,fh_work3)
|
||||||
call lopsided_kodis(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
call lopsided_fh(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS,fh_work3)
|
||||||
|
|
||||||
call lopsided_kodis(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
call lopsided_fh(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS,fh_work3)
|
||||||
call lopsided_kodis(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
call lopsided_fh(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS,fh_work3)
|
||||||
|
|
||||||
call lopsided_kodis(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS,eps)
|
call lopsided_fh(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS,fh_work3)
|
||||||
call lopsided_kodis(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS,eps)
|
call lopsided_fh(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS,fh_work3)
|
||||||
call lopsided_kodis(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,eps)
|
call lopsided_fh(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,fh_work3)
|
||||||
|
!!
|
||||||
|
call lopsided_fh(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS,fh_work3)
|
||||||
|
|
||||||
|
#if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7)
|
||||||
|
call lopsided_fh(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS,fh_work3)
|
||||||
|
call lopsided_fh(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS,fh_work3)
|
||||||
|
call lopsided_fh(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA,fh_work3)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
|
||||||
|
call lopsided_fh(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS,fh_work3)
|
||||||
|
call lopsided_fh(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS,fh_work3)
|
||||||
|
call lopsided_fh(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA,fh_work3)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if(eps>0)then
|
||||||
|
! usual Kreiss-Oliger dissipation
|
||||||
|
call kodis_fh(ex,X,Y,Z,chi,chi_rhs,SSS,Symmetry,eps,fh_work3)
|
||||||
|
call kodis_fh(ex,X,Y,Z,trK,trK_rhs,SSS,Symmetry,eps,fh_work3)
|
||||||
|
call kodis_fh(ex,X,Y,Z,dxx,gxx_rhs,SSS,Symmetry,eps,fh_work3)
|
||||||
|
call kodis_fh(ex,X,Y,Z,gxy,gxy_rhs,AAS,Symmetry,eps,fh_work3)
|
||||||
|
call kodis_fh(ex,X,Y,Z,gxz,gxz_rhs,ASA,Symmetry,eps,fh_work3)
|
||||||
|
call kodis_fh(ex,X,Y,Z,dyy,gyy_rhs,SSS,Symmetry,eps,fh_work3)
|
||||||
|
call kodis_fh(ex,X,Y,Z,gyz,gyz_rhs,SAA,Symmetry,eps,fh_work3)
|
||||||
|
call kodis_fh(ex,X,Y,Z,dzz,gzz_rhs,SSS,Symmetry,eps,fh_work3)
|
||||||
|
#if 0
|
||||||
|
#define i 42
|
||||||
|
#define j 40
|
||||||
|
#define k 40
|
||||||
|
if(Lev == 1)then
|
||||||
|
write(*,*) X(i),Y(j),Z(k)
|
||||||
|
write(*,*) "before",Axx_rhs(i,j,k)
|
||||||
|
endif
|
||||||
|
#undef i
|
||||||
|
#undef j
|
||||||
|
#undef k
|
||||||
|
!!stop
|
||||||
|
#endif
|
||||||
|
call kodis_fh(ex,X,Y,Z,Axx,Axx_rhs,SSS,Symmetry,eps,fh_work3)
|
||||||
|
#if 0
|
||||||
|
#define i 42
|
||||||
|
#define j 40
|
||||||
|
#define k 40
|
||||||
|
if(Lev == 1)then
|
||||||
|
write(*,*) X(i),Y(j),Z(k)
|
||||||
|
write(*,*) "after",Axx_rhs(i,j,k)
|
||||||
|
endif
|
||||||
|
#undef i
|
||||||
|
#undef j
|
||||||
|
#undef k
|
||||||
|
!!stop
|
||||||
|
#endif
|
||||||
|
call kodis_fh(ex,X,Y,Z,Axy,Axy_rhs,AAS,Symmetry,eps,fh_work3)
|
||||||
|
call kodis_fh(ex,X,Y,Z,Axz,Axz_rhs,ASA,Symmetry,eps,fh_work3)
|
||||||
|
call kodis_fh(ex,X,Y,Z,Ayy,Ayy_rhs,SSS,Symmetry,eps,fh_work3)
|
||||||
|
call kodis_fh(ex,X,Y,Z,Ayz,Ayz_rhs,SAA,Symmetry,eps,fh_work3)
|
||||||
|
call kodis_fh(ex,X,Y,Z,Azz,Azz_rhs,SSS,Symmetry,eps,fh_work3)
|
||||||
|
call kodis_fh(ex,X,Y,Z,Gamx,Gamx_rhs,ASS,Symmetry,eps,fh_work3)
|
||||||
|
call kodis_fh(ex,X,Y,Z,Gamy,Gamy_rhs,SAS,Symmetry,eps,fh_work3)
|
||||||
|
call kodis_fh(ex,X,Y,Z,Gamz,Gamz_rhs,SSA,Symmetry,eps,fh_work3)
|
||||||
|
|
||||||
#if 1
|
#if 1
|
||||||
!! bam does not apply dissipation on gauge variables
|
!! bam does not apply dissipation on gauge variables
|
||||||
call lopsided_kodis(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
call kodis_fh(ex,X,Y,Z,Lap,Lap_rhs,SSS,Symmetry,eps,fh_work3)
|
||||||
#if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7)
|
call kodis_fh(ex,X,Y,Z,betax,betax_rhs,ASS,Symmetry,eps,fh_work3)
|
||||||
call lopsided_kodis(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS,eps)
|
call kodis_fh(ex,X,Y,Z,betay,betay_rhs,SAS,Symmetry,eps,fh_work3)
|
||||||
call lopsided_kodis(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS,eps)
|
call kodis_fh(ex,X,Y,Z,betaz,betaz_rhs,SSA,Symmetry,eps,fh_work3)
|
||||||
call lopsided_kodis(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA,eps)
|
|
||||||
#endif
|
|
||||||
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
|
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
|
||||||
call lopsided_kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS,eps)
|
call kodis_fh(ex,X,Y,Z,dtSfx,dtSfx_rhs,ASS,Symmetry,eps,fh_work3)
|
||||||
call lopsided_kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS,eps)
|
call kodis_fh(ex,X,Y,Z,dtSfy,dtSfy_rhs,SAS,Symmetry,eps,fh_work3)
|
||||||
call lopsided_kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA,eps)
|
call kodis_fh(ex,X,Y,Z,dtSfz,dtSfz_rhs,SSA,Symmetry,eps,fh_work3)
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
! No dissipation on gauge variables (advection only)
|
|
||||||
call lopsided(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS)
|
|
||||||
#if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7)
|
|
||||||
call lopsided(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS)
|
|
||||||
call lopsided(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS)
|
|
||||||
call lopsided(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA)
|
|
||||||
#endif
|
|
||||||
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
|
|
||||||
call lopsided(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS)
|
|
||||||
call lopsided(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS)
|
|
||||||
call lopsided(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA)
|
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
if(co == 0)then
|
if(co == 0)then
|
||||||
! ham_Res = trR + 2/3 * K^2 - A_ij * A^ij - 16 * PI * rho
|
! ham_Res = trR + 2/3 * K^2 - A_ij * A^ij - 16 * PI * rho
|
||||||
! here trR is respect to physical metric
|
! here trR is respect to physical metric
|
||||||
@@ -1035,12 +1082,12 @@
|
|||||||
|
|
||||||
! mov_Res_j = gupkj*(-1/chi d_k chi*A_ij + D_k A_ij) - 2/3 d_j trK - 8 PI s_j where D respect to physical metric
|
! mov_Res_j = gupkj*(-1/chi d_k chi*A_ij + D_k A_ij) - 2/3 d_j trK - 8 PI s_j where D respect to physical metric
|
||||||
! store D_i A_jk - 1/chi d_i chi*A_jk in gjk_i
|
! store D_i A_jk - 1/chi d_i chi*A_jk in gjk_i
|
||||||
call fderivs(ex,Axx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0)
|
call fderivs_fh(ex,Axx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0,fh_work2)
|
||||||
call fderivs(ex,Axy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,0)
|
call fderivs_fh(ex,Axy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,0,fh_work2)
|
||||||
call fderivs(ex,Axz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,0)
|
call fderivs_fh(ex,Axz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,0,fh_work2)
|
||||||
call fderivs(ex,Ayy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0)
|
call fderivs_fh(ex,Ayy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0,fh_work2)
|
||||||
call fderivs(ex,Ayz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,0)
|
call fderivs_fh(ex,Ayz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,0,fh_work2)
|
||||||
call fderivs(ex,Azz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0)
|
call fderivs_fh(ex,Azz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0,fh_work2)
|
||||||
|
|
||||||
gxxx = gxxx - ( Gamxxx * Axx + Gamyxx * Axy + Gamzxx * Axz &
|
gxxx = gxxx - ( Gamxxx * Axx + Gamyxx * Axy + Gamzxx * Axz &
|
||||||
+ Gamxxx * Axx + Gamyxx * Axy + Gamzxx * Axz) - chix*Axx/chin1
|
+ Gamxxx * Axx + Gamyxx * Axy + Gamzxx * Axz) - chix*Axx/chin1
|
||||||
|
|||||||
@@ -1103,6 +1103,103 @@
|
|||||||
|
|
||||||
end subroutine fderivs
|
end subroutine fderivs
|
||||||
!-----------------------------------------------------------------------------
|
!-----------------------------------------------------------------------------
|
||||||
|
! fderivs variant: reuses caller-provided fh work array (memory pool)
|
||||||
|
!-----------------------------------------------------------------------------
|
||||||
|
subroutine fderivs_fh(ex,f,fx,fy,fz,X,Y,Z,SYM1,SYM2,SYM3, &
|
||||||
|
symmetry,onoff,fh)
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
integer, intent(in ):: ex(1:3),symmetry,onoff
|
||||||
|
real*8, dimension(ex(1),ex(2),ex(3)), intent(in ):: f
|
||||||
|
real*8, dimension(ex(1),ex(2),ex(3)), intent(out):: fx,fy,fz
|
||||||
|
real*8, intent(in) :: X(ex(1)),Y(ex(2)),Z(ex(3))
|
||||||
|
real*8, intent(in ):: SYM1,SYM2,SYM3
|
||||||
|
real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)),intent(inout):: fh
|
||||||
|
|
||||||
|
real*8 :: dX,dY,dZ
|
||||||
|
real*8, dimension(3) :: SoA
|
||||||
|
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
|
||||||
|
real*8 :: d12dx,d12dy,d12dz,d2dx,d2dy,d2dz
|
||||||
|
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
|
||||||
|
real*8, parameter :: ZEO=0.d0,ONE=1.d0
|
||||||
|
real*8, parameter :: TWO=2.d0,EIT=8.d0
|
||||||
|
real*8, parameter :: F12=1.2d1
|
||||||
|
|
||||||
|
dX = X(2)-X(1)
|
||||||
|
dY = Y(2)-Y(1)
|
||||||
|
dZ = Z(2)-Z(1)
|
||||||
|
|
||||||
|
imax = ex(1)
|
||||||
|
jmax = ex(2)
|
||||||
|
kmax = ex(3)
|
||||||
|
|
||||||
|
imin = 1
|
||||||
|
jmin = 1
|
||||||
|
kmin = 1
|
||||||
|
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -1
|
||||||
|
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -1
|
||||||
|
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -1
|
||||||
|
|
||||||
|
SoA(1) = SYM1
|
||||||
|
SoA(2) = SYM2
|
||||||
|
SoA(3) = SYM3
|
||||||
|
|
||||||
|
call symmetry_bd(2,ex,f,fh,SoA)
|
||||||
|
|
||||||
|
d12dx = ONE/F12/dX
|
||||||
|
d12dy = ONE/F12/dY
|
||||||
|
d12dz = ONE/F12/dZ
|
||||||
|
|
||||||
|
d2dx = ONE/TWO/dX
|
||||||
|
d2dy = ONE/TWO/dY
|
||||||
|
d2dz = ONE/TWO/dZ
|
||||||
|
|
||||||
|
fx = ZEO
|
||||||
|
fy = ZEO
|
||||||
|
fz = ZEO
|
||||||
|
|
||||||
|
do k=1,ex(3)-1
|
||||||
|
do j=1,ex(2)-1
|
||||||
|
do i=1,ex(1)-1
|
||||||
|
#if 0
|
||||||
|
if(i+2 <= imax .and. i-2 >= imin)then
|
||||||
|
fx(i,j,k)=d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
|
||||||
|
elseif(i+1 <= imax .and. i-1 >= imin)then
|
||||||
|
fx(i,j,k)=d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
|
||||||
|
endif
|
||||||
|
if(j+2 <= jmax .and. j-2 >= jmin)then
|
||||||
|
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
|
||||||
|
elseif(j+1 <= jmax .and. j-1 >= jmin)then
|
||||||
|
fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
|
||||||
|
endif
|
||||||
|
if(k+2 <= kmax .and. k-2 >= kmin)then
|
||||||
|
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
|
||||||
|
elseif(k+1 <= kmax .and. k-1 >= kmin)then
|
||||||
|
fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
|
||||||
|
endif
|
||||||
|
#else
|
||||||
|
if(i+2 <= imax .and. i-2 >= imin .and. &
|
||||||
|
j+2 <= jmax .and. j-2 >= jmin .and. &
|
||||||
|
k+2 <= kmax .and. k-2 >= kmin) then
|
||||||
|
fx(i,j,k)=d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
|
||||||
|
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
|
||||||
|
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
|
||||||
|
elseif(i+1 <= imax .and. i-1 >= imin .and. &
|
||||||
|
j+1 <= jmax .and. j-1 >= jmin .and. &
|
||||||
|
k+1 <= kmax .and. k-1 >= kmin) then
|
||||||
|
fx(i,j,k)=d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
|
||||||
|
fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
|
||||||
|
fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
|
||||||
|
endif
|
||||||
|
#endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine fderivs_fh
|
||||||
|
!-----------------------------------------------------------------------------
|
||||||
!
|
!
|
||||||
! single derivatives dx
|
! single derivatives dx
|
||||||
!
|
!
|
||||||
@@ -1940,6 +2037,162 @@
|
|||||||
|
|
||||||
end subroutine fddyz
|
end subroutine fddyz
|
||||||
|
|
||||||
|
!-----------------------------------------------------------------------------
|
||||||
|
! fdderivs variant: reuses caller-provided fh work array (memory pool)
|
||||||
|
!-----------------------------------------------------------------------------
|
||||||
|
subroutine fdderivs_fh(ex,f,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z, &
|
||||||
|
SYM1,SYM2,SYM3,symmetry,onoff,fh)
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
integer, intent(in ):: ex(1:3),symmetry,onoff
|
||||||
|
real*8, dimension(ex(1),ex(2),ex(3)),intent(in ):: f
|
||||||
|
real*8, dimension(ex(1),ex(2),ex(3)),intent(out):: fxx,fxy,fxz,fyy,fyz,fzz
|
||||||
|
real*8, intent(in ):: X(ex(1)),Y(ex(2)),Z(ex(3))
|
||||||
|
real*8, intent(in ):: SYM1,SYM2,SYM3
|
||||||
|
real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)),intent(inout):: fh
|
||||||
|
|
||||||
|
real*8 :: dX,dY,dZ
|
||||||
|
real*8, dimension(3) :: SoA
|
||||||
|
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
|
||||||
|
real*8 :: Sdxdx,Sdydy,Sdzdz,Fdxdx,Fdydy,Fdzdz
|
||||||
|
real*8 :: Sdxdy,Sdxdz,Sdydz,Fdxdy,Fdxdz,Fdydz
|
||||||
|
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
|
||||||
|
real*8, parameter :: ZEO=0.d0, ONE=1.d0, TWO=2.d0, F1o4=2.5d-1
|
||||||
|
real*8, parameter :: F8=8.d0, F16=1.6d1, F30=3.d1
|
||||||
|
real*8, parameter :: F1o12=ONE/1.2d1, F1o144=ONE/1.44d2
|
||||||
|
|
||||||
|
dX = X(2)-X(1)
|
||||||
|
dY = Y(2)-Y(1)
|
||||||
|
dZ = Z(2)-Z(1)
|
||||||
|
|
||||||
|
imax = ex(1)
|
||||||
|
jmax = ex(2)
|
||||||
|
kmax = ex(3)
|
||||||
|
|
||||||
|
imin = 1
|
||||||
|
jmin = 1
|
||||||
|
kmin = 1
|
||||||
|
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -1
|
||||||
|
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -1
|
||||||
|
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -1
|
||||||
|
|
||||||
|
SoA(1) = SYM1
|
||||||
|
SoA(2) = SYM2
|
||||||
|
SoA(3) = SYM3
|
||||||
|
|
||||||
|
call symmetry_bd(2,ex,f,fh,SoA)
|
||||||
|
|
||||||
|
Sdxdx = ONE /( dX * dX )
|
||||||
|
Sdydy = ONE /( dY * dY )
|
||||||
|
Sdzdz = ONE /( dZ * dZ )
|
||||||
|
|
||||||
|
Fdxdx = F1o12 /( dX * dX )
|
||||||
|
Fdydy = F1o12 /( dY * dY )
|
||||||
|
Fdzdz = F1o12 /( dZ * dZ )
|
||||||
|
|
||||||
|
Sdxdy = F1o4 /( dX * dY )
|
||||||
|
Sdxdz = F1o4 /( dX * dZ )
|
||||||
|
Sdydz = F1o4 /( dY * dZ )
|
||||||
|
|
||||||
|
Fdxdy = F1o144 /( dX * dY )
|
||||||
|
Fdxdz = F1o144 /( dX * dZ )
|
||||||
|
Fdydz = F1o144 /( dY * dZ )
|
||||||
|
|
||||||
|
fxx = ZEO
|
||||||
|
fyy = ZEO
|
||||||
|
fzz = ZEO
|
||||||
|
fxy = ZEO
|
||||||
|
fxz = ZEO
|
||||||
|
fyz = ZEO
|
||||||
|
|
||||||
|
do k=1,ex(3)-1
|
||||||
|
do j=1,ex(2)-1
|
||||||
|
do i=1,ex(1)-1
|
||||||
|
#if 0
|
||||||
|
if(i+2 <= imax .and. i-2 >= imin)then
|
||||||
|
fxx(i,j,k) = Fdxdx*(-fh(i-2,j,k)+F16*fh(i-1,j,k)-F30*fh(i,j,k) &
|
||||||
|
-fh(i+2,j,k)+F16*fh(i+1,j,k) )
|
||||||
|
elseif(i+1 <= imax .and. i-1 >= imin)then
|
||||||
|
fxx(i,j,k) = Sdxdx*(fh(i-1,j,k)-TWO*fh(i,j,k)+fh(i+1,j,k))
|
||||||
|
endif
|
||||||
|
if(j+2 <= jmax .and. j-2 >= jmin)then
|
||||||
|
fyy(i,j,k) = Fdydy*(-fh(i,j-2,k)+F16*fh(i,j-1,k)-F30*fh(i,j,k) &
|
||||||
|
-fh(i,j+2,k)+F16*fh(i,j+1,k) )
|
||||||
|
elseif(j+1 <= jmax .and. j-1 >= jmin)then
|
||||||
|
fyy(i,j,k) = Sdydy*(fh(i,j-1,k)-TWO*fh(i,j,k)+fh(i,j+1,k))
|
||||||
|
endif
|
||||||
|
if(k+2 <= kmax .and. k-2 >= kmin)then
|
||||||
|
fzz(i,j,k) = Fdzdz*(-fh(i,j,k-2)+F16*fh(i,j,k-1)-F30*fh(i,j,k) &
|
||||||
|
-fh(i,j,k+2)+F16*fh(i,j,k+1) )
|
||||||
|
elseif(k+1 <= kmax .and. k-1 >= kmin)then
|
||||||
|
fzz(i,j,k) = Sdzdz*(fh(i,j,k-1)-TWO*fh(i,j,k)+fh(i,j,k+1))
|
||||||
|
endif
|
||||||
|
if(i+2 <= imax .and. i-2 >= imin .and. j+2 <= jmax .and. j-2 >= jmin)then
|
||||||
|
fxy(i,j,k) = Fdxdy*( (fh(i-2,j-2,k)-F8*fh(i-1,j-2,k)+F8*fh(i+1,j-2,k)-fh(i+2,j-2,k)) &
|
||||||
|
-F8 *(fh(i-2,j-1,k)-F8*fh(i-1,j-1,k)+F8*fh(i+1,j-1,k)-fh(i+2,j-1,k)) &
|
||||||
|
+F8 *(fh(i-2,j+1,k)-F8*fh(i-1,j+1,k)+F8*fh(i+1,j+1,k)-fh(i+2,j+1,k)) &
|
||||||
|
- (fh(i-2,j+2,k)-F8*fh(i-1,j+2,k)+F8*fh(i+1,j+2,k)-fh(i+2,j+2,k)))
|
||||||
|
elseif(i+1 <= imax .and. i-1 >= imin .and. j+1 <= jmax .and. j-1 >= jmin)then
|
||||||
|
fxy(i,j,k) = Sdxdy*(fh(i-1,j-1,k)-fh(i+1,j-1,k)-fh(i-1,j+1,k)+fh(i+1,j+1,k))
|
||||||
|
endif
|
||||||
|
if(i+2 <= imax .and. i-2 >= imin .and. k+2 <= kmax .and. k-2 >= kmin)then
|
||||||
|
fxz(i,j,k) = Fdxdz*( (fh(i-2,j,k-2)-F8*fh(i-1,j,k-2)+F8*fh(i+1,j,k-2)-fh(i+2,j,k-2)) &
|
||||||
|
-F8 *(fh(i-2,j,k-1)-F8*fh(i-1,j,k-1)+F8*fh(i+1,j,k-1)-fh(i+2,j,k-1)) &
|
||||||
|
+F8 *(fh(i-2,j,k+1)-F8*fh(i-1,j,k+1)+F8*fh(i+1,j,k+1)-fh(i+2,j,k+1)) &
|
||||||
|
- (fh(i-2,j,k+2)-F8*fh(i-1,j,k+2)+F8*fh(i+1,j,k+2)-fh(i+2,j,k+2)))
|
||||||
|
elseif(i+1 <= imax .and. i-1 >= imin .and. k+1 <= kmax .and. k-1 >= kmin)then
|
||||||
|
fxz(i,j,k) = Sdxdz*(fh(i-1,j,k-1)-fh(i+1,j,k-1)-fh(i-1,j,k+1)+fh(i+1,j,k+1))
|
||||||
|
endif
|
||||||
|
if(j+2 <= jmax .and. j-2 >= jmin .and. k+2 <= kmax .and. k-2 >= kmin)then
|
||||||
|
fyz(i,j,k) = Fdydz*( (fh(i,j-2,k-2)-F8*fh(i,j-1,k-2)+F8*fh(i,j+1,k-2)-fh(i,j+2,k-2)) &
|
||||||
|
-F8 *(fh(i,j-2,k-1)-F8*fh(i,j-1,k-1)+F8*fh(i,j+1,k-1)-fh(i,j+2,k-1)) &
|
||||||
|
+F8 *(fh(i,j-2,k+1)-F8*fh(i,j-1,k+1)+F8*fh(i,j+1,k+1)-fh(i,j+2,k+1)) &
|
||||||
|
- (fh(i,j-2,k+2)-F8*fh(i,j-1,k+2)+F8*fh(i,j+1,k+2)-fh(i,j+2,k+2)))
|
||||||
|
elseif(j+1 <= jmax .and. j-1 >= jmin .and. k+1 <= kmax .and. k-1 >= kmin)then
|
||||||
|
fyz(i,j,k) = Sdydz*(fh(i,j-1,k-1)-fh(i,j+1,k-1)-fh(i,j-1,k+1)+fh(i,j+1,k+1))
|
||||||
|
endif
|
||||||
|
#else
|
||||||
|
! for bam comparison
|
||||||
|
if(i+2 <= imax .and. i-2 >= imin .and. &
|
||||||
|
j+2 <= jmax .and. j-2 >= jmin .and. &
|
||||||
|
k+2 <= kmax .and. k-2 >= kmin) then
|
||||||
|
fxx(i,j,k) = Fdxdx*(-fh(i-2,j,k)+F16*fh(i-1,j,k)-F30*fh(i,j,k) &
|
||||||
|
-fh(i+2,j,k)+F16*fh(i+1,j,k) )
|
||||||
|
fyy(i,j,k) = Fdydy*(-fh(i,j-2,k)+F16*fh(i,j-1,k)-F30*fh(i,j,k) &
|
||||||
|
-fh(i,j+2,k)+F16*fh(i,j+1,k) )
|
||||||
|
fzz(i,j,k) = Fdzdz*(-fh(i,j,k-2)+F16*fh(i,j,k-1)-F30*fh(i,j,k) &
|
||||||
|
-fh(i,j,k+2)+F16*fh(i,j,k+1) )
|
||||||
|
fxy(i,j,k) = Fdxdy*( (fh(i-2,j-2,k)-F8*fh(i-1,j-2,k)+F8*fh(i+1,j-2,k)-fh(i+2,j-2,k)) &
|
||||||
|
-F8 *(fh(i-2,j-1,k)-F8*fh(i-1,j-1,k)+F8*fh(i+1,j-1,k)-fh(i+2,j-1,k)) &
|
||||||
|
+F8 *(fh(i-2,j+1,k)-F8*fh(i-1,j+1,k)+F8*fh(i+1,j+1,k)-fh(i+2,j+1,k)) &
|
||||||
|
- (fh(i-2,j+2,k)-F8*fh(i-1,j+2,k)+F8*fh(i+1,j+2,k)-fh(i+2,j+2,k)))
|
||||||
|
fxz(i,j,k) = Fdxdz*( (fh(i-2,j,k-2)-F8*fh(i-1,j,k-2)+F8*fh(i+1,j,k-2)-fh(i+2,j,k-2)) &
|
||||||
|
-F8 *(fh(i-2,j,k-1)-F8*fh(i-1,j,k-1)+F8*fh(i+1,j,k-1)-fh(i+2,j,k-1)) &
|
||||||
|
+F8 *(fh(i-2,j,k+1)-F8*fh(i-1,j,k+1)+F8*fh(i+1,j,k+1)-fh(i+2,j,k+1)) &
|
||||||
|
- (fh(i-2,j,k+2)-F8*fh(i-1,j,k+2)+F8*fh(i+1,j,k+2)-fh(i+2,j,k+2)))
|
||||||
|
fyz(i,j,k) = Fdydz*( (fh(i,j-2,k-2)-F8*fh(i,j-1,k-2)+F8*fh(i,j+1,k-2)-fh(i,j+2,k-2)) &
|
||||||
|
-F8 *(fh(i,j-2,k-1)-F8*fh(i,j-1,k-1)+F8*fh(i,j+1,k-1)-fh(i,j+2,k-1)) &
|
||||||
|
+F8 *(fh(i,j-2,k+1)-F8*fh(i,j-1,k+1)+F8*fh(i,j+1,k+1)-fh(i,j+2,k+1)) &
|
||||||
|
- (fh(i,j-2,k+2)-F8*fh(i,j-1,k+2)+F8*fh(i,j+1,k+2)-fh(i,j+2,k+2)))
|
||||||
|
elseif(i+1 <= imax .and. i-1 >= imin .and. &
|
||||||
|
j+1 <= jmax .and. j-1 >= jmin .and. &
|
||||||
|
k+1 <= kmax .and. k-1 >= kmin) then
|
||||||
|
fxx(i,j,k) = Sdxdx*(fh(i-1,j,k)-TWO*fh(i,j,k)+fh(i+1,j,k))
|
||||||
|
fyy(i,j,k) = Sdydy*(fh(i,j-1,k)-TWO*fh(i,j,k)+fh(i,j+1,k))
|
||||||
|
fzz(i,j,k) = Sdzdz*(fh(i,j,k-1)-TWO*fh(i,j,k)+fh(i,j,k+1))
|
||||||
|
fxy(i,j,k) = Sdxdy*(fh(i-1,j-1,k)-fh(i+1,j-1,k)-fh(i-1,j+1,k)+fh(i+1,j+1,k))
|
||||||
|
fxz(i,j,k) = Sdxdz*(fh(i-1,j,k-1)-fh(i+1,j,k-1)-fh(i-1,j,k+1)+fh(i+1,j,k+1))
|
||||||
|
fyz(i,j,k) = Sdydz*(fh(i,j-1,k-1)-fh(i,j+1,k-1)-fh(i,j-1,k+1)+fh(i,j+1,k+1))
|
||||||
|
endif
|
||||||
|
#endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine fdderivs_fh
|
||||||
|
|
||||||
#elif (ghost_width == 4)
|
#elif (ghost_width == 4)
|
||||||
! sixth order code
|
! sixth order code
|
||||||
|
|
||||||
|
|||||||
@@ -215,6 +215,99 @@ integer, parameter :: NO_SYMM=0, OCTANT=2
|
|||||||
|
|
||||||
end subroutine kodis
|
end subroutine kodis
|
||||||
|
|
||||||
|
!-----------------------------------------------------------------------------
|
||||||
|
! kodis variant: reuses caller-provided fh work array (memory pool)
|
||||||
|
!-----------------------------------------------------------------------------
|
||||||
|
subroutine kodis_fh(ex,X,Y,Z,f,f_rhs,SoA,Symmetry,eps,fh)
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
! argument variables
|
||||||
|
integer,intent(in) :: Symmetry
|
||||||
|
integer,dimension(3),intent(in)::ex
|
||||||
|
real*8, dimension(1:3), intent(in) :: SoA
|
||||||
|
double precision,intent(in),dimension(ex(1))::X
|
||||||
|
double precision,intent(in),dimension(ex(2))::Y
|
||||||
|
double precision,intent(in),dimension(ex(3))::Z
|
||||||
|
double precision,intent(in),dimension(ex(1),ex(2),ex(3))::f
|
||||||
|
double precision,intent(inout),dimension(ex(1),ex(2),ex(3))::f_rhs
|
||||||
|
real*8,intent(in) :: eps
|
||||||
|
real*8,dimension(-2:ex(1),-2:ex(2),-2:ex(3)),intent(inout):: fh
|
||||||
|
! local variables
|
||||||
|
integer :: imin,jmin,kmin,imax,jmax,kmax
|
||||||
|
integer :: i,j,k
|
||||||
|
real*8 :: dX,dY,dZ
|
||||||
|
real*8, parameter :: ONE=1.d0,SIX=6.d0,FIT=1.5d1,TWT=2.d1
|
||||||
|
real*8,parameter::cof=6.4d1 ! 2^6
|
||||||
|
integer, parameter :: NO_SYMM=0, OCTANT=2
|
||||||
|
|
||||||
|
dX = X(2)-X(1)
|
||||||
|
dY = Y(2)-Y(1)
|
||||||
|
dZ = Z(2)-Z(1)
|
||||||
|
|
||||||
|
imax = ex(1)
|
||||||
|
jmax = ex(2)
|
||||||
|
kmax = ex(3)
|
||||||
|
|
||||||
|
imin = 1
|
||||||
|
jmin = 1
|
||||||
|
kmin = 1
|
||||||
|
|
||||||
|
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -2
|
||||||
|
if(Symmetry == OCTANT .and. dabs(X(1)) < dX) imin = -2
|
||||||
|
if(Symmetry == OCTANT .and. dabs(Y(1)) < dY) jmin = -2
|
||||||
|
|
||||||
|
call symmetry_bd(3,ex,f,fh,SoA)
|
||||||
|
|
||||||
|
do k=1,ex(3)
|
||||||
|
do j=1,ex(2)
|
||||||
|
do i=1,ex(1)
|
||||||
|
|
||||||
|
if(i-3 >= imin .and. i+3 <= imax .and. &
|
||||||
|
j-3 >= jmin .and. j+3 <= jmax .and. &
|
||||||
|
k-3 >= kmin .and. k+3 <= kmax) then
|
||||||
|
#if 0
|
||||||
|
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/dX/cof * ( &
|
||||||
|
(fh(i-3,j,k)+fh(i+3,j,k)) - &
|
||||||
|
SIX*(fh(i-2,j,k)+fh(i+2,j,k)) + &
|
||||||
|
FIT*(fh(i-1,j,k)+fh(i+1,j,k)) - &
|
||||||
|
TWT* fh(i,j,k) )
|
||||||
|
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/dY/cof * ( &
|
||||||
|
(fh(i,j-3,k)+fh(i,j+3,k)) - &
|
||||||
|
SIX*(fh(i,j-2,k)+fh(i,j+2,k)) + &
|
||||||
|
FIT*(fh(i,j-1,k)+fh(i,j+1,k)) - &
|
||||||
|
TWT* fh(i,j,k) )
|
||||||
|
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/dZ/cof * ( &
|
||||||
|
(fh(i,j,k-3)+fh(i,j,k+3)) - &
|
||||||
|
SIX*(fh(i,j,k-2)+fh(i,j,k+2)) + &
|
||||||
|
FIT*(fh(i,j,k-1)+fh(i,j,k+1)) - &
|
||||||
|
TWT* fh(i,j,k) )
|
||||||
|
#else
|
||||||
|
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof *( ( &
|
||||||
|
(fh(i-3,j,k)+fh(i+3,j,k)) - &
|
||||||
|
SIX*(fh(i-2,j,k)+fh(i+2,j,k)) + &
|
||||||
|
FIT*(fh(i-1,j,k)+fh(i+1,j,k)) - &
|
||||||
|
TWT* fh(i,j,k) )/dX + &
|
||||||
|
( &
|
||||||
|
(fh(i,j-3,k)+fh(i,j+3,k)) - &
|
||||||
|
SIX*(fh(i,j-2,k)+fh(i,j+2,k)) + &
|
||||||
|
FIT*(fh(i,j-1,k)+fh(i,j+1,k)) - &
|
||||||
|
TWT* fh(i,j,k) )/dY + &
|
||||||
|
( &
|
||||||
|
(fh(i,j,k-3)+fh(i,j,k+3)) - &
|
||||||
|
SIX*(fh(i,j,k-2)+fh(i,j,k+2)) + &
|
||||||
|
FIT*(fh(i,j,k-1)+fh(i,j,k+1)) - &
|
||||||
|
TWT* fh(i,j,k) )/dZ )
|
||||||
|
#endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine kodis_fh
|
||||||
|
|
||||||
#elif (ghost_width == 4)
|
#elif (ghost_width == 4)
|
||||||
! sixth order code
|
! sixth order code
|
||||||
!------------------------------------------------------------------------------------------------------------------------------
|
!------------------------------------------------------------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -488,11 +488,9 @@ subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
|
|||||||
end subroutine lopsided
|
end subroutine lopsided
|
||||||
|
|
||||||
!-----------------------------------------------------------------------------
|
!-----------------------------------------------------------------------------
|
||||||
! Combined advection (lopsided) + Kreiss-Oliger dissipation (kodis)
|
! lopsided variant: reuses caller-provided fh work array (memory pool)
|
||||||
! Shares the symmetry_bd buffer fh, eliminating one full-grid copy per call.
|
|
||||||
! Mathematically identical to calling lopsided then kodis separately.
|
|
||||||
!-----------------------------------------------------------------------------
|
!-----------------------------------------------------------------------------
|
||||||
subroutine lopsided_kodis(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA,eps)
|
subroutine lopsided_fh(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA,fh)
|
||||||
implicit none
|
implicit none
|
||||||
|
|
||||||
!~~~~~~> Input parameters:
|
!~~~~~~> Input parameters:
|
||||||
@@ -503,11 +501,9 @@ subroutine lopsided_kodis(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA,eps)
|
|||||||
|
|
||||||
real*8,dimension(ex(1),ex(2),ex(3)),intent(inout):: f_rhs
|
real*8,dimension(ex(1),ex(2),ex(3)),intent(inout):: f_rhs
|
||||||
real*8,dimension(3),intent(in) ::SoA
|
real*8,dimension(3),intent(in) ::SoA
|
||||||
real*8,intent(in) :: eps
|
real*8,dimension(-2:ex(1),-2:ex(2),-2:ex(3)),intent(inout):: fh
|
||||||
|
|
||||||
!~~~~~~> local variables:
|
!~~~~~~> local variables:
|
||||||
! note index -2,-1,0, so we have 3 extra points
|
|
||||||
real*8,dimension(-2:ex(1),-2:ex(2),-2:ex(3)) :: fh
|
|
||||||
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
|
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
|
||||||
real*8 :: dX,dY,dZ
|
real*8 :: dX,dY,dZ
|
||||||
real*8 :: d12dx,d12dy,d12dz,d2dx,d2dy,d2dz
|
real*8 :: d12dx,d12dy,d12dz,d2dx,d2dy,d2dz
|
||||||
@@ -515,9 +511,6 @@ subroutine lopsided_kodis(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA,eps)
|
|||||||
real*8, parameter :: TWO=2.d0,F6=6.0d0,F18=1.8d1
|
real*8, parameter :: TWO=2.d0,F6=6.0d0,F18=1.8d1
|
||||||
real*8, parameter :: F12=1.2d1, F10=1.d1,EIT=8.d0
|
real*8, parameter :: F12=1.2d1, F10=1.d1,EIT=8.d0
|
||||||
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
|
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
|
||||||
! kodis parameters
|
|
||||||
real*8, parameter :: SIX=6.d0,FIT=1.5d1,TWT=2.d1
|
|
||||||
real*8, parameter :: cof=6.4d1 ! 2^6
|
|
||||||
|
|
||||||
dX = X(2)-X(1)
|
dX = X(2)-X(1)
|
||||||
dY = Y(2)-Y(1)
|
dY = Y(2)-Y(1)
|
||||||
@@ -542,16 +535,18 @@ subroutine lopsided_kodis(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA,eps)
|
|||||||
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -2
|
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -2
|
||||||
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -2
|
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -2
|
||||||
|
|
||||||
! Single symmetry_bd call shared by both advection and dissipation
|
|
||||||
call symmetry_bd(3,ex,f,fh,SoA)
|
call symmetry_bd(3,ex,f,fh,SoA)
|
||||||
|
|
||||||
! ---- Advection (lopsided) loop ----
|
! upper bound set ex-1 only for efficiency,
|
||||||
! upper bound set ex-1 only for efficiency,
|
|
||||||
! the loop body will set ex 0 also
|
! the loop body will set ex 0 also
|
||||||
do k=1,ex(3)-1
|
do k=1,ex(3)-1
|
||||||
do j=1,ex(2)-1
|
do j=1,ex(2)-1
|
||||||
do i=1,ex(1)-1
|
do i=1,ex(1)-1
|
||||||
! x direction
|
#if 0
|
||||||
|
!! old code - same as original lopsided
|
||||||
|
#else
|
||||||
|
!! new code, 2012dec27, based on bam
|
||||||
|
! x direction
|
||||||
if(Sfx(i,j,k) > ZEO)then
|
if(Sfx(i,j,k) > ZEO)then
|
||||||
if(i+3 <= imax)then
|
if(i+3 <= imax)then
|
||||||
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
||||||
@@ -560,7 +555,6 @@ subroutine lopsided_kodis(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA,eps)
|
|||||||
elseif(i+2 <= imax)then
|
elseif(i+2 <= imax)then
|
||||||
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
||||||
Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
|
Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
|
||||||
|
|
||||||
elseif(i+1 <= imax)then
|
elseif(i+1 <= imax)then
|
||||||
f_rhs(i,j,k)=f_rhs(i,j,k)- &
|
f_rhs(i,j,k)=f_rhs(i,j,k)- &
|
||||||
Sfx(i,j,k)*d12dx*(-F3*fh(i+1,j,k)-F10*fh(i,j,k)+F18*fh(i-1,j,k) &
|
Sfx(i,j,k)*d12dx*(-F3*fh(i+1,j,k)-F10*fh(i,j,k)+F18*fh(i-1,j,k) &
|
||||||
@@ -574,7 +568,6 @@ subroutine lopsided_kodis(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA,eps)
|
|||||||
elseif(i-2 >= imin)then
|
elseif(i-2 >= imin)then
|
||||||
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
||||||
Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
|
Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
|
||||||
|
|
||||||
elseif(i-1 >= imin)then
|
elseif(i-1 >= imin)then
|
||||||
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
||||||
Sfx(i,j,k)*d12dx*(-F3*fh(i-1,j,k)-F10*fh(i,j,k)+F18*fh(i+1,j,k) &
|
Sfx(i,j,k)*d12dx*(-F3*fh(i-1,j,k)-F10*fh(i,j,k)+F18*fh(i+1,j,k) &
|
||||||
@@ -582,7 +575,7 @@ subroutine lopsided_kodis(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA,eps)
|
|||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
! y direction
|
! y direction
|
||||||
if(Sfy(i,j,k) > ZEO)then
|
if(Sfy(i,j,k) > ZEO)then
|
||||||
if(j+3 <= jmax)then
|
if(j+3 <= jmax)then
|
||||||
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
||||||
@@ -591,7 +584,6 @@ subroutine lopsided_kodis(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA,eps)
|
|||||||
elseif(j+2 <= jmax)then
|
elseif(j+2 <= jmax)then
|
||||||
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
||||||
Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
|
Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
|
||||||
|
|
||||||
elseif(j+1 <= jmax)then
|
elseif(j+1 <= jmax)then
|
||||||
f_rhs(i,j,k)=f_rhs(i,j,k)- &
|
f_rhs(i,j,k)=f_rhs(i,j,k)- &
|
||||||
Sfy(i,j,k)*d12dy*(-F3*fh(i,j+1,k)-F10*fh(i,j,k)+F18*fh(i,j-1,k) &
|
Sfy(i,j,k)*d12dy*(-F3*fh(i,j+1,k)-F10*fh(i,j,k)+F18*fh(i,j-1,k) &
|
||||||
@@ -605,7 +597,6 @@ subroutine lopsided_kodis(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA,eps)
|
|||||||
elseif(j-2 >= jmin)then
|
elseif(j-2 >= jmin)then
|
||||||
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
||||||
Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
|
Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
|
||||||
|
|
||||||
elseif(j-1 >= jmin)then
|
elseif(j-1 >= jmin)then
|
||||||
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
||||||
Sfy(i,j,k)*d12dy*(-F3*fh(i,j-1,k)-F10*fh(i,j,k)+F18*fh(i,j+1,k) &
|
Sfy(i,j,k)*d12dy*(-F3*fh(i,j-1,k)-F10*fh(i,j,k)+F18*fh(i,j+1,k) &
|
||||||
@@ -613,7 +604,7 @@ subroutine lopsided_kodis(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA,eps)
|
|||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
! z direction
|
! z direction
|
||||||
if(Sfz(i,j,k) > ZEO)then
|
if(Sfz(i,j,k) > ZEO)then
|
||||||
if(k+3 <= kmax)then
|
if(k+3 <= kmax)then
|
||||||
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
||||||
@@ -622,7 +613,6 @@ subroutine lopsided_kodis(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA,eps)
|
|||||||
elseif(k+2 <= kmax)then
|
elseif(k+2 <= kmax)then
|
||||||
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
||||||
Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
|
Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
|
||||||
|
|
||||||
elseif(k+1 <= kmax)then
|
elseif(k+1 <= kmax)then
|
||||||
f_rhs(i,j,k)=f_rhs(i,j,k)- &
|
f_rhs(i,j,k)=f_rhs(i,j,k)- &
|
||||||
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k+1)-F10*fh(i,j,k)+F18*fh(i,j,k-1) &
|
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k+1)-F10*fh(i,j,k)+F18*fh(i,j,k-1) &
|
||||||
@@ -636,51 +626,20 @@ subroutine lopsided_kodis(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA,eps)
|
|||||||
elseif(k-2 >= kmin)then
|
elseif(k-2 >= kmin)then
|
||||||
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
||||||
Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
|
Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
|
||||||
|
|
||||||
elseif(k-1 >= kmin)then
|
elseif(k-1 >= kmin)then
|
||||||
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
|
||||||
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k-1)-F10*fh(i,j,k)+F18*fh(i,j,k+1) &
|
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k-1)-F10*fh(i,j,k)+F18*fh(i,j,k+1) &
|
||||||
-F6*fh(i,j,k+2)+ fh(i,j,k+3))
|
-F6*fh(i,j,k+2)+ fh(i,j,k+3))
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
#endif
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
! ---- Dissipation (kodis) loop ----
|
|
||||||
if(eps > ZEO) then
|
|
||||||
do k=1,ex(3)
|
|
||||||
do j=1,ex(2)
|
|
||||||
do i=1,ex(1)
|
|
||||||
|
|
||||||
if(i-3 >= imin .and. i+3 <= imax .and. &
|
|
||||||
j-3 >= jmin .and. j+3 <= jmax .and. &
|
|
||||||
k-3 >= kmin .and. k+3 <= kmax) then
|
|
||||||
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof *( ( &
|
|
||||||
(fh(i-3,j,k)+fh(i+3,j,k)) - &
|
|
||||||
SIX*(fh(i-2,j,k)+fh(i+2,j,k)) + &
|
|
||||||
FIT*(fh(i-1,j,k)+fh(i+1,j,k)) - &
|
|
||||||
TWT* fh(i,j,k) )/dX + &
|
|
||||||
( &
|
|
||||||
(fh(i,j-3,k)+fh(i,j+3,k)) - &
|
|
||||||
SIX*(fh(i,j-2,k)+fh(i,j+2,k)) + &
|
|
||||||
FIT*(fh(i,j-1,k)+fh(i,j+1,k)) - &
|
|
||||||
TWT* fh(i,j,k) )/dY + &
|
|
||||||
( &
|
|
||||||
(fh(i,j,k-3)+fh(i,j,k+3)) - &
|
|
||||||
SIX*(fh(i,j,k-2)+fh(i,j,k+2)) + &
|
|
||||||
FIT*(fh(i,j,k-1)+fh(i,j,k+1)) - &
|
|
||||||
TWT* fh(i,j,k) )/dZ )
|
|
||||||
endif
|
|
||||||
|
|
||||||
enddo
|
|
||||||
enddo
|
|
||||||
enddo
|
|
||||||
endif
|
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
end subroutine lopsided_kodis
|
end subroutine lopsided_fh
|
||||||
|
|
||||||
#elif (ghost_width == 4)
|
#elif (ghost_width == 4)
|
||||||
! sixth order code
|
! sixth order code
|
||||||
|
|||||||
@@ -16,12 +16,6 @@ include makefile.inc
|
|||||||
.cu.o:
|
.cu.o:
|
||||||
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
||||||
|
|
||||||
TwoPunctures.o: TwoPunctures.C
|
|
||||||
${CXX} $(CXXAPPFLAGS) -qopenmp -c $< -o $@
|
|
||||||
|
|
||||||
TwoPunctureABE.o: TwoPunctureABE.C
|
|
||||||
${CXX} $(CXXAPPFLAGS) -qopenmp -c $< -o $@
|
|
||||||
|
|
||||||
# Input files
|
# Input files
|
||||||
C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
||||||
cgh.o bssn_class.o surface_integral.o ShellPatch.o\
|
cgh.o bssn_class.o surface_integral.o ShellPatch.o\
|
||||||
@@ -102,7 +96,7 @@ ABEGPU: $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
|
|||||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
|
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
|
||||||
|
|
||||||
TwoPunctureABE: $(TwoPunctureFILES)
|
TwoPunctureABE: $(TwoPunctureFILES)
|
||||||
$(CLINKER) $(CXXAPPFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
|
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(TwoPunctureFILES) $(LDLIBS)
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm *.o ABE ABEGPU TwoPunctureABE make.log -f
|
rm *.o ABE ABEGPU TwoPunctureABE make.log -f
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore
|
|||||||
## -xHost: Optimize for the host CPU architecture (Intel/AMD compatible)
|
## -xHost: Optimize for the host CPU architecture (Intel/AMD compatible)
|
||||||
## -fp-model fast=2: Aggressive floating-point optimizations
|
## -fp-model fast=2: Aggressive floating-point optimizations
|
||||||
## -fma: Enable fused multiply-add instructions
|
## -fma: Enable fused multiply-add instructions
|
||||||
|
## Note: OpenMP has been disabled (-qopenmp removed) due to performance issues
|
||||||
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||||
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
||||||
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||||
|
|||||||
@@ -10,17 +10,18 @@
|
|||||||
|
|
||||||
import AMSS_NCKU_Input as input_data
|
import AMSS_NCKU_Input as input_data
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
|
||||||
## CPU core binding configuration using taskset
|
## CPU core binding configuration using taskset
|
||||||
## taskset ensures all child processes inherit the CPU affinity mask
|
## taskset ensures all child processes inherit the CPU affinity mask
|
||||||
## This forces make and all compiler processes to use only nohz_full cores (4-55, 60-111)
|
## This forces make and all compiler processes to use only nohz_full cores (4-55, 60-111)
|
||||||
## Format: taskset -c 4-55,60-111 ensures processes only run on these cores
|
## Format: taskset -c 4-55,60-111 ensures processes only run on these cores
|
||||||
NUMACTL_CPU_BIND = "taskset -c 0-111"
|
NUMACTL_CPU_BIND = "taskset -c 16-47,64-95"
|
||||||
|
#NUMACTL_CPU_BIND = "taskset -c 0-111"
|
||||||
|
|
||||||
## Build parallelism configuration
|
## Build parallelism configuration
|
||||||
## Use nohz_full cores (4-55, 60-111) for compilation: 52 + 52 = 104 cores
|
## Use nohz_full cores (4-55, 60-111) for compilation: 52 + 52 = 104 cores
|
||||||
## Set make -j to utilize available cores for faster builds
|
## Set make -j to utilize available cores for faster builds
|
||||||
BUILD_JOBS = 104
|
BUILD_JOBS = 64
|
||||||
|
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
@@ -152,7 +153,7 @@ def run_ABE():
|
|||||||
## Run the AMSS-NCKU TwoPuncture program TwoPunctureABE
|
## Run the AMSS-NCKU TwoPuncture program TwoPunctureABE
|
||||||
|
|
||||||
def run_TwoPunctureABE():
|
def run_TwoPunctureABE():
|
||||||
tp_time1=time.time()
|
|
||||||
print( )
|
print( )
|
||||||
print( " Running the AMSS-NCKU executable file TwoPunctureABE " )
|
print( " Running the AMSS-NCKU executable file TwoPunctureABE " )
|
||||||
print( )
|
print( )
|
||||||
@@ -179,9 +180,7 @@ def run_TwoPunctureABE():
|
|||||||
print( )
|
print( )
|
||||||
print( " The TwoPunctureABE simulation is finished " )
|
print( " The TwoPunctureABE simulation is finished " )
|
||||||
print( )
|
print( )
|
||||||
tp_time2=time.time()
|
|
||||||
et=tp_time2-tp_time1
|
|
||||||
print(f"Used time: {et}")
|
|
||||||
return
|
return
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
|
|||||||
@@ -1,29 +0,0 @@
|
|||||||
import multiprocessing
|
|
||||||
|
|
||||||
def run_plot_task(task):
|
|
||||||
"""Execute a single plotting task.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
task : tuple
|
|
||||||
A tuple of (function, args_tuple) where function is a callable
|
|
||||||
plotting function and args_tuple contains its arguments.
|
|
||||||
"""
|
|
||||||
func, args = task
|
|
||||||
return func(*args)
|
|
||||||
|
|
||||||
|
|
||||||
def run_plot_tasks_parallel(plot_tasks):
|
|
||||||
"""Execute a list of independent plotting tasks in parallel.
|
|
||||||
|
|
||||||
Uses the 'fork' context to create worker processes so that the main
|
|
||||||
script is NOT re-imported/re-executed in child processes.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
plot_tasks : list of tuples
|
|
||||||
Each element is (function, args_tuple).
|
|
||||||
"""
|
|
||||||
ctx = multiprocessing.get_context('fork')
|
|
||||||
with ctx.Pool() as pool:
|
|
||||||
pool.map(run_plot_task, plot_tasks)
|
|
||||||
@@ -11,8 +11,6 @@
|
|||||||
import numpy ## numpy for array operations
|
import numpy ## numpy for array operations
|
||||||
import scipy ## scipy for interpolation and signal processing
|
import scipy ## scipy for interpolation and signal processing
|
||||||
import math
|
import math
|
||||||
import matplotlib
|
|
||||||
matplotlib.use('Agg') ## use non-interactive backend for multiprocessing safety
|
|
||||||
import matplotlib.pyplot as plt ## matplotlib for plotting
|
import matplotlib.pyplot as plt ## matplotlib for plotting
|
||||||
import os ## os for system/file operations
|
import os ## os for system/file operations
|
||||||
|
|
||||||
|
|||||||
@@ -8,23 +8,16 @@
|
|||||||
##
|
##
|
||||||
#################################################
|
#################################################
|
||||||
|
|
||||||
## Restrict OpenMP to one thread per process so that running
|
|
||||||
## many workers in parallel does not create an O(workers * BLAS_threads)
|
|
||||||
## thread explosion. The variable MUST be set before numpy/scipy
|
|
||||||
## are imported, because the BLAS library reads them only at load time.
|
|
||||||
import os
|
|
||||||
os.environ.setdefault("OMP_NUM_THREADS", "1")
|
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
import scipy
|
import scipy
|
||||||
import matplotlib
|
|
||||||
matplotlib.use('Agg') ## use non-interactive backend for multiprocessing safety
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from matplotlib.colors import LogNorm
|
from matplotlib.colors import LogNorm
|
||||||
from mpl_toolkits.mplot3d import Axes3D
|
from mpl_toolkits.mplot3d import Axes3D
|
||||||
## import torch
|
## import torch
|
||||||
import AMSS_NCKU_Input as input_data
|
import AMSS_NCKU_Input as input_data
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
#########################################################################################
|
#########################################################################################
|
||||||
|
|
||||||
@@ -199,19 +192,3 @@ def get_data_xy( Rmin, Rmax, n, data0, time, figure_title, figure_outdir ):
|
|||||||
|
|
||||||
####################################################################################
|
####################################################################################
|
||||||
|
|
||||||
|
|
||||||
####################################################################################
|
|
||||||
## Allow this module to be run as a standalone script so that each
|
|
||||||
## binary-data plot can be executed in a fresh subprocess whose BLAS
|
|
||||||
## environment variables (set above) take effect before numpy loads.
|
|
||||||
##
|
|
||||||
## Usage: python3 plot_binary_data.py <filename> <binary_outdir> <figure_outdir>
|
|
||||||
####################################################################################
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
import sys
|
|
||||||
if len(sys.argv) != 4:
|
|
||||||
print(f"Usage: {sys.argv[0]} <filename> <binary_outdir> <figure_outdir>")
|
|
||||||
sys.exit(1)
|
|
||||||
plot_binary_data(sys.argv[1], sys.argv[2], sys.argv[3])
|
|
||||||
|
|
||||||
|
|||||||
@@ -8,8 +8,6 @@
|
|||||||
#################################################
|
#################################################
|
||||||
|
|
||||||
import numpy ## numpy for array operations
|
import numpy ## numpy for array operations
|
||||||
import matplotlib
|
|
||||||
matplotlib.use('Agg') ## use non-interactive backend for multiprocessing safety
|
|
||||||
import matplotlib.pyplot as plt ## matplotlib for plotting
|
import matplotlib.pyplot as plt ## matplotlib for plotting
|
||||||
from mpl_toolkits.mplot3d import Axes3D ## needed for 3D plots
|
from mpl_toolkits.mplot3d import Axes3D ## needed for 3D plots
|
||||||
import glob
|
import glob
|
||||||
@@ -17,9 +15,6 @@ import os ## operating system utilities
|
|||||||
|
|
||||||
import plot_binary_data
|
import plot_binary_data
|
||||||
import AMSS_NCKU_Input as input_data
|
import AMSS_NCKU_Input as input_data
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
import multiprocessing
|
|
||||||
|
|
||||||
# plt.rcParams['text.usetex'] = True ## enable LaTeX fonts in plots
|
# plt.rcParams['text.usetex'] = True ## enable LaTeX fonts in plots
|
||||||
|
|
||||||
@@ -55,40 +50,10 @@ def generate_binary_data_plot( binary_outdir, figure_outdir ):
|
|||||||
file_list.append(x)
|
file_list.append(x)
|
||||||
print(x)
|
print(x)
|
||||||
|
|
||||||
## Plot each file in parallel using subprocesses.
|
## Plot each file in the list
|
||||||
## Each subprocess is a fresh Python process where the BLAS thread-count
|
|
||||||
## environment variables (set at the top of plot_binary_data.py) take
|
|
||||||
## effect before numpy is imported. This avoids the thread explosion
|
|
||||||
## that occurs when multiprocessing.Pool with 'fork' context inherits
|
|
||||||
## already-initialized multi-threaded BLAS from the parent.
|
|
||||||
script = os.path.join( os.path.dirname(__file__), "plot_binary_data.py" )
|
|
||||||
max_workers = min( multiprocessing.cpu_count(), len(file_list) ) if file_list else 0
|
|
||||||
|
|
||||||
running = []
|
|
||||||
failed = []
|
|
||||||
for filename in file_list:
|
for filename in file_list:
|
||||||
print(filename)
|
print(filename)
|
||||||
proc = subprocess.Popen(
|
plot_binary_data.plot_binary_data(filename, binary_outdir, figure_outdir)
|
||||||
[sys.executable, script, filename, binary_outdir, figure_outdir],
|
|
||||||
)
|
|
||||||
running.append( (proc, filename) )
|
|
||||||
## Keep at most max_workers subprocesses active at a time
|
|
||||||
if len(running) >= max_workers:
|
|
||||||
p, fn = running.pop(0)
|
|
||||||
p.wait()
|
|
||||||
if p.returncode != 0:
|
|
||||||
failed.append(fn)
|
|
||||||
|
|
||||||
## Wait for all remaining subprocesses to finish
|
|
||||||
for p, fn in running:
|
|
||||||
p.wait()
|
|
||||||
if p.returncode != 0:
|
|
||||||
failed.append(fn)
|
|
||||||
|
|
||||||
if failed:
|
|
||||||
print( " WARNING: the following binary data plots failed:" )
|
|
||||||
for fn in failed:
|
|
||||||
print( " ", fn )
|
|
||||||
|
|
||||||
print( )
|
print( )
|
||||||
print( " Binary Data Plot Has been Finished " )
|
print( " Binary Data Plot Has been Finished " )
|
||||||
|
|||||||
Reference in New Issue
Block a user