Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
cc06e30404
|
|||
|
25c79dc7cd
|
|||
|
a725d34dd3
|
|||
| 2791d2e225 | |||
| 72ce153e48 | |||
| 5b7e05cd32 | |||
| 85afe00fc5 | |||
| 5c1790277b | |||
|
|
79af79d471 |
@@ -8,6 +8,14 @@
|
||||
##
|
||||
##################################################################
|
||||
|
||||
## Guard against re-execution by multiprocessing child processes.
|
||||
## Without this, using 'spawn' or 'forkserver' context would cause every
|
||||
## worker to re-run the entire script, spawning exponentially more
|
||||
## workers (fork bomb).
|
||||
if __name__ != '__main__':
|
||||
import sys as _sys
|
||||
_sys.exit(0)
|
||||
|
||||
|
||||
##################################################################
|
||||
|
||||
@@ -424,26 +432,31 @@ print(
|
||||
|
||||
import plot_xiaoqu
|
||||
import plot_GW_strain_amplitude_xiaoqu
|
||||
from parallel_plot_helper import run_plot_tasks_parallel
|
||||
|
||||
plot_tasks = []
|
||||
|
||||
## Plot black hole trajectory
|
||||
plot_xiaoqu.generate_puncture_orbit_plot( binary_results_directory, figure_directory )
|
||||
plot_xiaoqu.generate_puncture_orbit_plot3D( binary_results_directory, figure_directory )
|
||||
plot_tasks.append( ( plot_xiaoqu.generate_puncture_orbit_plot, (binary_results_directory, figure_directory) ) )
|
||||
plot_tasks.append( ( plot_xiaoqu.generate_puncture_orbit_plot3D, (binary_results_directory, figure_directory) ) )
|
||||
|
||||
## Plot black hole separation vs. time
|
||||
plot_xiaoqu.generate_puncture_distence_plot( binary_results_directory, figure_directory )
|
||||
plot_tasks.append( ( plot_xiaoqu.generate_puncture_distence_plot, (binary_results_directory, figure_directory) ) )
|
||||
|
||||
## Plot gravitational waveforms (psi4 and strain amplitude)
|
||||
for i in range(input_data.Detector_Number):
|
||||
plot_xiaoqu.generate_gravitational_wave_psi4_plot( binary_results_directory, figure_directory, i )
|
||||
plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot( binary_results_directory, figure_directory, i )
|
||||
plot_tasks.append( ( plot_xiaoqu.generate_gravitational_wave_psi4_plot, (binary_results_directory, figure_directory, i) ) )
|
||||
plot_tasks.append( ( plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot, (binary_results_directory, figure_directory, i) ) )
|
||||
|
||||
## Plot ADM mass evolution
|
||||
for i in range(input_data.Detector_Number):
|
||||
plot_xiaoqu.generate_ADMmass_plot( binary_results_directory, figure_directory, i )
|
||||
plot_tasks.append( ( plot_xiaoqu.generate_ADMmass_plot, (binary_results_directory, figure_directory, i) ) )
|
||||
|
||||
## Plot Hamiltonian constraint violation over time
|
||||
for i in range(input_data.grid_level):
|
||||
plot_xiaoqu.generate_constraint_check_plot( binary_results_directory, figure_directory, i )
|
||||
plot_tasks.append( ( plot_xiaoqu.generate_constraint_check_plot, (binary_results_directory, figure_directory, i) ) )
|
||||
|
||||
run_plot_tasks_parallel(plot_tasks)
|
||||
|
||||
## Plot stored binary data
|
||||
plot_xiaoqu.generate_binary_data_plot( binary_results_directory, figure_directory )
|
||||
|
||||
@@ -5286,6 +5286,203 @@ void Parallel::OutBdLow2Himix(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
||||
delete[] transfer_src;
|
||||
delete[] transfer_dst;
|
||||
}
|
||||
|
||||
// Restrict_cached: cache grid segment lists, reuse buffers via transfer_cached
|
||||
void Parallel::Restrict_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
||||
int Symmetry, SyncCache &cache)
|
||||
{
|
||||
if (!cache.valid)
|
||||
{
|
||||
int cpusize;
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||||
cache.cpusize = cpusize;
|
||||
|
||||
if (!cache.combined_src)
|
||||
{
|
||||
cache.combined_src = new MyList<Parallel::gridseg> *[cpusize];
|
||||
cache.combined_dst = new MyList<Parallel::gridseg> *[cpusize];
|
||||
cache.send_lengths = new int[cpusize];
|
||||
cache.recv_lengths = new int[cpusize];
|
||||
cache.send_bufs = new double *[cpusize];
|
||||
cache.recv_bufs = new double *[cpusize];
|
||||
cache.send_buf_caps = new int[cpusize];
|
||||
cache.recv_buf_caps = new int[cpusize];
|
||||
for (int i = 0; i < cpusize; i++)
|
||||
{
|
||||
cache.send_bufs[i] = cache.recv_bufs[i] = 0;
|
||||
cache.send_buf_caps[i] = cache.recv_buf_caps[i] = 0;
|
||||
}
|
||||
cache.max_reqs = 2 * cpusize;
|
||||
cache.reqs = new MPI_Request[cache.max_reqs];
|
||||
cache.stats = new MPI_Status[cache.max_reqs];
|
||||
}
|
||||
|
||||
MyList<Parallel::gridseg> *dst = build_complete_gsl(PatcL);
|
||||
for (int node = 0; node < cpusize; node++)
|
||||
{
|
||||
MyList<Parallel::gridseg> *src_owned = build_owned_gsl(PatfL, node, 2, Symmetry);
|
||||
build_gstl(src_owned, dst, &cache.combined_src[node], &cache.combined_dst[node]);
|
||||
if (src_owned) src_owned->destroyList();
|
||||
}
|
||||
if (dst) dst->destroyList();
|
||||
|
||||
cache.valid = true;
|
||||
}
|
||||
|
||||
transfer_cached(cache.combined_src, cache.combined_dst, VarList1, VarList2, Symmetry, cache);
|
||||
}
|
||||
|
||||
// OutBdLow2Hi_cached: cache grid segment lists, reuse buffers via transfer_cached
|
||||
void Parallel::OutBdLow2Hi_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
||||
int Symmetry, SyncCache &cache)
|
||||
{
|
||||
if (!cache.valid)
|
||||
{
|
||||
int cpusize;
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||||
cache.cpusize = cpusize;
|
||||
|
||||
if (!cache.combined_src)
|
||||
{
|
||||
cache.combined_src = new MyList<Parallel::gridseg> *[cpusize];
|
||||
cache.combined_dst = new MyList<Parallel::gridseg> *[cpusize];
|
||||
cache.send_lengths = new int[cpusize];
|
||||
cache.recv_lengths = new int[cpusize];
|
||||
cache.send_bufs = new double *[cpusize];
|
||||
cache.recv_bufs = new double *[cpusize];
|
||||
cache.send_buf_caps = new int[cpusize];
|
||||
cache.recv_buf_caps = new int[cpusize];
|
||||
for (int i = 0; i < cpusize; i++)
|
||||
{
|
||||
cache.send_bufs[i] = cache.recv_bufs[i] = 0;
|
||||
cache.send_buf_caps[i] = cache.recv_buf_caps[i] = 0;
|
||||
}
|
||||
cache.max_reqs = 2 * cpusize;
|
||||
cache.reqs = new MPI_Request[cache.max_reqs];
|
||||
cache.stats = new MPI_Status[cache.max_reqs];
|
||||
}
|
||||
|
||||
MyList<Parallel::gridseg> *dst = build_buffer_gsl(PatfL);
|
||||
for (int node = 0; node < cpusize; node++)
|
||||
{
|
||||
MyList<Parallel::gridseg> *src_owned = build_owned_gsl(PatcL, node, 4, Symmetry);
|
||||
build_gstl(src_owned, dst, &cache.combined_src[node], &cache.combined_dst[node]);
|
||||
if (src_owned) src_owned->destroyList();
|
||||
}
|
||||
if (dst) dst->destroyList();
|
||||
|
||||
cache.valid = true;
|
||||
}
|
||||
|
||||
transfer_cached(cache.combined_src, cache.combined_dst, VarList1, VarList2, Symmetry, cache);
|
||||
}
|
||||
|
||||
// OutBdLow2Himix_cached: same as OutBdLow2Hi_cached but uses transfermix for unpacking
|
||||
void Parallel::OutBdLow2Himix_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
||||
int Symmetry, SyncCache &cache)
|
||||
{
|
||||
if (!cache.valid)
|
||||
{
|
||||
int cpusize;
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||||
cache.cpusize = cpusize;
|
||||
|
||||
if (!cache.combined_src)
|
||||
{
|
||||
cache.combined_src = new MyList<Parallel::gridseg> *[cpusize];
|
||||
cache.combined_dst = new MyList<Parallel::gridseg> *[cpusize];
|
||||
cache.send_lengths = new int[cpusize];
|
||||
cache.recv_lengths = new int[cpusize];
|
||||
cache.send_bufs = new double *[cpusize];
|
||||
cache.recv_bufs = new double *[cpusize];
|
||||
cache.send_buf_caps = new int[cpusize];
|
||||
cache.recv_buf_caps = new int[cpusize];
|
||||
for (int i = 0; i < cpusize; i++)
|
||||
{
|
||||
cache.send_bufs[i] = cache.recv_bufs[i] = 0;
|
||||
cache.send_buf_caps[i] = cache.recv_buf_caps[i] = 0;
|
||||
}
|
||||
cache.max_reqs = 2 * cpusize;
|
||||
cache.reqs = new MPI_Request[cache.max_reqs];
|
||||
cache.stats = new MPI_Status[cache.max_reqs];
|
||||
}
|
||||
|
||||
MyList<Parallel::gridseg> *dst = build_buffer_gsl(PatfL);
|
||||
for (int node = 0; node < cpusize; node++)
|
||||
{
|
||||
MyList<Parallel::gridseg> *src_owned = build_owned_gsl(PatcL, node, 4, Symmetry);
|
||||
build_gstl(src_owned, dst, &cache.combined_src[node], &cache.combined_dst[node]);
|
||||
if (src_owned) src_owned->destroyList();
|
||||
}
|
||||
if (dst) dst->destroyList();
|
||||
|
||||
cache.valid = true;
|
||||
}
|
||||
|
||||
// Use transfermix instead of transfer for mix-mode interpolation
|
||||
int myrank;
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &cache.cpusize);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||
int cpusize = cache.cpusize;
|
||||
|
||||
int req_no = 0;
|
||||
for (int node = 0; node < cpusize; node++)
|
||||
{
|
||||
if (node == myrank)
|
||||
{
|
||||
int length = data_packermix(0, cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
||||
cache.recv_lengths[node] = length;
|
||||
if (length > 0)
|
||||
{
|
||||
if (length > cache.recv_buf_caps[node])
|
||||
{
|
||||
if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node];
|
||||
cache.recv_bufs[node] = new double[length];
|
||||
cache.recv_buf_caps[node] = length;
|
||||
}
|
||||
data_packermix(cache.recv_bufs[node], cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int slength = data_packermix(0, cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
||||
cache.send_lengths[node] = slength;
|
||||
if (slength > 0)
|
||||
{
|
||||
if (slength > cache.send_buf_caps[node])
|
||||
{
|
||||
if (cache.send_bufs[node]) delete[] cache.send_bufs[node];
|
||||
cache.send_bufs[node] = new double[slength];
|
||||
cache.send_buf_caps[node] = slength;
|
||||
}
|
||||
data_packermix(cache.send_bufs[node], cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
||||
MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no++);
|
||||
}
|
||||
int rlength = data_packermix(0, cache.combined_src[node], cache.combined_dst[node], node, UNPACK, VarList1, VarList2, Symmetry);
|
||||
cache.recv_lengths[node] = rlength;
|
||||
if (rlength > 0)
|
||||
{
|
||||
if (rlength > cache.recv_buf_caps[node])
|
||||
{
|
||||
if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node];
|
||||
cache.recv_bufs[node] = new double[rlength];
|
||||
cache.recv_buf_caps[node] = rlength;
|
||||
}
|
||||
MPI_Irecv((void *)cache.recv_bufs[node], rlength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
MPI_Waitall(req_no, cache.reqs, cache.stats);
|
||||
|
||||
for (int node = 0; node < cpusize; node++)
|
||||
if (cache.recv_bufs[node] && cache.recv_lengths[node] > 0)
|
||||
data_packermix(cache.recv_bufs[node], cache.combined_src[node], cache.combined_dst[node], node, UNPACK, VarList1, VarList2, Symmetry);
|
||||
}
|
||||
|
||||
// collect all buffer grid segments or blocks for given patch
|
||||
MyList<Parallel::gridseg> *Parallel::build_buffer_gsl(Patch *Pat)
|
||||
{
|
||||
|
||||
@@ -130,6 +130,15 @@ namespace Parallel
|
||||
void OutBdLow2Himix(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
||||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||||
int Symmetry);
|
||||
void Restrict_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
||||
int Symmetry, SyncCache &cache);
|
||||
void OutBdLow2Hi_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
||||
int Symmetry, SyncCache &cache);
|
||||
void OutBdLow2Himix_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
||||
int Symmetry, SyncCache &cache);
|
||||
void Prolong(Patch *Patc, Patch *Patf,
|
||||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||||
int Symmetry);
|
||||
|
||||
@@ -321,22 +321,7 @@ void Z4c_class::Step(int lev, int YN)
|
||||
}
|
||||
Pp = Pp->next;
|
||||
}
|
||||
// check error information
|
||||
{
|
||||
int erh = ERROR;
|
||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||
}
|
||||
if (ERROR)
|
||||
{
|
||||
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
|
||||
if (myrank == 0)
|
||||
{
|
||||
if (ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime
|
||||
<< ", lev = " << lev << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
// NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls
|
||||
|
||||
#ifdef WithShell
|
||||
// evolve Shell Patches
|
||||
@@ -468,24 +453,16 @@ void Z4c_class::Step(int lev, int YN)
|
||||
sPp = sPp->next;
|
||||
}
|
||||
}
|
||||
// check error information
|
||||
// Non-blocking error reduction overlapped with Sync to hide Allreduce latency
|
||||
MPI_Request err_req_pre;
|
||||
{
|
||||
int erh = ERROR;
|
||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||
}
|
||||
if (ERROR)
|
||||
{
|
||||
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
|
||||
if (myrank == 0)
|
||||
{
|
||||
if (ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "find NaN in state variables on Shell Patches at t = " << PhysTime << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_pre);
|
||||
}
|
||||
#endif
|
||||
|
||||
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
|
||||
Parallel::AsyncSyncState async_pre;
|
||||
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
|
||||
|
||||
#ifdef WithShell
|
||||
if (lev == 0)
|
||||
@@ -504,6 +481,24 @@ void Z4c_class::Step(int lev, int YN)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
|
||||
|
||||
#ifdef WithShell
|
||||
// Complete non-blocking error reduction and check
|
||||
MPI_Wait(&err_req_pre, MPI_STATUS_IGNORE);
|
||||
if (ERROR)
|
||||
{
|
||||
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
|
||||
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
|
||||
if (myrank == 0)
|
||||
{
|
||||
if (ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime
|
||||
<< ", lev = " << lev << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// for black hole position
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
@@ -693,23 +688,7 @@ void Z4c_class::Step(int lev, int YN)
|
||||
Pp = Pp->next;
|
||||
}
|
||||
|
||||
// check error information
|
||||
{
|
||||
int erh = ERROR;
|
||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||
}
|
||||
if (ERROR)
|
||||
{
|
||||
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
|
||||
if (myrank == 0)
|
||||
{
|
||||
if (ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
|
||||
<< " variables at t = " << PhysTime
|
||||
<< ", lev = " << lev << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
// NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls
|
||||
|
||||
#ifdef WithShell
|
||||
// evolve Shell Patches
|
||||
@@ -850,25 +829,16 @@ void Z4c_class::Step(int lev, int YN)
|
||||
sPp = sPp->next;
|
||||
}
|
||||
}
|
||||
// check error information
|
||||
// Non-blocking error reduction overlapped with Sync to hide Allreduce latency
|
||||
MPI_Request err_req_cor;
|
||||
{
|
||||
int erh = ERROR;
|
||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||
}
|
||||
if (ERROR)
|
||||
{
|
||||
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
|
||||
if (myrank == 0)
|
||||
{
|
||||
if (ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "find NaN on Shell Patches in RK4 substep#" << iter_count
|
||||
<< " variables at t = " << PhysTime << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor);
|
||||
}
|
||||
#endif
|
||||
|
||||
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
||||
Parallel::AsyncSyncState async_cor;
|
||||
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
|
||||
|
||||
#ifdef WithShell
|
||||
if (lev == 0)
|
||||
@@ -886,6 +856,25 @@ void Z4c_class::Step(int lev, int YN)
|
||||
<< " seconds! " << endl;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
|
||||
|
||||
#ifdef WithShell
|
||||
// Complete non-blocking error reduction and check
|
||||
MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE);
|
||||
if (ERROR)
|
||||
{
|
||||
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
|
||||
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
|
||||
if (myrank == 0)
|
||||
{
|
||||
if (ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
|
||||
<< " variables at t = " << PhysTime
|
||||
<< ", lev = " << lev << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// for black hole position
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
@@ -1252,22 +1241,7 @@ void Z4c_class::Step(int lev, int YN)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// check error information
|
||||
{
|
||||
int erh = ERROR;
|
||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||
}
|
||||
if (ERROR)
|
||||
{
|
||||
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
|
||||
if (myrank == 0)
|
||||
{
|
||||
if (ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime
|
||||
<< ", lev = " << lev << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
// NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls
|
||||
|
||||
// evolve Shell Patches
|
||||
if (lev == 0)
|
||||
@@ -1542,23 +1516,15 @@ void Z4c_class::Step(int lev, int YN)
|
||||
}
|
||||
#endif
|
||||
}
|
||||
// check error information
|
||||
// Non-blocking error reduction overlapped with Sync to hide Allreduce latency
|
||||
MPI_Request err_req_pre;
|
||||
{
|
||||
int erh = ERROR;
|
||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||
}
|
||||
if (ERROR)
|
||||
{
|
||||
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
|
||||
if (myrank == 0)
|
||||
{
|
||||
if (ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "find NaN in state variables on Shell Patches at t = " << PhysTime << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_pre);
|
||||
}
|
||||
|
||||
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
|
||||
Parallel::AsyncSyncState async_pre;
|
||||
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
|
||||
|
||||
if (lev == 0)
|
||||
{
|
||||
@@ -1620,6 +1586,22 @@ void Z4c_class::Step(int lev, int YN)
|
||||
}
|
||||
#endif
|
||||
}
|
||||
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
|
||||
|
||||
// Complete non-blocking error reduction and check
|
||||
MPI_Wait(&err_req_pre, MPI_STATUS_IGNORE);
|
||||
if (ERROR)
|
||||
{
|
||||
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
|
||||
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
|
||||
if (myrank == 0)
|
||||
{
|
||||
if (ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime
|
||||
<< ", lev = " << lev << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
|
||||
// for black hole position
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
@@ -1841,23 +1823,7 @@ void Z4c_class::Step(int lev, int YN)
|
||||
Pp = Pp->next;
|
||||
}
|
||||
|
||||
// check error information
|
||||
{
|
||||
int erh = ERROR;
|
||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||
}
|
||||
if (ERROR)
|
||||
{
|
||||
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
|
||||
if (myrank == 0)
|
||||
{
|
||||
if (ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
|
||||
<< " variables at t = " << PhysTime
|
||||
<< ", lev = " << lev << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
// NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls
|
||||
|
||||
// evolve Shell Patches
|
||||
if (lev == 0)
|
||||
@@ -2103,24 +2069,15 @@ void Z4c_class::Step(int lev, int YN)
|
||||
sPp = sPp->next;
|
||||
}
|
||||
}
|
||||
// check error information
|
||||
// Non-blocking error reduction overlapped with Sync to hide Allreduce latency
|
||||
MPI_Request err_req_cor;
|
||||
{
|
||||
int erh = ERROR;
|
||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||
}
|
||||
if (ERROR)
|
||||
{
|
||||
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
|
||||
if (myrank == 0)
|
||||
{
|
||||
if (ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "find NaN on Shell Patches in RK4 substep#" << iter_count
|
||||
<< " variables at t = " << PhysTime << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor);
|
||||
}
|
||||
|
||||
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
||||
Parallel::AsyncSyncState async_cor;
|
||||
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
|
||||
|
||||
if (lev == 0)
|
||||
{
|
||||
@@ -2170,6 +2127,23 @@ void Z4c_class::Step(int lev, int YN)
|
||||
}
|
||||
// end smooth
|
||||
#endif
|
||||
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
|
||||
|
||||
// Complete non-blocking error reduction and check
|
||||
MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE);
|
||||
if (ERROR)
|
||||
{
|
||||
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
|
||||
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
|
||||
if (myrank == 0)
|
||||
{
|
||||
if (ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
|
||||
<< " variables at t = " << PhysTime
|
||||
<< ", lev = " << lev << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
|
||||
// for black hole position
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
|
||||
@@ -5819,21 +5819,11 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
||||
#endif
|
||||
|
||||
#if (RPB == 0)
|
||||
Ppc = GH->PatL[lev - 1];
|
||||
while (Ppc)
|
||||
{
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
#if (MIXOUTB == 0)
|
||||
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry);
|
||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
||||
#elif (MIXOUTB == 1)
|
||||
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry);
|
||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
||||
#endif
|
||||
Pp = Pp->next;
|
||||
}
|
||||
Ppc = Ppc->next;
|
||||
}
|
||||
#elif (RPB == 1)
|
||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry);
|
||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry);
|
||||
@@ -5880,21 +5870,11 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
||||
#endif
|
||||
|
||||
#if (RPB == 0)
|
||||
Ppc = GH->PatL[lev - 1];
|
||||
while (Ppc)
|
||||
{
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
#if (MIXOUTB == 0)
|
||||
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SL, SL, Symmetry);
|
||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
||||
#elif (MIXOUTB == 1)
|
||||
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SL, SL, Symmetry);
|
||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
||||
#endif
|
||||
Pp = Pp->next;
|
||||
}
|
||||
Ppc = Ppc->next;
|
||||
}
|
||||
#elif (RPB == 1)
|
||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
|
||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry);
|
||||
@@ -5969,21 +5949,11 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
||||
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
|
||||
|
||||
#if (RPB == 0)
|
||||
Ppc = GH->PatL[lev - 1];
|
||||
while (Ppc)
|
||||
{
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
#if (MIXOUTB == 0)
|
||||
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry);
|
||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
||||
#elif (MIXOUTB == 1)
|
||||
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry);
|
||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
||||
#endif
|
||||
Pp = Pp->next;
|
||||
}
|
||||
Ppc = Ppc->next;
|
||||
}
|
||||
#elif (RPB == 1)
|
||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry);
|
||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry);
|
||||
@@ -6001,21 +5971,11 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
||||
Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]);
|
||||
|
||||
#if (RPB == 0)
|
||||
Ppc = GH->PatL[lev - 1];
|
||||
while (Ppc)
|
||||
{
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
#if (MIXOUTB == 0)
|
||||
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SL, SL, Symmetry);
|
||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
||||
#elif (MIXOUTB == 1)
|
||||
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SL, SL, Symmetry);
|
||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
||||
#endif
|
||||
Pp = Pp->next;
|
||||
}
|
||||
Ppc = Ppc->next;
|
||||
}
|
||||
#elif (RPB == 1)
|
||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
|
||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry);
|
||||
@@ -6076,21 +6036,11 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
||||
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
|
||||
|
||||
#if (RPB == 0)
|
||||
Ppc = GH->PatL[lev - 1];
|
||||
while (Ppc)
|
||||
{
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
#if (MIXOUTB == 0)
|
||||
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry);
|
||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
||||
#elif (MIXOUTB == 1)
|
||||
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry);
|
||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
||||
#endif
|
||||
Pp = Pp->next;
|
||||
}
|
||||
Ppc = Ppc->next;
|
||||
}
|
||||
#elif (RPB == 1)
|
||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry);
|
||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry);
|
||||
@@ -6110,21 +6060,11 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
||||
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]);
|
||||
|
||||
#if (RPB == 0)
|
||||
Ppc = GH->PatL[lev - 1];
|
||||
while (Ppc)
|
||||
{
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
#if (MIXOUTB == 0)
|
||||
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry);
|
||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
||||
#elif (MIXOUTB == 1)
|
||||
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry);
|
||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
||||
#endif
|
||||
Pp = Pp->next;
|
||||
}
|
||||
Ppc = Ppc->next;
|
||||
}
|
||||
#elif (RPB == 1)
|
||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry);
|
||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry);
|
||||
@@ -6161,21 +6101,11 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
|
||||
}
|
||||
|
||||
#if (RPB == 0)
|
||||
Ppc = GH->PatL[lev - 1];
|
||||
while (Ppc)
|
||||
{
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
#if (MIXOUTB == 0)
|
||||
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry);
|
||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
||||
#elif (MIXOUTB == 1)
|
||||
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry);
|
||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
||||
#endif
|
||||
Pp = Pp->next;
|
||||
}
|
||||
Ppc = Ppc->next;
|
||||
}
|
||||
#elif (RPB == 1)
|
||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry);
|
||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry);
|
||||
@@ -6184,21 +6114,11 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
|
||||
else // no time refinement levels and for all same time levels
|
||||
{
|
||||
#if (RPB == 0)
|
||||
Ppc = GH->PatL[lev - 1];
|
||||
while (Ppc)
|
||||
{
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
#if (MIXOUTB == 0)
|
||||
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry);
|
||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
||||
#elif (MIXOUTB == 1)
|
||||
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry);
|
||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
||||
#endif
|
||||
Pp = Pp->next;
|
||||
}
|
||||
Ppc = Ppc->next;
|
||||
}
|
||||
#elif (RPB == 1)
|
||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry);
|
||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry);
|
||||
|
||||
@@ -945,103 +945,60 @@
|
||||
SSA(2)=SYM
|
||||
SSA(3)=ANTI
|
||||
|
||||
!!!!!!!!!advection term part
|
||||
!!!!!!!!!advection term + Kreiss-Oliger dissipation (merged for cache efficiency)
|
||||
! lopsided_kodis shares the symmetry_bd buffer between advection and
|
||||
! dissipation, eliminating redundant full-grid copies. For metric variables
|
||||
! gxx/gyy/gzz (=dxx/dyy/dzz+1): kodis stencil coefficients sum to zero,
|
||||
! so the constant offset has no effect on dissipation.
|
||||
|
||||
call lopsided(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS)
|
||||
call lopsided(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS)
|
||||
call lopsided(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA)
|
||||
call lopsided(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS)
|
||||
call lopsided(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA)
|
||||
call lopsided(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS)
|
||||
call lopsided_kodis(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
||||
|
||||
call lopsided(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS)
|
||||
call lopsided(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS)
|
||||
call lopsided(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA)
|
||||
call lopsided(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS)
|
||||
call lopsided(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA)
|
||||
call lopsided(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS)
|
||||
call lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
||||
|
||||
call lopsided(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS)
|
||||
call lopsided(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS)
|
||||
call lopsided_kodis(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
||||
|
||||
call lopsided(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS)
|
||||
call lopsided(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS)
|
||||
call lopsided(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA)
|
||||
!!
|
||||
call lopsided_kodis(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,eps)
|
||||
|
||||
#if 1
|
||||
!! NOTE: bam itself does not apply dissipation to the gauge variables;
!! this (#if 1) branch deliberately does apply it here
|
||||
call lopsided_kodis(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
||||
#if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7)
|
||||
call lopsided_kodis(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA,eps)
|
||||
#endif
|
||||
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
|
||||
call lopsided_kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS,eps)
|
||||
call lopsided_kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA,eps)
|
||||
#endif
|
||||
#else
|
||||
! No dissipation on gauge variables (advection only)
|
||||
call lopsided(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS)
|
||||
|
||||
#if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7)
|
||||
call lopsided(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS)
|
||||
call lopsided(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS)
|
||||
call lopsided(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA)
|
||||
#endif
|
||||
|
||||
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
|
||||
call lopsided(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS)
|
||||
call lopsided(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS)
|
||||
call lopsided(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA)
|
||||
#endif
|
||||
|
||||
if(eps>0)then
|
||||
! usual Kreiss-Oliger dissipation
|
||||
call kodis(ex,X,Y,Z,chi,chi_rhs,SSS,Symmetry,eps)
|
||||
call kodis(ex,X,Y,Z,trK,trK_rhs,SSS,Symmetry,eps)
|
||||
call kodis(ex,X,Y,Z,dxx,gxx_rhs,SSS,Symmetry,eps)
|
||||
call kodis(ex,X,Y,Z,gxy,gxy_rhs,AAS,Symmetry,eps)
|
||||
call kodis(ex,X,Y,Z,gxz,gxz_rhs,ASA,Symmetry,eps)
|
||||
call kodis(ex,X,Y,Z,dyy,gyy_rhs,SSS,Symmetry,eps)
|
||||
call kodis(ex,X,Y,Z,gyz,gyz_rhs,SAA,Symmetry,eps)
|
||||
call kodis(ex,X,Y,Z,dzz,gzz_rhs,SSS,Symmetry,eps)
|
||||
#if 0
|
||||
#define i 42
|
||||
#define j 40
|
||||
#define k 40
|
||||
if(Lev == 1)then
|
||||
write(*,*) X(i),Y(j),Z(k)
|
||||
write(*,*) "before",Axx_rhs(i,j,k)
|
||||
endif
|
||||
#undef i
|
||||
#undef j
|
||||
#undef k
|
||||
!!stop
|
||||
#endif
|
||||
call kodis(ex,X,Y,Z,Axx,Axx_rhs,SSS,Symmetry,eps)
|
||||
#if 0
|
||||
#define i 42
|
||||
#define j 40
|
||||
#define k 40
|
||||
if(Lev == 1)then
|
||||
write(*,*) X(i),Y(j),Z(k)
|
||||
write(*,*) "after",Axx_rhs(i,j,k)
|
||||
endif
|
||||
#undef i
|
||||
#undef j
|
||||
#undef k
|
||||
!!stop
|
||||
#endif
|
||||
call kodis(ex,X,Y,Z,Axy,Axy_rhs,AAS,Symmetry,eps)
|
||||
call kodis(ex,X,Y,Z,Axz,Axz_rhs,ASA,Symmetry,eps)
|
||||
call kodis(ex,X,Y,Z,Ayy,Ayy_rhs,SSS,Symmetry,eps)
|
||||
call kodis(ex,X,Y,Z,Ayz,Ayz_rhs,SAA,Symmetry,eps)
|
||||
call kodis(ex,X,Y,Z,Azz,Azz_rhs,SSS,Symmetry,eps)
|
||||
call kodis(ex,X,Y,Z,Gamx,Gamx_rhs,ASS,Symmetry,eps)
|
||||
call kodis(ex,X,Y,Z,Gamy,Gamy_rhs,SAS,Symmetry,eps)
|
||||
call kodis(ex,X,Y,Z,Gamz,Gamz_rhs,SSA,Symmetry,eps)
|
||||
|
||||
#if 1
|
||||
!! NOTE: bam itself does not apply dissipation to the gauge variables;
!! this (#if 1) branch deliberately does apply it here
|
||||
call kodis(ex,X,Y,Z,Lap,Lap_rhs,SSS,Symmetry,eps)
|
||||
call kodis(ex,X,Y,Z,betax,betax_rhs,ASS,Symmetry,eps)
|
||||
call kodis(ex,X,Y,Z,betay,betay_rhs,SAS,Symmetry,eps)
|
||||
call kodis(ex,X,Y,Z,betaz,betaz_rhs,SSA,Symmetry,eps)
|
||||
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
|
||||
call kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,ASS,Symmetry,eps)
|
||||
call kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,SAS,Symmetry,eps)
|
||||
call kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,SSA,Symmetry,eps)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
endif
|
||||
|
||||
if(co == 0)then
|
||||
! ham_Res = trR + 2/3 * K^2 - A_ij * A^ij - 16 * PI * rho
|
||||
|
||||
@@ -487,6 +487,201 @@ subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
|
||||
|
||||
end subroutine lopsided
|
||||
|
||||
!-----------------------------------------------------------------------------
|
||||
! Combined advection (lopsided) + Kreiss-Oliger dissipation (kodis)
|
||||
! Shares the symmetry_bd buffer fh, eliminating one full-grid copy per call.
|
||||
! Mathematically identical to calling lopsided then kodis separately.
|
||||
!-----------------------------------------------------------------------------
|
||||
subroutine lopsided_kodis(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA,eps)

! Combined 4th-order lopsided (upwind-biased) advection plus 6th-order
! Kreiss-Oliger dissipation for a single grid function f.
!
! Adds to f_rhs:
!   (1) Sfx*df/dx + Sfy*df/dy + Sfz*df/dz, with the one-sided stencil
!       chosen per point by the sign of the shift component and by the
!       distance to the grid faces, and
!   (2) if eps > 0, the Kreiss-Oliger term eps/2^6 * sum_d (D6 f)/dx_d
!       at every point where the full 7-point stencil fits (counting
!       symmetry ghost points).
!
! Both stages read the same symmetry-extended buffer fh, filled by ONE
! symmetry_bd call -- this is the point of merging the former lopsided
! and kodis subroutines: one full-grid copy instead of two.  Results
! are mathematically identical to calling them separately.
!
! Arguments:
!   ex        : grid extents (nx,ny,nz)
!   X,Y,Z     : coordinate arrays (uniform spacing assumed -- TODO
!               confirm; inherited from lopsided/kodis)
!   f         : field to advect/dissipate (read only)
!   f_rhs     : right-hand side accumulator, updated in place
!   Sfx,Sfy,Sfz : shift-vector components (advection speeds)
!   Symmetry  : NO_SYMM / EQ_SYMM / OCTANT (see parameters below)
!   SoA       : per-direction parity flags forwarded to symmetry_bd
!   eps       : dissipation strength; <= 0 disables stage (2)

implicit none

!~~~~~~> Input parameters:

integer, intent(in) :: ex(1:3),Symmetry
real*8, intent(in) :: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
real*8,dimension(ex(1),ex(2),ex(3)),intent(in) :: f,Sfx,Sfy,Sfz

real*8,dimension(ex(1),ex(2),ex(3)),intent(inout):: f_rhs
real*8,dimension(3),intent(in) ::SoA
real*8,intent(in) :: eps

!~~~~~~> local variables:
! fh carries 3 ghost points (indices -2,-1,0) on each lower face for
! the symmetry extension of f
real*8,dimension(-2:ex(1),-2:ex(2),-2:ex(3)) :: fh
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
real*8 :: dX,dY,dZ
real*8 :: d12dx,d12dy,d12dz
real*8, parameter :: ZEO=0.d0,ONE=1.d0, F3=3.d0
real*8, parameter :: F6=6.0d0,F18=1.8d1
real*8, parameter :: F12=1.2d1, F10=1.d1,EIT=8.d0
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
! kodis parameters
real*8, parameter :: SIX=6.d0,FIT=1.5d1,TWT=2.d1
real*8, parameter :: cof=6.4d1 ! 2^6

! grid spacings from the first interval of each coordinate array
dX = X(2)-X(1)
dY = Y(2)-Y(1)
dZ = Z(2)-Z(1)

d12dx = ONE/F12/dX
d12dy = ONE/F12/dY
d12dz = ONE/F12/dZ

imax = ex(1)
jmax = ex(2)
kmax = ex(3)

! Lower index bounds: extend into the ghost zone (-2) in directions
! where a symmetry boundary passes through the first grid point.
imin = 1
jmin = 1
kmin = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -2
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -2
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -2

! Single symmetry_bd call shared by both advection and dissipation
call symmetry_bd(3,ex,f,fh,SoA)

! ---- Advection (lopsided) loop ----
! upper bound set ex-1 only for efficiency; the outermost points
! (index ex) are not visited here -- presumably handled elsewhere
! (TODO confirm; comment inherited garbled from lopsided)
do k=1,ex(3)-1
 do j=1,ex(2)-1
  do i=1,ex(1)-1
     ! x direction: upwind stencil picked by sign(Sfx); narrower
     ! one-sided forms are used when too close to a grid face
     if(Sfx(i,j,k) > ZEO)then
        if(i+3 <= imax)then
           f_rhs(i,j,k)=f_rhs(i,j,k)+ &
             Sfx(i,j,k)*d12dx*(-F3*fh(i-1,j,k)-F10*fh(i,j,k)+F18*fh(i+1,j,k) &
                               -F6*fh(i+2,j,k)+ fh(i+3,j,k))
        elseif(i+2 <= imax)then
           f_rhs(i,j,k)=f_rhs(i,j,k)+ &
             Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))

        elseif(i+1 <= imax)then
           f_rhs(i,j,k)=f_rhs(i,j,k)- &
             Sfx(i,j,k)*d12dx*(-F3*fh(i+1,j,k)-F10*fh(i,j,k)+F18*fh(i-1,j,k) &
                               -F6*fh(i-2,j,k)+ fh(i-3,j,k))
        endif
     elseif(Sfx(i,j,k) < ZEO)then
        if(i-3 >= imin)then
           f_rhs(i,j,k)=f_rhs(i,j,k)- &
             Sfx(i,j,k)*d12dx*(-F3*fh(i+1,j,k)-F10*fh(i,j,k)+F18*fh(i-1,j,k) &
                               -F6*fh(i-2,j,k)+ fh(i-3,j,k))
        elseif(i-2 >= imin)then
           f_rhs(i,j,k)=f_rhs(i,j,k)+ &
             Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))

        elseif(i-1 >= imin)then
           f_rhs(i,j,k)=f_rhs(i,j,k)+ &
             Sfx(i,j,k)*d12dx*(-F3*fh(i-1,j,k)-F10*fh(i,j,k)+F18*fh(i+1,j,k) &
                               -F6*fh(i+2,j,k)+ fh(i+3,j,k))
        endif
     endif

     ! y direction
     if(Sfy(i,j,k) > ZEO)then
        if(j+3 <= jmax)then
           f_rhs(i,j,k)=f_rhs(i,j,k)+ &
             Sfy(i,j,k)*d12dy*(-F3*fh(i,j-1,k)-F10*fh(i,j,k)+F18*fh(i,j+1,k) &
                               -F6*fh(i,j+2,k)+ fh(i,j+3,k))
        elseif(j+2 <= jmax)then
           f_rhs(i,j,k)=f_rhs(i,j,k)+ &
             Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))

        elseif(j+1 <= jmax)then
           f_rhs(i,j,k)=f_rhs(i,j,k)- &
             Sfy(i,j,k)*d12dy*(-F3*fh(i,j+1,k)-F10*fh(i,j,k)+F18*fh(i,j-1,k) &
                               -F6*fh(i,j-2,k)+ fh(i,j-3,k))
        endif
     elseif(Sfy(i,j,k) < ZEO)then
        if(j-3 >= jmin)then
           f_rhs(i,j,k)=f_rhs(i,j,k)- &
             Sfy(i,j,k)*d12dy*(-F3*fh(i,j+1,k)-F10*fh(i,j,k)+F18*fh(i,j-1,k) &
                               -F6*fh(i,j-2,k)+ fh(i,j-3,k))
        elseif(j-2 >= jmin)then
           f_rhs(i,j,k)=f_rhs(i,j,k)+ &
             Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))

        elseif(j-1 >= jmin)then
           f_rhs(i,j,k)=f_rhs(i,j,k)+ &
             Sfy(i,j,k)*d12dy*(-F3*fh(i,j-1,k)-F10*fh(i,j,k)+F18*fh(i,j+1,k) &
                               -F6*fh(i,j+2,k)+ fh(i,j+3,k))
        endif
     endif

     ! z direction
     if(Sfz(i,j,k) > ZEO)then
        if(k+3 <= kmax)then
           f_rhs(i,j,k)=f_rhs(i,j,k)+ &
             Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k-1)-F10*fh(i,j,k)+F18*fh(i,j,k+1) &
                               -F6*fh(i,j,k+2)+ fh(i,j,k+3))
        elseif(k+2 <= kmax)then
           f_rhs(i,j,k)=f_rhs(i,j,k)+ &
             Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))

        elseif(k+1 <= kmax)then
           f_rhs(i,j,k)=f_rhs(i,j,k)- &
             Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k+1)-F10*fh(i,j,k)+F18*fh(i,j,k-1) &
                               -F6*fh(i,j,k-2)+ fh(i,j,k-3))
        endif
     elseif(Sfz(i,j,k) < ZEO)then
        if(k-3 >= kmin)then
           f_rhs(i,j,k)=f_rhs(i,j,k)- &
             Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k+1)-F10*fh(i,j,k)+F18*fh(i,j,k-1) &
                               -F6*fh(i,j,k-2)+ fh(i,j,k-3))
        elseif(k-2 >= kmin)then
           f_rhs(i,j,k)=f_rhs(i,j,k)+ &
             Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))

        elseif(k-1 >= kmin)then
           f_rhs(i,j,k)=f_rhs(i,j,k)+ &
             Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k-1)-F10*fh(i,j,k)+F18*fh(i,j,k+1) &
                               -F6*fh(i,j,k+2)+ fh(i,j,k+3))
        endif
     endif
  enddo
 enddo
enddo

! ---- Dissipation (kodis) loop ----
! applied only where the full 7-point stencil fits in every direction
! (symmetry ghost points count, since imin/jmin/kmin may be -2)
if(eps > ZEO) then
  do k=1,ex(3)
   do j=1,ex(2)
    do i=1,ex(1)

      if(i-3 >= imin .and. i+3 <= imax .and. &
         j-3 >= jmin .and. j+3 <= jmax .and. &
         k-3 >= kmin .and. k+3 <= kmax) then
         f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof *( ( &
                  (fh(i-3,j,k)+fh(i+3,j,k)) - &
              SIX*(fh(i-2,j,k)+fh(i+2,j,k)) + &
              FIT*(fh(i-1,j,k)+fh(i+1,j,k)) - &
              TWT* fh(i,j,k) )/dX + &
                                                 ( &
                  (fh(i,j-3,k)+fh(i,j+3,k)) - &
              SIX*(fh(i,j-2,k)+fh(i,j+2,k)) + &
              FIT*(fh(i,j-1,k)+fh(i,j+1,k)) - &
              TWT* fh(i,j,k) )/dY + &
                                                 ( &
                  (fh(i,j,k-3)+fh(i,j,k+3)) - &
              SIX*(fh(i,j,k-2)+fh(i,j,k+2)) + &
              FIT*(fh(i,j,k-1)+fh(i,j,k+1)) - &
              TWT* fh(i,j,k) )/dZ )
      endif

    enddo
   enddo
  enddo
endif

return

end subroutine lopsided_kodis
|
||||
|
||||
#elif (ghost_width == 4)
|
||||
! sixth order code
|
||||
! Compute advection terms in right hand sides of field equations
|
||||
|
||||
@@ -13,7 +13,7 @@ LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore
|
||||
## Aggressive optimization flags + PGO Phase 2 (profile-guided optimization)
|
||||
## -fprofile-instr-use: use collected profile data to guide optimization decisions
|
||||
## (branch prediction, basic block layout, inlining, loop unrolling)
|
||||
PROFDATA = /home/amss/AMSS-NCKU/pgo_profile/default.profdata
|
||||
PROFDATA = ../../pgo_profile/default.profdata
|
||||
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||
-fprofile-instr-use=$(PROFDATA) \
|
||||
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
||||
|
||||
29
parallel_plot_helper.py
Normal file
29
parallel_plot_helper.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import multiprocessing
|
||||
|
||||
def run_plot_task(task):
    """Run one plotting job.

    Parameters
    ----------
    task : tuple
        A pair ``(function, args_tuple)``: the plotting callable and the
        positional arguments to invoke it with.
    """
    function, arguments = task
    return function(*arguments)
|
||||
|
||||
|
||||
def run_plot_tasks_parallel(plot_tasks):
    """Execute a list of independent plotting tasks in parallel.

    Uses the 'fork' start method on purpose: forked workers inherit the
    parent's memory image, so the main script is NOT re-imported or
    re-executed in the child processes (with 'spawn'/'forkserver' the
    callers' ``if __name__ != '__main__': sys.exit(0)`` guard would make
    every worker exit immediately).

    Parameters
    ----------
    plot_tasks : list of tuples
        Each element is ``(function, args_tuple)``.  Functions and their
        arguments must be picklable, because tasks are shipped to the
        worker processes through the Pool.

    Returns
    -------
    list
        The result of each task, in the same order as ``plot_tasks``.
        (Results used to be discarded; returning them is
        backward-compatible for existing callers.)
    """
    ## Nothing to do: avoid paying the cost of spawning an idle pool.
    if not plot_tasks:
        return []

    ctx = multiprocessing.get_context('fork')
    ## Never start more workers than there are tasks; the previous
    ## unconditional Pool() forked cpu_count processes even for one task.
    n_workers = min(multiprocessing.cpu_count(), len(plot_tasks))
    with ctx.Pool(processes=n_workers) as pool:
        return pool.map(run_plot_task, plot_tasks)
|
||||
Binary file not shown.
BIN
pgo_profile/default.profdata.backup
Normal file
BIN
pgo_profile/default.profdata.backup
Normal file
Binary file not shown.
BIN
pgo_profile/default_15874826282416242821_0_58277.profraw
Normal file
BIN
pgo_profile/default_15874826282416242821_0_58277.profraw
Normal file
Binary file not shown.
@@ -11,6 +11,8 @@
|
||||
import numpy ## numpy for array operations
|
||||
import scipy ## scipy for interpolation and signal processing
|
||||
import math
|
||||
import matplotlib
|
||||
matplotlib.use('Agg') ## use non-interactive backend for multiprocessing safety
|
||||
import matplotlib.pyplot as plt ## matplotlib for plotting
|
||||
import os ## os for system/file operations
|
||||
|
||||
|
||||
@@ -8,16 +8,23 @@
|
||||
##
|
||||
#################################################
|
||||
|
||||
## Restrict OpenMP to one thread per process so that running
|
||||
## many workers in parallel does not create an O(workers * BLAS_threads)
|
||||
## thread explosion. The variable MUST be set before numpy/scipy
|
||||
## are imported, because the BLAS library reads them only at load time.
|
||||
import os
|
||||
os.environ.setdefault("OMP_NUM_THREADS", "1")
|
||||
|
||||
import numpy
|
||||
import scipy
|
||||
import matplotlib
|
||||
matplotlib.use('Agg') ## use non-interactive backend for multiprocessing safety
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.colors import LogNorm
|
||||
from mpl_toolkits.mplot3d import Axes3D
|
||||
## import torch
|
||||
import AMSS_NCKU_Input as input_data
|
||||
|
||||
import os
|
||||
|
||||
|
||||
#########################################################################################
|
||||
|
||||
@@ -192,3 +199,19 @@ def get_data_xy( Rmin, Rmax, n, data0, time, figure_title, figure_outdir ):
|
||||
|
||||
####################################################################################
|
||||
|
||||
|
||||
####################################################################################
|
||||
## Allow this module to be run as a standalone script so that each
|
||||
## binary-data plot can be executed in a fresh subprocess whose BLAS
|
||||
## environment variables (set above) take effect before numpy loads.
|
||||
##
|
||||
## Usage: python3 plot_binary_data.py <filename> <binary_outdir> <figure_outdir>
|
||||
####################################################################################
|
||||
|
||||
if __name__ == '__main__':
    import sys

    ## Exactly three command-line arguments are required.
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print(f"Usage: {sys.argv[0]} <filename> <binary_outdir> <figure_outdir>")
        sys.exit(1)
    filename, binary_outdir, figure_outdir = cli_args
    plot_binary_data(filename, binary_outdir, figure_outdir)
|
||||
|
||||
|
||||
@@ -8,6 +8,8 @@
|
||||
#################################################
|
||||
|
||||
import numpy ## numpy for array operations
|
||||
import matplotlib
|
||||
matplotlib.use('Agg') ## use non-interactive backend for multiprocessing safety
|
||||
import matplotlib.pyplot as plt ## matplotlib for plotting
|
||||
from mpl_toolkits.mplot3d import Axes3D ## needed for 3D plots
|
||||
import glob
|
||||
@@ -15,6 +17,9 @@ import os ## operating system utilities
|
||||
|
||||
import plot_binary_data
|
||||
import AMSS_NCKU_Input as input_data
|
||||
import subprocess
|
||||
import sys
|
||||
import multiprocessing
|
||||
|
||||
# plt.rcParams['text.usetex'] = True ## enable LaTeX fonts in plots
|
||||
|
||||
@@ -50,10 +55,40 @@ def generate_binary_data_plot( binary_outdir, figure_outdir ):
|
||||
file_list.append(x)
|
||||
print(x)
|
||||
|
||||
## Plot each file in the list
|
||||
## Plot each file in parallel using subprocesses.
|
||||
## Each subprocess is a fresh Python process where the BLAS thread-count
|
||||
## environment variables (set at the top of plot_binary_data.py) take
|
||||
## effect before numpy is imported. This avoids the thread explosion
|
||||
## that occurs when multiprocessing.Pool with 'fork' context inherits
|
||||
## already-initialized multi-threaded BLAS from the parent.
|
||||
script = os.path.join( os.path.dirname(__file__), "plot_binary_data.py" )
|
||||
max_workers = min( multiprocessing.cpu_count(), len(file_list) ) if file_list else 0
|
||||
|
||||
running = []
|
||||
failed = []
|
||||
for filename in file_list:
|
||||
print(filename)
|
||||
plot_binary_data.plot_binary_data(filename, binary_outdir, figure_outdir)
|
||||
proc = subprocess.Popen(
|
||||
[sys.executable, script, filename, binary_outdir, figure_outdir],
|
||||
)
|
||||
running.append( (proc, filename) )
|
||||
## Keep at most max_workers subprocesses active at a time
|
||||
if len(running) >= max_workers:
|
||||
p, fn = running.pop(0)
|
||||
p.wait()
|
||||
if p.returncode != 0:
|
||||
failed.append(fn)
|
||||
|
||||
## Wait for all remaining subprocesses to finish
|
||||
for p, fn in running:
|
||||
p.wait()
|
||||
if p.returncode != 0:
|
||||
failed.append(fn)
|
||||
|
||||
if failed:
|
||||
print( " WARNING: the following binary data plots failed:" )
|
||||
for fn in failed:
|
||||
print( " ", fn )
|
||||
|
||||
print( )
|
||||
print( " Binary Data Plot Has been Finished " )
|
||||
|
||||
Reference in New Issue
Block a user