Compare commits

...

8 Commits

Author SHA1 Message Date
jaunatisblue
f147f79ffa 修改block划分,对负载高的rank所在block进行划分,添加到空rank,空rank是平移得到的 2026-02-26 09:40:46 +08:00
jaunatisblue
8abac8dd88 对rank运行时间统计,两个函数分别在不同的计算中被调用,因此我对两个重载的函数分别进行了mpi实际计算时间的统计,对于第一个PatList_Interp_Points 调用 Interp_points,我取排名前三的rank时间,发现每次只有一个rank时间较长,Rank [ 52]: Calc 0.000012 s
Rank [  20]: Calc 0.000003 s

Rank [  35]: Calc 0.000003 s

Rank [  10]: Calc 0.000010 s

Rank [  17]: Calc 0.000005 s

Rank [  32]: Calc 0.000003 s,而且rank不固定,一般就是rank 10 和 rank 52;
但尽管有很多,比前者时间还是少很多
对于第二个Surf_Wave 调用 Interp_points,我发现前四个rank时间最长,比较固定,就是下面四个rank

Rank [  27]: Calc 0.331978 s

Rank [  35]: Calc 0.242219 s

Rank [  28]: Calc 0.242132 s

Rank [  36]: Calc 0.197024 s
因此下面surf_wave是核心
2026-02-24 14:34:24 +08:00
82339f5282 Merge lopsided advection + kodis dissipation to share symmetry_bd buffer
Cherry-picked from 38c2c30.
2026-02-20 13:36:27 +08:00
94f38c57f9 Don't hardcode pgo profile path 2026-02-20 13:36:27 +08:00
85d1e8de87 Add Intel SIMD vectorization directives to hot-spot functions
Apply Intel Advisor optimization recommendations:
- Add FORCEINLINE to polint for better inlining
- Add SIMD VECTORLENGTHFOR and UNROLL directives to fderivs,
  fdderivs, symmetry_bd, and kodis functions

This improves vectorization efficiency of finite difference
computations.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-14 00:43:39 +08:00
5b7e05cd32 PGO updated 2026-02-11 18:26:30 +08:00
85afe00fc5 Merge plotting optimizations from chb-copilot-test
- Implement multiprocessing-based parallel plotting
- Add parallel_plot_helper.py for concurrent plot task execution
- Use matplotlib 'Agg' backend for multiprocessing safety
- Set OMP_NUM_THREADS=1 to prevent BLAS thread explosion
- Use subprocess for binary data plots to avoid thread conflicts
- Add fork bomb protection in main program

This merge only includes plotting improvements and excludes
MPI communication changes to preserve existing optimizations.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-11 16:19:17 +08:00
5c1790277b Replace nested OutBdLow2Hi loops with batch calls in RestrictProlong
The 8 nested while(Ppc){while(Pp){OutBdLow2Hi(single,single,...)}}
loops across RestrictProlong (3 overloads) and ProlongRestrict each
produced N_c × N_f separate transfer() → MPI_Waitall barriers.
Replace with the existing batch OutBdLow2Hi(MyList<Patch>*,...) which
merges all patch pairs into a single transfer() call with 1 MPI_Waitall.

Also add Restrict_cached, OutBdLow2Hi_cached, OutBdLow2Himix_cached
to Parallel (unused for now — kept as infrastructure for future use).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-02-11 16:09:08 +08:00
22 changed files with 15169 additions and 13903 deletions

View File

@@ -8,6 +8,14 @@
## ##
################################################################## ##################################################################
## Guard against re-execution by multiprocessing child processes.
## Without this, using 'spawn' or 'forkserver' context would cause every
## worker to re-run the entire script, spawning exponentially more
## workers (fork bomb).
if __name__ != '__main__':
import sys as _sys
_sys.exit(0)
################################################################## ##################################################################
@@ -58,7 +66,8 @@ if os.path.exists(File_directory):
## Prompt whether to overwrite the existing directory ## Prompt whether to overwrite the existing directory
while True: while True:
try: try:
inputvalue = input() ## inputvalue = input()
inputvalue = "continue"
## If the user agrees to overwrite, proceed and remove the existing directory ## If the user agrees to overwrite, proceed and remove the existing directory
if ( inputvalue == "continue" ): if ( inputvalue == "continue" ):
print( " Continue the calculation !!! " ) print( " Continue the calculation !!! " )
@@ -424,26 +433,31 @@ print(
import plot_xiaoqu import plot_xiaoqu
import plot_GW_strain_amplitude_xiaoqu import plot_GW_strain_amplitude_xiaoqu
from parallel_plot_helper import run_plot_tasks_parallel
plot_tasks = []
## Plot black hole trajectory ## Plot black hole trajectory
plot_xiaoqu.generate_puncture_orbit_plot( binary_results_directory, figure_directory ) plot_tasks.append( ( plot_xiaoqu.generate_puncture_orbit_plot, (binary_results_directory, figure_directory) ) )
plot_xiaoqu.generate_puncture_orbit_plot3D( binary_results_directory, figure_directory ) plot_tasks.append( ( plot_xiaoqu.generate_puncture_orbit_plot3D, (binary_results_directory, figure_directory) ) )
## Plot black hole separation vs. time ## Plot black hole separation vs. time
plot_xiaoqu.generate_puncture_distence_plot( binary_results_directory, figure_directory ) plot_tasks.append( ( plot_xiaoqu.generate_puncture_distence_plot, (binary_results_directory, figure_directory) ) )
## Plot gravitational waveforms (psi4 and strain amplitude) ## Plot gravitational waveforms (psi4 and strain amplitude)
for i in range(input_data.Detector_Number): for i in range(input_data.Detector_Number):
plot_xiaoqu.generate_gravitational_wave_psi4_plot( binary_results_directory, figure_directory, i ) plot_tasks.append( ( plot_xiaoqu.generate_gravitational_wave_psi4_plot, (binary_results_directory, figure_directory, i) ) )
plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot( binary_results_directory, figure_directory, i ) plot_tasks.append( ( plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot, (binary_results_directory, figure_directory, i) ) )
## Plot ADM mass evolution ## Plot ADM mass evolution
for i in range(input_data.Detector_Number): for i in range(input_data.Detector_Number):
plot_xiaoqu.generate_ADMmass_plot( binary_results_directory, figure_directory, i ) plot_tasks.append( ( plot_xiaoqu.generate_ADMmass_plot, (binary_results_directory, figure_directory, i) ) )
## Plot Hamiltonian constraint violation over time ## Plot Hamiltonian constraint violation over time
for i in range(input_data.grid_level): for i in range(input_data.grid_level):
plot_xiaoqu.generate_constraint_check_plot( binary_results_directory, figure_directory, i ) plot_tasks.append( ( plot_xiaoqu.generate_constraint_check_plot, (binary_results_directory, figure_directory, i) ) )
run_plot_tasks_parallel(plot_tasks)
## Plot stored binary data ## Plot stored binary data
plot_xiaoqu.generate_binary_data_plot( binary_results_directory, figure_directory ) plot_xiaoqu.generate_binary_data_plot( binary_results_directory, figure_directory )

File diff suppressed because it is too large Load Diff

View File

@@ -24,6 +24,7 @@ using namespace std;
#endif #endif
#include <mpi.h> #include <mpi.h>
#include <memory.h>
#include "MyList.h" #include "MyList.h"
#include "Block.h" #include "Block.h"
#include "Parallel.h" #include "Parallel.h"

File diff suppressed because it is too large Load Diff

View File

@@ -1,204 +1,235 @@
#ifndef PARALLEL_H #ifndef PARALLEL_H
#define PARALLEL_H #define PARALLEL_H
#include <iostream> #include <iostream>
#include <iomanip> #include <iomanip>
#include <fstream> #include <fstream>
#include <cstdlib> #include <cstdlib>
#include <cstdio> #include <cstdio>
#include <string> #include <string>
#include <cmath> #include <cmath>
#include <new> #include <new>
using namespace std; using namespace std;
#include <memory.h>
#include "Parallel_bam.h" #include "Parallel_bam.h"
#include "var.h" #include "var.h"
#include "MPatch.h" #include "MPatch.h"
#include "Block.h" #include "Block.h"
#include "MyList.h" #include "MyList.h"
#include "macrodef.h" //need dim; ghost_width; CONTRACT #include "macrodef.h" //need dim; ghost_width; CONTRACT
namespace Parallel namespace Parallel
{ {
struct gridseg struct gridseg
{ {
double llb[dim]; double llb[dim];
double uub[dim]; double uub[dim];
int shape[dim]; int shape[dim];
double illb[dim], iuub[dim]; // only use for OutBdLow2Hi double illb[dim], iuub[dim]; // only use for OutBdLow2Hi
Block *Bg; Block *Bg;
}; };
int partition1(int &nx, int split_size, int min_width, int cpusize, int shape); // special for 1 diemnsion int partition1(int &nx, int split_size, int min_width, int cpusize, int shape); // special for 1 diemnsion
int partition2(int *nxy, int split_size, int *min_width, int cpusize, int *shape); // special for 2 diemnsions int partition2(int *nxy, int split_size, int *min_width, int cpusize, int *shape); // special for 2 diemnsions
int partition3(int *nxyz, int split_size, int *min_width, int cpusize, int *shape); int partition3(int *nxyz, int split_size, int *min_width, int cpusize, int *shape);
MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks
void KillBlocks(MyList<Patch> *PatchLIST); MyList<Block> *distribute_hard(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks
Block* splitHotspotBlock(MyList<Block>* &BlL, int _dim,
void setfunction(MyList<Block> *BlL, var *vn, double func(double x, double y, double z)); int ib0_orig, int ib3_orig,
void setfunction(int rank, MyList<Block> *BlL, var *vn, double func(double x, double y, double z)); int jb1_orig, int jb4_orig,
void writefile(double time, int nx, int ny, int nz, double xmin, double xmax, double ymin, double ymax, int kb2_orig, int kb5_orig,
double zmin, double zmax, char *filename, double *data_out); Patch* PP, int r_left, int r_right,
void writefile(double time, int nx, int ny, double xmin, double xmax, double ymin, double ymax, int ingfsi, int fngfsi, bool periodic,
char *filename, double *datain); Block* &split_first_block, Block* &split_last_block);
void getarrayindex(int DIM, int *shape, int *index, int n); Block* createMappedBlock(MyList<Block>* &BlL, int _dim, int* shape, double* bbox,
int getarraylocation(int DIM, int *shape, int *index); int block_id, int ingfsi, int fngfsi, int lev);
void copy(int DIM, double *llbout, double *uubout, int *Dshape, double *DD, double *llbin, double *uubin, void KillBlocks(MyList<Patch> *PatchLIST);
int *shape, double *datain, double *llb, double *uub);
void Dump_CPU_Data(MyList<Block> *BlL, MyList<var> *DumpList, char *tag, double time, double dT); void setfunction(MyList<Block> *BlL, var *vn, double func(double x, double y, double z));
void Dump_Data(MyList<Patch> *PL, MyList<var> *DumpList, char *tag, double time, double dT); void setfunction(int rank, MyList<Block> *BlL, var *vn, double func(double x, double y, double z));
void Dump_Data(Patch *PP, MyList<var> *DumpList, char *tag, double time, double dT, int grd); void writefile(double time, int nx, int ny, int nz, double xmin, double xmax, double ymin, double ymax,
double *Collect_Data(Patch *PP, var *VP); double zmin, double zmax, char *filename, double *data_out);
void d2Dump_Data(MyList<Patch> *PL, MyList<var> *DumpList, char *tag, double time, double dT); void writefile(double time, int nx, int ny, double xmin, double xmax, double ymin, double ymax,
void d2Dump_Data(Patch *PP, MyList<var> *DumpList, char *tag, double time, double dT, int grd); char *filename, double *datain);
void Dump_Data0(Patch *PP, MyList<var> *DumpList, char *tag, double time, double dT); void getarrayindex(int DIM, int *shape, int *index, int n);
double global_interp(int DIM, int *ext, double **CoX, double *datain, int getarraylocation(int DIM, int *shape, int *index);
double *poX, int ordn, double *SoA, int Symmetry); void copy(int DIM, double *llbout, double *uubout, int *Dshape, double *DD, double *llbin, double *uubin,
double global_interp(int DIM, int *ext, double **CoX, double *datain, int *shape, double *datain, double *llb, double *uub);
double *poX, int ordn); void Dump_CPU_Data(MyList<Block> *BlL, MyList<var> *DumpList, char *tag, double time, double dT);
double Lagrangian_Int(double x, int npts, double *xpts, double *funcvals); void Dump_Data(MyList<Patch> *PL, MyList<var> *DumpList, char *tag, double time, double dT);
double LagrangePoly(double x, int pt, int npts, double *xpts); void Dump_Data(Patch *PP, MyList<var> *DumpList, char *tag, double time, double dT, int grd);
MyList<gridseg> *build_complete_gsl(Patch *Pat); double *Collect_Data(Patch *PP, var *VP);
MyList<gridseg> *build_complete_gsl(MyList<Patch> *PatL); void d2Dump_Data(MyList<Patch> *PL, MyList<var> *DumpList, char *tag, double time, double dT);
MyList<gridseg> *build_complete_gsl_virtual(MyList<Patch> *PatL); void d2Dump_Data(Patch *PP, MyList<var> *DumpList, char *tag, double time, double dT, int grd);
MyList<gridseg> *build_complete_gsl_virtual2(MyList<Patch> *PatL); // - buffer void Dump_Data0(Patch *PP, MyList<var> *DumpList, char *tag, double time, double dT);
MyList<gridseg> *build_owned_gsl0(Patch *Pat, int rank_in); // - ghost without extension, special for Sync usage double global_interp(int DIM, int *ext, double **CoX, double *datain,
MyList<gridseg> *build_owned_gsl1(Patch *Pat, int rank_in); // - ghost, similar to build_owned_gsl0 but extend one point on left side for vertex grid double *poX, int ordn, double *SoA, int Symmetry);
MyList<gridseg> *build_owned_gsl2(Patch *Pat, int rank_in); // - buffer - ghost double global_interp(int DIM, int *ext, double **CoX, double *datain,
MyList<gridseg> *build_owned_gsl3(Patch *Pat, int rank_in, int Symmetry); // - ghost - BD ghost double *poX, int ordn);
MyList<gridseg> *build_owned_gsl4(Patch *Pat, int rank_in, int Symmetry); // - buffer - ghost - BD ghost double Lagrangian_Int(double x, int npts, double *xpts, double *funcvals);
MyList<gridseg> *build_owned_gsl5(Patch *Pat, int rank_in); // similar to build_owned_gsl2 but no extension double LagrangePoly(double x, int pt, int npts, double *xpts);
MyList<gridseg> *build_owned_gsl(MyList<Patch> *PatL, int rank_in, int type, int Symmetry); MyList<gridseg> *build_complete_gsl(Patch *Pat);
void build_gstl(MyList<gridseg> *srci, MyList<gridseg> *dsti, MyList<gridseg> **out_src, MyList<gridseg> **out_dst); MyList<gridseg> *build_complete_gsl(MyList<Patch> *PatL);
int data_packer(double *data, MyList<gridseg> *src, MyList<gridseg> *dst, int rank_in, int dir, MyList<gridseg> *build_complete_gsl_virtual(MyList<Patch> *PatL);
MyList<var> *VarLists, MyList<var> *VarListd, int Symmetry); MyList<gridseg> *build_complete_gsl_virtual2(MyList<Patch> *PatL); // - buffer
void transfer(MyList<gridseg> **src, MyList<gridseg> **dst, MyList<gridseg> *build_owned_gsl0(Patch *Pat, int rank_in); // - ghost without extension, special for Sync usage
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /*target */, MyList<gridseg> *build_owned_gsl1(Patch *Pat, int rank_in); // - ghost, similar to build_owned_gsl0 but extend one point on left side for vertex grid
int Symmetry); MyList<gridseg> *build_owned_gsl2(Patch *Pat, int rank_in); // - buffer - ghost
int data_packermix(double *data, MyList<gridseg> *src, MyList<gridseg> *dst, int rank_in, int dir, MyList<gridseg> *build_owned_gsl3(Patch *Pat, int rank_in, int Symmetry); // - ghost - BD ghost
MyList<var> *VarLists, MyList<var> *VarListd, int Symmetry); MyList<gridseg> *build_owned_gsl4(Patch *Pat, int rank_in, int Symmetry); // - buffer - ghost - BD ghost
void transfermix(MyList<gridseg> **src, MyList<gridseg> **dst, MyList<gridseg> *build_owned_gsl5(Patch *Pat, int rank_in); // similar to build_owned_gsl2 but no extension
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /*target */, MyList<gridseg> *build_owned_gsl(MyList<Patch> *PatL, int rank_in, int type, int Symmetry);
int Symmetry); void build_gstl(MyList<gridseg> *srci, MyList<gridseg> *dsti, MyList<gridseg> **out_src, MyList<gridseg> **out_dst);
void Sync(Patch *Pat, MyList<var> *VarList, int Symmetry); int data_packer(double *data, MyList<gridseg> *src, MyList<gridseg> *dst, int rank_in, int dir,
void Sync(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry); MyList<var> *VarLists, MyList<var> *VarListd, int Symmetry);
void Sync_merged(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry); void transfer(MyList<gridseg> **src, MyList<gridseg> **dst,
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /*target */,
struct SyncCache { int Symmetry);
bool valid; int data_packermix(double *data, MyList<gridseg> *src, MyList<gridseg> *dst, int rank_in, int dir,
int cpusize; MyList<var> *VarLists, MyList<var> *VarListd, int Symmetry);
MyList<gridseg> **combined_src; void transfermix(MyList<gridseg> **src, MyList<gridseg> **dst,
MyList<gridseg> **combined_dst; MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /*target */,
int *send_lengths; int Symmetry);
int *recv_lengths; void Sync(Patch *Pat, MyList<var> *VarList, int Symmetry);
double **send_bufs; void Sync(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry);
double **recv_bufs; void Sync_merged(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry);
int *send_buf_caps;
int *recv_buf_caps; struct SyncCache {
MPI_Request *reqs; bool valid;
MPI_Status *stats; int cpusize;
int max_reqs; MyList<gridseg> **combined_src;
bool lengths_valid; MyList<gridseg> **combined_dst;
SyncCache(); int *send_lengths;
void invalidate(); int *recv_lengths;
void destroy(); double **send_bufs;
}; double **recv_bufs;
int *send_buf_caps;
void Sync_cached(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, SyncCache &cache); int *recv_buf_caps;
void transfer_cached(MyList<gridseg> **src, MyList<gridseg> **dst, MPI_Request *reqs;
MyList<var> *VarList1, MyList<var> *VarList2, MPI_Status *stats;
int Symmetry, SyncCache &cache); int max_reqs;
bool lengths_valid;
struct AsyncSyncState { SyncCache();
int req_no; void invalidate();
bool active; void destroy();
AsyncSyncState() : req_no(0), active(false) {} };
};
void Sync_cached(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, SyncCache &cache);
void Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, void transfer_cached(MyList<gridseg> **src, MyList<gridseg> **dst,
SyncCache &cache, AsyncSyncState &state); MyList<var> *VarList1, MyList<var> *VarList2,
void Sync_finish(SyncCache &cache, AsyncSyncState &state, int Symmetry, SyncCache &cache);
MyList<var> *VarList, int Symmetry);
void OutBdLow2Hi(Patch *Patc, Patch *Patf, struct AsyncSyncState {
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */, int req_no;
int Symmetry); bool active;
void OutBdLow2Hi(MyList<Patch> *PatcL, MyList<Patch> *PatfL, AsyncSyncState() : req_no(0), active(false) {}
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */, };
int Symmetry);
void OutBdLow2Himix(Patch *Patc, Patch *Patf, void Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry,
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */, SyncCache &cache, AsyncSyncState &state);
int Symmetry); void Sync_finish(SyncCache &cache, AsyncSyncState &state,
void OutBdLow2Himix(MyList<Patch> *PatcL, MyList<Patch> *PatfL, MyList<var> *VarList, int Symmetry);
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */, void OutBdLow2Hi(Patch *Patc, Patch *Patf,
int Symmetry); MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
void Prolong(Patch *Patc, Patch *Patf, int Symmetry);
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */, void OutBdLow2Hi(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
int Symmetry); MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
void Prolongint(Patch *Patc, Patch *Patf, int Symmetry);
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */, void OutBdLow2Himix(Patch *Patc, Patch *Patf,
int Symmetry); MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
void Restrict(MyList<Patch> *PatcL, MyList<Patch> *PatfL, int Symmetry);
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */, void OutBdLow2Himix(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
int Symmetry); MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
void Restrict_after(MyList<Patch> *PatcL, MyList<Patch> *PatfL, int Symmetry);
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */, void Restrict_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
int Symmetry); // for -ghost - BDghost MyList<var> *VarList1, MyList<var> *VarList2,
MyList<Parallel::gridseg> *build_PhysBD_gsl(Patch *Pat); int Symmetry, SyncCache &cache);
MyList<Parallel::gridseg> *build_ghost_gsl(MyList<Patch> *PatL); void OutBdLow2Hi_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
MyList<Parallel::gridseg> *build_ghost_gsl(Patch *Pat); MyList<var> *VarList1, MyList<var> *VarList2,
MyList<Parallel::gridseg> *build_buffer_gsl(Patch *Pat); int Symmetry, SyncCache &cache);
MyList<Parallel::gridseg> *build_buffer_gsl(MyList<Patch> *PatL); void OutBdLow2Himix_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
MyList<Parallel::gridseg> *gsl_subtract(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B); MyList<var> *VarList1, MyList<var> *VarList2,
MyList<Parallel::gridseg> *gs_subtract(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B); int Symmetry, SyncCache &cache);
MyList<Parallel::gridseg> *gsl_and(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B); void Prolong(Patch *Patc, Patch *Patf,
MyList<Parallel::gridseg> *gs_and(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B); MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
MyList<Parallel::gridseg> *clone_gsl(MyList<Parallel::gridseg> *p, bool first_only); int Symmetry);
MyList<Parallel::gridseg> *build_bulk_gsl(Patch *Pat); // similar to build_owned_gsl0 but does not care rank issue void Prolongint(Patch *Patc, Patch *Patf,
MyList<Parallel::gridseg> *build_bulk_gsl(Block *bp, Patch *Pat); MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
void build_PhysBD_gstl(Patch *Pat, MyList<Parallel::gridseg> *srci, MyList<Parallel::gridseg> *dsti, int Symmetry);
MyList<Parallel::gridseg> **out_src, MyList<Parallel::gridseg> **out_dst); void Restrict(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
void PeriodicBD(Patch *Pat, MyList<var> *VarList, int Symmetry); MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
double L2Norm(Patch *Pat, var *vf); int Symmetry);
void checkgsl(MyList<Parallel::gridseg> *pp, bool first_only); void Restrict_after(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
void checkvarl(MyList<var> *pp, bool first_only); MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
MyList<Parallel::gridseg> *divide_gsl(MyList<Parallel::gridseg> *p, Patch *Pat); int Symmetry); // for -ghost - BDghost
MyList<Parallel::gridseg> *divide_gs(MyList<Parallel::gridseg> *p, Patch *Pat); MyList<Parallel::gridseg> *build_PhysBD_gsl(Patch *Pat);
void prepare_inter_time_level(Patch *Pat, MyList<Parallel::gridseg> *build_ghost_gsl(MyList<Patch> *PatL);
MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */, MyList<Parallel::gridseg> *build_ghost_gsl(Patch *Pat);
MyList<var> *VarList3 /* target (t+a*dt) */, int tindex); MyList<Parallel::gridseg> *build_buffer_gsl(Patch *Pat);
void prepare_inter_time_level(Patch *Pat, MyList<Parallel::gridseg> *build_buffer_gsl(MyList<Patch> *PatL);
MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */, MyList<Parallel::gridseg> *gsl_subtract(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B);
MyList<var> *VarList3 /* source (t-dt) */, MyList<var> *VarList4 /* target (t+a*dt) */, int tindex); MyList<Parallel::gridseg> *gs_subtract(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B);
void prepare_inter_time_level(MyList<Patch> *PatL, MyList<Parallel::gridseg> *gsl_and(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B);
MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */, MyList<Parallel::gridseg> *gs_and(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B);
MyList<var> *VarList3 /* target (t+a*dt) */, int tindex); MyList<Parallel::gridseg> *clone_gsl(MyList<Parallel::gridseg> *p, bool first_only);
void prepare_inter_time_level(MyList<Patch> *Pat, MyList<Parallel::gridseg> *build_bulk_gsl(Patch *Pat); // similar to build_owned_gsl0 but does not care rank issue
MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */, MyList<Parallel::gridseg> *build_bulk_gsl(Block *bp, Patch *Pat);
MyList<var> *VarList3 /* source (t-dt) */, MyList<var> *VarList4 /* target (t+a*dt) */, int tindex); void build_PhysBD_gstl(Patch *Pat, MyList<Parallel::gridseg> *srci, MyList<Parallel::gridseg> *dsti,
void merge_gsl(MyList<gridseg> *&A, const double ratio); MyList<Parallel::gridseg> **out_src, MyList<Parallel::gridseg> **out_dst);
bool merge_gs(MyList<gridseg> *D, MyList<gridseg> *B, MyList<gridseg> *&C, const double ratio); void PeriodicBD(Patch *Pat, MyList<var> *VarList, int Symmetry);
// Add ghost region to tangent plane double L2Norm(Patch *Pat, var *vf);
// we assume the grids have the same resolution void checkgsl(MyList<Parallel::gridseg> *pp, bool first_only);
void add_ghost_touch(MyList<gridseg> *&A); void checkvarl(MyList<var> *pp, bool first_only);
void cut_gsl(MyList<gridseg> *&A); MyList<Parallel::gridseg> *divide_gsl(MyList<Parallel::gridseg> *p, Patch *Pat);
bool cut_gs(MyList<gridseg> *D, MyList<gridseg> *B, MyList<gridseg> *&C); MyList<Parallel::gridseg> *divide_gs(MyList<Parallel::gridseg> *p, Patch *Pat);
MyList<Parallel::gridseg> *gs_subtract_virtual(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B); void prepare_inter_time_level(Patch *Pat,
void fill_level_data(MyList<Patch> *PatLd, MyList<Patch> *PatLs, MyList<Patch> *PatcL, MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */,
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *FutureList, MyList<var> *VarList3 /* target (t+a*dt) */, int tindex);
MyList<var> *tmList, int Symmetry, bool BB, bool CC); void prepare_inter_time_level(Patch *Pat,
bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList, MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */,
int NN, double **XX, MyList<var> *VarList3 /* source (t-dt) */, MyList<var> *VarList4 /* target (t+a*dt) */, int tindex);
double *Shellf, int Symmetry); void prepare_inter_time_level(MyList<Patch> *PatL,
void aligncheck(double *bbox0, double *bboxl, int lev, double *DH0, int *shape); MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */,
bool point_locat_gsl(double *pox, MyList<Parallel::gridseg> *gsl); MyList<var> *VarList3 /* target (t+a*dt) */, int tindex);
void checkpatchlist(MyList<Patch> *PatL, bool buflog); void prepare_inter_time_level(MyList<Patch> *Pat,
MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */,
double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here); MyList<var> *VarList3 /* source (t-dt) */, MyList<var> *VarList4 /* target (t+a*dt) */, int tindex);
bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList, void merge_gsl(MyList<gridseg> *&A, const double ratio);
int NN, double **XX, bool merge_gs(MyList<gridseg> *D, MyList<gridseg> *B, MyList<gridseg> *&C, const double ratio);
double *Shellf, int Symmetry, MPI_Comm Comm_here); // Add ghost region to tangent plane
#if (PSTR == 1 || PSTR == 2 || PSTR == 3) // we assume the grids have the same resolution
MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi, void add_ghost_touch(MyList<gridseg> *&A);
bool periodic, int start_rank, int end_rank, int nodes = 0); void cut_gsl(MyList<gridseg> *&A);
#endif bool cut_gs(MyList<gridseg> *D, MyList<gridseg> *B, MyList<gridseg> *&C);
} MyList<Parallel::gridseg> *gs_subtract_virtual(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B);
#endif /*PARALLEL_H */ void fill_level_data(MyList<Patch> *PatLd, MyList<Patch> *PatLs, MyList<Patch> *PatcL,
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *FutureList,
MyList<var> *tmList, int Symmetry, bool BB, bool CC);
bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
int NN, double **XX,
double *Shellf, int Symmetry);
void aligncheck(double *bbox0, double *bboxl, int lev, double *DH0, int *shape);
bool point_locat_gsl(double *pox, MyList<Parallel::gridseg> *gsl);
void checkpatchlist(MyList<Patch> *PatL, bool buflog);
double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here);
bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
int NN, double **XX,
double *Shellf, int Symmetry, MPI_Comm Comm_here);
#if (PSTR == 1 || PSTR == 2 || PSTR == 3)
MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
bool periodic, int start_rank, int end_rank, int nodes = 0);
// Redistribute blocks with time statistics for load balancing
MyList<Block> *distribute(MyList<Patch> *PatchLIST, MyList<Block> *OldBlockL,
int cpusize, int ingfsi, int fngfsi,
bool periodic, int start_rank, int end_rank, int nodes = 0);
#endif
// Dynamic load balancing: split blocks for heavy ranks
void split_heavy_blocks(MyList<Patch> *PatL, int *heavy_ranks, int num_heavy,
int split_factor, int cpusize, int ingfsi, int fngfsi);
// Check if load balancing is needed based on interpolation times
bool check_load_balance_need(double *rank_times, int nprocs, int &num_heavy, int *heavy_ranks);
}
#endif /*PARALLEL_H */

View File

@@ -5819,21 +5819,11 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
#endif #endif
#if (RPB == 0) #if (RPB == 0)
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry); Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
#endif #endif
Pp = Pp->next;
}
Ppc = Ppc->next;
}
#elif (RPB == 1) #elif (RPB == 1)
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry); // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry);
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry); Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry);
@@ -5880,21 +5870,11 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
#endif #endif
#if (RPB == 0) #if (RPB == 0)
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SL, SL, Symmetry); Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SL, SL, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
#endif #endif
Pp = Pp->next;
}
Ppc = Ppc->next;
}
#elif (RPB == 1) #elif (RPB == 1)
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry); // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry); Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry);
@@ -5969,21 +5949,11 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]); Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
#if (RPB == 0) #if (RPB == 0)
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry); Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
#endif #endif
Pp = Pp->next;
}
Ppc = Ppc->next;
}
#elif (RPB == 1) #elif (RPB == 1)
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry); // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry);
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry); Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry);
@@ -6001,21 +5971,11 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]); Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]);
#if (RPB == 0) #if (RPB == 0)
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SL, SL, Symmetry); Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SL, SL, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
#endif #endif
Pp = Pp->next;
}
Ppc = Ppc->next;
}
#elif (RPB == 1) #elif (RPB == 1)
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry); // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry); Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry);
@@ -6076,21 +6036,11 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]); Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
#if (RPB == 0) #if (RPB == 0)
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry); Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
#endif #endif
Pp = Pp->next;
}
Ppc = Ppc->next;
}
#elif (RPB == 1) #elif (RPB == 1)
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry); // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry);
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry); Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry);
@@ -6110,21 +6060,11 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]); Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]);
#if (RPB == 0) #if (RPB == 0)
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry); Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
#endif #endif
Pp = Pp->next;
}
Ppc = Ppc->next;
}
#elif (RPB == 1) #elif (RPB == 1)
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry); // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry);
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry); Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry);
@@ -6161,21 +6101,11 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
} }
#if (RPB == 0) #if (RPB == 0)
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry); Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
#endif #endif
Pp = Pp->next;
}
Ppc = Ppc->next;
}
#elif (RPB == 1) #elif (RPB == 1)
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry); // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry);
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry); Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry);
@@ -6184,21 +6114,11 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
else // no time refinement levels and for all same time levels else // no time refinement levels and for all same time levels
{ {
#if (RPB == 0) #if (RPB == 0)
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry); Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
#endif #endif
Pp = Pp->next;
}
Ppc = Ppc->next;
}
#elif (RPB == 1) #elif (RPB == 1)
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry); // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry);
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry); Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry);

View File

@@ -945,103 +945,60 @@
SSA(2)=SYM SSA(2)=SYM
SSA(3)=ANTI SSA(3)=ANTI
!!!!!!!!!advection term part !!!!!!!!!advection term + Kreiss-Oliger dissipation (merged for cache efficiency)
! lopsided_kodis shares the symmetry_bd buffer between advection and
! dissipation, eliminating redundant full-grid copies. For metric variables
! gxx/gyy/gzz (=dxx/dyy/dzz+1): kodis stencil coefficients sum to zero,
! so the constant offset has no effect on dissipation.
call lopsided(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided_kodis(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call lopsided(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS) call lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
call lopsided(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA) call lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps)
call lopsided(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided_kodis(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call lopsided(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA) call lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps)
call lopsided(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided_kodis(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call lopsided(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call lopsided(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS) call lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
call lopsided(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA) call lopsided_kodis(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA,eps)
call lopsided(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided_kodis(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call lopsided(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA) call lopsided_kodis(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA,eps)
call lopsided(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided_kodis(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call lopsided(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided_kodis(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call lopsided(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided_kodis(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call lopsided(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS) call lopsided_kodis(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS,eps)
call lopsided(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS) call lopsided_kodis(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS,eps)
call lopsided(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA) call lopsided_kodis(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,eps)
!!
#if 1
!! bam does not apply dissipation on gauge variables
call lopsided_kodis(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS,eps)
#if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7)
call lopsided_kodis(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS,eps)
call lopsided_kodis(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS,eps)
call lopsided_kodis(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA,eps)
#endif
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
call lopsided_kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS,eps)
call lopsided_kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS,eps)
call lopsided_kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA,eps)
#endif
#else
! No dissipation on gauge variables (advection only)
call lopsided(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS)
#if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7) #if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7)
call lopsided(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS) call lopsided(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS)
call lopsided(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS) call lopsided(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS)
call lopsided(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA) call lopsided(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA)
#endif #endif
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7) #if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
call lopsided(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS) call lopsided(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS)
call lopsided(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS) call lopsided(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS)
call lopsided(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA) call lopsided(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA)
#endif #endif
if(eps>0)then
! usual Kreiss-Oliger dissipation
call kodis(ex,X,Y,Z,chi,chi_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,trK,trK_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,dxx,gxx_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,gxy,gxy_rhs,AAS,Symmetry,eps)
call kodis(ex,X,Y,Z,gxz,gxz_rhs,ASA,Symmetry,eps)
call kodis(ex,X,Y,Z,dyy,gyy_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,gyz,gyz_rhs,SAA,Symmetry,eps)
call kodis(ex,X,Y,Z,dzz,gzz_rhs,SSS,Symmetry,eps)
#if 0
#define i 42
#define j 40
#define k 40
if(Lev == 1)then
write(*,*) X(i),Y(j),Z(k)
write(*,*) "before",Axx_rhs(i,j,k)
endif
#undef i
#undef j
#undef k
!!stop
#endif #endif
call kodis(ex,X,Y,Z,Axx,Axx_rhs,SSS,Symmetry,eps)
#if 0
#define i 42
#define j 40
#define k 40
if(Lev == 1)then
write(*,*) X(i),Y(j),Z(k)
write(*,*) "after",Axx_rhs(i,j,k)
endif
#undef i
#undef j
#undef k
!!stop
#endif
call kodis(ex,X,Y,Z,Axy,Axy_rhs,AAS,Symmetry,eps)
call kodis(ex,X,Y,Z,Axz,Axz_rhs,ASA,Symmetry,eps)
call kodis(ex,X,Y,Z,Ayy,Ayy_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,Ayz,Ayz_rhs,SAA,Symmetry,eps)
call kodis(ex,X,Y,Z,Azz,Azz_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,Gamx,Gamx_rhs,ASS,Symmetry,eps)
call kodis(ex,X,Y,Z,Gamy,Gamy_rhs,SAS,Symmetry,eps)
call kodis(ex,X,Y,Z,Gamz,Gamz_rhs,SSA,Symmetry,eps)
#if 1
!! bam does not apply dissipation on gauge variables
call kodis(ex,X,Y,Z,Lap,Lap_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,betax,betax_rhs,ASS,Symmetry,eps)
call kodis(ex,X,Y,Z,betay,betay_rhs,SAS,Symmetry,eps)
call kodis(ex,X,Y,Z,betaz,betaz_rhs,SSA,Symmetry,eps)
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
call kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,ASS,Symmetry,eps)
call kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,SAS,Symmetry,eps)
call kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,SSA,Symmetry,eps)
#endif
#endif
endif
if(co == 0)then if(co == 0)then
! ham_Res = trR + 2/3 * K^2 - A_ij * A^ij - 16 * PI * rho ! ham_Res = trR + 2/3 * K^2 - A_ij * A^ij - 16 * PI * rho

File diff suppressed because it is too large Load Diff

View File

@@ -1,92 +1,107 @@
#ifndef CGH_H #ifndef CGH_H
#define CGH_H #define CGH_H
#include <mpi.h> #include <mpi.h>
#include "MyList.h" #include "MyList.h"
#include "MPatch.h" #include "MPatch.h"
#include "macrodef.h" #include "macrodef.h"
#include "monitor.h" #include "monitor.h"
#include "Parallel.h" #include "Parallel.h"
class cgh class cgh
{ {
public: public:
int levels, movls, BH_num_in; int levels, movls, BH_num_in;
// information of boxes // information of boxes
int *grids; int *grids;
double ***bbox; double ***bbox;
int ***shape; int ***shape;
double ***handle; double ***handle;
double ***Porgls; double ***Porgls;
double *Lt; double *Lt;
// information of Patch list // information of Patch list
MyList<Patch> **PatL; MyList<Patch> **PatL;
// information of OutBdLow2Hi point list and Restrict point list // information of OutBdLow2Hi point list and Restrict point list
#if (RPB == 1) #if (RPB == 1)
MyList<Parallel::pointstru_bam> **bdsul, **rsul; MyList<Parallel::pointstru_bam> **bdsul, **rsul;
#endif #endif
#if (PSTR == 1 || PSTR == 2 || PSTR == 3) #if (PSTR == 1 || PSTR == 2 || PSTR == 3)
int mylev; int mylev;
int *start_rank, *end_rank; int *start_rank, *end_rank;
MPI_Comm *Commlev; MPI_Comm *Commlev;
#endif #endif
protected: protected:
int ingfs, fngfs; int ingfs, fngfs;
static constexpr double ratio = 0.75; static constexpr double ratio = 0.75;
int trfls; int trfls;
public: public:
cgh(int ingfsi, int fngfsi, int Symmetry, char *filename, int checkrun, monitor *ErrorMonitor); cgh(int ingfsi, int fngfsi, int Symmetry, char *filename, int checkrun, monitor *ErrorMonitor);
~cgh(); ~cgh();
void compose_cgh(int nprocs); void compose_cgh(int nprocs);
void sethandle(monitor *ErrorMonitor); void sethandle(monitor *ErrorMonitor);
void checkPatchList(MyList<Patch> *PatL, bool buflog); void checkPatchList(MyList<Patch> *PatL, bool buflog);
void Regrid(int Symmetry, int BH_num, double **Porgbr, double **Porg0, void Regrid(int Symmetry, int BH_num, double **Porgbr, double **Porg0,
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList, bool BB, MyList<var> *FutureList, MyList<var> *tmList, bool BB,
monitor *ErrorMonitor); monitor *ErrorMonitor);
void Regrid_fake(int Symmetry, int BH_num, double **Porgbr, double **Porg0, void Regrid_fake(int Symmetry, int BH_num, double **Porgbr, double **Porg0,
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList, bool BB, MyList<var> *FutureList, MyList<var> *tmList, bool BB,
monitor *ErrorMonitor); monitor *ErrorMonitor);
void recompose_cgh(int nprocs, bool *lev_flag, void recompose_cgh(int nprocs, bool *lev_flag,
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList, MyList<var> *FutureList, MyList<var> *tmList,
int Symmetry, bool BB); int Symmetry, bool BB);
void recompose_cgh_fake(int nprocs, bool *lev_flag, void recompose_cgh_fake(int nprocs, bool *lev_flag,
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList, MyList<var> *FutureList, MyList<var> *tmList,
int Symmetry, bool BB); int Symmetry, bool BB);
void read_bbox(int Symmetry, char *filename); void read_bbox(int Symmetry, char *filename);
MyList<Patch> *construct_patchlist(int lev, int Symmetry); MyList<Patch> *construct_patchlist(int lev, int Symmetry);
bool Interp_One_Point(MyList<var> *VarList, bool Interp_One_Point(MyList<var> *VarList,
double *XX, /*input global Cartesian coordinate*/ double *XX, /*input global Cartesian coordinate*/
double *Shellf, int Symmetry); double *Shellf, int Symmetry);
void recompose_cgh_Onelevel(int nprocs, int lev, void recompose_cgh_Onelevel(int nprocs, int lev,
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList, MyList<var> *FutureList, MyList<var> *tmList,
int Symmetry, bool BB); int Symmetry, bool BB);
void Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0, void Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0,
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList, bool BB, MyList<var> *FutureList, MyList<var> *tmList, bool BB,
monitor *ErrorMonitor); monitor *ErrorMonitor);
void Regrid_Onelevel_aux(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0, void Regrid_Onelevel_aux(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0,
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList, bool BB, MyList<var> *FutureList, MyList<var> *tmList, bool BB,
monitor *ErrorMonitor); monitor *ErrorMonitor);
void settrfls(const int lev); void settrfls(const int lev);
#if (PSTR == 1 || PSTR == 2 || PSTR == 3) #if (PSTR == 1 || PSTR == 2 || PSTR == 3)
void construct_mylev(int nprocs); void construct_mylev(int nprocs);
#endif #endif
};
// Load balancing support
#endif /* CGH_H */ bool enable_load_balance; // Enable load balancing
int load_balance_check_interval; // Check interval (in time steps)
int current_time_step; // Current time step counter
double *rank_interp_times; // Store interpolation times for each rank
int *heavy_ranks; // Store heavy rank numbers
int num_heavy_ranks; // Number of heavy ranks
void init_load_balance(int nprocs);
void update_interp_time(int rank, double time);
bool check_and_rebalance(int nprocs, int lev,
MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList,
int Symmetry, bool BB);
};
#endif /* CGH_H */

View File

@@ -69,10 +69,12 @@
fy = ZEO fy = ZEO
fz = ZEO fz = ZEO
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
!DIR$ UNROLL PARTIAL(4)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
! x direction ! x direction
if(i+1 <= imax .and. i-1 >= imin)then if(i+1 <= imax .and. i-1 >= imin)then
! !
! - f(i-1) + f(i+1) ! - f(i-1) + f(i+1)
@@ -371,6 +373,8 @@
fxz = ZEO fxz = ZEO
fyz = ZEO fyz = ZEO
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
!DIR$ UNROLL PARTIAL(4)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1

View File

@@ -883,13 +883,17 @@ subroutine symmetry_bd(ord,extc,func,funcc,SoA)
integer::i integer::i
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
funcc(1:extc(1),1:extc(2),1:extc(3)) = func funcc(1:extc(1),1:extc(2),1:extc(3)) = func
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
do i=0,ord-1 do i=0,ord-1
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1) funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1)
enddo enddo
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
do i=0,ord-1 do i=0,ord-1
funcc(:,-i,1:extc(3)) = funcc(:,i+1,1:extc(3))*SoA(2) funcc(:,-i,1:extc(3)) = funcc(:,i+1,1:extc(3))*SoA(2)
enddo enddo
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
do i=0,ord-1 do i=0,ord-1
funcc(:,:,-i) = funcc(:,:,i+1)*SoA(3) funcc(:,:,-i) = funcc(:,:,i+1)*SoA(3)
enddo enddo
@@ -1112,6 +1116,7 @@ end subroutine d2dump
! Lagrangian polynomial interpolation ! Lagrangian polynomial interpolation
!------------------------------------------------------------------------------ !------------------------------------------------------------------------------
!DIR$ ATTRIBUTES FORCEINLINE :: polint
subroutine polint(xa, ya, x, y, dy, ordn) subroutine polint(xa, ya, x, y, dy, ordn)
implicit none implicit none

View File

@@ -65,6 +65,8 @@ real*8,intent(in) :: eps
! dx^4 ! dx^4
! note the sign (-1)^r-1, now r=2 ! note the sign (-1)^r-1, now r=2
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
!DIR$ UNROLL PARTIAL(4)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)

View File

@@ -487,6 +487,201 @@ subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
end subroutine lopsided end subroutine lopsided
!-----------------------------------------------------------------------------
! Combined advection (lopsided) + Kreiss-Oliger dissipation (kodis)
! Shares the symmetry_bd buffer fh, eliminating one full-grid copy per call.
! Mathematically identical to calling lopsided then kodis separately.
!-----------------------------------------------------------------------------
subroutine lopsided_kodis(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA,eps)
implicit none
!~~~~~~> Input parameters:
integer, intent(in) :: ex(1:3),Symmetry
real*8, intent(in) :: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
real*8,dimension(ex(1),ex(2),ex(3)),intent(in) :: f,Sfx,Sfy,Sfz
real*8,dimension(ex(1),ex(2),ex(3)),intent(inout):: f_rhs
real*8,dimension(3),intent(in) ::SoA
real*8,intent(in) :: eps
!~~~~~~> local variables:
! note index -2,-1,0, so we have 3 extra points
real*8,dimension(-2:ex(1),-2:ex(2),-2:ex(3)) :: fh
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
real*8 :: dX,dY,dZ
real*8 :: d12dx,d12dy,d12dz,d2dx,d2dy,d2dz
real*8, parameter :: ZEO=0.d0,ONE=1.d0, F3=3.d0
real*8, parameter :: TWO=2.d0,F6=6.0d0,F18=1.8d1
real*8, parameter :: F12=1.2d1, F10=1.d1,EIT=8.d0
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
! kodis parameters
real*8, parameter :: SIX=6.d0,FIT=1.5d1,TWT=2.d1
real*8, parameter :: cof=6.4d1 ! 2^6
dX = X(2)-X(1)
dY = Y(2)-Y(1)
dZ = Z(2)-Z(1)
d12dx = ONE/F12/dX
d12dy = ONE/F12/dY
d12dz = ONE/F12/dZ
d2dx = ONE/TWO/dX
d2dy = ONE/TWO/dY
d2dz = ONE/TWO/dZ
imax = ex(1)
jmax = ex(2)
kmax = ex(3)
imin = 1
jmin = 1
kmin = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -2
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -2
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -2
! Single symmetry_bd call shared by both advection and dissipation
call symmetry_bd(3,ex,f,fh,SoA)
! ---- Advection (lopsided) loop ----
! upper bound set ex-1 only for efficiency,
! the loop body will set ex 0 also
do k=1,ex(3)-1
do j=1,ex(2)-1
do i=1,ex(1)-1
! x direction
if(Sfx(i,j,k) > ZEO)then
if(i+3 <= imax)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(-F3*fh(i-1,j,k)-F10*fh(i,j,k)+F18*fh(i+1,j,k) &
-F6*fh(i+2,j,k)+ fh(i+3,j,k))
elseif(i+2 <= imax)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
elseif(i+1 <= imax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfx(i,j,k)*d12dx*(-F3*fh(i+1,j,k)-F10*fh(i,j,k)+F18*fh(i-1,j,k) &
-F6*fh(i-2,j,k)+ fh(i-3,j,k))
endif
elseif(Sfx(i,j,k) < ZEO)then
if(i-3 >= imin)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfx(i,j,k)*d12dx*(-F3*fh(i+1,j,k)-F10*fh(i,j,k)+F18*fh(i-1,j,k) &
-F6*fh(i-2,j,k)+ fh(i-3,j,k))
elseif(i-2 >= imin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
elseif(i-1 >= imin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(-F3*fh(i-1,j,k)-F10*fh(i,j,k)+F18*fh(i+1,j,k) &
-F6*fh(i+2,j,k)+ fh(i+3,j,k))
endif
endif
! y direction
if(Sfy(i,j,k) > ZEO)then
if(j+3 <= jmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(-F3*fh(i,j-1,k)-F10*fh(i,j,k)+F18*fh(i,j+1,k) &
-F6*fh(i,j+2,k)+ fh(i,j+3,k))
elseif(j+2 <= jmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
elseif(j+1 <= jmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfy(i,j,k)*d12dy*(-F3*fh(i,j+1,k)-F10*fh(i,j,k)+F18*fh(i,j-1,k) &
-F6*fh(i,j-2,k)+ fh(i,j-3,k))
endif
elseif(Sfy(i,j,k) < ZEO)then
if(j-3 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfy(i,j,k)*d12dy*(-F3*fh(i,j+1,k)-F10*fh(i,j,k)+F18*fh(i,j-1,k) &
-F6*fh(i,j-2,k)+ fh(i,j-3,k))
elseif(j-2 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
elseif(j-1 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(-F3*fh(i,j-1,k)-F10*fh(i,j,k)+F18*fh(i,j+1,k) &
-F6*fh(i,j+2,k)+ fh(i,j+3,k))
endif
endif
! z direction
if(Sfz(i,j,k) > ZEO)then
if(k+3 <= kmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k-1)-F10*fh(i,j,k)+F18*fh(i,j,k+1) &
-F6*fh(i,j,k+2)+ fh(i,j,k+3))
elseif(k+2 <= kmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
elseif(k+1 <= kmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k+1)-F10*fh(i,j,k)+F18*fh(i,j,k-1) &
-F6*fh(i,j,k-2)+ fh(i,j,k-3))
endif
elseif(Sfz(i,j,k) < ZEO)then
if(k-3 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k+1)-F10*fh(i,j,k)+F18*fh(i,j,k-1) &
-F6*fh(i,j,k-2)+ fh(i,j,k-3))
elseif(k-2 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
elseif(k-1 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k-1)-F10*fh(i,j,k)+F18*fh(i,j,k+1) &
-F6*fh(i,j,k+2)+ fh(i,j,k+3))
endif
endif
enddo
enddo
enddo
! ---- Dissipation (kodis) loop ----
if(eps > ZEO) then
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
if(i-3 >= imin .and. i+3 <= imax .and. &
j-3 >= jmin .and. j+3 <= jmax .and. &
k-3 >= kmin .and. k+3 <= kmax) then
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof *( ( &
(fh(i-3,j,k)+fh(i+3,j,k)) - &
SIX*(fh(i-2,j,k)+fh(i+2,j,k)) + &
FIT*(fh(i-1,j,k)+fh(i+1,j,k)) - &
TWT* fh(i,j,k) )/dX + &
( &
(fh(i,j-3,k)+fh(i,j+3,k)) - &
SIX*(fh(i,j-2,k)+fh(i,j+2,k)) + &
FIT*(fh(i,j-1,k)+fh(i,j+1,k)) - &
TWT* fh(i,j,k) )/dY + &
( &
(fh(i,j,k-3)+fh(i,j,k+3)) - &
SIX*(fh(i,j,k-2)+fh(i,j,k+2)) + &
FIT*(fh(i,j,k-1)+fh(i,j,k+1)) - &
TWT* fh(i,j,k) )/dZ )
endif
enddo
enddo
enddo
endif
return
end subroutine lopsided_kodis
#elif (ghost_width == 4) #elif (ghost_width == 4)
! sixth order code ! sixth order code
! Compute advection terms in right hand sides of field equations ! Compute advection terms in right hand sides of field equations

View File

@@ -13,7 +13,7 @@ LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore
## Aggressive optimization flags + PGO Phase 2 (profile-guided optimization) ## Aggressive optimization flags + PGO Phase 2 (profile-guided optimization)
## -fprofile-instr-use: use collected profile data to guide optimization decisions ## -fprofile-instr-use: use collected profile data to guide optimization decisions
## (branch prediction, basic block layout, inlining, loop unrolling) ## (branch prediction, basic block layout, inlining, loop unrolling)
PROFDATA = /home/amss/AMSS-NCKU/pgo_profile/default.profdata PROFDATA = ../../pgo_profile/default.profdata
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \ CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
-fprofile-instr-use=$(PROFDATA) \ -fprofile-instr-use=$(PROFDATA) \
-Dfortran3 -Dnewc -I${MKLROOT}/include -Dfortran3 -Dnewc -I${MKLROOT}/include

File diff suppressed because it is too large Load Diff

29
parallel_plot_helper.py Normal file
View File

@@ -0,0 +1,29 @@
import multiprocessing
def run_plot_task(task):
"""Execute a single plotting task.
Parameters
----------
task : tuple
A tuple of (function, args_tuple) where function is a callable
plotting function and args_tuple contains its arguments.
"""
func, args = task
return func(*args)
def run_plot_tasks_parallel(plot_tasks):
"""Execute a list of independent plotting tasks in parallel.
Uses the 'fork' context to create worker processes so that the main
script is NOT re-imported/re-executed in child processes.
Parameters
----------
plot_tasks : list of tuples
Each element is (function, args_tuple).
"""
ctx = multiprocessing.get_context('fork')
with ctx.Pool() as pool:
pool.map(run_plot_task, plot_tasks)

Binary file not shown.

Binary file not shown.

View File

@@ -11,6 +11,8 @@
import numpy ## numpy for array operations import numpy ## numpy for array operations
import scipy ## scipy for interpolation and signal processing import scipy ## scipy for interpolation and signal processing
import math import math
import matplotlib
matplotlib.use('Agg') ## use non-interactive backend for multiprocessing safety
import matplotlib.pyplot as plt ## matplotlib for plotting import matplotlib.pyplot as plt ## matplotlib for plotting
import os ## os for system/file operations import os ## os for system/file operations

View File

@@ -8,16 +8,23 @@
## ##
################################################# #################################################
## Restrict OpenMP to one thread per process so that running
## many workers in parallel does not create an O(workers * BLAS_threads)
## thread explosion. The variable MUST be set before numpy/scipy
## are imported, because the BLAS library reads them only at load time.
import os
os.environ.setdefault("OMP_NUM_THREADS", "1")
import numpy import numpy
import scipy import scipy
import matplotlib
matplotlib.use('Agg') ## use non-interactive backend for multiprocessing safety
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm from matplotlib.colors import LogNorm
from mpl_toolkits.mplot3d import Axes3D from mpl_toolkits.mplot3d import Axes3D
## import torch ## import torch
import AMSS_NCKU_Input as input_data import AMSS_NCKU_Input as input_data
import os
######################################################################################### #########################################################################################
@@ -192,3 +199,19 @@ def get_data_xy( Rmin, Rmax, n, data0, time, figure_title, figure_outdir ):
#################################################################################### ####################################################################################
####################################################################################
## Allow this module to be run as a standalone script so that each
## binary-data plot can be executed in a fresh subprocess whose BLAS
## environment variables (set above) take effect before numpy loads.
##
## Usage: python3 plot_binary_data.py <filename> <binary_outdir> <figure_outdir>
####################################################################################
if __name__ == '__main__':
import sys
if len(sys.argv) != 4:
print(f"Usage: {sys.argv[0]} <filename> <binary_outdir> <figure_outdir>")
sys.exit(1)
plot_binary_data(sys.argv[1], sys.argv[2], sys.argv[3])

View File

@@ -8,6 +8,8 @@
################################################# #################################################
import numpy ## numpy for array operations import numpy ## numpy for array operations
import matplotlib
matplotlib.use('Agg') ## use non-interactive backend for multiprocessing safety
import matplotlib.pyplot as plt ## matplotlib for plotting import matplotlib.pyplot as plt ## matplotlib for plotting
from mpl_toolkits.mplot3d import Axes3D ## needed for 3D plots from mpl_toolkits.mplot3d import Axes3D ## needed for 3D plots
import glob import glob
@@ -15,6 +17,9 @@ import os ## operating system utilities
import plot_binary_data import plot_binary_data
import AMSS_NCKU_Input as input_data import AMSS_NCKU_Input as input_data
import subprocess
import sys
import multiprocessing
# plt.rcParams['text.usetex'] = True ## enable LaTeX fonts in plots # plt.rcParams['text.usetex'] = True ## enable LaTeX fonts in plots
@@ -50,10 +55,40 @@ def generate_binary_data_plot( binary_outdir, figure_outdir ):
file_list.append(x) file_list.append(x)
print(x) print(x)
## Plot each file in the list ## Plot each file in parallel using subprocesses.
## Each subprocess is a fresh Python process where the BLAS thread-count
## environment variables (set at the top of plot_binary_data.py) take
## effect before numpy is imported. This avoids the thread explosion
## that occurs when multiprocessing.Pool with 'fork' context inherits
## already-initialized multi-threaded BLAS from the parent.
script = os.path.join( os.path.dirname(__file__), "plot_binary_data.py" )
max_workers = min( multiprocessing.cpu_count(), len(file_list) ) if file_list else 0
running = []
failed = []
for filename in file_list: for filename in file_list:
print(filename) print(filename)
plot_binary_data.plot_binary_data(filename, binary_outdir, figure_outdir) proc = subprocess.Popen(
[sys.executable, script, filename, binary_outdir, figure_outdir],
)
running.append( (proc, filename) )
## Keep at most max_workers subprocesses active at a time
if len(running) >= max_workers:
p, fn = running.pop(0)
p.wait()
if p.returncode != 0:
failed.append(fn)
## Wait for all remaining subprocesses to finish
for p, fn in running:
p.wait()
if p.returncode != 0:
failed.append(fn)
if failed:
print( " WARNING: the following binary data plots failed:" )
for fn in failed:
print( " ", fn )
print( ) print( )
print( " Binary Data Plot Has been Finished " ) print( " Binary Data Plot Has been Finished " )