Compare commits

..

6 Commits

Author SHA1 Message Date
cc06e30404 Apply async Sync optimization to Z4c_class using Sync_start/finish pattern
Replaces blocking Parallel::Sync + MPI_Allreduce in Z4c_class Step() with
non-blocking MPI_Iallreduce overlapped with Sync_start/Sync_finish, matching
the pattern already used in bssn_class on cjy-oneapi-opus-hotfix. Covers both ABEtype==2
and CPBC variants (predictor + corrector = 4 call sites).

Cherry-picked optimization from afd4006, adapted to SyncCache
infrastructure instead of the separate SyncPlan API.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-20 09:58:26 +08:00
25c79dc7cd Merge lopsided advection + kodis dissipation to share symmetry_bd buffer
Cherry-picked from 38c2c30.
2026-02-20 09:57:51 +08:00
a725d34dd3 Don't hardcode pgo profile path 2026-02-20 08:48:25 +08:00
2791d2e225 Merge pull request 'PGO updated' (#1) from cjy-oneapi-opus-hotfix into main
Reviewed-on: #1
2026-02-11 19:17:35 +08:00
72ce153e48 Merge cjy-oneapi-opus-hotfix into main 2026-02-11 19:15:12 +08:00
CGH0S7
79af79d471 baseline updated 2026-02-05 19:53:55 +08:00
13 changed files with 14025 additions and 14937 deletions

View File

@@ -66,8 +66,7 @@ if os.path.exists(File_directory):
## Prompt whether to overwrite the existing directory ## Prompt whether to overwrite the existing directory
while True: while True:
try: try:
## inputvalue = input() inputvalue = input()
inputvalue = "continue"
## If the user agrees to overwrite, proceed and remove the existing directory ## If the user agrees to overwrite, proceed and remove the existing directory
if ( inputvalue == "continue" ): if ( inputvalue == "continue" ):
print( " Continue the calculation !!! " ) print( " Continue the calculation !!! " )

File diff suppressed because it is too large Load Diff

View File

@@ -24,7 +24,6 @@ using namespace std;
#endif #endif
#include <mpi.h> #include <mpi.h>
#include <memory.h>
#include "MyList.h" #include "MyList.h"
#include "Block.h" #include "Block.h"
#include "Parallel.h" #include "Parallel.h"

File diff suppressed because it is too large Load Diff

View File

@@ -1,235 +1,213 @@
#ifndef PARALLEL_H #ifndef PARALLEL_H
#define PARALLEL_H #define PARALLEL_H
#include <iostream> #include <iostream>
#include <iomanip> #include <iomanip>
#include <fstream> #include <fstream>
#include <cstdlib> #include <cstdlib>
#include <cstdio> #include <cstdio>
#include <string> #include <string>
#include <cmath> #include <cmath>
#include <new> #include <new>
using namespace std; using namespace std;
#include <memory.h>
#include "Parallel_bam.h" #include "Parallel_bam.h"
#include "var.h" #include "var.h"
#include "MPatch.h" #include "MPatch.h"
#include "Block.h" #include "Block.h"
#include "MyList.h" #include "MyList.h"
#include "macrodef.h" //need dim; ghost_width; CONTRACT #include "macrodef.h" //need dim; ghost_width; CONTRACT
namespace Parallel namespace Parallel
{ {
struct gridseg struct gridseg
{ {
double llb[dim]; double llb[dim];
double uub[dim]; double uub[dim];
int shape[dim]; int shape[dim];
double illb[dim], iuub[dim]; // only use for OutBdLow2Hi double illb[dim], iuub[dim]; // only use for OutBdLow2Hi
Block *Bg; Block *Bg;
}; };
int partition1(int &nx, int split_size, int min_width, int cpusize, int shape); // special for 1 dimension int partition1(int &nx, int split_size, int min_width, int cpusize, int shape); // special for 1 dimension
int partition2(int *nxy, int split_size, int *min_width, int cpusize, int *shape); // special for 2 dimensions int partition2(int *nxy, int split_size, int *min_width, int cpusize, int *shape); // special for 2 dimensions
int partition3(int *nxyz, int split_size, int *min_width, int cpusize, int *shape); int partition3(int *nxyz, int split_size, int *min_width, int cpusize, int *shape);
MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks
MyList<Block> *distribute_hard(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks void KillBlocks(MyList<Patch> *PatchLIST);
Block* splitHotspotBlock(MyList<Block>* &BlL, int _dim,
int ib0_orig, int ib3_orig, void setfunction(MyList<Block> *BlL, var *vn, double func(double x, double y, double z));
int jb1_orig, int jb4_orig, void setfunction(int rank, MyList<Block> *BlL, var *vn, double func(double x, double y, double z));
int kb2_orig, int kb5_orig, void writefile(double time, int nx, int ny, int nz, double xmin, double xmax, double ymin, double ymax,
Patch* PP, int r_left, int r_right, double zmin, double zmax, char *filename, double *data_out);
int ingfsi, int fngfsi, bool periodic, void writefile(double time, int nx, int ny, double xmin, double xmax, double ymin, double ymax,
Block* &split_first_block, Block* &split_last_block); char *filename, double *datain);
Block* createMappedBlock(MyList<Block>* &BlL, int _dim, int* shape, double* bbox, void getarrayindex(int DIM, int *shape, int *index, int n);
int block_id, int ingfsi, int fngfsi, int lev); int getarraylocation(int DIM, int *shape, int *index);
void KillBlocks(MyList<Patch> *PatchLIST); void copy(int DIM, double *llbout, double *uubout, int *Dshape, double *DD, double *llbin, double *uubin,
int *shape, double *datain, double *llb, double *uub);
void setfunction(MyList<Block> *BlL, var *vn, double func(double x, double y, double z)); void Dump_CPU_Data(MyList<Block> *BlL, MyList<var> *DumpList, char *tag, double time, double dT);
void setfunction(int rank, MyList<Block> *BlL, var *vn, double func(double x, double y, double z)); void Dump_Data(MyList<Patch> *PL, MyList<var> *DumpList, char *tag, double time, double dT);
void writefile(double time, int nx, int ny, int nz, double xmin, double xmax, double ymin, double ymax, void Dump_Data(Patch *PP, MyList<var> *DumpList, char *tag, double time, double dT, int grd);
double zmin, double zmax, char *filename, double *data_out); double *Collect_Data(Patch *PP, var *VP);
void writefile(double time, int nx, int ny, double xmin, double xmax, double ymin, double ymax, void d2Dump_Data(MyList<Patch> *PL, MyList<var> *DumpList, char *tag, double time, double dT);
char *filename, double *datain); void d2Dump_Data(Patch *PP, MyList<var> *DumpList, char *tag, double time, double dT, int grd);
void getarrayindex(int DIM, int *shape, int *index, int n); void Dump_Data0(Patch *PP, MyList<var> *DumpList, char *tag, double time, double dT);
int getarraylocation(int DIM, int *shape, int *index); double global_interp(int DIM, int *ext, double **CoX, double *datain,
void copy(int DIM, double *llbout, double *uubout, int *Dshape, double *DD, double *llbin, double *uubin, double *poX, int ordn, double *SoA, int Symmetry);
int *shape, double *datain, double *llb, double *uub); double global_interp(int DIM, int *ext, double **CoX, double *datain,
void Dump_CPU_Data(MyList<Block> *BlL, MyList<var> *DumpList, char *tag, double time, double dT); double *poX, int ordn);
void Dump_Data(MyList<Patch> *PL, MyList<var> *DumpList, char *tag, double time, double dT); double Lagrangian_Int(double x, int npts, double *xpts, double *funcvals);
void Dump_Data(Patch *PP, MyList<var> *DumpList, char *tag, double time, double dT, int grd); double LagrangePoly(double x, int pt, int npts, double *xpts);
double *Collect_Data(Patch *PP, var *VP); MyList<gridseg> *build_complete_gsl(Patch *Pat);
void d2Dump_Data(MyList<Patch> *PL, MyList<var> *DumpList, char *tag, double time, double dT); MyList<gridseg> *build_complete_gsl(MyList<Patch> *PatL);
void d2Dump_Data(Patch *PP, MyList<var> *DumpList, char *tag, double time, double dT, int grd); MyList<gridseg> *build_complete_gsl_virtual(MyList<Patch> *PatL);
void Dump_Data0(Patch *PP, MyList<var> *DumpList, char *tag, double time, double dT); MyList<gridseg> *build_complete_gsl_virtual2(MyList<Patch> *PatL); // - buffer
double global_interp(int DIM, int *ext, double **CoX, double *datain, MyList<gridseg> *build_owned_gsl0(Patch *Pat, int rank_in); // - ghost without extension, special for Sync usage
double *poX, int ordn, double *SoA, int Symmetry); MyList<gridseg> *build_owned_gsl1(Patch *Pat, int rank_in); // - ghost, similar to build_owned_gsl0 but extend one point on left side for vertex grid
double global_interp(int DIM, int *ext, double **CoX, double *datain, MyList<gridseg> *build_owned_gsl2(Patch *Pat, int rank_in); // - buffer - ghost
double *poX, int ordn); MyList<gridseg> *build_owned_gsl3(Patch *Pat, int rank_in, int Symmetry); // - ghost - BD ghost
double Lagrangian_Int(double x, int npts, double *xpts, double *funcvals); MyList<gridseg> *build_owned_gsl4(Patch *Pat, int rank_in, int Symmetry); // - buffer - ghost - BD ghost
double LagrangePoly(double x, int pt, int npts, double *xpts); MyList<gridseg> *build_owned_gsl5(Patch *Pat, int rank_in); // similar to build_owned_gsl2 but no extension
MyList<gridseg> *build_complete_gsl(Patch *Pat); MyList<gridseg> *build_owned_gsl(MyList<Patch> *PatL, int rank_in, int type, int Symmetry);
MyList<gridseg> *build_complete_gsl(MyList<Patch> *PatL); void build_gstl(MyList<gridseg> *srci, MyList<gridseg> *dsti, MyList<gridseg> **out_src, MyList<gridseg> **out_dst);
MyList<gridseg> *build_complete_gsl_virtual(MyList<Patch> *PatL); int data_packer(double *data, MyList<gridseg> *src, MyList<gridseg> *dst, int rank_in, int dir,
MyList<gridseg> *build_complete_gsl_virtual2(MyList<Patch> *PatL); // - buffer MyList<var> *VarLists, MyList<var> *VarListd, int Symmetry);
MyList<gridseg> *build_owned_gsl0(Patch *Pat, int rank_in); // - ghost without extension, special for Sync usage void transfer(MyList<gridseg> **src, MyList<gridseg> **dst,
MyList<gridseg> *build_owned_gsl1(Patch *Pat, int rank_in); // - ghost, similar to build_owned_gsl0 but extend one point on left side for vertex grid MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /*target */,
MyList<gridseg> *build_owned_gsl2(Patch *Pat, int rank_in); // - buffer - ghost int Symmetry);
MyList<gridseg> *build_owned_gsl3(Patch *Pat, int rank_in, int Symmetry); // - ghost - BD ghost int data_packermix(double *data, MyList<gridseg> *src, MyList<gridseg> *dst, int rank_in, int dir,
MyList<gridseg> *build_owned_gsl4(Patch *Pat, int rank_in, int Symmetry); // - buffer - ghost - BD ghost MyList<var> *VarLists, MyList<var> *VarListd, int Symmetry);
MyList<gridseg> *build_owned_gsl5(Patch *Pat, int rank_in); // similar to build_owned_gsl2 but no extension void transfermix(MyList<gridseg> **src, MyList<gridseg> **dst,
MyList<gridseg> *build_owned_gsl(MyList<Patch> *PatL, int rank_in, int type, int Symmetry); MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /*target */,
void build_gstl(MyList<gridseg> *srci, MyList<gridseg> *dsti, MyList<gridseg> **out_src, MyList<gridseg> **out_dst); int Symmetry);
int data_packer(double *data, MyList<gridseg> *src, MyList<gridseg> *dst, int rank_in, int dir, void Sync(Patch *Pat, MyList<var> *VarList, int Symmetry);
MyList<var> *VarLists, MyList<var> *VarListd, int Symmetry); void Sync(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry);
void transfer(MyList<gridseg> **src, MyList<gridseg> **dst, void Sync_merged(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry);
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /*target */,
int Symmetry); struct SyncCache {
int data_packermix(double *data, MyList<gridseg> *src, MyList<gridseg> *dst, int rank_in, int dir, bool valid;
MyList<var> *VarLists, MyList<var> *VarListd, int Symmetry); int cpusize;
void transfermix(MyList<gridseg> **src, MyList<gridseg> **dst, MyList<gridseg> **combined_src;
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /*target */, MyList<gridseg> **combined_dst;
int Symmetry); int *send_lengths;
void Sync(Patch *Pat, MyList<var> *VarList, int Symmetry); int *recv_lengths;
void Sync(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry); double **send_bufs;
void Sync_merged(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry); double **recv_bufs;
int *send_buf_caps;
struct SyncCache { int *recv_buf_caps;
bool valid; MPI_Request *reqs;
int cpusize; MPI_Status *stats;
MyList<gridseg> **combined_src; int max_reqs;
MyList<gridseg> **combined_dst; bool lengths_valid;
int *send_lengths; SyncCache();
int *recv_lengths; void invalidate();
double **send_bufs; void destroy();
double **recv_bufs; };
int *send_buf_caps;
int *recv_buf_caps; void Sync_cached(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, SyncCache &cache);
MPI_Request *reqs; void transfer_cached(MyList<gridseg> **src, MyList<gridseg> **dst,
MPI_Status *stats; MyList<var> *VarList1, MyList<var> *VarList2,
int max_reqs; int Symmetry, SyncCache &cache);
bool lengths_valid;
SyncCache(); struct AsyncSyncState {
void invalidate(); int req_no;
void destroy(); bool active;
}; AsyncSyncState() : req_no(0), active(false) {}
};
void Sync_cached(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, SyncCache &cache);
void transfer_cached(MyList<gridseg> **src, MyList<gridseg> **dst, void Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry,
MyList<var> *VarList1, MyList<var> *VarList2, SyncCache &cache, AsyncSyncState &state);
int Symmetry, SyncCache &cache); void Sync_finish(SyncCache &cache, AsyncSyncState &state,
MyList<var> *VarList, int Symmetry);
struct AsyncSyncState { void OutBdLow2Hi(Patch *Patc, Patch *Patf,
int req_no; MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
bool active; int Symmetry);
AsyncSyncState() : req_no(0), active(false) {} void OutBdLow2Hi(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
}; MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
int Symmetry);
void Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, void OutBdLow2Himix(Patch *Patc, Patch *Patf,
SyncCache &cache, AsyncSyncState &state); MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
void Sync_finish(SyncCache &cache, AsyncSyncState &state, int Symmetry);
MyList<var> *VarList, int Symmetry); void OutBdLow2Himix(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
void OutBdLow2Hi(Patch *Patc, Patch *Patf, MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */, int Symmetry);
int Symmetry); void Restrict_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
void OutBdLow2Hi(MyList<Patch> *PatcL, MyList<Patch> *PatfL, MyList<var> *VarList1, MyList<var> *VarList2,
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */, int Symmetry, SyncCache &cache);
int Symmetry); void OutBdLow2Hi_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
void OutBdLow2Himix(Patch *Patc, Patch *Patf, MyList<var> *VarList1, MyList<var> *VarList2,
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */, int Symmetry, SyncCache &cache);
int Symmetry); void OutBdLow2Himix_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
void OutBdLow2Himix(MyList<Patch> *PatcL, MyList<Patch> *PatfL, MyList<var> *VarList1, MyList<var> *VarList2,
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */, int Symmetry, SyncCache &cache);
int Symmetry); void Prolong(Patch *Patc, Patch *Patf,
void Restrict_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL, MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
MyList<var> *VarList1, MyList<var> *VarList2, int Symmetry);
int Symmetry, SyncCache &cache); void Prolongint(Patch *Patc, Patch *Patf,
void OutBdLow2Hi_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL, MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
MyList<var> *VarList1, MyList<var> *VarList2, int Symmetry);
int Symmetry, SyncCache &cache); void Restrict(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
void OutBdLow2Himix_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL, MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
MyList<var> *VarList1, MyList<var> *VarList2, int Symmetry);
int Symmetry, SyncCache &cache); void Restrict_after(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
void Prolong(Patch *Patc, Patch *Patf, MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */, int Symmetry); // for -ghost - BDghost
int Symmetry); MyList<Parallel::gridseg> *build_PhysBD_gsl(Patch *Pat);
void Prolongint(Patch *Patc, Patch *Patf, MyList<Parallel::gridseg> *build_ghost_gsl(MyList<Patch> *PatL);
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */, MyList<Parallel::gridseg> *build_ghost_gsl(Patch *Pat);
int Symmetry); MyList<Parallel::gridseg> *build_buffer_gsl(Patch *Pat);
void Restrict(MyList<Patch> *PatcL, MyList<Patch> *PatfL, MyList<Parallel::gridseg> *build_buffer_gsl(MyList<Patch> *PatL);
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */, MyList<Parallel::gridseg> *gsl_subtract(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B);
int Symmetry); MyList<Parallel::gridseg> *gs_subtract(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B);
void Restrict_after(MyList<Patch> *PatcL, MyList<Patch> *PatfL, MyList<Parallel::gridseg> *gsl_and(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B);
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */, MyList<Parallel::gridseg> *gs_and(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B);
int Symmetry); // for -ghost - BDghost MyList<Parallel::gridseg> *clone_gsl(MyList<Parallel::gridseg> *p, bool first_only);
MyList<Parallel::gridseg> *build_PhysBD_gsl(Patch *Pat); MyList<Parallel::gridseg> *build_bulk_gsl(Patch *Pat); // similar to build_owned_gsl0 but does not care rank issue
MyList<Parallel::gridseg> *build_ghost_gsl(MyList<Patch> *PatL); MyList<Parallel::gridseg> *build_bulk_gsl(Block *bp, Patch *Pat);
MyList<Parallel::gridseg> *build_ghost_gsl(Patch *Pat); void build_PhysBD_gstl(Patch *Pat, MyList<Parallel::gridseg> *srci, MyList<Parallel::gridseg> *dsti,
MyList<Parallel::gridseg> *build_buffer_gsl(Patch *Pat); MyList<Parallel::gridseg> **out_src, MyList<Parallel::gridseg> **out_dst);
MyList<Parallel::gridseg> *build_buffer_gsl(MyList<Patch> *PatL); void PeriodicBD(Patch *Pat, MyList<var> *VarList, int Symmetry);
MyList<Parallel::gridseg> *gsl_subtract(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B); double L2Norm(Patch *Pat, var *vf);
MyList<Parallel::gridseg> *gs_subtract(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B); void checkgsl(MyList<Parallel::gridseg> *pp, bool first_only);
MyList<Parallel::gridseg> *gsl_and(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B); void checkvarl(MyList<var> *pp, bool first_only);
MyList<Parallel::gridseg> *gs_and(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B); MyList<Parallel::gridseg> *divide_gsl(MyList<Parallel::gridseg> *p, Patch *Pat);
MyList<Parallel::gridseg> *clone_gsl(MyList<Parallel::gridseg> *p, bool first_only); MyList<Parallel::gridseg> *divide_gs(MyList<Parallel::gridseg> *p, Patch *Pat);
MyList<Parallel::gridseg> *build_bulk_gsl(Patch *Pat); // similar to build_owned_gsl0 but does not care rank issue void prepare_inter_time_level(Patch *Pat,
MyList<Parallel::gridseg> *build_bulk_gsl(Block *bp, Patch *Pat); MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */,
void build_PhysBD_gstl(Patch *Pat, MyList<Parallel::gridseg> *srci, MyList<Parallel::gridseg> *dsti, MyList<var> *VarList3 /* target (t+a*dt) */, int tindex);
MyList<Parallel::gridseg> **out_src, MyList<Parallel::gridseg> **out_dst); void prepare_inter_time_level(Patch *Pat,
void PeriodicBD(Patch *Pat, MyList<var> *VarList, int Symmetry); MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */,
double L2Norm(Patch *Pat, var *vf); MyList<var> *VarList3 /* source (t-dt) */, MyList<var> *VarList4 /* target (t+a*dt) */, int tindex);
void checkgsl(MyList<Parallel::gridseg> *pp, bool first_only); void prepare_inter_time_level(MyList<Patch> *PatL,
void checkvarl(MyList<var> *pp, bool first_only); MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */,
MyList<Parallel::gridseg> *divide_gsl(MyList<Parallel::gridseg> *p, Patch *Pat); MyList<var> *VarList3 /* target (t+a*dt) */, int tindex);
MyList<Parallel::gridseg> *divide_gs(MyList<Parallel::gridseg> *p, Patch *Pat); void prepare_inter_time_level(MyList<Patch> *Pat,
void prepare_inter_time_level(Patch *Pat, MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */,
MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */, MyList<var> *VarList3 /* source (t-dt) */, MyList<var> *VarList4 /* target (t+a*dt) */, int tindex);
MyList<var> *VarList3 /* target (t+a*dt) */, int tindex); void merge_gsl(MyList<gridseg> *&A, const double ratio);
void prepare_inter_time_level(Patch *Pat, bool merge_gs(MyList<gridseg> *D, MyList<gridseg> *B, MyList<gridseg> *&C, const double ratio);
MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */, // Add ghost region to tangent plane
MyList<var> *VarList3 /* source (t-dt) */, MyList<var> *VarList4 /* target (t+a*dt) */, int tindex); // we assume the grids have the same resolution
void prepare_inter_time_level(MyList<Patch> *PatL, void add_ghost_touch(MyList<gridseg> *&A);
MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */, void cut_gsl(MyList<gridseg> *&A);
MyList<var> *VarList3 /* target (t+a*dt) */, int tindex); bool cut_gs(MyList<gridseg> *D, MyList<gridseg> *B, MyList<gridseg> *&C);
void prepare_inter_time_level(MyList<Patch> *Pat, MyList<Parallel::gridseg> *gs_subtract_virtual(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B);
MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */, void fill_level_data(MyList<Patch> *PatLd, MyList<Patch> *PatLs, MyList<Patch> *PatcL,
MyList<var> *VarList3 /* source (t-dt) */, MyList<var> *VarList4 /* target (t+a*dt) */, int tindex); MyList<var> *OldList, MyList<var> *StateList, MyList<var> *FutureList,
void merge_gsl(MyList<gridseg> *&A, const double ratio); MyList<var> *tmList, int Symmetry, bool BB, bool CC);
bool merge_gs(MyList<gridseg> *D, MyList<gridseg> *B, MyList<gridseg> *&C, const double ratio); bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
// Add ghost region to tangent plane int NN, double **XX,
// we assume the grids have the same resolution double *Shellf, int Symmetry);
void add_ghost_touch(MyList<gridseg> *&A); void aligncheck(double *bbox0, double *bboxl, int lev, double *DH0, int *shape);
void cut_gsl(MyList<gridseg> *&A); bool point_locat_gsl(double *pox, MyList<Parallel::gridseg> *gsl);
bool cut_gs(MyList<gridseg> *D, MyList<gridseg> *B, MyList<gridseg> *&C); void checkpatchlist(MyList<Patch> *PatL, bool buflog);
MyList<Parallel::gridseg> *gs_subtract_virtual(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B);
void fill_level_data(MyList<Patch> *PatLd, MyList<Patch> *PatLs, MyList<Patch> *PatcL, double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here);
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *FutureList, bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
MyList<var> *tmList, int Symmetry, bool BB, bool CC); int NN, double **XX,
bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList, double *Shellf, int Symmetry, MPI_Comm Comm_here);
int NN, double **XX, #if (PSTR == 1 || PSTR == 2 || PSTR == 3)
double *Shellf, int Symmetry); MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
void aligncheck(double *bbox0, double *bboxl, int lev, double *DH0, int *shape); bool periodic, int start_rank, int end_rank, int nodes = 0);
bool point_locat_gsl(double *pox, MyList<Parallel::gridseg> *gsl); #endif
void checkpatchlist(MyList<Patch> *PatL, bool buflog); }
#endif /*PARALLEL_H */
double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here);
bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
int NN, double **XX,
double *Shellf, int Symmetry, MPI_Comm Comm_here);
#if (PSTR == 1 || PSTR == 2 || PSTR == 3)
MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
bool periodic, int start_rank, int end_rank, int nodes = 0);
// Redistribute blocks with time statistics for load balancing
MyList<Block> *distribute(MyList<Patch> *PatchLIST, MyList<Block> *OldBlockL,
int cpusize, int ingfsi, int fngfsi,
bool periodic, int start_rank, int end_rank, int nodes = 0);
#endif
// Dynamic load balancing: split blocks for heavy ranks
void split_heavy_blocks(MyList<Patch> *PatL, int *heavy_ranks, int num_heavy,
int split_factor, int cpusize, int ingfsi, int fngfsi);
// Check if load balancing is needed based on interpolation times
bool check_load_balance_need(double *rank_times, int nprocs, int &num_heavy, int *heavy_ranks);
}
#endif /*PARALLEL_H */

View File

@@ -321,22 +321,7 @@ void Z4c_class::Step(int lev, int YN)
} }
Pp = Pp->next; Pp = Pp->next;
} }
// check error information // NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls
{
int erh = ERROR;
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
}
if (ERROR)
{
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
if (myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime
<< ", lev = " << lev << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
#ifdef WithShell #ifdef WithShell
// evolve Shell Patches // evolve Shell Patches
@@ -354,9 +339,9 @@ void Z4c_class::Step(int lev, int YN)
{ {
#if (AGM == 0) #if (AGM == 0)
f_enforce_ga(cg->shape, f_enforce_ga(cg->shape,
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
#endif #endif
@@ -468,24 +453,16 @@ void Z4c_class::Step(int lev, int YN)
sPp = sPp->next; sPp = sPp->next;
} }
} }
// check error information // Non-blocking error reduction overlapped with Sync to hide Allreduce latency
MPI_Request err_req_pre;
{ {
int erh = ERROR; int erh = ERROR;
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_pre);
}
if (ERROR)
{
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
if (myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "find NaN in state variables on Shell Patches at t = " << PhysTime << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
} }
#endif #endif
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); Parallel::AsyncSyncState async_pre;
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
#ifdef WithShell #ifdef WithShell
if (lev == 0) if (lev == 0)
@@ -498,12 +475,30 @@ void Z4c_class::Step(int lev, int YN)
{ {
prev_clock = curr_clock; prev_clock = curr_clock;
curr_clock = clock(); curr_clock = clock();
cout << " Shell stuff synchronization used " cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl; << " seconds! " << endl;
} }
} }
#endif #endif
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
#ifdef WithShell
// Complete non-blocking error reduction and check
MPI_Wait(&err_req_pre, MPI_STATUS_IGNORE);
if (ERROR)
{
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
if (myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime
<< ", lev = " << lev << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
#endif
// for black hole position // for black hole position
if (BH_num > 0 && lev == GH->levels - 1) if (BH_num > 0 && lev == GH->levels - 1)
@@ -693,23 +688,7 @@ void Z4c_class::Step(int lev, int YN)
Pp = Pp->next; Pp = Pp->next;
} }
// check error information // NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls
{
int erh = ERROR;
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
}
if (ERROR)
{
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
if (myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
<< " variables at t = " << PhysTime
<< ", lev = " << lev << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
#ifdef WithShell #ifdef WithShell
// evolve Shell Patches // evolve Shell Patches
@@ -850,25 +829,16 @@ void Z4c_class::Step(int lev, int YN)
sPp = sPp->next; sPp = sPp->next;
} }
} }
// check error information // Non-blocking error reduction overlapped with Sync to hide Allreduce latency
MPI_Request err_req_cor;
{ {
int erh = ERROR; int erh = ERROR;
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor);
}
if (ERROR)
{
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
if (myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "find NaN on Shell Patches in RK4 substep#" << iter_count
<< " variables at t = " << PhysTime << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
} }
#endif #endif
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); Parallel::AsyncSyncState async_cor;
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
#ifdef WithShell #ifdef WithShell
if (lev == 0) if (lev == 0)
@@ -881,11 +851,30 @@ void Z4c_class::Step(int lev, int YN)
{ {
prev_clock = curr_clock; prev_clock = curr_clock;
curr_clock = clock(); curr_clock = clock();
cout << " Shell stuff synchronization used " cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl; << " seconds! " << endl;
} }
} }
#endif
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
#ifdef WithShell
// Complete non-blocking error reduction and check
MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE);
if (ERROR)
{
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
if (myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
<< " variables at t = " << PhysTime
<< ", lev = " << lev << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
#endif #endif
// for black hole position // for black hole position
if (BH_num > 0 && lev == GH->levels - 1) if (BH_num > 0 && lev == GH->levels - 1)
@@ -1252,22 +1241,7 @@ void Z4c_class::Step(int lev, int YN)
} }
} }
#endif #endif
// check error information // NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls
{
int erh = ERROR;
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
}
if (ERROR)
{
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
if (myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime
<< ", lev = " << lev << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
// evolve Shell Patches // evolve Shell Patches
if (lev == 0) if (lev == 0)
@@ -1542,23 +1516,15 @@ void Z4c_class::Step(int lev, int YN)
} }
#endif #endif
} }
// check error information // Non-blocking error reduction overlapped with Sync to hide Allreduce latency
MPI_Request err_req_pre;
{ {
int erh = ERROR; int erh = ERROR;
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_pre);
}
if (ERROR)
{
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
if (myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "find NaN in state variables on Shell Patches at t = " << PhysTime << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
} }
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); Parallel::AsyncSyncState async_pre;
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
if (lev == 0) if (lev == 0)
{ {
@@ -1570,8 +1536,8 @@ void Z4c_class::Step(int lev, int YN)
{ {
prev_clock = curr_clock; prev_clock = curr_clock;
curr_clock = clock(); curr_clock = clock();
cout << " Shell stuff synchronization used " cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl; << " seconds! " << endl;
} }
@@ -1620,6 +1586,22 @@ void Z4c_class::Step(int lev, int YN)
} }
#endif #endif
} }
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
// Complete non-blocking error reduction and check
MPI_Wait(&err_req_pre, MPI_STATUS_IGNORE);
if (ERROR)
{
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
if (myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime
<< ", lev = " << lev << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
// for black hole position // for black hole position
if (BH_num > 0 && lev == GH->levels - 1) if (BH_num > 0 && lev == GH->levels - 1)
@@ -1841,23 +1823,7 @@ void Z4c_class::Step(int lev, int YN)
Pp = Pp->next; Pp = Pp->next;
} }
// check error information // NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls
{
int erh = ERROR;
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
}
if (ERROR)
{
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
if (myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
<< " variables at t = " << PhysTime
<< ", lev = " << lev << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
// evolve Shell Patches // evolve Shell Patches
if (lev == 0) if (lev == 0)
@@ -2103,24 +2069,15 @@ void Z4c_class::Step(int lev, int YN)
sPp = sPp->next; sPp = sPp->next;
} }
} }
// check error information // Non-blocking error reduction overlapped with Sync to hide Allreduce latency
MPI_Request err_req_cor;
{ {
int erh = ERROR; int erh = ERROR;
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor);
}
if (ERROR)
{
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
if (myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "find NaN on Shell Patches in RK4 substep#" << iter_count
<< " variables at t = " << PhysTime << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
} }
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); Parallel::AsyncSyncState async_cor;
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
if (lev == 0) if (lev == 0)
{ {
@@ -2132,8 +2089,8 @@ void Z4c_class::Step(int lev, int YN)
{ {
prev_clock = curr_clock; prev_clock = curr_clock;
curr_clock = clock(); curr_clock = clock();
cout << " Shell stuff synchronization used " cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl; << " seconds! " << endl;
} }
} }
@@ -2170,6 +2127,23 @@ void Z4c_class::Step(int lev, int YN)
} }
// end smooth // end smooth
#endif #endif
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
// Complete non-blocking error reduction and check
MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE);
if (ERROR)
{
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
if (myrank == 0)
{
if (ErrorMonitor->outfile)
ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
<< " variables at t = " << PhysTime
<< ", lev = " << lev << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
// for black hole position // for black hole position
if (BH_num > 0 && lev == GH->levels - 1) if (BH_num > 0 && lev == GH->levels - 1)

File diff suppressed because it is too large Load Diff

View File

@@ -1,107 +1,92 @@
#ifndef CGH_H #ifndef CGH_H
#define CGH_H #define CGH_H
#include <mpi.h> #include <mpi.h>
#include "MyList.h" #include "MyList.h"
#include "MPatch.h" #include "MPatch.h"
#include "macrodef.h" #include "macrodef.h"
#include "monitor.h" #include "monitor.h"
#include "Parallel.h" #include "Parallel.h"
class cgh class cgh
{ {
public: public:
int levels, movls, BH_num_in; int levels, movls, BH_num_in;
// information of boxes // information of boxes
int *grids; int *grids;
double ***bbox; double ***bbox;
int ***shape; int ***shape;
double ***handle; double ***handle;
double ***Porgls; double ***Porgls;
double *Lt; double *Lt;
// information of Patch list // information of Patch list
MyList<Patch> **PatL; MyList<Patch> **PatL;
// information of OutBdLow2Hi point list and Restrict point list // information of OutBdLow2Hi point list and Restrict point list
#if (RPB == 1) #if (RPB == 1)
MyList<Parallel::pointstru_bam> **bdsul, **rsul; MyList<Parallel::pointstru_bam> **bdsul, **rsul;
#endif #endif
#if (PSTR == 1 || PSTR == 2 || PSTR == 3) #if (PSTR == 1 || PSTR == 2 || PSTR == 3)
int mylev; int mylev;
int *start_rank, *end_rank; int *start_rank, *end_rank;
MPI_Comm *Commlev; MPI_Comm *Commlev;
#endif #endif
protected: protected:
int ingfs, fngfs; int ingfs, fngfs;
static constexpr double ratio = 0.75; static constexpr double ratio = 0.75;
int trfls; int trfls;
public: public:
cgh(int ingfsi, int fngfsi, int Symmetry, char *filename, int checkrun, monitor *ErrorMonitor); cgh(int ingfsi, int fngfsi, int Symmetry, char *filename, int checkrun, monitor *ErrorMonitor);
~cgh(); ~cgh();
void compose_cgh(int nprocs); void compose_cgh(int nprocs);
void sethandle(monitor *ErrorMonitor); void sethandle(monitor *ErrorMonitor);
void checkPatchList(MyList<Patch> *PatL, bool buflog); void checkPatchList(MyList<Patch> *PatL, bool buflog);
void Regrid(int Symmetry, int BH_num, double **Porgbr, double **Porg0, void Regrid(int Symmetry, int BH_num, double **Porgbr, double **Porg0,
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList, bool BB, MyList<var> *FutureList, MyList<var> *tmList, bool BB,
monitor *ErrorMonitor); monitor *ErrorMonitor);
void Regrid_fake(int Symmetry, int BH_num, double **Porgbr, double **Porg0, void Regrid_fake(int Symmetry, int BH_num, double **Porgbr, double **Porg0,
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList, bool BB, MyList<var> *FutureList, MyList<var> *tmList, bool BB,
monitor *ErrorMonitor); monitor *ErrorMonitor);
void recompose_cgh(int nprocs, bool *lev_flag, void recompose_cgh(int nprocs, bool *lev_flag,
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList, MyList<var> *FutureList, MyList<var> *tmList,
int Symmetry, bool BB); int Symmetry, bool BB);
void recompose_cgh_fake(int nprocs, bool *lev_flag, void recompose_cgh_fake(int nprocs, bool *lev_flag,
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList, MyList<var> *FutureList, MyList<var> *tmList,
int Symmetry, bool BB); int Symmetry, bool BB);
void read_bbox(int Symmetry, char *filename); void read_bbox(int Symmetry, char *filename);
MyList<Patch> *construct_patchlist(int lev, int Symmetry); MyList<Patch> *construct_patchlist(int lev, int Symmetry);
bool Interp_One_Point(MyList<var> *VarList, bool Interp_One_Point(MyList<var> *VarList,
double *XX, /*input global Cartesian coordinate*/ double *XX, /*input global Cartesian coordinate*/
double *Shellf, int Symmetry); double *Shellf, int Symmetry);
void recompose_cgh_Onelevel(int nprocs, int lev, void recompose_cgh_Onelevel(int nprocs, int lev,
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList, MyList<var> *FutureList, MyList<var> *tmList,
int Symmetry, bool BB); int Symmetry, bool BB);
void Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0, void Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0,
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList, bool BB, MyList<var> *FutureList, MyList<var> *tmList, bool BB,
monitor *ErrorMonitor); monitor *ErrorMonitor);
void Regrid_Onelevel_aux(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0, void Regrid_Onelevel_aux(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0,
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList, bool BB, MyList<var> *FutureList, MyList<var> *tmList, bool BB,
monitor *ErrorMonitor); monitor *ErrorMonitor);
void settrfls(const int lev); void settrfls(const int lev);
#if (PSTR == 1 || PSTR == 2 || PSTR == 3) #if (PSTR == 1 || PSTR == 2 || PSTR == 3)
void construct_mylev(int nprocs); void construct_mylev(int nprocs);
#endif #endif
};
// Load balancing support
bool enable_load_balance; // Enable load balancing #endif /* CGH_H */
int load_balance_check_interval; // Check interval (in time steps)
int current_time_step; // Current time step counter
double *rank_interp_times; // Store interpolation times for each rank
int *heavy_ranks; // Store heavy rank numbers
int num_heavy_ranks; // Number of heavy ranks
void init_load_balance(int nprocs);
void update_interp_time(int rank, double time);
bool check_and_rebalance(int nprocs, int lev,
MyList<var> *OldList, MyList<var> *StateList,
MyList<var> *FutureList, MyList<var> *tmList,
int Symmetry, bool BB);
};
#endif /* CGH_H */

View File

@@ -69,12 +69,10 @@
fy = ZEO fy = ZEO
fz = ZEO fz = ZEO
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
!DIR$ UNROLL PARTIAL(4)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
! x direction ! x direction
if(i+1 <= imax .and. i-1 >= imin)then if(i+1 <= imax .and. i-1 >= imin)then
! !
! - f(i-1) + f(i+1) ! - f(i-1) + f(i+1)
@@ -373,8 +371,6 @@
fxz = ZEO fxz = ZEO
fyz = ZEO fyz = ZEO
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
!DIR$ UNROLL PARTIAL(4)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1

View File

@@ -883,17 +883,13 @@ subroutine symmetry_bd(ord,extc,func,funcc,SoA)
integer::i integer::i
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
funcc(1:extc(1),1:extc(2),1:extc(3)) = func funcc(1:extc(1),1:extc(2),1:extc(3)) = func
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
do i=0,ord-1 do i=0,ord-1
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1) funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1)
enddo enddo
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
do i=0,ord-1 do i=0,ord-1
funcc(:,-i,1:extc(3)) = funcc(:,i+1,1:extc(3))*SoA(2) funcc(:,-i,1:extc(3)) = funcc(:,i+1,1:extc(3))*SoA(2)
enddo enddo
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
do i=0,ord-1 do i=0,ord-1
funcc(:,:,-i) = funcc(:,:,i+1)*SoA(3) funcc(:,:,-i) = funcc(:,:,i+1)*SoA(3)
enddo enddo
@@ -1116,7 +1112,6 @@ end subroutine d2dump
! Lagrangian polynomial interpolation ! Lagrangian polynomial interpolation
!------------------------------------------------------------------------------ !------------------------------------------------------------------------------
!DIR$ ATTRIBUTES FORCEINLINE :: polint
subroutine polint(xa, ya, x, y, dy, ordn) subroutine polint(xa, ya, x, y, dy, ordn)
implicit none implicit none

View File

@@ -65,8 +65,6 @@ real*8,intent(in) :: eps
! dx^4 ! dx^4
! note the sign (-1)^r-1, now r=2 ! note the sign (-1)^r-1, now r=2
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
!DIR$ UNROLL PARTIAL(4)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)

View File

@@ -24,7 +24,7 @@ f90 = ifx
f77 = ifx f77 = ifx
CXX = icpx CXX = icpx
CC = icx CC = icx
CLINKER = mpiicpx CLINKER = mpiicpx
Cu = nvcc Cu = nvcc
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include

File diff suppressed because it is too large Load Diff