Optimize BSSN CUDA resident state and CUDA-aware MPI
This commit is contained in:
@@ -100,29 +100,36 @@ namespace Parallel
|
||||
MyList<gridseg> **combined_dst;
|
||||
int *send_lengths;
|
||||
int *recv_lengths;
|
||||
double **send_bufs;
|
||||
double **recv_bufs;
|
||||
int *send_buf_caps;
|
||||
int *recv_buf_caps;
|
||||
unsigned char *send_buf_pinned;
|
||||
unsigned char *recv_buf_pinned;
|
||||
MPI_Request *reqs;
|
||||
MPI_Status *stats;
|
||||
double **send_bufs;
|
||||
double **recv_bufs;
|
||||
int *send_buf_caps;
|
||||
int *recv_buf_caps;
|
||||
unsigned char *send_buf_pinned;
|
||||
unsigned char *recv_buf_pinned;
|
||||
unsigned char *send_buf_is_dev;
|
||||
unsigned char *recv_buf_is_dev;
|
||||
int *send_buf_caps_dev;
|
||||
int *recv_buf_caps_dev;
|
||||
double **send_bufs_dev;
|
||||
double **recv_bufs_dev;
|
||||
MPI_Request *reqs;
|
||||
MPI_Status *stats;
|
||||
int max_reqs;
|
||||
bool lengths_valid;
|
||||
int *tc_req_node;
|
||||
int *tc_req_is_recv;
|
||||
int *tc_completed;
|
||||
bool cuda_aware_mode;
|
||||
SyncCache();
|
||||
void invalidate();
|
||||
void destroy();
|
||||
};
|
||||
|
||||
void Sync_cached(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, SyncCache &cache);
|
||||
void Sync_ensure_cache(MyList<Patch> *PatL, int Symmetry, SyncCache &cache);
|
||||
void transfer_cached(MyList<gridseg> **src, MyList<gridseg> **dst,
|
||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
||||
int Symmetry, SyncCache &cache);
|
||||
void Sync_cached(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, SyncCache &cache);
|
||||
void Sync_ensure_cache(MyList<Patch> *PatL, int Symmetry, SyncCache &cache);
|
||||
void transfer_cached(MyList<gridseg> **src, MyList<gridseg> **dst,
|
||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
||||
int Symmetry, SyncCache &cache);
|
||||
|
||||
struct AsyncSyncState {
|
||||
int req_no;
|
||||
@@ -182,13 +189,13 @@ namespace Parallel
|
||||
MyList<Parallel::gridseg> *clone_gsl(MyList<Parallel::gridseg> *p, bool first_only);
|
||||
MyList<Parallel::gridseg> *build_bulk_gsl(Patch *Pat); // similar to build_owned_gsl0 but does not care rank issue
|
||||
MyList<Parallel::gridseg> *build_bulk_gsl(Block *bp, Patch *Pat);
|
||||
void build_PhysBD_gstl(Patch *Pat, MyList<Parallel::gridseg> *srci, MyList<Parallel::gridseg> *dsti,
|
||||
MyList<Parallel::gridseg> **out_src, MyList<Parallel::gridseg> **out_dst);
|
||||
void PeriodicBD(Patch *Pat, MyList<var> *VarList, int Symmetry);
|
||||
double L2Norm(Patch *Pat, var *vf);
|
||||
void L2Norm7(Patch *Pat, var **vf, double *norms);
|
||||
void checkgsl(MyList<Parallel::gridseg> *pp, bool first_only);
|
||||
void checkvarl(MyList<var> *pp, bool first_only);
|
||||
void build_PhysBD_gstl(Patch *Pat, MyList<Parallel::gridseg> *srci, MyList<Parallel::gridseg> *dsti,
|
||||
MyList<Parallel::gridseg> **out_src, MyList<Parallel::gridseg> **out_dst);
|
||||
void PeriodicBD(Patch *Pat, MyList<var> *VarList, int Symmetry);
|
||||
double L2Norm(Patch *Pat, var *vf);
|
||||
void L2Norm7(Patch *Pat, var **vf, double *norms);
|
||||
void checkgsl(MyList<Parallel::gridseg> *pp, bool first_only);
|
||||
void checkvarl(MyList<var> *pp, bool first_only);
|
||||
MyList<Parallel::gridseg> *divide_gsl(MyList<Parallel::gridseg> *p, Patch *Pat);
|
||||
MyList<Parallel::gridseg> *divide_gs(MyList<Parallel::gridseg> *p, Patch *Pat);
|
||||
void prepare_inter_time_level(Patch *Pat,
|
||||
@@ -220,12 +227,12 @@ namespace Parallel
|
||||
void aligncheck(double *bbox0, double *bboxl, int lev, double *DH0, int *shape);
|
||||
bool point_locat_gsl(double *pox, MyList<Parallel::gridseg> *gsl);
|
||||
void checkpatchlist(MyList<Patch> *PatL, bool buflog);
|
||||
|
||||
double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here);
|
||||
void L2Norm7(Patch *Pat, var **vf, double *norms, MPI_Comm Comm_here);
|
||||
bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
|
||||
int NN, double **XX,
|
||||
double *Shellf, int Symmetry, MPI_Comm Comm_here);
|
||||
|
||||
double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here);
|
||||
void L2Norm7(Patch *Pat, var **vf, double *norms, MPI_Comm Comm_here);
|
||||
bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
|
||||
int NN, double **XX,
|
||||
double *Shellf, int Symmetry, MPI_Comm Comm_here);
|
||||
#if (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
||||
MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
|
||||
bool periodic, int start_rank, int end_rank, int nodes = 0);
|
||||
|
||||
Reference in New Issue
Block a user