Compare commits
7 Commits
yx-vacatio
...
chb-copilo
| Author | SHA1 | Date | |
|---|---|---|---|
|
8b68b5d782
|
|||
|
dd2443c926
|
|||
|
2d7ba5c60c
|
|||
|
4777cad4ed
|
|||
|
afd4006da2
|
|||
|
|
a918dc103e
|
||
|
|
38c2c30186
|
@@ -66,8 +66,7 @@ if os.path.exists(File_directory):
|
|||||||
## Prompt whether to overwrite the existing directory
|
## Prompt whether to overwrite the existing directory
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
## inputvalue = input()
|
inputvalue = input()
|
||||||
inputvalue = "continue"
|
|
||||||
## If the user agrees to overwrite, proceed and remove the existing directory
|
## If the user agrees to overwrite, proceed and remove the existing directory
|
||||||
if ( inputvalue == "continue" ):
|
if ( inputvalue == "continue" ):
|
||||||
print( " Continue the calculation !!! " )
|
print( " Continue the calculation !!! " )
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -39,10 +39,6 @@ public:
|
|||||||
|
|
||||||
bool Find_Point(double *XX);
|
bool Find_Point(double *XX);
|
||||||
|
|
||||||
void Interp_Points(MyList<var> *VarList,
|
|
||||||
int NN, double **XX,
|
|
||||||
double *Shellf, int Symmetry,
|
|
||||||
int Nmin_consumer, int Nmax_consumer);
|
|
||||||
void Interp_Points(MyList<var> *VarList,
|
void Interp_Points(MyList<var> *VarList,
|
||||||
int NN, double **XX,
|
int NN, double **XX,
|
||||||
double *Shellf, int Symmetry, MPI_Comm Comm_here);
|
double *Shellf, int Symmetry, MPI_Comm Comm_here);
|
||||||
|
|||||||
@@ -24,7 +24,6 @@ using namespace std;
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <mpi.h>
|
#include <mpi.h>
|
||||||
#include <memory.h>
|
|
||||||
#include "MyList.h"
|
#include "MyList.h"
|
||||||
#include "Block.h"
|
#include "Block.h"
|
||||||
#include "Parallel.h"
|
#include "Parallel.h"
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1,235 +1,214 @@
|
|||||||
|
|
||||||
#ifndef PARALLEL_H
|
#ifndef PARALLEL_H
|
||||||
#define PARALLEL_H
|
#define PARALLEL_H
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <new>
|
#include <new>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
#include <memory.h>
|
|
||||||
#include "Parallel_bam.h"
|
#include "Parallel_bam.h"
|
||||||
#include "var.h"
|
#include "var.h"
|
||||||
#include "MPatch.h"
|
#include "MPatch.h"
|
||||||
#include "Block.h"
|
#include "Block.h"
|
||||||
#include "MyList.h"
|
#include "MyList.h"
|
||||||
#include "macrodef.h" //need dim; ghost_width; CONTRACT
|
#include "macrodef.h" //need dim; ghost_width; CONTRACT
|
||||||
namespace Parallel
|
namespace Parallel
|
||||||
{
|
{
|
||||||
struct gridseg
|
struct gridseg
|
||||||
{
|
{
|
||||||
double llb[dim];
|
double llb[dim];
|
||||||
double uub[dim];
|
double uub[dim];
|
||||||
int shape[dim];
|
int shape[dim];
|
||||||
double illb[dim], iuub[dim]; // only use for OutBdLow2Hi
|
double illb[dim], iuub[dim]; // only use for OutBdLow2Hi
|
||||||
Block *Bg;
|
Block *Bg;
|
||||||
};
|
};
|
||||||
int partition1(int &nx, int split_size, int min_width, int cpusize, int shape); // special for 1 diemnsion
|
int partition1(int &nx, int split_size, int min_width, int cpusize, int shape); // special for 1 diemnsion
|
||||||
int partition2(int *nxy, int split_size, int *min_width, int cpusize, int *shape); // special for 2 diemnsions
|
int partition2(int *nxy, int split_size, int *min_width, int cpusize, int *shape); // special for 2 diemnsions
|
||||||
int partition3(int *nxyz, int split_size, int *min_width, int cpusize, int *shape);
|
int partition3(int *nxyz, int split_size, int *min_width, int cpusize, int *shape);
|
||||||
MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks
|
MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks
|
||||||
MyList<Block> *distribute_hard(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks
|
void KillBlocks(MyList<Patch> *PatchLIST);
|
||||||
Block* splitHotspotBlock(MyList<Block>* &BlL, int _dim,
|
|
||||||
int ib0_orig, int ib3_orig,
|
void setfunction(MyList<Block> *BlL, var *vn, double func(double x, double y, double z));
|
||||||
int jb1_orig, int jb4_orig,
|
void setfunction(int rank, MyList<Block> *BlL, var *vn, double func(double x, double y, double z));
|
||||||
int kb2_orig, int kb5_orig,
|
void writefile(double time, int nx, int ny, int nz, double xmin, double xmax, double ymin, double ymax,
|
||||||
Patch* PP, int r_left, int r_right,
|
double zmin, double zmax, char *filename, double *data_out);
|
||||||
int ingfsi, int fngfsi, bool periodic,
|
void writefile(double time, int nx, int ny, double xmin, double xmax, double ymin, double ymax,
|
||||||
Block* &split_first_block, Block* &split_last_block);
|
char *filename, double *datain);
|
||||||
Block* createMappedBlock(MyList<Block>* &BlL, int _dim, int* shape, double* bbox,
|
void getarrayindex(int DIM, int *shape, int *index, int n);
|
||||||
int block_id, int ingfsi, int fngfsi, int lev);
|
int getarraylocation(int DIM, int *shape, int *index);
|
||||||
void KillBlocks(MyList<Patch> *PatchLIST);
|
void copy(int DIM, double *llbout, double *uubout, int *Dshape, double *DD, double *llbin, double *uubin,
|
||||||
|
int *shape, double *datain, double *llb, double *uub);
|
||||||
void setfunction(MyList<Block> *BlL, var *vn, double func(double x, double y, double z));
|
void Dump_CPU_Data(MyList<Block> *BlL, MyList<var> *DumpList, char *tag, double time, double dT);
|
||||||
void setfunction(int rank, MyList<Block> *BlL, var *vn, double func(double x, double y, double z));
|
void Dump_Data(MyList<Patch> *PL, MyList<var> *DumpList, char *tag, double time, double dT);
|
||||||
void writefile(double time, int nx, int ny, int nz, double xmin, double xmax, double ymin, double ymax,
|
void Dump_Data(Patch *PP, MyList<var> *DumpList, char *tag, double time, double dT, int grd);
|
||||||
double zmin, double zmax, char *filename, double *data_out);
|
double *Collect_Data(Patch *PP, var *VP);
|
||||||
void writefile(double time, int nx, int ny, double xmin, double xmax, double ymin, double ymax,
|
void d2Dump_Data(MyList<Patch> *PL, MyList<var> *DumpList, char *tag, double time, double dT);
|
||||||
char *filename, double *datain);
|
void d2Dump_Data(Patch *PP, MyList<var> *DumpList, char *tag, double time, double dT, int grd);
|
||||||
void getarrayindex(int DIM, int *shape, int *index, int n);
|
void Dump_Data0(Patch *PP, MyList<var> *DumpList, char *tag, double time, double dT);
|
||||||
int getarraylocation(int DIM, int *shape, int *index);
|
double global_interp(int DIM, int *ext, double **CoX, double *datain,
|
||||||
void copy(int DIM, double *llbout, double *uubout, int *Dshape, double *DD, double *llbin, double *uubin,
|
double *poX, int ordn, double *SoA, int Symmetry);
|
||||||
int *shape, double *datain, double *llb, double *uub);
|
double global_interp(int DIM, int *ext, double **CoX, double *datain,
|
||||||
void Dump_CPU_Data(MyList<Block> *BlL, MyList<var> *DumpList, char *tag, double time, double dT);
|
double *poX, int ordn);
|
||||||
void Dump_Data(MyList<Patch> *PL, MyList<var> *DumpList, char *tag, double time, double dT);
|
double Lagrangian_Int(double x, int npts, double *xpts, double *funcvals);
|
||||||
void Dump_Data(Patch *PP, MyList<var> *DumpList, char *tag, double time, double dT, int grd);
|
double LagrangePoly(double x, int pt, int npts, double *xpts);
|
||||||
double *Collect_Data(Patch *PP, var *VP);
|
MyList<gridseg> *build_complete_gsl(Patch *Pat);
|
||||||
void d2Dump_Data(MyList<Patch> *PL, MyList<var> *DumpList, char *tag, double time, double dT);
|
MyList<gridseg> *build_complete_gsl(MyList<Patch> *PatL);
|
||||||
void d2Dump_Data(Patch *PP, MyList<var> *DumpList, char *tag, double time, double dT, int grd);
|
MyList<gridseg> *build_complete_gsl_virtual(MyList<Patch> *PatL);
|
||||||
void Dump_Data0(Patch *PP, MyList<var> *DumpList, char *tag, double time, double dT);
|
MyList<gridseg> *build_complete_gsl_virtual2(MyList<Patch> *PatL); // - buffer
|
||||||
double global_interp(int DIM, int *ext, double **CoX, double *datain,
|
MyList<gridseg> *build_owned_gsl0(Patch *Pat, int rank_in); // - ghost without extension, special for Sync usage
|
||||||
double *poX, int ordn, double *SoA, int Symmetry);
|
MyList<gridseg> *build_owned_gsl1(Patch *Pat, int rank_in); // - ghost, similar to build_owned_gsl0 but extend one point on left side for vertex grid
|
||||||
double global_interp(int DIM, int *ext, double **CoX, double *datain,
|
MyList<gridseg> *build_owned_gsl2(Patch *Pat, int rank_in); // - buffer - ghost
|
||||||
double *poX, int ordn);
|
MyList<gridseg> *build_owned_gsl3(Patch *Pat, int rank_in, int Symmetry); // - ghost - BD ghost
|
||||||
double Lagrangian_Int(double x, int npts, double *xpts, double *funcvals);
|
MyList<gridseg> *build_owned_gsl4(Patch *Pat, int rank_in, int Symmetry); // - buffer - ghost - BD ghost
|
||||||
double LagrangePoly(double x, int pt, int npts, double *xpts);
|
MyList<gridseg> *build_owned_gsl5(Patch *Pat, int rank_in); // similar to build_owned_gsl2 but no extension
|
||||||
MyList<gridseg> *build_complete_gsl(Patch *Pat);
|
MyList<gridseg> *build_owned_gsl(MyList<Patch> *PatL, int rank_in, int type, int Symmetry);
|
||||||
MyList<gridseg> *build_complete_gsl(MyList<Patch> *PatL);
|
void build_gstl(MyList<gridseg> *srci, MyList<gridseg> *dsti, MyList<gridseg> **out_src, MyList<gridseg> **out_dst);
|
||||||
MyList<gridseg> *build_complete_gsl_virtual(MyList<Patch> *PatL);
|
int data_packer(double *data, MyList<gridseg> *src, MyList<gridseg> *dst, int rank_in, int dir,
|
||||||
MyList<gridseg> *build_complete_gsl_virtual2(MyList<Patch> *PatL); // - buffer
|
MyList<var> *VarLists, MyList<var> *VarListd, int Symmetry);
|
||||||
MyList<gridseg> *build_owned_gsl0(Patch *Pat, int rank_in); // - ghost without extension, special for Sync usage
|
void transfer(MyList<gridseg> **src, MyList<gridseg> **dst,
|
||||||
MyList<gridseg> *build_owned_gsl1(Patch *Pat, int rank_in); // - ghost, similar to build_owned_gsl0 but extend one point on left side for vertex grid
|
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /*target */,
|
||||||
MyList<gridseg> *build_owned_gsl2(Patch *Pat, int rank_in); // - buffer - ghost
|
int Symmetry);
|
||||||
MyList<gridseg> *build_owned_gsl3(Patch *Pat, int rank_in, int Symmetry); // - ghost - BD ghost
|
int data_packermix(double *data, MyList<gridseg> *src, MyList<gridseg> *dst, int rank_in, int dir,
|
||||||
MyList<gridseg> *build_owned_gsl4(Patch *Pat, int rank_in, int Symmetry); // - buffer - ghost - BD ghost
|
MyList<var> *VarLists, MyList<var> *VarListd, int Symmetry);
|
||||||
MyList<gridseg> *build_owned_gsl5(Patch *Pat, int rank_in); // similar to build_owned_gsl2 but no extension
|
void transfermix(MyList<gridseg> **src, MyList<gridseg> **dst,
|
||||||
MyList<gridseg> *build_owned_gsl(MyList<Patch> *PatL, int rank_in, int type, int Symmetry);
|
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /*target */,
|
||||||
void build_gstl(MyList<gridseg> *srci, MyList<gridseg> *dsti, MyList<gridseg> **out_src, MyList<gridseg> **out_dst);
|
int Symmetry);
|
||||||
int data_packer(double *data, MyList<gridseg> *src, MyList<gridseg> *dst, int rank_in, int dir,
|
void Sync(Patch *Pat, MyList<var> *VarList, int Symmetry);
|
||||||
MyList<var> *VarLists, MyList<var> *VarListd, int Symmetry);
|
void Sync(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry);
|
||||||
void transfer(MyList<gridseg> **src, MyList<gridseg> **dst,
|
|
||||||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /*target */,
|
// Async Sync: overlap MPI communication with computation
|
||||||
int Symmetry);
|
struct TransferState
|
||||||
int data_packermix(double *data, MyList<gridseg> *src, MyList<gridseg> *dst, int rank_in, int dir,
|
{
|
||||||
MyList<var> *VarLists, MyList<var> *VarListd, int Symmetry);
|
MPI_Request *reqs;
|
||||||
void transfermix(MyList<gridseg> **src, MyList<gridseg> **dst,
|
MPI_Status *stats;
|
||||||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /*target */,
|
int req_no;
|
||||||
int Symmetry);
|
double **send_data;
|
||||||
void Sync(Patch *Pat, MyList<var> *VarList, int Symmetry);
|
double **rec_data;
|
||||||
void Sync(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry);
|
int cpusize;
|
||||||
void Sync_merged(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry);
|
MyList<gridseg> **transfer_src;
|
||||||
|
MyList<gridseg> **transfer_dst;
|
||||||
struct SyncCache {
|
MyList<gridseg> **src;
|
||||||
bool valid;
|
MyList<gridseg> *dst;
|
||||||
int cpusize;
|
MyList<var> *VarList1;
|
||||||
MyList<gridseg> **combined_src;
|
MyList<var> *VarList2;
|
||||||
MyList<gridseg> **combined_dst;
|
int Symmetry;
|
||||||
int *send_lengths;
|
bool owns_gsl; // true if this state owns and should free the GSLs
|
||||||
int *recv_lengths;
|
};
|
||||||
double **send_bufs;
|
struct SyncHandle
|
||||||
double **recv_bufs;
|
{
|
||||||
int *send_buf_caps;
|
TransferState *states;
|
||||||
int *recv_buf_caps;
|
int num_states;
|
||||||
MPI_Request *reqs;
|
};
|
||||||
MPI_Status *stats;
|
SyncHandle *SyncBegin(Patch *Pat, MyList<var> *VarList, int Symmetry);
|
||||||
int max_reqs;
|
SyncHandle *SyncBegin(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry);
|
||||||
bool lengths_valid;
|
void SyncEnd(SyncHandle *handle);
|
||||||
SyncCache();
|
|
||||||
void invalidate();
|
// Cached GSL plan: pre-build grid segment lists once, reuse across multiple Sync calls
|
||||||
void destroy();
|
struct SyncPlanEntry
|
||||||
};
|
{
|
||||||
|
int cpusize;
|
||||||
void Sync_cached(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, SyncCache &cache);
|
MyList<gridseg> **transfer_src;
|
||||||
void transfer_cached(MyList<gridseg> **src, MyList<gridseg> **dst,
|
MyList<gridseg> **transfer_dst;
|
||||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
MyList<gridseg> **src;
|
||||||
int Symmetry, SyncCache &cache);
|
MyList<gridseg> *dst;
|
||||||
|
};
|
||||||
struct AsyncSyncState {
|
struct SyncPlan
|
||||||
int req_no;
|
{
|
||||||
bool active;
|
SyncPlanEntry *entries;
|
||||||
AsyncSyncState() : req_no(0), active(false) {}
|
int num_entries;
|
||||||
};
|
int Symmetry;
|
||||||
|
};
|
||||||
void Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry,
|
SyncPlan *SyncPreparePlan(MyList<Patch> *PatL, int Symmetry);
|
||||||
SyncCache &cache, AsyncSyncState &state);
|
void SyncFreePlan(SyncPlan *plan);
|
||||||
void Sync_finish(SyncCache &cache, AsyncSyncState &state,
|
SyncHandle *SyncBeginWithPlan(SyncPlan *plan, MyList<var> *VarList);
|
||||||
MyList<var> *VarList, int Symmetry);
|
SyncHandle *SyncBeginWithPlan(SyncPlan *plan, MyList<var> *VarList1, MyList<var> *VarList2);
|
||||||
void OutBdLow2Hi(Patch *Patc, Patch *Patf,
|
void OutBdLow2Hi(Patch *Patc, Patch *Patf,
|
||||||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||||||
int Symmetry);
|
int Symmetry);
|
||||||
void OutBdLow2Hi(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
void OutBdLow2Hi(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
||||||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||||||
int Symmetry);
|
int Symmetry);
|
||||||
void OutBdLow2Himix(Patch *Patc, Patch *Patf,
|
void OutBdLow2Himix(Patch *Patc, Patch *Patf,
|
||||||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||||||
int Symmetry);
|
int Symmetry);
|
||||||
void OutBdLow2Himix(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
void OutBdLow2Himix(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
||||||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||||||
int Symmetry);
|
int Symmetry);
|
||||||
void Restrict_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
void Prolong(Patch *Patc, Patch *Patf,
|
||||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||||||
int Symmetry, SyncCache &cache);
|
int Symmetry);
|
||||||
void OutBdLow2Hi_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
void Prolongint(Patch *Patc, Patch *Patf,
|
||||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||||||
int Symmetry, SyncCache &cache);
|
int Symmetry);
|
||||||
void OutBdLow2Himix_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
void Restrict(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
||||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||||||
int Symmetry, SyncCache &cache);
|
int Symmetry);
|
||||||
void Prolong(Patch *Patc, Patch *Patf,
|
void Restrict_after(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
||||||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||||||
int Symmetry);
|
int Symmetry); // for -ghost - BDghost
|
||||||
void Prolongint(Patch *Patc, Patch *Patf,
|
MyList<Parallel::gridseg> *build_PhysBD_gsl(Patch *Pat);
|
||||||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
MyList<Parallel::gridseg> *build_ghost_gsl(MyList<Patch> *PatL);
|
||||||
int Symmetry);
|
MyList<Parallel::gridseg> *build_ghost_gsl(Patch *Pat);
|
||||||
void Restrict(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
MyList<Parallel::gridseg> *build_buffer_gsl(Patch *Pat);
|
||||||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
MyList<Parallel::gridseg> *build_buffer_gsl(MyList<Patch> *PatL);
|
||||||
int Symmetry);
|
MyList<Parallel::gridseg> *gsl_subtract(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B);
|
||||||
void Restrict_after(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
MyList<Parallel::gridseg> *gs_subtract(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B);
|
||||||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
MyList<Parallel::gridseg> *gsl_and(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B);
|
||||||
int Symmetry); // for -ghost - BDghost
|
MyList<Parallel::gridseg> *gs_and(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B);
|
||||||
MyList<Parallel::gridseg> *build_PhysBD_gsl(Patch *Pat);
|
MyList<Parallel::gridseg> *clone_gsl(MyList<Parallel::gridseg> *p, bool first_only);
|
||||||
MyList<Parallel::gridseg> *build_ghost_gsl(MyList<Patch> *PatL);
|
MyList<Parallel::gridseg> *build_bulk_gsl(Patch *Pat); // similar to build_owned_gsl0 but does not care rank issue
|
||||||
MyList<Parallel::gridseg> *build_ghost_gsl(Patch *Pat);
|
MyList<Parallel::gridseg> *build_bulk_gsl(Block *bp, Patch *Pat);
|
||||||
MyList<Parallel::gridseg> *build_buffer_gsl(Patch *Pat);
|
void build_PhysBD_gstl(Patch *Pat, MyList<Parallel::gridseg> *srci, MyList<Parallel::gridseg> *dsti,
|
||||||
MyList<Parallel::gridseg> *build_buffer_gsl(MyList<Patch> *PatL);
|
MyList<Parallel::gridseg> **out_src, MyList<Parallel::gridseg> **out_dst);
|
||||||
MyList<Parallel::gridseg> *gsl_subtract(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B);
|
void PeriodicBD(Patch *Pat, MyList<var> *VarList, int Symmetry);
|
||||||
MyList<Parallel::gridseg> *gs_subtract(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B);
|
double L2Norm(Patch *Pat, var *vf);
|
||||||
MyList<Parallel::gridseg> *gsl_and(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B);
|
void checkgsl(MyList<Parallel::gridseg> *pp, bool first_only);
|
||||||
MyList<Parallel::gridseg> *gs_and(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B);
|
void checkvarl(MyList<var> *pp, bool first_only);
|
||||||
MyList<Parallel::gridseg> *clone_gsl(MyList<Parallel::gridseg> *p, bool first_only);
|
MyList<Parallel::gridseg> *divide_gsl(MyList<Parallel::gridseg> *p, Patch *Pat);
|
||||||
MyList<Parallel::gridseg> *build_bulk_gsl(Patch *Pat); // similar to build_owned_gsl0 but does not care rank issue
|
MyList<Parallel::gridseg> *divide_gs(MyList<Parallel::gridseg> *p, Patch *Pat);
|
||||||
MyList<Parallel::gridseg> *build_bulk_gsl(Block *bp, Patch *Pat);
|
void prepare_inter_time_level(Patch *Pat,
|
||||||
void build_PhysBD_gstl(Patch *Pat, MyList<Parallel::gridseg> *srci, MyList<Parallel::gridseg> *dsti,
|
MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */,
|
||||||
MyList<Parallel::gridseg> **out_src, MyList<Parallel::gridseg> **out_dst);
|
MyList<var> *VarList3 /* target (t+a*dt) */, int tindex);
|
||||||
void PeriodicBD(Patch *Pat, MyList<var> *VarList, int Symmetry);
|
void prepare_inter_time_level(Patch *Pat,
|
||||||
double L2Norm(Patch *Pat, var *vf);
|
MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */,
|
||||||
void checkgsl(MyList<Parallel::gridseg> *pp, bool first_only);
|
MyList<var> *VarList3 /* source (t-dt) */, MyList<var> *VarList4 /* target (t+a*dt) */, int tindex);
|
||||||
void checkvarl(MyList<var> *pp, bool first_only);
|
void prepare_inter_time_level(MyList<Patch> *PatL,
|
||||||
MyList<Parallel::gridseg> *divide_gsl(MyList<Parallel::gridseg> *p, Patch *Pat);
|
MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */,
|
||||||
MyList<Parallel::gridseg> *divide_gs(MyList<Parallel::gridseg> *p, Patch *Pat);
|
MyList<var> *VarList3 /* target (t+a*dt) */, int tindex);
|
||||||
void prepare_inter_time_level(Patch *Pat,
|
void prepare_inter_time_level(MyList<Patch> *Pat,
|
||||||
MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */,
|
MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */,
|
||||||
MyList<var> *VarList3 /* target (t+a*dt) */, int tindex);
|
MyList<var> *VarList3 /* source (t-dt) */, MyList<var> *VarList4 /* target (t+a*dt) */, int tindex);
|
||||||
void prepare_inter_time_level(Patch *Pat,
|
void merge_gsl(MyList<gridseg> *&A, const double ratio);
|
||||||
MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */,
|
bool merge_gs(MyList<gridseg> *D, MyList<gridseg> *B, MyList<gridseg> *&C, const double ratio);
|
||||||
MyList<var> *VarList3 /* source (t-dt) */, MyList<var> *VarList4 /* target (t+a*dt) */, int tindex);
|
// Add ghost region to tangent plane
|
||||||
void prepare_inter_time_level(MyList<Patch> *PatL,
|
// we assume the grids have the same resolution
|
||||||
MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */,
|
void add_ghost_touch(MyList<gridseg> *&A);
|
||||||
MyList<var> *VarList3 /* target (t+a*dt) */, int tindex);
|
void cut_gsl(MyList<gridseg> *&A);
|
||||||
void prepare_inter_time_level(MyList<Patch> *Pat,
|
bool cut_gs(MyList<gridseg> *D, MyList<gridseg> *B, MyList<gridseg> *&C);
|
||||||
MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */,
|
MyList<Parallel::gridseg> *gs_subtract_virtual(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B);
|
||||||
MyList<var> *VarList3 /* source (t-dt) */, MyList<var> *VarList4 /* target (t+a*dt) */, int tindex);
|
void fill_level_data(MyList<Patch> *PatLd, MyList<Patch> *PatLs, MyList<Patch> *PatcL,
|
||||||
void merge_gsl(MyList<gridseg> *&A, const double ratio);
|
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *FutureList,
|
||||||
bool merge_gs(MyList<gridseg> *D, MyList<gridseg> *B, MyList<gridseg> *&C, const double ratio);
|
MyList<var> *tmList, int Symmetry, bool BB, bool CC);
|
||||||
// Add ghost region to tangent plane
|
bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
|
||||||
// we assume the grids have the same resolution
|
int NN, double **XX,
|
||||||
void add_ghost_touch(MyList<gridseg> *&A);
|
double *Shellf, int Symmetry);
|
||||||
void cut_gsl(MyList<gridseg> *&A);
|
void aligncheck(double *bbox0, double *bboxl, int lev, double *DH0, int *shape);
|
||||||
bool cut_gs(MyList<gridseg> *D, MyList<gridseg> *B, MyList<gridseg> *&C);
|
bool point_locat_gsl(double *pox, MyList<Parallel::gridseg> *gsl);
|
||||||
MyList<Parallel::gridseg> *gs_subtract_virtual(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B);
|
void checkpatchlist(MyList<Patch> *PatL, bool buflog);
|
||||||
void fill_level_data(MyList<Patch> *PatLd, MyList<Patch> *PatLs, MyList<Patch> *PatcL,
|
|
||||||
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *FutureList,
|
double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here);
|
||||||
MyList<var> *tmList, int Symmetry, bool BB, bool CC);
|
bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
|
||||||
bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
|
int NN, double **XX,
|
||||||
int NN, double **XX,
|
double *Shellf, int Symmetry, MPI_Comm Comm_here);
|
||||||
double *Shellf, int Symmetry);
|
#if (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
||||||
void aligncheck(double *bbox0, double *bboxl, int lev, double *DH0, int *shape);
|
MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
|
||||||
bool point_locat_gsl(double *pox, MyList<Parallel::gridseg> *gsl);
|
bool periodic, int start_rank, int end_rank, int nodes = 0);
|
||||||
void checkpatchlist(MyList<Patch> *PatL, bool buflog);
|
#endif
|
||||||
|
}
|
||||||
double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here);
|
#endif /*PARALLEL_H */
|
||||||
bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
|
|
||||||
int NN, double **XX,
|
|
||||||
double *Shellf, int Symmetry, MPI_Comm Comm_here);
|
|
||||||
#if (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
|
||||||
MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
|
|
||||||
bool periodic, int start_rank, int end_rank, int nodes = 0);
|
|
||||||
|
|
||||||
// Redistribute blocks with time statistics for load balancing
|
|
||||||
MyList<Block> *distribute(MyList<Patch> *PatchLIST, MyList<Block> *OldBlockL,
|
|
||||||
int cpusize, int ingfsi, int fngfsi,
|
|
||||||
bool periodic, int start_rank, int end_rank, int nodes = 0);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Dynamic load balancing: split blocks for heavy ranks
|
|
||||||
void split_heavy_blocks(MyList<Patch> *PatL, int *heavy_ranks, int num_heavy,
|
|
||||||
int split_factor, int cpusize, int ingfsi, int fngfsi);
|
|
||||||
|
|
||||||
// Check if load balancing is needed based on interpolation times
|
|
||||||
bool check_load_balance_need(double *rank_times, int nprocs, int &num_heavy, int *heavy_ranks);
|
|
||||||
}
|
|
||||||
#endif /*PARALLEL_H */
|
|
||||||
|
|||||||
@@ -186,6 +186,12 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
int ERROR = 0;
|
int ERROR = 0;
|
||||||
|
|
||||||
MyList<ss_patch> *sPp;
|
MyList<ss_patch> *sPp;
|
||||||
|
|
||||||
|
// Pre-build grid segment lists once for this level's patches.
|
||||||
|
// These are reused across predictor + 3 corrector SyncBegin calls,
|
||||||
|
// avoiding O(cpusize * blocks^2) rebuild each time.
|
||||||
|
Parallel::SyncPlan *sync_plan = Parallel::SyncPreparePlan(GH->PatL[lev], Symmetry);
|
||||||
|
|
||||||
// Predictor
|
// Predictor
|
||||||
MyList<Patch> *Pp = GH->PatL[lev];
|
MyList<Patch> *Pp = GH->PatL[lev];
|
||||||
while (Pp)
|
while (Pp)
|
||||||
@@ -321,13 +327,17 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
Pp = Pp->next;
|
Pp = Pp->next;
|
||||||
}
|
}
|
||||||
// check error information
|
// Start async ghost zone exchange - overlaps with error check and Shell computation
|
||||||
|
Parallel::SyncHandle *sync_pre = Parallel::SyncBeginWithPlan(sync_plan, SynchList_pre);
|
||||||
|
|
||||||
|
// check error information (overlaps with MPI transfer)
|
||||||
{
|
{
|
||||||
int erh = ERROR;
|
int erh = ERROR;
|
||||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||||
}
|
}
|
||||||
if (ERROR)
|
if (ERROR)
|
||||||
{
|
{
|
||||||
|
Parallel::SyncEnd(sync_pre); sync_pre = 0;
|
||||||
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
|
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
@@ -475,6 +485,7 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
if (ERROR)
|
if (ERROR)
|
||||||
{
|
{
|
||||||
|
Parallel::SyncEnd(sync_pre); sync_pre = 0;
|
||||||
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
|
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
@@ -485,7 +496,8 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
|
// Complete async ghost zone exchange
|
||||||
|
if (sync_pre) Parallel::SyncEnd(sync_pre);
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
if (lev == 0)
|
if (lev == 0)
|
||||||
@@ -693,13 +705,17 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
Pp = Pp->next;
|
Pp = Pp->next;
|
||||||
}
|
}
|
||||||
|
|
||||||
// check error information
|
// Start async ghost zone exchange - overlaps with error check and Shell computation
|
||||||
|
Parallel::SyncHandle *sync_cor = Parallel::SyncBeginWithPlan(sync_plan, SynchList_cor);
|
||||||
|
|
||||||
|
// check error information (overlaps with MPI transfer)
|
||||||
{
|
{
|
||||||
int erh = ERROR;
|
int erh = ERROR;
|
||||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||||
}
|
}
|
||||||
if (ERROR)
|
if (ERROR)
|
||||||
{
|
{
|
||||||
|
Parallel::SyncEnd(sync_cor); sync_cor = 0;
|
||||||
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
|
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
@@ -857,6 +873,7 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
if (ERROR)
|
if (ERROR)
|
||||||
{
|
{
|
||||||
|
Parallel::SyncEnd(sync_cor); sync_cor = 0;
|
||||||
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
|
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
@@ -868,7 +885,8 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
// Complete async ghost zone exchange
|
||||||
|
if (sync_cor) Parallel::SyncEnd(sync_cor);
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
if (lev == 0)
|
if (lev == 0)
|
||||||
@@ -1042,6 +1060,8 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
Porg0[ithBH][2] = Porg1[ithBH][2];
|
Porg0[ithBH][2] = Porg1[ithBH][2];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Parallel::SyncFreePlan(sync_plan);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
// for constraint preserving boundary (CPBC)
|
// for constraint preserving boundary (CPBC)
|
||||||
@@ -1075,6 +1095,10 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
int ERROR = 0;
|
int ERROR = 0;
|
||||||
|
|
||||||
MyList<ss_patch> *sPp;
|
MyList<ss_patch> *sPp;
|
||||||
|
|
||||||
|
// Pre-build grid segment lists once for this level's patches.
|
||||||
|
Parallel::SyncPlan *sync_plan = Parallel::SyncPreparePlan(GH->PatL[lev], Symmetry);
|
||||||
|
|
||||||
// Predictor
|
// Predictor
|
||||||
MyList<Patch> *Pp = GH->PatL[lev];
|
MyList<Patch> *Pp = GH->PatL[lev];
|
||||||
while (Pp)
|
while (Pp)
|
||||||
@@ -1542,13 +1566,17 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
// check error information
|
// Start async ghost zone exchange - overlaps with error check
|
||||||
|
Parallel::SyncHandle *sync_pre = Parallel::SyncBeginWithPlan(sync_plan, SynchList_pre);
|
||||||
|
|
||||||
|
// check error information (overlaps with MPI transfer)
|
||||||
{
|
{
|
||||||
int erh = ERROR;
|
int erh = ERROR;
|
||||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||||
}
|
}
|
||||||
if (ERROR)
|
if (ERROR)
|
||||||
{
|
{
|
||||||
|
Parallel::SyncEnd(sync_pre); sync_pre = 0;
|
||||||
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
|
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
@@ -1558,7 +1586,8 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
|
// Complete async ghost zone exchange
|
||||||
|
if (sync_pre) Parallel::SyncEnd(sync_pre);
|
||||||
|
|
||||||
if (lev == 0)
|
if (lev == 0)
|
||||||
{
|
{
|
||||||
@@ -2103,13 +2132,17 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
sPp = sPp->next;
|
sPp = sPp->next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// check error information
|
// Start async ghost zone exchange - overlaps with error check
|
||||||
|
Parallel::SyncHandle *sync_cor = Parallel::SyncBeginWithPlan(sync_plan, SynchList_cor);
|
||||||
|
|
||||||
|
// check error information (overlaps with MPI transfer)
|
||||||
{
|
{
|
||||||
int erh = ERROR;
|
int erh = ERROR;
|
||||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||||
}
|
}
|
||||||
if (ERROR)
|
if (ERROR)
|
||||||
{
|
{
|
||||||
|
Parallel::SyncEnd(sync_cor); sync_cor = 0;
|
||||||
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
|
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
@@ -2120,7 +2153,8 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
// Complete async ghost zone exchange
|
||||||
|
if (sync_cor) Parallel::SyncEnd(sync_cor);
|
||||||
|
|
||||||
if (lev == 0)
|
if (lev == 0)
|
||||||
{
|
{
|
||||||
@@ -2346,6 +2380,8 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
DG_List->clearList();
|
DG_List->clearList();
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
Parallel::SyncFreePlan(sync_plan);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#undef MRBD
|
#undef MRBD
|
||||||
|
|||||||
@@ -730,12 +730,6 @@ void bssn_class::Initialize()
|
|||||||
PhysTime = StartTime;
|
PhysTime = StartTime;
|
||||||
Setup_Black_Hole_position();
|
Setup_Black_Hole_position();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize sync caches (per-level, for predictor and corrector)
|
|
||||||
sync_cache_pre = new Parallel::SyncCache[GH->levels];
|
|
||||||
sync_cache_cor = new Parallel::SyncCache[GH->levels];
|
|
||||||
sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels];
|
|
||||||
sync_cache_rp_fine = new Parallel::SyncCache[GH->levels];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//================================================================================================
|
//================================================================================================
|
||||||
@@ -987,32 +981,6 @@ bssn_class::~bssn_class()
|
|||||||
delete Azzz;
|
delete Azzz;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Destroy sync caches before GH
|
|
||||||
if (sync_cache_pre)
|
|
||||||
{
|
|
||||||
for (int i = 0; i < GH->levels; i++)
|
|
||||||
sync_cache_pre[i].destroy();
|
|
||||||
delete[] sync_cache_pre;
|
|
||||||
}
|
|
||||||
if (sync_cache_cor)
|
|
||||||
{
|
|
||||||
for (int i = 0; i < GH->levels; i++)
|
|
||||||
sync_cache_cor[i].destroy();
|
|
||||||
delete[] sync_cache_cor;
|
|
||||||
}
|
|
||||||
if (sync_cache_rp_coarse)
|
|
||||||
{
|
|
||||||
for (int i = 0; i < GH->levels; i++)
|
|
||||||
sync_cache_rp_coarse[i].destroy();
|
|
||||||
delete[] sync_cache_rp_coarse;
|
|
||||||
}
|
|
||||||
if (sync_cache_rp_fine)
|
|
||||||
{
|
|
||||||
for (int i = 0; i < GH->levels; i++)
|
|
||||||
sync_cache_rp_fine[i].destroy();
|
|
||||||
delete[] sync_cache_rp_fine;
|
|
||||||
}
|
|
||||||
|
|
||||||
delete GH;
|
delete GH;
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
delete SH;
|
delete SH;
|
||||||
@@ -2213,7 +2181,6 @@ void bssn_class::Evolve(int Steps)
|
|||||||
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
|
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
|
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (REGLEV == 0 && (PSTR == 1 || PSTR == 2))
|
#if (REGLEV == 0 && (PSTR == 1 || PSTR == 2))
|
||||||
@@ -2429,7 +2396,6 @@ void bssn_class::RecursiveStep(int lev)
|
|||||||
GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor);
|
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor);
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2608,7 +2574,6 @@ void bssn_class::ParallelStep()
|
|||||||
GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0,
|
GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor);
|
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor);
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2775,7 +2740,6 @@ void bssn_class::ParallelStep()
|
|||||||
GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0,
|
GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor);
|
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor);
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
|
||||||
|
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
// a_stream.str("");
|
// a_stream.str("");
|
||||||
@@ -2790,7 +2754,6 @@ void bssn_class::ParallelStep()
|
|||||||
GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor);
|
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor);
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
|
||||||
|
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
// a_stream.str("");
|
// a_stream.str("");
|
||||||
@@ -2809,7 +2772,6 @@ void bssn_class::ParallelStep()
|
|||||||
GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor);
|
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor);
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
|
||||||
|
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
// a_stream.str("");
|
// a_stream.str("");
|
||||||
@@ -2825,7 +2787,6 @@ void bssn_class::ParallelStep()
|
|||||||
GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor);
|
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor);
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
|
||||||
|
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
// a_stream.str("");
|
// a_stream.str("");
|
||||||
@@ -3074,6 +3035,12 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
int ERROR = 0;
|
int ERROR = 0;
|
||||||
|
|
||||||
MyList<ss_patch> *sPp;
|
MyList<ss_patch> *sPp;
|
||||||
|
|
||||||
|
// Pre-build grid segment lists once for this level's patches.
|
||||||
|
// These are reused across predictor + 3 corrector SyncBegin calls,
|
||||||
|
// avoiding O(cpusize * blocks^2) rebuild each time.
|
||||||
|
Parallel::SyncPlan *sync_plan = Parallel::SyncPreparePlan(GH->PatL[lev], Symmetry);
|
||||||
|
|
||||||
// Predictor
|
// Predictor
|
||||||
MyList<Patch> *Pp = GH->PatL[lev];
|
MyList<Patch> *Pp = GH->PatL[lev];
|
||||||
while (Pp)
|
while (Pp)
|
||||||
@@ -3197,7 +3164,26 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
Pp = Pp->next;
|
Pp = Pp->next;
|
||||||
}
|
}
|
||||||
// NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls
|
|
||||||
|
// Start async ghost zone exchange - overlaps with error check and Shell computation
|
||||||
|
Parallel::SyncHandle *sync_pre = Parallel::SyncBeginWithPlan(sync_plan, SynchList_pre);
|
||||||
|
|
||||||
|
// check error information (overlaps with MPI transfer)
|
||||||
|
{
|
||||||
|
int erh = ERROR;
|
||||||
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||||
|
}
|
||||||
|
if (ERROR)
|
||||||
|
{
|
||||||
|
Parallel::SyncEnd(sync_pre); sync_pre = 0;
|
||||||
|
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
|
||||||
|
if (myrank == 0)
|
||||||
|
{
|
||||||
|
if (ErrorMonitor->outfile)
|
||||||
|
ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime << ", lev = " << lev << endl;
|
||||||
|
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
// evolve Shell Patches
|
// evolve Shell Patches
|
||||||
@@ -3215,9 +3201,9 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
{
|
{
|
||||||
#if (AGM == 0)
|
#if (AGM == 0)
|
||||||
f_enforce_ga(cg->shape,
|
f_enforce_ga(cg->shape,
|
||||||
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
||||||
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
||||||
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
|
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
|
||||||
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
|
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -3341,16 +3327,27 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Non-blocking error reduction overlapped with Sync to hide Allreduce latency
|
// check error information
|
||||||
MPI_Request err_req;
|
|
||||||
{
|
{
|
||||||
int erh = ERROR;
|
int erh = ERROR;
|
||||||
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req);
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ERROR)
|
||||||
|
{
|
||||||
|
Parallel::SyncEnd(sync_pre); sync_pre = 0;
|
||||||
|
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
|
||||||
|
if (myrank == 0)
|
||||||
|
{
|
||||||
|
if (ErrorMonitor->outfile)
|
||||||
|
ErrorMonitor->outfile << "find NaN in state variables on Shell Patches at t = " << PhysTime << endl;
|
||||||
|
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::AsyncSyncState async_pre;
|
// Complete async ghost zone exchange
|
||||||
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
|
if (sync_pre) Parallel::SyncEnd(sync_pre);
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
if (lev == 0)
|
if (lev == 0)
|
||||||
@@ -3363,29 +3360,12 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
{
|
{
|
||||||
prev_clock = curr_clock;
|
prev_clock = curr_clock;
|
||||||
curr_clock = clock();
|
curr_clock = clock();
|
||||||
cout << " Shell stuff synchronization used "
|
cout << " Shell stuff synchronization used "
|
||||||
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
|
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
|
||||||
<< " seconds! " << endl;
|
<< " seconds! " << endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
|
|
||||||
|
|
||||||
#ifdef WithShell
|
|
||||||
// Complete non-blocking error reduction and check
|
|
||||||
MPI_Wait(&err_req, MPI_STATUS_IGNORE);
|
|
||||||
if (ERROR)
|
|
||||||
{
|
|
||||||
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
|
|
||||||
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
|
|
||||||
if (myrank == 0)
|
|
||||||
{
|
|
||||||
if (ErrorMonitor->outfile)
|
|
||||||
ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime << ", lev = " << lev << endl;
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if (MAPBH == 0)
|
#if (MAPBH == 0)
|
||||||
// for black hole position
|
// for black hole position
|
||||||
@@ -3561,7 +3541,28 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
Pp = Pp->next;
|
Pp = Pp->next;
|
||||||
}
|
}
|
||||||
|
|
||||||
// NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls
|
// Start async ghost zone exchange - overlaps with error check and Shell computation
|
||||||
|
Parallel::SyncHandle *sync_cor = Parallel::SyncBeginWithPlan(sync_plan, SynchList_cor);
|
||||||
|
|
||||||
|
// check error information (overlaps with MPI transfer)
|
||||||
|
{
|
||||||
|
int erh = ERROR;
|
||||||
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ERROR)
|
||||||
|
{
|
||||||
|
Parallel::SyncEnd(sync_cor); sync_cor = 0;
|
||||||
|
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
|
||||||
|
if (myrank == 0)
|
||||||
|
{
|
||||||
|
if (ErrorMonitor->outfile)
|
||||||
|
ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
|
||||||
|
<< " variables at t = " << PhysTime
|
||||||
|
<< ", lev = " << lev << endl;
|
||||||
|
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
// evolve Shell Patches
|
// evolve Shell Patches
|
||||||
@@ -3579,9 +3580,9 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
{
|
{
|
||||||
#if (AGM == 0)
|
#if (AGM == 0)
|
||||||
f_enforce_ga(cg->shape,
|
f_enforce_ga(cg->shape,
|
||||||
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
|
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
|
||||||
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
|
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
|
||||||
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
|
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
|
||||||
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
|
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
|
||||||
#elif (AGM == 1)
|
#elif (AGM == 1)
|
||||||
if (iter_count == 3)
|
if (iter_count == 3)
|
||||||
@@ -3701,16 +3702,28 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
sPp = sPp->next;
|
sPp = sPp->next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Non-blocking error reduction overlapped with Sync to hide Allreduce latency
|
// check error information
|
||||||
MPI_Request err_req_cor;
|
|
||||||
{
|
{
|
||||||
int erh = ERROR;
|
int erh = ERROR;
|
||||||
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor);
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||||
|
}
|
||||||
|
if (ERROR)
|
||||||
|
{
|
||||||
|
Parallel::SyncEnd(sync_cor); sync_cor = 0;
|
||||||
|
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
|
||||||
|
if (myrank == 0)
|
||||||
|
{
|
||||||
|
if (ErrorMonitor->outfile)
|
||||||
|
ErrorMonitor->outfile << "find NaN on Shell Patches in RK4 substep#"
|
||||||
|
<< iter_count << " variables at t = "
|
||||||
|
<< PhysTime << endl;
|
||||||
|
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::AsyncSyncState async_cor;
|
// Complete async ghost zone exchange
|
||||||
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
|
if (sync_cor) Parallel::SyncEnd(sync_cor);
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
if (lev == 0)
|
if (lev == 0)
|
||||||
@@ -3723,31 +3736,12 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
{
|
{
|
||||||
prev_clock = curr_clock;
|
prev_clock = curr_clock;
|
||||||
curr_clock = clock();
|
curr_clock = clock();
|
||||||
cout << " Shell stuff synchronization used "
|
cout << " Shell stuff synchronization used "
|
||||||
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
|
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
|
||||||
<< " seconds! " << endl;
|
<< " seconds! " << endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
|
|
||||||
|
|
||||||
#ifdef WithShell
|
|
||||||
// Complete non-blocking error reduction and check
|
|
||||||
MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE);
|
|
||||||
if (ERROR)
|
|
||||||
{
|
|
||||||
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
|
|
||||||
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
|
|
||||||
if (myrank == 0)
|
|
||||||
{
|
|
||||||
if (ErrorMonitor->outfile)
|
|
||||||
ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
|
|
||||||
<< " variables at t = " << PhysTime
|
|
||||||
<< ", lev = " << lev << endl;
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if (MAPBH == 0)
|
#if (MAPBH == 0)
|
||||||
// for black hole position
|
// for black hole position
|
||||||
@@ -3920,6 +3914,8 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
Porg0[ithBH][2] = Porg1[ithBH][2];
|
Porg0[ithBH][2] = Porg1[ithBH][2];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Parallel::SyncFreePlan(sync_plan);
|
||||||
}
|
}
|
||||||
|
|
||||||
//================================================================================================
|
//================================================================================================
|
||||||
@@ -4059,7 +4055,22 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
Pp = Pp->next;
|
Pp = Pp->next;
|
||||||
}
|
}
|
||||||
// NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls
|
// check error information
|
||||||
|
{
|
||||||
|
int erh = ERROR;
|
||||||
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||||
|
}
|
||||||
|
if (ERROR)
|
||||||
|
{
|
||||||
|
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
|
||||||
|
if (myrank == 0)
|
||||||
|
{
|
||||||
|
if (ErrorMonitor->outfile)
|
||||||
|
ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime
|
||||||
|
<< ", lev = " << lev << endl;
|
||||||
|
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
// evolve Shell Patches
|
// evolve Shell Patches
|
||||||
@@ -4077,15 +4088,15 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
{
|
{
|
||||||
#if (AGM == 0)
|
#if (AGM == 0)
|
||||||
f_enforce_ga(cg->shape,
|
f_enforce_ga(cg->shape,
|
||||||
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
||||||
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
||||||
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
|
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
|
||||||
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
|
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||||
cg->fgfs[fngfs + ShellPatch::gx],
|
cg->fgfs[fngfs + ShellPatch::gx],
|
||||||
cg->fgfs[fngfs + ShellPatch::gy],
|
cg->fgfs[fngfs + ShellPatch::gy],
|
||||||
cg->fgfs[fngfs + ShellPatch::gz],
|
cg->fgfs[fngfs + ShellPatch::gz],
|
||||||
cg->fgfs[fngfs + ShellPatch::drhodx],
|
cg->fgfs[fngfs + ShellPatch::drhodx],
|
||||||
cg->fgfs[fngfs + ShellPatch::drhody],
|
cg->fgfs[fngfs + ShellPatch::drhody],
|
||||||
@@ -4200,16 +4211,25 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
// Non-blocking error reduction overlapped with Sync to hide Allreduce latency
|
// check error information
|
||||||
MPI_Request err_req;
|
|
||||||
{
|
{
|
||||||
int erh = ERROR;
|
int erh = ERROR;
|
||||||
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req);
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||||
|
}
|
||||||
|
if (ERROR)
|
||||||
|
{
|
||||||
|
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
|
||||||
|
if (myrank == 0)
|
||||||
|
{
|
||||||
|
if (ErrorMonitor->outfile)
|
||||||
|
ErrorMonitor->outfile << "find NaN in state variables on Shell Patches at t = "
|
||||||
|
<< PhysTime << endl;
|
||||||
|
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::AsyncSyncState async_pre;
|
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
|
||||||
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
|
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
if (lev == 0)
|
if (lev == 0)
|
||||||
@@ -4222,27 +4242,9 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
{
|
{
|
||||||
prev_clock = curr_clock;
|
prev_clock = curr_clock;
|
||||||
curr_clock = clock();
|
curr_clock = clock();
|
||||||
cout << " Shell stuff synchronization used "
|
cout << " Shell stuff synchronization used "
|
||||||
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
|
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
|
||||||
<< " seconds! " << endl;
|
<< " seconds! " << endl;
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
|
|
||||||
|
|
||||||
#ifdef WithShell
|
|
||||||
// Complete non-blocking error reduction and check
|
|
||||||
MPI_Wait(&err_req, MPI_STATUS_IGNORE);
|
|
||||||
if (ERROR)
|
|
||||||
{
|
|
||||||
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
|
|
||||||
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
|
|
||||||
if (myrank == 0)
|
|
||||||
{
|
|
||||||
if (ErrorMonitor->outfile)
|
|
||||||
ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime
|
|
||||||
<< ", lev = " << lev << endl;
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@@ -4405,7 +4407,23 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
Pp = Pp->next;
|
Pp = Pp->next;
|
||||||
}
|
}
|
||||||
|
|
||||||
// NOTE: error check deferred to after Shell Patch computation to reduce MPI_Allreduce calls
|
// check error information
|
||||||
|
{
|
||||||
|
int erh = ERROR;
|
||||||
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||||
|
}
|
||||||
|
if (ERROR)
|
||||||
|
{
|
||||||
|
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
|
||||||
|
if (myrank == 0)
|
||||||
|
{
|
||||||
|
if (ErrorMonitor->outfile)
|
||||||
|
ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
|
||||||
|
<< " variables at t = " << PhysTime
|
||||||
|
<< ", lev = " << lev << endl;
|
||||||
|
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
// evolve Shell Patches
|
// evolve Shell Patches
|
||||||
@@ -4423,9 +4441,9 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
{
|
{
|
||||||
#if (AGM == 0)
|
#if (AGM == 0)
|
||||||
f_enforce_ga(cg->shape,
|
f_enforce_ga(cg->shape,
|
||||||
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
|
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
|
||||||
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
|
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
|
||||||
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
|
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
|
||||||
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
|
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
|
||||||
#elif (AGM == 1)
|
#elif (AGM == 1)
|
||||||
if (iter_count == 3)
|
if (iter_count == 3)
|
||||||
@@ -4545,16 +4563,25 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
sPp = sPp->next;
|
sPp = sPp->next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Non-blocking error reduction overlapped with Sync to hide Allreduce latency
|
// check error information
|
||||||
MPI_Request err_req_cor;
|
|
||||||
{
|
{
|
||||||
int erh = ERROR;
|
int erh = ERROR;
|
||||||
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor);
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||||
|
}
|
||||||
|
if (ERROR)
|
||||||
|
{
|
||||||
|
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
|
||||||
|
if (myrank == 0)
|
||||||
|
{
|
||||||
|
if (ErrorMonitor->outfile)
|
||||||
|
ErrorMonitor->outfile << "find NaN on Shell Patches in RK4 substep#" << iter_count
|
||||||
|
<< " variables at t = " << PhysTime << endl;
|
||||||
|
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::AsyncSyncState async_cor;
|
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
||||||
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
|
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
if (lev == 0)
|
if (lev == 0)
|
||||||
@@ -4567,30 +4594,11 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
{
|
{
|
||||||
prev_clock = curr_clock;
|
prev_clock = curr_clock;
|
||||||
curr_clock = clock();
|
curr_clock = clock();
|
||||||
cout << " Shell stuff synchronization used "
|
cout << " Shell stuff synchronization used "
|
||||||
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
|
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
|
||||||
<< " seconds! " << endl;
|
<< " seconds! " << endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
|
|
||||||
|
|
||||||
#ifdef WithShell
|
|
||||||
// Complete non-blocking error reduction and check
|
|
||||||
MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE);
|
|
||||||
if (ERROR)
|
|
||||||
{
|
|
||||||
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
|
|
||||||
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
|
|
||||||
if (myrank == 0)
|
|
||||||
{
|
|
||||||
if (ErrorMonitor->outfile)
|
|
||||||
ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
|
|
||||||
<< " variables at t = " << PhysTime
|
|
||||||
<< ", lev = " << lev << endl;
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
// for black hole position
|
// for black hole position
|
||||||
if (BH_num > 0 && lev == GH->levels - 1)
|
if (BH_num > 0 && lev == GH->levels - 1)
|
||||||
@@ -4830,6 +4838,12 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
int ERROR = 0;
|
int ERROR = 0;
|
||||||
|
|
||||||
MyList<ss_patch> *sPp;
|
MyList<ss_patch> *sPp;
|
||||||
|
|
||||||
|
// Pre-build grid segment lists once for this level's patches.
|
||||||
|
// These are reused across predictor + 3 corrector SyncBegin calls,
|
||||||
|
// avoiding O(cpusize * blocks^2) rebuild each time.
|
||||||
|
Parallel::SyncPlan *sync_plan = Parallel::SyncPreparePlan(GH->PatL[lev], Symmetry);
|
||||||
|
|
||||||
// Predictor
|
// Predictor
|
||||||
MyList<Patch> *Pp = GH->PatL[lev];
|
MyList<Patch> *Pp = GH->PatL[lev];
|
||||||
while (Pp)
|
while (Pp)
|
||||||
@@ -4956,21 +4970,17 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
|
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Predictor rhs calculation");
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Predictor rhs calculation");
|
||||||
|
|
||||||
// Non-blocking error reduction overlapped with Sync to hide Allreduce latency
|
// Start async ghost zone exchange - overlaps with error check and BH position
|
||||||
MPI_Request err_req;
|
Parallel::SyncHandle *sync_pre = Parallel::SyncBeginWithPlan(sync_plan, SynchList_pre);
|
||||||
|
|
||||||
|
// check error information (overlaps with MPI transfer)
|
||||||
{
|
{
|
||||||
int erh = ERROR;
|
int erh = ERROR;
|
||||||
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, GH->Commlev[lev], &err_req);
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, GH->Commlev[lev]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor sync");
|
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev]);
|
|
||||||
|
|
||||||
// Complete non-blocking error reduction and check
|
|
||||||
MPI_Wait(&err_req, MPI_STATUS_IGNORE);
|
|
||||||
if (ERROR)
|
if (ERROR)
|
||||||
{
|
{
|
||||||
|
Parallel::SyncEnd(sync_pre); sync_pre = 0;
|
||||||
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
|
Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
@@ -4980,6 +4990,11 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor sync");
|
||||||
|
|
||||||
|
// Complete async ghost zone exchange
|
||||||
|
if (sync_pre) Parallel::SyncEnd(sync_pre);
|
||||||
|
|
||||||
#if (MAPBH == 0)
|
#if (MAPBH == 0)
|
||||||
// for black hole position
|
// for black hole position
|
||||||
if (BH_num > 0 && lev == GH->levels - 1)
|
if (BH_num > 0 && lev == GH->levels - 1)
|
||||||
@@ -5157,34 +5172,35 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
|
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector error check");
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector error check");
|
||||||
|
|
||||||
// Non-blocking error reduction overlapped with Sync to hide Allreduce latency
|
// Start async ghost zone exchange - overlaps with error check and BH position
|
||||||
MPI_Request err_req_cor;
|
Parallel::SyncHandle *sync_cor = Parallel::SyncBeginWithPlan(sync_plan, SynchList_cor);
|
||||||
|
|
||||||
|
// check error information (overlaps with MPI transfer)
|
||||||
{
|
{
|
||||||
int erh = ERROR;
|
int erh = ERROR;
|
||||||
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, GH->Commlev[lev], &err_req_cor);
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, GH->Commlev[lev]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector sync");
|
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev]);
|
|
||||||
|
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Corrector sync");
|
|
||||||
|
|
||||||
// Complete non-blocking error reduction and check
|
|
||||||
MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE);
|
|
||||||
if (ERROR)
|
if (ERROR)
|
||||||
{
|
{
|
||||||
|
Parallel::SyncEnd(sync_cor); sync_cor = 0;
|
||||||
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
|
Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
if (ErrorMonitor->outfile)
|
if (ErrorMonitor->outfile)
|
||||||
ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
|
ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
|
||||||
<< " variables at t = " << PhysTime
|
<< " variables at t = " << PhysTime
|
||||||
<< ", lev = " << lev << endl;
|
<< ", lev = " << lev << endl;
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector sync");
|
||||||
|
|
||||||
|
// Complete async ghost zone exchange
|
||||||
|
if (sync_cor) Parallel::SyncEnd(sync_cor);
|
||||||
|
|
||||||
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Corrector sync");
|
||||||
|
|
||||||
#if (MAPBH == 0)
|
#if (MAPBH == 0)
|
||||||
// for black hole position
|
// for black hole position
|
||||||
if (BH_num > 0 && lev == GH->levels - 1)
|
if (BH_num > 0 && lev == GH->levels - 1)
|
||||||
@@ -5297,6 +5313,8 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
|
|
||||||
// if(myrank==GH->start_rank[lev]) cout<<GH->mylev<<endl;
|
// if(myrank==GH->start_rank[lev]) cout<<GH->mylev<<endl;
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"complet GH Step");
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"complet GH Step");
|
||||||
|
|
||||||
|
Parallel::SyncFreePlan(sync_plan);
|
||||||
}
|
}
|
||||||
|
|
||||||
//================================================================================================
|
//================================================================================================
|
||||||
@@ -5468,11 +5486,21 @@ void bssn_class::SHStep()
|
|||||||
#if (PSTR == 1 || PSTR == 2)
|
#if (PSTR == 1 || PSTR == 2)
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor's error check");
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor's error check");
|
||||||
#endif
|
#endif
|
||||||
// Non-blocking error reduction overlapped with Synch to hide Allreduce latency
|
// check error information
|
||||||
MPI_Request err_req;
|
|
||||||
{
|
{
|
||||||
int erh = ERROR;
|
int erh = ERROR;
|
||||||
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req);
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ERROR)
|
||||||
|
{
|
||||||
|
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
|
||||||
|
if (myrank == 0)
|
||||||
|
{
|
||||||
|
if (ErrorMonitor->outfile)
|
||||||
|
ErrorMonitor->outfile << "find NaN in state variables on Shell Patches at t = " << PhysTime << endl;
|
||||||
|
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -5484,25 +5512,12 @@ void bssn_class::SHStep()
|
|||||||
{
|
{
|
||||||
prev_clock = curr_clock;
|
prev_clock = curr_clock;
|
||||||
curr_clock = clock();
|
curr_clock = clock();
|
||||||
cout << " Shell stuff synchronization used "
|
cout << " Shell stuff synchronization used "
|
||||||
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
|
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
|
||||||
<< " seconds! " << endl;
|
<< " seconds! " << endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Complete non-blocking error reduction and check
|
|
||||||
MPI_Wait(&err_req, MPI_STATUS_IGNORE);
|
|
||||||
if (ERROR)
|
|
||||||
{
|
|
||||||
SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
|
|
||||||
if (myrank == 0)
|
|
||||||
{
|
|
||||||
if (ErrorMonitor->outfile)
|
|
||||||
ErrorMonitor->outfile << "find NaN in state variables on Shell Patches at t = " << PhysTime << endl;
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// corrector
|
// corrector
|
||||||
for (iter_count = 1; iter_count < 4; iter_count++)
|
for (iter_count = 1; iter_count < 4; iter_count++)
|
||||||
{
|
{
|
||||||
@@ -5645,11 +5660,21 @@ void bssn_class::SHStep()
|
|||||||
sPp = sPp->next;
|
sPp = sPp->next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Non-blocking error reduction overlapped with Synch to hide Allreduce latency
|
// check error information
|
||||||
MPI_Request err_req_cor;
|
|
||||||
{
|
{
|
||||||
int erh = ERROR;
|
int erh = ERROR;
|
||||||
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor);
|
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||||
|
}
|
||||||
|
if (ERROR)
|
||||||
|
{
|
||||||
|
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
|
||||||
|
if (myrank == 0)
|
||||||
|
{
|
||||||
|
if (ErrorMonitor->outfile)
|
||||||
|
ErrorMonitor->outfile << "find NaN on Shell Patches in RK4 substep#" << iter_count
|
||||||
|
<< " variables at t = " << PhysTime << endl;
|
||||||
|
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -5661,26 +5686,12 @@ void bssn_class::SHStep()
|
|||||||
{
|
{
|
||||||
prev_clock = curr_clock;
|
prev_clock = curr_clock;
|
||||||
curr_clock = clock();
|
curr_clock = clock();
|
||||||
cout << " Shell stuff synchronization used "
|
cout << " Shell stuff synchronization used "
|
||||||
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
|
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
|
||||||
<< " seconds! " << endl;
|
<< " seconds! " << endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Complete non-blocking error reduction and check
|
|
||||||
MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE);
|
|
||||||
if (ERROR)
|
|
||||||
{
|
|
||||||
SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
|
|
||||||
if (myrank == 0)
|
|
||||||
{
|
|
||||||
if (ErrorMonitor->outfile)
|
|
||||||
ErrorMonitor->outfile << "find NaN on Shell Patches in RK4 substep#" << iter_count
|
|
||||||
<< " variables at t = " << PhysTime << endl;
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
sPp = SH->PatL;
|
sPp = SH->PatL;
|
||||||
while (sPp)
|
while (sPp)
|
||||||
{
|
{
|
||||||
@@ -5809,7 +5820,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
|||||||
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
|
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
|
Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry);
|
||||||
|
|
||||||
#if (PSTR == 1 || PSTR == 2)
|
#if (PSTR == 1 || PSTR == 2)
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
@@ -5819,11 +5830,21 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry);
|
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry);
|
||||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry);
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry);
|
||||||
@@ -5860,7 +5881,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
|||||||
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
|
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]);
|
Parallel::Sync(GH->PatL[lev - 1], SL, Symmetry);
|
||||||
|
|
||||||
#if (PSTR == 1 || PSTR == 2)
|
#if (PSTR == 1 || PSTR == 2)
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
@@ -5870,11 +5891,21 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SL, SL, Symmetry);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SL, SL, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
|
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
|
||||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry);
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry);
|
||||||
@@ -5888,7 +5919,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]);
|
Parallel::Sync(GH->PatL[lev], SL, Symmetry);
|
||||||
|
|
||||||
#if (PSTR == 1 || PSTR == 2)
|
#if (PSTR == 1 || PSTR == 2)
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
@@ -5946,14 +5977,24 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
|||||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
|
Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry);
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry);
|
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry);
|
||||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry);
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry);
|
||||||
@@ -5968,21 +6009,31 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
|||||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]);
|
Parallel::Sync(GH->PatL[lev - 1], SL, Symmetry);
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SL, SL, Symmetry);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SL, SL, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
|
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
|
||||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry);
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]);
|
Parallel::Sync(GH->PatL[lev], SL, Symmetry);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -6033,14 +6084,24 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
|||||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry);
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
|
Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry);
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry);
|
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry);
|
||||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry);
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry);
|
||||||
@@ -6057,21 +6118,31 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
|||||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry);
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]);
|
Parallel::Sync(GH->PatL[lev - 1], StateList, Symmetry);
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry);
|
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry);
|
||||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry);
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]);
|
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -6101,11 +6172,21 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry);
|
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry);
|
||||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry);
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry);
|
||||||
@@ -6114,11 +6195,21 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
|
|||||||
else // no time refinement levels and for all same time levels
|
else // no time refinement levels and for all same time levels
|
||||||
{
|
{
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry);
|
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry);
|
||||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry);
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry);
|
||||||
@@ -6134,10 +6225,10 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
|
|||||||
#else
|
#else
|
||||||
Parallel::Restrict_after(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry);
|
Parallel::Restrict_after(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]);
|
Parallel::Sync(GH->PatL[lev - 1], StateList, Symmetry);
|
||||||
}
|
}
|
||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]);
|
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#undef MIXOUTB
|
#undef MIXOUTB
|
||||||
|
|||||||
@@ -126,11 +126,6 @@ public:
|
|||||||
MyList<var> *OldStateList, *DumpList;
|
MyList<var> *OldStateList, *DumpList;
|
||||||
MyList<var> *ConstraintList;
|
MyList<var> *ConstraintList;
|
||||||
|
|
||||||
Parallel::SyncCache *sync_cache_pre; // per-level cache for predictor sync
|
|
||||||
Parallel::SyncCache *sync_cache_cor; // per-level cache for corrector sync
|
|
||||||
Parallel::SyncCache *sync_cache_rp_coarse; // RestrictProlong sync on PatL[lev-1]
|
|
||||||
Parallel::SyncCache *sync_cache_rp_fine; // RestrictProlong sync on PatL[lev]
|
|
||||||
|
|
||||||
monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor;
|
monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor;
|
||||||
monitor *ConVMonitor;
|
monitor *ConVMonitor;
|
||||||
surface_integral *Waveshell;
|
surface_integral *Waveshell;
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1,107 +1,92 @@
|
|||||||
|
|
||||||
#ifndef CGH_H
|
#ifndef CGH_H
|
||||||
#define CGH_H
|
#define CGH_H
|
||||||
|
|
||||||
#include <mpi.h>
|
#include <mpi.h>
|
||||||
#include "MyList.h"
|
#include "MyList.h"
|
||||||
#include "MPatch.h"
|
#include "MPatch.h"
|
||||||
#include "macrodef.h"
|
#include "macrodef.h"
|
||||||
#include "monitor.h"
|
#include "monitor.h"
|
||||||
#include "Parallel.h"
|
#include "Parallel.h"
|
||||||
|
|
||||||
class cgh
|
class cgh
|
||||||
{
|
{
|
||||||
|
|
||||||
public:
|
public:
|
||||||
int levels, movls, BH_num_in;
|
int levels, movls, BH_num_in;
|
||||||
// information of boxes
|
// information of boxes
|
||||||
int *grids;
|
int *grids;
|
||||||
double ***bbox;
|
double ***bbox;
|
||||||
int ***shape;
|
int ***shape;
|
||||||
double ***handle;
|
double ***handle;
|
||||||
double ***Porgls;
|
double ***Porgls;
|
||||||
double *Lt;
|
double *Lt;
|
||||||
|
|
||||||
// information of Patch list
|
// information of Patch list
|
||||||
MyList<Patch> **PatL;
|
MyList<Patch> **PatL;
|
||||||
|
|
||||||
// information of OutBdLow2Hi point list and Restrict point list
|
// information of OutBdLow2Hi point list and Restrict point list
|
||||||
#if (RPB == 1)
|
#if (RPB == 1)
|
||||||
MyList<Parallel::pointstru_bam> **bdsul, **rsul;
|
MyList<Parallel::pointstru_bam> **bdsul, **rsul;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
#if (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
||||||
int mylev;
|
int mylev;
|
||||||
int *start_rank, *end_rank;
|
int *start_rank, *end_rank;
|
||||||
MPI_Comm *Commlev;
|
MPI_Comm *Commlev;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
int ingfs, fngfs;
|
int ingfs, fngfs;
|
||||||
static constexpr double ratio = 0.75;
|
static constexpr double ratio = 0.75;
|
||||||
int trfls;
|
int trfls;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
cgh(int ingfsi, int fngfsi, int Symmetry, char *filename, int checkrun, monitor *ErrorMonitor);
|
cgh(int ingfsi, int fngfsi, int Symmetry, char *filename, int checkrun, monitor *ErrorMonitor);
|
||||||
|
|
||||||
~cgh();
|
~cgh();
|
||||||
|
|
||||||
void compose_cgh(int nprocs);
|
void compose_cgh(int nprocs);
|
||||||
void sethandle(monitor *ErrorMonitor);
|
void sethandle(monitor *ErrorMonitor);
|
||||||
void checkPatchList(MyList<Patch> *PatL, bool buflog);
|
void checkPatchList(MyList<Patch> *PatL, bool buflog);
|
||||||
void Regrid(int Symmetry, int BH_num, double **Porgbr, double **Porg0,
|
void Regrid(int Symmetry, int BH_num, double **Porgbr, double **Porg0,
|
||||||
MyList<var> *OldList, MyList<var> *StateList,
|
MyList<var> *OldList, MyList<var> *StateList,
|
||||||
MyList<var> *FutureList, MyList<var> *tmList, bool BB,
|
MyList<var> *FutureList, MyList<var> *tmList, bool BB,
|
||||||
monitor *ErrorMonitor);
|
monitor *ErrorMonitor);
|
||||||
void Regrid_fake(int Symmetry, int BH_num, double **Porgbr, double **Porg0,
|
void Regrid_fake(int Symmetry, int BH_num, double **Porgbr, double **Porg0,
|
||||||
MyList<var> *OldList, MyList<var> *StateList,
|
MyList<var> *OldList, MyList<var> *StateList,
|
||||||
MyList<var> *FutureList, MyList<var> *tmList, bool BB,
|
MyList<var> *FutureList, MyList<var> *tmList, bool BB,
|
||||||
monitor *ErrorMonitor);
|
monitor *ErrorMonitor);
|
||||||
void recompose_cgh(int nprocs, bool *lev_flag,
|
void recompose_cgh(int nprocs, bool *lev_flag,
|
||||||
MyList<var> *OldList, MyList<var> *StateList,
|
MyList<var> *OldList, MyList<var> *StateList,
|
||||||
MyList<var> *FutureList, MyList<var> *tmList,
|
MyList<var> *FutureList, MyList<var> *tmList,
|
||||||
int Symmetry, bool BB);
|
int Symmetry, bool BB);
|
||||||
void recompose_cgh_fake(int nprocs, bool *lev_flag,
|
void recompose_cgh_fake(int nprocs, bool *lev_flag,
|
||||||
MyList<var> *OldList, MyList<var> *StateList,
|
MyList<var> *OldList, MyList<var> *StateList,
|
||||||
MyList<var> *FutureList, MyList<var> *tmList,
|
MyList<var> *FutureList, MyList<var> *tmList,
|
||||||
int Symmetry, bool BB);
|
int Symmetry, bool BB);
|
||||||
void read_bbox(int Symmetry, char *filename);
|
void read_bbox(int Symmetry, char *filename);
|
||||||
MyList<Patch> *construct_patchlist(int lev, int Symmetry);
|
MyList<Patch> *construct_patchlist(int lev, int Symmetry);
|
||||||
bool Interp_One_Point(MyList<var> *VarList,
|
bool Interp_One_Point(MyList<var> *VarList,
|
||||||
double *XX, /*input global Cartesian coordinate*/
|
double *XX, /*input global Cartesian coordinate*/
|
||||||
double *Shellf, int Symmetry);
|
double *Shellf, int Symmetry);
|
||||||
void recompose_cgh_Onelevel(int nprocs, int lev,
|
void recompose_cgh_Onelevel(int nprocs, int lev,
|
||||||
MyList<var> *OldList, MyList<var> *StateList,
|
MyList<var> *OldList, MyList<var> *StateList,
|
||||||
MyList<var> *FutureList, MyList<var> *tmList,
|
MyList<var> *FutureList, MyList<var> *tmList,
|
||||||
int Symmetry, bool BB);
|
int Symmetry, bool BB);
|
||||||
void Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0,
|
void Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0,
|
||||||
MyList<var> *OldList, MyList<var> *StateList,
|
MyList<var> *OldList, MyList<var> *StateList,
|
||||||
MyList<var> *FutureList, MyList<var> *tmList, bool BB,
|
MyList<var> *FutureList, MyList<var> *tmList, bool BB,
|
||||||
monitor *ErrorMonitor);
|
monitor *ErrorMonitor);
|
||||||
void Regrid_Onelevel_aux(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0,
|
void Regrid_Onelevel_aux(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0,
|
||||||
MyList<var> *OldList, MyList<var> *StateList,
|
MyList<var> *OldList, MyList<var> *StateList,
|
||||||
MyList<var> *FutureList, MyList<var> *tmList, bool BB,
|
MyList<var> *FutureList, MyList<var> *tmList, bool BB,
|
||||||
monitor *ErrorMonitor);
|
monitor *ErrorMonitor);
|
||||||
void settrfls(const int lev);
|
void settrfls(const int lev);
|
||||||
|
|
||||||
#if (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
#if (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
||||||
void construct_mylev(int nprocs);
|
void construct_mylev(int nprocs);
|
||||||
#endif
|
#endif
|
||||||
|
};
|
||||||
// Load balancing support
|
|
||||||
bool enable_load_balance; // Enable load balancing
|
#endif /* CGH_H */
|
||||||
int load_balance_check_interval; // Check interval (in time steps)
|
|
||||||
int current_time_step; // Current time step counter
|
|
||||||
double *rank_interp_times; // Store interpolation times for each rank
|
|
||||||
int *heavy_ranks; // Store heavy rank numbers
|
|
||||||
int num_heavy_ranks; // Number of heavy ranks
|
|
||||||
|
|
||||||
void init_load_balance(int nprocs);
|
|
||||||
void update_interp_time(int rank, double time);
|
|
||||||
bool check_and_rebalance(int nprocs, int lev,
|
|
||||||
MyList<var> *OldList, MyList<var> *StateList,
|
|
||||||
MyList<var> *FutureList, MyList<var> *tmList,
|
|
||||||
int Symmetry, bool BB);
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* CGH_H */
|
|
||||||
|
|||||||
@@ -69,12 +69,10 @@
|
|||||||
fy = ZEO
|
fy = ZEO
|
||||||
fz = ZEO
|
fz = ZEO
|
||||||
|
|
||||||
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
|
|
||||||
!DIR$ UNROLL PARTIAL(4)
|
|
||||||
do k=1,ex(3)-1
|
do k=1,ex(3)-1
|
||||||
do j=1,ex(2)-1
|
do j=1,ex(2)-1
|
||||||
do i=1,ex(1)-1
|
do i=1,ex(1)-1
|
||||||
! x direction
|
! x direction
|
||||||
if(i+1 <= imax .and. i-1 >= imin)then
|
if(i+1 <= imax .and. i-1 >= imin)then
|
||||||
!
|
!
|
||||||
! - f(i-1) + f(i+1)
|
! - f(i-1) + f(i+1)
|
||||||
@@ -373,8 +371,6 @@
|
|||||||
fxz = ZEO
|
fxz = ZEO
|
||||||
fyz = ZEO
|
fyz = ZEO
|
||||||
|
|
||||||
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
|
|
||||||
!DIR$ UNROLL PARTIAL(4)
|
|
||||||
do k=1,ex(3)-1
|
do k=1,ex(3)-1
|
||||||
do j=1,ex(2)-1
|
do j=1,ex(2)-1
|
||||||
do i=1,ex(1)-1
|
do i=1,ex(1)-1
|
||||||
|
|||||||
@@ -883,17 +883,13 @@ subroutine symmetry_bd(ord,extc,func,funcc,SoA)
|
|||||||
|
|
||||||
integer::i
|
integer::i
|
||||||
|
|
||||||
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
|
|
||||||
funcc(1:extc(1),1:extc(2),1:extc(3)) = func
|
funcc(1:extc(1),1:extc(2),1:extc(3)) = func
|
||||||
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
|
|
||||||
do i=0,ord-1
|
do i=0,ord-1
|
||||||
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1)
|
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1)
|
||||||
enddo
|
enddo
|
||||||
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
|
|
||||||
do i=0,ord-1
|
do i=0,ord-1
|
||||||
funcc(:,-i,1:extc(3)) = funcc(:,i+1,1:extc(3))*SoA(2)
|
funcc(:,-i,1:extc(3)) = funcc(:,i+1,1:extc(3))*SoA(2)
|
||||||
enddo
|
enddo
|
||||||
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
|
|
||||||
do i=0,ord-1
|
do i=0,ord-1
|
||||||
funcc(:,:,-i) = funcc(:,:,i+1)*SoA(3)
|
funcc(:,:,-i) = funcc(:,:,i+1)*SoA(3)
|
||||||
enddo
|
enddo
|
||||||
@@ -1116,7 +1112,6 @@ end subroutine d2dump
|
|||||||
! Lagrangian polynomial interpolation
|
! Lagrangian polynomial interpolation
|
||||||
!------------------------------------------------------------------------------
|
!------------------------------------------------------------------------------
|
||||||
|
|
||||||
!DIR$ ATTRIBUTES FORCEINLINE :: polint
|
|
||||||
subroutine polint(xa, ya, x, y, dy, ordn)
|
subroutine polint(xa, ya, x, y, dy, ordn)
|
||||||
implicit none
|
implicit none
|
||||||
|
|
||||||
|
|||||||
@@ -65,8 +65,6 @@ real*8,intent(in) :: eps
|
|||||||
! dx^4
|
! dx^4
|
||||||
|
|
||||||
! note the sign (-1)^r-1, now r=2
|
! note the sign (-1)^r-1, now r=2
|
||||||
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
|
|
||||||
!DIR$ UNROLL PARTIAL(4)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
|
|||||||
@@ -10,15 +10,14 @@ filein = -I/usr/include/ -I${MKLROOT}/include
|
|||||||
## Added -lifcore for Intel Fortran runtime and -limf for Intel math library
|
## Added -lifcore for Intel Fortran runtime and -limf for Intel math library
|
||||||
LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl
|
LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl
|
||||||
|
|
||||||
## Aggressive optimization flags + PGO Phase 2 (profile-guided optimization)
|
## Aggressive optimization flags:
|
||||||
## -fprofile-instr-use: use collected profile data to guide optimization decisions
|
## -O3: Maximum optimization
|
||||||
## (branch prediction, basic block layout, inlining, loop unrolling)
|
## -xHost: Optimize for the host CPU architecture (Intel/AMD compatible)
|
||||||
PROFDATA = ../../pgo_profile/default.profdata
|
## -fp-model fast=2: Aggressive floating-point optimizations
|
||||||
|
## -fma: Enable fused multiply-add instructions
|
||||||
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||||
-fprofile-instr-use=$(PROFDATA) \
|
|
||||||
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
||||||
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||||
-fprofile-instr-use=$(PROFDATA) \
|
|
||||||
-align array64byte -fpp -I${MKLROOT}/include
|
-align array64byte -fpp -I${MKLROOT}/include
|
||||||
f90 = ifx
|
f90 = ifx
|
||||||
f77 = ifx
|
f77 = ifx
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -15,13 +15,12 @@ import time
|
|||||||
## taskset ensures all child processes inherit the CPU affinity mask
|
## taskset ensures all child processes inherit the CPU affinity mask
|
||||||
## This forces make and all compiler processes to use only nohz_full cores (4-55, 60-111)
|
## This forces make and all compiler processes to use only nohz_full cores (4-55, 60-111)
|
||||||
## Format: taskset -c 4-55,60-111 ensures processes only run on these cores
|
## Format: taskset -c 4-55,60-111 ensures processes only run on these cores
|
||||||
#NUMACTL_CPU_BIND = "taskset -c 0-111"
|
NUMACTL_CPU_BIND = "taskset -c 0-111"
|
||||||
NUMACTL_CPU_BIND = "taskset -c 16-47,64-95"
|
|
||||||
|
|
||||||
## Build parallelism configuration
|
## Build parallelism configuration
|
||||||
## Use nohz_full cores (4-55, 60-111) for compilation: 52 + 52 = 104 cores
|
## Use nohz_full cores (4-55, 60-111) for compilation: 52 + 52 = 104 cores
|
||||||
## Set make -j to utilize available cores for faster builds
|
## Set make -j to utilize available cores for faster builds
|
||||||
BUILD_JOBS = 96
|
BUILD_JOBS = 104
|
||||||
|
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
@@ -118,7 +117,6 @@ def run_ABE():
|
|||||||
|
|
||||||
if (input_data.GPU_Calculation == "no"):
|
if (input_data.GPU_Calculation == "no"):
|
||||||
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
||||||
#mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
|
||||||
mpi_command_outfile = "ABE_out.log"
|
mpi_command_outfile = "ABE_out.log"
|
||||||
elif (input_data.GPU_Calculation == "yes"):
|
elif (input_data.GPU_Calculation == "yes"):
|
||||||
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
|
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
|
||||||
@@ -160,8 +158,7 @@ def run_TwoPunctureABE():
|
|||||||
print( )
|
print( )
|
||||||
|
|
||||||
## Define the command to run
|
## Define the command to run
|
||||||
#TwoPuncture_command = NUMACTL_CPU_BIND + " ./TwoPunctureABE"
|
TwoPuncture_command = NUMACTL_CPU_BIND + " ./TwoPunctureABE"
|
||||||
TwoPuncture_command = " ./TwoPunctureABE"
|
|
||||||
TwoPuncture_command_outfile = "TwoPunctureABE_out.log"
|
TwoPuncture_command_outfile = "TwoPunctureABE_out.log"
|
||||||
|
|
||||||
## Execute the command with subprocess.Popen and stream output
|
## Execute the command with subprocess.Popen and stream output
|
||||||
|
|||||||
@@ -1,97 +0,0 @@
|
|||||||
# AMSS-NCKU PGO Profile Analysis Report
|
|
||||||
|
|
||||||
## 1. Profiling Environment
|
|
||||||
|
|
||||||
| Item | Value |
|
|
||||||
|------|-------|
|
|
||||||
| Compiler | Intel oneAPI DPC++/C++ 2025.3.0 (icpx/ifx) |
|
|
||||||
| Instrumentation Flag | `-fprofile-instr-generate` |
|
|
||||||
| Optimization Level (instrumented) | `-O2 -xHost -fma` |
|
|
||||||
| MPI Processes | 1 (single process to avoid MPI+instrumentation deadlock) |
|
|
||||||
| Profile File | `default_9725750769337483397_0.profraw` (327 KB) |
|
|
||||||
| Merged Profile | `default.profdata` (394 KB) |
|
|
||||||
| llvm-profdata | `/home/intel/oneapi/compiler/2025.3/bin/compiler/llvm-profdata` |
|
|
||||||
|
|
||||||
## 2. Reduced Simulation Parameters (for profiling run)
|
|
||||||
|
|
||||||
| Parameter | Production Value | Profiling Value |
|
|
||||||
|-----------|-----------------|-----------------|
|
|
||||||
| MPI_processes | 64 | 1 |
|
|
||||||
| grid_level | 9 | 4 |
|
|
||||||
| static_grid_level | 5 | 3 |
|
|
||||||
| static_grid_number | 96 | 24 |
|
|
||||||
| moving_grid_number | 48 | 16 |
|
|
||||||
| largest_box_xyz_max | 320^3 | 160^3 |
|
|
||||||
| Final_Evolution_Time | 1000.0 | 10.0 |
|
|
||||||
| Evolution_Step_Number | 10,000,000 | 1,000 |
|
|
||||||
| Detector_Number | 12 | 2 |
|
|
||||||
|
|
||||||
## 3. Profile Summary
|
|
||||||
|
|
||||||
| Metric | Value |
|
|
||||||
|--------|-------|
|
|
||||||
| Total instrumented functions | 1,392 |
|
|
||||||
| Functions with non-zero counts | 117 (8.4%) |
|
|
||||||
| Functions with zero counts | 1,275 (91.6%) |
|
|
||||||
| Maximum function entry count | 386,459,248 |
|
|
||||||
| Maximum internal block count | 370,477,680 |
|
|
||||||
| Total block count | 4,198,023,118 |
|
|
||||||
|
|
||||||
## 4. Top 20 Hotspot Functions
|
|
||||||
|
|
||||||
| Rank | Total Count | Max Block Count | Function | Category |
|
|
||||||
|------|------------|-----------------|----------|----------|
|
|
||||||
| 1 | 1,241,601,732 | 370,477,680 | `polint_` | Interpolation |
|
|
||||||
| 2 | 755,994,435 | 230,156,640 | `prolong3_` | Grid prolongation |
|
|
||||||
| 3 | 667,964,095 | 3,697,792 | `compute_rhs_bssn_` | BSSN RHS evolution |
|
|
||||||
| 4 | 539,736,051 | 386,459,248 | `symmetry_bd_` | Symmetry boundary |
|
|
||||||
| 5 | 277,310,808 | 53,170,728 | `lopsided_` | Lopsided FD stencil |
|
|
||||||
| 6 | 155,534,488 | 94,535,040 | `decide3d_` | 3D grid decision |
|
|
||||||
| 7 | 119,267,712 | 19,266,048 | `rungekutta4_rout_` | RK4 time integrator |
|
|
||||||
| 8 | 91,574,616 | 48,824,160 | `kodis_` | Kreiss-Oliger dissipation |
|
|
||||||
| 9 | 67,555,389 | 43,243,680 | `fderivs_` | Finite differences |
|
|
||||||
| 10 | 55,296,000 | 42,246,144 | `misc::fact(int)` | Factorial utility |
|
|
||||||
| 11 | 43,191,071 | 27,663,328 | `fdderivs_` | 2nd-order FD derivatives |
|
|
||||||
| 12 | 36,233,965 | 22,429,440 | `restrict3_` | Grid restriction |
|
|
||||||
| 13 | 24,698,512 | 17,231,520 | `polin3_` | Polynomial interpolation |
|
|
||||||
| 14 | 22,962,942 | 20,968,768 | `copy_` | Data copy |
|
|
||||||
| 15 | 20,135,696 | 17,259,168 | `Ansorg::barycentric(...)` | Spectral interpolation |
|
|
||||||
| 16 | 14,650,224 | 7,224,768 | `Ansorg::barycentric_omega(...)` | Spectral weights |
|
|
||||||
| 17 | 13,242,296 | 2,871,920 | `global_interp_` | Global interpolation |
|
|
||||||
| 18 | 12,672,000 | 7,734,528 | `sommerfeld_rout_` | Sommerfeld boundary |
|
|
||||||
| 19 | 6,872,832 | 1,880,064 | `sommerfeld_routbam_` | Sommerfeld boundary (BAM) |
|
|
||||||
| 20 | 5,709,900 | 2,809,632 | `l2normhelper_` | L2 norm computation |
|
|
||||||
|
|
||||||
## 5. Hotspot Category Breakdown
|
|
||||||
|
|
||||||
Top 20 functions account for ~98% of total execution counts:
|
|
||||||
|
|
||||||
| Category | Functions | Combined Count | Share |
|
|
||||||
|----------|-----------|---------------|-------|
|
|
||||||
| Interpolation / Prolongation / Restriction | polint_, prolong3_, restrict3_, polin3_, global_interp_, Ansorg::* | ~2,093M | ~50% |
|
|
||||||
| BSSN RHS + FD stencils | compute_rhs_bssn_, lopsided_, fderivs_, fdderivs_ | ~1,056M | ~25% |
|
|
||||||
| Boundary conditions | symmetry_bd_, sommerfeld_rout_, sommerfeld_routbam_ | ~559M | ~13% |
|
|
||||||
| Time integration | rungekutta4_rout_ | ~119M | ~3% |
|
|
||||||
| Dissipation | kodis_ | ~92M | ~2% |
|
|
||||||
| Utilities | misc::fact, decide3d_, copy_, l2normhelper_ | ~256M | ~6% |
|
|
||||||
|
|
||||||
## 6. Conclusions
|
|
||||||
|
|
||||||
1. **Profile data is valid**: 1,392 functions instrumented, 117 exercised with ~4.2 billion total counts.
|
|
||||||
2. **Hotspot concentration is high**: Top 5 functions alone account for ~76% of all counts, which is ideal for PGO — the compiler has strong branch/layout optimization targets.
|
|
||||||
3. **Fortran numerical kernels dominate**: `polint_`, `prolong3_`, `compute_rhs_bssn_`, `symmetry_bd_`, `lopsided_` are all Fortran routines in the inner evolution loop. PGO will optimize their branch prediction and basic block layout.
|
|
||||||
4. **91.6% of functions have zero counts**: These are code paths for unused features (GPU, BSSN-EScalar, BSSN-EM, Z4C, etc.). PGO will deprioritize them, improving instruction cache utilization.
|
|
||||||
5. **Profile is representative**: Despite the reduced grid size, the code path coverage matches production — the same kernels (RHS, prolongation, restriction, boundary) are exercised. PGO branch probabilities from this profile will transfer well to full-scale runs.
|
|
||||||
|
|
||||||
## 7. PGO Phase 2 Usage
|
|
||||||
|
|
||||||
To apply the profile, use the following flags in `makefile.inc`:
|
|
||||||
|
|
||||||
```makefile
|
|
||||||
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
|
||||||
-fprofile-instr-use=/home/amss/AMSS-NCKU/pgo_profile/default.profdata \
|
|
||||||
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
|
||||||
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
|
||||||
-fprofile-instr-use=/home/amss/AMSS-NCKU/pgo_profile/default.profdata \
|
|
||||||
-align array64byte -fpp -I${MKLROOT}/include
|
|
||||||
```
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user