Add optional BSSN kernel profiling switches
This commit is contained in:
@@ -59,6 +59,14 @@ using namespace std;
|
|||||||
#define BSSN_FINE_TIMING_TOPN 8
|
#define BSSN_FINE_TIMING_TOPN 8
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Optional compile-time switches. Each one falls back to 0 (disabled) when
// nothing earlier in the translation unit has defined it.

// Nonzero compiles in the per-bucket RHS kernel timing report.
#if !defined(BSSN_KERNEL_FINE_TIMING)
#define BSSN_KERNEL_FINE_TIMING 0
#endif

// Nonzero compiles in the stdin abort polling of the evolution loop.
#if !defined(BSSN_ENABLE_STDIN_ABORT_POLL)
#define BSSN_ENABLE_STDIN_ABORT_POLL 0
#endif
|
||||||
|
|
||||||
#if BSSN_FINE_TIMING
|
#if BSSN_FINE_TIMING
|
||||||
namespace step_timing
|
namespace step_timing
|
||||||
{
|
{
|
||||||
@@ -198,6 +206,74 @@ namespace step_timing
|
|||||||
#define STEP_TIMER_ADD(bucket_name, var_name)
|
#define STEP_TIMER_ADD(bucket_name, var_name)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if BSSN_KERNEL_FINE_TIMING
|
||||||
|
namespace rhs_kernel_timing_report
|
||||||
|
{
|
||||||
|
void report(int myrank, int nprocs, int step_index, double step_wall_seconds)
|
||||||
|
{
|
||||||
|
const int bucket_count = f_bssn_rhs_kernel_timing_bucket_count();
|
||||||
|
const double *local_bucket_seconds = f_bssn_rhs_kernel_timing_local_seconds();
|
||||||
|
|
||||||
|
if (bucket_count <= 0 || !local_bucket_seconds)
|
||||||
|
return;
|
||||||
|
|
||||||
|
double *max_bucket_seconds = new double[bucket_count];
|
||||||
|
double *avg_bucket_seconds = new double[bucket_count];
|
||||||
|
int *order = new int[bucket_count];
|
||||||
|
|
||||||
|
MPI_Reduce((void *)local_bucket_seconds, max_bucket_seconds, bucket_count, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
|
||||||
|
MPI_Reduce((void *)local_bucket_seconds, avg_bucket_seconds, bucket_count, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
|
||||||
|
|
||||||
|
if (myrank == 0)
|
||||||
|
{
|
||||||
|
double kernel_total = 0.0;
|
||||||
|
for (int i = 0; i < bucket_count; ++i)
|
||||||
|
{
|
||||||
|
avg_bucket_seconds[i] /= Mymax(1, nprocs);
|
||||||
|
order[i] = i;
|
||||||
|
kernel_total += max_bucket_seconds[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < bucket_count - 1; ++i)
|
||||||
|
for (int j = i + 1; j < bucket_count; ++j)
|
||||||
|
if (max_bucket_seconds[order[j]] > max_bucket_seconds[order[i]])
|
||||||
|
{
|
||||||
|
int tmp = order[i];
|
||||||
|
order[i] = order[j];
|
||||||
|
order[j] = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
ios::fmtflags old_flags = cout.flags();
|
||||||
|
streamsize old_precision = cout.precision();
|
||||||
|
|
||||||
|
const double kernel_frac = (step_wall_seconds > 0.0) ? (100.0 * kernel_total / step_wall_seconds) : 0.0;
|
||||||
|
cout << " RHS kernel split (max-rank accumulated over step " << step_index << "): total "
|
||||||
|
<< setprecision(6) << kernel_total << " s (" << setprecision(4)
|
||||||
|
<< kernel_frac << "% of coarse step)" << endl;
|
||||||
|
|
||||||
|
const int topn = Mymin(BSSN_FINE_TIMING_TOPN, bucket_count);
|
||||||
|
for (int i = 0; i < topn; ++i)
|
||||||
|
{
|
||||||
|
const int ib = order[i];
|
||||||
|
const double frac = (kernel_total > 0.0) ? (100.0 * max_bucket_seconds[ib] / kernel_total) : 0.0;
|
||||||
|
cout << " "
|
||||||
|
<< setw(20) << left << f_bssn_rhs_kernel_timing_label(ib)
|
||||||
|
<< " = " << setw(10) << right << setprecision(6) << max_bucket_seconds[ib]
|
||||||
|
<< " s (" << setw(6) << setprecision(4) << frac << "% of kernel)" << endl;
|
||||||
|
}
|
||||||
|
cout << endl;
|
||||||
|
|
||||||
|
cout.flags(old_flags);
|
||||||
|
cout.precision(old_precision);
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] max_bucket_seconds;
|
||||||
|
delete[] avg_bucket_seconds;
|
||||||
|
delete[] order;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
//================================================================================================
|
//================================================================================================
|
||||||
|
|
||||||
// define bssn_class
|
// define bssn_class
|
||||||
@@ -2325,7 +2401,12 @@ void bssn_class::Evolve(int Steps)
|
|||||||
{
|
{
|
||||||
#if BSSN_FINE_TIMING
|
#if BSSN_FINE_TIMING
|
||||||
step_timing::reset();
|
step_timing::reset();
|
||||||
STEP_TIMER_DECL(step_wall_start);
|
#endif
|
||||||
|
#if BSSN_KERNEL_FINE_TIMING
|
||||||
|
f_bssn_rhs_kernel_timing_reset();
|
||||||
|
#endif
|
||||||
|
#if (BSSN_FINE_TIMING || BSSN_KERNEL_FINE_TIMING)
|
||||||
|
const double step_wall_start = MPI_Wtime();
|
||||||
#endif
|
#endif
|
||||||
// special for large mass ratio consideration
|
// special for large mass ratio consideration
|
||||||
// if(fabs(Porg0[0][0]-Porg0[1][0])+fabs(Porg0[0][1]-Porg0[1][1])+fabs(Porg0[0][2]-Porg0[1][2])<1e-6)
|
// if(fabs(Porg0[0][0]-Porg0[1][0])+fabs(Porg0[0][1]-Porg0[1][1])+fabs(Porg0[0][2]-Porg0[1][2])<1e-6)
|
||||||
@@ -2449,10 +2530,13 @@ void bssn_class::Evolve(int Steps)
|
|||||||
<< endl;
|
<< endl;
|
||||||
}
|
}
|
||||||
cout << endl;
|
cout << endl;
|
||||||
|
#if BSSN_ENABLE_STDIN_ABORT_POLL
|
||||||
cout << " If you think the physical evolution time is enough for this simulation, please input 'stop' in the terminal to stop the MPI processes in the next evolution step ! " << endl;
|
cout << " If you think the physical evolution time is enough for this simulation, please input 'stop' in the terminal to stop the MPI processes in the next evolution step ! " << endl;
|
||||||
|
#endif
|
||||||
// cout << endl;
|
// cout << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if BSSN_ENABLE_STDIN_ABORT_POLL
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
// If an "abort" command is detected on stdin, terminate MPI processes
|
// If an "abort" command is detected on stdin, terminate MPI processes
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
@@ -2480,6 +2564,7 @@ void bssn_class::Evolve(int Steps)
|
|||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
|
#endif
|
||||||
|
|
||||||
// When LastCheck >= CheckTime, perform runtime checks and output status data
|
// When LastCheck >= CheckTime, perform runtime checks and output status data
|
||||||
if (LastCheck >= CheckTime)
|
if (LastCheck >= CheckTime)
|
||||||
@@ -2496,9 +2581,16 @@ void bssn_class::Evolve(int Steps)
|
|||||||
STEP_TIMER_ADD(TB_CHECKPOINT, timer_checkpoint);
|
STEP_TIMER_ADD(TB_CHECKPOINT, timer_checkpoint);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if (BSSN_FINE_TIMING || BSSN_KERNEL_FINE_TIMING)
|
||||||
|
const double step_wall_seconds = MPI_Wtime() - step_wall_start;
|
||||||
|
#endif
|
||||||
#if BSSN_FINE_TIMING
|
#if BSSN_FINE_TIMING
|
||||||
if (ncount % BSSN_FINE_TIMING_EVERY == 0)
|
if (ncount % BSSN_FINE_TIMING_EVERY == 0)
|
||||||
step_timing::report(myrank, nprocs, TimingMonitor, ncount, PhysTime, MPI_Wtime() - step_wall_start);
|
step_timing::report(myrank, nprocs, TimingMonitor, ncount, PhysTime, step_wall_seconds);
|
||||||
|
#endif
|
||||||
|
#if BSSN_KERNEL_FINE_TIMING
|
||||||
|
if (ncount % BSSN_FINE_TIMING_EVERY == 0)
|
||||||
|
rhs_kernel_timing_report::report(myrank, nprocs, ncount, step_wall_seconds);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
|
|||||||
@@ -32,6 +32,19 @@
|
|||||||
#define f_compute_rhs_Z4c_ss compute_rhs_z4c_ss_
|
#define f_compute_rhs_Z4c_ss compute_rhs_z4c_ss_
|
||||||
#define f_compute_constraint_fr compute_constraint_fr_
|
#define f_compute_constraint_fr compute_constraint_fr_
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
 * Accessors for the optional per-bucket timing of the BSSN RHS kernel.
 * They have C linkage; the explicit (void) parameter lists make them full
 * prototypes when this block is seen by a C compiler and mean the same as ()
 * in C++.
 */
#ifdef __cplusplus
extern "C"
{
#endif
    /* Reset the locally accumulated per-bucket seconds. */
    void f_bssn_rhs_kernel_timing_reset(void);
    /* Number of timing buckets. */
    int f_bssn_rhs_kernel_timing_bucket_count(void);
    /* Pointer to the local per-bucket seconds; callers must handle a null pointer. */
    const double *f_bssn_rhs_kernel_timing_local_seconds(void);
    /* Human-readable label for the bucket with the given index. */
    const char *f_bssn_rhs_kernel_timing_label(int bucket_index);
#ifdef __cplusplus
}
#endif
|
||||||
|
|
||||||
extern "C"
|
extern "C"
|
||||||
{
|
{
|
||||||
int f_compute_rhs_bssn(int *, double &, double *, double *, double *, // ex,T,X,Y,Z
|
int f_compute_rhs_bssn(int *, double &, double *, double *, double *, // ex,T,X,Y,Z
|
||||||
|
|||||||
@@ -2,12 +2,88 @@
|
|||||||
#include "bssn_rhs.h"
|
#include "bssn_rhs.h"
|
||||||
#include "share_func.h"
|
#include "share_func.h"
|
||||||
#include "tool.h"
|
#include "tool.h"
|
||||||
|
#include <time.h>
|
||||||
// 0-based i,j,k
|
// 0-based i,j,k
|
||||||
// #define IDX_F(i,j,k,nx,ny) ((i) + (j)*(nx) + (k)*(nx)*(ny))
|
// #define IDX_F(i,j,k,nx,ny) ((i) + (j)*(nx) + (k)*(nx)*(ny))
|
||||||
// ex(1)=nx, ex(2)=ny, ex(3)=nz
|
// ex(1)=nx, ex(2)=ny, ex(3)=nz
|
||||||
|
|
||||||
// 用法:a[ IDX_F(i,j,k,nx,ny) ]
|
// 用法:a[ IDX_F(i,j,k,nx,ny) ]
|
||||||
|
|
||||||
|
// Kernel timing is disabled (0) unless something earlier in the translation
// unit has already defined the switch.
#if !defined(BSSN_KERNEL_FINE_TIMING)
#define BSSN_KERNEL_FINE_TIMING 0
#endif
|
||||||
|
|
||||||
|
#if BSSN_KERNEL_FINE_TIMING
|
||||||
|
namespace rhs_kernel_timing
{
    // One bucket per timed region of the RHS kernel. KB_COUNT is the number of
    // buckets and sizes the arrays below, so it must remain the last enumerator.
    enum Bucket
    {
        KB_SETUP_DERIVS = 0,
        KB_GEOM_GAMMA = 1,
        KB_RICCI_METRIC = 2,
        KB_CHI_LAPSE = 3,
        KB_AIJ_TRK_GAUGE = 4,
        KB_KO_CONSTRAINT = 5,
        KB_COUNT
    };

    // Seconds accumulated per bucket in this process (starts at zero).
    static double local_bucket_seconds[KB_COUNT] = {0.0};

    // Printable bucket names, indexed by Bucket.
    static const char *bucket_labels[KB_COUNT] = {
        "setup_derivs",
        "geom_gamma",
        "ricci_metric",
        "chi_lapse",
        "aij_trk_gauge",
        "ko_constraint"};

    // Current CLOCK_MONOTONIC reading, expressed in seconds as a double.
    static inline double now_seconds()
    {
        struct timespec stamp;
        clock_gettime(CLOCK_MONOTONIC, &stamp);
        const double whole_seconds = static_cast<double>(stamp.tv_sec);
        const double nanoseconds = static_cast<double>(stamp.tv_nsec);
        return whole_seconds + 1.0e-9 * nanoseconds;
    }
}
|
||||||
|
|
||||||
|
extern "C" void f_bssn_rhs_kernel_timing_reset()
|
||||||
|
{
|
||||||
|
for (int i = 0; i < rhs_kernel_timing::KB_COUNT; ++i)
|
||||||
|
rhs_kernel_timing::local_bucket_seconds[i] = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern "C" int f_bssn_rhs_kernel_timing_bucket_count()
|
||||||
|
{
|
||||||
|
return rhs_kernel_timing::KB_COUNT;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern "C" const double *f_bssn_rhs_kernel_timing_local_seconds()
|
||||||
|
{
|
||||||
|
return rhs_kernel_timing::local_bucket_seconds;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern "C" const char *f_bssn_rhs_kernel_timing_label(int bucket_index)
|
||||||
|
{
|
||||||
|
if (bucket_index < 0 || bucket_index >= rhs_kernel_timing::KB_COUNT)
|
||||||
|
return "unknown";
|
||||||
|
return rhs_kernel_timing::bucket_labels[bucket_index];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start a timed region: declares a const double named var_name holding the current time.
#define RHS_KERNEL_TIMER_DECL(var_name) \
    const double var_name = rhs_kernel_timing::now_seconds()

// Close a timed region: adds the seconds elapsed since var_name to the given bucket.
#define RHS_KERNEL_TIMER_ADD(bucket_name, var_name)                                            \
    (rhs_kernel_timing::local_bucket_seconds[static_cast<int>(rhs_kernel_timing::bucket_name)] += \
     (rhs_kernel_timing::now_seconds() - (var_name)))
|
||||||
|
#else
|
||||||
|
// Kernel timing compiled out: the accessors keep their signatures but report
// no buckets, and the timer macros expand to nothing.

extern "C" void f_bssn_rhs_kernel_timing_reset()
{
}

extern "C" int f_bssn_rhs_kernel_timing_bucket_count()
{
    return 0;
}

extern "C" const double *f_bssn_rhs_kernel_timing_local_seconds()
{
    return static_cast<const double *>(0);
}

extern "C" const char *f_bssn_rhs_kernel_timing_label(int /* bucket_index */)
{
    return "disabled";
}

#define RHS_KERNEL_TIMER_DECL(var_name)
#define RHS_KERNEL_TIMER_ADD(bucket_name, var_name)
|
||||||
|
#endif
|
||||||
|
|
||||||
// C function that calculates the right-hand side for BSSN equations
|
// C function that calculates the right-hand side for BSSN equations
|
||||||
int f_compute_rhs_bssn(int *ex, double &T,
|
int f_compute_rhs_bssn(int *ex, double &T,
|
||||||
double *X, double *Y, double *Z,
|
double *X, double *Y, double *Z,
|
||||||
@@ -102,6 +178,7 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
|||||||
dY = Y[1] - Y[0];
|
dY = Y[1] - Y[0];
|
||||||
dZ = Z[1] - Z[0];
|
dZ = Z[1] - Z[0];
|
||||||
|
|
||||||
|
RHS_KERNEL_TIMER_DECL(timer_setup_derivs);
|
||||||
// 1ms //
|
// 1ms //
|
||||||
for(int i=0;i<all;i+=1){
|
for(int i=0;i<all;i+=1){
|
||||||
alpn1[i] = Lap[i] + 1.0;
|
alpn1[i] = Lap[i] + 1.0;
|
||||||
@@ -141,6 +218,8 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
|||||||
(dxx[i] + ONE) * betaxz[i] + gxy[i] * betayz[i] + gyz[i] * betayx[i]
|
(dxx[i] + ONE) * betaxz[i] + gxy[i] * betayz[i] + gyz[i] * betayx[i]
|
||||||
+ (dzz[i] + ONE) * betazx[i] - gxz[i] * betayy[i];
|
+ (dzz[i] + ONE) * betazx[i] - gxz[i] * betayy[i];
|
||||||
}
|
}
|
||||||
|
RHS_KERNEL_TIMER_ADD(KB_SETUP_DERIVS, timer_setup_derivs);
|
||||||
|
RHS_KERNEL_TIMER_DECL(timer_geom_gamma);
|
||||||
// Fused: inverse metric + Gamma constraint + Christoffel (3 loops -> 1)
|
// Fused: inverse metric + Gamma constraint + Christoffel (3 loops -> 1)
|
||||||
for(int i=0;i<all;i+=1){
|
for(int i=0;i<all;i+=1){
|
||||||
double det = (dxx[i] + ONE) * (dyy[i] + ONE) * (dzz[i] + ONE) + gxy[i] * gyz[i] * gxz[i] + gxz[i] * gxy[i] * gyz[i] -
|
double det = (dxx[i] + ONE) * (dyy[i] + ONE) * (dzz[i] + ONE) + gxy[i] * gyz[i] * gxz[i] + gxz[i] * gxy[i] * gyz[i] -
|
||||||
@@ -312,6 +391,8 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
|||||||
+ TWO * ( Gamzxy[i]*axy + Gamzxz[i]*axz + Gamzyz[i]*ayz )
|
+ TWO * ( Gamzxy[i]*axy + Gamzxz[i]*axz + Gamzyz[i]*ayz )
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
RHS_KERNEL_TIMER_ADD(KB_GEOM_GAMMA, timer_geom_gamma);
|
||||||
|
RHS_KERNEL_TIMER_DECL(timer_ricci_metric);
|
||||||
// 22.3ms //
|
// 22.3ms //
|
||||||
fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,
|
fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,
|
||||||
X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev);
|
X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev);
|
||||||
@@ -682,7 +763,9 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
|||||||
+ Gamxyz[i] * gzzx[i] + Gamyyz[i] * gzzy[i] + Gamzyz[i] * gzzz[i]
|
+ Gamxyz[i] * gzzx[i] + Gamyyz[i] * gzzy[i] + Gamzyz[i] * gzzz[i]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
RHS_KERNEL_TIMER_ADD(KB_RICCI_METRIC, timer_ricci_metric);
|
||||||
|
|
||||||
|
RHS_KERNEL_TIMER_DECL(timer_chi_lapse);
|
||||||
// 22.3ms //
|
// 22.3ms //
|
||||||
fdderivs(ex,chi,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
|
fdderivs(ex,chi,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
|
||||||
|
|
||||||
@@ -761,6 +844,8 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
|||||||
trK_rhs[i] = gupxx[i] * fxx[i] + gupyy[i] * fyy[i] + gupzz[i] * fzz[i]
|
trK_rhs[i] = gupxx[i] * fxx[i] + gupyy[i] * fyy[i] + gupzz[i] * fzz[i]
|
||||||
+ TWO * ( gupxy[i] * fxy[i] + gupxz[i] * fxz[i] + gupyz[i] * fyz[i] );
|
+ TWO * ( gupxy[i] * fxy[i] + gupxz[i] * fxz[i] + gupyz[i] * fyz[i] );
|
||||||
}
|
}
|
||||||
|
RHS_KERNEL_TIMER_ADD(KB_CHI_LAPSE, timer_chi_lapse);
|
||||||
|
RHS_KERNEL_TIMER_DECL(timer_aij_trk_gauge);
|
||||||
// 2.5ms //
|
// 2.5ms //
|
||||||
for (int i=0;i<all;i+=1) {
|
for (int i=0;i<all;i+=1) {
|
||||||
const double divb = betaxx[i] + betayy[i] + betazz[i];
|
const double divb = betaxx[i] + betayy[i] + betazz[i];
|
||||||
@@ -1061,6 +1146,8 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
|||||||
dtSfz_rhs[i] = Gamz_rhs[i] - reta[i] * dtSfz[i];
|
dtSfz_rhs[i] = Gamz_rhs[i] - reta[i] * dtSfz[i];
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
RHS_KERNEL_TIMER_ADD(KB_AIJ_TRK_GAUGE, timer_aij_trk_gauge);
|
||||||
|
RHS_KERNEL_TIMER_DECL(timer_ko_constraint);
|
||||||
// advection + KO dissipation with shared symmetry buffer
|
// advection + KO dissipation with shared symmetry buffer
|
||||||
lopsided_kodis(ex,X,Y,Z,dxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps);
|
lopsided_kodis(ex,X,Y,Z,dxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps);
|
||||||
lopsided_kodis(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,eps);
|
lopsided_kodis(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,eps);
|
||||||
@@ -1192,6 +1279,7 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
|||||||
movz_Res[i] = movz_Res[i] - F2o3*Kz[i] - F8*PI*Sz[i];
|
movz_Res[i] = movz_Res[i] - F2o3*Kz[i] - F8*PI*Sz[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
RHS_KERNEL_TIMER_ADD(KB_KO_CONSTRAINT, timer_ko_constraint);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -29,12 +29,16 @@
|
|||||||
|
|
||||||
#define REGLEV 0
|
#define REGLEV 0
|
||||||
|
|
||||||
#define BSSN_FINE_TIMING 1
|
#define BSSN_FINE_TIMING 0
|
||||||
|
|
||||||
#define BSSN_FINE_TIMING_EVERY 1
|
#define BSSN_FINE_TIMING_EVERY 1
|
||||||
|
|
||||||
#define BSSN_FINE_TIMING_TOPN 8
|
#define BSSN_FINE_TIMING_TOPN 8
|
||||||
|
|
||||||
|
#define BSSN_KERNEL_FINE_TIMING 0
|
||||||
|
|
||||||
|
#define BSSN_ENABLE_STDIN_ABORT_POLL 0
|
||||||
|
|
||||||
//#define USE_GPU
|
//#define USE_GPU
|
||||||
|
|
||||||
//#define CHECKDETAIL
|
//#define CHECKDETAIL
|
||||||
@@ -103,6 +107,12 @@
|
|||||||
// define BSSN_FINE_TIMING_TOPN
|
// define BSSN_FINE_TIMING_TOPN
|
||||||
// number of hottest timing buckets shown in stdout
|
// number of hottest timing buckets shown in stdout
|
||||||
//
|
//
|
||||||
|
// define BSSN_KERNEL_FINE_TIMING
|
||||||
|
// enable split timing inside compute_rhs_bssn
|
||||||
|
//
|
||||||
|
// define BSSN_ENABLE_STDIN_ABORT_POLL
|
||||||
|
// poll stdin and broadcast abort flag every coarse step
|
||||||
|
//
|
||||||
// define USE_GPU
|
// define USE_GPU
|
||||||
// use gpu or not
|
// use gpu or not
|
||||||
//
|
//
|
||||||
@@ -157,4 +167,3 @@
|
|||||||
#define TINY 1e-10
|
#define TINY 1e-10
|
||||||
|
|
||||||
#endif /* MICRODEF_H */
|
#endif /* MICRODEF_H */
|
||||||
|
|
||||||
|
|||||||
@@ -144,11 +144,15 @@ def generate_macrodef_h():
|
|||||||
print( "#define REGLEV 0", file=file1 )
|
print( "#define REGLEV 0", file=file1 )
|
||||||
print( file=file1 )
|
print( file=file1 )
|
||||||
|
|
||||||
# Define fine-grained timestep timing macros
|
# Define fine-grained timing/debug macros.
|
||||||
# These default to enabled profiling without requiring AMSS_NCKU_Input.py edits.
|
# All of them default to OFF so production builds do not pay profiling overhead.
|
||||||
|
|
||||||
fine_timing = getattr(input_data, "Fine_Timing",
|
fine_timing = getattr(input_data, "Fine_Timing",
|
||||||
getattr(input_data, "Finegrained_Timing", "yes"))
|
getattr(input_data, "Finegrained_Timing", "no"))
|
||||||
|
kernel_fine_timing = getattr(input_data, "Kernel_Fine_Timing",
|
||||||
|
getattr(input_data, "BSSN_Kernel_Fine_Timing", "no"))
|
||||||
|
stdin_abort_poll = getattr(input_data, "Enable_Stdin_Abort_Poll",
|
||||||
|
getattr(input_data, "Stdin_Abort_Poll", "no"))
|
||||||
timing_report_every = max(1, int(getattr(
|
timing_report_every = max(1, int(getattr(
|
||||||
input_data, "Timing_Every_Steps",
|
input_data, "Timing_Every_Steps",
|
||||||
getattr(input_data, "Timing_Report_Every", 1))))
|
getattr(input_data, "Timing_Report_Every", 1))))
|
||||||
@@ -172,6 +176,30 @@ def generate_macrodef_h():
|
|||||||
print( f"#define BSSN_FINE_TIMING_TOPN {timing_top_hotspots}", file=file1 )
|
print( f"#define BSSN_FINE_TIMING_TOPN {timing_top_hotspots}", file=file1 )
|
||||||
print( file=file1 )
|
print( file=file1 )
|
||||||
|
|
||||||
|
if ( kernel_fine_timing == "yes" ):
|
||||||
|
print( "#define BSSN_KERNEL_FINE_TIMING 1", file=file1 )
|
||||||
|
print( file=file1 )
|
||||||
|
elif ( kernel_fine_timing == "no" ):
|
||||||
|
print( "#define BSSN_KERNEL_FINE_TIMING 0", file=file1 )
|
||||||
|
print( file=file1 )
|
||||||
|
else:
|
||||||
|
print( "Kernel_Fine_Timing setting error!!!" )
|
||||||
|
print()
|
||||||
|
print( "# Kernel_Fine_Timing setting error!!!", file=file1 )
|
||||||
|
print( file=file1 )
|
||||||
|
|
||||||
|
if ( stdin_abort_poll == "yes" ):
|
||||||
|
print( "#define BSSN_ENABLE_STDIN_ABORT_POLL 1", file=file1 )
|
||||||
|
print( file=file1 )
|
||||||
|
elif ( stdin_abort_poll == "no" ):
|
||||||
|
print( "#define BSSN_ENABLE_STDIN_ABORT_POLL 0", file=file1 )
|
||||||
|
print( file=file1 )
|
||||||
|
else:
|
||||||
|
print( "Enable_Stdin_Abort_Poll setting error!!!" )
|
||||||
|
print()
|
||||||
|
print( "# Enable_Stdin_Abort_Poll setting error!!!", file=file1 )
|
||||||
|
print( file=file1 )
|
||||||
|
|
||||||
# Define macro USE_GPU
|
# Define macro USE_GPU
|
||||||
# use GPU or not
|
# use GPU or not
|
||||||
|
|
||||||
@@ -261,6 +289,12 @@ def generate_macrodef_h():
|
|||||||
print( "// define BSSN_FINE_TIMING_TOPN", file=file1 )
|
print( "// define BSSN_FINE_TIMING_TOPN", file=file1 )
|
||||||
print( "// number of hottest timing buckets shown in stdout", file=file1 )
|
print( "// number of hottest timing buckets shown in stdout", file=file1 )
|
||||||
print( "//", file=file1 )
|
print( "//", file=file1 )
|
||||||
|
print( "// define BSSN_KERNEL_FINE_TIMING", file=file1 )
|
||||||
|
print( "// enable split timing inside compute_rhs_bssn", file=file1 )
|
||||||
|
print( "//", file=file1 )
|
||||||
|
print( "// define BSSN_ENABLE_STDIN_ABORT_POLL", file=file1 )
|
||||||
|
print( "// poll stdin and broadcast abort flag every coarse step", file=file1 )
|
||||||
|
print( "//", file=file1 )
|
||||||
print( "// define USE_GPU", file=file1 )
|
print( "// define USE_GPU", file=file1 )
|
||||||
print( "// use gpu or not", file=file1 )
|
print( "// use gpu or not", file=file1 )
|
||||||
print( "//", file=file1 )
|
print( "//", file=file1 )
|
||||||
|
|||||||
Reference in New Issue
Block a user