Add optional BSSN kernel profiling switches
(cherry picked from commit 9c31384b2f)
This commit is contained in:
@@ -2,12 +2,88 @@
|
||||
#include "bssn_rhs.h"
|
||||
#include "share_func.h"
|
||||
#include "tool.h"
|
||||
#include <time.h>
|
||||
// 0-based i,j,k
|
||||
// #define IDX_F(i,j,k,nx,ny) ((i) + (j)*(nx) + (k)*(nx)*(ny))
|
||||
// ex(1)=nx, ex(2)=ny, ex(3)=nz
|
||||
|
||||
// 用法:a[ IDX_F(i,j,k,nx,ny) ]
|
||||
|
||||
#ifndef BSSN_KERNEL_FINE_TIMING
|
||||
#define BSSN_KERNEL_FINE_TIMING 0
|
||||
#endif
|
||||
|
||||
#if BSSN_KERNEL_FINE_TIMING
|
||||
namespace rhs_kernel_timing
|
||||
{
|
||||
enum Bucket
|
||||
{
|
||||
KB_SETUP_DERIVS = 0,
|
||||
KB_GEOM_GAMMA,
|
||||
KB_RICCI_METRIC,
|
||||
KB_CHI_LAPSE,
|
||||
KB_AIJ_TRK_GAUGE,
|
||||
KB_KO_CONSTRAINT,
|
||||
KB_COUNT
|
||||
};
|
||||
|
||||
static double local_bucket_seconds[KB_COUNT];
|
||||
|
||||
static const char *bucket_labels[KB_COUNT] =
|
||||
{
|
||||
"setup_derivs",
|
||||
"geom_gamma",
|
||||
"ricci_metric",
|
||||
"chi_lapse",
|
||||
"aij_trk_gauge",
|
||||
"ko_constraint"
|
||||
};
|
||||
|
||||
static inline double now_seconds()
|
||||
{
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
return double(ts.tv_sec) + 1.0e-9 * double(ts.tv_nsec);
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void f_bssn_rhs_kernel_timing_reset()
|
||||
{
|
||||
for (int i = 0; i < rhs_kernel_timing::KB_COUNT; ++i)
|
||||
rhs_kernel_timing::local_bucket_seconds[i] = 0.0;
|
||||
}
|
||||
|
||||
extern "C" int f_bssn_rhs_kernel_timing_bucket_count()
|
||||
{
|
||||
return rhs_kernel_timing::KB_COUNT;
|
||||
}
|
||||
|
||||
extern "C" const double *f_bssn_rhs_kernel_timing_local_seconds()
|
||||
{
|
||||
return rhs_kernel_timing::local_bucket_seconds;
|
||||
}
|
||||
|
||||
extern "C" const char *f_bssn_rhs_kernel_timing_label(int bucket_index)
|
||||
{
|
||||
if (bucket_index < 0 || bucket_index >= rhs_kernel_timing::KB_COUNT)
|
||||
return "unknown";
|
||||
return rhs_kernel_timing::bucket_labels[bucket_index];
|
||||
}
|
||||
|
||||
#define RHS_KERNEL_TIMER_DECL(var_name) const double var_name = rhs_kernel_timing::now_seconds()
|
||||
#define RHS_KERNEL_TIMER_ADD(bucket_name, var_name) \
|
||||
rhs_kernel_timing::local_bucket_seconds[int(rhs_kernel_timing::bucket_name)] += \
|
||||
rhs_kernel_timing::now_seconds() - (var_name)
|
||||
#else
|
||||
extern "C" void f_bssn_rhs_kernel_timing_reset() {}
|
||||
extern "C" int f_bssn_rhs_kernel_timing_bucket_count() { return 0; }
|
||||
extern "C" const double *f_bssn_rhs_kernel_timing_local_seconds() { return 0; }
|
||||
extern "C" const char *f_bssn_rhs_kernel_timing_label(int) { return "disabled"; }
|
||||
|
||||
#define RHS_KERNEL_TIMER_DECL(var_name)
|
||||
#define RHS_KERNEL_TIMER_ADD(bucket_name, var_name)
|
||||
#endif
|
||||
|
||||
// C function that calculates the right-hand side for BSSN equations
|
||||
int f_compute_rhs_bssn(int *ex, double &T,
|
||||
double *X, double *Y, double *Z,
|
||||
@@ -102,6 +178,7 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
dY = Y[1] - Y[0];
|
||||
dZ = Z[1] - Z[0];
|
||||
|
||||
RHS_KERNEL_TIMER_DECL(timer_setup_derivs);
|
||||
// 1ms //
|
||||
for(int i=0;i<all;i+=1){
|
||||
alpn1[i] = Lap[i] + 1.0;
|
||||
@@ -141,6 +218,8 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
(dxx[i] + ONE) * betaxz[i] + gxy[i] * betayz[i] + gyz[i] * betayx[i]
|
||||
+ (dzz[i] + ONE) * betazx[i] - gxz[i] * betayy[i];
|
||||
}
|
||||
RHS_KERNEL_TIMER_ADD(KB_SETUP_DERIVS, timer_setup_derivs);
|
||||
RHS_KERNEL_TIMER_DECL(timer_geom_gamma);
|
||||
// Fused: inverse metric + Gamma constraint + Christoffel (3 loops -> 1)
|
||||
for(int i=0;i<all;i+=1){
|
||||
double det = (dxx[i] + ONE) * (dyy[i] + ONE) * (dzz[i] + ONE) + gxy[i] * gyz[i] * gxz[i] + gxz[i] * gxy[i] * gyz[i] -
|
||||
@@ -312,6 +391,8 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
+ TWO * ( Gamzxy[i]*axy + Gamzxz[i]*axz + Gamzyz[i]*ayz )
|
||||
);
|
||||
}
|
||||
RHS_KERNEL_TIMER_ADD(KB_GEOM_GAMMA, timer_geom_gamma);
|
||||
RHS_KERNEL_TIMER_DECL(timer_ricci_metric);
|
||||
// 22.3ms //
|
||||
fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,
|
||||
X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev);
|
||||
@@ -682,7 +763,9 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
+ Gamxyz[i] * gzzx[i] + Gamyyz[i] * gzzy[i] + Gamzyz[i] * gzzz[i]
|
||||
);
|
||||
}
|
||||
RHS_KERNEL_TIMER_ADD(KB_RICCI_METRIC, timer_ricci_metric);
|
||||
|
||||
RHS_KERNEL_TIMER_DECL(timer_chi_lapse);
|
||||
// 22.3ms //
|
||||
fdderivs(ex,chi,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
|
||||
|
||||
@@ -761,6 +844,8 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
trK_rhs[i] = gupxx[i] * fxx[i] + gupyy[i] * fyy[i] + gupzz[i] * fzz[i]
|
||||
+ TWO * ( gupxy[i] * fxy[i] + gupxz[i] * fxz[i] + gupyz[i] * fyz[i] );
|
||||
}
|
||||
RHS_KERNEL_TIMER_ADD(KB_CHI_LAPSE, timer_chi_lapse);
|
||||
RHS_KERNEL_TIMER_DECL(timer_aij_trk_gauge);
|
||||
// 2.5ms //
|
||||
for (int i=0;i<all;i+=1) {
|
||||
const double divb = betaxx[i] + betayy[i] + betazz[i];
|
||||
@@ -1061,6 +1146,8 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
dtSfz_rhs[i] = Gamz_rhs[i] - reta[i] * dtSfz[i];
|
||||
#endif
|
||||
}
|
||||
RHS_KERNEL_TIMER_ADD(KB_AIJ_TRK_GAUGE, timer_aij_trk_gauge);
|
||||
RHS_KERNEL_TIMER_DECL(timer_ko_constraint);
|
||||
// advection + KO dissipation with shared symmetry buffer
|
||||
lopsided_kodis(ex,X,Y,Z,dxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps);
|
||||
lopsided_kodis(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,eps);
|
||||
@@ -1192,6 +1279,7 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
movz_Res[i] = movz_Res[i] - F2o3*Kz[i] - F8*PI*Sz[i];
|
||||
}
|
||||
}
|
||||
RHS_KERNEL_TIMER_ADD(KB_KO_CONSTRAINT, timer_ko_constraint);
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user