Remove cudaEvent profiling code from f_compute_rhs_bssn
This commit is contained in:
@@ -2123,17 +2123,7 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
init_gpu_dispatch();
|
||||
CUDA_CHECK(cudaSetDevice(g_dispatch.my_device));
|
||||
|
||||
/* --- Profiling: cudaEvent timers (rank 0 only, first 20 calls) --- */
|
||||
static int prof_call_count = 0;
|
||||
const bool do_prof = (g_dispatch.my_rank == 0 && prof_call_count < 20);
|
||||
cudaEvent_t ev_start, ev_h2d, ev_kern, ev_d2h;
|
||||
if (do_prof) {
|
||||
cudaEventCreate(&ev_start); cudaEventCreate(&ev_h2d);
|
||||
cudaEventCreate(&ev_kern); cudaEventCreate(&ev_d2h);
|
||||
cudaEventRecord(ev_start);
|
||||
}
|
||||
|
||||
const int nx = ex[0], ny = ex[1], nz = ex[2];
|
||||
const int nx = ex[0], ny = ex[1], nz = ex[2];
|
||||
const int all = nx * ny * nz;
|
||||
const double dX = X[1]-X[0], dY = Y[1]-Y[0], dZ = Z[1]-Z[0];
|
||||
const int NO_SYMM = 0, EQ_SYMM = 1;
|
||||
@@ -2189,10 +2179,8 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
(size_t)H2D_INPUT_SLOT_COUNT * bytes,
|
||||
cudaMemcpyHostToDevice));
|
||||
|
||||
if (do_prof) cudaEventRecord(ev_h2d);
|
||||
|
||||
/* ============================================================ */
|
||||
/* Phase 1: prep — alpn1, chin1, gxx, gyy, gzz */
|
||||
/* ============================================================ */
|
||||
/* Phase 1: prep — alpn1, chin1, gxx, gyy, gzz */
|
||||
/* ============================================================ */
|
||||
kern_phase1_prep<<<grid(all),BLK>>>(
|
||||
D(S_Lap), D(S_chi), D(S_dxx), D(S_dyy), D(S_dzz),
|
||||
@@ -2573,8 +2561,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
D(S_ham_Res), D(S_movx_Res), D(S_movy_Res), D(S_movz_Res));
|
||||
}
|
||||
|
||||
if (do_prof) cudaEventRecord(ev_kern);
|
||||
|
||||
/* ============================================================ */
|
||||
/* D2H: copy all output arrays back to host */
|
||||
/* ============================================================ */
|
||||
@@ -2615,22 +2601,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
}
|
||||
}
|
||||
|
||||
if (do_prof) {
|
||||
cudaEventRecord(ev_d2h);
|
||||
cudaEventSynchronize(ev_d2h);
|
||||
float t_h2d, t_kern, t_d2h;
|
||||
cudaEventElapsedTime(&t_h2d, ev_start, ev_h2d);
|
||||
cudaEventElapsedTime(&t_kern, ev_h2d, ev_kern);
|
||||
cudaEventElapsedTime(&t_d2h, ev_kern, ev_d2h);
|
||||
printf("[AMSS-PROF] call#%d nx=%d ny=%d nz=%d(all=%d) "
|
||||
"H2D=%.3fms Kern=%.3fms D2H=%.3fms Total=%.3fms\n",
|
||||
prof_call_count, nx, ny, nz, all,
|
||||
t_h2d, t_kern, t_d2h, t_h2d + t_kern + t_d2h);
|
||||
cudaEventDestroy(ev_start); cudaEventDestroy(ev_h2d);
|
||||
cudaEventDestroy(ev_kern); cudaEventDestroy(ev_d2h);
|
||||
prof_call_count++;
|
||||
}
|
||||
|
||||
#undef D
|
||||
return 0;
|
||||
#undef D
|
||||
return 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user