Remove profiling code
This commit is contained in:
@@ -2123,17 +2123,7 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
|||||||
init_gpu_dispatch();
|
init_gpu_dispatch();
|
||||||
CUDA_CHECK(cudaSetDevice(g_dispatch.my_device));
|
CUDA_CHECK(cudaSetDevice(g_dispatch.my_device));
|
||||||
|
|
||||||
/* --- Profiling: cudaEvent timers (rank 0 only, first 20 calls) --- */
|
const int nx = ex[0], ny = ex[1], nz = ex[2];
|
||||||
static int prof_call_count = 0;
|
|
||||||
const bool do_prof = (g_dispatch.my_rank == 0 && prof_call_count < 20);
|
|
||||||
cudaEvent_t ev_start, ev_h2d, ev_kern, ev_d2h;
|
|
||||||
if (do_prof) {
|
|
||||||
cudaEventCreate(&ev_start); cudaEventCreate(&ev_h2d);
|
|
||||||
cudaEventCreate(&ev_kern); cudaEventCreate(&ev_d2h);
|
|
||||||
cudaEventRecord(ev_start);
|
|
||||||
}
|
|
||||||
|
|
||||||
const int nx = ex[0], ny = ex[1], nz = ex[2];
|
|
||||||
const int all = nx * ny * nz;
|
const int all = nx * ny * nz;
|
||||||
const double dX = X[1]-X[0], dY = Y[1]-Y[0], dZ = Z[1]-Z[0];
|
const double dX = X[1]-X[0], dY = Y[1]-Y[0], dZ = Z[1]-Z[0];
|
||||||
const int NO_SYMM = 0, EQ_SYMM = 1;
|
const int NO_SYMM = 0, EQ_SYMM = 1;
|
||||||
@@ -2189,10 +2179,8 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
|||||||
(size_t)H2D_INPUT_SLOT_COUNT * bytes,
|
(size_t)H2D_INPUT_SLOT_COUNT * bytes,
|
||||||
cudaMemcpyHostToDevice));
|
cudaMemcpyHostToDevice));
|
||||||
|
|
||||||
if (do_prof) cudaEventRecord(ev_h2d);
|
/* ============================================================ */
|
||||||
|
/* Phase 1: prep — alpn1, chin1, gxx, gyy, gzz */
|
||||||
/* ============================================================ */
|
|
||||||
/* Phase 1: prep — alpn1, chin1, gxx, gyy, gzz */
|
|
||||||
/* ============================================================ */
|
/* ============================================================ */
|
||||||
kern_phase1_prep<<<grid(all),BLK>>>(
|
kern_phase1_prep<<<grid(all),BLK>>>(
|
||||||
D(S_Lap), D(S_chi), D(S_dxx), D(S_dyy), D(S_dzz),
|
D(S_Lap), D(S_chi), D(S_dxx), D(S_dyy), D(S_dzz),
|
||||||
@@ -2573,8 +2561,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
|||||||
D(S_ham_Res), D(S_movx_Res), D(S_movy_Res), D(S_movz_Res));
|
D(S_ham_Res), D(S_movx_Res), D(S_movy_Res), D(S_movz_Res));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (do_prof) cudaEventRecord(ev_kern);
|
|
||||||
|
|
||||||
/* ============================================================ */
|
/* ============================================================ */
|
||||||
/* D2H: copy all output arrays back to host */
|
/* D2H: copy all output arrays back to host */
|
||||||
/* ============================================================ */
|
/* ============================================================ */
|
||||||
@@ -2615,22 +2601,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (do_prof) {
|
#undef D
|
||||||
cudaEventRecord(ev_d2h);
|
return 0;
|
||||||
cudaEventSynchronize(ev_d2h);
|
|
||||||
float t_h2d, t_kern, t_d2h;
|
|
||||||
cudaEventElapsedTime(&t_h2d, ev_start, ev_h2d);
|
|
||||||
cudaEventElapsedTime(&t_kern, ev_h2d, ev_kern);
|
|
||||||
cudaEventElapsedTime(&t_d2h, ev_kern, ev_d2h);
|
|
||||||
printf("[AMSS-PROF] call#%d nx=%d ny=%d nz=%d(all=%d) "
|
|
||||||
"H2D=%.3fms Kern=%.3fms D2H=%.3fms Total=%.3fms\n",
|
|
||||||
prof_call_count, nx, ny, nz, all,
|
|
||||||
t_h2d, t_kern, t_d2h, t_h2d + t_kern + t_d2h);
|
|
||||||
cudaEventDestroy(ev_start); cudaEventDestroy(ev_h2d);
|
|
||||||
cudaEventDestroy(ev_kern); cudaEventDestroy(ev_d2h);
|
|
||||||
prof_call_count++;
|
|
||||||
}
|
|
||||||
|
|
||||||
#undef D
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user