Remove profiling code

This commit is contained in:
2026-03-02 11:29:48 +08:00
parent 90620c2aec
commit 71f6eb7b44

View File

@@ -2123,16 +2123,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
init_gpu_dispatch(); init_gpu_dispatch();
CUDA_CHECK(cudaSetDevice(g_dispatch.my_device)); CUDA_CHECK(cudaSetDevice(g_dispatch.my_device));
/* --- Profiling: cudaEvent timers (rank 0 only, first 20 calls) --- */
static int prof_call_count = 0;
const bool do_prof = (g_dispatch.my_rank == 0 && prof_call_count < 20);
cudaEvent_t ev_start, ev_h2d, ev_kern, ev_d2h;
if (do_prof) {
cudaEventCreate(&ev_start); cudaEventCreate(&ev_h2d);
cudaEventCreate(&ev_kern); cudaEventCreate(&ev_d2h);
cudaEventRecord(ev_start);
}
const int nx = ex[0], ny = ex[1], nz = ex[2]; const int nx = ex[0], ny = ex[1], nz = ex[2];
const int all = nx * ny * nz; const int all = nx * ny * nz;
const double dX = X[1]-X[0], dY = Y[1]-Y[0], dZ = Z[1]-Z[0]; const double dX = X[1]-X[0], dY = Y[1]-Y[0], dZ = Z[1]-Z[0];
@@ -2189,8 +2179,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
(size_t)H2D_INPUT_SLOT_COUNT * bytes, (size_t)H2D_INPUT_SLOT_COUNT * bytes,
cudaMemcpyHostToDevice)); cudaMemcpyHostToDevice));
if (do_prof) cudaEventRecord(ev_h2d);
/* ============================================================ */ /* ============================================================ */
/* Phase 1: prep — alpn1, chin1, gxx, gyy, gzz */ /* Phase 1: prep — alpn1, chin1, gxx, gyy, gzz */
/* ============================================================ */ /* ============================================================ */
@@ -2573,8 +2561,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
D(S_ham_Res), D(S_movx_Res), D(S_movy_Res), D(S_movz_Res)); D(S_ham_Res), D(S_movx_Res), D(S_movy_Res), D(S_movz_Res));
} }
if (do_prof) cudaEventRecord(ev_kern);
/* ============================================================ */ /* ============================================================ */
/* D2H: copy all output arrays back to host */ /* D2H: copy all output arrays back to host */
/* ============================================================ */ /* ============================================================ */
@@ -2615,22 +2601,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
} }
} }
if (do_prof) {
cudaEventRecord(ev_d2h);
cudaEventSynchronize(ev_d2h);
float t_h2d, t_kern, t_d2h;
cudaEventElapsedTime(&t_h2d, ev_start, ev_h2d);
cudaEventElapsedTime(&t_kern, ev_h2d, ev_kern);
cudaEventElapsedTime(&t_d2h, ev_kern, ev_d2h);
printf("[AMSS-PROF] call#%d nx=%d ny=%d nz=%d(all=%d) "
"H2D=%.3fms Kern=%.3fms D2H=%.3fms Total=%.3fms\n",
prof_call_count, nx, ny, nz, all,
t_h2d, t_kern, t_d2h, t_h2d + t_kern + t_d2h);
cudaEventDestroy(ev_start); cudaEventDestroy(ev_h2d);
cudaEventDestroy(ev_kern); cudaEventDestroy(ev_d2h);
prof_call_count++;
}
#undef D #undef D
return 0; return 0;
} }