Remove cudaEvent profiling code from f_compute_rhs_bssn

This commit is contained in:
2026-03-02 11:29:48 +08:00
parent 90620c2aec
commit 71f6eb7b44

View File

@@ -2123,17 +2123,7 @@ int f_compute_rhs_bssn(int *ex, double &T,
init_gpu_dispatch();
CUDA_CHECK(cudaSetDevice(g_dispatch.my_device));
/* --- Profiling: cudaEvent timers (rank 0 only, first 20 calls) --- */
static int prof_call_count = 0;
const bool do_prof = (g_dispatch.my_rank == 0 && prof_call_count < 20);
cudaEvent_t ev_start, ev_h2d, ev_kern, ev_d2h;
if (do_prof) {
cudaEventCreate(&ev_start); cudaEventCreate(&ev_h2d);
cudaEventCreate(&ev_kern); cudaEventCreate(&ev_d2h);
cudaEventRecord(ev_start);
}
const int nx = ex[0], ny = ex[1], nz = ex[2];
const int nx = ex[0], ny = ex[1], nz = ex[2];
const int all = nx * ny * nz;
const double dX = X[1]-X[0], dY = Y[1]-Y[0], dZ = Z[1]-Z[0];
const int NO_SYMM = 0, EQ_SYMM = 1;
@@ -2189,10 +2179,8 @@ int f_compute_rhs_bssn(int *ex, double &T,
(size_t)H2D_INPUT_SLOT_COUNT * bytes,
cudaMemcpyHostToDevice));
if (do_prof) cudaEventRecord(ev_h2d);
/* ============================================================ */
/* Phase 1: prep — alpn1, chin1, gxx, gyy, gzz */
/* ============================================================ */
/* Phase 1: prep — alpn1, chin1, gxx, gyy, gzz */
/* ============================================================ */
kern_phase1_prep<<<grid(all),BLK>>>(
D(S_Lap), D(S_chi), D(S_dxx), D(S_dyy), D(S_dzz),
@@ -2573,8 +2561,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
D(S_ham_Res), D(S_movx_Res), D(S_movy_Res), D(S_movz_Res));
}
if (do_prof) cudaEventRecord(ev_kern);
/* ============================================================ */
/* D2H: copy all output arrays back to host */
/* ============================================================ */
@@ -2615,22 +2601,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
}
}
if (do_prof) {
cudaEventRecord(ev_d2h);
cudaEventSynchronize(ev_d2h);
float t_h2d, t_kern, t_d2h;
cudaEventElapsedTime(&t_h2d, ev_start, ev_h2d);
cudaEventElapsedTime(&t_kern, ev_h2d, ev_kern);
cudaEventElapsedTime(&t_d2h, ev_kern, ev_d2h);
printf("[AMSS-PROF] call#%d nx=%d ny=%d nz=%d(all=%d) "
"H2D=%.3fms Kern=%.3fms D2H=%.3fms Total=%.3fms\n",
prof_call_count, nx, ny, nz, all,
t_h2d, t_kern, t_d2h, t_h2d + t_kern + t_d2h);
cudaEventDestroy(ev_start); cudaEventDestroy(ev_h2d);
cudaEventDestroy(ev_kern); cudaEventDestroy(ev_d2h);
prof_call_count++;
}
#undef D
return 0;
#undef D
return 0;
}