From 71f6eb7b442a77e0c174ecc9de755188324944da Mon Sep 17 00:00:00 2001 From: ianchb Date: Mon, 2 Mar 2026 11:29:48 +0800 Subject: [PATCH] Remove profiling code --- AMSS_NCKU_source/bssn_rhs_cuda.cu | 40 ++++--------------------------- 1 file changed, 5 insertions(+), 35 deletions(-) diff --git a/AMSS_NCKU_source/bssn_rhs_cuda.cu b/AMSS_NCKU_source/bssn_rhs_cuda.cu index 9d072c6..b859823 100644 --- a/AMSS_NCKU_source/bssn_rhs_cuda.cu +++ b/AMSS_NCKU_source/bssn_rhs_cuda.cu @@ -2123,17 +2123,7 @@ int f_compute_rhs_bssn(int *ex, double &T, init_gpu_dispatch(); CUDA_CHECK(cudaSetDevice(g_dispatch.my_device)); - /* --- Profiling: cudaEvent timers (rank 0 only, first 20 calls) --- */ - static int prof_call_count = 0; - const bool do_prof = (g_dispatch.my_rank == 0 && prof_call_count < 20); - cudaEvent_t ev_start, ev_h2d, ev_kern, ev_d2h; - if (do_prof) { - cudaEventCreate(&ev_start); cudaEventCreate(&ev_h2d); - cudaEventCreate(&ev_kern); cudaEventCreate(&ev_d2h); - cudaEventRecord(ev_start); - } - - const int nx = ex[0], ny = ex[1], nz = ex[2]; + const int nx = ex[0], ny = ex[1], nz = ex[2]; const int all = nx * ny * nz; const double dX = X[1]-X[0], dY = Y[1]-Y[0], dZ = Z[1]-Z[0]; const int NO_SYMM = 0, EQ_SYMM = 1; @@ -2189,10 +2179,8 @@ int f_compute_rhs_bssn(int *ex, double &T, (size_t)H2D_INPUT_SLOT_COUNT * bytes, cudaMemcpyHostToDevice)); - if (do_prof) cudaEventRecord(ev_h2d); - - /* ============================================================ */ - /* Phase 1: prep — alpn1, chin1, gxx, gyy, gzz */ + /* ============================================================ */ + /* Phase 1: prep — alpn1, chin1, gxx, gyy, gzz */ /* ============================================================ */ kern_phase1_prep<<>>( D(S_Lap), D(S_chi), D(S_dxx), D(S_dyy), D(S_dzz), @@ -2573,8 +2561,6 @@ int f_compute_rhs_bssn(int *ex, double &T, D(S_ham_Res), D(S_movx_Res), D(S_movy_Res), D(S_movz_Res)); } - if (do_prof) cudaEventRecord(ev_kern); - /* ============================================================ */ /* D2H: copy all output arrays back to host */ /* ============================================================ */ @@ -2615,22 +2601,6 @@ int f_compute_rhs_bssn(int *ex, double &T, } } - if (do_prof) { - cudaEventRecord(ev_d2h); - cudaEventSynchronize(ev_d2h); - float t_h2d, t_kern, t_d2h; - cudaEventElapsedTime(&t_h2d, ev_start, ev_h2d); - cudaEventElapsedTime(&t_kern, ev_h2d, ev_kern); - cudaEventElapsedTime(&t_d2h, ev_kern, ev_d2h); - printf("[AMSS-PROF] call#%d nx=%d ny=%d nz=%d(all=%d) " - "H2D=%.3fms Kern=%.3fms D2H=%.3fms Total=%.3fms\n", - prof_call_count, nx, ny, nz, all, - t_h2d, t_kern, t_d2h, t_h2d + t_kern + t_d2h); - cudaEventDestroy(ev_start); cudaEventDestroy(ev_h2d); - cudaEventDestroy(ev_kern); cudaEventDestroy(ev_d2h); - prof_call_count++; - } - - #undef D - return 0; + #undef D + return 0; }