From 46e94d1248a9d59a2429974c351e9012e3eea964 Mon Sep 17 00:00:00 2001 From: CGH0S7 <776459475@qq.com> Date: Thu, 9 Apr 2026 19:36:19 +0800 Subject: [PATCH] Trim constraint-only GPU downloads --- AMSS_NCKU_source/bssn_class.C | 4 ++-- AMSS_NCKU_source/bssn_gpu.cu | 22 ++++++++++++++++------ AMSS_NCKU_source/bssn_macro.h | 7 ++++--- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/AMSS_NCKU_source/bssn_class.C b/AMSS_NCKU_source/bssn_class.C index 6733383..b571eaa 100644 --- a/AMSS_NCKU_source/bssn_class.C +++ b/AMSS_NCKU_source/bssn_class.C @@ -7290,7 +7290,7 @@ void bssn_class::Constraint_Out() if (myrank == cg->rank) { #ifdef USE_GPU - gpu_rhs(CALLED_BY_CONSTRAINT, myrank, RHS_PARA_CALLED_Constraint_Out); + gpu_rhs(CALLED_BY_CONSTRAINT_CONS_ONLY, myrank, RHS_PARA_CALLED_Constraint_Out); #else f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], @@ -7797,7 +7797,7 @@ void bssn_class::Interp_Constraint(bool infg) if (myrank == cg->rank) { #ifdef USE_GPU - gpu_rhs(CALLED_BY_CONSTRAINT, myrank, RHS_PARA_CALLED_Interp_Constraint); + gpu_rhs(CALLED_BY_CONSTRAINT_CONS_ONLY, myrank, RHS_PARA_CALLED_Interp_Constraint); #else f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], diff --git a/AMSS_NCKU_source/bssn_gpu.cu b/AMSS_NCKU_source/bssn_gpu.cu index 72eccc5..78c0429 100644 --- a/AMSS_NCKU_source/bssn_gpu.cu +++ b/AMSS_NCKU_source/bssn_gpu.cu @@ -4034,9 +4034,9 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y, // Main GPU evolution path now consumes RHS directly from device caches. // Avoiding this bulk D2H copy removes one full round-trip per RK stage. } - else if(calledby == CALLED_BY_CONSTRAINT) - { - cudaMemcpy(Gamxxx, Mh_ Gamxxx, matrix_size * sizeof(double), cudaMemcpyDeviceToHost); + else if(calledby == CALLED_BY_CONSTRAINT) + { + cudaMemcpy(Gamxxx, Mh_ Gamxxx, matrix_size * sizeof(double), cudaMemcpyDeviceToHost); cudaMemcpy(Gamxxy, Mh_ Gamxxy, matrix_size * sizeof(double), cudaMemcpyDeviceToHost); cudaMemcpy(Gamxxz, Mh_ Gamxxz, matrix_size * sizeof(double), cudaMemcpyDeviceToHost); cudaMemcpy(Gamxyy, Mh_ Gamxyy, matrix_size * sizeof(double), cudaMemcpyDeviceToHost); @@ -4065,9 +4065,19 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y, cudaMemcpy(movy_Res, Mh_ movy_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost); cudaMemcpy(movz_Res, Mh_ movz_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost); cudaMemcpy(Gmx_Res, Mh_ Gmx_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost); - cudaMemcpy(Gmy_Res, Mh_ Gmy_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost); - cudaMemcpy(Gmz_Res, Mh_ Gmz_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost); - } + cudaMemcpy(Gmy_Res, Mh_ Gmy_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost); + cudaMemcpy(Gmz_Res, Mh_ Gmz_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost); + } + else if(calledby == CALLED_BY_CONSTRAINT_CONS_ONLY) + { + cudaMemcpy(ham_Res, Mh_ ham_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost); + cudaMemcpy(movx_Res, Mh_ movx_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost); + cudaMemcpy(movy_Res, Mh_ movy_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost); + cudaMemcpy(movz_Res, Mh_ movz_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost); + cudaMemcpy(Gmx_Res, Mh_ Gmx_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost); + cudaMemcpy(Gmy_Res, Mh_ Gmy_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost); + cudaMemcpy(Gmz_Res, Mh_ Gmz_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost); + } //----------------------------------------------------- //-------------------FOR GPU TEST---------------------- diff --git a/AMSS_NCKU_source/bssn_macro.h b/AMSS_NCKU_source/bssn_macro.h index b7c3820..676966c 100644 --- a/AMSS_NCKU_source/bssn_macro.h +++ b/AMSS_NCKU_source/bssn_macro.h @@ -65,9 +65,10 @@ if(TIME_COUNT_EACH_RANK == 1){\ }\ } -//3---------------------GPU--------------------- -#define CALLED_BY_STEP 0 -#define CALLED_BY_CONSTRAINT 1 +//3---------------------GPU--------------------- +#define CALLED_BY_STEP 0 +#define CALLED_BY_CONSTRAINT 1 +#define CALLED_BY_CONSTRAINT_CONS_ONLY 2 #define RHS_PARA_CALLED_FIRST_TIME cg->shape,TRK4,cg->X[0],cg->X[1],cg->X[2],cg->fgfs[phi0->sgfn],cg->fgfs[trK0->sgfn],cg->fgfs[gxx0->sgfn],cg->fgfs[gxy0->sgfn],cg->fgfs[gxz0->sgfn],cg->fgfs[gyy0->sgfn],cg->fgfs[gyz0->sgfn],cg->fgfs[gzz0->sgfn],cg->fgfs[Axx0->sgfn],cg->fgfs[Axy0->sgfn],cg->fgfs[Axz0->sgfn],cg->fgfs[Ayy0->sgfn],cg->fgfs[Ayz0->sgfn],cg->fgfs[Azz0->sgfn],cg->fgfs[Gmx0->sgfn],cg->fgfs[Gmy0->sgfn],cg->fgfs[Gmz0->sgfn],cg->fgfs[Lap0->sgfn],cg->fgfs[Sfx0->sgfn],cg->fgfs[Sfy0->sgfn],cg->fgfs[Sfz0->sgfn],cg->fgfs[dtSfx0->sgfn],cg->fgfs[dtSfy0->sgfn],cg->fgfs[dtSfz0->sgfn],cg->fgfs[phi_rhs->sgfn],cg->fgfs[trK_rhs->sgfn],cg->fgfs[gxx_rhs->sgfn],cg->fgfs[gxy_rhs->sgfn],cg->fgfs[gxz_rhs->sgfn],cg->fgfs[gyy_rhs->sgfn],cg->fgfs[gyz_rhs->sgfn],cg->fgfs[gzz_rhs->sgfn],cg->fgfs[Axx_rhs->sgfn],cg->fgfs[Axy_rhs->sgfn],cg->fgfs[Axz_rhs->sgfn],cg->fgfs[Ayy_rhs->sgfn],cg->fgfs[Ayz_rhs->sgfn],cg->fgfs[Azz_rhs->sgfn],cg->fgfs[Gmx_rhs->sgfn],cg->fgfs[Gmy_rhs->sgfn],cg->fgfs[Gmz_rhs->sgfn],cg->fgfs[Lap_rhs->sgfn],cg->fgfs[Sfx_rhs->sgfn],cg->fgfs[Sfy_rhs->sgfn],cg->fgfs[Sfz_rhs->sgfn],cg->fgfs[dtSfx_rhs->sgfn],cg->fgfs[dtSfy_rhs->sgfn],cg->fgfs[dtSfz_rhs->sgfn],cg->fgfs[rho->sgfn],cg->fgfs[Sx->sgfn],cg->fgfs[Sy->sgfn],cg->fgfs[Sz->sgfn],cg->fgfs[Sxx->sgfn],cg->fgfs[Sxy->sgfn],cg->fgfs[Sxz->sgfn],cg->fgfs[Syy->sgfn],cg->fgfs[Syz->sgfn],cg->fgfs[Szz->sgfn],cg->fgfs[Gamxxx->sgfn],cg->fgfs[Gamxxy->sgfn],cg->fgfs[Gamxxz->sgfn],cg->fgfs[Gamxyy->sgfn],cg->fgfs[Gamxyz->sgfn],cg->fgfs[Gamxzz->sgfn],cg->fgfs[Gamyxx->sgfn],cg->fgfs[Gamyxy->sgfn],cg->fgfs[Gamyxz->sgfn],cg->fgfs[Gamyyy->sgfn],cg->fgfs[Gamyyz->sgfn],cg->fgfs[Gamyzz->sgfn],cg->fgfs[Gamzxx->sgfn],cg->fgfs[Gamzxy->sgfn],cg->fgfs[Gamzxz->sgfn],cg->fgfs[Gamzyy->sgfn],cg->fgfs[Gamzyz->sgfn],cg->fgfs[Gamzzz->sgfn],cg->fgfs[Rxx->sgfn],cg->fgfs[Rxy->sgfn],cg->fgfs[Rxz->sgfn],cg->fgfs[Ryy->sgfn],cg->fgfs[Ryz->sgfn],cg->fgfs[Rzz->sgfn],cg->fgfs[Cons_Ham->sgfn],cg->fgfs[Cons_Px->sgfn],cg->fgfs[Cons_Py->sgfn],cg->fgfs[Cons_Pz->sgfn],cg->fgfs[Cons_Gx->sgfn],cg->fgfs[Cons_Gy->sgfn],cg->fgfs[Cons_Gz->sgfn],Symmetry,lev,ndeps,pre