Trim constraint-only GPU downloads
This commit is contained in:
@@ -7290,7 +7290,7 @@ void bssn_class::Constraint_Out()
|
||||
if (myrank == cg->rank)
|
||||
{
|
||||
#ifdef USE_GPU
|
||||
gpu_rhs(CALLED_BY_CONSTRAINT, myrank, RHS_PARA_CALLED_Constraint_Out);
|
||||
gpu_rhs(CALLED_BY_CONSTRAINT_CONS_ONLY, myrank, RHS_PARA_CALLED_Constraint_Out);
|
||||
#else
|
||||
f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
||||
@@ -7797,7 +7797,7 @@ void bssn_class::Interp_Constraint(bool infg)
|
||||
if (myrank == cg->rank)
|
||||
{
|
||||
#ifdef USE_GPU
|
||||
gpu_rhs(CALLED_BY_CONSTRAINT, myrank, RHS_PARA_CALLED_Interp_Constraint);
|
||||
gpu_rhs(CALLED_BY_CONSTRAINT_CONS_ONLY, myrank, RHS_PARA_CALLED_Interp_Constraint);
|
||||
#else
|
||||
f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
||||
|
||||
@@ -4034,9 +4034,9 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y,
|
||||
// Main GPU evolution path now consumes RHS directly from device caches.
|
||||
// Avoiding this bulk D2H copy removes one full round-trip per RK stage.
|
||||
}
|
||||
else if(calledby == CALLED_BY_CONSTRAINT)
|
||||
{
|
||||
cudaMemcpy(Gamxxx, Mh_ Gamxxx, matrix_size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
else if(calledby == CALLED_BY_CONSTRAINT)
|
||||
{
|
||||
cudaMemcpy(Gamxxx, Mh_ Gamxxx, matrix_size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(Gamxxy, Mh_ Gamxxy, matrix_size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(Gamxxz, Mh_ Gamxxz, matrix_size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(Gamxyy, Mh_ Gamxyy, matrix_size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
@@ -4065,9 +4065,19 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y,
|
||||
cudaMemcpy(movy_Res, Mh_ movy_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(movz_Res, Mh_ movz_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(Gmx_Res, Mh_ Gmx_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(Gmy_Res, Mh_ Gmy_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(Gmz_Res, Mh_ Gmz_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
}
|
||||
cudaMemcpy(Gmy_Res, Mh_ Gmy_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(Gmz_Res, Mh_ Gmz_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
}
|
||||
else if(calledby == CALLED_BY_CONSTRAINT_CONS_ONLY)
|
||||
{
|
||||
cudaMemcpy(ham_Res, Mh_ ham_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(movx_Res, Mh_ movx_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(movy_Res, Mh_ movy_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(movz_Res, Mh_ movz_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(Gmx_Res, Mh_ Gmx_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(Gmy_Res, Mh_ Gmy_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(Gmz_Res, Mh_ Gmz_Res, matrix_size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
}
|
||||
|
||||
//-----------------------------------------------------
|
||||
//-------------------FOR GPU TEST----------------------
|
||||
|
||||
@@ -65,9 +65,10 @@ if(TIME_COUNT_EACH_RANK == 1){\
|
||||
}\
|
||||
}
|
||||
|
||||
//3---------------------GPU---------------------
|
||||
#define CALLED_BY_STEP 0
|
||||
#define CALLED_BY_CONSTRAINT 1
|
||||
//3---------------------GPU---------------------
|
||||
#define CALLED_BY_STEP 0
|
||||
#define CALLED_BY_CONSTRAINT 1
|
||||
#define CALLED_BY_CONSTRAINT_CONS_ONLY 2
|
||||
|
||||
#define RHS_PARA_CALLED_FIRST_TIME cg->shape,TRK4,cg->X[0],cg->X[1],cg->X[2],cg->fgfs[phi0->sgfn],cg->fgfs[trK0->sgfn],cg->fgfs[gxx0->sgfn],cg->fgfs[gxy0->sgfn],cg->fgfs[gxz0->sgfn],cg->fgfs[gyy0->sgfn],cg->fgfs[gyz0->sgfn],cg->fgfs[gzz0->sgfn],cg->fgfs[Axx0->sgfn],cg->fgfs[Axy0->sgfn],cg->fgfs[Axz0->sgfn],cg->fgfs[Ayy0->sgfn],cg->fgfs[Ayz0->sgfn],cg->fgfs[Azz0->sgfn],cg->fgfs[Gmx0->sgfn],cg->fgfs[Gmy0->sgfn],cg->fgfs[Gmz0->sgfn],cg->fgfs[Lap0->sgfn],cg->fgfs[Sfx0->sgfn],cg->fgfs[Sfy0->sgfn],cg->fgfs[Sfz0->sgfn],cg->fgfs[dtSfx0->sgfn],cg->fgfs[dtSfy0->sgfn],cg->fgfs[dtSfz0->sgfn],cg->fgfs[phi_rhs->sgfn],cg->fgfs[trK_rhs->sgfn],cg->fgfs[gxx_rhs->sgfn],cg->fgfs[gxy_rhs->sgfn],cg->fgfs[gxz_rhs->sgfn],cg->fgfs[gyy_rhs->sgfn],cg->fgfs[gyz_rhs->sgfn],cg->fgfs[gzz_rhs->sgfn],cg->fgfs[Axx_rhs->sgfn],cg->fgfs[Axy_rhs->sgfn],cg->fgfs[Axz_rhs->sgfn],cg->fgfs[Ayy_rhs->sgfn],cg->fgfs[Ayz_rhs->sgfn],cg->fgfs[Azz_rhs->sgfn],cg->fgfs[Gmx_rhs->sgfn],cg->fgfs[Gmy_rhs->sgfn],cg->fgfs[Gmz_rhs->sgfn],cg->fgfs[Lap_rhs->sgfn],cg->fgfs[Sfx_rhs->sgfn],cg->fgfs[Sfy_rhs->sgfn],cg->fgfs[Sfz_rhs->sgfn],cg->fgfs[dtSfx_rhs->sgfn],cg->fgfs[dtSfy_rhs->sgfn],cg->fgfs[dtSfz_rhs->sgfn],cg->fgfs[rho->sgfn],cg->fgfs[Sx->sgfn],cg->fgfs[Sy->sgfn],cg->fgfs[Sz->sgfn],cg->fgfs[Sxx->sgfn],cg->fgfs[Sxy->sgfn],cg->fgfs[Sxz->sgfn],cg->fgfs[Syy->sgfn],cg->fgfs[Syz->sgfn],cg->fgfs[Szz->sgfn],cg->fgfs[Gamxxx->sgfn],cg->fgfs[Gamxxy->sgfn],cg->fgfs[Gamxxz->sgfn],cg->fgfs[Gamxyy->sgfn],cg->fgfs[Gamxyz->sgfn],cg->fgfs[Gamxzz->sgfn],cg->fgfs[Gamyxx->sgfn],cg->fgfs[Gamyxy->sgfn],cg->fgfs[Gamyxz->sgfn],cg->fgfs[Gamyyy->sgfn],cg->fgfs[Gamyyz->sgfn],cg->fgfs[Gamyzz->sgfn],cg->fgfs[Gamzxx->sgfn],cg->fgfs[Gamzxy->sgfn],cg->fgfs[Gamzxz->sgfn],cg->fgfs[Gamzyy->sgfn],cg->fgfs[Gamzyz->sgfn],cg->fgfs[Gamzzz->sgfn],cg->fgfs[Rxx->sgfn],cg->fgfs[Rxy->sgfn],cg->fgfs[Rxz->sgfn],cg->fgfs[Ryy->sgfn],cg->fgfs[Ryz->sgfn],cg->fgfs[Rzz->sgfn],cg->fgfs[Cons_Ham->sgfn],cg->fgfs[Cons_Px->sgfn],cg->fgfs[Cons_Py->sgfn],cg->fgfs[Cons_Pz->sgfn],cg->fgfs[Cons_Gx->sgfn],cg->fgfs[Cons_Gy->sgfn],cg->fgfs[Cons_Gz->sgfn],Symmetry,lev,ndeps,pre
|
||||
|
||||
|
||||
Reference in New Issue
Block a user