Reduce GPU RK4 transfer overhead
This commit is contained in:
@@ -720,16 +720,14 @@ __global__ void enforce_ga(double * trA){
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Host-side wrapper: runs the enforce_ga kernel with the chin1 buffer as its
 * argument, clears that buffer afterwards, and blocks until the device is idle.
 *
 * @param matrix_size  Number of double elements to zero in the buffer.
 *
 * The kernel launch and the memset are both issued without an explicit stream,
 * so they are submitted in this order to the same (default) stream.
 * NOTE(review): chin1 appears to be borrowed as a temporary for the kernel's
 * `trA` argument and reset to zero before later use — confirm with the caller.
 * CUDA return codes are not inspected here (the function returns void).
 */
inline void sub_enforce_ga(int matrix_size){
	double * trA = M_ chin1;
	enforce_ga<<<GRID_DIM,BLOCK_DIM>>>(trA);
	cudaMemset(trA,0,matrix_size * sizeof(double));
	// cudaThreadSynchronize() is deprecated in the CUDA runtime;
	// cudaDeviceSynchronize() is its documented replacement.
	cudaDeviceSynchronize();

	//cudaMemset(Mh_ gupxx,0,matrix_size * sizeof(double));
	//trA gxx,gyy,gzz gupxx,gupxy,gupxz,gupyy,gupyz,gupzz
}
|
||||
/**
 * Host-side wrapper: runs the enforce_ga kernel on the caller-supplied device
 * buffer and then zero-fills that buffer.
 *
 * @param trA          Device pointer handed to enforce_ga and cleared afterwards.
 * @param matrix_size  Number of double elements to zero in trA.
 *
 * No host-side synchronization is performed here; both operations are issued
 * without an explicit stream, in this order. CUDA return codes are not
 * inspected (the function returns void).
 */
inline void sub_enforce_ga(double *trA, int matrix_size){
	// Size of the region to clear, in bytes.
	const size_t clear_bytes = matrix_size * sizeof(double);

	enforce_ga<<<GRID_DIM,BLOCK_DIM>>>(trA);
	cudaMemset(trA, 0, clear_bytes);

	// Disabled: cudaMemset(Mh_ gupxx,0,matrix_size * sizeof(double));
	// Fields noted by the original author: trA gxx,gyy,gzz gupxx,gupxy,gupxz,gupyy,gupyz,gupzz
}
|
||||
// Device-resident counter, zero-initialized at module load. Declared volatile
// so every access goes to memory rather than a cached register copy.
// NOTE(review): its readers/writers are not visible in this excerpt.
__device__ volatile unsigned int global_count = 0;
|
||||
__global__ void test_init_matrix(){
|
||||
int tid = blockIdx.x*blockDim.x+threadIdx.x;
|
||||
@@ -2609,7 +2607,7 @@ void destroy_meta(Meta *meta)
|
||||
|
||||
}*/
|
||||
|
||||
int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y, double *Z,
|
||||
int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y, double *Z,
|
||||
double *chi, double * trK ,
|
||||
double *dxx , double * gxy ,double *gxz ,double * dyy,double *gyz,double *dzz,
|
||||
double *Axx , double *Axy , double * Axz , double * Ayy , double * Ayz , double * Azz,
|
||||
@@ -2652,6 +2650,7 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y,
|
||||
if (!prepare_gpu_rhs_cache(cache, device, ex))
|
||||
return 1;
|
||||
Meta * meta = &cache.meta;
|
||||
const int effective_co = (calledby == CALLED_BY_STEP) ? 1 : co;
|
||||
|
||||
/*
|
||||
//#1--------------------init_gpu_meta(meta,matrix_size)---------------------------
|
||||
@@ -3067,74 +3066,6 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y,
|
||||
{Mh_ Sx, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ Sy, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ Sz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gxx, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gyy, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gzz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ chix, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ chiy, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ chiz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gxxx, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gxyx, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gxzx, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gyyx, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gyzx, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gzzx, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gxxy, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gxyy, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gxzy, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gyyy, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gyzy, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gzzy, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gxxz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gxyz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gxzz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gyyz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gyzz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gzzz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ Lapx, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ Lapy, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ Lapz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ betaxx, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ betaxy, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ betaxz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ betayy, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ betayz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ betazz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ betayx, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ betazy, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ betazx, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ Kx, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ Ky, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ Kz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ Gamxx, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ Gamxy, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ Gamxz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ Gamyy, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ Gamyz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ Gamzz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ Gamyx, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ Gamzy, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ Gamzx, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ div_beta, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ S, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ f, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ fxx, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ fxy, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ fxz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ fyy, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ fyz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ fzz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gupxx, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gupxy, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gupxz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gupyy, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gupyz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ gupzz, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ Gamxa, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ Gamya, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ Gamza, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ alpn1, static_cast<size_t>(matrix_size)},
|
||||
{Mh_ chin1, static_cast<size_t>(matrix_size)},
|
||||
};
|
||||
if (!zero_buffers(zero_specs, sizeof(zero_specs) / sizeof(zero_specs[0])))
|
||||
return 1;
|
||||
@@ -3244,7 +3175,7 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y,
|
||||
cudaMemcpyToSymbol(T_c,&T, sizeof(double));
|
||||
cudaMemcpyToSymbol(Symmetry_c,&Symmetry, sizeof(int));
|
||||
cudaMemcpyToSymbol(Lev_c,&Lev, sizeof(int));
|
||||
cudaMemcpyToSymbol(co_c,&co, sizeof(int));
|
||||
cudaMemcpyToSymbol(co_c,&effective_co, sizeof(int));
|
||||
cudaMemcpyToSymbol(eps_c,&eps, sizeof(double));
|
||||
|
||||
double dXh = X[1] - X[0];
|
||||
@@ -3341,9 +3272,9 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y,
|
||||
/* int deviceCount; cudaGetDeviceCount(&deviceCount);
|
||||
cout<<"myrank is: "<<mpi_rank<<" deviceCount is:"<<deviceCount<<endl;
|
||||
*/
|
||||
//#4-----------------------calculate------------------------------
|
||||
//4.0------enforce_ga---------
|
||||
//sub_enforce_ga(matrix_size);
|
||||
//#4-----------------------calculate------------------------------
|
||||
//4.0------enforce_ga---------
|
||||
sub_enforce_ga(Mh_ chin1, matrix_size);
|
||||
//4.1-----compute rhs---------
|
||||
compute_rhs_bssn_part1<<<GRID_DIM,BLOCK_DIM>>>();
|
||||
|
||||
@@ -3456,7 +3387,7 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,double *X, double *Y,
|
||||
|
||||
}
|
||||
|
||||
if(co == 0){
|
||||
if(effective_co == 0){
|
||||
compute_rhs_bssn_part7<<<GRID_DIM,BLOCK_DIM>>>();
|
||||
|
||||
sub_fderivs(Mh_ Axx,Mh_ fh,Mh_ gxxx,Mh_ gxxy,Mh_ gxxz,sss);
|
||||
|
||||
Reference in New Issue
Block a user