Download staged GPU sync regions incrementally
This commit is contained in:
@@ -914,6 +914,73 @@ int bssn_gpu_stage_upload_region(const double *host_ptr,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Copy one box-shaped sub-region of a 3D double array from its device buffer
 * into the same position of the host array (device -> host, blocking
 * cudaMemcpy3D).
 *
 * Both the host array and the device buffer are addressed with the full grid
 * layout: a row pitch of full_shape[0] doubles and a slice height of
 * full_shape[1] rows, i.e. dimension 0 is the fastest-varying index.
 *
 * Parameters (every array has 3 entries, one per dimension):
 *   host_ptr     - host array; also the key passed to
 *                  bssn_gpu_find_device_buffer() to obtain the device buffer
 *                  (NOTE(review): presumably a registry of staged host/device
 *                  pairs - confirm against that helper).
 *   full_shape   - number of grid points of the full array.
 *   full_llb     - lower bound coordinates of the full array.
 *   full_uub     - upper bound coordinates of the full array; must be
 *                  strictly greater than full_llb in every dimension.
 *   region_shape - number of grid points of the region to copy.
 *   region_llb   - lower bound coordinates of the region to copy.
 *
 * Returns 0 on success. Returns 1 when an argument is null, no device buffer
 * is found, a shape is non-positive, the grid spacing is not a positive
 * number, the region does not fit inside the full array, or cudaMemcpy3D
 * fails (the CUDA error string is then written to cerr).
 */
int bssn_gpu_stage_download_region(double *host_ptr,
                                   const int *full_shape,
                                   const double *full_llb,
                                   const double *full_uub,
                                   const int *region_shape,
                                   const double *region_llb)
{
  if (!host_ptr || !full_shape || !full_llb || !full_uub || !region_shape || !region_llb)
    return 1;

  const double *device_ptr = bssn_gpu_find_device_buffer(host_ptr);
  if (!device_ptr)
    return 1;

  int start[3] = {0, 0, 0};
  for (int i = 0; i < 3; ++i)
  {
    if (full_shape[i] <= 0 || region_shape[i] <= 0)
      return 1;

    // Number of grid intervals spanning [full_llb, full_uub] along this axis:
    // one less than the point count for vertex-centered grids, equal to the
    // point count for cell-centered grids.
#ifdef Vertex
#ifdef Cell
#error Both Cell and Vertex are defined
#endif
    const int intervals = full_shape[i] - 1;
#else
#ifdef Cell
    const int intervals = full_shape[i];
#else
#error Not define Vertex nor Cell
#endif
#endif

    // A single-point vertex axis would otherwise divide by zero below.
    if (intervals <= 0)
      return 1;

    const double dx = (full_uub[i] - full_llb[i]) / static_cast<double>(intervals);
    // Written as a negated comparison so that NaN is rejected together with
    // zero and negative spacings.
    if (!(dx > 0.0))
      return 1;

    // The +0.4 bias makes the truncating cast below pick the intended grid
    // index even when the quotient lands slightly under an integer.
    const double offset = (region_llb[i] - full_llb[i]) / dx + 0.4;

    // Converting an out-of-range or NaN double to int is undefined behaviour,
    // so validate first. Offsets in (-1, 0) truncate to index 0; anything at
    // or beyond full_shape[i] could never hold a region of >= 1 points.
    if (!(offset > -1.0 && offset < static_cast<double>(full_shape[i])))
      return 1;
    start[i] = static_cast<int>(offset);

    // Here 0 <= start[i] < full_shape[i], so the subtraction cannot overflow
    // (unlike start[i] + region_shape[i]).
    if (region_shape[i] > full_shape[i] - start[i])
      return 1;
  }

  // Row pitch in bytes and slice height in rows, shared by source and
  // destination because both use the full-array layout.
  const size_t row_bytes = static_cast<size_t>(full_shape[0]) * sizeof(double);
  const size_t row_elems = static_cast<size_t>(full_shape[0]);
  const size_t slice_rows = static_cast<size_t>(full_shape[1]);

  cudaMemcpy3DParms parms = {};
  parms.srcPtr = make_cudaPitchedPtr(const_cast<double *>(device_ptr),
                                     row_bytes, row_elems, slice_rows);
  parms.dstPtr = make_cudaPitchedPtr(host_ptr,
                                     row_bytes, row_elems, slice_rows);
  // No CUDA array takes part in the copy, so the x position and the extent
  // width are expressed in bytes; y and z are in rows and slices.
  parms.srcPos = make_cudaPos(static_cast<size_t>(start[0]) * sizeof(double),
                              static_cast<size_t>(start[1]),
                              static_cast<size_t>(start[2]));
  parms.dstPos = parms.srcPos;
  parms.extent = make_cudaExtent(static_cast<size_t>(region_shape[0]) * sizeof(double),
                                 static_cast<size_t>(region_shape[1]),
                                 static_cast<size_t>(region_shape[2]));
  parms.kind = cudaMemcpyDeviceToHost;

  cudaError_t err = cudaMemcpy3D(&parms);
  if (err != cudaSuccess)
  {
    cerr << "cudaMemcpy3D(D2H region) failed: " << cudaGetErrorString(err) << endl;
    return 1;
  }

  return 0;
}
|
||||
|
||||
__global__ void test_const_address(double * testd){
|
||||
int _t = blockIdx.x*blockDim.x+threadIdx.x;
|
||||
if(_t == 0)
|
||||
|
||||
Reference in New Issue
Block a user