Broaden cached CUDA sync paths
This commit is contained in:
@@ -608,6 +608,24 @@ bool cuda_direct_pack_segment_to_device(double *buffer,
|
||||
MyList<var> *VarLists,
|
||||
int Symmetry)
|
||||
{
|
||||
#if USE_CUDA_Z4C && (ABEtype == 2)
|
||||
if (state_count == Z4C_CUDA_STATE_COUNT)
|
||||
{
|
||||
if (type != 1)
|
||||
return false;
|
||||
const double t0 = sync_profile_enabled() ? MPI_Wtime() : 0.0;
|
||||
const int i0 = cuda_seg_begin(dst, src->Bg, 0);
|
||||
const int j0 = cuda_seg_begin(dst, src->Bg, 1);
|
||||
const int k0 = cuda_seg_begin(dst, src->Bg, 2);
|
||||
const bool ok = z4c_cuda_pack_state_batch_to_device_buffer(
|
||||
src->Bg, state_count, buffer, src->Bg->shape,
|
||||
i0, j0, k0,
|
||||
dst->shape[0], dst->shape[1], dst->shape[2]) == 0;
|
||||
if (sync_profile_enabled())
|
||||
sync_profile_stats().direct_pack_sec += MPI_Wtime() - t0;
|
||||
return ok;
|
||||
}
|
||||
#endif
|
||||
#if USE_CUDA_BSSN
|
||||
if (state_count <= 0 || state_count > BSSN_CUDA_STATE_COUNT)
|
||||
return false;
|
||||
@@ -777,6 +795,22 @@ bool cuda_direct_unpack_segment_from_device(double *buffer,
|
||||
int state_count,
|
||||
MyList<var> *VarListd)
|
||||
{
|
||||
#if USE_CUDA_Z4C && (ABEtype == 2)
|
||||
if (state_count == Z4C_CUDA_STATE_COUNT)
|
||||
{
|
||||
const double t0 = sync_profile_enabled() ? MPI_Wtime() : 0.0;
|
||||
const int i0 = cuda_seg_begin(dst, dst->Bg, 0);
|
||||
const int j0 = cuda_seg_begin(dst, dst->Bg, 1);
|
||||
const int k0 = cuda_seg_begin(dst, dst->Bg, 2);
|
||||
const bool ok = z4c_cuda_unpack_state_batch_from_device_buffer(
|
||||
dst->Bg, state_count, buffer, dst->Bg->shape,
|
||||
i0, j0, k0,
|
||||
dst->shape[0], dst->shape[1], dst->shape[2]) == 0;
|
||||
if (sync_profile_enabled())
|
||||
sync_profile_stats().direct_unpack_sec += MPI_Wtime() - t0;
|
||||
return ok;
|
||||
}
|
||||
#endif
|
||||
#if USE_CUDA_BSSN
|
||||
if (state_count <= 0 || state_count > BSSN_CUDA_STATE_COUNT)
|
||||
return false;
|
||||
@@ -856,6 +890,10 @@ bool cuda_unpack_host_region_to_resident(Block *block,
|
||||
|
||||
bool cuda_device_state_count_supported(int state_count)
|
||||
{
|
||||
#if USE_CUDA_Z4C && (ABEtype == 2)
|
||||
if (state_count == Z4C_CUDA_STATE_COUNT)
|
||||
return true;
|
||||
#endif
|
||||
#if USE_CUDA_BSSN
|
||||
return state_count > 0 && state_count <= BSSN_CUDA_STATE_COUNT;
|
||||
#else
|
||||
@@ -933,6 +971,10 @@ int cuda_data_packer_device_batched(double *data,
|
||||
const int state_count = cuda_state_var_count(VarLists, VarListd);
|
||||
if (!cuda_device_state_count_supported(state_count))
|
||||
return -1;
|
||||
#if USE_CUDA_Z4C && (ABEtype == 2)
|
||||
if (state_count == Z4C_CUDA_STATE_COUNT)
|
||||
return -1;
|
||||
#endif
|
||||
|
||||
int size_out = 0;
|
||||
Block *batch_block = 0;
|
||||
|
||||
Reference in New Issue
Block a user