Add mixed GPU RP path for EScalar
This commit is contained in:
@@ -7565,6 +7565,78 @@ int bssn_cuda_unpack_state_batch_from_host_buffer_for_host_views(void *block_tag
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Launch kern_restrict_state_region_batch on the resident device state that
 * belongs to `block_tag` and copy the kernel output into `host_buffer`.
 *
 * Parameters:
 *   block_tag       key passed to ensure_step_ctx() to select the StepContext.
 *   state_host_key  passed to active_or_keyed_bank() to select the resident bank.
 *   state_count     number of state variables; must be in [1, BSSN_STATE_COUNT].
 *   host_buffer     destination; receives state_count * sx * sy * sz doubles.
 *   ex              three grid extents of the block; each must be positive.
 *   sx, sy, sz      extents of the output region; each must be positive.
 *   fi0, fj0, fk0   forwarded unchanged to the kernel
 *                   (presumably the fine-grid start indices -- TODO confirm).
 *   state_soa       forwarded to upload_comm_state_soa() together with state_count.
 *
 * Returns 0 on success and 1 when an argument is rejected, no valid resident
 * bank is found, or no communication buffer is available.  CUDA API failures
 * are reported through CUDA_CHECK.
 *
 * The kernel is launched without an explicit stream argument and the result is
 * fetched with a plain cudaMemcpy, so the call returns after the copy is done.
 */
extern "C"
int bssn_cuda_restrict_state_batch_to_host_buffer_for_host_views(void *block_tag,
                                                                 double **state_host_key,
                                                                 int state_count,
                                                                 double *host_buffer,
                                                                 int *ex,
                                                                 int sx, int sy, int sz,
                                                                 int fi0, int fj0, int fk0,
                                                                 const double *state_soa)
{
  init_gpu_dispatch();
  CUDA_CHECK(cudaSetDevice(g_dispatch.my_device));

  if (state_count <= 0 || state_count > BSSN_STATE_COUNT) return 1;
  if (!host_buffer || sx <= 0 || sy <= 0 || sz <= 0) return 1;
  /* ex is dereferenced below; reject a missing or degenerate extent array. */
  if (!ex || ex[0] <= 0 || ex[1] <= 0 || ex[2] <= 0) return 1;

  /*
   * The kernel receives the point counts as int.  Form the products in size_t,
   * one factor at a time, and refuse anything that does not fit in an int
   * instead of letting a signed multiplication overflow.
   */
  const size_t int_limit = (size_t)(~0u >> 1);
  const size_t grid_plane = (size_t)ex[0] * (size_t)ex[1];
  if (grid_plane > int_limit || grid_plane * (size_t)ex[2] > int_limit) return 1;
  const size_t all = grid_plane * (size_t)ex[2];

  const size_t region_plane = (size_t)sx * (size_t)sy;
  if (region_plane > int_limit || region_plane * (size_t)sz > int_limit) return 1;
  const int region_all = (int)(region_plane * (size_t)sz);

  StepContext &ctx = ensure_step_ctx(block_tag, all);
  const int bank = active_or_keyed_bank(ctx, state_host_key, all, false);
  if (bank < 0 || !ctx.resident_valid[bank]) return 1;

  const size_t total_doubles = (size_t)state_count * (size_t)region_all;
  double *d_comm = ensure_step_comm_buffer(ctx, total_doubles);
  if (!d_comm) return 1;

  upload_comm_state_soa(state_soa, state_count);

  /* grid.x covers the region points, grid.y indexes the state variable. */
  dim3 launch_grid((unsigned int)grid((size_t)region_all),
                   (unsigned int)state_count);
  kern_restrict_state_region_batch<<<launch_grid, BLK>>>(
      ctx.d_resident_mem[bank], d_comm,
      ex[0], ex[1], sx, sy, sz,
      fi0, fj0, fk0, region_all, state_count,
      (int)all);
  /* A bad launch configuration is only visible through cudaGetLastError(). */
  CUDA_CHECK(cudaGetLastError());

  CUDA_CHECK(cudaMemcpy(host_buffer, d_comm,
                        total_doubles * sizeof(double),
                        cudaMemcpyDeviceToHost));
  return 0;
}
|
||||
|
||||
/*
 * Launch kern_prolong_state_region_batch on the resident device state that
 * belongs to `block_tag` and copy the kernel output into `host_buffer`.
 *
 * Parameters:
 *   block_tag            key passed to ensure_step_ctx() to select the StepContext.
 *   state_host_key       passed to active_or_keyed_bank() to select the resident bank.
 *   state_count          number of state variables; must be in [1, BSSN_STATE_COUNT].
 *   host_buffer          destination; receives state_count * sx * sy * sz doubles.
 *   ex                   three grid extents of the block; each must be positive.
 *   sx, sy, sz           extents of the output region; each must be positive.
 *   ii0, jj0, kk0        forwarded unchanged to the kernel
 *                        (presumably the source start indices -- TODO confirm).
 *   lbc_i, lbc_j, lbc_k  forwarded unchanged to the kernel
 *                        (presumably per-axis offset/parity -- TODO confirm).
 *   state_soa            forwarded to upload_comm_state_soa() together with state_count.
 *
 * Returns 0 on success and 1 when an argument is rejected, no valid resident
 * bank is found, or no communication buffer is available.  CUDA API failures
 * are reported through CUDA_CHECK.
 *
 * The kernel is launched without an explicit stream argument and the result is
 * fetched with a plain cudaMemcpy, so the call returns after the copy is done.
 */
extern "C"
int bssn_cuda_prolong_state_batch_to_host_buffer_for_host_views(void *block_tag,
                                                                double **state_host_key,
                                                                int state_count,
                                                                double *host_buffer,
                                                                int *ex,
                                                                int sx, int sy, int sz,
                                                                int ii0, int jj0, int kk0,
                                                                int lbc_i, int lbc_j, int lbc_k,
                                                                const double *state_soa)
{
  init_gpu_dispatch();
  CUDA_CHECK(cudaSetDevice(g_dispatch.my_device));

  if (state_count <= 0 || state_count > BSSN_STATE_COUNT) return 1;
  if (!host_buffer || sx <= 0 || sy <= 0 || sz <= 0) return 1;
  /* ex is dereferenced below; reject a missing or degenerate extent array. */
  if (!ex || ex[0] <= 0 || ex[1] <= 0 || ex[2] <= 0) return 1;

  /*
   * The kernel receives the point counts as int.  Form the products in size_t,
   * one factor at a time, and refuse anything that does not fit in an int
   * instead of letting a signed multiplication overflow.
   */
  const size_t int_limit = (size_t)(~0u >> 1);
  const size_t grid_plane = (size_t)ex[0] * (size_t)ex[1];
  if (grid_plane > int_limit || grid_plane * (size_t)ex[2] > int_limit) return 1;
  const size_t all = grid_plane * (size_t)ex[2];

  const size_t region_plane = (size_t)sx * (size_t)sy;
  if (region_plane > int_limit || region_plane * (size_t)sz > int_limit) return 1;
  const int region_all = (int)(region_plane * (size_t)sz);

  StepContext &ctx = ensure_step_ctx(block_tag, all);
  const int bank = active_or_keyed_bank(ctx, state_host_key, all, false);
  if (bank < 0 || !ctx.resident_valid[bank]) return 1;

  const size_t total_doubles = (size_t)state_count * (size_t)region_all;
  double *d_comm = ensure_step_comm_buffer(ctx, total_doubles);
  if (!d_comm) return 1;

  upload_comm_state_soa(state_soa, state_count);

  /* grid.x covers the region points, grid.y indexes the state variable. */
  dim3 launch_grid((unsigned int)grid((size_t)region_all),
                   (unsigned int)state_count);
  kern_prolong_state_region_batch<<<launch_grid, BLK>>>(
      ctx.d_resident_mem[bank], d_comm,
      ex[0], ex[1], sx, sy, sz,
      ii0, jj0, kk0, lbc_i, lbc_j, lbc_k,
      region_all, state_count,
      (int)all);
  /* A bad launch configuration is only visible through cudaGetLastError(). */
  CUDA_CHECK(cudaGetLastError());

  CUDA_CHECK(cudaMemcpy(host_buffer, d_comm,
                        total_doubles * sizeof(double),
                        cudaMemcpyDeviceToHost));
  return 0;
}
|
||||
|
||||
static void copy_state_device_batch(void *block_tag,
|
||||
int state_count,
|
||||
double *device_buffer,
|
||||
|
||||
Reference in New Issue
Block a user