Reduce redundant GPU host buffer preparation

This commit is contained in:
2026-04-09 21:20:45 +08:00
parent cf3c6d6218
commit e1a0bff43c
2 changed files with 11 additions and 24 deletions

View File

@@ -787,12 +787,19 @@ int bssn_cuda_rk4_boundary_var(int *ex, double dT,
const bool refresh_rhs =
(rk_stage == 0) || !cache.rhs_resident || cache.host_rhs != rhs_accum;
double *stage_ptr = nullptr;
const double *mapped_state0_ptr = refresh_state0 ? bssn_gpu_find_device_buffer(state0) : cache.state0.ptr;
const double *mapped_boundary_ptr = need_boundary_input ? bssn_gpu_find_device_buffer(boundary_src) : nullptr;
const double *mapped_stage_ptr = need_stage_input ? bssn_gpu_find_device_buffer(stage_data) : nullptr;
const double *mapped_rhs_ptr = refresh_rhs ? bssn_gpu_find_device_buffer(rhs_accum) : cache.rhs.ptr;
bssn_gpu_prepare_host_buffer(state0, n);
if (need_boundary_input) bssn_gpu_prepare_host_buffer(boundary_src, n);
if (need_stage_input) bssn_gpu_prepare_host_buffer(stage_data, n);
bssn_gpu_prepare_host_buffer(rhs_accum, n);
if (refresh_state0 && !mapped_state0_ptr)
bssn_gpu_prepare_host_buffer(state0, n);
if (need_boundary_input && !mapped_boundary_ptr)
bssn_gpu_prepare_host_buffer(boundary_src, n);
if (need_stage_input && !mapped_stage_ptr)
bssn_gpu_prepare_host_buffer(stage_data, n);
if (refresh_rhs && !mapped_rhs_ptr)
bssn_gpu_prepare_host_buffer(rhs_accum, n);
ok = ok &&
(!refresh_state0 || copy_to_device_preferring_device(cache.state0, state0, bytes)) &&