From e1a0bff43c53dfa6d456698da6294cdac4a5fdd4 Mon Sep 17 00:00:00 2001 From: CGH0S7 <776459475@qq.com> Date: Thu, 9 Apr 2026 21:20:45 +0800 Subject: [PATCH] Reduce redundant GPU host buffer preparation --- AMSS_NCKU_source/bssn_cuda_ops.cu | 15 +++++++++++---- AMSS_NCKU_source/bssn_cuda_step.C | 20 -------------------- 2 files changed, 11 insertions(+), 24 deletions(-) diff --git a/AMSS_NCKU_source/bssn_cuda_ops.cu b/AMSS_NCKU_source/bssn_cuda_ops.cu index 45fb588..be62fed 100644 --- a/AMSS_NCKU_source/bssn_cuda_ops.cu +++ b/AMSS_NCKU_source/bssn_cuda_ops.cu @@ -787,12 +787,19 @@ int bssn_cuda_rk4_boundary_var(int *ex, double dT, const bool refresh_rhs = (rk_stage == 0) || !cache.rhs_resident || cache.host_rhs != rhs_accum; double *stage_ptr = nullptr; + const double *mapped_state0_ptr = refresh_state0 ? bssn_gpu_find_device_buffer(state0) : cache.state0.ptr; + const double *mapped_boundary_ptr = need_boundary_input ? bssn_gpu_find_device_buffer(boundary_src) : nullptr; const double *mapped_stage_ptr = need_stage_input ? bssn_gpu_find_device_buffer(stage_data) : nullptr; + const double *mapped_rhs_ptr = refresh_rhs ? bssn_gpu_find_device_buffer(rhs_accum) : cache.rhs.ptr; - bssn_gpu_prepare_host_buffer(state0, n); - if (need_boundary_input) bssn_gpu_prepare_host_buffer(boundary_src, n); - if (need_stage_input) bssn_gpu_prepare_host_buffer(stage_data, n); - bssn_gpu_prepare_host_buffer(rhs_accum, n); + if (refresh_state0 && !mapped_state0_ptr) + bssn_gpu_prepare_host_buffer(state0, n); + if (need_boundary_input && !mapped_boundary_ptr) + bssn_gpu_prepare_host_buffer(boundary_src, n); + if (need_stage_input && !mapped_stage_ptr) + bssn_gpu_prepare_host_buffer(stage_data, n); + if (refresh_rhs && !mapped_rhs_ptr) + bssn_gpu_prepare_host_buffer(rhs_accum, n); ok = ok && (!refresh_state0 || copy_to_device_preferring_device(cache.state0, state0, bytes)) && diff --git a/AMSS_NCKU_source/bssn_cuda_step.C b/AMSS_NCKU_source/bssn_cuda_step.C index 51f488b..c5ea4c6 100644 --- a/AMSS_NCKU_source/bssn_cuda_step.C +++ b/AMSS_NCKU_source/bssn_cuda_step.C @@ -124,25 +124,6 @@ void bssn_class::Step_MainPath_GPU(int lev, int YN) } }; - auto stage_upload_var_list = - [&](Block *cg, MyList *var_list) { - const int n = cg->shape[0] * cg->shape[1] * cg->shape[2]; - while (var_list) - { - if (bssn_gpu_stage_upload_buffer(cg->fgfs[var_list->data->sgfn], n)) - { - cerr << "GPU state upload failure: lev=" << lev - << " var=" << var_list->data->name - << " bbox=(" << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - break; - } - var_list = var_list->next; - } - }; - auto ensure_stage_device_var_list = [&](Block *cg, MyList *var_list) { const int n = cg->shape[0] * cg->shape[1] * cg->shape[2]; @@ -340,7 +321,6 @@ void bssn_class::Step_MainPath_GPU(int lev, int YN) Block *cg = BP->data; if (myrank == cg->rank) { - stage_upload_var_list(cg, StateList); if (gpu_rhs(CALLED_BY_STEP, myrank, RHS_PARA_CALLED_FIRST_TIME)) ERROR = 1;