Reduce staged GPU host-device copies

This commit is contained in:
2026-04-09 16:44:08 +08:00
parent 42e851d19a
commit 5b00d49070
5 changed files with 349 additions and 49 deletions

View File

@@ -19,9 +19,11 @@ int bssn_cuda_rk4_boundary_var(int *ex, double dT,
const double SoA[3],
int symmetry,
int lev,
-int rk_stage);
+int rk_stage,
+bool download_to_host = true);
-int bssn_cuda_lowerbound(int *ex, double *chi, double tinny);
+int bssn_cuda_lowerbound(int *ex, double *chi, double tinny, bool download_to_host = true);
int bssn_cuda_download_buffer(int *ex, double *host_ptr);
int bssn_cuda_prolong3_pack(int wei,
const double *llbc, const double *uubc, const int *extc, const double *func,