Save Z4C CUDA optimization progress

This commit is contained in:
2026-05-02 00:49:02 +08:00
parent 531b31e8db
commit 383e936e88
6 changed files with 343 additions and 66 deletions

View File

@@ -7788,7 +7788,7 @@ extern "C" int z4c_cuda_rk4_substep(void *block_tag,
}
double t0 = profile ? cuda_profile_now_ms() : 0.0;
if (!use_resident_state || RK4 == 0 || !ctx.state_ready) {
if (!use_resident_state || !ctx.state_ready) {
upload_state_inputs(state_host_in, all);
}
if (apply_enforce_ga) {
@@ -8117,6 +8117,35 @@ extern "C" int z4c_cuda_upload_state_subset(void *block_tag,
return 0;
}
/*
 * Run the Z4c RHS pipeline on the state that is already resident on the
 * device for `block_tag`, then copy the constraint outputs to the host.
 *
 * Parameters:
 *   block_tag            Key used to look up the per-block step context.
 *   ex                   Three grid extents; their product is the cell count.
 *   X, Y, Z              Forwarded unchanged to setup_grid_params.
 *   Symmetry, eps, co    Forwarded unchanged to setup_grid_params; eps is
 *                        also passed to the pipeline launch.
 *   constraint_host_out  Host destination handed to
 *                        download_constraint_outputs.
 *
 * Returns 0 on success. Returns 1 when a required pointer is null, when an
 * extent is negative, when the cell count does not fit in an int, or when
 * the context has no resident state (ctx.state_ready is false).
 */
extern "C" int z4c_cuda_compute_constraints_resident(void *block_tag,
                                                      int *ex, double *X, double *Y, double *Z,
                                                      int Symmetry, double eps, int co,
                                                      double **constraint_host_out)
{
  using namespace z4c_cuda;
  init_gpu_dispatch();
  CUDA_CHECK(cudaSetDevice(g_dispatch.my_device));
  if (!block_tag || !ex || !constraint_host_out) return 1;

  // A negative extent would wrap to an enormous value once cast to size_t
  // and be used as the context size below, so reject it up front.
  if (ex[0] < 0 || ex[1] < 0 || ex[2] < 0) return 1;

  // Compute the cell count once instead of repeating the product per call.
  const size_t cell_count = (size_t)ex[0] * ex[1] * ex[2];

  // The pipeline launch takes the count as an int. Refuse sizes that do not
  // survive the narrowing rather than launching with a truncated count.
  const int launch_count = (int)cell_count;
  if (launch_count < 0 || (size_t)launch_count != cell_count) return 1;

  StepContext &ctx = ensure_step_ctx(block_tag, cell_count);
  // Nothing to compute from if no state has been made resident yet.
  if (!ctx.state_ready) return 1;

  setup_grid_params(ex, X, Y, Z, Symmetry, eps, co);
  // Use the context's current state buffers as the pipeline inputs.
  bind_state_input_slots(ctx.d_state_curr);
  launch_z4c_rhs_pipeline(launch_count, eps);
  download_constraint_outputs(constraint_host_out, cell_count);
  return 0;
}
/*
 * Copy constraint outputs into the host arrays in `constraint_host_out`.
 *
 * The element count passed to download_constraint_outputs is the product of
 * the three extents in `ex`. No block tag is taken here, so the choice of
 * source buffers is made inside download_constraint_outputs.
 * NOTE(review): presumably these are the outputs of the most recent RHS
 * pipeline launch on this device -- confirm against the helper.
 *
 * Returns 0 on success, 1 when `ex` or `constraint_host_out` is null.
 */
extern "C" int z4c_cuda_download_constraint_outputs(int *ex,
                                                     double **constraint_host_out)
{
  using namespace z4c_cuda;

  // Device selection happens before argument validation, as in the other
  // entry points.
  init_gpu_dispatch();
  CUDA_CHECK(cudaSetDevice(g_dispatch.my_device));

  const bool have_args = (ex != nullptr) && (constraint_host_out != nullptr);
  if (!have_args) {
    return 1;
  }

  const size_t total_cells = static_cast<size_t>(ex[0]) * ex[1] * ex[2];
  download_constraint_outputs(constraint_host_out, total_cells);
  return 0;
}
extern "C" int z4c_cuda_has_resident_state(void *block_tag)
{
using namespace z4c_cuda;