Refactor CUDA step buffers to remove loop-time allocations
This commit is contained in:
@@ -3626,8 +3626,8 @@ int bssn_cuda_rk4_substep(void *block_tag,
|
|||||||
} else {
|
} else {
|
||||||
download_state_outputs(state_host_out, all);
|
download_state_outputs(state_host_out, all);
|
||||||
}
|
}
|
||||||
if (RK4 == 3 && !use_resident_state) {
|
if (RK4 == 3) {
|
||||||
release_step_ctx(block_tag);
|
ctx.matter_ready = false; /* invalidate matter cache for next timestep */
|
||||||
}
|
}
|
||||||
if (profile) {
|
if (profile) {
|
||||||
cuda_profile_sync();
|
cuda_profile_sync();
|
||||||
|
|||||||
Reference in New Issue
Block a user