Stabilize GPU buffer lifecycle around regrid

This commit is contained in:
2026-04-09 20:48:06 +08:00
parent 46e94d1248
commit cf3c6d6218
8 changed files with 186 additions and 81 deletions

View File

@@ -135,7 +135,7 @@ struct GpuRhsCache
const double *last_y = nullptr;
const double *last_z = nullptr;
bool meta_uploaded = false;
static const int max_mapped_buffers = 128;
static const int max_mapped_buffers = 512;
const double *host_buffers[max_mapped_buffers] = {nullptr};
const double *device_buffers[max_mapped_buffers] = {nullptr};
int mapped_buffer_count = 0;
@@ -143,7 +143,7 @@ struct GpuRhsCache
struct ExternalBufferRegistry
{
static const int max_mapped_buffers = 256;
static const int max_mapped_buffers = 4096;
const double *host_buffers[max_mapped_buffers] = {nullptr};
const double *device_buffers[max_mapped_buffers] = {nullptr};
int mapped_buffer_count = 0;
@@ -151,7 +151,7 @@ struct ExternalBufferRegistry
struct OwnedBufferRegistry
{
static const int max_mapped_buffers = 256;
static const int max_mapped_buffers = 4096;
const double *host_buffers[max_mapped_buffers] = {nullptr};
double *device_buffers[max_mapped_buffers] = {nullptr};
size_t capacities[max_mapped_buffers] = {0};
@@ -223,7 +223,11 @@ void map_buffer(GpuRhsCache &cache, const double *host_ptr, const double *device
}
if (cache.mapped_buffer_count >= GpuRhsCache::max_mapped_buffers)
{
cerr << "gpu RHS buffer registry exhausted at " << GpuRhsCache::max_mapped_buffers
<< " entries" << endl;
return;
}
cache.host_buffers[cache.mapped_buffer_count] = host_ptr;
cache.device_buffers[cache.mapped_buffer_count] = device_ptr;
@@ -255,7 +259,11 @@ void map_external_buffer(ExternalBufferRegistry &registry, const double *host_pt
}
if (registry.mapped_buffer_count >= ExternalBufferRegistry::max_mapped_buffers)
{
cerr << "external CUDA buffer registry exhausted at "
<< ExternalBufferRegistry::max_mapped_buffers << " entries" << endl;
return;
}
registry.host_buffers[registry.mapped_buffer_count] = host_ptr;
registry.device_buffers[registry.mapped_buffer_count] = device_ptr;
@@ -421,6 +429,7 @@ void ensure_host_buffer_registered(const double *host_ptr, size_t bytes)
return;
}
cerr << "cudaHostRegister failed: " << cudaGetErrorString(err) << endl;
registry.failed[slot] = true;
registry.capacities[slot] = bytes;
}
@@ -932,6 +941,25 @@ void bssn_gpu_clear_cached_device_buffers()
invalidate_owned_buffer_map(owned_buffer_registry());
}
void bssn_gpu_release_pinned_host_buffers()
{
PinnedHostRegistry &pinned = pinned_host_registry();
for (int i = 0; i < pinned.buffer_count; ++i)
{
if (pinned.registered[i] && pinned.host_buffers[i])
{
cudaError_t unreg_err = cudaHostUnregister(const_cast<double *>(pinned.host_buffers[i]));
if (unreg_err != cudaSuccess && unreg_err != cudaErrorHostMemoryNotRegistered)
cerr << "cudaHostUnregister failed: " << cudaGetErrorString(unreg_err) << endl;
}
pinned.host_buffers[i] = nullptr;
pinned.capacities[i] = 0;
pinned.registered[i] = false;
pinned.failed[i] = false;
}
pinned.buffer_count = 0;
}
void bssn_gpu_register_device_buffer(const double *host_ptr, const double *device_ptr)
{
map_external_buffer(external_buffer_registry(), host_ptr, device_ptr);