Stabilize GPU buffer lifecycle around regrid
This commit is contained in:
@@ -135,7 +135,7 @@ struct GpuRhsCache
|
||||
const double *last_y = nullptr;
|
||||
const double *last_z = nullptr;
|
||||
bool meta_uploaded = false;
|
||||
static const int max_mapped_buffers = 128;
|
||||
static const int max_mapped_buffers = 512;
|
||||
const double *host_buffers[max_mapped_buffers] = {nullptr};
|
||||
const double *device_buffers[max_mapped_buffers] = {nullptr};
|
||||
int mapped_buffer_count = 0;
|
||||
@@ -143,7 +143,7 @@ struct GpuRhsCache
|
||||
|
||||
struct ExternalBufferRegistry
|
||||
{
|
||||
static const int max_mapped_buffers = 256;
|
||||
static const int max_mapped_buffers = 4096;
|
||||
const double *host_buffers[max_mapped_buffers] = {nullptr};
|
||||
const double *device_buffers[max_mapped_buffers] = {nullptr};
|
||||
int mapped_buffer_count = 0;
|
||||
@@ -151,7 +151,7 @@ struct ExternalBufferRegistry
|
||||
|
||||
struct OwnedBufferRegistry
|
||||
{
|
||||
static const int max_mapped_buffers = 256;
|
||||
static const int max_mapped_buffers = 4096;
|
||||
const double *host_buffers[max_mapped_buffers] = {nullptr};
|
||||
double *device_buffers[max_mapped_buffers] = {nullptr};
|
||||
size_t capacities[max_mapped_buffers] = {0};
|
||||
@@ -223,7 +223,11 @@ void map_buffer(GpuRhsCache &cache, const double *host_ptr, const double *device
|
||||
}
|
||||
|
||||
if (cache.mapped_buffer_count >= GpuRhsCache::max_mapped_buffers)
|
||||
{
|
||||
cerr << "gpu RHS buffer registry exhausted at " << GpuRhsCache::max_mapped_buffers
|
||||
<< " entries" << endl;
|
||||
return;
|
||||
}
|
||||
|
||||
cache.host_buffers[cache.mapped_buffer_count] = host_ptr;
|
||||
cache.device_buffers[cache.mapped_buffer_count] = device_ptr;
|
||||
@@ -255,7 +259,11 @@ void map_external_buffer(ExternalBufferRegistry &registry, const double *host_pt
|
||||
}
|
||||
|
||||
if (registry.mapped_buffer_count >= ExternalBufferRegistry::max_mapped_buffers)
|
||||
{
|
||||
cerr << "external CUDA buffer registry exhausted at "
|
||||
<< ExternalBufferRegistry::max_mapped_buffers << " entries" << endl;
|
||||
return;
|
||||
}
|
||||
|
||||
registry.host_buffers[registry.mapped_buffer_count] = host_ptr;
|
||||
registry.device_buffers[registry.mapped_buffer_count] = device_ptr;
|
||||
@@ -421,6 +429,7 @@ void ensure_host_buffer_registered(const double *host_ptr, size_t bytes)
|
||||
return;
|
||||
}
|
||||
|
||||
cerr << "cudaHostRegister failed: " << cudaGetErrorString(err) << endl;
|
||||
registry.failed[slot] = true;
|
||||
registry.capacities[slot] = bytes;
|
||||
}
|
||||
@@ -932,6 +941,25 @@ void bssn_gpu_clear_cached_device_buffers()
|
||||
invalidate_owned_buffer_map(owned_buffer_registry());
|
||||
}
|
||||
|
||||
void bssn_gpu_release_pinned_host_buffers()
|
||||
{
|
||||
PinnedHostRegistry &pinned = pinned_host_registry();
|
||||
for (int i = 0; i < pinned.buffer_count; ++i)
|
||||
{
|
||||
if (pinned.registered[i] && pinned.host_buffers[i])
|
||||
{
|
||||
cudaError_t unreg_err = cudaHostUnregister(const_cast<double *>(pinned.host_buffers[i]));
|
||||
if (unreg_err != cudaSuccess && unreg_err != cudaErrorHostMemoryNotRegistered)
|
||||
cerr << "cudaHostUnregister failed: " << cudaGetErrorString(unreg_err) << endl;
|
||||
}
|
||||
pinned.host_buffers[i] = nullptr;
|
||||
pinned.capacities[i] = 0;
|
||||
pinned.registered[i] = false;
|
||||
pinned.failed[i] = false;
|
||||
}
|
||||
pinned.buffer_count = 0;
|
||||
}
|
||||
|
||||
void bssn_gpu_register_device_buffer(const double *host_ptr, const double *device_ptr)
|
||||
{
|
||||
map_external_buffer(external_buffer_registry(), host_ptr, device_ptr);
|
||||
|
||||
Reference in New Issue
Block a user