Optimize BSSN CUDA resident state and CUDA-aware MPI
This commit is contained in:
@@ -6,14 +6,68 @@
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include <cmath>
|
||||
#include <new>
|
||||
using namespace std;
|
||||
|
||||
#include "Block.h"
|
||||
#include "misc.h"
|
||||
|
||||
Block::Block(int DIM, int *shapei, double *bboxi, int ranki, int ingfsi, int fngfsi, int levi, const int cgpui) : rank(ranki), ingfs(ingfsi), fngfs(fngfsi), lev(levi), cgpu(cgpui)
|
||||
{
|
||||
#include <new>
|
||||
using namespace std;
|
||||
|
||||
#include "Block.h"
|
||||
#include "misc.h"
|
||||
|
||||
#if USE_CUDA_BSSN || USE_CUDA_Z4C
|
||||
#include <cuda_runtime_api.h>
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
bool cuda_pin_gridfuncs_enabled()
|
||||
{
|
||||
static int enabled = -1;
|
||||
if (enabled < 0)
|
||||
{
|
||||
const char *env = getenv("AMSS_CUDA_PIN_GRIDFUNCS");
|
||||
enabled = (env && atoi(env) != 0) ? 1 : 0;
|
||||
}
|
||||
return enabled != 0;
|
||||
}
|
||||
|
||||
double *alloc_gridfunc(size_t count, unsigned char &pinned)
|
||||
{
|
||||
pinned = 0;
|
||||
#if USE_CUDA_BSSN || USE_CUDA_Z4C
|
||||
if (cuda_pin_gridfuncs_enabled())
|
||||
{
|
||||
double *ptr = 0;
|
||||
cudaError_t err = cudaMallocHost((void **)&ptr, count * sizeof(double));
|
||||
if (err == cudaSuccess)
|
||||
{
|
||||
pinned = 1;
|
||||
return ptr;
|
||||
}
|
||||
cudaGetLastError();
|
||||
}
|
||||
#endif
|
||||
return (double *)malloc(sizeof(double) * count);
|
||||
}
|
||||
|
||||
void free_gridfunc(double *ptr, unsigned char pinned)
|
||||
{
|
||||
if (!ptr)
|
||||
return;
|
||||
#if USE_CUDA_BSSN || USE_CUDA_Z4C
|
||||
if (pinned)
|
||||
{
|
||||
cudaFreeHost(ptr);
|
||||
return;
|
||||
}
|
||||
#else
|
||||
(void)pinned;
|
||||
#endif
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Block::Block(int DIM, int *shapei, double *bboxi, int ranki, int ingfsi, int fngfsi, int levi, const int cgpui) : rank(ranki), lev(levi), cgpu(cgpui), ingfs(ingfsi), fngfs(fngfsi), igfs(0), fgfs(0), fgfs_pinned(0)
|
||||
{
|
||||
for (int i = 0; i < dim; i++)
|
||||
X[i] = 0;
|
||||
|
||||
@@ -68,14 +122,15 @@ Block::Block(int DIM, int *shapei, double *bboxi, int ranki, int ingfsi, int fng
|
||||
#endif
|
||||
}
|
||||
|
||||
int nn = shape[0] * shape[1] * shape[2];
|
||||
fgfs = new double *[fngfs];
|
||||
for (int i = 0; i < fngfs; i++)
|
||||
{
|
||||
fgfs[i] = (double *)malloc(sizeof(double) * nn);
|
||||
if (!(fgfs[i]))
|
||||
{
|
||||
cout << "on node#" << rank << ", out of memory when constructing Block." << endl;
|
||||
int nn = shape[0] * shape[1] * shape[2];
|
||||
fgfs = new double *[fngfs];
|
||||
fgfs_pinned = new unsigned char[fngfs];
|
||||
for (int i = 0; i < fngfs; i++)
|
||||
{
|
||||
fgfs[i] = alloc_gridfunc((size_t)nn, fgfs_pinned[i]);
|
||||
if (!(fgfs[i]))
|
||||
{
|
||||
cout << "on node#" << rank << ", out of memory when constructing Block." << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
memset(fgfs[i], 0, sizeof(double) * nn);
|
||||
@@ -103,17 +158,19 @@ Block::~Block()
|
||||
{
|
||||
for (int i = 0; i < dim; i++)
|
||||
delete[] X[i];
|
||||
for (int i = 0; i < ingfs; i++)
|
||||
free(igfs[i]);
|
||||
delete[] igfs;
|
||||
for (int i = 0; i < fngfs; i++)
|
||||
free(fgfs[i]);
|
||||
delete[] fgfs;
|
||||
X[0] = X[1] = X[2] = 0;
|
||||
igfs = 0;
|
||||
fgfs = 0;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < ingfs; i++)
|
||||
free(igfs[i]);
|
||||
delete[] igfs;
|
||||
for (int i = 0; i < fngfs; i++)
|
||||
free_gridfunc(fgfs[i], fgfs_pinned ? fgfs_pinned[i] : 0);
|
||||
delete[] fgfs;
|
||||
delete[] fgfs_pinned;
|
||||
X[0] = X[1] = X[2] = 0;
|
||||
igfs = 0;
|
||||
fgfs = 0;
|
||||
fgfs_pinned = 0;
|
||||
}
|
||||
}
|
||||
void Block::checkBlock()
|
||||
{
|
||||
int myrank;
|
||||
@@ -184,12 +241,14 @@ void Block::swapList(MyList<var> *VarList1, MyList<var> *VarList2, int myrank)
|
||||
if (rank == myrank)
|
||||
{
|
||||
MyList<var> *varl1 = VarList1, *varl2 = VarList2;
|
||||
while (varl1 && varl2)
|
||||
{
|
||||
misc::swap<double *>(fgfs[varl1->data->sgfn], fgfs[varl2->data->sgfn]);
|
||||
varl1 = varl1->next;
|
||||
varl2 = varl2->next;
|
||||
}
|
||||
while (varl1 && varl2)
|
||||
{
|
||||
misc::swap<double *>(fgfs[varl1->data->sgfn], fgfs[varl2->data->sgfn]);
|
||||
if (fgfs_pinned)
|
||||
misc::swap<unsigned char>(fgfs_pinned[varl1->data->sgfn], fgfs_pinned[varl2->data->sgfn]);
|
||||
varl1 = varl1->next;
|
||||
varl2 = varl2->next;
|
||||
}
|
||||
if (varl1 || varl2)
|
||||
{
|
||||
cout << "error in Block::swaplist, var lists does not match." << endl;
|
||||
|
||||
Reference in New Issue
Block a user