Pack sync send buffers directly from GPU state
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
#include "fmisc.h"
|
||||
#include "prolongrestrict.h"
|
||||
#include "bssn_cuda_ops.h"
|
||||
#include "bssn_gpu.h"
|
||||
#include "misc.h"
|
||||
#include "parameters.h"
|
||||
#include <cstring>
|
||||
@@ -43,6 +44,85 @@ struct ParallelTransferContextGuard
|
||||
}
|
||||
};
|
||||
|
||||
// Pre-flight check for the GPU packing path.
//
// Walks the paired segment lists `src` / `dst` and, for every pair whose
// destination block belongs to `rank_in` and whose source block belongs to the
// calling MPI rank, asks bssn_gpu_find_device_buffer() about each source field
// selected by `VarLists`.
//
// Returns false when:
//   - either segment list is empty,
//   - the blocks of the two head segments are on different levels,
//   - bssn_gpu_find_device_buffer() yields a false value for any visited field,
//   - `VarLists` and `VarListd` differ in length (only noticed once a matching
//     segment pair is visited).
// Returns true otherwise, including the case where no segment pair matches.
bool parallel_can_gpu_pack_segments(MyList<Parallel::gridseg> *src, MyList<Parallel::gridseg> *dst,
                                    int rank_in, MyList<var> *VarLists, MyList<var> *VarListd)
{
  int myrank;
  MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

  if (!(src && dst))
    return false;

  // Only the head pair is inspected for the level comparison.
  if (src->data->Bg->lev != dst->data->Bg->lev)
    return false;

  for (; src && dst; src = src->next, dst = dst->next)
  {
    // Skip pairs that are not "this rank -> rank_in".
    if (!((dst->data->Bg->rank == rank_in) && (src->data->Bg->rank == myrank)))
      continue;

    MyList<var> *src_var = VarLists;
    MyList<var> *dst_var = VarListd;
    for (; src_var && dst_var; src_var = src_var->next, dst_var = dst_var->next)
    {
      if (!bssn_gpu_find_device_buffer(src->data->Bg->fgfs[src_var->data->sgfn]))
        return false;
    }

    // One list ran out before the other: the variable lists do not pair up.
    if (src_var || dst_var)
      return false;
  }

  return true;
}
|
||||
|
||||
bool parallel_gpu_pack_segments(double *data,
|
||||
MyList<Parallel::gridseg> *src, MyList<Parallel::gridseg> *dst,
|
||||
int rank_in, MyList<var> *VarLists, MyList<var> *VarListd)
|
||||
{
|
||||
if (!data)
|
||||
return false;
|
||||
|
||||
int myrank;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||
if (!src || !dst)
|
||||
return false;
|
||||
if (src->data->Bg->lev != dst->data->Bg->lev)
|
||||
return false;
|
||||
|
||||
int size_out = 0;
|
||||
while (src && dst)
|
||||
{
|
||||
if ((dst->data->Bg->rank == rank_in) && (src->data->Bg->rank == myrank))
|
||||
{
|
||||
MyList<var> *varls = VarLists;
|
||||
MyList<var> *varld = VarListd;
|
||||
while (varls && varld)
|
||||
{
|
||||
(void)varld;
|
||||
if (bssn_gpu_stage_download_region_to_buffer(src->data->Bg->fgfs[varls->data->sgfn],
|
||||
src->data->Bg->shape,
|
||||
src->data->Bg->bbox,
|
||||
src->data->Bg->bbox + dim,
|
||||
dst->data->shape,
|
||||
dst->data->llb,
|
||||
data + size_out))
|
||||
return false;
|
||||
size_out += dst->data->shape[0] * dst->data->shape[1] * dst->data->shape[2];
|
||||
varls = varls->next;
|
||||
varld = varld->next;
|
||||
}
|
||||
if (varls || varld)
|
||||
return false;
|
||||
}
|
||||
src = src->next;
|
||||
dst = dst->next;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void parallel_report_mpi_error(const char *context, int errcode, int req_no)
|
||||
{
|
||||
char errstr[MPI_MAX_ERROR_STRING];
|
||||
@@ -4843,27 +4923,30 @@ void Parallel::Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetr
|
||||
|
||||
for (int node = 0; node < cpusize; node++)
|
||||
{
|
||||
if (node == myrank)
|
||||
{
|
||||
int length;
|
||||
if (node == myrank)
|
||||
{
|
||||
int length;
|
||||
if (!cache.lengths_valid) {
|
||||
length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry);
|
||||
cache.recv_lengths[node] = length;
|
||||
} else {
|
||||
length = cache.recv_lengths[node];
|
||||
}
|
||||
if (length > 0)
|
||||
{
|
||||
if (length > cache.recv_buf_caps[node])
|
||||
{
|
||||
if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node];
|
||||
cache.recv_bufs[node] = new double[length];
|
||||
cache.recv_buf_caps[node] = length;
|
||||
}
|
||||
data_packer(cache.recv_bufs[node], src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry);
|
||||
}
|
||||
}
|
||||
else
|
||||
if (length > 0)
|
||||
{
|
||||
if (length > cache.recv_buf_caps[node])
|
||||
{
|
||||
if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node];
|
||||
cache.recv_bufs[node] = new double[length];
|
||||
cache.recv_buf_caps[node] = length;
|
||||
}
|
||||
bssn_gpu_prepare_host_buffer(cache.recv_bufs[node], length);
|
||||
if (!parallel_can_gpu_pack_segments(src[myrank], dst[myrank], node, VarList, VarList) ||
|
||||
!parallel_gpu_pack_segments(cache.recv_bufs[node], src[myrank], dst[myrank], node, VarList, VarList))
|
||||
data_packer(cache.recv_bufs[node], src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int slength;
|
||||
if (!cache.lengths_valid) {
|
||||
@@ -4872,17 +4955,20 @@ void Parallel::Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetr
|
||||
} else {
|
||||
slength = cache.send_lengths[node];
|
||||
}
|
||||
if (slength > 0)
|
||||
{
|
||||
if (slength > cache.send_buf_caps[node])
|
||||
{
|
||||
if (cache.send_bufs[node]) delete[] cache.send_bufs[node];
|
||||
cache.send_bufs[node] = new double[slength];
|
||||
cache.send_buf_caps[node] = slength;
|
||||
}
|
||||
data_packer(cache.send_bufs[node], src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry);
|
||||
state.req_node[state.req_no] = node;
|
||||
state.req_is_recv[state.req_no] = 0;
|
||||
if (slength > 0)
|
||||
{
|
||||
if (slength > cache.send_buf_caps[node])
|
||||
{
|
||||
if (cache.send_bufs[node]) delete[] cache.send_bufs[node];
|
||||
cache.send_bufs[node] = new double[slength];
|
||||
cache.send_buf_caps[node] = slength;
|
||||
}
|
||||
bssn_gpu_prepare_host_buffer(cache.send_bufs[node], slength);
|
||||
if (!parallel_can_gpu_pack_segments(src[myrank], dst[myrank], node, VarList, VarList) ||
|
||||
!parallel_gpu_pack_segments(cache.send_bufs[node], src[myrank], dst[myrank], node, VarList, VarList))
|
||||
data_packer(cache.send_bufs[node], src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry);
|
||||
state.req_node[state.req_no] = node;
|
||||
state.req_is_recv[state.req_no] = 0;
|
||||
MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, state.mpi_tag, MPI_COMM_WORLD, cache.reqs + state.req_no++);
|
||||
}
|
||||
int rlength;
|
||||
|
||||
Reference in New Issue
Block a user