Modify transfer
@@ -72,14 +72,14 @@ int Parallel::partition3(int *nxyz, int split_size, int *min_width, int cpusize,
 int n;
 
 block_size = shape[0] * shape[1] * shape[2];
 n = Mymax(1, (block_size + split_size / 2) / split_size);
 
 maxnx = Mymax(1, shape[0] / min_width[0]);
 maxnx = Mymin(cpusize, maxnx);
 maxny = Mymax(1, shape[1] / min_width[1]);
 maxny = Mymin(cpusize, maxny);
 maxnz = Mymax(1, shape[2] / min_width[2]);
 maxnz = Mymin(cpusize, maxnz);
 fx = (double)shape[0] / (shape[0] + shape[1] + shape[2]);
 fy = (double)shape[1] / (shape[0] + shape[1] + shape[2]);
 fz = (double)shape[2] / (shape[0] + shape[1] + shape[2]);
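
For reference, the arithmetic kept in the partition3 hunk above is a nearest-integer split count plus a per-axis clamp. A minimal standalone sketch of the same computation, with made-up shape, min_width, split_size, and cpusize values and with Mymax/Mymin assumed to be plain max/min helpers:

#include <algorithm>
#include <iostream>

// Assumed stand-ins for the Mymax/Mymin helpers used in Parallel::partition3.
static int Mymax(int a, int b) { return std::max(a, b); }
static int Mymin(int a, int b) { return std::min(a, b); }

int main()
{
    int shape[3]     = {96, 64, 48};   // hypothetical grid extents
    int min_width[3] = {8, 8, 8};      // hypothetical minimum block width per axis
    int split_size   = 40000;          // hypothetical target points per block
    int cpusize      = 16;             // hypothetical number of MPI ranks

    int block_size = shape[0] * shape[1] * shape[2];
    // Adding split_size/2 before the integer division rounds to the nearest
    // block count instead of truncating; Mymax keeps the result >= 1.
    int n = Mymax(1, (block_size + split_size / 2) / split_size);

    // Each axis can be cut at most shape/min_width times, and never into
    // more pieces than there are ranks.
    int maxnx = Mymin(cpusize, Mymax(1, shape[0] / min_width[0]));
    int maxny = Mymin(cpusize, Mymax(1, shape[1] / min_width[1]));
    int maxnz = Mymin(cpusize, Mymax(1, shape[2] / min_width[2]));

    // fx, fy, fz weight each axis by its share of the total extent.
    double fx = (double)shape[0] / (shape[0] + shape[1] + shape[2]);

    std::cout << n << " " << maxnx << " " << maxny << " " << maxnz
              << " " << fx << std::endl;
    return 0;
}
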
@@ -3504,7 +3504,7 @@ int Parallel::data_packermix(double *data, MyList<Parallel::gridseg> *src, MyLis
 
 return size_out;
 }
-//
+
 void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridseg> **dst,
 MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /*target */,
 int Symmetry)
@@ -3512,7 +3512,16 @@ void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridse
 int myrank, cpusize;
 MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
 MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+/*
+// Early exit: if no gridseg pairs exist for any node, skip all work
+{
+bool has_segs = false;
+for (int n = 0; n < cpusize; n++) {
+if (src[n] && dst[n]) { has_segs = true; break; }
+}
+if (!has_segs) return;
+}
+*/
 int node;
 
 MPI_Request *reqs;
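
The guard added above is left commented out in the commit. As a standalone illustration only, the same check could read as a small helper; has_any_segment is a hypothetical name, and the MyList<Parallel::gridseg>* arrays are reduced to bare pointers here:

#include <cstddef>
#include <iostream>

// Hypothetical helper mirroring the commented-out guard: true as soon as any
// rank has both a source and a destination segment list, i.e. any work to do.
template <typename SegList>
static bool has_any_segment(SegList **src, SegList **dst, int cpusize)
{
    for (int n = 0; n < cpusize; n++)
        if (src[n] != NULL && dst[n] != NULL)
            return true;
    return false;
}

int main()
{
    // Dummy stand-ins for the per-rank segment list arrays.
    int a = 0, b = 0;
    int *src[2] = {&a, NULL};
    int *dst[2] = {&b, NULL};
    std::cout << has_any_segment(src, dst, 2) << std::endl;  // prints 1
    return 0;
}
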
@@ -3527,49 +3536,46 @@ void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridse
 int length;
 
 for (node = 0; node < cpusize; node++)
-{
 send_data[node] = rec_data[node] = 0;
 
+// Pass 1: local copies + all Irecv (post receives first to reduce unexpected-message buffering)
+for (node = 0; node < cpusize; node++)
+{
 if (node == myrank)
 {
 if (length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry))
 {
 rec_data[node] = new double[length];
-if (!rec_data[node])
-{
-cout << "out of memory when new in short transfer, place 1" << endl;
-MPI_Abort(MPI_COMM_WORLD, 1);
-}
 data_packer(rec_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
 }
 }
 else
 {
-// send from this cpu to cpu#node
-if (length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry))
-{
-send_data[node] = new double[length];
-if (!send_data[node])
-{
-cout << "out of memory when new in short transfer, place 2" << endl;
-MPI_Abort(MPI_COMM_WORLD, 1);
-}
-data_packer(send_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
-MPI_Isend((void *)send_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++);
-}
 // receive from cpu#node to this cpu
 if (length = data_packer(0, src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry))
 {
 rec_data[node] = new double[length];
-if (!rec_data[node])
-{
-cout << "out of memory when new in short transfer, place 3" << endl;
-MPI_Abort(MPI_COMM_WORLD, 1);
-}
 MPI_Irecv((void *)rec_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++);
 }
 }
 }
-// wait for all requests to complete
+// Pass 2: all Isend
+for (node = 0; node < cpusize; node++)
+{
+if (node == myrank) continue;
+// send from this cpu to cpu#node
+if (length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry))
+{
+send_data[node] = new double[length];
+data_packer(send_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
+MPI_Isend((void *)send_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++);
+}
+}
 
+// wait for all communication to complete at once
+//for (int i = 0; i < req_no; i++)
+//MPI_Wait(&reqs[i], &stats[i]);
 MPI_Waitall(req_no, reqs, stats);
 
 for (node = 0; node < cpusize; node++)
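
The restructuring above splits the old single loop into two passes: pass 1 does the local myrank copy and posts every MPI_Irecv, pass 2 posts every MPI_Isend, and a single MPI_Waitall completes everything. A minimal self-contained sketch of that ordering, using made-up fixed-size payloads instead of the data_packer machinery and skipping the local copy:

#include <mpi.h>
#include <cstdio>
#include <vector>

// Sketch of the "post all receives, then all sends, then one wait" ordering
// the commit adopts. Buffers and message sizes are made up; the real code
// packs and unpacks gridseg data with data_packer instead.
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int myrank, cpusize;
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &cpusize);

    const int length = 4;  // made-up payload size per peer
    std::vector<std::vector<double> > sendbuf(cpusize, std::vector<double>(length, (double)myrank));
    std::vector<std::vector<double> > recvbuf(cpusize, std::vector<double>(length, -1.0));
    std::vector<MPI_Request> reqs(2 * cpusize, MPI_REQUEST_NULL);
    int req_no = 0;

    // Pass 1: post every receive first, so matching sends find a posted
    // receive instead of landing in the unexpected-message queue.
    for (int node = 0; node < cpusize; node++) {
        if (node == myrank) continue;
        MPI_Irecv(&recvbuf[node][0], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, &reqs[req_no++]);
    }

    // Pass 2: post every send.
    for (int node = 0; node < cpusize; node++) {
        if (node == myrank) continue;
        MPI_Isend(&sendbuf[node][0], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, &reqs[req_no++]);
    }

    // One wait for everything, matching the MPI_Waitall in transfer().
    MPI_Waitall(req_no, &reqs[0], MPI_STATUSES_IGNORE);

    if (myrank == 0) std::printf("exchange complete on %d rank(s)\n", cpusize);
    MPI_Finalize();
    return 0;
}

Posting the receives before any send lets the MPI library place incoming data directly into the user buffers rather than staging it internally, which is the unexpected-message buffering the pass 1 comment refers to.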