Compare commits
2 Commits
chb-twopun
...
yx-mpi
| Author | SHA1 | Date | |
|---|---|---|---|
| b32675ba99 | |||
| 93362baee5 |
@@ -4,6 +4,8 @@
|
|||||||
#include "prolongrestrict.h"
|
#include "prolongrestrict.h"
|
||||||
#include "misc.h"
|
#include "misc.h"
|
||||||
#include "parameters.h"
|
#include "parameters.h"
|
||||||
|
#include <vector>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
int Parallel::partition1(int &nx, int split_size, int min_width, int cpusize, int shape) // special for 1 diemnsion
|
int Parallel::partition1(int &nx, int split_size, int min_width, int cpusize, int shape) // special for 1 diemnsion
|
||||||
{
|
{
|
||||||
@@ -72,14 +74,14 @@ int Parallel::partition3(int *nxyz, int split_size, int *min_width, int cpusize,
|
|||||||
int n;
|
int n;
|
||||||
|
|
||||||
block_size = shape[0] * shape[1] * shape[2];
|
block_size = shape[0] * shape[1] * shape[2];
|
||||||
n = Mymax(1, (block_size + split_size / 2) / split_size);
|
n = Mymax(1, (block_size + split_size / 2) / split_size);
|
||||||
|
|
||||||
maxnx = Mymax(1, shape[0] / min_width[0]);
|
maxnx = Mymax(1, shape[0] / min_width[0]);
|
||||||
maxnx = Mymin(cpusize, maxnx);
|
maxnx = Mymin(cpusize, maxnx);
|
||||||
maxny = Mymax(1, shape[1] / min_width[1]);
|
maxny = Mymax(1, shape[1] / min_width[1]);
|
||||||
maxny = Mymin(cpusize, maxny);
|
maxny = Mymin(cpusize, maxny);
|
||||||
maxnz = Mymax(1, shape[2] / min_width[2]);
|
maxnz = Mymax(1, shape[2] / min_width[2]);
|
||||||
maxnz = Mymin(cpusize, maxnz);
|
maxnz = Mymin(cpusize, maxnz);
|
||||||
fx = (double)shape[0] / (shape[0] + shape[1] + shape[2]);
|
fx = (double)shape[0] / (shape[0] + shape[1] + shape[2]);
|
||||||
fy = (double)shape[1] / (shape[0] + shape[1] + shape[2]);
|
fy = (double)shape[1] / (shape[0] + shape[1] + shape[2]);
|
||||||
fz = (double)shape[2] / (shape[0] + shape[1] + shape[2]);
|
fz = (double)shape[2] / (shape[0] + shape[1] + shape[2]);
|
||||||
@@ -352,14 +354,73 @@ MyList<Block> *Parallel::distribute(MyList<Patch> *PatchLIST, int cpusize, int i
|
|||||||
split_size = Mymax(min_size, block_size / nodes);
|
split_size = Mymax(min_size, block_size / nodes);
|
||||||
split_size = Mymax(1, split_size);
|
split_size = Mymax(1, split_size);
|
||||||
|
|
||||||
int n_rank = 0;
|
// Pass 1: compute block volumes for greedy rank assignment
|
||||||
|
std::vector<long> block_volumes;
|
||||||
PLi = PatchLIST;
|
PLi = PatchLIST;
|
||||||
int reacpu = 0;
|
int reacpu = 0;
|
||||||
while (PLi)
|
while (PLi)
|
||||||
{
|
{
|
||||||
Patch *PP = PLi->data;
|
Patch *PP = PLi->data;
|
||||||
|
|
||||||
reacpu += partition3(nxyz, split_size, mmin_width, nodes, PP->shape);
|
reacpu += partition3(nxyz, split_size, mmin_width, nodes, PP->shape);
|
||||||
|
int ibbox_here[2 * dim];
|
||||||
|
for (int i = 0; i < nxyz[0]; i++)
|
||||||
|
for (int j = 0; j < nxyz[1]; j++)
|
||||||
|
for (int k = 0; k < nxyz[2]; k++)
|
||||||
|
{
|
||||||
|
ibbox_here[0] = (PP->shape[0] * i) / nxyz[0];
|
||||||
|
ibbox_here[3] = (PP->shape[0] * (i + 1)) / nxyz[0] - 1;
|
||||||
|
ibbox_here[1] = (PP->shape[1] * j) / nxyz[1];
|
||||||
|
ibbox_here[4] = (PP->shape[1] * (j + 1)) / nxyz[1] - 1;
|
||||||
|
ibbox_here[2] = (PP->shape[2] * k) / nxyz[2];
|
||||||
|
ibbox_here[5] = (PP->shape[2] * (k + 1)) / nxyz[2] - 1;
|
||||||
|
if (periodic)
|
||||||
|
{
|
||||||
|
for (int d = 0; d < dim; d++) { ibbox_here[d] -= ghost_width; ibbox_here[dim + d] += ghost_width; }
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ibbox_here[0] = Mymax(0, ibbox_here[0] - ghost_width);
|
||||||
|
ibbox_here[3] = Mymin(PP->shape[0] - 1, ibbox_here[3] + ghost_width);
|
||||||
|
ibbox_here[1] = Mymax(0, ibbox_here[1] - ghost_width);
|
||||||
|
ibbox_here[4] = Mymin(PP->shape[1] - 1, ibbox_here[4] + ghost_width);
|
||||||
|
ibbox_here[2] = Mymax(0, ibbox_here[2] - ghost_width);
|
||||||
|
ibbox_here[5] = Mymin(PP->shape[2] - 1, ibbox_here[5] + ghost_width);
|
||||||
|
}
|
||||||
|
long vol = 1;
|
||||||
|
for (int d = 0; d < dim; d++)
|
||||||
|
vol *= (ibbox_here[dim + d] - ibbox_here[d] + 1);
|
||||||
|
block_volumes.push_back(vol);
|
||||||
|
}
|
||||||
|
PLi = PLi->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Greedy LPT: sort by volume descending, assign each to least-loaded rank
|
||||||
|
std::vector<int> assigned_ranks(block_volumes.size());
|
||||||
|
{
|
||||||
|
std::vector<int> order(block_volumes.size());
|
||||||
|
for (int i = 0; i < (int)order.size(); i++) order[i] = i;
|
||||||
|
std::sort(order.begin(), order.end(), [&](int a, int b) {
|
||||||
|
return block_volumes[a] > block_volumes[b];
|
||||||
|
});
|
||||||
|
std::vector<long> load(cpusize, 0);
|
||||||
|
for (int idx : order)
|
||||||
|
{
|
||||||
|
int min_r = 0;
|
||||||
|
for (int r = 1; r < cpusize; r++)
|
||||||
|
if (load[r] < load[min_r]) min_r = r;
|
||||||
|
assigned_ranks[idx] = min_r;
|
||||||
|
load[min_r] += block_volumes[idx];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pass 2: create blocks with pre-assigned ranks
|
||||||
|
int block_idx = 0;
|
||||||
|
PLi = PatchLIST;
|
||||||
|
while (PLi)
|
||||||
|
{
|
||||||
|
Patch *PP = PLi->data;
|
||||||
|
|
||||||
|
partition3(nxyz, split_size, mmin_width, nodes, PP->shape);
|
||||||
|
|
||||||
Block *ng0, *ng;
|
Block *ng0, *ng;
|
||||||
int shape_here[dim], ibbox_here[2 * dim];
|
int shape_here[dim], ibbox_here[2 * dim];
|
||||||
@@ -443,10 +504,7 @@ MyList<Block> *Parallel::distribute(MyList<Patch> *PatchLIST, int cpusize, int i
|
|||||||
int shape_res[dim * pices];
|
int shape_res[dim * pices];
|
||||||
double bbox_res[2 * dim * pices];
|
double bbox_res[2 * dim * pices];
|
||||||
misc::dividBlock(dim, shape_here, bbox_here, pices, picef, shape_res, bbox_res, min_width);
|
misc::dividBlock(dim, shape_here, bbox_here, pices, picef, shape_res, bbox_res, min_width);
|
||||||
ng = ng0 = new Block(dim, shape_res, bbox_res, n_rank++, ingfsi, fngfsi, PP->lev, 0); // delete through KillBlocks
|
ng = ng0 = new Block(dim, shape_res, bbox_res, assigned_ranks[block_idx++], ingfsi, fngfsi, PP->lev, 0); // delete through KillBlocks
|
||||||
|
|
||||||
// if(n_rank==cpusize) {n_rank=0; cerr<<"place one!!"<<endl;}
|
|
||||||
|
|
||||||
// ng->checkBlock();
|
// ng->checkBlock();
|
||||||
if (BlL)
|
if (BlL)
|
||||||
BlL->insert(ng);
|
BlL->insert(ng);
|
||||||
@@ -455,22 +513,19 @@ MyList<Block> *Parallel::distribute(MyList<Patch> *PatchLIST, int cpusize, int i
|
|||||||
|
|
||||||
for (int i = 1; i < pices; i++)
|
for (int i = 1; i < pices; i++)
|
||||||
{
|
{
|
||||||
ng = new Block(dim, shape_res + i * dim, bbox_res + i * 2 * dim, n_rank++, ingfsi, fngfsi, PP->lev, i); // delete through KillBlocks
|
ng = new Block(dim, shape_res + i * dim, bbox_res + i * 2 * dim, assigned_ranks[block_idx++], ingfsi, fngfsi, PP->lev, i); // delete through KillBlocks
|
||||||
// if(n_rank==cpusize) {n_rank=0; cerr<<"place two!! "<<i<<endl;}
|
|
||||||
// ng->checkBlock();
|
// ng->checkBlock();
|
||||||
BlL->insert(ng);
|
BlL->insert(ng);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
ng = ng0 = new Block(dim, shape_here, bbox_here, n_rank++, ingfsi, fngfsi, PP->lev); // delete through KillBlocks
|
ng = ng0 = new Block(dim, shape_here, bbox_here, assigned_ranks[block_idx++], ingfsi, fngfsi, PP->lev); // delete through KillBlocks
|
||||||
// ng->checkBlock();
|
// ng->checkBlock();
|
||||||
if (BlL)
|
if (BlL)
|
||||||
BlL->insert(ng);
|
BlL->insert(ng);
|
||||||
else
|
else
|
||||||
BlL = new MyList<Block>(ng); // delete through KillBlocks
|
BlL = new MyList<Block>(ng); // delete through KillBlocks
|
||||||
#endif
|
#endif
|
||||||
if (n_rank == cpusize)
|
|
||||||
n_rank = 0;
|
|
||||||
|
|
||||||
// set PP->blb
|
// set PP->blb
|
||||||
if (i == 0 && j == 0 && k == 0)
|
if (i == 0 && j == 0 && k == 0)
|
||||||
@@ -3504,7 +3559,7 @@ int Parallel::data_packermix(double *data, MyList<Parallel::gridseg> *src, MyLis
|
|||||||
|
|
||||||
return size_out;
|
return size_out;
|
||||||
}
|
}
|
||||||
//
|
|
||||||
void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridseg> **dst,
|
void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridseg> **dst,
|
||||||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /*target */,
|
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /*target */,
|
||||||
int Symmetry)
|
int Symmetry)
|
||||||
@@ -3512,13 +3567,20 @@ void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridse
|
|||||||
int myrank, cpusize;
|
int myrank, cpusize;
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||||
|
/*
|
||||||
|
// Early exit: if no gridseg pairs exist for any node, skip all work
|
||||||
|
{
|
||||||
|
bool has_segs = false;
|
||||||
|
for (int n = 0; n < cpusize; n++) {
|
||||||
|
if (src[n] && dst[n]) { has_segs = true; break; }
|
||||||
|
}
|
||||||
|
if (!has_segs) return;
|
||||||
|
}
|
||||||
|
*/
|
||||||
int node;
|
int node;
|
||||||
|
|
||||||
MPI_Request *reqs;
|
MPI_Request *reqs = new MPI_Request[2 * cpusize];
|
||||||
MPI_Status *stats;
|
MPI_Status *stats = new MPI_Status[2 * cpusize];
|
||||||
reqs = new MPI_Request[2 * cpusize];
|
|
||||||
stats = new MPI_Status[2 * cpusize];
|
|
||||||
int req_no = 0;
|
int req_no = 0;
|
||||||
|
|
||||||
double **send_data, **rec_data;
|
double **send_data, **rec_data;
|
||||||
@@ -3527,49 +3589,41 @@ void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridse
|
|||||||
int length;
|
int length;
|
||||||
|
|
||||||
for (node = 0; node < cpusize; node++)
|
for (node = 0; node < cpusize; node++)
|
||||||
{
|
|
||||||
send_data[node] = rec_data[node] = 0;
|
send_data[node] = rec_data[node] = 0;
|
||||||
|
|
||||||
|
// 第1步: 本地拷贝 + 所有 Irecv
|
||||||
|
for (node = 0; node < cpusize; node++)
|
||||||
|
{
|
||||||
if (node == myrank)
|
if (node == myrank)
|
||||||
{
|
{
|
||||||
if (length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry))
|
if (length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry))
|
||||||
{
|
{
|
||||||
rec_data[node] = new double[length];
|
rec_data[node] = new double[length];
|
||||||
if (!rec_data[node])
|
|
||||||
{
|
|
||||||
cout << "out of memory when new in short transfer, place 1" << endl;
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
data_packer(rec_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
data_packer(rec_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// send from this cpu to cpu#node
|
|
||||||
if (length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry))
|
|
||||||
{
|
|
||||||
send_data[node] = new double[length];
|
|
||||||
if (!send_data[node])
|
|
||||||
{
|
|
||||||
cout << "out of memory when new in short transfer, place 2" << endl;
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
data_packer(send_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
|
||||||
MPI_Isend((void *)send_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++);
|
|
||||||
}
|
|
||||||
// receive from cpu#node to this cpu
|
|
||||||
if (length = data_packer(0, src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry))
|
if (length = data_packer(0, src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry))
|
||||||
{
|
{
|
||||||
rec_data[node] = new double[length];
|
rec_data[node] = new double[length];
|
||||||
if (!rec_data[node])
|
|
||||||
{
|
|
||||||
cout << "out of memory when new in short transfer, place 3" << endl;
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
MPI_Irecv((void *)rec_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++);
|
MPI_Irecv((void *)rec_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// wait for all requests to complete
|
|
||||||
|
// 第2步: pack + Isend
|
||||||
|
for (node = 0; node < cpusize; node++)
|
||||||
|
{
|
||||||
|
if (node == myrank) continue;
|
||||||
|
if (length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry))
|
||||||
|
{
|
||||||
|
send_data[node] = new double[length];
|
||||||
|
data_packer(send_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
||||||
|
MPI_Isend((void *)send_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
MPI_Waitall(req_no, reqs, stats);
|
MPI_Waitall(req_no, reqs, stats);
|
||||||
|
|
||||||
for (node = 0; node < cpusize; node++)
|
for (node = 0; node < cpusize; node++)
|
||||||
|
|||||||
Reference in New Issue
Block a user