7248 lines
213 KiB
C
7248 lines
213 KiB
C
|
||
#include "Parallel.h"
|
||
#include "fmisc.h"
|
||
#include "prolongrestrict.h"
|
||
#include "misc.h"
|
||
#include "parameters.h"
|
||
#include <set>
|
||
|
||
int Parallel::partition1(int &nx, int split_size, int min_width, int cpusize, int shape) // special for 1 diemnsion
|
||
{
|
||
nx = Mymax(1, shape / min_width);
|
||
nx = Mymin(cpusize, nx);
|
||
|
||
return nx;
|
||
}
|
||
int Parallel::partition2(int *nxy, int split_size, int *min_width, int cpusize, int *shape) // special for 2 diemnsions
|
||
{
|
||
#define SEARCH_SIZE 5
|
||
int i, j, nx, ny;
|
||
int maxnx, maxny;
|
||
int mnx, mny;
|
||
int dn, hmin_width, cmin_width;
|
||
int cnx, cny;
|
||
double fx, fy;
|
||
int block_size;
|
||
int n;
|
||
|
||
block_size = shape[0] * shape[1];
|
||
n = Mymax(1, (block_size + split_size / 2) / split_size);
|
||
|
||
maxnx = Mymax(1, shape[0] / min_width[0]);
|
||
maxnx = Mymin(cpusize, maxnx);
|
||
maxny = Mymax(1, shape[1] / min_width[1]);
|
||
maxny = Mymin(cpusize, maxny);
|
||
fx = (double)shape[0] / (shape[0] + shape[1]);
|
||
fy = (double)shape[1] / (shape[0] + shape[1]);
|
||
nx = mnx = Mymax(1, Mymin(maxnx, (int)(sqrt(double(n)) * fx / fy)));
|
||
ny = mny = Mymax(1, Mymin(maxny, (int)(sqrt(double(n)) * fy / fx)));
|
||
dn = abs(n - nx * ny);
|
||
hmin_width = Mymin(shape[0] / nx, shape[1] / ny);
|
||
for (cny = Mymax(1, mny - SEARCH_SIZE); cny <= (Mymin(mny + SEARCH_SIZE, maxny)); cny++)
|
||
for (cnx = Mymax(1, mnx - SEARCH_SIZE); cnx <= (Mymin(mnx + SEARCH_SIZE, maxnx)); cnx++)
|
||
{
|
||
cmin_width = Mymin(shape[0] / cnx, shape[1] / cny);
|
||
if (dn > abs(n - cnx * cny) || (dn == abs(n - cnx * cny) && cmin_width > hmin_width))
|
||
{
|
||
dn = abs(n - cnx * cny);
|
||
nx = cnx;
|
||
ny = cny;
|
||
hmin_width = cmin_width;
|
||
}
|
||
}
|
||
|
||
nxy[0] = nx;
|
||
nxy[1] = ny;
|
||
|
||
return nx * ny;
|
||
#undef SEARCH_SIZE
|
||
}
|
||
int Parallel::partition3(int *nxyz, int split_size, int *min_width, int cpusize, int *shape) // special for 3 diemnsions
|
||
#if 1 // algrithsm from Pretorius
|
||
{
|
||
// cout<<split_size<<endl<<min_width[0]<<endl<<min_width[1]<<endl<<min_width[2]<<endl
|
||
// <<shape[0]<<endl<<shape[1]<<endl<<shape[2]<<endl<<cpusize<<endl;
|
||
#define SEARCH_SIZE 5
|
||
int i, j, k, nx, ny, nz;
|
||
int maxnx, maxny, maxnz;
|
||
int mnx, mny, mnz;
|
||
int dn, hmin_width, cmin_width;
|
||
int cnx, cny, cnz;
|
||
double fx, fy, fz, max_fxfy, max_fxfz, max_fyfz;
|
||
int block_size;
|
||
int n;
|
||
|
||
block_size = shape[0] * shape[1] * shape[2];
|
||
n = Mymax(1, (block_size + split_size / 2) / split_size);
|
||
|
||
maxnx = Mymax(1, shape[0] / min_width[0]);
|
||
maxnx = Mymin(cpusize, maxnx);
|
||
maxny = Mymax(1, shape[1] / min_width[1]);
|
||
maxny = Mymin(cpusize, maxny);
|
||
maxnz = Mymax(1, shape[2] / min_width[2]);
|
||
maxnz = Mymin(cpusize, maxnz);
|
||
fx = (double)shape[0] / (shape[0] + shape[1] + shape[2]);
|
||
fy = (double)shape[1] / (shape[0] + shape[1] + shape[2]);
|
||
fz = (double)shape[2] / (shape[0] + shape[1] + shape[2]);
|
||
max_fxfy = Mymax(fx, fy);
|
||
max_fxfz = Mymax(fx, fz);
|
||
max_fyfz = Mymax(fy, fz);
|
||
nx = mnx = Mymax(1, Mymin(maxnx, (int)(pow(n, 1.0 / 3.0) * fx / max_fyfz)));
|
||
ny = mny = Mymax(1, Mymin(maxny, (int)(pow(n, 1.0 / 3.0) * fy / max_fxfz)));
|
||
nz = mnz = Mymax(1, Mymin(maxnz, (int)(pow(n, 1.0 / 3.0) * fz / max_fxfy)));
|
||
dn = abs(n - nx * ny * nz);
|
||
hmin_width = Mymin(shape[2] / nz, shape[1] / ny);
|
||
hmin_width = Mymin(hmin_width, shape[0] / nx);
|
||
for (cnz = Mymax(1, mnz - SEARCH_SIZE); cnz <= (Mymin(mnz + SEARCH_SIZE, maxnz)); cnz++)
|
||
for (cny = Mymax(1, mny - SEARCH_SIZE); cny <= (Mymin(mny + SEARCH_SIZE, maxny)); cny++)
|
||
for (cnx = Mymax(1, mnx - SEARCH_SIZE); cnx <= (Mymin(mnx + SEARCH_SIZE, maxnx)); cnx++)
|
||
{
|
||
cmin_width = Mymin(shape[2] / cnz, shape[1] / cny);
|
||
cmin_width = Mymin(cmin_width, shape[0] / cnx);
|
||
if (dn > abs(n - cnx * cny * cnz) || (dn == abs(n - cnx * cny * cnz) && cmin_width > hmin_width))
|
||
{
|
||
dn = abs(n - cnx * cny * cnz);
|
||
nx = cnx;
|
||
ny = cny;
|
||
nz = cnz;
|
||
hmin_width = cmin_width;
|
||
}
|
||
}
|
||
|
||
nxyz[0] = nx;
|
||
nxyz[1] = ny;
|
||
nxyz[2] = nz;
|
||
|
||
return nx * ny * nz;
|
||
#undef SEARCH_SIZE
|
||
}
|
||
#elif 0 // Zhihui's idea one on 2013-09-25
|
||
{
|
||
int nx, ny, nz;
|
||
int hmin_width;
|
||
hmin_width = Mymin(min_width[0], min_width[1]);
|
||
hmin_width = Mymin(hmin_width, min_width[2]);
|
||
nx = shape[0] / hmin_width;
|
||
if (nx * hmin_width < shape[0])
|
||
nx++;
|
||
ny = shape[1] / hmin_width;
|
||
if (ny * hmin_width < shape[1])
|
||
ny++;
|
||
nz = shape[2] / hmin_width;
|
||
if (nz * hmin_width < shape[2])
|
||
nz++;
|
||
while (nx * ny * nz > cpusize)
|
||
{
|
||
hmin_width++;
|
||
nx = shape[0] / hmin_width;
|
||
if (nx * hmin_width < shape[0])
|
||
nx++;
|
||
ny = shape[1] / hmin_width;
|
||
if (ny * hmin_width < shape[1])
|
||
ny++;
|
||
nz = shape[2] / hmin_width;
|
||
if (nz * hmin_width < shape[2])
|
||
nz++;
|
||
}
|
||
|
||
nxyz[0] = nx;
|
||
nxyz[1] = ny;
|
||
nxyz[2] = nz;
|
||
|
||
return nx * ny * nz;
|
||
}
|
||
#elif 0 // Zhihui's idea two on 2013-09-25
|
||
{
|
||
int nx, ny, nz;
|
||
const int hmin_width = 8; // for example we use 8
|
||
nx = shape[0] / hmin_width;
|
||
if (nx * hmin_width < shape[0])
|
||
nx++;
|
||
ny = shape[1] / hmin_width;
|
||
if (ny * hmin_width < shape[1])
|
||
ny++;
|
||
nz = shape[2] / hmin_width;
|
||
if (nz * hmin_width < shape[2])
|
||
nz++;
|
||
|
||
nxyz[0] = nx;
|
||
nxyz[1] = ny;
|
||
nxyz[2] = nz;
|
||
|
||
return nx * ny * nz;
|
||
}
|
||
#endif
|
||
// distribute the data to cprocessors
|
||
#if (PSTR == 0)
|
||
MyList<Block> *Parallel::distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
|
||
bool periodic, int nodes)
|
||
{
|
||
#ifdef USE_GPU_DIVIDE
|
||
double cpu_part, gpu_part;
|
||
map<string, double>::iterator iter;
|
||
iter = parameters::dou_par.find("cpu part");
|
||
if (iter != parameters::dou_par.end())
|
||
{
|
||
cpu_part = iter->second;
|
||
}
|
||
else
|
||
{
|
||
int myrank;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
// read parameter from file
|
||
const int LEN = 256;
|
||
char pline[LEN];
|
||
string str, sgrp, skey, sval;
|
||
int sind;
|
||
char pname[50];
|
||
{
|
||
map<string, string>::iterator iter = parameters::str_par.find("inputpar");
|
||
if (iter != parameters::str_par.end())
|
||
{
|
||
strcpy(pname, (iter->second).c_str());
|
||
}
|
||
else
|
||
{
|
||
cout << "Error inputpar" << endl;
|
||
exit(0);
|
||
}
|
||
}
|
||
ifstream inf(pname, ifstream::in);
|
||
if (!inf.good() && myrank == 0)
|
||
{
|
||
cout << "Can not open parameter file " << pname << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
|
||
for (int i = 1; inf.good(); i++)
|
||
{
|
||
inf.getline(pline, LEN);
|
||
str = pline;
|
||
|
||
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
|
||
if (status == -1)
|
||
{
|
||
cout << "error reading parameter file " << pname << " in line " << i << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
else if (status == 0)
|
||
continue;
|
||
|
||
if (sgrp == "ABE")
|
||
{
|
||
if (skey == "cpu part")
|
||
cpu_part = atof(sval.c_str());
|
||
}
|
||
}
|
||
inf.close();
|
||
|
||
parameters::dou_par.insert(map<string, double>::value_type("cpu part", cpu_part));
|
||
}
|
||
iter = parameters::dou_par.find("gpu part");
|
||
if (iter != parameters::dou_par.end())
|
||
{
|
||
gpu_part = iter->second;
|
||
}
|
||
else
|
||
{
|
||
int myrank;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
// read parameter from file
|
||
const int LEN = 256;
|
||
char pline[LEN];
|
||
string str, sgrp, skey, sval;
|
||
int sind;
|
||
char pname[50];
|
||
{
|
||
map<string, string>::iterator iter = parameters::str_par.find("inputpar");
|
||
if (iter != parameters::str_par.end())
|
||
{
|
||
strcpy(pname, (iter->second).c_str());
|
||
}
|
||
else
|
||
{
|
||
cout << "Error inputpar" << endl;
|
||
exit(0);
|
||
}
|
||
}
|
||
ifstream inf(pname, ifstream::in);
|
||
if (!inf.good() && myrank == 0)
|
||
{
|
||
cout << "Can not open parameter file " << pname << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
|
||
for (int i = 1; inf.good(); i++)
|
||
{
|
||
inf.getline(pline, LEN);
|
||
str = pline;
|
||
|
||
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
|
||
if (status == -1)
|
||
{
|
||
cout << "error reading parameter file " << pname << " in line " << i << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
else if (status == 0)
|
||
continue;
|
||
|
||
if (sgrp == "ABE")
|
||
{
|
||
if (skey == "gpu part")
|
||
gpu_part = atof(sval.c_str());
|
||
}
|
||
}
|
||
inf.close();
|
||
|
||
parameters::dou_par.insert(map<string, double>::value_type("gpu part", gpu_part));
|
||
}
|
||
|
||
if (nodes == 0)
|
||
nodes = cpusize / 2;
|
||
#else
|
||
if (nodes == 0)
|
||
nodes = cpusize;
|
||
#endif
|
||
|
||
if (dim != 3)
|
||
{
|
||
cout << "distrivute: now we only support 3-dimension" << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
|
||
MyList<Block> *BlL = 0;
|
||
|
||
int split_size, min_size, block_size = 0;
|
||
|
||
int min_width = 2 * Mymax(ghost_width, buffer_width);
|
||
int nxyz[dim], mmin_width[dim], min_shape[dim];
|
||
|
||
MyList<Patch> *PLi = PatchLIST;
|
||
for (int i = 0; i < dim; i++)
|
||
min_shape[i] = PLi->data->shape[i];
|
||
int lev = PLi->data->lev;
|
||
PLi = PLi->next;
|
||
while (PLi)
|
||
{
|
||
Patch *PP = PLi->data;
|
||
for (int i = 0; i < dim; i++)
|
||
min_shape[i] = Mymin(min_shape[i], PP->shape[i]);
|
||
if (lev != PLi->data->lev)
|
||
cout << "Parallel::distribute CAUSTION: meet Patches for different level: " << lev << " and " << PLi->data->lev << endl;
|
||
PLi = PLi->next;
|
||
}
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
mmin_width[i] = Mymin(min_width, min_shape[i]);
|
||
|
||
min_size = mmin_width[0];
|
||
for (int i = 1; i < dim; i++)
|
||
min_size = min_size * mmin_width[i];
|
||
|
||
PLi = PatchLIST;
|
||
while (PLi)
|
||
{
|
||
Patch *PP = PLi->data;
|
||
// PP->checkPatch(true);
|
||
int bs = PP->shape[0];
|
||
for (int i = 1; i < dim; i++)
|
||
bs = bs * PP->shape[i];
|
||
block_size = block_size + bs;
|
||
PLi = PLi->next;
|
||
}
|
||
split_size = Mymax(min_size, block_size / nodes);
|
||
split_size = Mymax(1, split_size);
|
||
|
||
int n_rank = 0;
|
||
PLi = PatchLIST;
|
||
int reacpu = 0;
|
||
while (PLi)
|
||
{
|
||
Patch *PP = PLi->data;
|
||
|
||
reacpu += partition3(nxyz, split_size, mmin_width, nodes, PP->shape);
|
||
|
||
Block *ng0, *ng;
|
||
int shape_here[dim], ibbox_here[2 * dim];
|
||
double bbox_here[2 * dim], dd;
|
||
|
||
// ibbox : 0,...N-1
|
||
for (int i = 0; i < nxyz[0]; i++)
|
||
for (int j = 0; j < nxyz[1]; j++)
|
||
for (int k = 0; k < nxyz[2]; k++)
|
||
{
|
||
ibbox_here[0] = (PP->shape[0] * i) / nxyz[0];
|
||
ibbox_here[3] = (PP->shape[0] * (i + 1)) / nxyz[0] - 1;
|
||
ibbox_here[1] = (PP->shape[1] * j) / nxyz[1];
|
||
ibbox_here[4] = (PP->shape[1] * (j + 1)) / nxyz[1] - 1;
|
||
ibbox_here[2] = (PP->shape[2] * k) / nxyz[2];
|
||
ibbox_here[5] = (PP->shape[2] * (k + 1)) / nxyz[2] - 1;
|
||
|
||
if (periodic)
|
||
{
|
||
ibbox_here[0] = ibbox_here[0] - ghost_width;
|
||
ibbox_here[3] = ibbox_here[3] + ghost_width;
|
||
ibbox_here[1] = ibbox_here[1] - ghost_width;
|
||
ibbox_here[4] = ibbox_here[4] + ghost_width;
|
||
ibbox_here[2] = ibbox_here[2] - ghost_width;
|
||
ibbox_here[5] = ibbox_here[5] + ghost_width;
|
||
}
|
||
else
|
||
{
|
||
ibbox_here[0] = Mymax(0, ibbox_here[0] - ghost_width);
|
||
ibbox_here[3] = Mymin(PP->shape[0] - 1, ibbox_here[3] + ghost_width);
|
||
ibbox_here[1] = Mymax(0, ibbox_here[1] - ghost_width);
|
||
ibbox_here[4] = Mymin(PP->shape[1] - 1, ibbox_here[4] + ghost_width);
|
||
ibbox_here[2] = Mymax(0, ibbox_here[2] - ghost_width);
|
||
ibbox_here[5] = Mymin(PP->shape[2] - 1, ibbox_here[5] + ghost_width);
|
||
}
|
||
|
||
shape_here[0] = ibbox_here[3] - ibbox_here[0] + 1;
|
||
shape_here[1] = ibbox_here[4] - ibbox_here[1] + 1;
|
||
shape_here[2] = ibbox_here[5] - ibbox_here[2] + 1;
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
// 0--4, 5--10
|
||
dd = (PP->bbox[3] - PP->bbox[0]) / (PP->shape[0] - 1);
|
||
bbox_here[0] = PP->bbox[0] + ibbox_here[0] * dd;
|
||
bbox_here[3] = PP->bbox[0] + ibbox_here[3] * dd;
|
||
|
||
dd = (PP->bbox[4] - PP->bbox[1]) / (PP->shape[1] - 1);
|
||
bbox_here[1] = PP->bbox[1] + ibbox_here[1] * dd;
|
||
bbox_here[4] = PP->bbox[1] + ibbox_here[4] * dd;
|
||
|
||
dd = (PP->bbox[5] - PP->bbox[2]) / (PP->shape[2] - 1);
|
||
bbox_here[2] = PP->bbox[2] + ibbox_here[2] * dd;
|
||
bbox_here[5] = PP->bbox[2] + ibbox_here[5] * dd;
|
||
#else
|
||
#ifdef Cell
|
||
// 0--5, 5--10
|
||
dd = (PP->bbox[3] - PP->bbox[0]) / PP->shape[0];
|
||
bbox_here[0] = PP->bbox[0] + (ibbox_here[0]) * dd;
|
||
bbox_here[3] = PP->bbox[0] + (ibbox_here[3] + 1) * dd;
|
||
|
||
dd = (PP->bbox[4] - PP->bbox[1]) / PP->shape[1];
|
||
bbox_here[1] = PP->bbox[1] + (ibbox_here[1]) * dd;
|
||
bbox_here[4] = PP->bbox[1] + (ibbox_here[4] + 1) * dd;
|
||
|
||
dd = (PP->bbox[5] - PP->bbox[2]) / PP->shape[2];
|
||
bbox_here[2] = PP->bbox[2] + (ibbox_here[2]) * dd;
|
||
bbox_here[5] = PP->bbox[2] + (ibbox_here[5] + 1) * dd;
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
|
||
#ifdef USE_GPU_DIVIDE
|
||
{
|
||
const int pices = 2;
|
||
double picef[pices];
|
||
picef[0] = cpu_part;
|
||
picef[1] = gpu_part;
|
||
int shape_res[dim * pices];
|
||
double bbox_res[2 * dim * pices];
|
||
misc::dividBlock(dim, shape_here, bbox_here, pices, picef, shape_res, bbox_res, min_width);
|
||
ng = ng0 = new Block(dim, shape_res, bbox_res, n_rank++, ingfsi, fngfsi, PP->lev, 0); // delete through KillBlocks
|
||
|
||
// if(n_rank==cpusize) {n_rank=0; cerr<<"place one!!"<<endl;}
|
||
|
||
// ng->checkBlock();
|
||
if (BlL)
|
||
BlL->insert(ng);
|
||
else
|
||
BlL = new MyList<Block>(ng); // delete through KillBlocks
|
||
|
||
for (int i = 1; i < pices; i++)
|
||
{
|
||
ng = new Block(dim, shape_res + i * dim, bbox_res + i * 2 * dim, n_rank++, ingfsi, fngfsi, PP->lev, i); // delete through KillBlocks
|
||
// if(n_rank==cpusize) {n_rank=0; cerr<<"place two!! "<<i<<endl;}
|
||
// ng->checkBlock();
|
||
BlL->insert(ng);
|
||
}
|
||
}
|
||
#else
|
||
ng = ng0 = new Block(dim, shape_here, bbox_here, n_rank++, ingfsi, fngfsi, PP->lev); // delete through KillBlocks
|
||
// ng->checkBlock();
|
||
if (BlL)
|
||
BlL->insert(ng);
|
||
else
|
||
BlL = new MyList<Block>(ng); // delete through KillBlocks
|
||
#endif
|
||
if (n_rank == cpusize)
|
||
n_rank = 0;
|
||
|
||
// set PP->blb
|
||
if (i == 0 && j == 0 && k == 0)
|
||
{
|
||
MyList<Block> *Bp = BlL;
|
||
while (Bp->data != ng0)
|
||
Bp = Bp->next; // ng0 is the first of the pices list
|
||
PP->blb = Bp;
|
||
}
|
||
}
|
||
// set PP->ble
|
||
{
|
||
MyList<Block> *Bp = BlL;
|
||
while (Bp->data != ng)
|
||
Bp = Bp->next; // ng is the last of the pices list
|
||
PP->ble = Bp;
|
||
}
|
||
PLi = PLi->next;
|
||
}
|
||
if (reacpu < nodes * 2 / 3)
|
||
{
|
||
int myrank;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
if (myrank == 0)
|
||
cout << "Parallel::distribute CAUSTION: level#" << lev << " uses essencially " << reacpu << " processors vs " << nodes << " nodes run, your scientific computation scale is not as large as you estimate." << endl;
|
||
}
|
||
|
||
return BlL;
|
||
}
|
||
MyList<Block> *Parallel::distribute_hard(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
|
||
bool periodic, int nodes)
|
||
{
|
||
#ifdef USE_GPU_DIVIDE
|
||
double cpu_part, gpu_part;
|
||
map<string, double>::iterator iter;
|
||
iter = parameters::dou_par.find("cpu part");
|
||
if (iter != parameters::dou_par.end())
|
||
{
|
||
cpu_part = iter->second;
|
||
}
|
||
else
|
||
{
|
||
int myrank;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
// read parameter from file
|
||
const int LEN = 256;
|
||
char pline[LEN];
|
||
string str, sgrp, skey, sval;
|
||
int sind;
|
||
char pname[50];
|
||
{
|
||
map<string, string>::iterator iter = parameters::str_par.find("inputpar");
|
||
if (iter != parameters::str_par.end())
|
||
{
|
||
strcpy(pname, (iter->second).c_str());
|
||
}
|
||
else
|
||
{
|
||
cout << "Error inputpar" << endl;
|
||
exit(0);
|
||
}
|
||
}
|
||
ifstream inf(pname, ifstream::in);
|
||
if (!inf.good() && myrank == 0)
|
||
{
|
||
cout << "Can not open parameter file " << pname << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
|
||
for (int i = 1; inf.good(); i++)
|
||
{
|
||
inf.getline(pline, LEN);
|
||
str = pline;
|
||
|
||
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
|
||
if (status == -1)
|
||
{
|
||
cout << "error reading parameter file " << pname << " in line " << i << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
else if (status == 0)
|
||
continue;
|
||
|
||
if (sgrp == "ABE")
|
||
{
|
||
if (skey == "cpu part")
|
||
cpu_part = atof(sval.c_str());
|
||
}
|
||
}
|
||
inf.close();
|
||
|
||
parameters::dou_par.insert(map<string, double>::value_type("cpu part", cpu_part));
|
||
}
|
||
iter = parameters::dou_par.find("gpu part");
|
||
if (iter != parameters::dou_par.end())
|
||
{
|
||
gpu_part = iter->second;
|
||
}
|
||
else
|
||
{
|
||
int myrank;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
// read parameter from file
|
||
const int LEN = 256;
|
||
char pline[LEN];
|
||
string str, sgrp, skey, sval;
|
||
int sind;
|
||
char pname[50];
|
||
{
|
||
map<string, string>::iterator iter = parameters::str_par.find("inputpar");
|
||
if (iter != parameters::str_par.end())
|
||
{
|
||
strcpy(pname, (iter->second).c_str());
|
||
}
|
||
else
|
||
{
|
||
cout << "Error inputpar" << endl;
|
||
exit(0);
|
||
}
|
||
}
|
||
ifstream inf(pname, ifstream::in);
|
||
if (!inf.good() && myrank == 0)
|
||
{
|
||
cout << "Can not open parameter file " << pname << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
|
||
for (int i = 1; inf.good(); i++)
|
||
{
|
||
inf.getline(pline, LEN);
|
||
str = pline;
|
||
|
||
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
|
||
if (status == -1)
|
||
{
|
||
cout << "error reading parameter file " << pname << " in line " << i << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
else if (status == 0)
|
||
continue;
|
||
|
||
if (sgrp == "ABE")
|
||
{
|
||
if (skey == "gpu part")
|
||
gpu_part = atof(sval.c_str());
|
||
}
|
||
}
|
||
inf.close();
|
||
|
||
parameters::dou_par.insert(map<string, double>::value_type("gpu part", gpu_part));
|
||
}
|
||
|
||
if (nodes == 0)
|
||
nodes = cpusize / 2;
|
||
#else
|
||
if (nodes == 0)
|
||
nodes = cpusize;
|
||
#endif
|
||
|
||
if (dim != 3)
|
||
{
|
||
cout << "distrivute: now we only support 3-dimension" << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
|
||
MyList<Block> *BlL = 0;
|
||
int split_size, min_size, block_size = 0;
|
||
|
||
int min_width = 2 * Mymax(ghost_width, buffer_width);
|
||
int nxyz[dim], mmin_width[dim], min_shape[dim];
|
||
|
||
MyList<Patch> *PLi = PatchLIST;
|
||
for (int i = 0; i < dim; i++)
|
||
min_shape[i] = PLi->data->shape[i];
|
||
int lev = PLi->data->lev;
|
||
PLi = PLi->next;
|
||
while (PLi)
|
||
{
|
||
Patch *PP = PLi->data;
|
||
for (int i = 0; i < dim; i++)
|
||
min_shape[i] = Mymin(min_shape[i], PP->shape[i]);
|
||
if (lev != PLi->data->lev)
|
||
cout << "Parallel::distribute CAUSTION: meet Patches for different level: " << lev << " and " << PLi->data->lev << endl;
|
||
PLi = PLi->next;
|
||
}
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
mmin_width[i] = Mymin(min_width, min_shape[i]);
|
||
|
||
min_size = mmin_width[0];
|
||
for (int i = 1; i < dim; i++)
|
||
min_size = min_size * mmin_width[i];
|
||
|
||
PLi = PatchLIST;
|
||
while (PLi)
|
||
{
|
||
Patch *PP = PLi->data;
|
||
// PP->checkPatch(true);
|
||
int bs = PP->shape[0];
|
||
for (int i = 1; i < dim; i++)
|
||
bs = bs * PP->shape[i];
|
||
block_size = block_size + bs;
|
||
PLi = PLi->next;
|
||
}
|
||
split_size = Mymax(min_size, block_size / nodes);
|
||
split_size = Mymax(1, split_size);
|
||
|
||
int n_rank = 0;
|
||
PLi = PatchLIST;
|
||
int reacpu = 0;
|
||
int current_block_id = 0;
|
||
while (PLi) {
|
||
Block *ng0, *ng;
|
||
bool first_block_in_patch = true;
|
||
Patch *PP = PLi->data;
|
||
reacpu += partition3(nxyz, split_size, mmin_width, nodes, PP->shape);
|
||
|
||
for (int i = 0; i < nxyz[0]; i++)
|
||
for (int j = 0; j < nxyz[1]; j++)
|
||
for (int k = 0; k < nxyz[2]; k++)
|
||
{
|
||
// --- 1. 定义局部变量 ---
|
||
int ibbox_here[6], shape_here[3];
|
||
double bbox_here[6], dd;
|
||
Block *current_ng_start = nullptr; // 本次循环产生的第一个(或唯一一个)块
|
||
|
||
// --- 2. 核心逻辑分支 ---
|
||
if (current_block_id == 27 || current_block_id == 28 ||
|
||
current_block_id == 35 || current_block_id == 36)
|
||
{
|
||
// A. 计算原始索引 (不带 Ghost)
|
||
int ib0 = (PP->shape[0] * i) / nxyz[0];
|
||
int ib3 = (PP->shape[0] * (i + 1)) / nxyz[0] - 1;
|
||
int jb1 = (PP->shape[1] * j) / nxyz[1];
|
||
int jb4 = (PP->shape[1] * (j + 1)) / nxyz[1] - 1;
|
||
int kb2 = (PP->shape[2] * k) / nxyz[2];
|
||
int kb5 = (PP->shape[2] * (k + 1)) / nxyz[2] - 1;
|
||
|
||
int r_1, r_2, r_3, r_4;
|
||
Block * split_first_block = nullptr;
|
||
Block * split_last_block = nullptr;
|
||
if(current_block_id == 27)
|
||
{
|
||
r_1 = 24; r_2 = 25; r_3 = 26; r_4 = 27;
|
||
splitHotspotBlock(BlL, dim, ib0, ib3, jb1, jb4, kb2, kb5,
|
||
PP, r_1,r_2,r_3, r_4, ingfsi, fngfsi, periodic,split_first_block,split_last_block);
|
||
}
|
||
|
||
else if(current_block_id == 28)
|
||
{
|
||
r_1 = 28; r_2 = 29;
|
||
splitHotspotBlock(BlL, dim, ib0, ib3, jb1, jb4, kb2, kb5,
|
||
PP, r_1, r_2, ingfsi, fngfsi, periodic,split_first_block,split_last_block);
|
||
}
|
||
else if(current_block_id == 35)
|
||
{
|
||
r_1 = 34; r_2 = 35;
|
||
splitHotspotBlock(BlL, dim, ib0, ib3, jb1, jb4, kb2, kb5,
|
||
PP, r_1, r_2, ingfsi, fngfsi, periodic,split_first_block,split_last_block);
|
||
}
|
||
else
|
||
{
|
||
r_1 = 36; r_2 = 37; r_3 = 38; r_4 = 39;
|
||
splitHotspotBlock(BlL, dim, ib0, ib3, jb1, jb4, kb2, kb5,
|
||
PP, r_1,r_2,r_3, r_4, ingfsi, fngfsi, periodic,split_first_block,split_last_block);
|
||
}
|
||
|
||
current_ng_start = split_first_block;
|
||
ng = split_last_block;
|
||
}
|
||
else
|
||
{
|
||
// B. 普通块逻辑 (含 Ghost 扩张)
|
||
ibbox_here[0] = (PP->shape[0] * i) / nxyz[0];
|
||
ibbox_here[3] = (PP->shape[0] * (i + 1)) / nxyz[0] - 1;
|
||
ibbox_here[1] = (PP->shape[1] * j) / nxyz[1];
|
||
ibbox_here[4] = (PP->shape[1] * (j + 1)) / nxyz[1] - 1;
|
||
ibbox_here[2] = (PP->shape[2] * k) / nxyz[2];
|
||
ibbox_here[5] = (PP->shape[2] * (k + 1)) / nxyz[2] - 1;
|
||
|
||
if (periodic) {
|
||
for(int d=0; d<3; d++) {
|
||
ibbox_here[d] -= ghost_width;
|
||
ibbox_here[d+3] += ghost_width;
|
||
}
|
||
} else {
|
||
ibbox_here[0] = Mymax(0, ibbox_here[0] - ghost_width);
|
||
ibbox_here[3] = Mymin(PP->shape[0] - 1, ibbox_here[3] + ghost_width);
|
||
ibbox_here[1] = Mymax(0, ibbox_here[1] - ghost_width);
|
||
ibbox_here[4] = Mymin(PP->shape[1] - 1, ibbox_here[4] + ghost_width);
|
||
ibbox_here[2] = Mymax(0, ibbox_here[2] - ghost_width);
|
||
ibbox_here[5] = Mymin(PP->shape[2] - 1, ibbox_here[5] + ghost_width);
|
||
}
|
||
|
||
for(int d=0; d<3; d++) shape_here[d] = ibbox_here[d+3] - ibbox_here[d] + 1;
|
||
|
||
// 物理坐标计算 (根据你的宏定义 Cell/Vertex)
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
// 0--4, 5--10
|
||
dd = (PP->bbox[3] - PP->bbox[0]) / (PP->shape[0] - 1);
|
||
bbox_here[0] = PP->bbox[0] + ibbox_here[0] * dd;
|
||
bbox_here[3] = PP->bbox[0] + ibbox_here[3] * dd;
|
||
|
||
dd = (PP->bbox[4] - PP->bbox[1]) / (PP->shape[1] - 1);
|
||
bbox_here[1] = PP->bbox[1] + ibbox_here[1] * dd;
|
||
bbox_here[4] = PP->bbox[1] + ibbox_here[4] * dd;
|
||
|
||
dd = (PP->bbox[5] - PP->bbox[2]) / (PP->shape[2] - 1);
|
||
bbox_here[2] = PP->bbox[2] + ibbox_here[2] * dd;
|
||
bbox_here[5] = PP->bbox[2] + ibbox_here[5] * dd;
|
||
#else
|
||
#ifdef Cell
|
||
// 0--5, 5--10
|
||
dd = (PP->bbox[3] - PP->bbox[0]) / PP->shape[0];
|
||
bbox_here[0] = PP->bbox[0] + (ibbox_here[0]) * dd;
|
||
bbox_here[3] = PP->bbox[0] + (ibbox_here[3] + 1) * dd;
|
||
|
||
dd = (PP->bbox[4] - PP->bbox[1]) / PP->shape[1];
|
||
bbox_here[1] = PP->bbox[1] + (ibbox_here[1]) * dd;
|
||
bbox_here[4] = PP->bbox[1] + (ibbox_here[4] + 1) * dd;
|
||
|
||
dd = (PP->bbox[5] - PP->bbox[2]) / PP->shape[2];
|
||
bbox_here[2] = PP->bbox[2] + (ibbox_here[2]) * dd;
|
||
bbox_here[5] = PP->bbox[2] + (ibbox_here[5] + 1) * dd;
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
ng = createMappedBlock(BlL, dim, shape_here, bbox_here, current_block_id, ingfsi, fngfsi, PP->lev);
|
||
current_ng_start = ng;
|
||
}
|
||
|
||
// --- 3. 统一处理 Patch 起始 Block 指针 ---
|
||
if (first_block_in_patch) {
|
||
ng0 = current_ng_start;
|
||
|
||
// 立即设置 PP->blb,避免后续循环覆盖 ng0
|
||
MyList<Block> *Bp_start = BlL;
|
||
while (Bp_start && Bp_start->data != ng0) Bp_start = Bp_start->next;
|
||
PP->blb = Bp_start;
|
||
|
||
first_block_in_patch = false;
|
||
}
|
||
|
||
current_block_id++;
|
||
}
|
||
|
||
// --- 4. 设置 Patch 结束 Block 指针 ---
|
||
MyList<Block> *Bp_end = BlL;
|
||
while (Bp_end && Bp_end->data != ng) Bp_end = Bp_end->next;
|
||
PP->ble = Bp_end;
|
||
|
||
PLi = PLi->next;
|
||
first_block_in_patch = true;
|
||
}
|
||
if (reacpu < nodes * 2 / 3)
|
||
{
|
||
int myrank;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
if (myrank == 0)
|
||
cout << "Parallel::distribute CAUSTION: level#" << lev << " uses essencially " << reacpu << " processors vs " << nodes << " nodes run, your scientific computation scale is not as large as you estimate." << endl;
|
||
}
|
||
|
||
return BlL;
|
||
}
|
||
|
||
|
||
|
||
/**
|
||
* @brief 将当前 Block 几何4等分并存入列表
|
||
*/
|
||
|
||
|
||
Block* Parallel::splitHotspotBlock(MyList<Block>* &BlL, int _dim,
|
||
int ib0_orig, int ib3_orig,
|
||
int jb1_orig, int jb4_orig,
|
||
int kb2_orig, int kb5_orig,
|
||
Patch* PP, int r_1, int r_2, int r_3, int r_4,
|
||
int ingfsi, int fngfsi, bool periodic,
|
||
Block* &split_first_block, Block* &split_last_block)
|
||
{
|
||
// 1. 计算四分索引区间
|
||
// 计算 X 方向的总网格数
|
||
int total_len = ib3_orig - ib0_orig + 1;
|
||
|
||
// 计算三个切分点,确保覆盖 [ib0_orig, ib3_orig]
|
||
// 段 1: [ib0_orig, m1]
|
||
// 段 2: [m1 + 1, m2]
|
||
// 段 3: [m2 + 1, m3]
|
||
// 段 4: [m3 + 1, ib3_orig]
|
||
int m1 = ib0_orig + total_len / 4 - 1;
|
||
int m2 = ib0_orig + total_len / 2 - 1;
|
||
int m3 = ib0_orig + (3 * total_len) / 4 - 1;
|
||
|
||
int indices[4][6] = {
|
||
{ib0_orig, jb1_orig, kb2_orig, m1, jb4_orig, kb5_orig}, // 子块 1
|
||
{m1 + 1, jb1_orig, kb2_orig, m2, jb4_orig, kb5_orig}, // 子块 2
|
||
{m2 + 1, jb1_orig, kb2_orig, m3, jb4_orig, kb5_orig}, // 子块 3
|
||
{m3 + 1, jb1_orig, kb2_orig, ib3_orig, jb4_orig, kb5_orig} // 子块 4
|
||
};
|
||
|
||
int target_ranks[4] = {r_1, r_2, r_3, r_4};
|
||
|
||
// 2. 内部处理逻辑 (保持原有的 Ghost 扩张和物理坐标转换)
|
||
auto createSubBlock = [&](int* ib_raw, int target_rank) {
|
||
int ib_final[6];
|
||
int sh_here[3];
|
||
double bb_here[6], dd;
|
||
|
||
// --- 逻辑 A: Ghost 扩张 ---
|
||
if (periodic) {
|
||
ib_final[0] = ib_raw[0] - ghost_width;
|
||
ib_final[3] = ib_raw[3] + ghost_width;
|
||
ib_final[1] = ib_raw[1] - ghost_width;
|
||
ib_final[4] = ib_raw[4] + ghost_width;
|
||
ib_final[2] = ib_raw[2] - ghost_width;
|
||
ib_final[5] = ib_raw[5] + ghost_width;
|
||
} else {
|
||
ib_final[0] = Mymax(0, ib_raw[0] - ghost_width);
|
||
ib_final[3] = Mymin(PP->shape[0] - 1, ib_raw[3] + ghost_width);
|
||
ib_final[1] = Mymax(0, ib_raw[1] - ghost_width);
|
||
ib_final[4] = Mymin(PP->shape[1] - 1, ib_raw[4] + ghost_width);
|
||
ib_final[2] = Mymax(0, ib_raw[2] - ghost_width);
|
||
ib_final[5] = Mymin(PP->shape[2] - 1, ib_raw[5] + ghost_width);
|
||
}
|
||
|
||
sh_here[0] = ib_final[3] - ib_final[0] + 1;
|
||
sh_here[1] = ib_final[4] - ib_final[1] + 1;
|
||
sh_here[2] = ib_final[5] - ib_final[2] + 1;
|
||
|
||
// --- 逻辑 B: 物理坐标计算 ---
|
||
dd = (PP->bbox[3] - PP->bbox[0]) / PP->shape[0];
|
||
bb_here[0] = PP->bbox[0] + ib_final[0] * dd;
|
||
bb_here[3] = PP->bbox[0] + (ib_final[3] + 1) * dd;
|
||
|
||
dd = (PP->bbox[4] - PP->bbox[1]) / PP->shape[1];
|
||
bb_here[1] = PP->bbox[1] + ib_final[1] * dd;
|
||
bb_here[4] = PP->bbox[1] + (ib_final[4] + 1) * dd;
|
||
|
||
dd = (PP->bbox[5] - PP->bbox[2]) / PP->shape[2];
|
||
bb_here[2] = PP->bbox[2] + ib_final[2] * dd;
|
||
bb_here[5] = PP->bbox[2] + (ib_final[5] + 1) * dd;
|
||
|
||
Block* Bg = new Block(_dim, sh_here, bb_here, target_rank, ingfsi, fngfsi, PP->lev);
|
||
|
||
if (BlL) BlL->insert(Bg);
|
||
else BlL = new MyList<Block>(Bg);
|
||
|
||
return Bg;
|
||
};
|
||
|
||
// 3. 执行创建并返回首尾指针
|
||
split_first_block = createSubBlock(indices[0], target_ranks[0]);
|
||
createSubBlock(indices[1], target_ranks[1]); // 中间块仅插入 List
|
||
createSubBlock(indices[2], target_ranks[2]); // 中间块仅插入 List
|
||
split_last_block = createSubBlock(indices[3], target_ranks[3]);
|
||
}
|
||
|
||
/**
|
||
* @brief 将当前 Block 几何二等分并存入列表
|
||
* @param axis 拆分轴:0-x, 1-y, 2-z (建议选最长轴)
|
||
*/
|
||
|
||
|
||
Block* Parallel::splitHotspotBlock(MyList<Block>* &BlL, int _dim,
|
||
int ib0_orig, int ib3_orig,
|
||
int jb1_orig, int jb4_orig,
|
||
int kb2_orig, int kb5_orig,
|
||
Patch* PP, int r_left, int r_right,
|
||
int ingfsi, int fngfsi, bool periodic,
|
||
Block* &split_first_block, Block* &split_last_block)
|
||
{
|
||
// 1. 索引二分 (基于无 ghost 的原始索引)
|
||
int mid = (ib0_orig + ib3_orig) / 2;
|
||
|
||
// 左块原始索引: [ib0, mid], 右块原始索引: [mid+1, ib3]
|
||
int indices_L[6] = {ib0_orig, jb1_orig, kb2_orig, mid, jb4_orig, kb5_orig};
|
||
int indices_R[6] = {mid + 1, jb1_orig, kb2_orig, ib3_orig, jb4_orig, kb5_orig};
|
||
|
||
// 2. 内部处理逻辑 (复刻原 distribute 逻辑)
|
||
auto createSubBlock = [&](int* ib_raw, int target_rank) {
|
||
int ib_final[6];
|
||
int sh_here[3];
|
||
double bb_here[6], dd;
|
||
|
||
// --- 逻辑 A: Ghost 扩张 ---
|
||
if (periodic) {
|
||
ib_final[0] = ib_raw[0] - ghost_width;
|
||
ib_final[3] = ib_raw[3] + ghost_width;
|
||
ib_final[1] = ib_raw[1] - ghost_width;
|
||
ib_final[4] = ib_raw[4] + ghost_width;
|
||
ib_final[2] = ib_raw[2] - ghost_width;
|
||
ib_final[5] = ib_raw[5] + ghost_width;
|
||
} else {
|
||
ib_final[0] = Mymax(0, ib_raw[0] - ghost_width);
|
||
ib_final[3] = Mymin(PP->shape[0] - 1, ib_raw[3] + ghost_width);
|
||
ib_final[1] = Mymax(0, ib_raw[1] - ghost_width);
|
||
ib_final[4] = Mymin(PP->shape[1] - 1, ib_raw[4] + ghost_width);
|
||
ib_final[2] = Mymax(0, ib_raw[2] - ghost_width);
|
||
ib_final[5] = Mymin(PP->shape[2] - 1, ib_raw[5] + ghost_width);
|
||
}
|
||
|
||
sh_here[0] = ib_final[3] - ib_final[0] + 1;
|
||
sh_here[1] = ib_final[4] - ib_final[1] + 1;
|
||
sh_here[2] = ib_final[5] - ib_final[2] + 1;
|
||
|
||
// --- 逻辑 B: 物理坐标计算 (严格匹配 Cell 模式) ---
|
||
// X 方向
|
||
dd = (PP->bbox[3] - PP->bbox[0]) / PP->shape[0];
|
||
bb_here[0] = PP->bbox[0] + ib_final[0] * dd;
|
||
bb_here[3] = PP->bbox[0] + (ib_final[3] + 1) * dd;
|
||
|
||
// Y 方向
|
||
dd = (PP->bbox[4] - PP->bbox[1]) / PP->shape[1];
|
||
bb_here[1] = PP->bbox[1] + ib_final[1] * dd;
|
||
bb_here[4] = PP->bbox[1] + (ib_final[4] + 1) * dd;
|
||
|
||
// Z 方向
|
||
dd = (PP->bbox[5] - PP->bbox[2]) / PP->shape[2];
|
||
bb_here[2] = PP->bbox[2] + ib_final[2] * dd;
|
||
bb_here[5] = PP->bbox[2] + (ib_final[5] + 1) * dd;
|
||
|
||
Block* Bg = new Block(dim, sh_here, bb_here, target_rank, ingfsi, fngfsi, PP->lev);
|
||
if (BlL) BlL->insert(Bg);
|
||
else BlL = new MyList<Block>(Bg);
|
||
|
||
|
||
return Bg;
|
||
};
|
||
|
||
// 执行创建
|
||
split_first_block = createSubBlock(indices_L, r_left);
|
||
split_last_block = createSubBlock(indices_R, r_right);
|
||
}
|
||
|
||
|
||
/**
|
||
* @brief 创建映射后的 Block
|
||
*/
|
||
Block* Parallel::createMappedBlock(MyList<Block>* &BlL, int _dim, int* shape, double* bbox,
|
||
int block_id, int ingfsi, int fngfsi, int lev)
|
||
{
|
||
// 映射表逻辑
|
||
int target_rank = block_id;
|
||
switch(block_id){
|
||
case 24: target_rank = 23; break;
|
||
case 25: target_rank = 23; break;
|
||
case 26: target_rank = 23; break;
|
||
case 29: target_rank = 30; break;
|
||
case 34: target_rank = 33; break;
|
||
case 37: target_rank = 40; break;
|
||
case 38: target_rank = 40; break;
|
||
case 39: target_rank = 40; break;
|
||
|
||
}
|
||
|
||
Block* ng = new Block(dim, shape, bbox, target_rank, ingfsi, fngfsi, lev);
|
||
|
||
if (BlL) BlL->insert(ng);
|
||
else BlL = new MyList<Block>(ng);
|
||
|
||
return ng;
|
||
}
|
||
|
||
|
||
|
||
|
||
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
||
MyList<Block> *Parallel::distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
|
||
bool periodic, int start_rank, int end_rank, int nodes)
|
||
{
|
||
#ifdef USE_GPU_DIVIDE
|
||
double cpu_part, gpu_part;
|
||
map<string, double>::iterator iter;
|
||
iter = parameters::dou_par.find("cpu part");
|
||
if (iter != parameters::dou_par.end())
|
||
{
|
||
cpu_part = iter->second;
|
||
}
|
||
else
|
||
{
|
||
int myrank;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
// read parameter from file
|
||
const int LEN = 256;
|
||
char pline[LEN];
|
||
string str, sgrp, skey, sval;
|
||
int sind;
|
||
char pname[50];
|
||
{
|
||
map<string, string>::iterator iter = parameters::str_par.find("inputpar");
|
||
if (iter != parameters::str_par.end())
|
||
{
|
||
strcpy(pname, (iter->second).c_str());
|
||
}
|
||
else
|
||
{
|
||
cout << "Error inputpar" << endl;
|
||
exit(0);
|
||
}
|
||
}
|
||
ifstream inf(pname, ifstream::in);
|
||
if (!inf.good() && myrank == 0)
|
||
{
|
||
cout << "Can not open parameter file " << pname << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
|
||
for (int i = 1; inf.good(); i++)
|
||
{
|
||
inf.getline(pline, LEN);
|
||
str = pline;
|
||
|
||
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
|
||
if (status == -1)
|
||
{
|
||
cout << "error reading parameter file " << pname << " in line " << i << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
else if (status == 0)
|
||
continue;
|
||
|
||
if (sgrp == "ABE")
|
||
{
|
||
if (skey == "cpu part")
|
||
cpu_part = atof(sval.c_str());
|
||
}
|
||
}
|
||
inf.close();
|
||
|
||
parameters::dou_par.insert(map<string, double>::value_type("cpu part", cpu_part));
|
||
}
|
||
iter = parameters::dou_par.find("gpu part");
|
||
if (iter != parameters::dou_par.end())
|
||
{
|
||
gpu_part = iter->second;
|
||
}
|
||
else
|
||
{
|
||
int myrank;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
// read parameter from file
|
||
const int LEN = 256;
|
||
char pline[LEN];
|
||
string str, sgrp, skey, sval;
|
||
int sind;
|
||
char pname[50];
|
||
{
|
||
map<string, string>::iterator iter = parameters::str_par.find("inputpar");
|
||
if (iter != parameters::str_par.end())
|
||
{
|
||
strcpy(pname, (iter->second).c_str());
|
||
}
|
||
else
|
||
{
|
||
cout << "Error inputpar" << endl;
|
||
exit(0);
|
||
}
|
||
}
|
||
ifstream inf(pname, ifstream::in);
|
||
if (!inf.good() && myrank == 0)
|
||
{
|
||
cout << "Can not open parameter file " << pname << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
|
||
for (int i = 1; inf.good(); i++)
|
||
{
|
||
inf.getline(pline, LEN);
|
||
str = pline;
|
||
|
||
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
|
||
if (status == -1)
|
||
{
|
||
cout << "error reading parameter file " << pname << " in line " << i << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
else if (status == 0)
|
||
continue;
|
||
|
||
if (sgrp == "ABE")
|
||
{
|
||
if (skey == "gpu part")
|
||
gpu_part = atof(sval.c_str());
|
||
}
|
||
}
|
||
inf.close();
|
||
|
||
parameters::dou_par.insert(map<string, double>::value_type("gpu part", gpu_part));
|
||
}
|
||
|
||
if (nodes == 0)
|
||
nodes = cpusize / 2;
|
||
#else
|
||
if (nodes == 0)
|
||
nodes = cpusize;
|
||
#endif
|
||
|
||
if (dim != 3)
|
||
{
|
||
cout << "distrivute: now we only support 3-dimension" << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
|
||
MyList<Block> *BlL = 0;
|
||
|
||
int split_size, min_size, block_size = 0;
|
||
|
||
int min_width = 2 * Mymax(ghost_width, buffer_width);
|
||
int nxyz[dim], mmin_width[dim], min_shape[dim];
|
||
|
||
MyList<Patch> *PLi = PatchLIST;
|
||
for (int i = 0; i < dim; i++)
|
||
min_shape[i] = PLi->data->shape[i];
|
||
int lev = PLi->data->lev;
|
||
PLi = PLi->next;
|
||
while (PLi)
|
||
{
|
||
Patch *PP = PLi->data;
|
||
for (int i = 0; i < dim; i++)
|
||
min_shape[i] = Mymin(min_shape[i], PP->shape[i]);
|
||
if (lev != PLi->data->lev)
|
||
cout << "Parallel::distribute CAUSTION: meet Patches for different level: " << lev << " and " << PLi->data->lev << endl;
|
||
PLi = PLi->next;
|
||
}
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
mmin_width[i] = Mymin(min_width, min_shape[i]);
|
||
|
||
min_size = mmin_width[0];
|
||
for (int i = 1; i < dim; i++)
|
||
min_size = min_size * mmin_width[i];
|
||
|
||
PLi = PatchLIST;
|
||
while (PLi)
|
||
{
|
||
Patch *PP = PLi->data;
|
||
// PP->checkPatch(true);
|
||
int bs = PP->shape[0];
|
||
for (int i = 1; i < dim; i++)
|
||
bs = bs * PP->shape[i];
|
||
block_size = block_size + bs;
|
||
PLi = PLi->next;
|
||
}
|
||
split_size = Mymax(min_size, block_size / cpusize);
|
||
split_size = Mymax(1, split_size);
|
||
|
||
int n_rank = start_rank;
|
||
PLi = PatchLIST;
|
||
int reacpu = 0;
|
||
while (PLi)
|
||
{
|
||
Patch *PP = PLi->data;
|
||
|
||
reacpu += partition3(nxyz, split_size, mmin_width, cpusize, PP->shape);
|
||
|
||
Block *ng, *ng0;
|
||
int shape_here[dim], ibbox_here[2 * dim];
|
||
double bbox_here[2 * dim], dd;
|
||
|
||
// ibbox : 0,...N-1
|
||
for (int i = 0; i < nxyz[0]; i++)
|
||
for (int j = 0; j < nxyz[1]; j++)
|
||
for (int k = 0; k < nxyz[2]; k++)
|
||
{
|
||
ibbox_here[0] = (PP->shape[0] * i) / nxyz[0];
|
||
ibbox_here[3] = (PP->shape[0] * (i + 1)) / nxyz[0] - 1;
|
||
ibbox_here[1] = (PP->shape[1] * j) / nxyz[1];
|
||
ibbox_here[4] = (PP->shape[1] * (j + 1)) / nxyz[1] - 1;
|
||
ibbox_here[2] = (PP->shape[2] * k) / nxyz[2];
|
||
ibbox_here[5] = (PP->shape[2] * (k + 1)) / nxyz[2] - 1;
|
||
|
||
if (periodic)
|
||
{
|
||
ibbox_here[0] = ibbox_here[0] - ghost_width;
|
||
ibbox_here[3] = ibbox_here[3] + ghost_width;
|
||
ibbox_here[1] = ibbox_here[1] - ghost_width;
|
||
ibbox_here[4] = ibbox_here[4] + ghost_width;
|
||
ibbox_here[2] = ibbox_here[2] - ghost_width;
|
||
ibbox_here[5] = ibbox_here[5] + ghost_width;
|
||
}
|
||
else
|
||
{
|
||
ibbox_here[0] = Mymax(0, ibbox_here[0] - ghost_width);
|
||
ibbox_here[3] = Mymin(PP->shape[0] - 1, ibbox_here[3] + ghost_width);
|
||
ibbox_here[1] = Mymax(0, ibbox_here[1] - ghost_width);
|
||
ibbox_here[4] = Mymin(PP->shape[1] - 1, ibbox_here[4] + ghost_width);
|
||
ibbox_here[2] = Mymax(0, ibbox_here[2] - ghost_width);
|
||
ibbox_here[5] = Mymin(PP->shape[2] - 1, ibbox_here[5] + ghost_width);
|
||
}
|
||
|
||
shape_here[0] = ibbox_here[3] - ibbox_here[0] + 1;
|
||
shape_here[1] = ibbox_here[4] - ibbox_here[1] + 1;
|
||
shape_here[2] = ibbox_here[5] - ibbox_here[2] + 1;
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
// 0--4, 5--10
|
||
dd = (PP->bbox[3] - PP->bbox[0]) / (PP->shape[0] - 1);
|
||
bbox_here[0] = PP->bbox[0] + ibbox_here[0] * dd;
|
||
bbox_here[3] = PP->bbox[0] + ibbox_here[3] * dd;
|
||
|
||
dd = (PP->bbox[4] - PP->bbox[1]) / (PP->shape[1] - 1);
|
||
bbox_here[1] = PP->bbox[1] + ibbox_here[1] * dd;
|
||
bbox_here[4] = PP->bbox[1] + ibbox_here[4] * dd;
|
||
|
||
dd = (PP->bbox[5] - PP->bbox[2]) / (PP->shape[2] - 1);
|
||
bbox_here[2] = PP->bbox[2] + ibbox_here[2] * dd;
|
||
bbox_here[5] = PP->bbox[2] + ibbox_here[5] * dd;
|
||
#else
|
||
#ifdef Cell
|
||
// 0--5, 5--10
|
||
dd = (PP->bbox[3] - PP->bbox[0]) / PP->shape[0];
|
||
bbox_here[0] = PP->bbox[0] + (ibbox_here[0]) * dd;
|
||
bbox_here[3] = PP->bbox[0] + (ibbox_here[3] + 1) * dd;
|
||
|
||
dd = (PP->bbox[4] - PP->bbox[1]) / PP->shape[1];
|
||
bbox_here[1] = PP->bbox[1] + (ibbox_here[1]) * dd;
|
||
bbox_here[4] = PP->bbox[1] + (ibbox_here[4] + 1) * dd;
|
||
|
||
dd = (PP->bbox[5] - PP->bbox[2]) / PP->shape[2];
|
||
bbox_here[2] = PP->bbox[2] + (ibbox_here[2]) * dd;
|
||
bbox_here[5] = PP->bbox[2] + (ibbox_here[5] + 1) * dd;
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
|
||
#ifdef USE_GPU_DIVIDE
|
||
{
|
||
const int pices = 2;
|
||
double picef[pices];
|
||
picef[0] = cpu_part;
|
||
picef[1] = gpu_part;
|
||
int shape_res[dim * pices];
|
||
double bbox_res[2 * dim * pices];
|
||
misc::dividBlock(dim, shape_here, bbox_here, pices, picef, shape_res, bbox_res, min_width);
|
||
ng = ng0 = new Block(dim, shape_res, bbox_res, n_rank++, ingfsi, fngfsi, PP->lev, 0); // delete through KillBlocks
|
||
// ng->checkBlock();
|
||
if (BlL)
|
||
BlL->insert(ng);
|
||
else
|
||
BlL = new MyList<Block>(ng); // delete through KillBlocks
|
||
|
||
for (int i = 1; i < pices; i++)
|
||
{
|
||
ng = new Block(dim, shape_res + i * dim, bbox_res + i * 2 * dim, n_rank++, ingfsi, fngfsi, PP->lev, i); // delete through KillBlocks
|
||
// ng->checkBlock();
|
||
BlL->insert(ng);
|
||
}
|
||
}
|
||
#else
|
||
ng = ng0 = new Block(dim, shape_here, bbox_here, n_rank++, ingfsi, fngfsi, PP->lev); // delete through KillBlocks
|
||
// ng->checkBlock();
|
||
if (BlL)
|
||
BlL->insert(ng);
|
||
else
|
||
BlL = new MyList<Block>(ng); // delete through KillBlocks
|
||
#endif
|
||
|
||
if (n_rank == end_rank + 1)
|
||
n_rank = start_rank;
|
||
|
||
// set PP->blb
|
||
if (i == 0 && j == 0 && k == 0)
|
||
{
|
||
MyList<Block> *Bp = BlL;
|
||
while (Bp->data != ng0)
|
||
Bp = Bp->next; // ng0 is the first of the pices list
|
||
PP->blb = Bp;
|
||
}
|
||
}
|
||
// set PP->ble
|
||
{
|
||
MyList<Block> *Bp = BlL;
|
||
while (Bp->data != ng)
|
||
Bp = Bp->next; // ng is the last of the pices list
|
||
PP->ble = Bp;
|
||
}
|
||
PLi = PLi->next;
|
||
}
|
||
if (reacpu < nodes * 2 / 3)
|
||
{
|
||
int myrank;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
if (myrank == start_rank)
|
||
cout << "Parallel::distribute CAUSTION: level#" << lev << " uses essencially " << reacpu << " processors vs " << nodes << " nodes run, your scientific computation scale is not as large as you estimate." << endl;
|
||
}
|
||
|
||
return BlL;
|
||
}
|
||
#endif
|
||
void Parallel::setfunction(MyList<Block> *BlL, var *vn, double func(double x, double y, double z))
|
||
{
|
||
while (BlL)
|
||
{
|
||
if (BlL->data->X[0])
|
||
{
|
||
int nn = BlL->data->shape[0] * BlL->data->shape[1] * BlL->data->shape[2];
|
||
double *p = BlL->data->fgfs[vn->sgfn];
|
||
for (int i = 0; i < nn; i++)
|
||
{
|
||
int ind[3];
|
||
getarrayindex(3, BlL->data->shape, ind, i);
|
||
p[i] = func(BlL->data->X[0][ind[0]], BlL->data->X[1][ind[1]], BlL->data->X[2][ind[2]]);
|
||
}
|
||
}
|
||
BlL = BlL->next;
|
||
}
|
||
}
|
||
// set function only for cpu rank
|
||
void Parallel::setfunction(int rank, MyList<Block> *BlL, var *vn, double func(double x, double y, double z))
|
||
{
|
||
while (BlL)
|
||
{
|
||
if (BlL->data->X[0] && BlL->data->rank == rank)
|
||
{
|
||
int nn = BlL->data->shape[0] * BlL->data->shape[1] * BlL->data->shape[2];
|
||
double *p = BlL->data->fgfs[vn->sgfn];
|
||
for (int i = 0; i < nn; i++)
|
||
{
|
||
int ind[3];
|
||
getarrayindex(3, BlL->data->shape, ind, i);
|
||
p[i] = func(BlL->data->X[0][ind[0]], BlL->data->X[1][ind[1]], BlL->data->X[2][ind[2]]);
|
||
}
|
||
}
|
||
BlL = BlL->next;
|
||
}
|
||
}
|
||
void Parallel::getarrayindex(int DIM, int *shape, int *index, int n)
|
||
{
|
||
// we assume index has already memory space
|
||
int *mu;
|
||
mu = new int[DIM];
|
||
mu[0] = 1;
|
||
for (int i = 1; i < DIM; i++)
|
||
mu[i] = mu[i - 1] * shape[i - 1];
|
||
for (int i = DIM - 1; i >= 0; i--)
|
||
{
|
||
index[i] = n / mu[i];
|
||
n = n - index[i] * mu[i];
|
||
}
|
||
|
||
delete[] mu;
|
||
}
|
||
int Parallel::getarraylocation(int DIM, int *shape, int *index)
|
||
{
|
||
int n, mu;
|
||
mu = shape[0];
|
||
n = index[0];
|
||
for (int i = 1; i < DIM; i++)
|
||
{
|
||
n = n + index[i] * mu;
|
||
mu = mu * shape[i];
|
||
}
|
||
|
||
return n;
|
||
}
|
||
void Parallel::copy(int DIM, double *llbout, double *uubout, int *Dshape, double *DD, double *llbin, double *uubin,
|
||
int *shape, double *datain, double *llb, double *uub)
|
||
{
|
||
// for 3 dimensional case, based on simple test, I found this is half slower than f90 code
|
||
int *illi, *iuui;
|
||
int *illo, *iuuo;
|
||
int *indi, *indo;
|
||
illi = new int[DIM];
|
||
iuui = new int[DIM];
|
||
illo = new int[DIM];
|
||
iuuo = new int[DIM];
|
||
indi = new int[DIM];
|
||
indo = new int[DIM];
|
||
|
||
int ial = 1;
|
||
for (int i = 0; i < DIM; i++)
|
||
{
|
||
double ho, hi;
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
ho = (uubout[i] - llbout[i]) / (Dshape[i] - 1);
|
||
hi = (uubin[i] - llbin[i]) / (shape[i] - 1);
|
||
#else
|
||
#ifdef Cell
|
||
ho = (uubout[i] - llbout[i]) / Dshape[i];
|
||
hi = (uubin[i] - llbin[i]) / shape[i];
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
illo[i] = int((llb[i] - llbout[i]) / ho);
|
||
iuuo[i] = Dshape[i] - 1 - int((uubout[i] - uub[i]) / ho);
|
||
illi[i] = int((llb[i] - llbin[i]) / hi);
|
||
iuui[i] = shape[i] - 1 - int((uubin[i] - uub[i]) / hi);
|
||
|
||
if (illo[i] > iuuo[i] || illi[i] > iuui[i] || illo[i] < 0 || illi[i] < 0 ||
|
||
iuui[i] >= shape[i] || iuuo[i] >= Dshape[i])
|
||
{
|
||
cout << "Parallel copy: in direction " << i << ":" << endl;
|
||
cout << "llb = " << llb[i] << ", uub = " << uub[i] << endl;
|
||
cout << " in data : il = " << illi[i] << ", iu = " << iuui[i] << endl;
|
||
cout << "bbox = (" << llbin[i] << "," << uubin[i] << ")" << endl;
|
||
cout << "shape = " << shape[i] << endl;
|
||
cout << "out data : il = " << illo[i] << ", iu = " << iuuo[i] << endl;
|
||
cout << "bbox = (" << llbout[i] << "," << uubout[i] << ")" << endl;
|
||
cout << "shape = " << Dshape[i] << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
int ihi = iuui[i] - illi[i] + 1, iho = iuuo[i] - illo[i] + 1;
|
||
if (!(feq(ho, hi, ho / 2)) || ihi != iho)
|
||
{
|
||
cout << "Parallel copy: in direction " << i << ":" << endl;
|
||
cout << "Parallel copy: not the same grid structure." << endl;
|
||
cout << "hi = " << hi << ", bbox = (" << llbin[i] << "," << uubin[i] << "), shape = " << shape[i] << endl;
|
||
cout << "ho = " << ho << ", bbox = (" << llbout[i] << "," << uubout[i] << "), shape = " << Dshape[i] << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
ial = ial * ihi;
|
||
}
|
||
|
||
for (int i = 0; i < DIM; i++)
|
||
{
|
||
indi[i] = illi[i];
|
||
indo[i] = illo[i];
|
||
}
|
||
/*
|
||
//check start index
|
||
for(int i=0;i<DIM;i++)
|
||
{
|
||
cout << "Parallel copy: in direction " <<i<<":"<< endl;
|
||
cout<<"start : indi = "<<indi[i]<<", indo = "<<indo[i]<<endl;
|
||
}
|
||
*/
|
||
int NNi = 1, NNo = 1;
|
||
for (int i = 0; i < DIM; i++)
|
||
{
|
||
NNi = NNi * shape[i];
|
||
NNo = NNo * Dshape[i];
|
||
}
|
||
for (int i = 0; i < ial; i++)
|
||
{
|
||
int ni, no;
|
||
ni = getarraylocation(DIM, shape, indi);
|
||
no = getarraylocation(DIM, Dshape, indo);
|
||
if (no < 0 || no > NNo)
|
||
{
|
||
cout << "Parallel copy: no = " << no << " is out of array range (0," << NNo << ")." << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
if (ni < 0 || ni > NNi)
|
||
{
|
||
cout << "Parallel copy: ni = " << ni << " is out of array range (0," << NNi << ")." << endl;
|
||
cout << "shape = (";
|
||
for (int j = 0; j < DIM; j++)
|
||
{
|
||
cout << shape[j];
|
||
if (j < DIM - 1)
|
||
cout << ",";
|
||
else
|
||
cout << ")" << endl;
|
||
}
|
||
cout << "ind = (";
|
||
for (int j = 0; j < DIM; j++)
|
||
{
|
||
cout << indi[j];
|
||
if (j < DIM - 1)
|
||
cout << ",";
|
||
else
|
||
cout << ")" << endl;
|
||
}
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
DD[no] = datain[ni];
|
||
|
||
indi[0]++;
|
||
for (int j = 1; j < DIM; j++)
|
||
{
|
||
if (indi[j - 1] == iuui[j - 1] + 1)
|
||
{
|
||
indi[j - 1] = illi[j - 1];
|
||
indi[j]++;
|
||
} // carry 1 to next digital
|
||
else
|
||
break;
|
||
}
|
||
indo[0]++;
|
||
for (int j = 1; j < DIM; j++)
|
||
{
|
||
if (indo[j - 1] == iuuo[j - 1] + 1)
|
||
{
|
||
indo[j - 1] = illo[j - 1];
|
||
indo[j]++;
|
||
}
|
||
else
|
||
break;
|
||
}
|
||
}
|
||
/*
|
||
//check final index
|
||
for(int i=0;i<DIM;i++)
|
||
{
|
||
cout << "Parallel copy: in direction " <<i<<":"<< endl;
|
||
cout<<"final : indi = "<<indi[i]<<", indo = "<<indo[i]<<endl;
|
||
}
|
||
*/
|
||
delete[] illi;
|
||
delete[] iuui;
|
||
delete[] illo;
|
||
delete[] iuuo;
|
||
delete[] indi;
|
||
delete[] indo;
|
||
}
|
||
void Parallel::writefile(double time, int nx, int ny, int nz, double xmin, double xmax, double ymin, double ymax,
|
||
double zmin, double zmax, char *filename, double *data_out)
|
||
{
|
||
ofstream outfile;
|
||
outfile.open(filename, ios::out | ios::trunc);
|
||
if (!outfile)
|
||
{
|
||
cout << "Can't open " << filename << " for output." << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
outfile.write((char *)&time, sizeof(double));
|
||
outfile.write((char *)&nx, sizeof(int));
|
||
outfile.write((char *)&ny, sizeof(int));
|
||
outfile.write((char *)&nz, sizeof(int));
|
||
outfile.write((char *)&xmin, sizeof(double));
|
||
outfile.write((char *)&xmax, sizeof(double));
|
||
outfile.write((char *)&ymin, sizeof(double));
|
||
outfile.write((char *)&ymax, sizeof(double));
|
||
outfile.write((char *)&zmin, sizeof(double));
|
||
outfile.write((char *)&zmax, sizeof(double));
|
||
outfile.write((char *)data_out, nx * ny * nz * sizeof(double));
|
||
outfile.close();
|
||
}
|
||
void Parallel::writefile(double time, int nx, int ny, double xmin, double xmax, double ymin, double ymax,
|
||
char *filename, double *datain)
|
||
{
|
||
int i, j;
|
||
double *X, *Y;
|
||
X = new double[nx];
|
||
Y = new double[ny];
|
||
double dd;
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
dd = (xmax - xmin) / (nx - 1);
|
||
for (i = 0; i < nx; i++)
|
||
X[i] = xmin + i * dd;
|
||
dd = (ymax - ymin) / (ny - 1);
|
||
for (j = 0; j < ny; j++)
|
||
Y[j] = ymin + j * dd;
|
||
#else
|
||
#ifdef Cell
|
||
dd = (xmax - xmin) / nx;
|
||
for (i = 0; i < nx; i++)
|
||
X[i] = xmin + (i + 0.5) * dd;
|
||
dd = (ymax - ymin) / ny;
|
||
for (j = 0; j < ny; j++)
|
||
Y[j] = ymin + (j + 0.5) * dd;
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
ofstream outfile;
|
||
outfile.open(filename, ios::out | ios::trunc);
|
||
if (!outfile)
|
||
{
|
||
cout << "Can't open " << filename << " for output." << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
outfile << "# t = " << time << endl;
|
||
for (j = 0; j < ny; j++)
|
||
{
|
||
for (i = 0; i < nx; i++)
|
||
{
|
||
int ind1 = i + j * nx;
|
||
outfile << setw(10) << setprecision(10) << X[i] << " "
|
||
<< setw(10) << setprecision(10) << Y[j] << " "
|
||
<< setw(16) << setprecision(15) << datain[ind1]
|
||
<< endl;
|
||
}
|
||
outfile << "\n"; /* blanck line for gnuplot */
|
||
}
|
||
outfile.close();
|
||
|
||
delete[] X;
|
||
delete[] Y;
|
||
}
|
||
void Parallel::Dump_CPU_Data(MyList<Block> *BlL, MyList<var> *DumpList, char *tag, double time, double dT)
|
||
{
|
||
int myrank;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
// round at 4 and 5
|
||
int ncount = int(time / dT + 0.5);
|
||
|
||
MyList<Block> *Bp;
|
||
while (DumpList)
|
||
{
|
||
Bp = BlL;
|
||
int Bi = 0;
|
||
while (Bp)
|
||
{
|
||
Block *BP = Bp->data;
|
||
var *VP = DumpList->data;
|
||
if (BP->rank == myrank)
|
||
{
|
||
|
||
string out_dir;
|
||
map<string, string>::iterator iter;
|
||
iter = parameters::str_par.find("output dir");
|
||
if (iter != parameters::str_par.end())
|
||
{
|
||
out_dir = iter->second;
|
||
}
|
||
else
|
||
{
|
||
// read parameter from file
|
||
const int LEN = 256;
|
||
char pline[LEN];
|
||
string str, sgrp, skey, sval;
|
||
int sind;
|
||
char pname[50];
|
||
{
|
||
map<string, string>::iterator iter = parameters::str_par.find("inputpar");
|
||
if (iter != parameters::str_par.end())
|
||
{
|
||
strcpy(pname, (iter->second).c_str());
|
||
}
|
||
else
|
||
{
|
||
cout << "Error inputpar" << endl;
|
||
exit(0);
|
||
}
|
||
}
|
||
ifstream inf(pname, ifstream::in);
|
||
if (!inf.good())
|
||
{
|
||
cout << "Can not open parameter file " << pname << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
|
||
for (int i = 1; inf.good(); i++)
|
||
{
|
||
inf.getline(pline, LEN);
|
||
str = pline;
|
||
|
||
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
|
||
if (status == -1)
|
||
{
|
||
cout << "error reading parameter file " << pname << " in line " << i << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
else if (status == 0)
|
||
continue;
|
||
|
||
if (sgrp == "ABE")
|
||
{
|
||
if (skey == "output dir")
|
||
out_dir = sval;
|
||
}
|
||
}
|
||
inf.close();
|
||
|
||
parameters::str_par.insert(map<string, string>::value_type("output dir", out_dir));
|
||
}
|
||
|
||
char filename[100];
|
||
if (tag)
|
||
sprintf(filename, "%s/%s_Lev%02d-%02d_%02d_%s_%05d.bin", out_dir.c_str(), tag, BP->lev, Bi, myrank, VP->name, ncount);
|
||
else
|
||
sprintf(filename, "%s/Lev%02d-%02d_%02d_%s_%05d.bin", out_dir.c_str(), BP->lev, Bi, myrank, VP->name, ncount);
|
||
writefile(time, BP->shape[0], BP->shape[1], BP->shape[2], BP->bbox[0], BP->bbox[3], BP->bbox[1], BP->bbox[4],
|
||
BP->bbox[2], BP->bbox[5], filename, BP->fgfs[VP->sgfn]);
|
||
cout << "end of dump " << VP->name << " at time " << time << ", on node " << myrank << endl;
|
||
}
|
||
Bp = Bp->next;
|
||
Bi++;
|
||
}
|
||
DumpList = DumpList->next;
|
||
}
|
||
}
|
||
// Now we dump the data including buffer points
|
||
void Parallel::Dump_Data(Patch *PP, MyList<var> *DumpList, char *tag, double time, double dT, int grd)
|
||
{
|
||
int myrank;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
// round at 4 and 5
|
||
int ncount = int(time / dT + 0.5);
|
||
|
||
MPI_Status sta;
|
||
int DIM = 3;
|
||
double llb[3], uub[3];
|
||
double DX, DY, DZ;
|
||
|
||
double *databuffer = 0;
|
||
if (myrank == 0)
|
||
{
|
||
databuffer = (double *)malloc(sizeof(double) * PP->shape[0] * PP->shape[1] * PP->shape[2]);
|
||
if (!databuffer)
|
||
{
|
||
cout << "Parallel::Dump_Data: out of memory when dumping data." << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
}
|
||
|
||
while (DumpList)
|
||
{
|
||
var *VP = DumpList->data;
|
||
|
||
MyList<Block> *Bp = PP->blb;
|
||
while (Bp)
|
||
{
|
||
Block *BP = Bp->data;
|
||
if (BP->rank == 0 && myrank == 0)
|
||
{
|
||
DX = BP->getdX(0);
|
||
DY = BP->getdX(1);
|
||
DZ = BP->getdX(2);
|
||
llb[0] = (feq(BP->bbox[0], PP->bbox[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX;
|
||
llb[1] = (feq(BP->bbox[1], PP->bbox[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY;
|
||
llb[2] = (feq(BP->bbox[2], PP->bbox[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ;
|
||
uub[0] = (feq(BP->bbox[3], PP->bbox[3], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX;
|
||
uub[1] = (feq(BP->bbox[4], PP->bbox[4], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY;
|
||
uub[2] = (feq(BP->bbox[5], PP->bbox[5], DZ / 2)) ? BP->bbox[5] : BP->bbox[5] - ghost_width * DZ;
|
||
f_copy(DIM, PP->bbox, PP->bbox + DIM, PP->shape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, BP->fgfs[VP->sgfn], llb, uub);
|
||
}
|
||
else
|
||
{
|
||
int nnn = (BP->shape[0]) * (BP->shape[1]) * (BP->shape[2]);
|
||
if (myrank == 0)
|
||
{
|
||
double *bufferhere = (double *)malloc(sizeof(double) * nnn);
|
||
if (!bufferhere)
|
||
{
|
||
cout << "on node#" << myrank << ", out of memory when dumping data." << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
MPI_Recv(bufferhere, nnn, MPI_DOUBLE, BP->rank, 0, MPI_COMM_WORLD, &sta);
|
||
DX = BP->getdX(0);
|
||
DY = BP->getdX(1);
|
||
DZ = BP->getdX(2);
|
||
llb[0] = (feq(BP->bbox[0], PP->bbox[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX;
|
||
llb[1] = (feq(BP->bbox[1], PP->bbox[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY;
|
||
llb[2] = (feq(BP->bbox[2], PP->bbox[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ;
|
||
uub[0] = (feq(BP->bbox[3], PP->bbox[3], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX;
|
||
uub[1] = (feq(BP->bbox[4], PP->bbox[4], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY;
|
||
uub[2] = (feq(BP->bbox[5], PP->bbox[5], DZ / 2)) ? BP->bbox[5] : BP->bbox[5] - ghost_width * DZ;
|
||
f_copy(DIM, PP->bbox, PP->bbox + DIM, PP->shape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, bufferhere, llb, uub);
|
||
free(bufferhere);
|
||
}
|
||
else if (myrank == BP->rank)
|
||
{
|
||
MPI_Send(BP->fgfs[VP->sgfn], nnn, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
|
||
}
|
||
}
|
||
if (Bp == PP->ble)
|
||
break;
|
||
Bp = Bp->next;
|
||
}
|
||
if (myrank == 0)
|
||
{
|
||
|
||
string out_dir;
|
||
map<string, string>::iterator iter;
|
||
iter = parameters::str_par.find("output dir");
|
||
if (iter != parameters::str_par.end())
|
||
{
|
||
out_dir = iter->second;
|
||
}
|
||
else
|
||
{
|
||
// read parameter from file
|
||
const int LEN = 256;
|
||
char pline[LEN];
|
||
string str, sgrp, skey, sval;
|
||
int sind;
|
||
char pname[50];
|
||
{
|
||
map<string, string>::iterator iter = parameters::str_par.find("inputpar");
|
||
if (iter != parameters::str_par.end())
|
||
{
|
||
strcpy(pname, (iter->second).c_str());
|
||
}
|
||
else
|
||
{
|
||
cout << "Error inputpar" << endl;
|
||
exit(0);
|
||
}
|
||
}
|
||
ifstream inf(pname, ifstream::in);
|
||
if (!inf.good())
|
||
{
|
||
cout << "Can not open parameter file " << pname << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
|
||
for (int i = 1; inf.good(); i++)
|
||
{
|
||
inf.getline(pline, LEN);
|
||
str = pline;
|
||
|
||
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
|
||
if (status == -1)
|
||
{
|
||
cout << "error reading parameter file " << pname << " in line " << i << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
else if (status == 0)
|
||
continue;
|
||
|
||
if (sgrp == "ABE")
|
||
{
|
||
if (skey == "output dir")
|
||
out_dir = sval;
|
||
}
|
||
}
|
||
inf.close();
|
||
|
||
parameters::str_par.insert(map<string, string>::value_type("output dir", out_dir));
|
||
}
|
||
|
||
char filename[100];
|
||
if (tag)
|
||
sprintf(filename, "%s/%s_Lev%02d-%02d_%s_%05d.bin", out_dir.c_str(), tag, PP->lev, grd, VP->name, ncount);
|
||
else
|
||
sprintf(filename, "%s/Lev%02d-%02d_%s_%05d.bin", out_dir.c_str(), PP->lev, grd, VP->name, ncount);
|
||
|
||
writefile(time, PP->shape[0], PP->shape[1], PP->shape[2], PP->bbox[0], PP->bbox[3], PP->bbox[1], PP->bbox[4],
|
||
PP->bbox[2], PP->bbox[5], filename, databuffer);
|
||
}
|
||
DumpList = DumpList->next;
|
||
}
|
||
|
||
if (myrank == 0)
|
||
free(databuffer);
|
||
}
|
||
void Parallel::Dump_Data(MyList<Patch> *PL, MyList<var> *DumpList, char *tag, double time, double dT)
|
||
{
|
||
MyList<Patch> *Pp;
|
||
Pp = PL;
|
||
int grd = 0;
|
||
while (Pp)
|
||
{
|
||
Patch *PP = Pp->data;
|
||
Dump_Data(PP, DumpList, tag, time, dT, grd);
|
||
grd++;
|
||
Pp = Pp->next;
|
||
}
|
||
}
|
||
// collect the data including buffer points
|
||
double *Parallel::Collect_Data(Patch *PP, var *VP)
|
||
{
|
||
int myrank;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
|
||
MPI_Status sta;
|
||
int DIM = 3;
|
||
double llb[3], uub[3];
|
||
double DX, DY, DZ;
|
||
|
||
double *databuffer = 0;
|
||
if (myrank == 0)
|
||
{
|
||
databuffer = (double *)malloc(sizeof(double) * PP->shape[0] * PP->shape[1] * PP->shape[2]);
|
||
if (!databuffer)
|
||
{
|
||
cout << "Parallel::Collect_Data: out of memory when dumping data." << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
}
|
||
|
||
MyList<Block> *Bp = PP->blb;
|
||
while (Bp)
|
||
{
|
||
Block *BP = Bp->data;
|
||
if (BP->rank == 0 && myrank == 0)
|
||
{
|
||
DX = BP->getdX(0);
|
||
DY = BP->getdX(1);
|
||
DZ = BP->getdX(2);
|
||
llb[0] = (feq(BP->bbox[0], PP->bbox[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX;
|
||
llb[1] = (feq(BP->bbox[1], PP->bbox[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY;
|
||
llb[2] = (feq(BP->bbox[2], PP->bbox[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ;
|
||
uub[0] = (feq(BP->bbox[3], PP->bbox[3], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX;
|
||
uub[1] = (feq(BP->bbox[4], PP->bbox[4], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY;
|
||
uub[2] = (feq(BP->bbox[5], PP->bbox[5], DZ / 2)) ? BP->bbox[5] : BP->bbox[5] - ghost_width * DZ;
|
||
f_copy(DIM, PP->bbox, PP->bbox + DIM, PP->shape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, BP->fgfs[VP->sgfn], llb, uub);
|
||
}
|
||
else
|
||
{
|
||
int nnn = (BP->shape[0]) * (BP->shape[1]) * (BP->shape[2]);
|
||
if (myrank == 0)
|
||
{
|
||
double *bufferhere = (double *)malloc(sizeof(double) * nnn);
|
||
if (!bufferhere)
|
||
{
|
||
cout << "on node#" << myrank << ", out of memory when dumping data." << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
MPI_Recv(bufferhere, nnn, MPI_DOUBLE, BP->rank, 0, MPI_COMM_WORLD, &sta);
|
||
DX = BP->getdX(0);
|
||
DY = BP->getdX(1);
|
||
DZ = BP->getdX(2);
|
||
llb[0] = (feq(BP->bbox[0], PP->bbox[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX;
|
||
llb[1] = (feq(BP->bbox[1], PP->bbox[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY;
|
||
llb[2] = (feq(BP->bbox[2], PP->bbox[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ;
|
||
uub[0] = (feq(BP->bbox[3], PP->bbox[3], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX;
|
||
uub[1] = (feq(BP->bbox[4], PP->bbox[4], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY;
|
||
uub[2] = (feq(BP->bbox[5], PP->bbox[5], DZ / 2)) ? BP->bbox[5] : BP->bbox[5] - ghost_width * DZ;
|
||
f_copy(DIM, PP->bbox, PP->bbox + DIM, PP->shape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, bufferhere, llb, uub);
|
||
free(bufferhere);
|
||
}
|
||
else if (myrank == BP->rank)
|
||
{
|
||
MPI_Send(BP->fgfs[VP->sgfn], nnn, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
|
||
}
|
||
}
|
||
if (Bp == PP->ble)
|
||
break;
|
||
Bp = Bp->next;
|
||
}
|
||
|
||
return databuffer;
|
||
}
|
||
// Now we dump the data including buffer points
|
||
// dump z = 0 plane
|
||
void Parallel::d2Dump_Data(Patch *PP, MyList<var> *DumpList, char *tag, double time, double dT, int grd)
|
||
{
|
||
int myrank;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
// round at 4 and 5
|
||
int ncount = int(time / dT + 0.5);
|
||
|
||
MPI_Status sta;
|
||
int DIM = 3;
|
||
double llb[3], uub[3];
|
||
double DX, DY, DZ;
|
||
|
||
double *databuffer = 0, *databuffer2 = 0;
|
||
if (myrank == 0)
|
||
{
|
||
databuffer = (double *)malloc(sizeof(double) * PP->shape[0] * PP->shape[1] * PP->shape[2]);
|
||
databuffer2 = (double *)malloc(sizeof(double) * PP->shape[0] * PP->shape[1]);
|
||
if (!databuffer || !databuffer2)
|
||
{
|
||
cout << "Parallel::d2Dump_Data: out of memory when dumping data." << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
}
|
||
|
||
while (DumpList)
|
||
{
|
||
var *VP = DumpList->data;
|
||
|
||
MyList<Block> *Bp = PP->blb;
|
||
while (Bp)
|
||
{
|
||
Block *BP = Bp->data;
|
||
if (BP->rank == 0 && myrank == 0)
|
||
{
|
||
DX = BP->getdX(0);
|
||
DY = BP->getdX(1);
|
||
DZ = BP->getdX(2);
|
||
llb[0] = (feq(BP->bbox[0], PP->bbox[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX;
|
||
llb[1] = (feq(BP->bbox[1], PP->bbox[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY;
|
||
llb[2] = (feq(BP->bbox[2], PP->bbox[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ;
|
||
uub[0] = (feq(BP->bbox[3], PP->bbox[3], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX;
|
||
uub[1] = (feq(BP->bbox[4], PP->bbox[4], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY;
|
||
uub[2] = (feq(BP->bbox[5], PP->bbox[5], DZ / 2)) ? BP->bbox[5] : BP->bbox[5] - ghost_width * DZ;
|
||
f_copy(DIM, PP->bbox, PP->bbox + DIM, PP->shape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, BP->fgfs[VP->sgfn], llb, uub);
|
||
}
|
||
else
|
||
{
|
||
int nnn = (BP->shape[0]) * (BP->shape[1]) * (BP->shape[2]);
|
||
if (myrank == 0)
|
||
{
|
||
double *bufferhere = (double *)malloc(sizeof(double) * nnn);
|
||
if (!bufferhere)
|
||
{
|
||
cout << "on node#" << myrank << ", out of memory when dumping data." << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
MPI_Recv(bufferhere, nnn, MPI_DOUBLE, BP->rank, 0, MPI_COMM_WORLD, &sta);
|
||
DX = BP->getdX(0);
|
||
DY = BP->getdX(1);
|
||
DZ = BP->getdX(2);
|
||
llb[0] = (feq(BP->bbox[0], PP->bbox[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX;
|
||
llb[1] = (feq(BP->bbox[1], PP->bbox[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY;
|
||
llb[2] = (feq(BP->bbox[2], PP->bbox[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ;
|
||
uub[0] = (feq(BP->bbox[3], PP->bbox[3], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX;
|
||
uub[1] = (feq(BP->bbox[4], PP->bbox[4], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY;
|
||
uub[2] = (feq(BP->bbox[5], PP->bbox[5], DZ / 2)) ? BP->bbox[5] : BP->bbox[5] - ghost_width * DZ;
|
||
f_copy(DIM, PP->bbox, PP->bbox + DIM, PP->shape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, bufferhere, llb, uub);
|
||
free(bufferhere);
|
||
}
|
||
else if (myrank == BP->rank)
|
||
{
|
||
MPI_Send(BP->fgfs[VP->sgfn], nnn, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
|
||
}
|
||
}
|
||
if (Bp == PP->ble)
|
||
break;
|
||
Bp = Bp->next;
|
||
}
|
||
if (myrank == 0)
|
||
{
|
||
|
||
string out_dir;
|
||
map<string, string>::iterator iter;
|
||
iter = parameters::str_par.find("output dir");
|
||
if (iter != parameters::str_par.end())
|
||
{
|
||
out_dir = iter->second;
|
||
}
|
||
else
|
||
{
|
||
// read parameter from file
|
||
const int LEN = 256;
|
||
char pline[LEN];
|
||
string str, sgrp, skey, sval;
|
||
int sind;
|
||
char pname[50];
|
||
{
|
||
map<string, string>::iterator iter = parameters::str_par.find("inputpar");
|
||
if (iter != parameters::str_par.end())
|
||
{
|
||
strcpy(pname, (iter->second).c_str());
|
||
}
|
||
else
|
||
{
|
||
cout << "Error inputpar" << endl;
|
||
exit(0);
|
||
}
|
||
}
|
||
ifstream inf(pname, ifstream::in);
|
||
if (!inf.good())
|
||
{
|
||
cout << "Can not open parameter file " << pname << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
|
||
for (int i = 1; inf.good(); i++)
|
||
{
|
||
inf.getline(pline, LEN);
|
||
str = pline;
|
||
|
||
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
|
||
if (status == -1)
|
||
{
|
||
cout << "error reading parameter file " << pname << " in line " << i << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
else if (status == 0)
|
||
continue;
|
||
|
||
if (sgrp == "ABE")
|
||
{
|
||
if (skey == "output dir")
|
||
out_dir = sval;
|
||
}
|
||
}
|
||
inf.close();
|
||
|
||
parameters::str_par.insert(map<string, string>::value_type("output dir", out_dir));
|
||
}
|
||
|
||
char filename[100];
|
||
if (tag)
|
||
sprintf(filename, "%s/%s_2d_Lev%02d-%02d_%s_%05d.dat", out_dir.c_str(), tag, PP->lev, grd, VP->name, ncount);
|
||
else
|
||
sprintf(filename, "%s/2d_Lev%02d-%02d_%s_%05d.dat", out_dir.c_str(), PP->lev, grd, VP->name, ncount);
|
||
|
||
int gord = ghost_width;
|
||
f_d2dump(DIM, PP->bbox, PP->bbox + DIM, PP->shape, databuffer, databuffer2, gord, VP->SoA);
|
||
writefile(time, PP->shape[0], PP->shape[1], PP->bbox[0], PP->bbox[3], PP->bbox[1], PP->bbox[4],
|
||
filename, databuffer2);
|
||
}
|
||
DumpList = DumpList->next;
|
||
}
|
||
|
||
if (myrank == 0)
|
||
{
|
||
free(databuffer);
|
||
free(databuffer2);
|
||
}
|
||
}
|
||
void Parallel::d2Dump_Data(MyList<Patch> *PL, MyList<var> *DumpList, char *tag, double time, double dT)
|
||
{
|
||
MyList<Patch> *Pp;
|
||
Pp = PL;
|
||
int grd = 0;
|
||
while (Pp)
|
||
{
|
||
Patch *PP = Pp->data;
|
||
d2Dump_Data(PP, DumpList, tag, time, dT, grd);
|
||
grd++;
|
||
Pp = Pp->next;
|
||
}
|
||
}
|
||
// Now we dump the data including buffer points and ghost points of the given patch
|
||
void Parallel::Dump_Data0(Patch *PP, MyList<var> *DumpList, char *tag, double time, double dT)
|
||
{
|
||
int myrank;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
// round at 4 and 5
|
||
int ncount = int(time / dT + 0.5);
|
||
|
||
MPI_Status sta;
|
||
int DIM = 3;
|
||
double llb[3], uub[3], tllb[3], tuub[3];
|
||
int tshape[3];
|
||
double DX, DY, DZ;
|
||
|
||
for (int i = 0; i < 3; i++)
|
||
{
|
||
double DX = PP->blb->data->getdX(i);
|
||
tshape[i] = PP->shape[i] + 2 * ghost_width;
|
||
tllb[i] = PP->bbox[i] - ghost_width * DX;
|
||
tuub[i] = PP->bbox[i + dim] + ghost_width * DX;
|
||
}
|
||
|
||
int NN = tshape[0] * tshape[1] * tshape[2];
|
||
double *databuffer = 0;
|
||
if (myrank == 0)
|
||
{
|
||
databuffer = (double *)malloc(sizeof(double) * NN);
|
||
if (!databuffer)
|
||
{
|
||
cout << "on node# " << myrank << ", out of memory when dumping data." << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
}
|
||
|
||
while (DumpList)
|
||
{
|
||
var *VP = DumpList->data;
|
||
MyList<Block> *Bp = PP->blb;
|
||
while (Bp)
|
||
{
|
||
Block *BP = Bp->data;
|
||
if (BP->rank == 0 && myrank == 0)
|
||
{
|
||
DX = BP->getdX(0);
|
||
DY = BP->getdX(1);
|
||
DZ = BP->getdX(2);
|
||
llb[0] = (feq(BP->bbox[0], tllb[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX;
|
||
llb[1] = (feq(BP->bbox[1], tllb[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY;
|
||
llb[2] = (feq(BP->bbox[2], tllb[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ;
|
||
uub[0] = (feq(BP->bbox[3], tuub[0], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX;
|
||
uub[1] = (feq(BP->bbox[4], tuub[1], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY;
|
||
uub[2] = (feq(BP->bbox[5], tuub[2], DZ / 2)) ? BP->bbox[5] : BP->bbox[5] - ghost_width * DZ;
|
||
f_copy(DIM, tllb, tuub, tshape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, BP->fgfs[VP->sgfn], llb, uub);
|
||
}
|
||
else
|
||
{
|
||
if (myrank == 0)
|
||
{
|
||
int nnn = (BP->shape[0]) * (BP->shape[1]) * (BP->shape[2]);
|
||
double *bufferhere = (double *)malloc(sizeof(double) * nnn);
|
||
if (!bufferhere)
|
||
{
|
||
cout << "on node#" << myrank << ", out of memory when dumping data." << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
MPI_Recv(bufferhere, nnn, MPI_DOUBLE, BP->rank, 0, MPI_COMM_WORLD, &sta);
|
||
DX = BP->getdX(0);
|
||
DY = BP->getdX(1);
|
||
DZ = BP->getdX(2);
|
||
llb[0] = (feq(BP->bbox[0], tllb[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX;
|
||
llb[1] = (feq(BP->bbox[1], tllb[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY;
|
||
llb[2] = (feq(BP->bbox[2], tllb[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ;
|
||
uub[0] = (feq(BP->bbox[3], tuub[0], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX;
|
||
uub[1] = (feq(BP->bbox[4], tuub[1], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY;
|
||
uub[2] = (feq(BP->bbox[5], tuub[2], DZ / 2)) ? BP->bbox[5] : BP->bbox[5] - ghost_width * DZ;
|
||
f_copy(DIM, tllb, tuub, tshape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, bufferhere, llb, uub);
|
||
free(bufferhere);
|
||
}
|
||
else if (myrank == BP->rank)
|
||
{
|
||
int nnn = (BP->shape[0]) * (BP->shape[1]) * (BP->shape[2]);
|
||
MPI_Send(BP->fgfs[VP->sgfn], nnn, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
|
||
}
|
||
}
|
||
if (Bp == PP->ble)
|
||
break;
|
||
Bp = Bp->next;
|
||
}
|
||
if (myrank == 0)
|
||
{
|
||
|
||
string out_dir;
|
||
map<string, string>::iterator iter;
|
||
iter = parameters::str_par.find("output dir");
|
||
if (iter != parameters::str_par.end())
|
||
{
|
||
out_dir = iter->second;
|
||
}
|
||
else
|
||
{
|
||
// read parameter from file
|
||
const int LEN = 256;
|
||
char pline[LEN];
|
||
string str, sgrp, skey, sval;
|
||
int sind;
|
||
char pname[50];
|
||
{
|
||
map<string, string>::iterator iter = parameters::str_par.find("inputpar");
|
||
if (iter != parameters::str_par.end())
|
||
{
|
||
strcpy(pname, (iter->second).c_str());
|
||
}
|
||
else
|
||
{
|
||
cout << "Error inputpar" << endl;
|
||
exit(0);
|
||
}
|
||
}
|
||
ifstream inf(pname, ifstream::in);
|
||
if (!inf.good())
|
||
{
|
||
cout << "Can not open parameter file " << pname << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
|
||
for (int i = 1; inf.good(); i++)
|
||
{
|
||
inf.getline(pline, LEN);
|
||
str = pline;
|
||
|
||
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
|
||
if (status == -1)
|
||
{
|
||
cout << "error reading parameter file " << pname << " in line " << i << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
else if (status == 0)
|
||
continue;
|
||
|
||
if (sgrp == "ABE")
|
||
{
|
||
if (skey == "output dir")
|
||
out_dir = sval;
|
||
}
|
||
}
|
||
inf.close();
|
||
|
||
parameters::str_par.insert(map<string, string>::value_type("output dir", out_dir));
|
||
}
|
||
|
||
char filename[100];
|
||
if (tag)
|
||
sprintf(filename, "%s/%s_Lev%02d_%s_%05d.bin", out_dir.c_str(), tag, PP->lev, VP->name, ncount);
|
||
else
|
||
sprintf(filename, "%s/Lev%02d_%s_%05d.bin", out_dir.c_str(), PP->lev, VP->name, ncount);
|
||
|
||
writefile(time, tshape[0], tshape[1], tshape[2], tllb[0], tuub[0], tllb[1], tuub[2],
|
||
tllb[2], tuub[2], filename, databuffer);
|
||
}
|
||
DumpList = DumpList->next;
|
||
}
|
||
|
||
if (myrank == 0)
|
||
free(databuffer);
|
||
}
|
||
// Map point is much easier than maping data itself
|
||
// But the main problem is about the points near the boundary
|
||
// worst case is -ghost -ghost+1 .... 0 * ......
|
||
double Parallel::global_interp(int DIM, int *ext, double **CoX, double *datain,
|
||
double *poXb, int ordn, double *SoA, int Symmetry)
|
||
{
|
||
if (DIM != 3)
|
||
{
|
||
cout << "Parallel::global_interp does not suport DIM = " << DIM << " for Symmetry." << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
double resu;
|
||
double poX[3];
|
||
double asgn = 1;
|
||
|
||
for (int i = 0; i < 3; i++)
|
||
poX[i] = poXb[i];
|
||
|
||
switch (Symmetry)
|
||
{
|
||
case 2:
|
||
for (int i = 0; i < 3; i++)
|
||
if (poX[i] < 0)
|
||
{
|
||
poX[i] = -poX[i];
|
||
asgn = asgn * SoA[i];
|
||
}
|
||
break;
|
||
case 1:
|
||
if (poX[2] < 0)
|
||
{
|
||
poX[2] = -poX[2];
|
||
asgn = asgn * SoA[2];
|
||
}
|
||
}
|
||
|
||
int extb[3];
|
||
|
||
for (int i = 0; i < 3; i++)
|
||
extb[i] = ext[i];
|
||
|
||
switch (Symmetry)
|
||
{
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
case 2:
|
||
if (poX[0] < (ghost_width - 1) * (CoX[0][1] - CoX[0][0]))
|
||
extb[0] = extb[0] + ghost_width - 1;
|
||
if (poX[1] < (ghost_width - 1) * (CoX[1][1] - CoX[1][0]))
|
||
extb[1] = extb[1] + ghost_width - 1;
|
||
case 1:
|
||
if (poX[2] < (ghost_width - 1) * (CoX[2][1] - CoX[2][0]))
|
||
extb[2] = extb[2] + ghost_width - 1;
|
||
#else
|
||
#ifdef Cell
|
||
case 2:
|
||
if (poX[0] < (ghost_width - 0.5) * (CoX[0][1] - CoX[0][0]))
|
||
extb[0] = extb[0] + ghost_width;
|
||
if (poX[1] < (ghost_width - 0.5) * (CoX[1][1] - CoX[1][0]))
|
||
extb[1] = extb[1] + ghost_width;
|
||
case 1:
|
||
if (poX[2] < (ghost_width - 0.5) * (CoX[2][1] - CoX[2][0]))
|
||
extb[2] = extb[2] + ghost_width;
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
|
||
if (extb[0] > ext[0] || extb[1] > ext[1] || extb[2] > ext[2])
|
||
{
|
||
double *CoXb[3];
|
||
int Nb = extb[0] * extb[1] * extb[2];
|
||
double *datab;
|
||
datab = new double[Nb];
|
||
for (int i = 0; i < 3; i++)
|
||
{
|
||
CoXb[i] = new double[extb[i]];
|
||
double DH = CoX[i][1] - CoX[i][0];
|
||
if (extb[i] > ext[i])
|
||
{
|
||
if (CoX[i][0] > DH)
|
||
{
|
||
cout << "lower boundary[" << i << "] = " << CoX[i][0] << ", but SYmmetry = " << Symmetry << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
for (int j = 0; j < ghost_width - 1; j++)
|
||
CoXb[i][j] = -CoX[i][ghost_width - 1 - j];
|
||
for (int j = ghost_width - 1; j < extb[i]; j++)
|
||
CoXb[i][j] = CoX[i][j - ghost_width + 1];
|
||
#else
|
||
#ifdef Cell
|
||
for (int j = 0; j < ghost_width; j++)
|
||
CoXb[i][j] = -CoX[i][ghost_width - 1 - j];
|
||
for (int j = ghost_width; j < extb[i]; j++)
|
||
CoXb[i][j] = CoX[i][j - ghost_width];
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
else
|
||
{
|
||
for (int j = 0; j < extb[i]; j++)
|
||
CoXb[i][j] = CoX[i][j];
|
||
}
|
||
}
|
||
|
||
for (int i = 0; i < Nb; i++)
|
||
{
|
||
int ind[3], indb[3];
|
||
getarrayindex(3, extb, indb, i);
|
||
double sgn = 1;
|
||
for (int j = 0; j < 3; j++)
|
||
{
|
||
if (extb[j] > ext[j])
|
||
{
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
if (indb[j] < ghost_width - 1)
|
||
{
|
||
ind[j] = ghost_width - 1 - indb[j];
|
||
sgn = sgn * SoA[j];
|
||
}
|
||
else
|
||
{
|
||
ind[j] = 1 + indb[j] - ghost_width;
|
||
}
|
||
#else
|
||
#ifdef Cell
|
||
if (indb[j] < ghost_width)
|
||
{
|
||
ind[j] = ghost_width - 1 - indb[j];
|
||
sgn = sgn * SoA[j];
|
||
}
|
||
else
|
||
{
|
||
ind[j] = indb[j] - ghost_width;
|
||
}
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
else
|
||
ind[j] = indb[j];
|
||
}
|
||
int lon = getarraylocation(3, ext, ind);
|
||
datab[i] = datain[lon] * sgn;
|
||
}
|
||
|
||
resu = global_interp(DIM, extb, CoXb, datab, poX, ordn);
|
||
|
||
for (int i = 0; i < 3; i++)
|
||
delete[] CoXb[i];
|
||
delete[] datab;
|
||
}
|
||
else
|
||
{
|
||
resu = global_interp(DIM, ext, CoX, datain, poX, ordn);
|
||
}
|
||
|
||
return resu * asgn;
|
||
}
|
||
double Parallel::global_interp(int DIM, int *ext, double **CoX, double *datain,
|
||
double *poX, int ordn)
|
||
{
|
||
if (ordn > 2 * ghost_width)
|
||
{
|
||
cout << "Parallel::global_interp can not handle ordn = " << ordn << " > 2*ghost_width = " << 2 * ghost_width << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
|
||
double *bbox, *datainbbox;
|
||
bbox = new double[2 * DIM];
|
||
datainbbox = new double[2 * DIM];
|
||
|
||
int *NN, *ind, *shape;
|
||
NN = new int[DIM];
|
||
ind = new int[DIM];
|
||
shape = new int[DIM];
|
||
|
||
for (int i = 0; i < DIM; i++)
|
||
{
|
||
ind[i] = int((poX[i] - CoX[i][0]) / (CoX[i][1] - CoX[i][0])) - ordn / 2 + 1;
|
||
// poX may exactly locate on the boundary (exclude ghost)
|
||
if (ind[i] == -1 && feq(poX[i], CoX[i][0], (CoX[i][1] - CoX[i][0]) / 2))
|
||
ind[i] = 0;
|
||
/*
|
||
if(ind[i] < 0)
|
||
{
|
||
cout<<"Parallel::global_interp error ind["<<i<<"] = "<<ind[i]<<endl;
|
||
cout<<"pox = "<<poX[i]<<", CoX[0] = "<<CoX[i][0]<<endl;
|
||
MPI_Abort(MPI_COMM_WORLD,1);
|
||
}
|
||
*/
|
||
if (ind[i] == ext[i] - ordn + 1 && feq(poX[i], CoX[i][ext[i] - ordn / 2], (CoX[i][1] - CoX[i][0]) / 2))
|
||
ind[i] = ext[i] - ordn - 1;
|
||
/*
|
||
if(ind[i]+ordn-1 > ext[i]-1)
|
||
{
|
||
cout<<"Parallel::global_interp error ind["<<i<<"] = "<<ind[i]<<" + ordn ("<<ordn<<") > ext = "<<ext[i]<<endl;
|
||
cout<<"pox = "<<poX[i]<<", CoX[ind] = "<<CoX[i][ind[i]]<<", CoX = ("<<CoX[i][0]<<","<<CoX[i][ext[i]-1]<<")"<<endl;
|
||
MPI_Abort(MPI_COMM_WORLD,1);
|
||
}
|
||
*/
|
||
bbox[i] = CoX[i][ind[i]];
|
||
bbox[DIM + i] = CoX[i][ind[i] + ordn - 1];
|
||
datainbbox[i] = CoX[i][0];
|
||
datainbbox[DIM + i] = CoX[i][ext[i] - 1];
|
||
shape[i] = ordn;
|
||
}
|
||
|
||
NN[DIM - 1] = ordn;
|
||
for (int i = DIM - 2; i >= 0; i--)
|
||
NN[i] = NN[i + 1] * ordn;
|
||
|
||
double *xpts, *funcvals;
|
||
xpts = new double[ordn];
|
||
funcvals = new double[ordn];
|
||
double *DDd, *DDd1, rr;
|
||
|
||
DDd = new double[NN[0]];
|
||
|
||
copy(DIM, bbox, bbox + DIM, shape, DDd, datainbbox, datainbbox + DIM, ext, datain, bbox, bbox + DIM);
|
||
|
||
for (int i = 0; i < DIM; i++)
|
||
{
|
||
for (int j = ind[i]; j < ind[i] + ordn; j++)
|
||
{
|
||
xpts[j - ind[i]] = CoX[i][j];
|
||
}
|
||
|
||
if (i < DIM - 1)
|
||
{
|
||
DDd1 = new double[NN[i + 1]];
|
||
for (int j = 0; j < NN[i + 1]; j++)
|
||
{
|
||
for (int k = 0; k < ordn; k++)
|
||
funcvals[k] = DDd[k + j * ordn];
|
||
DDd1[j] = Lagrangian_Int(poX[i], ordn, xpts, funcvals);
|
||
}
|
||
delete[] DDd;
|
||
DDd = DDd1;
|
||
}
|
||
else
|
||
{
|
||
for (int j = 0; j < ordn; j++)
|
||
funcvals[j] = DDd[j];
|
||
rr = Lagrangian_Int(poX[i], ordn, xpts, funcvals);
|
||
delete[] DDd1; // since DDd and DDd1 now point to the same stuff, we need delete after above int
|
||
}
|
||
}
|
||
|
||
delete[] NN;
|
||
delete[] ind;
|
||
delete[] xpts;
|
||
delete[] funcvals;
|
||
delete[] bbox;
|
||
delete[] datainbbox;
|
||
delete[] shape;
|
||
|
||
return rr;
|
||
}
|
||
double Parallel::Lagrangian_Int(double x, int npts, double *xpts, double *funcvals)
|
||
{
|
||
double sum = 0;
|
||
for (int i = 0; i < npts; i++)
|
||
{
|
||
sum = sum + funcvals[i] * LagrangePoly(x, i, npts, xpts);
|
||
}
|
||
return sum;
|
||
}
|
||
double Parallel::LagrangePoly(double x, int pt, int npts, double *xpts)
|
||
{
|
||
double h = 1;
|
||
int i;
|
||
|
||
for (i = 0; i < pt; i++)
|
||
h = h * (x - xpts[i]) / (xpts[pt] - xpts[i]);
|
||
|
||
for (i = pt + 1; i < npts; i++)
|
||
h = h * (x - xpts[i]) / (xpts[pt] - xpts[i]);
|
||
|
||
return h;
|
||
}
|
||
// collect all grid segments or blocks including ghost and buffer for given patch
|
||
MyList<Parallel::gridseg> *Parallel::build_complete_gsl(Patch *Pat)
|
||
{
|
||
MyList<Parallel::gridseg> *cgsl = 0, *gs;
|
||
MyList<Block> *BP = Pat->blb;
|
||
while (BP)
|
||
{
|
||
if (!cgsl)
|
||
{
|
||
cgsl = gs = new MyList<Parallel::gridseg>; // delete through destroyList();
|
||
gs->data = new Parallel::gridseg;
|
||
}
|
||
else
|
||
{
|
||
gs->next = new MyList<Parallel::gridseg>;
|
||
gs = gs->next;
|
||
gs->data = new Parallel::gridseg;
|
||
}
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
gs->data->llb[i] = BP->data->bbox[i];
|
||
gs->data->uub[i] = BP->data->bbox[dim + i];
|
||
gs->data->shape[i] = BP->data->shape[i];
|
||
}
|
||
gs->data->Bg = BP->data;
|
||
gs->next = 0;
|
||
|
||
if (BP == Pat->ble)
|
||
break;
|
||
BP = BP->next;
|
||
}
|
||
|
||
return cgsl;
|
||
}
|
||
// collect all grid segments or blocks including ghost and buffer for given patch list
|
||
MyList<Parallel::gridseg> *Parallel::build_complete_gsl(MyList<Patch> *PatL)
|
||
{
|
||
MyList<Parallel::gridseg> *cgsl = 0, *gs;
|
||
while (PatL)
|
||
{
|
||
if (!cgsl)
|
||
{
|
||
cgsl = build_complete_gsl(PatL->data);
|
||
gs = cgsl;
|
||
while (gs->next)
|
||
gs = gs->next;
|
||
}
|
||
else
|
||
{
|
||
gs->next = build_complete_gsl(PatL->data);
|
||
gs = gs->next;
|
||
while (gs->next)
|
||
gs = gs->next;
|
||
}
|
||
PatL = PatL->next;
|
||
}
|
||
|
||
return cgsl;
|
||
}
|
||
// cellect the information of Patch list
|
||
MyList<Parallel::gridseg> *Parallel::build_complete_gsl_virtual(MyList<Patch> *PatL)
|
||
{
|
||
MyList<Parallel::gridseg> *cgsl = 0, *gs;
|
||
while (PatL)
|
||
{
|
||
if (cgsl)
|
||
{
|
||
gs->next = new MyList<Parallel::gridseg>;
|
||
gs = gs->next;
|
||
gs->data = new Parallel::gridseg;
|
||
}
|
||
else
|
||
{
|
||
cgsl = gs = new MyList<Parallel::gridseg>;
|
||
gs->data = new Parallel::gridseg;
|
||
}
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
gs->data->llb[i] = PatL->data->bbox[i];
|
||
gs->data->uub[i] = PatL->data->bbox[dim + i];
|
||
gs->data->shape[i] = PatL->data->shape[i];
|
||
}
|
||
gs->data->Bg = 0;
|
||
gs->next = 0;
|
||
|
||
PatL = PatL->next;
|
||
}
|
||
|
||
return cgsl;
|
||
}
|
||
// cellect the information of Patch list without buffer points
|
||
MyList<Parallel::gridseg> *Parallel::build_complete_gsl_virtual2(MyList<Patch> *PatL) // - buffer
|
||
{
|
||
MyList<Parallel::gridseg> *cgsl = 0, *gs;
|
||
while (PatL)
|
||
{
|
||
if (cgsl)
|
||
{
|
||
gs->next = new MyList<Parallel::gridseg>;
|
||
gs = gs->next;
|
||
gs->data = new Parallel::gridseg;
|
||
}
|
||
else
|
||
{
|
||
cgsl = gs = new MyList<Parallel::gridseg>;
|
||
gs->data = new Parallel::gridseg;
|
||
}
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
double DH = PatL->data->getdX(i);
|
||
gs->data->llb[i] = PatL->data->bbox[i] + PatL->data->lli[i] * DH;
|
||
gs->data->uub[i] = PatL->data->bbox[dim + i] - PatL->data->uui[i] * DH;
|
||
gs->data->shape[i] = PatL->data->shape[i] - PatL->data->lli[i] - PatL->data->uui[i];
|
||
}
|
||
gs->data->Bg = 0;
|
||
gs->next = 0;
|
||
|
||
PatL = PatL->next;
|
||
}
|
||
|
||
return cgsl;
|
||
}
|
||
// collect all grid segments or blocks without ghost for given patch, without extension
|
||
MyList<Parallel::gridseg> *Parallel::build_bulk_gsl(Patch *Pat)
|
||
{
|
||
MyList<Parallel::gridseg> *cgsl = 0, *gs;
|
||
MyList<Block> *BP = Pat->blb;
|
||
while (BP)
|
||
{
|
||
Block *bp = BP->data;
|
||
if (!cgsl)
|
||
{
|
||
cgsl = gs = new MyList<Parallel::gridseg>;
|
||
gs->data = new Parallel::gridseg;
|
||
}
|
||
else
|
||
{
|
||
gs->next = new MyList<Parallel::gridseg>;
|
||
gs = gs->next;
|
||
gs->data = new Parallel::gridseg;
|
||
}
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
double DH = bp->getdX(i);
|
||
gs->data->uub[i] = (feq(bp->bbox[dim + i], Pat->bbox[dim + i], DH / 2)) ? bp->bbox[dim + i] : bp->bbox[dim + i] - ghost_width * DH;
|
||
gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] : bp->bbox[i] + ghost_width * DH;
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
gs->data->Bg = BP->data;
|
||
gs->next = 0;
|
||
|
||
if (BP == Pat->ble)
|
||
break;
|
||
BP = BP->next;
|
||
}
|
||
|
||
return cgsl;
|
||
}
|
||
// bulk part for given Block within given patch, without extension
|
||
MyList<Parallel::gridseg> *Parallel::build_bulk_gsl(Block *bp, Patch *Pat)
|
||
{
|
||
MyList<Parallel::gridseg> *gs = 0;
|
||
|
||
gs = new MyList<Parallel::gridseg>;
|
||
gs->data = new Parallel::gridseg;
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
double DH = bp->getdX(i);
|
||
gs->data->uub[i] = (feq(bp->bbox[dim + i], Pat->bbox[dim + i], DH / 2)) ? bp->bbox[dim + i] : bp->bbox[dim + i] - ghost_width * DH;
|
||
gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] : bp->bbox[i] + ghost_width * DH;
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
gs->data->Bg = bp;
|
||
gs->next = 0;
|
||
|
||
return gs;
|
||
}
|
||
MyList<Parallel::gridseg> *Parallel::clone_gsl(MyList<Parallel::gridseg> *p, bool first_only)
|
||
{
|
||
MyList<Parallel::gridseg> *np = 0, *q = 0, *pq = 0;
|
||
|
||
while (p)
|
||
{
|
||
q = new MyList<Parallel::gridseg>;
|
||
q->data = new Parallel::gridseg;
|
||
q->data->Bg = p->data->Bg;
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
q->data->llb[i] = p->data->llb[i];
|
||
q->data->uub[i] = p->data->uub[i];
|
||
q->data->shape[i] = p->data->shape[i];
|
||
}
|
||
if (pq)
|
||
pq->next = q;
|
||
else
|
||
np = q;
|
||
if (first_only)
|
||
{
|
||
np->next = 0;
|
||
return np;
|
||
}
|
||
pq = q;
|
||
p = p->next;
|
||
}
|
||
return np;
|
||
}
|
||
MyList<Parallel::gridseg> *Parallel::gs_subtract(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B)
|
||
{
|
||
if (!A)
|
||
return 0;
|
||
if (!B)
|
||
return clone_gsl(A, true);
|
||
|
||
double cut_plane[2 * dim], DH[dim];
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
DH[i] = A->data->Bg->getdX(i);
|
||
if (B->data->Bg && !feq(DH[i], B->data->Bg->getdX(i), DH[i] / 2))
|
||
{
|
||
cout << "Parallel::gs_subtract meets different grid segment " << DH[i] << " vs " << B->data->Bg->getdX(i) << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
}
|
||
|
||
MyList<Parallel::gridseg> *C = 0, *q;
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
if (B->data->llb[i] > A->data->uub[i] || B->data->uub[i] < A->data->llb[i])
|
||
return clone_gsl(A, true);
|
||
cut_plane[i] = A->data->llb[i];
|
||
cut_plane[i + dim] = A->data->uub[i];
|
||
}
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
cut_plane[i] = Mymax(A->data->llb[i], B->data->llb[i]);
|
||
if (cut_plane[i] - A->data->llb[i] > DH[i] / 2)
|
||
{
|
||
q = clone_gsl(A, true);
|
||
// prolong the list from head
|
||
if (C)
|
||
q->next = C;
|
||
C = q;
|
||
for (int j = 0; j < dim; j++)
|
||
{
|
||
if (i == j)
|
||
{
|
||
C->data->llb[i] = A->data->llb[i];
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
C->data->uub[i] = Mymax(C->data->llb[i], cut_plane[i] - DH[i]);
|
||
#else
|
||
#ifdef Cell
|
||
C->data->uub[i] = Mymax(C->data->llb[i], cut_plane[i]);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
else
|
||
{
|
||
C->data->llb[j] = cut_plane[j];
|
||
C->data->uub[j] = cut_plane[j + dim];
|
||
}
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
}
|
||
|
||
cut_plane[i + dim] = Mymin(A->data->uub[i], B->data->uub[i]);
|
||
if (A->data->uub[i] - cut_plane[i + dim] > DH[i] / 2)
|
||
{
|
||
q = clone_gsl(A, true);
|
||
if (C)
|
||
q->next = C;
|
||
C = q;
|
||
for (int j = 0; j < dim; j++)
|
||
{
|
||
if (i == j)
|
||
{
|
||
C->data->uub[i] = A->data->uub[i];
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
C->data->llb[i] = Mymin(C->data->uub[i], cut_plane[i + dim] + DH[i]);
|
||
#else
|
||
#ifdef Cell
|
||
C->data->llb[i] = Mymin(C->data->uub[i], cut_plane[i + dim]);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
else
|
||
{
|
||
C->data->llb[j] = cut_plane[j];
|
||
C->data->uub[j] = cut_plane[j + dim];
|
||
}
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
}
|
||
}
|
||
return C;
|
||
}
|
||
// stupid method
|
||
/*
|
||
MyList<Parallel::gridseg> *Parallel::gsl_subtract(MyList<Parallel::gridseg> *A,MyList<Parallel::gridseg> *B) //A subtract B but with A's information
|
||
{
|
||
// always make return and A, B distinct
|
||
if(!A) return 0;
|
||
|
||
if(!B) return clone_gsl(A,0);
|
||
|
||
MyList<Parallel::gridseg> *C=0,*C0,*C1,*Cc,*CC0,*gs;
|
||
|
||
while(A)
|
||
{
|
||
C0=gs_subtract(A,B); // note C0 becomes a list after subtraction
|
||
C1=B->next;
|
||
while(C1)
|
||
{
|
||
CC0=C0;
|
||
Cc=0;
|
||
while(CC0)
|
||
{
|
||
gs=gs_subtract(CC0,C1);
|
||
if(Cc) Cc->catList(gs);
|
||
else Cc=gs;
|
||
CC0=CC0->next;
|
||
}
|
||
if(C0) C0->destroyList();
|
||
C0=Cc;
|
||
C1=C1->next;
|
||
}
|
||
if(C) C->catList(C0);
|
||
else C=C0;
|
||
A=A->next;
|
||
}
|
||
|
||
return C;
|
||
}
|
||
*/
|
||
// more clever method
|
||
MyList<Parallel::gridseg> *Parallel::gsl_subtract(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B) // A subtract B but with A's information
|
||
{
|
||
// always make return and A, B distinct
|
||
if (!A)
|
||
return 0;
|
||
|
||
MyList<Parallel::gridseg> *C = 0, *C0, *C1;
|
||
|
||
C = clone_gsl(A, 0);
|
||
|
||
while (B)
|
||
{
|
||
C0 = 0;
|
||
C1 = C;
|
||
while (C1)
|
||
{
|
||
if (C0)
|
||
C0->catList(gs_subtract(C1, B));
|
||
else
|
||
C0 = gs_subtract(C1, B);
|
||
C1 = C1->next;
|
||
}
|
||
if (C)
|
||
C->destroyList();
|
||
else
|
||
{
|
||
if (C0)
|
||
C0->destroyList();
|
||
return 0;
|
||
}
|
||
|
||
C = C0;
|
||
B = B->next;
|
||
}
|
||
|
||
return C;
|
||
}
|
||
MyList<Parallel::gridseg> *Parallel::gs_and(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B)
|
||
{
|
||
if (!A || !B)
|
||
return 0;
|
||
|
||
double llb[dim], uub[dim];
|
||
bool flag = false;
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
llb[i] = Mymax(A->data->llb[i], B->data->llb[i]);
|
||
uub[i] = Mymin(A->data->uub[i], B->data->uub[i]);
|
||
if (llb[i] > uub[i])
|
||
{
|
||
flag = true;
|
||
break;
|
||
}
|
||
}
|
||
if (flag)
|
||
return 0;
|
||
|
||
MyList<Parallel::gridseg> *C;
|
||
C = clone_gsl(A, true);
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
C->data->llb[i] = llb[i];
|
||
C->data->uub[i] = uub[i];
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
C->data->shape[i] = int((C->data->uub[i] - C->data->llb[i]) / C->data->Bg->getdX(i) + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
C->data->shape[i] = int((C->data->uub[i] - C->data->llb[i]) / C->data->Bg->getdX(i) + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
|
||
return C;
|
||
}
|
||
// overlap of A_i and (union of all j of B_j)
|
||
MyList<Parallel::gridseg> *Parallel::gsl_and(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B) // A and B but with A's information
|
||
{
|
||
MyList<Parallel::gridseg> *C = 0, *C1;
|
||
|
||
while (A)
|
||
{
|
||
C1 = B;
|
||
while (C1)
|
||
{
|
||
if (C)
|
||
C->catList(gs_and(A, C1));
|
||
else
|
||
C = gs_and(A, C1);
|
||
C1 = C1->next;
|
||
}
|
||
A = A->next;
|
||
}
|
||
return C;
|
||
}
|
||
// collect all ghost grid segments or blocks for given patch
|
||
MyList<Parallel::gridseg> *Parallel::build_ghost_gsl(Patch *Pat)
|
||
{
|
||
MyList<Parallel::gridseg> *cgsl = 0, *gs, *gsb;
|
||
MyList<Block> *BP = Pat->blb;
|
||
while (BP)
|
||
{
|
||
gs = new MyList<Parallel::gridseg>;
|
||
gs->data = new Parallel::gridseg;
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
gs->data->llb[i] = BP->data->bbox[i];
|
||
gs->data->uub[i] = BP->data->bbox[dim + i];
|
||
gs->data->shape[i] = BP->data->shape[i];
|
||
}
|
||
gs->data->Bg = BP->data;
|
||
gs->next = 0;
|
||
|
||
gsb = build_bulk_gsl(BP->data, Pat);
|
||
|
||
if (!cgsl)
|
||
cgsl = gs_subtract(gs, gsb);
|
||
else
|
||
cgsl->catList(gs_subtract(gs, gsb));
|
||
|
||
gsb->destroyList();
|
||
gs->destroyList();
|
||
|
||
if (BP == Pat->ble)
|
||
break;
|
||
BP = BP->next;
|
||
}
|
||
|
||
return cgsl;
|
||
}
|
||
// collect all ghost grid segments or blocks for given patch list
|
||
MyList<Parallel::gridseg> *Parallel::build_ghost_gsl(MyList<Patch> *PatL)
|
||
{
|
||
MyList<Parallel::gridseg> *cgsl = 0, *gs;
|
||
while (PatL)
|
||
{
|
||
if (!cgsl)
|
||
{
|
||
cgsl = build_ghost_gsl(PatL->data);
|
||
gs = cgsl;
|
||
while (gs->next)
|
||
gs = gs->next;
|
||
}
|
||
else
|
||
{
|
||
gs->next = build_ghost_gsl(PatL->data);
|
||
gs = gs->next;
|
||
while (gs->next)
|
||
gs = gs->next;
|
||
}
|
||
PatL = PatL->next;
|
||
}
|
||
|
||
return cgsl;
|
||
}
|
||
// collect all grid segments or blocks without ghost for given patch
|
||
// special for Sync usage, so we do not need consider missing points
|
||
MyList<Parallel::gridseg> *Parallel::build_owned_gsl0(Patch *Pat, int rank_in)
|
||
{
|
||
MyList<Parallel::gridseg> *cgsl = 0, *gs;
|
||
MyList<Block> *BP = Pat->blb;
|
||
while (BP)
|
||
{
|
||
Block *bp = BP->data;
|
||
if (bp->rank == rank_in)
|
||
{
|
||
if (!cgsl)
|
||
{
|
||
cgsl = gs = new MyList<Parallel::gridseg>;
|
||
gs->data = new Parallel::gridseg;
|
||
}
|
||
else
|
||
{
|
||
gs->next = new MyList<Parallel::gridseg>;
|
||
gs = gs->next;
|
||
gs->data = new Parallel::gridseg;
|
||
}
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
double DH = bp->getdX(i);
|
||
gs->data->uub[i] = (feq(bp->bbox[dim + i], Pat->bbox[dim + i], DH / 2)) ? bp->bbox[dim + i] : bp->bbox[dim + i] - ghost_width * DH;
|
||
gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] : bp->bbox[i] + ghost_width * DH;
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
gs->data->Bg = BP->data;
|
||
gs->next = 0;
|
||
}
|
||
|
||
if (BP == Pat->ble)
|
||
break;
|
||
BP = BP->next;
|
||
}
|
||
|
||
return cgsl;
|
||
}
|
||
// collect all grid segments or blocks without ghost for given patch
|
||
MyList<Parallel::gridseg> *Parallel::build_owned_gsl1(Patch *Pat, int rank_in)
|
||
{
|
||
MyList<Parallel::gridseg> *cgsl = 0, *gs;
|
||
MyList<Block> *BP = Pat->blb;
|
||
while (BP)
|
||
{
|
||
Block *bp = BP->data;
|
||
if (bp->rank == rank_in)
|
||
{
|
||
if (!cgsl)
|
||
{
|
||
cgsl = gs = new MyList<Parallel::gridseg>;
|
||
gs->data = new Parallel::gridseg;
|
||
}
|
||
else
|
||
{
|
||
gs->next = new MyList<Parallel::gridseg>;
|
||
gs = gs->next;
|
||
gs->data = new Parallel::gridseg;
|
||
}
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
double DH = bp->getdX(i);
|
||
gs->data->uub[i] = (feq(bp->bbox[dim + i], Pat->bbox[dim + i], DH / 2)) ? bp->bbox[dim + i] : bp->bbox[dim + i] - ghost_width * DH;
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
// NOTE: our dividing structure is (exclude ghost)
|
||
// -1 0
|
||
// 1 2
|
||
// so (0,1) does not belong to any part for vertex structure, we always put it to right part, this is consistent to
|
||
// the fortran routine where we always take floor to get index
|
||
gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] : bp->bbox[i] + (ghost_width - 1) * DH;
|
||
gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] : bp->bbox[i] + ghost_width * DH;
|
||
gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
gs->data->Bg = BP->data;
|
||
gs->next = 0;
|
||
}
|
||
|
||
if (BP == Pat->ble)
|
||
break;
|
||
BP = BP->next;
|
||
}
|
||
|
||
return cgsl;
|
||
}
|
||
// collect all grid segments or blocks without ghost nor buffer for given patch
|
||
MyList<Parallel::gridseg> *Parallel::build_owned_gsl2(Patch *Pat, int rank_in)
|
||
{
|
||
MyList<Parallel::gridseg> *cgsl = 0, *gs;
|
||
MyList<Block> *BP = Pat->blb;
|
||
while (BP)
|
||
{
|
||
Block *bp = BP->data;
|
||
if (bp->rank == rank_in)
|
||
{
|
||
if (!cgsl)
|
||
{
|
||
cgsl = gs = new MyList<Parallel::gridseg>;
|
||
gs->data = new Parallel::gridseg;
|
||
}
|
||
else
|
||
{
|
||
gs->next = new MyList<Parallel::gridseg>;
|
||
gs = gs->next;
|
||
gs->data = new Parallel::gridseg;
|
||
}
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
double DH = bp->getdX(i);
|
||
gs->data->uub[i] = (feq(bp->bbox[dim + i], Pat->bbox[dim + i], DH / 2)) ? bp->bbox[dim + i] - Pat->uui[i] * DH : bp->bbox[dim + i] - ghost_width * DH;
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
// NOTE: our dividing structure is (exclude ghost)
|
||
// -1 0
|
||
// 1 2
|
||
// so (0,1) does not belong to any part for vertex structure, we always put it to right part, this is consistent to
|
||
// the fortran routine where we always take floor to get index
|
||
gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] + Pat->lli[i] * DH : bp->bbox[i] + (ghost_width - 1) * DH;
|
||
gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] + Pat->lli[i] * DH : bp->bbox[i] + ghost_width * DH;
|
||
gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
gs->data->Bg = BP->data;
|
||
gs->next = 0;
|
||
}
|
||
|
||
if (BP == Pat->ble)
|
||
break;
|
||
BP = BP->next;
|
||
}
|
||
|
||
return cgsl;
|
||
}
|
||
// collect all grid segments or blocks without ghost for given patch, and delete the ghost_width for interpolation consideration on the patch boundary
|
||
MyList<Parallel::gridseg> *Parallel::build_owned_gsl3(Patch *Pat, int rank_in, int Symmetry)
|
||
{
|
||
MyList<Parallel::gridseg> *cgsl = 0, *gs;
|
||
MyList<Block> *BP = Pat->blb;
|
||
while (BP)
|
||
{
|
||
Block *bp = BP->data;
|
||
if (bp->rank == rank_in)
|
||
{
|
||
if (!cgsl)
|
||
{
|
||
cgsl = gs = new MyList<Parallel::gridseg>;
|
||
gs->data = new Parallel::gridseg;
|
||
}
|
||
else
|
||
{
|
||
gs->next = new MyList<Parallel::gridseg>;
|
||
gs = gs->next;
|
||
gs->data = new Parallel::gridseg;
|
||
}
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
double DH = bp->getdX(i);
|
||
gs->data->uub[i] = bp->bbox[dim + i] - ghost_width * DH;
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
// NOTE: our dividing structure is (exclude ghost)
|
||
// -1 0
|
||
// 1 2
|
||
// so (0,1) does not belong to any part for vertex structure, we always put it to right part, this is consistent to
|
||
// the fortran routine where we always take floor to get index
|
||
gs->data->llb[i] = bp->bbox[i] + (ghost_width - 1) * DH;
|
||
gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
gs->data->llb[i] = bp->bbox[i] + ghost_width * DH;
|
||
gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
// Symmetry consideration
|
||
if (Symmetry > 0)
|
||
{
|
||
double DH = bp->getdX(2);
|
||
if (feq(bp->bbox[2], 0, DH / 2))
|
||
{
|
||
gs->data->llb[2] = bp->bbox[2];
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
gs->data->shape[2] = int((gs->data->uub[2] - gs->data->llb[2]) / DH + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
gs->data->shape[2] = int((gs->data->uub[2] - gs->data->llb[2]) / DH + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
if (Symmetry > 1)
|
||
{
|
||
for (int i = 0; i < 2; i++)
|
||
{
|
||
DH = bp->getdX(i);
|
||
if (feq(bp->bbox[i], 0, DH / 2))
|
||
{
|
||
gs->data->llb[i] = bp->bbox[i];
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
gs->data->Bg = BP->data;
|
||
gs->next = 0;
|
||
}
|
||
|
||
if (BP == Pat->ble)
|
||
break;
|
||
BP = BP->next;
|
||
}
|
||
|
||
return cgsl;
|
||
}
|
||
// collect all grid segments or blocks without ghost nor buffer for given patch,
|
||
// and delete the ghost_width for interpolation consideration on the patch boundary
|
||
MyList<Parallel::gridseg> *Parallel::build_owned_gsl4(Patch *Pat, int rank_in, int Symmetry)
|
||
{
|
||
MyList<Parallel::gridseg> *cgsl = 0, *gs;
|
||
MyList<Block> *BP = Pat->blb;
|
||
while (BP)
|
||
{
|
||
Block *bp = BP->data;
|
||
if (bp->rank == rank_in)
|
||
{
|
||
if (!cgsl)
|
||
{
|
||
cgsl = gs = new MyList<Parallel::gridseg>;
|
||
gs->data = new Parallel::gridseg;
|
||
}
|
||
else
|
||
{
|
||
gs->next = new MyList<Parallel::gridseg>;
|
||
gs = gs->next;
|
||
gs->data = new Parallel::gridseg;
|
||
}
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
double DH = bp->getdX(i);
|
||
gs->data->uub[i] = (feq(bp->bbox[dim + i], Pat->bbox[dim + i], DH / 2)) ? bp->bbox[dim + i] - Pat->uui[i] * DH : bp->bbox[dim + i];
|
||
gs->data->uub[i] -= ghost_width * DH;
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
// NOTE: our dividing structure is (exclude ghost)
|
||
// -1 0
|
||
// 1 2
|
||
// so (0,1) does not belong to any part for vertex structure, we always put it to right part, this is consistent to
|
||
// the fortran routine where we always take floor to get index
|
||
gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] + Pat->lli[i] * DH : bp->bbox[i];
|
||
gs->data->llb[i] += (ghost_width - 1) * DH;
|
||
gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] + Pat->lli[i] * DH : bp->bbox[i];
|
||
gs->data->llb[i] += ghost_width * DH;
|
||
gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
// Symmetry consideration
|
||
if (Symmetry > 0)
|
||
{
|
||
double DH = bp->getdX(2);
|
||
if (feq(bp->bbox[2], 0, DH / 2))
|
||
{
|
||
gs->data->llb[2] = bp->bbox[2];
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
gs->data->shape[2] = int((gs->data->uub[2] - gs->data->llb[2]) / DH + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
gs->data->shape[2] = int((gs->data->uub[2] - gs->data->llb[2]) / DH + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
if (Symmetry > 1)
|
||
{
|
||
for (int i = 0; i < 2; i++)
|
||
{
|
||
DH = bp->getdX(i);
|
||
if (feq(bp->bbox[i], 0, DH / 2))
|
||
{
|
||
gs->data->llb[i] = bp->bbox[i];
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
gs->data->Bg = BP->data;
|
||
gs->next = 0;
|
||
}
|
||
|
||
if (BP == Pat->ble)
|
||
break;
|
||
BP = BP->next;
|
||
}
|
||
|
||
return cgsl;
|
||
}
|
||
// collect all grid segments or blocks without ghost nor buffer for given patch, no extention
|
||
MyList<Parallel::gridseg> *Parallel::build_owned_gsl5(Patch *Pat, int rank_in)
|
||
{
|
||
MyList<Parallel::gridseg> *cgsl = 0, *gs;
|
||
MyList<Block> *BP = Pat->blb;
|
||
while (BP)
|
||
{
|
||
Block *bp = BP->data;
|
||
if (bp->rank == rank_in)
|
||
{
|
||
if (!cgsl)
|
||
{
|
||
cgsl = gs = new MyList<Parallel::gridseg>;
|
||
gs->data = new Parallel::gridseg;
|
||
}
|
||
else
|
||
{
|
||
gs->next = new MyList<Parallel::gridseg>;
|
||
gs = gs->next;
|
||
gs->data = new Parallel::gridseg;
|
||
}
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
double DH = bp->getdX(i);
|
||
gs->data->uub[i] = (feq(bp->bbox[dim + i], Pat->bbox[dim + i], DH / 2)) ? bp->bbox[dim + i] - Pat->uui[i] * DH : bp->bbox[dim + i] - ghost_width * DH;
|
||
gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] + Pat->lli[i] * DH : bp->bbox[i] + ghost_width * DH;
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
gs->data->Bg = BP->data;
|
||
gs->next = 0;
|
||
}
|
||
|
||
if (BP == Pat->ble)
|
||
break;
|
||
BP = BP->next;
|
||
}
|
||
|
||
return cgsl;
|
||
}
|
||
// collect all grid segments or blocks without ghost for given patch list
|
||
// stupid method
|
||
/*
|
||
MyList<Parallel::gridseg> *Parallel::build_owned_gsl(MyList<Patch> *PatL,int rank_in,int type,int Symmetry)
|
||
{
|
||
MyList<Parallel::gridseg> *cgsl=0,*gs;
|
||
while(PatL)
|
||
{
|
||
if(!cgsl)
|
||
{
|
||
switch(type)
|
||
{
|
||
case 0:
|
||
cgsl = build_owned_gsl0(PatL->data,rank_in);
|
||
break;
|
||
case 1:
|
||
cgsl = build_owned_gsl1(PatL->data,rank_in);
|
||
break;
|
||
case 2:
|
||
cgsl = build_owned_gsl2(PatL->data,rank_in);
|
||
break;
|
||
case 3:
|
||
cgsl = build_owned_gsl3(PatL->data,rank_in,Symmetry);
|
||
break;
|
||
case 4:
|
||
cgsl = build_owned_gsl4(PatL->data,rank_in,Symmetry);
|
||
break;
|
||
case 5:
|
||
cgsl = build_owned_gsl5(PatL->data,rank_in);
|
||
break;
|
||
default:
|
||
cout<<"Parallel::build_owned_gsl : unknown type = "<<type<<endl;
|
||
MPI_Abort(MPI_COMM_WORLD,1);
|
||
}
|
||
gs = cgsl;
|
||
while(gs && gs->next) gs = gs->next;
|
||
}
|
||
else
|
||
{
|
||
switch(type)
|
||
{
|
||
case 0:
|
||
gs->next = build_owned_gsl0(PatL->data,rank_in);
|
||
break;
|
||
case 1:
|
||
gs->next = build_owned_gsl1(PatL->data,rank_in);
|
||
break;
|
||
case 2:
|
||
gs->next = build_owned_gsl2(PatL->data,rank_in);
|
||
break;
|
||
case 3:
|
||
gs->next = build_owned_gsl3(PatL->data,rank_in,Symmetry);
|
||
break;
|
||
case 4:
|
||
gs->next = build_owned_gsl4(PatL->data,rank_in,Symmetry);
|
||
break;
|
||
case 5:
|
||
gs->next = build_owned_gsl5(PatL->data,rank_in);
|
||
break;
|
||
default:
|
||
cout<<"Parallel::build_owned_gsl : unknown type = "<<type<<endl;
|
||
MPI_Abort(MPI_COMM_WORLD,1);
|
||
}
|
||
while(gs && gs->next) gs = gs->next;
|
||
}
|
||
PatL = PatL->next;
|
||
}
|
||
|
||
return cgsl;
|
||
}
|
||
*/
|
||
// more clever method
|
||
MyList<Parallel::gridseg> *Parallel::build_owned_gsl(MyList<Patch> *PatL, int rank_in, int type, int Symmetry)
|
||
{
|
||
MyList<Parallel::gridseg> *cgsl = 0, *gs;
|
||
while (PatL)
|
||
{
|
||
switch (type)
|
||
{
|
||
case 0:
|
||
gs = build_owned_gsl0(PatL->data, rank_in);
|
||
break;
|
||
case 1:
|
||
gs = build_owned_gsl1(PatL->data, rank_in);
|
||
break;
|
||
case 2:
|
||
gs = build_owned_gsl2(PatL->data, rank_in);
|
||
break;
|
||
case 3:
|
||
gs = build_owned_gsl3(PatL->data, rank_in, Symmetry);
|
||
break;
|
||
case 4:
|
||
gs = build_owned_gsl4(PatL->data, rank_in, Symmetry);
|
||
break;
|
||
case 5:
|
||
gs = build_owned_gsl5(PatL->data, rank_in);
|
||
break;
|
||
default:
|
||
cout << "Parallel::build_owned_gsl : unknown type = " << type << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
if (cgsl)
|
||
cgsl->catList(gs);
|
||
else
|
||
cgsl = gs;
|
||
PatL = PatL->next;
|
||
}
|
||
|
||
return cgsl;
|
||
}
|
||
// according to overlape to determine real grid segments
|
||
void Parallel::build_gstl(MyList<Parallel::gridseg> *srci, MyList<Parallel::gridseg> *dsti,
|
||
MyList<Parallel::gridseg> **out_src, MyList<Parallel::gridseg> **out_dst)
|
||
{
|
||
*out_src = *out_dst = 0;
|
||
|
||
if (!srci || !dsti)
|
||
return;
|
||
|
||
MyList<Parallel::gridseg> *s, *d;
|
||
MyList<Parallel::gridseg> *s2, *d2;
|
||
|
||
double llb[dim], uub[dim];
|
||
|
||
s = srci;
|
||
while (s)
|
||
{
|
||
Parallel::gridseg *sd = s->data;
|
||
d = dsti;
|
||
while (d)
|
||
{
|
||
Parallel::gridseg *dd = d->data;
|
||
bool flag = true;
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
double SH = sd->Bg->getdX(i), DH = dd->Bg->getdX(i);
|
||
llb[i] = Mymax(sd->llb[i], dd->llb[i]);
|
||
uub[i] = Mymin(sd->uub[i], dd->uub[i]);
|
||
// make sure the region boundary is consistent to the grids
|
||
// here we only judge if the domain is empty, so do not need to adjust the align
|
||
double lb = llb[i], ub = uub[i];
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
// ---*---
|
||
// x-------x
|
||
// if (int(2*(sd->uub[i]-uub[i])/SH+0.4)%2 == 1) ub = uub[i]-SH/2;
|
||
// else if(int(2*(dd->uub[i]-uub[i])/DH+0.4)%2 == 1) ub = uub[i]-DH/2;
|
||
// if (int(2*(llb[i]-sd->llb[i])/SH+0.4)%2 == 1) lb = llb[i]+SH/2;
|
||
// else if(int(2*(llb[i]-dd->llb[i])/DH+0.4)%2 == 1) lb = llb[i]+DH/2;
|
||
if (lb > ub + Mymin(SH, DH) / 2)
|
||
{
|
||
flag = false;
|
||
break;
|
||
} // special for isolated point
|
||
#else
|
||
#ifdef Cell
|
||
// |------|
|
||
// |-------------|
|
||
// if (int(2*(sd->uub[i]-uub[i])/SH+0.4)%2 == 1) ub = uub[i]+SH/2;
|
||
// else if(int(2*(dd->uub[i]-uub[i])/DH+0.4)%2 == 1) ub = uub[i]+DH/2;
|
||
// |------|
|
||
// |-------------|
|
||
// if (int(2*(llb[i]-sd->llb[i])/SH+0.4)%2 == 1) lb = llb[i]-SH/2;
|
||
// else if(int(2*(llb[i]-dd->llb[i])/DH+0.4)%2 == 1) lb = llb[i]-DH/2;
|
||
if (ub - lb < Mymin(SH, DH) / 2)
|
||
{
|
||
flag = false;
|
||
break;
|
||
} // even for isolated point, it has a cell belong to it
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
|
||
if (flag)
|
||
{
|
||
if (!(*out_src))
|
||
{
|
||
*out_src = s2 = new MyList<Parallel::gridseg>;
|
||
*out_dst = d2 = new MyList<Parallel::gridseg>;
|
||
s2->data = new Parallel::gridseg;
|
||
d2->data = new Parallel::gridseg;
|
||
}
|
||
else
|
||
{
|
||
s2->next = new MyList<Parallel::gridseg>;
|
||
s2 = s2->next;
|
||
d2->next = new MyList<Parallel::gridseg>;
|
||
d2 = d2->next;
|
||
s2->data = new Parallel::gridseg;
|
||
d2->data = new Parallel::gridseg;
|
||
}
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
double SH = sd->Bg->getdX(i), DH = dd->Bg->getdX(i);
|
||
s2->data->llb[i] = d2->data->llb[i] = llb[i];
|
||
s2->data->uub[i] = d2->data->uub[i] = uub[i];
|
||
// using float method to count point, we do not need following consideration (2012 nov 17)
|
||
#if 1
|
||
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
// old code distuinguish vertex and cell
|
||
// if (int(2*(sd->uub[i]-uub[i])/SH+0.4)%2 == 1) s2->data->uub[i] = uub[i]-SH/2;
|
||
// else if(int(2*(dd->uub[i]-uub[i])/DH+0.4)%2 == 1) d2->data->uub[i] = uub[i]-DH/2;
|
||
// if (int(2*(llb[i]-sd->llb[i])/SH+0.4)%2 == 1) s2->data->llb[i] = llb[i]+SH/2;
|
||
// else if(int(2*(llb[i]-dd->llb[i])/DH+0.4)%2 == 1) d2->data->llb[i] = llb[i]+DH/2;
|
||
// new code: here we concern much more about missing point, because overlaping domain has been gaureented above
|
||
if (int(2 * (sd->uub[i] - uub[i]) / SH + 0.4) % 2 == 1)
|
||
s2->data->uub[i] = uub[i] + SH / 2;
|
||
else if (int(2 * (dd->uub[i] - uub[i]) / DH + 0.4) % 2 == 1)
|
||
d2->data->uub[i] = uub[i] + DH / 2;
|
||
if (int(2 * (llb[i] - sd->llb[i]) / SH + 0.4) % 2 == 1)
|
||
s2->data->llb[i] = llb[i] - SH / 2;
|
||
else if (int(2 * (llb[i] - dd->llb[i]) / DH + 0.4) % 2 == 1)
|
||
d2->data->llb[i] = llb[i] - DH / 2;
|
||
s2->data->shape[i] = int((s2->data->uub[i] - s2->data->llb[i]) / SH + 0.4) + 1;
|
||
d2->data->shape[i] = int((d2->data->uub[i] - d2->data->llb[i]) / DH + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
if (int(2 * (sd->uub[i] - uub[i]) / SH + 0.4) % 2 == 1)
|
||
s2->data->uub[i] = uub[i] + SH / 2;
|
||
else if (int(2 * (dd->uub[i] - uub[i]) / DH + 0.4) % 2 == 1)
|
||
d2->data->uub[i] = uub[i] + DH / 2;
|
||
if (int(2 * (llb[i] - sd->llb[i]) / SH + 0.4) % 2 == 1)
|
||
s2->data->llb[i] = llb[i] - SH / 2;
|
||
else if (int(2 * (llb[i] - dd->llb[i]) / DH + 0.4) % 2 == 1)
|
||
d2->data->llb[i] = llb[i] - DH / 2;
|
||
s2->data->shape[i] = int((s2->data->uub[i] - s2->data->llb[i]) / SH + 0.4);
|
||
d2->data->shape[i] = int((d2->data->uub[i] - d2->data->llb[i]) / DH + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
|
||
#endif
|
||
s2->data->illb[i] = sd->illb[i];
|
||
d2->data->illb[i] = dd->illb[i];
|
||
s2->data->iuub[i] = sd->iuub[i];
|
||
d2->data->iuub[i] = dd->iuub[i];
|
||
}
|
||
s2->data->Bg = sd->Bg;
|
||
s2->next = 0;
|
||
d2->data->Bg = dd->Bg;
|
||
d2->next = 0;
|
||
}
|
||
d = d->next;
|
||
}
|
||
s = s->next;
|
||
}
|
||
}
|
||
// PACK: prepare target data in 'data'
|
||
// UNPACK: copy target data from 'data' to corresponding numerical grids
|
||
int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<Parallel::gridseg> *dst, int rank_in, int dir,
|
||
MyList<var> *VarLists /* source */, MyList<var> *VarListd /* target */, int Symmetry)
|
||
{
|
||
int myrank;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
|
||
int DIM = dim;
|
||
|
||
if (dir != PACK && dir != UNPACK)
|
||
{
|
||
cout << "error dir " << dir << " for data_packer " << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
|
||
int size_out = 0;
|
||
|
||
if (!src || !dst)
|
||
return size_out;
|
||
|
||
MyList<var> *varls, *varld;
|
||
|
||
varls = VarLists;
|
||
varld = VarListd;
|
||
while (varls && varld)
|
||
{
|
||
varls = varls->next;
|
||
varld = varld->next;
|
||
}
|
||
|
||
if (varls || varld)
|
||
{
|
||
cout << "error in short data packer, var lists does not match." << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
|
||
int type; /* 1 copy, 2 restrict, 3 prolong */
|
||
if (src->data->Bg->lev == dst->data->Bg->lev)
|
||
type = 1;
|
||
else if (src->data->Bg->lev > dst->data->Bg->lev)
|
||
type = 2;
|
||
else
|
||
type = 3;
|
||
|
||
while (src && dst)
|
||
{
|
||
if ((dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) ||
|
||
(dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank))
|
||
{
|
||
varls = VarLists;
|
||
varld = VarListd;
|
||
while (varls && varld)
|
||
{
|
||
if (data)
|
||
{
|
||
if (dir == PACK)
|
||
switch (type)
|
||
{
|
||
// attention must be paied to the difference between src's llb,uub and dst's llb,uub
|
||
case 1:
|
||
f_copy(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + size_out,
|
||
src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn],
|
||
dst->data->llb, dst->data->uub);
|
||
break;
|
||
case 2:
|
||
f_restrict3(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + size_out,
|
||
src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn],
|
||
dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry);
|
||
break;
|
||
case 3:
|
||
f_prolong3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn],
|
||
dst->data->llb, dst->data->uub, dst->data->shape, data + size_out,
|
||
dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry);
|
||
}
|
||
if (dir == UNPACK) // from target data to corresponding grid
|
||
f_copy(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, dst->data->Bg->fgfs[varld->data->sgfn],
|
||
dst->data->llb, dst->data->uub, dst->data->shape, data + size_out,
|
||
dst->data->llb, dst->data->uub);
|
||
}
|
||
size_out += dst->data->shape[0] * dst->data->shape[1] * dst->data->shape[2];
|
||
varls = varls->next;
|
||
varld = varld->next;
|
||
}
|
||
}
|
||
dst = dst->next;
|
||
src = src->next;
|
||
}
|
||
|
||
return size_out;
|
||
}
|
||
int Parallel::data_packermix(double *data, MyList<Parallel::gridseg> *src, MyList<Parallel::gridseg> *dst, int rank_in, int dir,
|
||
MyList<var> *VarLists /* source */, MyList<var> *VarListd /* target */, int Symmetry)
|
||
{
|
||
int myrank;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
|
||
int DIM = dim;
|
||
|
||
if (dir != PACK && dir != UNPACK)
|
||
{
|
||
cout << "Parallel::data_packermix: error dir " << dir << " for data_packermix." << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
|
||
int size_out = 0;
|
||
|
||
if (!src || !dst)
|
||
return size_out;
|
||
|
||
MyList<var> *varls, *varld;
|
||
|
||
varls = VarLists;
|
||
varld = VarListd;
|
||
while (varls && varld)
|
||
{
|
||
varls = varls->next;
|
||
varld = varld->next;
|
||
}
|
||
|
||
if (varls || varld)
|
||
{
|
||
cout << "error in short data packer, var lists does not match." << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
|
||
int type; /* 1 copy, 2 restrict, 3 prolong */
|
||
if (src->data->Bg->lev == dst->data->Bg->lev)
|
||
type = 1;
|
||
else if (src->data->Bg->lev > dst->data->Bg->lev)
|
||
type = 2;
|
||
else
|
||
type = 3;
|
||
|
||
if (type != 3)
|
||
{
|
||
cout << "Parallel::data_packermix: error type " << type << " for data_packermix." << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
|
||
while (src && dst)
|
||
{
|
||
if ((dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) ||
|
||
(dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank))
|
||
{
|
||
varls = VarLists;
|
||
varld = VarListd;
|
||
while (varls && varld)
|
||
{
|
||
if (data)
|
||
{
|
||
if (dir == PACK)
|
||
f_prolongcopy3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn],
|
||
dst->data->llb, dst->data->uub, src->data->shape, data + size_out,
|
||
src->data->llb, src->data->uub, varls->data->SoA, Symmetry);
|
||
if (dir == UNPACK) // from target data to corresponding grid
|
||
f_prolongmix3(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, dst->data->Bg->fgfs[varld->data->sgfn],
|
||
src->data->llb, src->data->uub, src->data->shape, data + size_out,
|
||
dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry, dst->data->illb, dst->data->iuub);
|
||
}
|
||
// the symmetry problem should be dealt in prolongcopy3,
|
||
// so we always have ghost_width for both sides
|
||
size_out += (src->data->shape[0] + 2 * ghost_width) * (src->data->shape[1] + 2 * ghost_width) * (src->data->shape[2] + 2 * ghost_width);
|
||
varls = varls->next;
|
||
varld = varld->next;
|
||
}
|
||
}
|
||
dst = dst->next;
|
||
src = src->next;
|
||
}
|
||
|
||
return size_out;
|
||
}
|
||
//
|
||
void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridseg> **dst,
|
||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /*target */,
|
||
int Symmetry)
|
||
{
|
||
int myrank, cpusize;
|
||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
|
||
int node;
|
||
|
||
MPI_Request *reqs;
|
||
MPI_Status *stats;
|
||
reqs = new MPI_Request[2 * cpusize];
|
||
stats = new MPI_Status[2 * cpusize];
|
||
int req_no = 0;
|
||
|
||
double **send_data, **rec_data;
|
||
send_data = new double *[cpusize];
|
||
rec_data = new double *[cpusize];
|
||
int length;
|
||
|
||
for (node = 0; node < cpusize; node++)
|
||
{
|
||
send_data[node] = rec_data[node] = 0;
|
||
if (node == myrank)
|
||
{
|
||
if (length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry))
|
||
{
|
||
rec_data[node] = new double[length];
|
||
if (!rec_data[node])
|
||
{
|
||
cout << "out of memory when new in short transfer, place 1" << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
data_packer(rec_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
// send from this cpu to cpu#node
|
||
if (length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry))
|
||
{
|
||
send_data[node] = new double[length];
|
||
if (!send_data[node])
|
||
{
|
||
cout << "out of memory when new in short transfer, place 2" << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
data_packer(send_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
||
MPI_Isend((void *)send_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++);
|
||
}
|
||
// receive from cpu#node to this cpu
|
||
if (length = data_packer(0, src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry))
|
||
{
|
||
rec_data[node] = new double[length];
|
||
if (!rec_data[node])
|
||
{
|
||
cout << "out of memory when new in short transfer, place 3" << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
MPI_Irecv((void *)rec_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++);
|
||
}
|
||
}
|
||
}
|
||
// wait for all requests to complete
|
||
MPI_Waitall(req_no, reqs, stats);
|
||
|
||
for (node = 0; node < cpusize; node++)
|
||
if (rec_data[node])
|
||
data_packer(rec_data[node], src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry);
|
||
|
||
for (node = 0; node < cpusize; node++)
|
||
{
|
||
if (send_data[node])
|
||
delete[] send_data[node];
|
||
if (rec_data[node])
|
||
delete[] rec_data[node];
|
||
}
|
||
|
||
delete[] reqs;
|
||
delete[] stats;
|
||
delete[] send_data;
|
||
delete[] rec_data;
|
||
}
|
||
//
|
||
void Parallel::transfermix(MyList<Parallel::gridseg> **src, MyList<Parallel::gridseg> **dst,
|
||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /*target */,
|
||
int Symmetry)
|
||
{
|
||
int myrank, cpusize;
|
||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
|
||
int node;
|
||
|
||
MPI_Request *reqs;
|
||
MPI_Status *stats;
|
||
reqs = new MPI_Request[2 * cpusize];
|
||
stats = new MPI_Status[2 * cpusize];
|
||
int req_no = 0;
|
||
|
||
double **send_data, **rec_data;
|
||
send_data = new double *[cpusize];
|
||
rec_data = new double *[cpusize];
|
||
int length;
|
||
|
||
for (node = 0; node < cpusize; node++)
|
||
{
|
||
send_data[node] = rec_data[node] = 0;
|
||
if (node == myrank)
|
||
{
|
||
if (length = data_packermix(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry))
|
||
{
|
||
rec_data[node] = new double[length];
|
||
if (!rec_data[node])
|
||
{
|
||
cout << "out of memory when new in short transfer, place 1" << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
data_packermix(rec_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
// send from this cpu to cpu#node
|
||
if (length = data_packermix(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry))
|
||
{
|
||
send_data[node] = new double[length];
|
||
if (!send_data[node])
|
||
{
|
||
cout << "out of memory when new in short transfer, place 2" << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
data_packermix(send_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
||
MPI_Isend((void *)send_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++);
|
||
}
|
||
// receive from cpu#node to this cpu
|
||
if (length = data_packermix(0, src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry))
|
||
{
|
||
rec_data[node] = new double[length];
|
||
if (!rec_data[node])
|
||
{
|
||
cout << "out of memory when new in short transfer, place 3" << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
MPI_Irecv((void *)rec_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++);
|
||
}
|
||
}
|
||
}
|
||
// wait for all requests to complete
|
||
MPI_Waitall(req_no, reqs, stats);
|
||
|
||
for (node = 0; node < cpusize; node++)
|
||
if (rec_data[node])
|
||
data_packermix(rec_data[node], src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry);
|
||
|
||
for (node = 0; node < cpusize; node++)
|
||
{
|
||
if (send_data[node])
|
||
delete[] send_data[node];
|
||
if (rec_data[node])
|
||
delete[] rec_data[node];
|
||
}
|
||
|
||
delete[] reqs;
|
||
delete[] stats;
|
||
delete[] send_data;
|
||
delete[] rec_data;
|
||
}
|
||
void Parallel::Sync(Patch *Pat, MyList<var> *VarList, int Symmetry)
|
||
{
|
||
int cpusize;
|
||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||
|
||
MyList<Parallel::gridseg> *dst;
|
||
MyList<Parallel::gridseg> **src, **transfer_src, **transfer_dst;
|
||
src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_dst = new MyList<Parallel::gridseg> *[cpusize];
|
||
|
||
dst = build_ghost_gsl(Pat); // ghost region only
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
src[node] = build_owned_gsl0(Pat, node); // for the part without ghost points and do not extend
|
||
build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer_src[node], data locate on cpu#node;
|
||
// but for transfer_dst[node] the data may locate on any node
|
||
}
|
||
|
||
transfer(transfer_src, transfer_dst, VarList, VarList, Symmetry);
|
||
|
||
if (dst)
|
||
dst->destroyList();
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
if (src[node])
|
||
src[node]->destroyList();
|
||
if (transfer_src[node])
|
||
transfer_src[node]->destroyList();
|
||
if (transfer_dst[node])
|
||
transfer_dst[node]->destroyList();
|
||
}
|
||
|
||
delete[] src;
|
||
delete[] transfer_src;
|
||
delete[] transfer_dst;
|
||
}
|
||
void Parallel::Sync(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry)
|
||
{
|
||
// Patch inner Synch
|
||
MyList<Patch> *Pp = PatL;
|
||
while (Pp)
|
||
{
|
||
Sync(Pp->data, VarList, Symmetry);
|
||
Pp = Pp->next;
|
||
}
|
||
|
||
// Patch inter Synch
|
||
int cpusize;
|
||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||
|
||
MyList<Parallel::gridseg> *dst;
|
||
MyList<Parallel::gridseg> **src, **transfer_src, **transfer_dst;
|
||
src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_dst = new MyList<Parallel::gridseg> *[cpusize];
|
||
|
||
dst = build_buffer_gsl(PatL); // buffer region only
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
src[node] = build_owned_gsl(PatL, node, 5, Symmetry); // for the part without ghost nor buffer points and do not extend
|
||
build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node
|
||
}
|
||
|
||
transfer(transfer_src, transfer_dst, VarList, VarList, Symmetry);
|
||
|
||
if (dst)
|
||
dst->destroyList();
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
if (src[node])
|
||
src[node]->destroyList();
|
||
if (transfer_src[node])
|
||
transfer_src[node]->destroyList();
|
||
if (transfer_dst[node])
|
||
transfer_dst[node]->destroyList();
|
||
}
|
||
|
||
delete[] src;
|
||
delete[] transfer_src;
|
||
delete[] transfer_dst;
|
||
}
|
||
// Merged Sync: collect all intra-patch and inter-patch grid segment lists,
|
||
// then issue a single transfer() call instead of N+1 separate ones.
|
||
void Parallel::Sync_merged(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry)
|
||
{
|
||
int cpusize;
|
||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||
|
||
MyList<Parallel::gridseg> **combined_src = new MyList<Parallel::gridseg> *[cpusize];
|
||
MyList<Parallel::gridseg> **combined_dst = new MyList<Parallel::gridseg> *[cpusize];
|
||
for (int node = 0; node < cpusize; node++)
|
||
combined_src[node] = combined_dst[node] = 0;
|
||
|
||
// Phase A: Intra-patch ghost exchange segments
|
||
MyList<Patch> *Pp = PatL;
|
||
while (Pp)
|
||
{
|
||
Patch *Pat = Pp->data;
|
||
MyList<Parallel::gridseg> *dst_ghost = build_ghost_gsl(Pat);
|
||
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
MyList<Parallel::gridseg> *src_owned = build_owned_gsl0(Pat, node);
|
||
MyList<Parallel::gridseg> *tsrc = 0, *tdst = 0;
|
||
build_gstl(src_owned, dst_ghost, &tsrc, &tdst);
|
||
|
||
if (tsrc)
|
||
{
|
||
if (combined_src[node])
|
||
combined_src[node]->catList(tsrc);
|
||
else
|
||
combined_src[node] = tsrc;
|
||
}
|
||
if (tdst)
|
||
{
|
||
if (combined_dst[node])
|
||
combined_dst[node]->catList(tdst);
|
||
else
|
||
combined_dst[node] = tdst;
|
||
}
|
||
|
||
if (src_owned)
|
||
src_owned->destroyList();
|
||
}
|
||
|
||
if (dst_ghost)
|
||
dst_ghost->destroyList();
|
||
|
||
Pp = Pp->next;
|
||
}
|
||
|
||
// Phase B: Inter-patch buffer exchange segments
|
||
MyList<Parallel::gridseg> *dst_buffer = build_buffer_gsl(PatL);
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
MyList<Parallel::gridseg> *src_owned = build_owned_gsl(PatL, node, 5, Symmetry);
|
||
MyList<Parallel::gridseg> *tsrc = 0, *tdst = 0;
|
||
build_gstl(src_owned, dst_buffer, &tsrc, &tdst);
|
||
|
||
if (tsrc)
|
||
{
|
||
if (combined_src[node])
|
||
combined_src[node]->catList(tsrc);
|
||
else
|
||
combined_src[node] = tsrc;
|
||
}
|
||
if (tdst)
|
||
{
|
||
if (combined_dst[node])
|
||
combined_dst[node]->catList(tdst);
|
||
else
|
||
combined_dst[node] = tdst;
|
||
}
|
||
|
||
if (src_owned)
|
||
src_owned->destroyList();
|
||
}
|
||
if (dst_buffer)
|
||
dst_buffer->destroyList();
|
||
|
||
// Phase C: Single transfer
|
||
transfer(combined_src, combined_dst, VarList, VarList, Symmetry);
|
||
|
||
// Phase D: Cleanup
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
if (combined_src[node])
|
||
combined_src[node]->destroyList();
|
||
if (combined_dst[node])
|
||
combined_dst[node]->destroyList();
|
||
}
|
||
delete[] combined_src;
|
||
delete[] combined_dst;
|
||
}
|
||
// SyncCache constructor
|
||
Parallel::SyncCache::SyncCache()
|
||
: valid(false), cpusize(0), combined_src(0), combined_dst(0),
|
||
send_lengths(0), recv_lengths(0), send_bufs(0), recv_bufs(0),
|
||
send_buf_caps(0), recv_buf_caps(0), reqs(0), stats(0), max_reqs(0),
|
||
lengths_valid(false)
|
||
{
|
||
}
|
||
// SyncCache invalidate: free grid segment lists but keep buffers
|
||
void Parallel::SyncCache::invalidate()
|
||
{
|
||
if (!valid)
|
||
return;
|
||
for (int i = 0; i < cpusize; i++)
|
||
{
|
||
if (combined_src[i])
|
||
combined_src[i]->destroyList();
|
||
if (combined_dst[i])
|
||
combined_dst[i]->destroyList();
|
||
combined_src[i] = combined_dst[i] = 0;
|
||
send_lengths[i] = recv_lengths[i] = 0;
|
||
}
|
||
valid = false;
|
||
lengths_valid = false;
|
||
}
|
||
// SyncCache destroy: free everything
|
||
void Parallel::SyncCache::destroy()
|
||
{
|
||
invalidate();
|
||
if (combined_src) delete[] combined_src;
|
||
if (combined_dst) delete[] combined_dst;
|
||
if (send_lengths) delete[] send_lengths;
|
||
if (recv_lengths) delete[] recv_lengths;
|
||
if (send_buf_caps) delete[] send_buf_caps;
|
||
if (recv_buf_caps) delete[] recv_buf_caps;
|
||
for (int i = 0; i < cpusize; i++)
|
||
{
|
||
if (send_bufs && send_bufs[i]) delete[] send_bufs[i];
|
||
if (recv_bufs && recv_bufs[i]) delete[] recv_bufs[i];
|
||
}
|
||
if (send_bufs) delete[] send_bufs;
|
||
if (recv_bufs) delete[] recv_bufs;
|
||
if (reqs) delete[] reqs;
|
||
if (stats) delete[] stats;
|
||
combined_src = combined_dst = 0;
|
||
send_lengths = recv_lengths = 0;
|
||
send_buf_caps = recv_buf_caps = 0;
|
||
send_bufs = recv_bufs = 0;
|
||
reqs = 0; stats = 0;
|
||
cpusize = 0; max_reqs = 0;
|
||
}
|
||
// transfer_cached: reuse pre-allocated buffers from SyncCache
|
||
void Parallel::transfer_cached(MyList<Parallel::gridseg> **src, MyList<Parallel::gridseg> **dst,
|
||
MyList<var> *VarList1, MyList<var> *VarList2,
|
||
int Symmetry, SyncCache &cache)
|
||
{
|
||
int myrank;
|
||
MPI_Comm_size(MPI_COMM_WORLD, &cache.cpusize);
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
int cpusize = cache.cpusize;
|
||
|
||
int req_no = 0;
|
||
int node;
|
||
|
||
for (node = 0; node < cpusize; node++)
|
||
{
|
||
if (node == myrank)
|
||
{
|
||
int length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
||
cache.recv_lengths[node] = length;
|
||
if (length > 0)
|
||
{
|
||
if (length > cache.recv_buf_caps[node])
|
||
{
|
||
if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node];
|
||
cache.recv_bufs[node] = new double[length];
|
||
cache.recv_buf_caps[node] = length;
|
||
}
|
||
data_packer(cache.recv_bufs[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
// send
|
||
int slength = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
||
cache.send_lengths[node] = slength;
|
||
if (slength > 0)
|
||
{
|
||
if (slength > cache.send_buf_caps[node])
|
||
{
|
||
if (cache.send_bufs[node]) delete[] cache.send_bufs[node];
|
||
cache.send_bufs[node] = new double[slength];
|
||
cache.send_buf_caps[node] = slength;
|
||
}
|
||
data_packer(cache.send_bufs[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
||
MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no++);
|
||
}
|
||
// recv
|
||
int rlength = data_packer(0, src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry);
|
||
cache.recv_lengths[node] = rlength;
|
||
if (rlength > 0)
|
||
{
|
||
if (rlength > cache.recv_buf_caps[node])
|
||
{
|
||
if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node];
|
||
cache.recv_bufs[node] = new double[rlength];
|
||
cache.recv_buf_caps[node] = rlength;
|
||
}
|
||
MPI_Irecv((void *)cache.recv_bufs[node], rlength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no++);
|
||
}
|
||
}
|
||
}
|
||
|
||
MPI_Waitall(req_no, cache.reqs, cache.stats);
|
||
|
||
for (node = 0; node < cpusize; node++)
|
||
if (cache.recv_bufs[node] && cache.recv_lengths[node] > 0)
|
||
data_packer(cache.recv_bufs[node], src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry);
|
||
}
|
||
// Sync_cached: build grid segment lists on first call, reuse on subsequent calls
|
||
void Parallel::Sync_cached(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, SyncCache &cache)
|
||
{
|
||
if (!cache.valid)
|
||
{
|
||
int cpusize;
|
||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||
cache.cpusize = cpusize;
|
||
|
||
// Allocate cache arrays if needed
|
||
if (!cache.combined_src)
|
||
{
|
||
cache.combined_src = new MyList<Parallel::gridseg> *[cpusize];
|
||
cache.combined_dst = new MyList<Parallel::gridseg> *[cpusize];
|
||
cache.send_lengths = new int[cpusize];
|
||
cache.recv_lengths = new int[cpusize];
|
||
cache.send_bufs = new double *[cpusize];
|
||
cache.recv_bufs = new double *[cpusize];
|
||
cache.send_buf_caps = new int[cpusize];
|
||
cache.recv_buf_caps = new int[cpusize];
|
||
for (int i = 0; i < cpusize; i++)
|
||
{
|
||
cache.send_bufs[i] = cache.recv_bufs[i] = 0;
|
||
cache.send_buf_caps[i] = cache.recv_buf_caps[i] = 0;
|
||
}
|
||
cache.max_reqs = 2 * cpusize;
|
||
cache.reqs = new MPI_Request[cache.max_reqs];
|
||
cache.stats = new MPI_Status[cache.max_reqs];
|
||
}
|
||
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
cache.combined_src[node] = cache.combined_dst[node] = 0;
|
||
cache.send_lengths[node] = cache.recv_lengths[node] = 0;
|
||
}
|
||
|
||
// Build intra-patch segments (same as Sync_merged Phase A)
|
||
MyList<Patch> *Pp = PatL;
|
||
while (Pp)
|
||
{
|
||
Patch *Pat = Pp->data;
|
||
MyList<Parallel::gridseg> *dst_ghost = build_ghost_gsl(Pat);
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
MyList<Parallel::gridseg> *src_owned = build_owned_gsl0(Pat, node);
|
||
MyList<Parallel::gridseg> *tsrc = 0, *tdst = 0;
|
||
build_gstl(src_owned, dst_ghost, &tsrc, &tdst);
|
||
if (tsrc)
|
||
{
|
||
if (cache.combined_src[node])
|
||
cache.combined_src[node]->catList(tsrc);
|
||
else
|
||
cache.combined_src[node] = tsrc;
|
||
}
|
||
if (tdst)
|
||
{
|
||
if (cache.combined_dst[node])
|
||
cache.combined_dst[node]->catList(tdst);
|
||
else
|
||
cache.combined_dst[node] = tdst;
|
||
}
|
||
if (src_owned) src_owned->destroyList();
|
||
}
|
||
if (dst_ghost) dst_ghost->destroyList();
|
||
Pp = Pp->next;
|
||
}
|
||
|
||
// Build inter-patch segments (same as Sync_merged Phase B)
|
||
MyList<Parallel::gridseg> *dst_buffer = build_buffer_gsl(PatL);
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
MyList<Parallel::gridseg> *src_owned = build_owned_gsl(PatL, node, 5, Symmetry);
|
||
MyList<Parallel::gridseg> *tsrc = 0, *tdst = 0;
|
||
build_gstl(src_owned, dst_buffer, &tsrc, &tdst);
|
||
if (tsrc)
|
||
{
|
||
if (cache.combined_src[node])
|
||
cache.combined_src[node]->catList(tsrc);
|
||
else
|
||
cache.combined_src[node] = tsrc;
|
||
}
|
||
if (tdst)
|
||
{
|
||
if (cache.combined_dst[node])
|
||
cache.combined_dst[node]->catList(tdst);
|
||
else
|
||
cache.combined_dst[node] = tdst;
|
||
}
|
||
if (src_owned) src_owned->destroyList();
|
||
}
|
||
if (dst_buffer) dst_buffer->destroyList();
|
||
|
||
cache.valid = true;
|
||
}
|
||
|
||
// Use cached lists with buffer-reusing transfer
|
||
transfer_cached(cache.combined_src, cache.combined_dst, VarList, VarList, Symmetry, cache);
|
||
}
|
||
// Sync_start: pack and post MPI_Isend/Irecv, return immediately
|
||
void Parallel::Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry,
|
||
SyncCache &cache, AsyncSyncState &state)
|
||
{
|
||
// Ensure cache is built
|
||
if (!cache.valid)
|
||
{
|
||
// Build cache (same logic as Sync_cached)
|
||
int cpusize;
|
||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||
cache.cpusize = cpusize;
|
||
|
||
if (!cache.combined_src)
|
||
{
|
||
cache.combined_src = new MyList<Parallel::gridseg> *[cpusize];
|
||
cache.combined_dst = new MyList<Parallel::gridseg> *[cpusize];
|
||
cache.send_lengths = new int[cpusize];
|
||
cache.recv_lengths = new int[cpusize];
|
||
cache.send_bufs = new double *[cpusize];
|
||
cache.recv_bufs = new double *[cpusize];
|
||
cache.send_buf_caps = new int[cpusize];
|
||
cache.recv_buf_caps = new int[cpusize];
|
||
for (int i = 0; i < cpusize; i++)
|
||
{
|
||
cache.send_bufs[i] = cache.recv_bufs[i] = 0;
|
||
cache.send_buf_caps[i] = cache.recv_buf_caps[i] = 0;
|
||
}
|
||
cache.max_reqs = 2 * cpusize;
|
||
cache.reqs = new MPI_Request[cache.max_reqs];
|
||
cache.stats = new MPI_Status[cache.max_reqs];
|
||
}
|
||
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
cache.combined_src[node] = cache.combined_dst[node] = 0;
|
||
cache.send_lengths[node] = cache.recv_lengths[node] = 0;
|
||
}
|
||
|
||
MyList<Patch> *Pp = PatL;
|
||
while (Pp)
|
||
{
|
||
Patch *Pat = Pp->data;
|
||
MyList<Parallel::gridseg> *dst_ghost = build_ghost_gsl(Pat);
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
MyList<Parallel::gridseg> *src_owned = build_owned_gsl0(Pat, node);
|
||
MyList<Parallel::gridseg> *tsrc = 0, *tdst = 0;
|
||
build_gstl(src_owned, dst_ghost, &tsrc, &tdst);
|
||
if (tsrc)
|
||
{
|
||
if (cache.combined_src[node])
|
||
cache.combined_src[node]->catList(tsrc);
|
||
else
|
||
cache.combined_src[node] = tsrc;
|
||
}
|
||
if (tdst)
|
||
{
|
||
if (cache.combined_dst[node])
|
||
cache.combined_dst[node]->catList(tdst);
|
||
else
|
||
cache.combined_dst[node] = tdst;
|
||
}
|
||
if (src_owned) src_owned->destroyList();
|
||
}
|
||
if (dst_ghost) dst_ghost->destroyList();
|
||
Pp = Pp->next;
|
||
}
|
||
|
||
MyList<Parallel::gridseg> *dst_buffer = build_buffer_gsl(PatL);
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
MyList<Parallel::gridseg> *src_owned = build_owned_gsl(PatL, node, 5, Symmetry);
|
||
MyList<Parallel::gridseg> *tsrc = 0, *tdst = 0;
|
||
build_gstl(src_owned, dst_buffer, &tsrc, &tdst);
|
||
if (tsrc)
|
||
{
|
||
if (cache.combined_src[node])
|
||
cache.combined_src[node]->catList(tsrc);
|
||
else
|
||
cache.combined_src[node] = tsrc;
|
||
}
|
||
if (tdst)
|
||
{
|
||
if (cache.combined_dst[node])
|
||
cache.combined_dst[node]->catList(tdst);
|
||
else
|
||
cache.combined_dst[node] = tdst;
|
||
}
|
||
if (src_owned) src_owned->destroyList();
|
||
}
|
||
if (dst_buffer) dst_buffer->destroyList();
|
||
cache.valid = true;
|
||
}
|
||
|
||
// Now pack and post async MPI operations
|
||
int myrank;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
int cpusize = cache.cpusize;
|
||
state.req_no = 0;
|
||
state.active = true;
|
||
|
||
MyList<Parallel::gridseg> **src = cache.combined_src;
|
||
MyList<Parallel::gridseg> **dst = cache.combined_dst;
|
||
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
if (node == myrank)
|
||
{
|
||
int length;
|
||
if (!cache.lengths_valid) {
|
||
length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry);
|
||
cache.recv_lengths[node] = length;
|
||
} else {
|
||
length = cache.recv_lengths[node];
|
||
}
|
||
if (length > 0)
|
||
{
|
||
if (length > cache.recv_buf_caps[node])
|
||
{
|
||
if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node];
|
||
cache.recv_bufs[node] = new double[length];
|
||
cache.recv_buf_caps[node] = length;
|
||
}
|
||
data_packer(cache.recv_bufs[node], src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
int slength;
|
||
if (!cache.lengths_valid) {
|
||
slength = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry);
|
||
cache.send_lengths[node] = slength;
|
||
} else {
|
||
slength = cache.send_lengths[node];
|
||
}
|
||
if (slength > 0)
|
||
{
|
||
if (slength > cache.send_buf_caps[node])
|
||
{
|
||
if (cache.send_bufs[node]) delete[] cache.send_bufs[node];
|
||
cache.send_bufs[node] = new double[slength];
|
||
cache.send_buf_caps[node] = slength;
|
||
}
|
||
data_packer(cache.send_bufs[node], src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry);
|
||
MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 2, MPI_COMM_WORLD, cache.reqs + state.req_no++);
|
||
}
|
||
int rlength;
|
||
if (!cache.lengths_valid) {
|
||
rlength = data_packer(0, src[node], dst[node], node, UNPACK, VarList, VarList, Symmetry);
|
||
cache.recv_lengths[node] = rlength;
|
||
} else {
|
||
rlength = cache.recv_lengths[node];
|
||
}
|
||
if (rlength > 0)
|
||
{
|
||
if (rlength > cache.recv_buf_caps[node])
|
||
{
|
||
if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node];
|
||
cache.recv_bufs[node] = new double[rlength];
|
||
cache.recv_buf_caps[node] = rlength;
|
||
}
|
||
MPI_Irecv((void *)cache.recv_bufs[node], rlength, MPI_DOUBLE, node, 2, MPI_COMM_WORLD, cache.reqs + state.req_no++);
|
||
}
|
||
}
|
||
}
|
||
cache.lengths_valid = true;
|
||
}
|
||
// Sync_finish: wait for async MPI operations and unpack
|
||
void Parallel::Sync_finish(SyncCache &cache, AsyncSyncState &state,
|
||
MyList<var> *VarList, int Symmetry)
|
||
{
|
||
if (!state.active)
|
||
return;
|
||
|
||
MPI_Waitall(state.req_no, cache.reqs, cache.stats);
|
||
|
||
int cpusize = cache.cpusize;
|
||
MyList<Parallel::gridseg> **src = cache.combined_src;
|
||
MyList<Parallel::gridseg> **dst = cache.combined_dst;
|
||
|
||
for (int node = 0; node < cpusize; node++)
|
||
if (cache.recv_bufs[node] && cache.recv_lengths[node] > 0)
|
||
data_packer(cache.recv_bufs[node], src[node], dst[node], node, UNPACK, VarList, VarList, Symmetry);
|
||
|
||
state.active = false;
|
||
}
|
||
// collect buffer grid segments or blocks for the periodic boundary condition of given patch
|
||
// ---------------------------------------------------
|
||
// |con | |con |
|
||
// |ner | PhysBD |ner |
|
||
// |-------------------------------------------------|
|
||
// | | | |
|
||
// |Phy | |Phy |
|
||
// |sBD | |BD |
|
||
// | | | |
|
||
// | | | |
|
||
// | | | |
|
||
// |-------------------------------------------------|
|
||
// |con | PhysBD |con |
|
||
// |ner | |ner |
|
||
// ---------------------------------------------------
|
||
// first order derivetive does not need conner information,
|
||
// but second order derivative needs!
|
||
/* the following code does not include conner part
|
||
MyList<Parallel::gridseg> *Parallel::build_PhysBD_gsl(Patch *Pat)
|
||
{
|
||
MyList<Parallel::gridseg> *cgsl,*gsc,*gsb=0,*p;
|
||
gsc = build_ghost_gsl(Pat);
|
||
for(int i=0;i<dim;i++)
|
||
{
|
||
double DH = gsc->data->Bg->getdX(i);
|
||
// lower boundary
|
||
if(gsb)
|
||
{
|
||
p = new MyList<Parallel::gridseg>;
|
||
p->data = new Parallel::gridseg;
|
||
p->next=gsb;
|
||
gsb=p;
|
||
}
|
||
else
|
||
{
|
||
gsb = new MyList<Parallel::gridseg>;
|
||
gsb->data = new Parallel::gridseg;
|
||
gsb->next=0;
|
||
}
|
||
for(int j=0;j<dim;j++)
|
||
{
|
||
if(i == j)
|
||
{
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
gsb->data->llb[i] = Pat->bbox[i]-ghost_width*DH;
|
||
gsb->data->uub[i] = Pat->bbox[i]-DH;
|
||
#else
|
||
#ifdef Cell
|
||
gsb->data->llb[i] = Pat->bbox[i]-ghost_width*DH;
|
||
gsb->data->uub[i] = Pat->bbox[i];
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
gsb->data->shape[i] = ghost_width;
|
||
}
|
||
else
|
||
{
|
||
gsb->data->llb[j] = Pat->bbox[j];
|
||
gsb->data->uub[j] = Pat->bbox[j+dim];
|
||
gsb->data->shape[j] = Pat->shape[j];
|
||
}
|
||
}
|
||
gsb->data->Bg = 0; //vertual grid segment
|
||
// upper boundary
|
||
p = new MyList<Parallel::gridseg>;
|
||
p->data = new Parallel::gridseg;
|
||
p->next=gsb;
|
||
gsb=p;
|
||
for(int j=0;j<dim;j++)
|
||
{
|
||
if(i == j)
|
||
{
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
gsb->data->llb[i] = Pat->bbox[i+dim]+DH;
|
||
gsb->data->uub[i] = Pat->bbox[i+dim]+ghost_width*DH;
|
||
#else
|
||
#ifdef Cell
|
||
gsb->data->llb[i] = Pat->bbox[i+dim];
|
||
gsb->data->uub[i] = Pat->bbox[i+dim]+ghost_width*DH;
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
gsb->data->shape[i] = ghost_width;
|
||
}
|
||
else
|
||
{
|
||
gsb->data->llb[j] = Pat->bbox[j];
|
||
gsb->data->uub[j] = Pat->bbox[j+dim];
|
||
gsb->data->shape[j] = Pat->shape[j];
|
||
}
|
||
}
|
||
gsb->data->Bg = 0; //vertual grid segment
|
||
}
|
||
|
||
cgsl = gsl_and(gsc,gsb);
|
||
|
||
gsc->destroyList();
|
||
gsb->destroyList();
|
||
|
||
return cgsl;
|
||
}
|
||
*/
|
||
// the following code includes conner part
|
||
MyList<Parallel::gridseg> *Parallel::build_PhysBD_gsl(Patch *Pat)
|
||
{
|
||
MyList<Parallel::gridseg> *cgsl, *gsc, *gsb = 0, *p;
|
||
|
||
gsc = build_complete_gsl(Pat);
|
||
|
||
gsb = new MyList<Parallel::gridseg>;
|
||
gsb->data = new Parallel::gridseg;
|
||
gsb->next = 0;
|
||
gsb->data->Bg = 0;
|
||
|
||
for (int j = 0; j < dim; j++)
|
||
{
|
||
gsb->data->llb[j] = Pat->bbox[j];
|
||
gsb->data->uub[j] = Pat->bbox[j + dim];
|
||
gsb->data->shape[j] = Pat->shape[j];
|
||
}
|
||
|
||
p = gsl_subtract(gsc, gsb);
|
||
|
||
gsc->destroyList();
|
||
gsb->destroyList();
|
||
|
||
cgsl = divide_gsl(p, Pat);
|
||
|
||
p->destroyList();
|
||
|
||
return cgsl;
|
||
}
|
||
MyList<Parallel::gridseg> *Parallel::divide_gsl(MyList<Parallel::gridseg> *p, Patch *Pat)
|
||
{
|
||
MyList<Parallel::gridseg> *cgsl = 0;
|
||
while (p)
|
||
{
|
||
if (cgsl)
|
||
cgsl->catList(divide_gs(p, Pat));
|
||
else
|
||
cgsl = divide_gs(p, Pat);
|
||
p = p->next;
|
||
}
|
||
|
||
return cgsl;
|
||
}
|
||
// divide the gs into pices which locate either totally outside of the given Patch coordinate range
|
||
// or totally inside it. It's usefull for periodic boundary condition
|
||
MyList<Parallel::gridseg> *Parallel::divide_gs(MyList<Parallel::gridseg> *p, Patch *Pat)
|
||
{
|
||
double DH[dim];
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
DH[i] = p->data->Bg->getdX(i);
|
||
}
|
||
|
||
int num[dim];
|
||
double llb[3][dim], uub[3][dim];
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
if (p->data->llb[i] < Pat->bbox[i] - DH[i] / 2)
|
||
{
|
||
if (p->data->uub[i] > Pat->bbox[i + dim] + DH[i] / 2)
|
||
{
|
||
num[i] = 3;
|
||
llb[0][i] = p->data->llb[i];
|
||
llb[1][i] = Pat->bbox[i];
|
||
uub[1][i] = Pat->bbox[i + dim];
|
||
uub[2][i] = p->data->uub[i];
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
uub[0][i] = Pat->bbox[i] - DH[i];
|
||
llb[2][i] = Pat->bbox[i + dim] + DH[i];
|
||
#else
|
||
#ifdef Cell
|
||
uub[0][i] = Pat->bbox[i];
|
||
llb[2][i] = Pat->bbox[i + dim];
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
else if (p->data->uub[i] > Pat->bbox[i] + DH[i] / 2)
|
||
{
|
||
num[i] = 2;
|
||
llb[0][i] = p->data->llb[i];
|
||
llb[1][i] = Pat->bbox[i];
|
||
uub[1][i] = p->data->uub[i];
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
uub[0][i] = Pat->bbox[i] - DH[i];
|
||
#else
|
||
#ifdef Cell
|
||
uub[0][i] = Pat->bbox[i];
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
else
|
||
{
|
||
num[i] = 1;
|
||
llb[0][i] = p->data->llb[i];
|
||
uub[0][i] = p->data->uub[i];
|
||
}
|
||
}
|
||
else if (p->data->llb[i] < Pat->bbox[i + dim] - DH[i] / 2)
|
||
{
|
||
if (p->data->uub[i] > Pat->bbox[i + dim] + DH[i] / 2)
|
||
{
|
||
num[i] = 2;
|
||
llb[0][i] = p->data->llb[i];
|
||
uub[0][i] = Pat->bbox[i + dim];
|
||
uub[1][i] = p->data->uub[i];
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
llb[1][i] = Pat->bbox[i + dim] + DH[i];
|
||
#else
|
||
#ifdef Cell
|
||
llb[1][i] = Pat->bbox[i + dim];
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
else
|
||
{
|
||
num[i] = 1;
|
||
llb[0][i] = p->data->llb[i];
|
||
uub[0][i] = p->data->uub[i];
|
||
}
|
||
}
|
||
else
|
||
{
|
||
num[i] = 1;
|
||
llb[0][i] = p->data->llb[i];
|
||
uub[0][i] = p->data->uub[i];
|
||
}
|
||
}
|
||
MyList<Parallel::gridseg> *cgsl = 0, *gg;
|
||
int NN = 1;
|
||
for (int i = 0; i < dim; i++)
|
||
NN = NN * num[i];
|
||
|
||
for (int i = 0; i < NN; i++)
|
||
{
|
||
int ind[dim];
|
||
getarrayindex(dim, num, ind, i);
|
||
gg = clone_gsl(p, true);
|
||
for (int k = 0; k < dim; k++)
|
||
{
|
||
gg->data->llb[k] = llb[ind[k]][k];
|
||
gg->data->uub[k] = uub[ind[k]][k];
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
gg->data->shape[k] = int((uub[ind[k]][k] - llb[ind[k]][k]) / DH[k] + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
gg->data->shape[k] = int((uub[ind[k]][k] - llb[ind[k]][k]) / DH[k] + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
|
||
if (cgsl)
|
||
cgsl->catList(gg);
|
||
else
|
||
cgsl = gg;
|
||
}
|
||
|
||
return cgsl;
|
||
}
|
||
// after mod operation, according to overlape to determine real grid segments
|
||
void Parallel::build_PhysBD_gstl(Patch *Pat, MyList<Parallel::gridseg> *srci, MyList<Parallel::gridseg> *dsti,
|
||
MyList<Parallel::gridseg> **out_src, MyList<Parallel::gridseg> **out_dst)
|
||
{
|
||
*out_src = *out_dst = 0;
|
||
|
||
if (!srci || !dsti)
|
||
return;
|
||
|
||
MyList<Parallel::gridseg> *s, *d;
|
||
MyList<Parallel::gridseg> *s2, *d2;
|
||
|
||
double llb[dim], uub[dim];
|
||
|
||
s = srci;
|
||
while (s)
|
||
{
|
||
Parallel::gridseg *sd = s->data;
|
||
d = dsti;
|
||
while (d)
|
||
{
|
||
Parallel::gridseg *dd = d->data;
|
||
bool flag = true;
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
double SH = sd->Bg->getdX(i), DH = dd->Bg->getdX(i);
|
||
if (!feq(SH, DH, SH / 2))
|
||
{
|
||
cout << "Parallel::build_PhysBD_gstl meets different grid space SH = " << SH << ", DH = " << DH << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
// we assume dst and src locate on the same Patch
|
||
if (dd->llb[i] < Pat->bbox[i])
|
||
llb[i] = Mymax(sd->llb[i], dd->llb[i] + Pat->bbox[dim + i] - Pat->bbox[i]);
|
||
else if (dd->llb[i] > Pat->bbox[i + dim])
|
||
llb[i] = Mymax(sd->llb[i], dd->llb[i] - Pat->bbox[dim + i] + Pat->bbox[i]);
|
||
else
|
||
llb[i] = Mymax(sd->llb[i], dd->llb[i]);
|
||
|
||
if (dd->uub[i] < Pat->bbox[i])
|
||
uub[i] = Mymin(sd->uub[i], dd->uub[i] + Pat->bbox[dim + i] - Pat->bbox[i]);
|
||
else if (dd->uub[i] > Pat->bbox[dim + i])
|
||
uub[i] = Mymin(sd->uub[i], dd->uub[i] - Pat->bbox[dim + i] + Pat->bbox[i]);
|
||
else
|
||
uub[i] = Mymin(sd->uub[i], dd->uub[i]);
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
if (llb[i] > uub[i] + SH / 2)
|
||
{
|
||
flag = false;
|
||
break;
|
||
} // special for isolated point
|
||
#else
|
||
#ifdef Cell
|
||
if (llb[i] > uub[i])
|
||
{
|
||
flag = false;
|
||
break;
|
||
}
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
|
||
if (flag)
|
||
{
|
||
if (!(*out_src))
|
||
{
|
||
*out_src = s2 = new MyList<Parallel::gridseg>;
|
||
*out_dst = d2 = new MyList<Parallel::gridseg>;
|
||
s2->data = new Parallel::gridseg;
|
||
d2->data = new Parallel::gridseg;
|
||
}
|
||
else
|
||
{
|
||
s2->next = new MyList<Parallel::gridseg>;
|
||
s2 = s2->next;
|
||
d2->next = new MyList<Parallel::gridseg>;
|
||
d2 = d2->next;
|
||
s2->data = new Parallel::gridseg;
|
||
d2->data = new Parallel::gridseg;
|
||
}
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
double SH = sd->Bg->getdX(i), DH = dd->Bg->getdX(i);
|
||
s2->data->llb[i] = llb[i];
|
||
s2->data->uub[i] = uub[i];
|
||
|
||
if (dd->llb[i] < Pat->bbox[i])
|
||
d2->data->llb[i] = llb[i] - Pat->bbox[dim + i] + Pat->bbox[i];
|
||
else if (dd->llb[i] > Pat->bbox[i + dim])
|
||
d2->data->llb[i] = llb[i] + Pat->bbox[dim + i] - Pat->bbox[i];
|
||
else
|
||
d2->data->llb[i] = llb[i];
|
||
|
||
if (dd->uub[i] < Pat->bbox[i])
|
||
d2->data->uub[i] = uub[i] - Pat->bbox[dim + i] + Pat->bbox[i];
|
||
else if (dd->uub[i] > Pat->bbox[dim + i])
|
||
d2->data->uub[i] = uub[i] + Pat->bbox[dim + i] - Pat->bbox[i];
|
||
else
|
||
d2->data->uub[i] = uub[i];
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
s2->data->shape[i] = int((s2->data->uub[i] - s2->data->llb[i]) / SH + 0.4) + 1;
|
||
d2->data->shape[i] = int((d2->data->uub[i] - d2->data->llb[i]) / DH + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
s2->data->shape[i] = int((s2->data->uub[i] - s2->data->llb[i]) / SH + 0.4);
|
||
d2->data->shape[i] = int((d2->data->uub[i] - d2->data->llb[i]) / DH + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
s2->data->Bg = sd->Bg;
|
||
s2->next = 0;
|
||
d2->data->Bg = dd->Bg;
|
||
d2->next = 0;
|
||
}
|
||
d = d->next;
|
||
}
|
||
s = s->next;
|
||
}
|
||
}
|
||
void Parallel::PeriodicBD(Patch *Pat, MyList<var> *VarList, int Symmetry)
|
||
{
|
||
int cpusize;
|
||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||
|
||
MyList<Parallel::gridseg> *dst;
|
||
MyList<Parallel::gridseg> **src, **transfer_src, **transfer_dst;
|
||
src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_dst = new MyList<Parallel::gridseg> *[cpusize];
|
||
|
||
dst = build_PhysBD_gsl(Pat);
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
src[node] = build_owned_gsl0(Pat, node); // for the part without ghost points and do not extend
|
||
build_PhysBD_gstl(Pat, src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node
|
||
}
|
||
|
||
transfer(transfer_src, transfer_dst, VarList, VarList, Symmetry);
|
||
|
||
if (dst)
|
||
dst->destroyList();
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
if (src[node])
|
||
src[node]->destroyList();
|
||
if (transfer_src[node])
|
||
transfer_src[node]->destroyList();
|
||
if (transfer_dst[node])
|
||
transfer_dst[node]->destroyList();
|
||
}
|
||
|
||
delete[] src;
|
||
delete[] transfer_src;
|
||
delete[] transfer_dst;
|
||
}
|
||
double Parallel::L2Norm(Patch *Pat, var *vf)
|
||
{
|
||
int myrank;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
|
||
double tvf, dtvf = 0;
|
||
int BDW = ghost_width;
|
||
|
||
MyList<Block> *BP = Pat->blb;
|
||
while (BP)
|
||
{
|
||
Block *cg = BP->data;
|
||
if (myrank == cg->rank)
|
||
{
|
||
f_l2normhelper(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||
Pat->bbox[0], Pat->bbox[1], Pat->bbox[2],
|
||
Pat->bbox[3], Pat->bbox[4], Pat->bbox[5],
|
||
cg->fgfs[vf->sgfn], tvf, BDW);
|
||
dtvf += tvf;
|
||
}
|
||
if (BP == Pat->ble)
|
||
break;
|
||
BP = BP->next;
|
||
}
|
||
|
||
MPI_Allreduce(&dtvf, &tvf, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||
|
||
tvf = sqrt(tvf);
|
||
|
||
return tvf;
|
||
}
|
||
double Parallel::L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here)
|
||
{
|
||
int myrank;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
|
||
double tvf, dtvf = 0;
|
||
int BDW = ghost_width;
|
||
|
||
MyList<Block> *BP = Pat->blb;
|
||
while (BP)
|
||
{
|
||
Block *cg = BP->data;
|
||
if (myrank == cg->rank)
|
||
{
|
||
f_l2normhelper(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||
Pat->bbox[0], Pat->bbox[1], Pat->bbox[2],
|
||
Pat->bbox[3], Pat->bbox[4], Pat->bbox[5],
|
||
cg->fgfs[vf->sgfn], tvf, BDW);
|
||
dtvf += tvf;
|
||
}
|
||
if (BP == Pat->ble)
|
||
break;
|
||
BP = BP->next;
|
||
}
|
||
|
||
MPI_Allreduce(&dtvf, &tvf, 1, MPI_DOUBLE, MPI_SUM, Comm_here);
|
||
|
||
tvf = sqrt(tvf);
|
||
|
||
return tvf;
|
||
}
|
||
void Parallel::checkgsl(MyList<Parallel::gridseg> *pp, bool first_only)
|
||
{
|
||
int myrank = 0;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
if (myrank == 0)
|
||
{
|
||
if (!pp)
|
||
cout << " Parallel::checkgsl meets empty gsl" << endl;
|
||
while (pp)
|
||
{
|
||
if (pp->data->Bg)
|
||
cout << " on node#" << pp->data->Bg->rank << endl;
|
||
else
|
||
cout << " virtual grid segment" << endl;
|
||
cout << " shape: (";
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
if (i < dim - 1)
|
||
cout << pp->data->shape[i] << ",";
|
||
else
|
||
cout << pp->data->shape[i] << ")" << endl;
|
||
}
|
||
cout << " range: (";
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
if (i < dim - 1)
|
||
cout << pp->data->llb[i] << ":" << pp->data->uub[i] << ",";
|
||
else
|
||
cout << pp->data->llb[i] << ":" << pp->data->uub[i] << ")" << endl;
|
||
}
|
||
if (first_only)
|
||
return;
|
||
pp = pp->next;
|
||
}
|
||
}
|
||
}
|
||
void Parallel::checkvarl(MyList<var> *pp, bool first_only)
|
||
{
|
||
int myrank = 0;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
if (myrank == 0)
|
||
{
|
||
while (pp)
|
||
{
|
||
cout << "name: " << pp->data->name << endl;
|
||
cout << "SoA = (" << pp->data->SoA[0] << "," << pp->data->SoA[1] << "," << pp->data->SoA[2] << ")" << endl;
|
||
cout << "sgfn = " << pp->data->sgfn << endl;
|
||
if (first_only)
|
||
return;
|
||
pp = pp->next;
|
||
}
|
||
}
|
||
}
|
||
void Parallel::prepare_inter_time_level(MyList<Patch> *PatL,
|
||
MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */,
|
||
MyList<var> *VarList3 /* target (t+a*dt) */, int tindex)
|
||
{
|
||
while (PatL)
|
||
{
|
||
prepare_inter_time_level(PatL->data, VarList1, VarList2, VarList3, tindex);
|
||
PatL = PatL->next;
|
||
}
|
||
}
|
||
void Parallel::prepare_inter_time_level(Patch *Pat,
|
||
MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */,
|
||
MyList<var> *VarList3 /* target (t+a*dt) */, int tindex)
|
||
{
|
||
int myrank = 0;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
|
||
MyList<var> *varl1;
|
||
MyList<var> *varl2;
|
||
MyList<var> *varl3;
|
||
|
||
MyList<Block> *BP = Pat->blb;
|
||
while (BP)
|
||
{
|
||
Block *cg = BP->data;
|
||
if (myrank == cg->rank)
|
||
{
|
||
varl1 = VarList1;
|
||
varl2 = VarList2;
|
||
varl3 = VarList3;
|
||
while (varl1)
|
||
{
|
||
if (tindex == 0)
|
||
f_average(cg->shape, cg->fgfs[varl1->data->sgfn], cg->fgfs[varl2->data->sgfn], cg->fgfs[varl3->data->sgfn]);
|
||
else if (tindex == 1)
|
||
f_average3(cg->shape, cg->fgfs[varl1->data->sgfn], cg->fgfs[varl2->data->sgfn], cg->fgfs[varl3->data->sgfn]);
|
||
else if (tindex == -1)
|
||
// just change data order to use average3
|
||
f_average3(cg->shape, cg->fgfs[varl2->data->sgfn], cg->fgfs[varl1->data->sgfn], cg->fgfs[varl3->data->sgfn]);
|
||
else
|
||
{
|
||
cout << "error tindex in Parallel::prepare_inter_time_level" << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
varl1 = varl1->next;
|
||
varl2 = varl2->next;
|
||
varl3 = varl3->next;
|
||
}
|
||
}
|
||
if (BP == Pat->ble)
|
||
break;
|
||
BP = BP->next;
|
||
}
|
||
}
|
||
void Parallel::prepare_inter_time_level(MyList<Patch> *PatL,
|
||
MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */,
|
||
MyList<var> *VarList3 /* source (t-dt) */, MyList<var> *VarList4 /* target (t+a*dt) */, int tindex)
|
||
{
|
||
while (PatL)
|
||
{
|
||
prepare_inter_time_level(PatL->data, VarList1, VarList2, VarList3, VarList4, tindex);
|
||
PatL = PatL->next;
|
||
}
|
||
}
|
||
void Parallel::prepare_inter_time_level(Patch *Pat,
|
||
MyList<var> *VarList1 /* source (t+dt) */, MyList<var> *VarList2 /* source (t) */,
|
||
MyList<var> *VarList3 /* source (t-dt) */, MyList<var> *VarList4 /* target (t+a*dt) */, int tindex)
|
||
{
|
||
int myrank = 0;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
|
||
MyList<var> *varl1;
|
||
MyList<var> *varl2;
|
||
MyList<var> *varl3;
|
||
MyList<var> *varl4;
|
||
|
||
MyList<Block> *BP = Pat->blb;
|
||
while (BP)
|
||
{
|
||
Block *cg = BP->data;
|
||
if (myrank == cg->rank)
|
||
{
|
||
varl1 = VarList1;
|
||
varl2 = VarList2;
|
||
varl3 = VarList3;
|
||
varl4 = VarList4;
|
||
while (varl1)
|
||
{
|
||
if (tindex == 0)
|
||
f_average2(cg->shape, cg->fgfs[varl1->data->sgfn], cg->fgfs[varl2->data->sgfn],
|
||
cg->fgfs[varl3->data->sgfn], cg->fgfs[varl4->data->sgfn]);
|
||
else if (tindex == 1)
|
||
f_average2p(cg->shape, cg->fgfs[varl1->data->sgfn], cg->fgfs[varl2->data->sgfn],
|
||
cg->fgfs[varl3->data->sgfn], cg->fgfs[varl4->data->sgfn]);
|
||
else if (tindex == -1)
|
||
f_average2m(cg->shape, cg->fgfs[varl1->data->sgfn], cg->fgfs[varl2->data->sgfn],
|
||
cg->fgfs[varl3->data->sgfn], cg->fgfs[varl4->data->sgfn]);
|
||
else
|
||
{
|
||
cout << "error tindex in long cgh::prepare_inter_time_level" << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
varl1 = varl1->next;
|
||
varl2 = varl2->next;
|
||
varl3 = varl3->next;
|
||
varl4 = varl4->next;
|
||
}
|
||
}
|
||
if (BP == Pat->ble)
|
||
break;
|
||
BP = BP->next;
|
||
}
|
||
}
|
||
void Parallel::Prolong(Patch *Patc, Patch *Patf,
|
||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||
int Symmetry)
|
||
{
|
||
if (Patc->lev >= Patf->lev)
|
||
{
|
||
cout << "Parallel::Prolong: meet requst of Prolong from lev#" << Patc->lev << " to lev#" << Patf->lev << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
int cpusize;
|
||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||
|
||
MyList<Parallel::gridseg> *dst;
|
||
MyList<Parallel::gridseg> **src, **transfer_src, **transfer_dst;
|
||
src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_dst = new MyList<Parallel::gridseg> *[cpusize];
|
||
|
||
dst = build_complete_gsl(Patf); // including ghost
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
src[node] = build_owned_gsl4(Patc, node, Symmetry); // - buffer - ghost - BD ghost
|
||
build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node
|
||
}
|
||
|
||
transfer(transfer_src, transfer_dst, VarList1, VarList2, Symmetry);
|
||
|
||
if (dst)
|
||
dst->destroyList();
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
if (src[node])
|
||
src[node]->destroyList();
|
||
if (transfer_src[node])
|
||
transfer_src[node]->destroyList();
|
||
if (transfer_dst[node])
|
||
transfer_dst[node]->destroyList();
|
||
}
|
||
|
||
delete[] src;
|
||
delete[] transfer_src;
|
||
delete[] transfer_dst;
|
||
}
|
||
void Parallel::Restrict(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||
int Symmetry)
|
||
{
|
||
if (PatcL->data->lev >= PatfL->data->lev)
|
||
{
|
||
cout << "Parallel::Restrict: meet requst of Restrict from lev#" << PatfL->data->lev << " to lev#" << PatcL->data->lev << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
int cpusize;
|
||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||
|
||
MyList<Parallel::gridseg> *dst;
|
||
MyList<Parallel::gridseg> **src, **transfer_src, **transfer_dst;
|
||
src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_dst = new MyList<Parallel::gridseg> *[cpusize];
|
||
|
||
dst = build_complete_gsl(PatcL); // including ghost
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
#if 0
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
src[node]=build_owned_gsl(PatfL,node,2,Symmetry); // - buffer - ghost
|
||
#else
|
||
#ifdef Cell
|
||
src[node]=build_owned_gsl(PatfL,node,4,Symmetry); // - buffer - ghost - BD ghost
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
#else
|
||
// it seems bam always use this
|
||
src[node] = build_owned_gsl(PatfL, node, 2, Symmetry); // - buffer - ghost
|
||
#endif
|
||
build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node
|
||
}
|
||
|
||
transfer(transfer_src, transfer_dst, VarList1, VarList2, Symmetry);
|
||
|
||
if (dst)
|
||
dst->destroyList();
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
if (src[node])
|
||
src[node]->destroyList();
|
||
if (transfer_src[node])
|
||
transfer_src[node]->destroyList();
|
||
if (transfer_dst[node])
|
||
transfer_dst[node]->destroyList();
|
||
}
|
||
|
||
delete[] src;
|
||
delete[] transfer_src;
|
||
delete[] transfer_dst;
|
||
}
|
||
void Parallel::Restrict_after(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||
int Symmetry)
|
||
{
|
||
if (PatcL->data->lev >= PatfL->data->lev)
|
||
{
|
||
cout << "Parallel::Restrict: meet requst of Restrict from lev#" << PatfL->data->lev << " to lev#" << PatcL->data->lev << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
int cpusize;
|
||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||
|
||
MyList<Parallel::gridseg> *dst;
|
||
MyList<Parallel::gridseg> **src, **transfer_src, **transfer_dst;
|
||
src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_dst = new MyList<Parallel::gridseg> *[cpusize];
|
||
|
||
dst = build_complete_gsl(PatcL); // including ghost
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
src[node] = build_owned_gsl(PatfL, node, 3, Symmetry); // - ghost - BD ghost
|
||
|
||
build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node
|
||
}
|
||
|
||
transfer(transfer_src, transfer_dst, VarList1, VarList2, Symmetry);
|
||
|
||
if (dst)
|
||
dst->destroyList();
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
if (src[node])
|
||
src[node]->destroyList();
|
||
if (transfer_src[node])
|
||
transfer_src[node]->destroyList();
|
||
if (transfer_dst[node])
|
||
transfer_dst[node]->destroyList();
|
||
}
|
||
|
||
delete[] src;
|
||
delete[] transfer_src;
|
||
delete[] transfer_dst;
|
||
}
|
||
// for the same time level
|
||
void Parallel::OutBdLow2Hi(Patch *Patc, Patch *Patf,
|
||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||
int Symmetry)
|
||
{
|
||
if (Patc->lev >= Patf->lev)
|
||
{
|
||
cout << "Parallel::OutBdLow2Hi: meet requst of Prolong from lev#" << Patc->lev << " to lev#" << Patf->lev << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
int cpusize;
|
||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||
|
||
MyList<Parallel::gridseg> *dst;
|
||
MyList<Parallel::gridseg> **src, **transfer_src, **transfer_dst;
|
||
src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_dst = new MyList<Parallel::gridseg> *[cpusize];
|
||
|
||
dst = build_buffer_gsl(Patf); // buffer region only
|
||
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
src[node] = build_owned_gsl4(Patc, node, Symmetry); // - buffer - ghost - BD ghost
|
||
build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node
|
||
}
|
||
|
||
transfer(transfer_src, transfer_dst, VarList1, VarList2, Symmetry);
|
||
|
||
if (dst)
|
||
dst->destroyList();
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
if (src[node])
|
||
src[node]->destroyList();
|
||
if (transfer_src[node])
|
||
transfer_src[node]->destroyList();
|
||
if (transfer_dst[node])
|
||
transfer_dst[node]->destroyList();
|
||
}
|
||
|
||
delete[] src;
|
||
delete[] transfer_src;
|
||
delete[] transfer_dst;
|
||
}
|
||
void Parallel::OutBdLow2Hi(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||
int Symmetry)
|
||
{
|
||
MyList<Patch> *Pp, *Ppc;
|
||
Ppc = PatcL;
|
||
while (Ppc)
|
||
{
|
||
Pp = PatfL;
|
||
while (Pp)
|
||
{
|
||
if (Ppc->data->lev >= Pp->data->lev)
|
||
{
|
||
cout << "Parallel::OutBdLow2Hi(list): meet requst of Prolong from lev#" << Ppc->data->lev << " to lev#" << Pp->data->lev << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
Pp = Pp->next;
|
||
}
|
||
Ppc = Ppc->next;
|
||
}
|
||
|
||
int cpusize;
|
||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||
|
||
MyList<Parallel::gridseg> *dst;
|
||
MyList<Parallel::gridseg> **src, **transfer_src, **transfer_dst;
|
||
src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_dst = new MyList<Parallel::gridseg> *[cpusize];
|
||
|
||
dst = build_buffer_gsl(PatfL); // buffer region only
|
||
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
src[node] = build_owned_gsl(PatcL, node, 4, Symmetry); // - buffer - ghost - BD ghost
|
||
build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node
|
||
}
|
||
|
||
transfer(transfer_src, transfer_dst, VarList1, VarList2, Symmetry);
|
||
|
||
if (dst)
|
||
dst->destroyList();
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
if (src[node])
|
||
src[node]->destroyList();
|
||
if (transfer_src[node])
|
||
transfer_src[node]->destroyList();
|
||
if (transfer_dst[node])
|
||
transfer_dst[node]->destroyList();
|
||
}
|
||
|
||
delete[] src;
|
||
delete[] transfer_src;
|
||
delete[] transfer_dst;
|
||
}
|
||
// for the same time level
|
||
void Parallel::OutBdLow2Himix(Patch *Patc, Patch *Patf,
|
||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||
int Symmetry)
|
||
{
|
||
if (Patc->lev >= Patf->lev)
|
||
{
|
||
cout << "Parallel::OutBdLow2Himix: meet requst of Prolong from lev#" << Patc->lev << " to lev#" << Patf->lev << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
int cpusize;
|
||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||
|
||
MyList<Parallel::gridseg> *dst;
|
||
MyList<Parallel::gridseg> **src, **transfer_src, **transfer_dst;
|
||
src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_dst = new MyList<Parallel::gridseg> *[cpusize];
|
||
|
||
dst = build_buffer_gsl(Patf); // buffer region only
|
||
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
src[node] = build_owned_gsl4(Patc, node, Symmetry); // - buffer - ghost - BD ghost
|
||
build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node
|
||
}
|
||
|
||
transfermix(transfer_src, transfer_dst, VarList1, VarList2, Symmetry);
|
||
|
||
if (dst)
|
||
dst->destroyList();
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
if (src[node])
|
||
src[node]->destroyList();
|
||
if (transfer_src[node])
|
||
transfer_src[node]->destroyList();
|
||
if (transfer_dst[node])
|
||
transfer_dst[node]->destroyList();
|
||
}
|
||
|
||
delete[] src;
|
||
delete[] transfer_src;
|
||
delete[] transfer_dst;
|
||
|
||
// do not need this, we have done after calling of this routine in ProlongRestrict or RestrictProlong
|
||
// Sync(Patf,VarList2,Symmetry); // fine level points may be not enough for interpolation
|
||
}
|
||
void Parallel::OutBdLow2Himix(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||
int Symmetry)
|
||
{
|
||
MyList<Patch> *Pp, *Ppc;
|
||
Ppc = PatcL;
|
||
while (Ppc)
|
||
{
|
||
Pp = PatfL;
|
||
while (Pp)
|
||
{
|
||
if (Ppc->data->lev >= Pp->data->lev)
|
||
{
|
||
cout << "Parallel::OutBdLow2Himix(list): meet requst of Prolong from lev#" << Ppc->data->lev << " to lev#" << Pp->data->lev << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
Pp = Pp->next;
|
||
}
|
||
Ppc = Ppc->next;
|
||
}
|
||
|
||
int cpusize;
|
||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||
|
||
MyList<Parallel::gridseg> *dst;
|
||
MyList<Parallel::gridseg> **src, **transfer_src, **transfer_dst;
|
||
src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_dst = new MyList<Parallel::gridseg> *[cpusize];
|
||
|
||
dst = build_buffer_gsl(PatfL); // buffer region only
|
||
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
src[node] = build_owned_gsl(PatcL, node, 4, Symmetry); // - buffer - ghost - BD ghost
|
||
build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node
|
||
}
|
||
|
||
transfermix(transfer_src, transfer_dst, VarList1, VarList2, Symmetry);
|
||
|
||
if (dst)
|
||
dst->destroyList();
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
if (src[node])
|
||
src[node]->destroyList();
|
||
if (transfer_src[node])
|
||
transfer_src[node]->destroyList();
|
||
if (transfer_dst[node])
|
||
transfer_dst[node]->destroyList();
|
||
}
|
||
|
||
delete[] src;
|
||
delete[] transfer_src;
|
||
delete[] transfer_dst;
|
||
}
|
||
|
||
// Restrict_cached: cache grid segment lists, reuse buffers via transfer_cached
|
||
void Parallel::Restrict_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
||
MyList<var> *VarList1, MyList<var> *VarList2,
|
||
int Symmetry, SyncCache &cache)
|
||
{
|
||
if (!cache.valid)
|
||
{
|
||
int cpusize;
|
||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||
cache.cpusize = cpusize;
|
||
|
||
if (!cache.combined_src)
|
||
{
|
||
cache.combined_src = new MyList<Parallel::gridseg> *[cpusize];
|
||
cache.combined_dst = new MyList<Parallel::gridseg> *[cpusize];
|
||
cache.send_lengths = new int[cpusize];
|
||
cache.recv_lengths = new int[cpusize];
|
||
cache.send_bufs = new double *[cpusize];
|
||
cache.recv_bufs = new double *[cpusize];
|
||
cache.send_buf_caps = new int[cpusize];
|
||
cache.recv_buf_caps = new int[cpusize];
|
||
for (int i = 0; i < cpusize; i++)
|
||
{
|
||
cache.send_bufs[i] = cache.recv_bufs[i] = 0;
|
||
cache.send_buf_caps[i] = cache.recv_buf_caps[i] = 0;
|
||
}
|
||
cache.max_reqs = 2 * cpusize;
|
||
cache.reqs = new MPI_Request[cache.max_reqs];
|
||
cache.stats = new MPI_Status[cache.max_reqs];
|
||
}
|
||
|
||
MyList<Parallel::gridseg> *dst = build_complete_gsl(PatcL);
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
MyList<Parallel::gridseg> *src_owned = build_owned_gsl(PatfL, node, 2, Symmetry);
|
||
build_gstl(src_owned, dst, &cache.combined_src[node], &cache.combined_dst[node]);
|
||
if (src_owned) src_owned->destroyList();
|
||
}
|
||
if (dst) dst->destroyList();
|
||
|
||
cache.valid = true;
|
||
}
|
||
|
||
transfer_cached(cache.combined_src, cache.combined_dst, VarList1, VarList2, Symmetry, cache);
|
||
}
|
||
|
||
// OutBdLow2Hi_cached: cache grid segment lists, reuse buffers via transfer_cached
|
||
void Parallel::OutBdLow2Hi_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
||
MyList<var> *VarList1, MyList<var> *VarList2,
|
||
int Symmetry, SyncCache &cache)
|
||
{
|
||
if (!cache.valid)
|
||
{
|
||
int cpusize;
|
||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||
cache.cpusize = cpusize;
|
||
|
||
if (!cache.combined_src)
|
||
{
|
||
cache.combined_src = new MyList<Parallel::gridseg> *[cpusize];
|
||
cache.combined_dst = new MyList<Parallel::gridseg> *[cpusize];
|
||
cache.send_lengths = new int[cpusize];
|
||
cache.recv_lengths = new int[cpusize];
|
||
cache.send_bufs = new double *[cpusize];
|
||
cache.recv_bufs = new double *[cpusize];
|
||
cache.send_buf_caps = new int[cpusize];
|
||
cache.recv_buf_caps = new int[cpusize];
|
||
for (int i = 0; i < cpusize; i++)
|
||
{
|
||
cache.send_bufs[i] = cache.recv_bufs[i] = 0;
|
||
cache.send_buf_caps[i] = cache.recv_buf_caps[i] = 0;
|
||
}
|
||
cache.max_reqs = 2 * cpusize;
|
||
cache.reqs = new MPI_Request[cache.max_reqs];
|
||
cache.stats = new MPI_Status[cache.max_reqs];
|
||
}
|
||
|
||
MyList<Parallel::gridseg> *dst = build_buffer_gsl(PatfL);
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
MyList<Parallel::gridseg> *src_owned = build_owned_gsl(PatcL, node, 4, Symmetry);
|
||
build_gstl(src_owned, dst, &cache.combined_src[node], &cache.combined_dst[node]);
|
||
if (src_owned) src_owned->destroyList();
|
||
}
|
||
if (dst) dst->destroyList();
|
||
|
||
cache.valid = true;
|
||
}
|
||
|
||
transfer_cached(cache.combined_src, cache.combined_dst, VarList1, VarList2, Symmetry, cache);
|
||
}
|
||
|
||
// OutBdLow2Himix_cached: same as OutBdLow2Hi_cached but uses transfermix for unpacking
|
||
void Parallel::OutBdLow2Himix_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
||
MyList<var> *VarList1, MyList<var> *VarList2,
|
||
int Symmetry, SyncCache &cache)
|
||
{
|
||
if (!cache.valid)
|
||
{
|
||
int cpusize;
|
||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||
cache.cpusize = cpusize;
|
||
|
||
if (!cache.combined_src)
|
||
{
|
||
cache.combined_src = new MyList<Parallel::gridseg> *[cpusize];
|
||
cache.combined_dst = new MyList<Parallel::gridseg> *[cpusize];
|
||
cache.send_lengths = new int[cpusize];
|
||
cache.recv_lengths = new int[cpusize];
|
||
cache.send_bufs = new double *[cpusize];
|
||
cache.recv_bufs = new double *[cpusize];
|
||
cache.send_buf_caps = new int[cpusize];
|
||
cache.recv_buf_caps = new int[cpusize];
|
||
for (int i = 0; i < cpusize; i++)
|
||
{
|
||
cache.send_bufs[i] = cache.recv_bufs[i] = 0;
|
||
cache.send_buf_caps[i] = cache.recv_buf_caps[i] = 0;
|
||
}
|
||
cache.max_reqs = 2 * cpusize;
|
||
cache.reqs = new MPI_Request[cache.max_reqs];
|
||
cache.stats = new MPI_Status[cache.max_reqs];
|
||
}
|
||
|
||
MyList<Parallel::gridseg> *dst = build_buffer_gsl(PatfL);
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
MyList<Parallel::gridseg> *src_owned = build_owned_gsl(PatcL, node, 4, Symmetry);
|
||
build_gstl(src_owned, dst, &cache.combined_src[node], &cache.combined_dst[node]);
|
||
if (src_owned) src_owned->destroyList();
|
||
}
|
||
if (dst) dst->destroyList();
|
||
|
||
cache.valid = true;
|
||
}
|
||
|
||
// Use transfermix instead of transfer for mix-mode interpolation
|
||
int myrank;
|
||
MPI_Comm_size(MPI_COMM_WORLD, &cache.cpusize);
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
int cpusize = cache.cpusize;
|
||
|
||
int req_no = 0;
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
if (node == myrank)
|
||
{
|
||
int length = data_packermix(0, cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
||
cache.recv_lengths[node] = length;
|
||
if (length > 0)
|
||
{
|
||
if (length > cache.recv_buf_caps[node])
|
||
{
|
||
if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node];
|
||
cache.recv_bufs[node] = new double[length];
|
||
cache.recv_buf_caps[node] = length;
|
||
}
|
||
data_packermix(cache.recv_bufs[node], cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
int slength = data_packermix(0, cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
||
cache.send_lengths[node] = slength;
|
||
if (slength > 0)
|
||
{
|
||
if (slength > cache.send_buf_caps[node])
|
||
{
|
||
if (cache.send_bufs[node]) delete[] cache.send_bufs[node];
|
||
cache.send_bufs[node] = new double[slength];
|
||
cache.send_buf_caps[node] = slength;
|
||
}
|
||
data_packermix(cache.send_bufs[node], cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
||
MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no++);
|
||
}
|
||
int rlength = data_packermix(0, cache.combined_src[node], cache.combined_dst[node], node, UNPACK, VarList1, VarList2, Symmetry);
|
||
cache.recv_lengths[node] = rlength;
|
||
if (rlength > 0)
|
||
{
|
||
if (rlength > cache.recv_buf_caps[node])
|
||
{
|
||
if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node];
|
||
cache.recv_bufs[node] = new double[rlength];
|
||
cache.recv_buf_caps[node] = rlength;
|
||
}
|
||
MPI_Irecv((void *)cache.recv_bufs[node], rlength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no++);
|
||
}
|
||
}
|
||
}
|
||
|
||
MPI_Waitall(req_no, cache.reqs, cache.stats);
|
||
|
||
for (int node = 0; node < cpusize; node++)
|
||
if (cache.recv_bufs[node] && cache.recv_lengths[node] > 0)
|
||
data_packermix(cache.recv_bufs[node], cache.combined_src[node], cache.combined_dst[node], node, UNPACK, VarList1, VarList2, Symmetry);
|
||
}
|
||
|
||
// collect all buffer grid segments or blocks for given patch
|
||
MyList<Parallel::gridseg> *Parallel::build_buffer_gsl(Patch *Pat)
|
||
{
|
||
MyList<Parallel::gridseg> *cgsl, *gsc, *gsb;
|
||
|
||
gsc = build_complete_gsl(Pat); // including ghost
|
||
|
||
gsb = new MyList<Parallel::gridseg>;
|
||
gsb->data = new Parallel::gridseg;
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
double DH = Pat->blb->data->getdX(i);
|
||
gsb->data->uub[i] = Pat->bbox[dim + i] - Pat->uui[i] * DH;
|
||
gsb->data->llb[i] = Pat->bbox[i] + Pat->lli[i] * DH;
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
gsb->data->shape[i] = int((gsb->data->uub[i] - gsb->data->llb[i]) / DH + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
gsb->data->shape[i] = int((gsb->data->uub[i] - gsb->data->llb[i]) / DH + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
gsb->data->Bg = 0;
|
||
gsb->next = 0;
|
||
|
||
cgsl = gsl_subtract(gsc, gsb);
|
||
|
||
gsc->destroyList();
|
||
gsb->destroyList();
|
||
|
||
// set illb and iuub
|
||
gsb = cgsl;
|
||
while (gsb)
|
||
{
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
double DH = Pat->blb->data->getdX(i);
|
||
gsb->data->iuub[i] = Pat->bbox[dim + i] - Pat->uui[i] * DH;
|
||
gsb->data->illb[i] = Pat->bbox[i] + Pat->lli[i] * DH;
|
||
}
|
||
gsb = gsb->next;
|
||
}
|
||
|
||
return cgsl;
|
||
}
|
||
MyList<Parallel::gridseg> *Parallel::build_buffer_gsl(MyList<Patch> *PatL)
|
||
{
|
||
MyList<Parallel::gridseg> *cgsl = 0, *gs;
|
||
while (PatL)
|
||
{
|
||
if (cgsl)
|
||
{
|
||
gs->next = build_buffer_gsl(PatL->data);
|
||
gs = gs->next;
|
||
if (gs)
|
||
while (gs->next)
|
||
gs = gs->next;
|
||
}
|
||
else
|
||
{
|
||
cgsl = build_buffer_gsl(PatL->data);
|
||
gs = cgsl;
|
||
if (gs)
|
||
while (gs->next)
|
||
gs = gs->next;
|
||
}
|
||
PatL = PatL->next;
|
||
}
|
||
|
||
return cgsl;
|
||
}
|
||
void Parallel::Prolongint(Patch *Patc, Patch *Patf,
|
||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||
int Symmetry)
|
||
{
|
||
if (Patc->lev >= Patf->lev)
|
||
{
|
||
cout << "Parallel::Prolong: meet requst of Prolong from lev#" << Patc->lev << " to lev#" << Patf->lev << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
|
||
int myrank = 0;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
|
||
int num_var = 0;
|
||
MyList<var> *varl;
|
||
varl = VarList1;
|
||
while (varl)
|
||
{
|
||
num_var++;
|
||
varl = varl->next;
|
||
}
|
||
|
||
MyList<Block> *BP = Patf->blb;
|
||
while (BP)
|
||
{
|
||
int Npts;
|
||
if (myrank == BP->data->rank)
|
||
Npts = BP->data->shape[0] * BP->data->shape[1] * BP->data->shape[2];
|
||
MPI_Bcast(&Npts, 1, MPI_INT, BP->data->rank, MPI_COMM_WORLD);
|
||
double *pox[3];
|
||
for (int i = 0; i < 3; i++)
|
||
pox[i] = new double[Npts];
|
||
if (myrank == BP->data->rank)
|
||
{
|
||
for (int i = 0; i < Npts; i++)
|
||
{
|
||
int ind[3];
|
||
Parallel::getarrayindex(3, BP->data->shape, ind, i);
|
||
pox[0][i] = BP->data->X[0][ind[0]];
|
||
pox[1][i] = BP->data->X[1][ind[1]];
|
||
pox[2][i] = BP->data->X[2][ind[2]];
|
||
}
|
||
}
|
||
for (int i = 0; i < 3; i++)
|
||
MPI_Bcast(pox[i], Npts, MPI_DOUBLE, BP->data->rank, MPI_COMM_WORLD);
|
||
double *res;
|
||
res = new double[num_var * Npts];
|
||
Patc->Interp_Points(VarList1, Npts, pox, res, Symmetry); // because this operation is a global operation (for all processors)
|
||
// we have to isolate it out of myrank==BP->data->rank
|
||
if (myrank == BP->data->rank)
|
||
{
|
||
for (int i = 0; i < Npts; i++)
|
||
{
|
||
varl = VarList2;
|
||
int j = 0;
|
||
while (varl)
|
||
{
|
||
(BP->data->fgfs[varl->data->sgfn])[i] = res[j + i * num_var];
|
||
j++;
|
||
varl = varl->next;
|
||
}
|
||
}
|
||
}
|
||
delete[] pox[0];
|
||
delete[] pox[1];
|
||
delete[] pox[2];
|
||
delete[] res;
|
||
BP = BP->next;
|
||
}
|
||
}
|
||
//
|
||
void Parallel::merge_gsl(MyList<gridseg> *&A, const double ratio)
|
||
{
|
||
if (!A)
|
||
return;
|
||
|
||
MyList<gridseg> *B, *C, *D = A;
|
||
bool flag = false;
|
||
while (D->next)
|
||
{
|
||
B = D->next;
|
||
while (B)
|
||
{
|
||
flag = merge_gs(D, B, C, ratio);
|
||
if (flag)
|
||
break;
|
||
B = B->next;
|
||
}
|
||
if (flag)
|
||
break;
|
||
D = D->next;
|
||
}
|
||
|
||
if (flag)
|
||
{
|
||
// delete D and B from A
|
||
MyList<gridseg> *E = A;
|
||
while (E->next)
|
||
{
|
||
MyList<gridseg> *tp = E->next;
|
||
if (D == tp || B == tp)
|
||
{
|
||
E->next = (tp->next) ? tp->next : 0;
|
||
delete tp->data;
|
||
delete tp;
|
||
}
|
||
if (E->next)
|
||
E = E->next;
|
||
}
|
||
|
||
if (D == A)
|
||
{
|
||
MyList<gridseg> *tp = A;
|
||
A = (A->next) ? A->next : 0;
|
||
delete tp->data;
|
||
delete tp;
|
||
}
|
||
// cat C to A
|
||
if (A)
|
||
A->catList(C);
|
||
else
|
||
A = C;
|
||
|
||
merge_gsl(A, ratio);
|
||
}
|
||
}
|
||
//
|
||
bool Parallel::merge_gs(MyList<gridseg> *D, MyList<gridseg> *B, MyList<gridseg> *&C, const double ratio)
|
||
{
|
||
if (!B || !D)
|
||
return false;
|
||
|
||
C = 0;
|
||
double llb[dim], uub[dim], DH[dim];
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
double tdh;
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
DH[i] = (D->data->uub[i] - D->data->llb[i]) / (D->data->shape[i] - 1);
|
||
tdh = (B->data->uub[i] - B->data->llb[i]) / (B->data->shape[i] - 1);
|
||
#else
|
||
#ifdef Cell
|
||
DH[i] = (D->data->uub[i] - D->data->llb[i]) / D->data->shape[i];
|
||
tdh = (B->data->uub[i] - B->data->llb[i]) / B->data->shape[i];
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
if (!feq(DH[i], tdh, DH[i] / 2))
|
||
{
|
||
cout << "Parallel::merge_gs meets different grid segment " << DH[i] << " vs " << tdh << endl;
|
||
checkgsl(B, true);
|
||
checkgsl(D, true);
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
llb[i] = Mymax(D->data->llb[i], B->data->llb[i]);
|
||
uub[i] = Mymin(D->data->uub[i], B->data->uub[i]);
|
||
// if(uub[i]-llb[i] < DH[i]/2) return false; //here this is valid for both vertex and cell
|
||
|
||
// use 0 instead of DH[i]/2, we consider contact case, 2012 Aug 8
|
||
if (uub[i] - llb[i] < 0)
|
||
return false; // here this is valid for both vertex and cell
|
||
}
|
||
|
||
// vb: volume of B
|
||
// vd: volume of D
|
||
// vo: volume of overlap
|
||
// vt: volume of smallest common box (virtual merged box)
|
||
double vd = 1, vb = 1, vt = 1, vo = 1;
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
vt = vt * (Mymax(D->data->uub[i], B->data->uub[i]) - Mymin(D->data->llb[i], B->data->llb[i]));
|
||
vo = vo * (uub[i] - llb[i]);
|
||
vd = vd * (D->data->uub[i] - D->data->llb[i]);
|
||
vb = vb * (B->data->uub[i] - B->data->llb[i]);
|
||
}
|
||
|
||
// smller ratio, more possible to merge
|
||
if ((vd + vb - vo) / vt > ratio)
|
||
{
|
||
C = new MyList<gridseg>;
|
||
C->data = new gridseg;
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
C->data->uub[i] = Mymax(D->data->uub[i], B->data->uub[i]);
|
||
C->data->llb[i] = Mymin(D->data->llb[i], B->data->llb[i]);
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
C->data->shape[i] = int((C->data->uub[i] - C->data->llb[i]) / DH[i] + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
C->data->shape[i] = int((C->data->uub[i] - C->data->llb[i]) / DH[i] + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
if (D->data->Bg == B->data->Bg)
|
||
C->data->Bg = D->data->Bg;
|
||
else
|
||
C->data->Bg = 0;
|
||
|
||
C->next = 0;
|
||
|
||
return true;
|
||
}
|
||
else
|
||
{
|
||
return false;
|
||
}
|
||
}
|
||
// Add ghost region to tangent plane
|
||
// we assume the grids have the same resolution
|
||
void Parallel::add_ghost_touch(MyList<gridseg> *&A)
|
||
{
|
||
if (!A || !(A->next))
|
||
return;
|
||
|
||
double DH[dim];
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
for (int i = 0; i < dim; i++)
|
||
DH[i] = (A->data->uub[i] - A->data->llb[i]) / (A->data->shape[i] - 1) / 2;
|
||
#else
|
||
#ifdef Cell
|
||
for (int i = 0; i < dim; i++)
|
||
DH[i] = (A->data->uub[i] - A->data->llb[i]) / A->data->shape[i] / 2;
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
|
||
MyList<gridseg> *C1, *C2, *A1 = A, *A2, *dc;
|
||
dc = C1 = clone_gsl(A, false);
|
||
while (C1)
|
||
{
|
||
C2 = C1->next;
|
||
A2 = A1->next;
|
||
while (C2)
|
||
{
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
if (feq(C1->data->llb[i], C2->data->uub[i], DH[i]))
|
||
{
|
||
// direction i touch, other directions overlap
|
||
bool flag = true;
|
||
for (int j = 0; j < i; j++)
|
||
if ((C1->data->llb[j] - C2->data->llb[j]) * (C1->data->uub[j] - C2->data->llb[j]) > 0 &&
|
||
(C2->data->llb[j] - C1->data->llb[j]) * (C2->data->uub[j] - C1->data->llb[j]) > 0)
|
||
flag = false;
|
||
for (int j = i + 1; j < dim; j++)
|
||
if ((C1->data->llb[j] - C2->data->llb[j]) * (C1->data->uub[j] - C2->data->llb[j]) > 0 &&
|
||
(C2->data->llb[j] - C1->data->llb[j]) * (C2->data->uub[j] - C1->data->llb[j]) > 0)
|
||
flag = false;
|
||
|
||
if (flag)
|
||
{
|
||
// only add one ghost region
|
||
if (feq(A1->data->llb[i], C1->data->llb[i], DH[i]))
|
||
{
|
||
A1->data->llb[i] -= ghost_width * 2 * DH[i];
|
||
A1->data->shape[i] += ghost_width;
|
||
}
|
||
if (feq(A2->data->uub[i], C2->data->uub[i], DH[i]))
|
||
{
|
||
A2->data->uub[i] += ghost_width * 2 * DH[i];
|
||
A2->data->shape[i] += ghost_width;
|
||
}
|
||
}
|
||
}
|
||
if (feq(C1->data->uub[i], C2->data->llb[i], DH[i]))
|
||
{
|
||
// direction i touch, other directions overlap
|
||
bool flag = true;
|
||
for (int j = 0; j < i; j++)
|
||
if ((C1->data->llb[j] - C2->data->llb[j]) * (C1->data->uub[j] - C2->data->llb[j]) > 0 &&
|
||
(C2->data->llb[j] - C1->data->llb[j]) * (C2->data->uub[j] - C1->data->llb[j]) > 0)
|
||
flag = false;
|
||
for (int j = i + 1; j < dim; j++)
|
||
if ((C1->data->llb[j] - C2->data->llb[j]) * (C1->data->uub[j] - C2->data->llb[j]) > 0 &&
|
||
(C2->data->llb[j] - C1->data->llb[j]) * (C2->data->uub[j] - C1->data->llb[j]) > 0)
|
||
flag = false;
|
||
|
||
if (flag)
|
||
{
|
||
// only add one ghost region
|
||
if (feq(A1->data->uub[i], C1->data->uub[i], DH[i]))
|
||
{
|
||
A1->data->uub[i] += ghost_width * 2 * DH[i];
|
||
A1->data->shape[i] += ghost_width;
|
||
}
|
||
if (feq(A2->data->llb[i], C2->data->llb[i], DH[i]))
|
||
{
|
||
A2->data->llb[i] -= ghost_width * 2 * DH[i];
|
||
A2->data->shape[i] += ghost_width;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
C2 = C2->next;
|
||
A2 = A2->next;
|
||
}
|
||
C1 = C1->next;
|
||
A1 = A1->next;
|
||
}
|
||
|
||
if (dc)
|
||
dc->destroyList();
|
||
}
|
||
// According to overlap to cut the gsl into recular pices
|
||
void Parallel::cut_gsl(MyList<gridseg> *&A)
|
||
{
|
||
if (!A)
|
||
return;
|
||
|
||
MyList<gridseg> *B, *C, *D = A;
|
||
bool flag = false;
|
||
while (D->next)
|
||
{
|
||
B = D->next;
|
||
while (B)
|
||
{
|
||
flag = cut_gs(D, B, C);
|
||
if (flag)
|
||
break;
|
||
B = B->next;
|
||
}
|
||
if (flag)
|
||
break;
|
||
D = D->next;
|
||
}
|
||
|
||
if (flag)
|
||
{
|
||
// delete D and B from A
|
||
MyList<gridseg> *E = A;
|
||
while (E->next)
|
||
{
|
||
MyList<gridseg> *tp = E->next;
|
||
if (D == tp || B == tp)
|
||
{
|
||
E->next = (tp->next) ? tp->next : 0;
|
||
delete tp->data;
|
||
delete tp;
|
||
}
|
||
if (E->next)
|
||
E = E->next;
|
||
}
|
||
|
||
if (D == A)
|
||
{
|
||
MyList<gridseg> *tp = A;
|
||
A = (A->next) ? A->next : 0;
|
||
delete tp->data;
|
||
delete tp;
|
||
}
|
||
// cat C to A
|
||
if (A)
|
||
A->catList(C);
|
||
else
|
||
A = C;
|
||
|
||
cut_gsl(A);
|
||
}
|
||
}
|
||
// when D and B have overlap, cut them into C and return true
|
||
// otherwise return false and C=0
|
||
bool Parallel::cut_gs(MyList<gridseg> *D, MyList<gridseg> *B, MyList<gridseg> *&C)
|
||
{
|
||
C = 0;
|
||
double llb[dim], uub[dim], DH[dim];
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
double tdh;
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
DH[i] = (D->data->uub[i] - D->data->llb[i]) / (D->data->shape[i] - 1);
|
||
tdh = (B->data->uub[i] - B->data->llb[i]) / (B->data->shape[i] - 1);
|
||
#else
|
||
#ifdef Cell
|
||
DH[i] = (D->data->uub[i] - D->data->llb[i]) / D->data->shape[i];
|
||
tdh = (B->data->uub[i] - B->data->llb[i]) / B->data->shape[i];
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
if (!feq(DH[i], tdh, DH[i] / 2))
|
||
{
|
||
cout << "Parallel::cut_gs meets different grid segment " << DH[i] << " vs " << tdh << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
llb[i] = Mymax(D->data->llb[i], B->data->llb[i]);
|
||
uub[i] = Mymin(D->data->uub[i], B->data->uub[i]);
|
||
// for efficiency we ask the width of the patch at least 2(buffer+ghost+BD ghost)
|
||
if (uub[i] - llb[i] < DH[i] * 2 * (buffer_width + 2 * ghost_width))
|
||
return false; // here this is valid for both vertex and cell
|
||
}
|
||
|
||
// this part code results in 5 patches generally
|
||
|
||
C = new MyList<gridseg>;
|
||
C->data = new gridseg;
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
C->data->llb[i] = llb[i];
|
||
C->data->uub[i] = uub[i];
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
C->data->shape[i] = int((C->data->uub[i] - C->data->llb[i]) / DH[i] + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
C->data->shape[i] = int((C->data->uub[i] - C->data->llb[i]) / DH[i] + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
if (D->data->Bg == B->data->Bg)
|
||
C->data->Bg = D->data->Bg;
|
||
else
|
||
C->data->Bg = 0;
|
||
|
||
C->next = gs_subtract_virtual(D, C);
|
||
|
||
MyList<gridseg> *E = C;
|
||
|
||
while (E->next)
|
||
E = E->next;
|
||
|
||
E->next = gs_subtract_virtual(B, C);
|
||
|
||
// this part code results in 3 patches generally
|
||
/*
|
||
C = clone_gsl(D,true);
|
||
C->next = gs_subtract_virtual(B,C);
|
||
*/
|
||
|
||
return true;
|
||
}
|
||
// note here it is different to real cut, we need leave the cutting edge for both vertex center and cell center
|
||
MyList<Parallel::gridseg> *Parallel::gs_subtract_virtual(MyList<Parallel::gridseg> *A, MyList<Parallel::gridseg> *B)
|
||
{
|
||
if (!A)
|
||
return 0;
|
||
if (!B)
|
||
return clone_gsl(A, true);
|
||
|
||
double cut_plane[2 * dim], DH[dim];
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
double tdh;
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
DH[i] = (A->data->uub[i] - A->data->llb[i]) / (A->data->shape[i] - 1);
|
||
tdh = (B->data->uub[i] - B->data->llb[i]) / (B->data->shape[i] - 1);
|
||
#else
|
||
#ifdef Cell
|
||
DH[i] = (A->data->uub[i] - A->data->llb[i]) / A->data->shape[i];
|
||
tdh = (B->data->uub[i] - B->data->llb[i]) / B->data->shape[i];
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
if (!feq(DH[i], tdh, DH[i] / 2))
|
||
{
|
||
cout << "Parallel::gs_subtract_virtual meets different grid segment " << DH[i] << " vs " << tdh << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
}
|
||
|
||
MyList<Parallel::gridseg> *C = 0, *q;
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
if (B->data->llb[i] > A->data->uub[i] || B->data->uub[i] < A->data->llb[i])
|
||
return clone_gsl(A, true);
|
||
cut_plane[i] = A->data->llb[i];
|
||
cut_plane[i + dim] = A->data->uub[i];
|
||
}
|
||
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
cut_plane[i] = Mymax(A->data->llb[i], B->data->llb[i]);
|
||
if (cut_plane[i] > A->data->llb[i])
|
||
{
|
||
q = clone_gsl(A, true);
|
||
// prolong the list from head
|
||
if (C)
|
||
q->next = C;
|
||
C = q;
|
||
for (int j = 0; j < dim; j++)
|
||
{
|
||
if (i == j)
|
||
{
|
||
C->data->llb[i] = A->data->llb[i];
|
||
// **note here it is different to real cut, we need leave the cutting edge for both vertex center and cell center**
|
||
C->data->uub[i] = Mymax(C->data->llb[i], cut_plane[i]);
|
||
}
|
||
else
|
||
{
|
||
C->data->llb[j] = cut_plane[j];
|
||
C->data->uub[j] = cut_plane[j + dim];
|
||
}
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
}
|
||
|
||
cut_plane[i + dim] = Mymin(A->data->uub[i], B->data->uub[i]);
|
||
if (cut_plane[i + dim] < A->data->uub[i])
|
||
{
|
||
q = clone_gsl(A, true);
|
||
if (C)
|
||
q->next = C;
|
||
C = q;
|
||
for (int j = 0; j < dim; j++)
|
||
{
|
||
if (i == j)
|
||
{
|
||
C->data->uub[i] = A->data->uub[i];
|
||
// note here it is different to real cut, we need leave the cutting edge for both vertex center and cell center
|
||
C->data->llb[i] = Mymin(C->data->uub[i], cut_plane[i + dim]);
|
||
}
|
||
else
|
||
{
|
||
C->data->llb[j] = cut_plane[j];
|
||
C->data->uub[j] = cut_plane[j + dim];
|
||
}
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
}
|
||
}
|
||
}
|
||
return C;
|
||
}
|
||
// note the data structure
|
||
// if CC is true
|
||
// 1 ----------- 1 ------ ^
|
||
// 0 ------ | t
|
||
// 0 ----------- old ------ |
|
||
//
|
||
// old -----------
|
||
// if CC is false
|
||
// 1 ----------- 1 ------ ^
|
||
// 0 ----------- 0 ------ | t
|
||
// old ----------- old ------ |
|
||
void Parallel::fill_level_data(MyList<Patch> *PatLd, MyList<Patch> *PatLs, MyList<Patch> *PatcL,
|
||
MyList<var> *OldList, MyList<var> *StateList, MyList<var> *FutureList,
|
||
MyList<var> *tmList, int Symmetry, bool BB, bool CC)
|
||
{
|
||
if (PatLd->data->lev != PatLs->data->lev)
|
||
{
|
||
cout << "Parallel::fill_level_data: meet requst from lev#" << PatLs->data->lev << " to lev#" << PatLd->data->lev << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
if (PatLd->data->lev <= PatcL->data->lev)
|
||
{
|
||
cout << "Parallel::fill_level_data: meet prolong requst from lev#" << PatcL->data->lev << " to lev#" << PatLd->data->lev << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
|
||
int cpusize;
|
||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
||
|
||
MyList<var> *VarList = 0;
|
||
MyList<var> *p;
|
||
p = StateList;
|
||
while (p)
|
||
{
|
||
if (VarList)
|
||
VarList->insert(p->data);
|
||
else
|
||
VarList = new MyList<var>(p->data);
|
||
p = p->next;
|
||
}
|
||
p = FutureList;
|
||
while (p)
|
||
{
|
||
if (VarList)
|
||
VarList->insert(p->data);
|
||
else
|
||
VarList = new MyList<var>(p->data);
|
||
p = p->next;
|
||
}
|
||
|
||
MyList<Parallel::gridseg> *dst;
|
||
MyList<Parallel::gridseg> **src, **transfer_src, **transfer_dst;
|
||
src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_src = new MyList<Parallel::gridseg> *[cpusize];
|
||
transfer_dst = new MyList<Parallel::gridseg> *[cpusize];
|
||
|
||
dst = build_complete_gsl(PatLd); // including ghost
|
||
// copy part
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
src[node] = build_owned_gsl(PatLs, node, 0, Symmetry); // similar to Sync
|
||
build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node
|
||
}
|
||
|
||
transfer(transfer_src, transfer_dst, VarList, VarList, Symmetry);
|
||
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
if (src[node])
|
||
src[node]->destroyList();
|
||
if (transfer_src[node])
|
||
transfer_src[node]->destroyList();
|
||
if (transfer_dst[node])
|
||
transfer_dst[node]->destroyList();
|
||
}
|
||
|
||
MyList<Parallel::gridseg> *dsts, *dstd;
|
||
dsts = build_complete_gsl_virtual(PatLs);
|
||
dstd = dst;
|
||
dst = gsl_subtract(dstd, dsts);
|
||
if (dstd)
|
||
dstd->destroyList();
|
||
if (dsts)
|
||
dsts->destroyList();
|
||
|
||
if (dst)
|
||
{
|
||
// prolongation part
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
src[node] = build_owned_gsl(PatcL, node, 4, Symmetry); // - buffer - ghost - BD ghost
|
||
build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node
|
||
}
|
||
|
||
if (CC)
|
||
{
|
||
// for FutureList
|
||
// restrict first~~~>
|
||
{
|
||
Restrict(PatcL, PatLs, FutureList, FutureList, Symmetry);
|
||
Sync(PatcL, FutureList, Symmetry);
|
||
}
|
||
//<~~~prolong then
|
||
transfer(transfer_src, transfer_dst, FutureList, FutureList, Symmetry);
|
||
|
||
// for StateList
|
||
// time interpolation part
|
||
if (BB)
|
||
prepare_inter_time_level(PatcL, FutureList, StateList, OldList,
|
||
tmList, 0); // use SynchList_pre as temporal storage space
|
||
else
|
||
prepare_inter_time_level(PatcL, FutureList, StateList,
|
||
tmList, 0); // use SynchList_pre as temporal storage space
|
||
// restrict first~~~>
|
||
{
|
||
Restrict(PatcL, PatLs, StateList, tmList, Symmetry);
|
||
Sync(PatcL, tmList, Symmetry);
|
||
}
|
||
//<~~~prolong then
|
||
transfer(transfer_src, transfer_dst, tmList, StateList, Symmetry);
|
||
}
|
||
else
|
||
{
|
||
// for both FutureList and StateList
|
||
// restrict first~~~>
|
||
{
|
||
Restrict(PatcL, PatLs, VarList, VarList, Symmetry);
|
||
Sync(PatcL, VarList, Symmetry);
|
||
}
|
||
//<~~~prolong then
|
||
transfer(transfer_src, transfer_dst, VarList, VarList, Symmetry);
|
||
}
|
||
|
||
for (int node = 0; node < cpusize; node++)
|
||
{
|
||
if (src[node])
|
||
src[node]->destroyList();
|
||
if (transfer_src[node])
|
||
transfer_src[node]->destroyList();
|
||
if (transfer_dst[node])
|
||
transfer_dst[node]->destroyList();
|
||
}
|
||
|
||
dst->destroyList();
|
||
}
|
||
|
||
delete[] src;
|
||
delete[] transfer_src;
|
||
delete[] transfer_dst;
|
||
|
||
VarList->clearList();
|
||
}
|
||
void Parallel::KillBlocks(MyList<Patch> *PatchLIST)
|
||
{
|
||
while (PatchLIST)
|
||
{
|
||
Patch *Pp = PatchLIST->data;
|
||
MyList<Block> *bg;
|
||
while (Pp->blb)
|
||
{
|
||
if (Pp->blb == Pp->ble)
|
||
break;
|
||
bg = (Pp->blb->next) ? Pp->blb->next : 0;
|
||
delete Pp->blb->data;
|
||
delete Pp->blb;
|
||
Pp->blb = bg;
|
||
}
|
||
if (Pp->ble)
|
||
{
|
||
delete Pp->ble->data;
|
||
delete Pp->ble;
|
||
}
|
||
Pp->blb = Pp->ble = 0;
|
||
PatchLIST = PatchLIST->next;
|
||
}
|
||
}
|
||
bool Parallel::PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
|
||
int NN, double **XX,
|
||
double *Shellf, int Symmetry)
|
||
{
|
||
MyList<var> *varl;
|
||
int num_var = 0;
|
||
varl = VarList;
|
||
while (varl)
|
||
{
|
||
num_var++;
|
||
varl = varl->next;
|
||
}
|
||
|
||
double lld[dim], uud[dim];
|
||
double **pox;
|
||
pox = new double *[dim];
|
||
for (int j = 0; j < dim; j++)
|
||
pox[j] = new double[1];
|
||
for (int i = 0; i < NN; i++)
|
||
{
|
||
MyList<Patch> *PL = PatL;
|
||
while (PL)
|
||
{
|
||
bool flag = true;
|
||
for (int j = 0; j < dim; j++)
|
||
{
|
||
double h = PL->data->getdX(j);
|
||
lld[j] = PL->data->lli[j] * h;
|
||
uud[j] = PL->data->uui[j] * h;
|
||
if (XX[j][i] < PL->data->bbox[j] + lld[j] || XX[j][i] > PL->data->bbox[j + dim] - uud[j])
|
||
{
|
||
flag = false;
|
||
break;
|
||
}
|
||
pox[j][0] = XX[j][i];
|
||
}
|
||
if (flag)
|
||
{
|
||
PL->data->Interp_Points(VarList, 1, pox, Shellf + i * num_var, Symmetry);
|
||
break;
|
||
}
|
||
PL = PL->next;
|
||
}
|
||
if (!PL)
|
||
{
|
||
checkpatchlist(PatL, false);
|
||
return false;
|
||
}
|
||
}
|
||
for (int j = 0; j < dim; j++)
|
||
delete[] pox[j];
|
||
delete[] pox;
|
||
|
||
return true;
|
||
}
|
||
bool Parallel::PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
|
||
int NN, double **XX,
|
||
double *Shellf, int Symmetry, MPI_Comm Comm_here)
|
||
{
|
||
MyList<var> *varl;
|
||
int num_var = 0;
|
||
varl = VarList;
|
||
while (varl)
|
||
{
|
||
num_var++;
|
||
varl = varl->next;
|
||
}
|
||
|
||
double lld[dim], uud[dim];
|
||
double **pox;
|
||
pox = new double *[dim];
|
||
for (int j = 0; j < dim; j++)
|
||
pox[j] = new double[1];
|
||
for (int i = 0; i < NN; i++)
|
||
{
|
||
MyList<Patch> *PL = PatL;
|
||
while (PL)
|
||
{
|
||
bool flag = true;
|
||
for (int j = 0; j < dim; j++)
|
||
{
|
||
double h = PL->data->getdX(j);
|
||
lld[j] = PL->data->lli[j] * h;
|
||
uud[j] = PL->data->uui[j] * h;
|
||
if (XX[j][i] < PL->data->bbox[j] + lld[j] || XX[j][i] > PL->data->bbox[j + dim] - uud[j])
|
||
{
|
||
flag = false;
|
||
break;
|
||
}
|
||
pox[j][0] = XX[j][i];
|
||
}
|
||
if (flag)
|
||
{
|
||
PL->data->Interp_Points(VarList, 1, pox, Shellf + i * num_var, Symmetry, Comm_here);
|
||
break;
|
||
}
|
||
PL = PL->next;
|
||
}
|
||
if (!PL)
|
||
{
|
||
checkpatchlist(PatL, false);
|
||
return false;
|
||
}
|
||
}
|
||
for (int j = 0; j < dim; j++)
|
||
delete[] pox[j];
|
||
delete[] pox;
|
||
|
||
return true;
|
||
}
|
||
void Parallel::aligncheck(double *bbox0, double *bboxl, int lev, double *DH0, int *shape)
|
||
{
|
||
const double aligntiny = 0.1;
|
||
double DHl, rr;
|
||
int NN;
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
DHl = DH0[i] * pow(0.5, lev);
|
||
rr = bboxl[i] - bbox0[i];
|
||
bboxl[i] = bbox0[i] + int(rr / DHl + 0.4) * DHl;
|
||
rr = bbox0[i + dim] - bboxl[i + dim];
|
||
bboxl[i + dim] = bbox0[i + dim] - int(rr / DHl + 0.4) * DHl;
|
||
#ifdef Vertex
|
||
#ifdef Cell
|
||
#error Both Cell and Vertex are defined
|
||
#endif
|
||
NN = int((bboxl[i + dim] - bboxl[i]) / DHl + 0.4) + 1;
|
||
#else
|
||
#ifdef Cell
|
||
NN = int((bboxl[i + dim] - bboxl[i]) / DHl + 0.4);
|
||
#else
|
||
#error Not define Vertex nor Cell
|
||
#endif
|
||
#endif
|
||
if (NN != shape[i])
|
||
{
|
||
int myrank;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
if (myrank == 0)
|
||
{
|
||
cout << "Parallel::aligncheck want shape " << NN << " for lev#" << lev << ", but " << shape[i] << endl;
|
||
cout << "i = " << i << ", low = " << bboxl[i] << ", up = " << bboxl[i + dim] << endl;
|
||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
bool Parallel::point_locat_gsl(double *pox, MyList<Parallel::gridseg> *gsl)
|
||
{
|
||
bool flag = false;
|
||
while (gsl)
|
||
{
|
||
for (int i = 0; i < dim; i++)
|
||
{
|
||
if (pox[i] > gsl->data->llb[i] && pox[i] < gsl->data->uub[i])
|
||
flag = true;
|
||
else
|
||
{
|
||
flag = false;
|
||
break;
|
||
}
|
||
}
|
||
if (flag)
|
||
break;
|
||
gsl = gsl->next;
|
||
}
|
||
|
||
return flag;
|
||
}
|
||
void Parallel::checkpatchlist(MyList<Patch> *PatL, bool buflog)
|
||
{
|
||
MyList<Patch> *PL = PatL;
|
||
while (PL)
|
||
{
|
||
PL->data->checkPatch(buflog);
|
||
PL = PL->next;
|
||
}
|
||
}
|
||
// Check if load balancing is needed based on interpolation times
|
||
bool Parallel::check_load_balance_need(double *rank_times, int nprocs, int &num_heavy, int *heavy_ranks)
|
||
{
|
||
// Calculate average time
|
||
double avg_time = 0;
|
||
for (int r = 0; r < nprocs; r++)
|
||
{
|
||
avg_time += rank_times[r];
|
||
}
|
||
avg_time /= nprocs;
|
||
|
||
// Identify heavy ranks (time > 1.5x average)
|
||
std::vector<std::pair<int, double>> rank_times_vec;
|
||
for (int r = 0; r < nprocs; r++)
|
||
{
|
||
if (rank_times[r] > avg_time * 1.5)
|
||
{
|
||
rank_times_vec.push_back(std::make_pair(r, rank_times[r]));
|
||
}
|
||
}
|
||
|
||
// Sort by time (descending)
|
||
std::sort(rank_times_vec.begin(), rank_times_vec.end(),
|
||
[](const std::pair<int, double>& a, const std::pair<int, double>& b) {
|
||
return a.second > b.second;
|
||
});
|
||
|
||
// Take top 4 heavy ranks
|
||
num_heavy = std::min(4, (int)rank_times_vec.size());
|
||
if (num_heavy > 0)
|
||
{
|
||
for (int i = 0; i < num_heavy; i++)
|
||
{
|
||
heavy_ranks[i] = rank_times_vec[i].first;
|
||
}
|
||
return true; // Load balancing is needed
|
||
}
|
||
|
||
return false; // No load balancing needed
|
||
}
|
||
|
||
// Split blocks belonging to heavy ranks to improve load balancing
|
||
// Strategy: Split heavy rank blocks in half, merge 8 light ranks to free 4 ranks
|
||
void Parallel::split_heavy_blocks(MyList<Patch> *PatL, int *heavy_ranks, int num_heavy,
|
||
int split_factor, int cpusize, int ingfsi, int fngfsi)
|
||
{
|
||
int myrank, nprocs;
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
||
|
||
if (myrank != 0) return; // Only rank 0 performs the analysis
|
||
|
||
cout << "\n=== Load Balancing Strategy ===" << endl;
|
||
cout << "Heavy ranks to split (in half): " << num_heavy << endl;
|
||
for (int i = 0; i < num_heavy; i++)
|
||
cout << " Heavy rank " << heavy_ranks[i] << endl;
|
||
|
||
// Step 1: Identify all blocks and their ranks
|
||
std::vector<int> all_ranks;
|
||
std::map<int, std::vector<Block*>> rank_to_blocks;
|
||
|
||
MyList<Patch> *PL = PatL;
|
||
while (PL)
|
||
{
|
||
Patch *PP = PL->data;
|
||
MyList<Block> *BP = PP->blb;
|
||
while (BP)
|
||
{
|
||
Block *block = BP->data;
|
||
all_ranks.push_back(block->rank);
|
||
rank_to_blocks[block->rank].push_back(block);
|
||
BP = BP->next;
|
||
}
|
||
PL = PL->next;
|
||
}
|
||
|
||
// Step 2: Identify light ranks (not in heavy_ranks list)
|
||
std::set<int> heavy_set(heavy_ranks, heavy_ranks + num_heavy);
|
||
std::vector<int> light_ranks;
|
||
for (int r : all_ranks)
|
||
{
|
||
if (heavy_set.find(r) == heavy_set.end())
|
||
{
|
||
light_ranks.push_back(r);
|
||
}
|
||
}
|
||
|
||
// Remove duplicates from light_ranks
|
||
std::sort(light_ranks.begin(), light_ranks.end());
|
||
light_ranks.erase(std::unique(light_ranks.begin(), light_ranks.end()), light_ranks.end());
|
||
|
||
cout << "Found " << light_ranks.size() << " light ranks (candidates for merging)" << endl;
|
||
|
||
// Step 3: Select 8 light ranks to merge (those with smallest workload)
|
||
// For now, we select the first 8 light ranks
|
||
int num_to_merge = 8;
|
||
if (light_ranks.size() < num_to_merge)
|
||
{
|
||
cout << "WARNING: Not enough light ranks to merge. Found " << light_ranks.size()
|
||
<< ", need " << num_to_merge << endl;
|
||
num_to_merge = light_ranks.size();
|
||
}
|
||
|
||
std::vector<int> ranks_to_merge(light_ranks.begin(), light_ranks.begin() + num_to_merge);
|
||
|
||
cout << "Light ranks to merge (8 -> 4 merged ranks):" << endl;
|
||
for (int i = 0; i < num_to_merge; i++)
|
||
cout << " Rank " << ranks_to_merge[i] << endl;
|
||
|
||
// Step 4: Analyze blocks that need to be split
|
||
cout << "\n=== Analyzing blocks for splitting ===" << endl;
|
||
|
||
struct BlockSplitInfo {
|
||
Block *original_block;
|
||
int split_dim;
|
||
int split_point;
|
||
};
|
||
|
||
std::vector<BlockSplitInfo> blocks_to_split;
|
||
|
||
PL = PatL;
|
||
while (PL)
|
||
{
|
||
Patch *PP = PL->data;
|
||
MyList<Block> *BP = PP->blb;
|
||
while (BP)
|
||
{
|
||
Block *block = BP->data;
|
||
|
||
// Check if this block belongs to a heavy rank
|
||
for (int i = 0; i < num_heavy; i++)
|
||
{
|
||
if (block->rank == heavy_ranks[i])
|
||
{
|
||
// Find the largest dimension for splitting
|
||
int max_dim = 0;
|
||
int max_size = block->shape[0];
|
||
for (int d = 1; d < dim; d++)
|
||
{
|
||
if (block->shape[d] > max_size)
|
||
{
|
||
max_size = block->shape[d];
|
||
max_dim = d;
|
||
}
|
||
}
|
||
|
||
int split_point = max_size / 2;
|
||
|
||
BlockSplitInfo info;
|
||
info.original_block = block;
|
||
info.split_dim = max_dim;
|
||
info.split_point = split_point;
|
||
blocks_to_split.push_back(info);
|
||
|
||
cout << "Block at rank " << block->rank << " will be split" << endl;
|
||
cout << " Shape: [" << block->shape[0] << ", " << block->shape[1] << ", " << block->shape[2] << "]" << endl;
|
||
cout << " Split along dimension " << max_dim << " at index " << split_point << endl;
|
||
break;
|
||
}
|
||
}
|
||
|
||
BP = BP->next;
|
||
}
|
||
PL = PL->next;
|
||
}
|
||
|
||
cout << "\nTotal blocks to split: " << blocks_to_split.size() << endl;
|
||
|
||
// Step 5: Calculate new rank assignments
|
||
// Strategy:
|
||
// - For each heavy rank, its blocks are split in half
|
||
// - First half keeps the original rank
|
||
// - Second half gets a new rank (from the freed light ranks)
|
||
// - 8 light ranks are merged into 4 ranks, freeing up 4 ranks
|
||
|
||
std::vector<int> freed_ranks;
|
||
for (size_t i = 0; i < ranks_to_merge.size(); i += 2)
|
||
{
|
||
// Merge pairs of light ranks: (ranks_to_merge[i], ranks_to_merge[i+1]) -> ranks_to_merge[i]
|
||
// This frees up ranks_to_merge[i+1]
|
||
if (i + 1 < ranks_to_merge.size())
|
||
{
|
||
freed_ranks.push_back(ranks_to_merge[i + 1]);
|
||
cout << "Merging ranks " << ranks_to_merge[i] << " and " << ranks_to_merge[i + 1]
|
||
<< " -> keeping rank " << ranks_to_merge[i] << ", freeing rank " << ranks_to_merge[i + 1] << endl;
|
||
}
|
||
}
|
||
|
||
cout << "\nFreed ranks available for split blocks: ";
|
||
for (int r : freed_ranks)
|
||
cout << r << " ";
|
||
cout << endl;
|
||
|
||
// Step 6: Assign new ranks to split blocks
|
||
int freed_idx = 0;
|
||
for (size_t i = 0; i < blocks_to_split.size(); i++)
|
||
{
|
||
BlockSplitInfo &info = blocks_to_split[i];
|
||
Block *original = info.original_block;
|
||
|
||
if (freed_idx < freed_ranks.size())
|
||
{
|
||
cout << "\nSplitting block at rank " << original->rank << endl;
|
||
cout << " First half: keeps rank " << original->rank << endl;
|
||
cout << " Second half: gets new rank " << freed_ranks[freed_idx] << endl;
|
||
freed_idx++;
|
||
}
|
||
else
|
||
{
|
||
cout << "WARNING: Not enough freed ranks for all split blocks!" << endl;
|
||
break;
|
||
}
|
||
}
|
||
|
||
cout << "\n=== Load Balancing Analysis Complete ===" << endl;
|
||
cout << "Next steps:" << endl;
|
||
cout << " 1. Recompose the grid with new rank assignments" << endl;
|
||
cout << " 2. Data migration will be handled by recompose_cgh" << endl;
|
||
cout << " 3. Ghost zone communication will be updated automatically" << endl;
|
||
}
|
||
|