Compare commits
4 Commits
cjy-dystop
...
yx_new_spl
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
588fb675a0 | ||
| aabe74c098 | |||
|
|
f147f79ffa | ||
|
|
8abac8dd88 |
@@ -66,7 +66,8 @@ if os.path.exists(File_directory):
|
||||
## Prompt whether to overwrite the existing directory
|
||||
while True:
|
||||
try:
|
||||
inputvalue = input()
|
||||
## inputvalue = input()
|
||||
inputvalue = "continue"
|
||||
## If the user agrees to overwrite, proceed and remove the existing directory
|
||||
if ( inputvalue == "continue" ):
|
||||
print( " Continue the calculation !!! " )
|
||||
|
||||
@@ -442,7 +442,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
Bp = Bp->next;
|
||||
}
|
||||
}
|
||||
|
||||
// Replace MPI_Allreduce with per-owner MPI_Bcast:
|
||||
// Group consecutive points by owner rank and broadcast each group.
|
||||
// Since each point's data is non-zero only on the owner rank,
|
||||
@@ -507,6 +506,9 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
// Targeted point-to-point overload: each owner sends each point only to
|
||||
// the one rank that needs it for integration (consumer), reducing
|
||||
// communication volume by ~nprocs times compared to the Bcast version.
|
||||
/*
|
||||
double t_calc_end, t_calc_total = 0;
|
||||
double t_calc_start = MPI_Wtime();*/
|
||||
int myrank, nprocs;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
||||
@@ -607,12 +609,15 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
Bp = Bp->next;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
t_calc_end = MPI_Wtime();
|
||||
t_calc_total = t_calc_end - t_calc_start;*/
|
||||
// --- Error check for unfound points ---
|
||||
for (int j = 0; j < NN; j++)
|
||||
{
|
||||
if (owner_rank[j] < 0 && myrank == 0)
|
||||
{
|
||||
cout<<owner_rank[j-1]<<endl;
|
||||
cout << "ERROR: Patch::Interp_Points fails to find point (";
|
||||
for (int d = 0; d < dim; d++)
|
||||
{
|
||||
@@ -764,6 +769,63 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
delete[] recv_count;
|
||||
delete[] consumer_rank;
|
||||
delete[] owner_rank;
|
||||
/*
|
||||
// 4. 汇总并输出真正干活最慢的 Top 4
|
||||
struct RankStats {
|
||||
int rank;
|
||||
double calc_time; // 净计算时间
|
||||
};
|
||||
|
||||
// 创建当前进程的统计数据
|
||||
RankStats local_stat;
|
||||
local_stat.rank = myrank;
|
||||
local_stat.calc_time = t_calc_total;
|
||||
|
||||
// 为所有进程的统计数据分配内存
|
||||
RankStats *all_stats = nullptr;
|
||||
if (myrank == 0) {
|
||||
all_stats = new RankStats[nprocs];
|
||||
}
|
||||
|
||||
// 使用MPI_Gather收集所有进程的数据到rank 0
|
||||
MPI_Gather(&local_stat, sizeof(RankStats), MPI_BYTE,
|
||||
all_stats, sizeof(RankStats), MPI_BYTE,
|
||||
0, MPI_COMM_WORLD);
|
||||
|
||||
// 准备输出前4个rank的信息(所有rank都参与,确保广播后一致)
|
||||
int top10_ranks[10] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
|
||||
double top10_times[10] = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
|
||||
int num_top10 = 0;
|
||||
|
||||
if (myrank == 0) {
|
||||
// 按 calc_time(净计算时间)排序
|
||||
std::sort(all_stats, all_stats + nprocs, [](const RankStats& a, const RankStats& b) {
|
||||
return a.calc_time > b.calc_time;
|
||||
});
|
||||
|
||||
// 取前4个
|
||||
num_top10 = (nprocs < 10) ? nprocs : 10;
|
||||
for (int i = 0; i < num_top10; i++) {
|
||||
top10_ranks[i] = all_stats[i].rank;
|
||||
top10_times[i] = all_stats[i].calc_time;
|
||||
}
|
||||
|
||||
printf("\n--- Top %d Ranks by ACTIVE COMPUTATION (CPU Time) ---\n", num_top10);
|
||||
for (int i = 0; i < num_top10; i++) {
|
||||
printf("Rank [%4d]: Calc %.6f s\n", top10_ranks[i], top10_times[i]);
|
||||
}
|
||||
|
||||
// 清理分配的内存
|
||||
delete[] all_stats;
|
||||
}
|
||||
|
||||
// 广播前4个rank的信息给所有进程
|
||||
MPI_Bcast(&num_top10, 1, MPI_INT, 0, MPI_COMM_WORLD);
|
||||
if (num_top10 > 0) {
|
||||
MPI_Bcast(top10_ranks, 10, MPI_INT, 0, MPI_COMM_WORLD);
|
||||
MPI_Bcast(top10_times, 10, MPI_DOUBLE, 0, MPI_COMM_WORLD);
|
||||
}
|
||||
*/
|
||||
}
|
||||
void Patch::Interp_Points(MyList<var> *VarList,
|
||||
int NN, double **XX,
|
||||
|
||||
@@ -24,6 +24,7 @@ using namespace std;
|
||||
#endif
|
||||
|
||||
#include <mpi.h>
|
||||
#include <memory.h>
|
||||
#include "MyList.h"
|
||||
#include "Block.h"
|
||||
#include "Parallel.h"
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include "prolongrestrict.h"
|
||||
#include "misc.h"
|
||||
#include "parameters.h"
|
||||
#include <set>
|
||||
|
||||
int Parallel::partition1(int &nx, int split_size, int min_width, int cpusize, int shape) // special for 1 diemnsion
|
||||
{
|
||||
@@ -115,7 +116,7 @@ int Parallel::partition3(int *nxyz, int split_size, int *min_width, int cpusize,
|
||||
return nx * ny * nz;
|
||||
#undef SEARCH_SIZE
|
||||
}
|
||||
#elif 1 // Zhihui's idea one on 2013-09-25
|
||||
#elif 0 // Zhihui's idea one on 2013-09-25
|
||||
{
|
||||
int nx, ny, nz;
|
||||
int hmin_width;
|
||||
@@ -150,7 +151,7 @@ int Parallel::partition3(int *nxyz, int split_size, int *min_width, int cpusize,
|
||||
|
||||
return nx * ny * nz;
|
||||
}
|
||||
#elif 1 // Zhihui's idea two on 2013-09-25
|
||||
#elif 0 // Zhihui's idea two on 2013-09-25
|
||||
{
|
||||
int nx, ny, nz;
|
||||
const int hmin_width = 8; // for example we use 8
|
||||
@@ -500,6 +501,547 @@ MyList<Block> *Parallel::distribute(MyList<Patch> *PatchLIST, int cpusize, int i
|
||||
|
||||
return BlL;
|
||||
}
|
||||
MyList<Block> *Parallel::distribute_hard(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
|
||||
bool periodic, int nodes)
|
||||
{
|
||||
#ifdef USE_GPU_DIVIDE
|
||||
double cpu_part, gpu_part;
|
||||
map<string, double>::iterator iter;
|
||||
iter = parameters::dou_par.find("cpu part");
|
||||
if (iter != parameters::dou_par.end())
|
||||
{
|
||||
cpu_part = iter->second;
|
||||
}
|
||||
else
|
||||
{
|
||||
int myrank;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||
// read parameter from file
|
||||
const int LEN = 256;
|
||||
char pline[LEN];
|
||||
string str, sgrp, skey, sval;
|
||||
int sind;
|
||||
char pname[50];
|
||||
{
|
||||
map<string, string>::iterator iter = parameters::str_par.find("inputpar");
|
||||
if (iter != parameters::str_par.end())
|
||||
{
|
||||
strcpy(pname, (iter->second).c_str());
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "Error inputpar" << endl;
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
ifstream inf(pname, ifstream::in);
|
||||
if (!inf.good() && myrank == 0)
|
||||
{
|
||||
cout << "Can not open parameter file " << pname << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
|
||||
for (int i = 1; inf.good(); i++)
|
||||
{
|
||||
inf.getline(pline, LEN);
|
||||
str = pline;
|
||||
|
||||
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
|
||||
if (status == -1)
|
||||
{
|
||||
cout << "error reading parameter file " << pname << " in line " << i << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
else if (status == 0)
|
||||
continue;
|
||||
|
||||
if (sgrp == "ABE")
|
||||
{
|
||||
if (skey == "cpu part")
|
||||
cpu_part = atof(sval.c_str());
|
||||
}
|
||||
}
|
||||
inf.close();
|
||||
|
||||
parameters::dou_par.insert(map<string, double>::value_type("cpu part", cpu_part));
|
||||
}
|
||||
iter = parameters::dou_par.find("gpu part");
|
||||
if (iter != parameters::dou_par.end())
|
||||
{
|
||||
gpu_part = iter->second;
|
||||
}
|
||||
else
|
||||
{
|
||||
int myrank;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||
// read parameter from file
|
||||
const int LEN = 256;
|
||||
char pline[LEN];
|
||||
string str, sgrp, skey, sval;
|
||||
int sind;
|
||||
char pname[50];
|
||||
{
|
||||
map<string, string>::iterator iter = parameters::str_par.find("inputpar");
|
||||
if (iter != parameters::str_par.end())
|
||||
{
|
||||
strcpy(pname, (iter->second).c_str());
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "Error inputpar" << endl;
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
ifstream inf(pname, ifstream::in);
|
||||
if (!inf.good() && myrank == 0)
|
||||
{
|
||||
cout << "Can not open parameter file " << pname << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
|
||||
for (int i = 1; inf.good(); i++)
|
||||
{
|
||||
inf.getline(pline, LEN);
|
||||
str = pline;
|
||||
|
||||
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
|
||||
if (status == -1)
|
||||
{
|
||||
cout << "error reading parameter file " << pname << " in line " << i << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
else if (status == 0)
|
||||
continue;
|
||||
|
||||
if (sgrp == "ABE")
|
||||
{
|
||||
if (skey == "gpu part")
|
||||
gpu_part = atof(sval.c_str());
|
||||
}
|
||||
}
|
||||
inf.close();
|
||||
|
||||
parameters::dou_par.insert(map<string, double>::value_type("gpu part", gpu_part));
|
||||
}
|
||||
|
||||
if (nodes == 0)
|
||||
nodes = cpusize / 2;
|
||||
#else
|
||||
if (nodes == 0)
|
||||
nodes = cpusize;
|
||||
#endif
|
||||
|
||||
if (dim != 3)
|
||||
{
|
||||
cout << "distrivute: now we only support 3-dimension" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
|
||||
MyList<Block> *BlL = 0;
|
||||
int split_size, min_size, block_size = 0;
|
||||
|
||||
int min_width = 2 * Mymax(ghost_width, buffer_width);
|
||||
int nxyz[dim], mmin_width[dim], min_shape[dim];
|
||||
|
||||
MyList<Patch> *PLi = PatchLIST;
|
||||
for (int i = 0; i < dim; i++)
|
||||
min_shape[i] = PLi->data->shape[i];
|
||||
int lev = PLi->data->lev;
|
||||
PLi = PLi->next;
|
||||
while (PLi)
|
||||
{
|
||||
Patch *PP = PLi->data;
|
||||
for (int i = 0; i < dim; i++)
|
||||
min_shape[i] = Mymin(min_shape[i], PP->shape[i]);
|
||||
if (lev != PLi->data->lev)
|
||||
cout << "Parallel::distribute CAUSTION: meet Patches for different level: " << lev << " and " << PLi->data->lev << endl;
|
||||
PLi = PLi->next;
|
||||
}
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
mmin_width[i] = Mymin(min_width, min_shape[i]);
|
||||
|
||||
min_size = mmin_width[0];
|
||||
for (int i = 1; i < dim; i++)
|
||||
min_size = min_size * mmin_width[i];
|
||||
|
||||
PLi = PatchLIST;
|
||||
while (PLi)
|
||||
{
|
||||
Patch *PP = PLi->data;
|
||||
// PP->checkPatch(true);
|
||||
int bs = PP->shape[0];
|
||||
for (int i = 1; i < dim; i++)
|
||||
bs = bs * PP->shape[i];
|
||||
block_size = block_size + bs;
|
||||
PLi = PLi->next;
|
||||
}
|
||||
split_size = Mymax(min_size, block_size / nodes);
|
||||
split_size = Mymax(1, split_size);
|
||||
|
||||
int n_rank = 0;
|
||||
PLi = PatchLIST;
|
||||
int reacpu = 0;
|
||||
int current_block_id = 0;
|
||||
while (PLi) {
|
||||
Block *ng0, *ng;
|
||||
bool first_block_in_patch = true;
|
||||
Patch *PP = PLi->data;
|
||||
reacpu += partition3(nxyz, split_size, mmin_width, nodes, PP->shape);
|
||||
|
||||
for (int i = 0; i < nxyz[0]; i++)
|
||||
for (int j = 0; j < nxyz[1]; j++)
|
||||
for (int k = 0; k < nxyz[2]; k++)
|
||||
{
|
||||
// --- 1. 定义局部变量 ---
|
||||
int ibbox_here[6], shape_here[3];
|
||||
double bbox_here[6], dd;
|
||||
Block *current_ng_start = nullptr; // 本次循环产生的第一个(或唯一一个)块
|
||||
|
||||
// --- 2. 核心逻辑分支 ---
|
||||
if (current_block_id == 27 || current_block_id == 28 ||
|
||||
current_block_id == 35 || current_block_id == 36)
|
||||
{
|
||||
// A. 计算原始索引 (不带 Ghost)
|
||||
int ib0 = (PP->shape[0] * i) / nxyz[0];
|
||||
int ib3 = (PP->shape[0] * (i + 1)) / nxyz[0] - 1;
|
||||
int jb1 = (PP->shape[1] * j) / nxyz[1];
|
||||
int jb4 = (PP->shape[1] * (j + 1)) / nxyz[1] - 1;
|
||||
int kb2 = (PP->shape[2] * k) / nxyz[2];
|
||||
int kb5 = (PP->shape[2] * (k + 1)) / nxyz[2] - 1;
|
||||
|
||||
int r_1, r_2, r_3, r_4;
|
||||
Block * split_first_block = nullptr;
|
||||
Block * split_last_block = nullptr;
|
||||
if(current_block_id == 27)
|
||||
{
|
||||
r_1 = 24; r_2 = 25; r_3 = 26; r_4 = 27;
|
||||
splitHotspotBlock(BlL, dim, ib0, ib3, jb1, jb4, kb2, kb5,
|
||||
PP, r_1,r_2,r_3, r_4, ingfsi, fngfsi, periodic,split_first_block,split_last_block);
|
||||
}
|
||||
|
||||
else if(current_block_id == 28)
|
||||
{
|
||||
r_1 = 28; r_2 = 29;
|
||||
splitHotspotBlock(BlL, dim, ib0, ib3, jb1, jb4, kb2, kb5,
|
||||
PP, r_1, r_2, ingfsi, fngfsi, periodic,split_first_block,split_last_block);
|
||||
}
|
||||
else if(current_block_id == 35)
|
||||
{
|
||||
r_1 = 34; r_2 = 35;
|
||||
splitHotspotBlock(BlL, dim, ib0, ib3, jb1, jb4, kb2, kb5,
|
||||
PP, r_1, r_2, ingfsi, fngfsi, periodic,split_first_block,split_last_block);
|
||||
}
|
||||
else
|
||||
{
|
||||
r_1 = 36; r_2 = 37; r_3 = 38; r_4 = 39;
|
||||
splitHotspotBlock(BlL, dim, ib0, ib3, jb1, jb4, kb2, kb5,
|
||||
PP, r_1,r_2,r_3, r_4, ingfsi, fngfsi, periodic,split_first_block,split_last_block);
|
||||
}
|
||||
|
||||
current_ng_start = split_first_block;
|
||||
ng = split_last_block;
|
||||
}
|
||||
else
|
||||
{
|
||||
// B. 普通块逻辑 (含 Ghost 扩张)
|
||||
ibbox_here[0] = (PP->shape[0] * i) / nxyz[0];
|
||||
ibbox_here[3] = (PP->shape[0] * (i + 1)) / nxyz[0] - 1;
|
||||
ibbox_here[1] = (PP->shape[1] * j) / nxyz[1];
|
||||
ibbox_here[4] = (PP->shape[1] * (j + 1)) / nxyz[1] - 1;
|
||||
ibbox_here[2] = (PP->shape[2] * k) / nxyz[2];
|
||||
ibbox_here[5] = (PP->shape[2] * (k + 1)) / nxyz[2] - 1;
|
||||
|
||||
if (periodic) {
|
||||
for(int d=0; d<3; d++) {
|
||||
ibbox_here[d] -= ghost_width;
|
||||
ibbox_here[d+3] += ghost_width;
|
||||
}
|
||||
} else {
|
||||
ibbox_here[0] = Mymax(0, ibbox_here[0] - ghost_width);
|
||||
ibbox_here[3] = Mymin(PP->shape[0] - 1, ibbox_here[3] + ghost_width);
|
||||
ibbox_here[1] = Mymax(0, ibbox_here[1] - ghost_width);
|
||||
ibbox_here[4] = Mymin(PP->shape[1] - 1, ibbox_here[4] + ghost_width);
|
||||
ibbox_here[2] = Mymax(0, ibbox_here[2] - ghost_width);
|
||||
ibbox_here[5] = Mymin(PP->shape[2] - 1, ibbox_here[5] + ghost_width);
|
||||
}
|
||||
|
||||
for(int d=0; d<3; d++) shape_here[d] = ibbox_here[d+3] - ibbox_here[d] + 1;
|
||||
|
||||
// 物理坐标计算 (根据你的宏定义 Cell/Vertex)
|
||||
#ifdef Vertex
|
||||
#ifdef Cell
|
||||
#error Both Cell and Vertex are defined
|
||||
#endif
|
||||
// 0--4, 5--10
|
||||
dd = (PP->bbox[3] - PP->bbox[0]) / (PP->shape[0] - 1);
|
||||
bbox_here[0] = PP->bbox[0] + ibbox_here[0] * dd;
|
||||
bbox_here[3] = PP->bbox[0] + ibbox_here[3] * dd;
|
||||
|
||||
dd = (PP->bbox[4] - PP->bbox[1]) / (PP->shape[1] - 1);
|
||||
bbox_here[1] = PP->bbox[1] + ibbox_here[1] * dd;
|
||||
bbox_here[4] = PP->bbox[1] + ibbox_here[4] * dd;
|
||||
|
||||
dd = (PP->bbox[5] - PP->bbox[2]) / (PP->shape[2] - 1);
|
||||
bbox_here[2] = PP->bbox[2] + ibbox_here[2] * dd;
|
||||
bbox_here[5] = PP->bbox[2] + ibbox_here[5] * dd;
|
||||
#else
|
||||
#ifdef Cell
|
||||
// 0--5, 5--10
|
||||
dd = (PP->bbox[3] - PP->bbox[0]) / PP->shape[0];
|
||||
bbox_here[0] = PP->bbox[0] + (ibbox_here[0]) * dd;
|
||||
bbox_here[3] = PP->bbox[0] + (ibbox_here[3] + 1) * dd;
|
||||
|
||||
dd = (PP->bbox[4] - PP->bbox[1]) / PP->shape[1];
|
||||
bbox_here[1] = PP->bbox[1] + (ibbox_here[1]) * dd;
|
||||
bbox_here[4] = PP->bbox[1] + (ibbox_here[4] + 1) * dd;
|
||||
|
||||
dd = (PP->bbox[5] - PP->bbox[2]) / PP->shape[2];
|
||||
bbox_here[2] = PP->bbox[2] + (ibbox_here[2]) * dd;
|
||||
bbox_here[5] = PP->bbox[2] + (ibbox_here[5] + 1) * dd;
|
||||
#else
|
||||
#error Not define Vertex nor Cell
|
||||
#endif
|
||||
#endif
|
||||
ng = createMappedBlock(BlL, dim, shape_here, bbox_here, current_block_id, ingfsi, fngfsi, PP->lev);
|
||||
current_ng_start = ng;
|
||||
}
|
||||
|
||||
// --- 3. 统一处理 Patch 起始 Block 指针 ---
|
||||
if (first_block_in_patch) {
|
||||
ng0 = current_ng_start;
|
||||
|
||||
// 立即设置 PP->blb,避免后续循环覆盖 ng0
|
||||
MyList<Block> *Bp_start = BlL;
|
||||
while (Bp_start && Bp_start->data != ng0) Bp_start = Bp_start->next;
|
||||
PP->blb = Bp_start;
|
||||
|
||||
first_block_in_patch = false;
|
||||
}
|
||||
|
||||
current_block_id++;
|
||||
}
|
||||
|
||||
// --- 4. 设置 Patch 结束 Block 指针 ---
|
||||
MyList<Block> *Bp_end = BlL;
|
||||
while (Bp_end && Bp_end->data != ng) Bp_end = Bp_end->next;
|
||||
PP->ble = Bp_end;
|
||||
|
||||
PLi = PLi->next;
|
||||
first_block_in_patch = true;
|
||||
}
|
||||
if (reacpu < nodes * 2 / 3)
|
||||
{
|
||||
int myrank;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||
if (myrank == 0)
|
||||
cout << "Parallel::distribute CAUSTION: level#" << lev << " uses essencially " << reacpu << " processors vs " << nodes << " nodes run, your scientific computation scale is not as large as you estimate." << endl;
|
||||
}
|
||||
|
||||
return BlL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief 将当前 Block 几何4等分并存入列表
|
||||
*/
|
||||
|
||||
|
||||
Block* Parallel::splitHotspotBlock(MyList<Block>* &BlL, int _dim,
|
||||
int ib0_orig, int ib3_orig,
|
||||
int jb1_orig, int jb4_orig,
|
||||
int kb2_orig, int kb5_orig,
|
||||
Patch* PP, int r_1, int r_2, int r_3, int r_4,
|
||||
int ingfsi, int fngfsi, bool periodic,
|
||||
Block* &split_first_block, Block* &split_last_block)
|
||||
{
|
||||
// 1. 计算四分索引区间
|
||||
// 计算 X 方向的总网格数
|
||||
int total_len = ib3_orig - ib0_orig + 1;
|
||||
|
||||
// 计算三个切分点,确保覆盖 [ib0_orig, ib3_orig]
|
||||
// 段 1: [ib0_orig, m1]
|
||||
// 段 2: [m1 + 1, m2]
|
||||
// 段 3: [m2 + 1, m3]
|
||||
// 段 4: [m3 + 1, ib3_orig]
|
||||
int m1 = ib0_orig + total_len / 4 - 1;
|
||||
int m2 = ib0_orig + total_len / 2 - 1;
|
||||
int m3 = ib0_orig + (3 * total_len) / 4 - 1;
|
||||
|
||||
int indices[4][6] = {
|
||||
{ib0_orig, jb1_orig, kb2_orig, m1, jb4_orig, kb5_orig}, // 子块 1
|
||||
{m1 + 1, jb1_orig, kb2_orig, m2, jb4_orig, kb5_orig}, // 子块 2
|
||||
{m2 + 1, jb1_orig, kb2_orig, m3, jb4_orig, kb5_orig}, // 子块 3
|
||||
{m3 + 1, jb1_orig, kb2_orig, ib3_orig, jb4_orig, kb5_orig} // 子块 4
|
||||
};
|
||||
|
||||
int target_ranks[4] = {r_1, r_2, r_3, r_4};
|
||||
|
||||
// 2. 内部处理逻辑 (保持原有的 Ghost 扩张和物理坐标转换)
|
||||
auto createSubBlock = [&](int* ib_raw, int target_rank) {
|
||||
int ib_final[6];
|
||||
int sh_here[3];
|
||||
double bb_here[6], dd;
|
||||
|
||||
// --- 逻辑 A: Ghost 扩张 ---
|
||||
if (periodic) {
|
||||
ib_final[0] = ib_raw[0] - ghost_width;
|
||||
ib_final[3] = ib_raw[3] + ghost_width;
|
||||
ib_final[1] = ib_raw[1] - ghost_width;
|
||||
ib_final[4] = ib_raw[4] + ghost_width;
|
||||
ib_final[2] = ib_raw[2] - ghost_width;
|
||||
ib_final[5] = ib_raw[5] + ghost_width;
|
||||
} else {
|
||||
ib_final[0] = Mymax(0, ib_raw[0] - ghost_width);
|
||||
ib_final[3] = Mymin(PP->shape[0] - 1, ib_raw[3] + ghost_width);
|
||||
ib_final[1] = Mymax(0, ib_raw[1] - ghost_width);
|
||||
ib_final[4] = Mymin(PP->shape[1] - 1, ib_raw[4] + ghost_width);
|
||||
ib_final[2] = Mymax(0, ib_raw[2] - ghost_width);
|
||||
ib_final[5] = Mymin(PP->shape[2] - 1, ib_raw[5] + ghost_width);
|
||||
}
|
||||
|
||||
sh_here[0] = ib_final[3] - ib_final[0] + 1;
|
||||
sh_here[1] = ib_final[4] - ib_final[1] + 1;
|
||||
sh_here[2] = ib_final[5] - ib_final[2] + 1;
|
||||
|
||||
// --- 逻辑 B: 物理坐标计算 ---
|
||||
dd = (PP->bbox[3] - PP->bbox[0]) / PP->shape[0];
|
||||
bb_here[0] = PP->bbox[0] + ib_final[0] * dd;
|
||||
bb_here[3] = PP->bbox[0] + (ib_final[3] + 1) * dd;
|
||||
|
||||
dd = (PP->bbox[4] - PP->bbox[1]) / PP->shape[1];
|
||||
bb_here[1] = PP->bbox[1] + ib_final[1] * dd;
|
||||
bb_here[4] = PP->bbox[1] + (ib_final[4] + 1) * dd;
|
||||
|
||||
dd = (PP->bbox[5] - PP->bbox[2]) / PP->shape[2];
|
||||
bb_here[2] = PP->bbox[2] + ib_final[2] * dd;
|
||||
bb_here[5] = PP->bbox[2] + (ib_final[5] + 1) * dd;
|
||||
|
||||
Block* Bg = new Block(_dim, sh_here, bb_here, target_rank, ingfsi, fngfsi, PP->lev);
|
||||
|
||||
if (BlL) BlL->insert(Bg);
|
||||
else BlL = new MyList<Block>(Bg);
|
||||
|
||||
return Bg;
|
||||
};
|
||||
|
||||
// 3. 执行创建并返回首尾指针
|
||||
split_first_block = createSubBlock(indices[0], target_ranks[0]);
|
||||
createSubBlock(indices[1], target_ranks[1]); // 中间块仅插入 List
|
||||
createSubBlock(indices[2], target_ranks[2]); // 中间块仅插入 List
|
||||
split_last_block = createSubBlock(indices[3], target_ranks[3]);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief 将当前 Block 几何二等分并存入列表
|
||||
* @param axis 拆分轴:0-x, 1-y, 2-z (建议选最长轴)
|
||||
*/
|
||||
|
||||
|
||||
Block* Parallel::splitHotspotBlock(MyList<Block>* &BlL, int _dim,
|
||||
int ib0_orig, int ib3_orig,
|
||||
int jb1_orig, int jb4_orig,
|
||||
int kb2_orig, int kb5_orig,
|
||||
Patch* PP, int r_left, int r_right,
|
||||
int ingfsi, int fngfsi, bool periodic,
|
||||
Block* &split_first_block, Block* &split_last_block)
|
||||
{
|
||||
// 1. 索引二分 (基于无 ghost 的原始索引)
|
||||
int mid = (ib0_orig + ib3_orig) / 2;
|
||||
|
||||
// 左块原始索引: [ib0, mid], 右块原始索引: [mid+1, ib3]
|
||||
int indices_L[6] = {ib0_orig, jb1_orig, kb2_orig, mid, jb4_orig, kb5_orig};
|
||||
int indices_R[6] = {mid + 1, jb1_orig, kb2_orig, ib3_orig, jb4_orig, kb5_orig};
|
||||
|
||||
// 2. 内部处理逻辑 (复刻原 distribute 逻辑)
|
||||
auto createSubBlock = [&](int* ib_raw, int target_rank) {
|
||||
int ib_final[6];
|
||||
int sh_here[3];
|
||||
double bb_here[6], dd;
|
||||
|
||||
// --- 逻辑 A: Ghost 扩张 ---
|
||||
if (periodic) {
|
||||
ib_final[0] = ib_raw[0] - ghost_width;
|
||||
ib_final[3] = ib_raw[3] + ghost_width;
|
||||
ib_final[1] = ib_raw[1] - ghost_width;
|
||||
ib_final[4] = ib_raw[4] + ghost_width;
|
||||
ib_final[2] = ib_raw[2] - ghost_width;
|
||||
ib_final[5] = ib_raw[5] + ghost_width;
|
||||
} else {
|
||||
ib_final[0] = Mymax(0, ib_raw[0] - ghost_width);
|
||||
ib_final[3] = Mymin(PP->shape[0] - 1, ib_raw[3] + ghost_width);
|
||||
ib_final[1] = Mymax(0, ib_raw[1] - ghost_width);
|
||||
ib_final[4] = Mymin(PP->shape[1] - 1, ib_raw[4] + ghost_width);
|
||||
ib_final[2] = Mymax(0, ib_raw[2] - ghost_width);
|
||||
ib_final[5] = Mymin(PP->shape[2] - 1, ib_raw[5] + ghost_width);
|
||||
}
|
||||
|
||||
sh_here[0] = ib_final[3] - ib_final[0] + 1;
|
||||
sh_here[1] = ib_final[4] - ib_final[1] + 1;
|
||||
sh_here[2] = ib_final[5] - ib_final[2] + 1;
|
||||
|
||||
// --- 逻辑 B: 物理坐标计算 (严格匹配 Cell 模式) ---
|
||||
// X 方向
|
||||
dd = (PP->bbox[3] - PP->bbox[0]) / PP->shape[0];
|
||||
bb_here[0] = PP->bbox[0] + ib_final[0] * dd;
|
||||
bb_here[3] = PP->bbox[0] + (ib_final[3] + 1) * dd;
|
||||
|
||||
// Y 方向
|
||||
dd = (PP->bbox[4] - PP->bbox[1]) / PP->shape[1];
|
||||
bb_here[1] = PP->bbox[1] + ib_final[1] * dd;
|
||||
bb_here[4] = PP->bbox[1] + (ib_final[4] + 1) * dd;
|
||||
|
||||
// Z 方向
|
||||
dd = (PP->bbox[5] - PP->bbox[2]) / PP->shape[2];
|
||||
bb_here[2] = PP->bbox[2] + ib_final[2] * dd;
|
||||
bb_here[5] = PP->bbox[2] + (ib_final[5] + 1) * dd;
|
||||
|
||||
Block* Bg = new Block(dim, sh_here, bb_here, target_rank, ingfsi, fngfsi, PP->lev);
|
||||
if (BlL) BlL->insert(Bg);
|
||||
else BlL = new MyList<Block>(Bg);
|
||||
|
||||
|
||||
return Bg;
|
||||
};
|
||||
|
||||
// 执行创建
|
||||
split_first_block = createSubBlock(indices_L, r_left);
|
||||
split_last_block = createSubBlock(indices_R, r_right);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @brief 创建映射后的 Block
|
||||
*/
|
||||
Block* Parallel::createMappedBlock(MyList<Block>* &BlL, int _dim, int* shape, double* bbox,
|
||||
int block_id, int ingfsi, int fngfsi, int lev)
|
||||
{
|
||||
// 映射表逻辑
|
||||
int target_rank = block_id;
|
||||
switch(block_id){
|
||||
case 24: target_rank = 23; break;
|
||||
case 25: target_rank = 23; break;
|
||||
case 26: target_rank = 23; break;
|
||||
case 29: target_rank = 30; break;
|
||||
case 34: target_rank = 33; break;
|
||||
case 37: target_rank = 40; break;
|
||||
case 38: target_rank = 40; break;
|
||||
case 39: target_rank = 40; break;
|
||||
|
||||
}
|
||||
|
||||
Block* ng = new Block(dim, shape, bbox, target_rank, ingfsi, fngfsi, lev);
|
||||
|
||||
if (BlL) BlL->insert(ng);
|
||||
else BlL = new MyList<Block>(ng);
|
||||
|
||||
return ng;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
||||
MyList<Block> *Parallel::distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
|
||||
bool periodic, int start_rank, int end_rank, int nodes)
|
||||
@@ -6482,3 +7024,224 @@ void Parallel::checkpatchlist(MyList<Patch> *PatL, bool buflog)
|
||||
PL = PL->next;
|
||||
}
|
||||
}
|
||||
// Check if load balancing is needed based on interpolation times
|
||||
bool Parallel::check_load_balance_need(double *rank_times, int nprocs, int &num_heavy, int *heavy_ranks)
|
||||
{
|
||||
// Calculate average time
|
||||
double avg_time = 0;
|
||||
for (int r = 0; r < nprocs; r++)
|
||||
{
|
||||
avg_time += rank_times[r];
|
||||
}
|
||||
avg_time /= nprocs;
|
||||
|
||||
// Identify heavy ranks (time > 1.5x average)
|
||||
std::vector<std::pair<int, double>> rank_times_vec;
|
||||
for (int r = 0; r < nprocs; r++)
|
||||
{
|
||||
if (rank_times[r] > avg_time * 1.5)
|
||||
{
|
||||
rank_times_vec.push_back(std::make_pair(r, rank_times[r]));
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by time (descending)
|
||||
std::sort(rank_times_vec.begin(), rank_times_vec.end(),
|
||||
[](const std::pair<int, double>& a, const std::pair<int, double>& b) {
|
||||
return a.second > b.second;
|
||||
});
|
||||
|
||||
// Take top 4 heavy ranks
|
||||
num_heavy = std::min(4, (int)rank_times_vec.size());
|
||||
if (num_heavy > 0)
|
||||
{
|
||||
for (int i = 0; i < num_heavy; i++)
|
||||
{
|
||||
heavy_ranks[i] = rank_times_vec[i].first;
|
||||
}
|
||||
return true; // Load balancing is needed
|
||||
}
|
||||
|
||||
return false; // No load balancing needed
|
||||
}
|
||||
|
||||
// Split blocks belonging to heavy ranks to improve load balancing
|
||||
// Strategy: Split heavy rank blocks in half, merge 8 light ranks to free 4 ranks
|
||||
void Parallel::split_heavy_blocks(MyList<Patch> *PatL, int *heavy_ranks, int num_heavy,
|
||||
int split_factor, int cpusize, int ingfsi, int fngfsi)
|
||||
{
|
||||
int myrank, nprocs;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
||||
|
||||
if (myrank != 0) return; // Only rank 0 performs the analysis
|
||||
|
||||
cout << "\n=== Load Balancing Strategy ===" << endl;
|
||||
cout << "Heavy ranks to split (in half): " << num_heavy << endl;
|
||||
for (int i = 0; i < num_heavy; i++)
|
||||
cout << " Heavy rank " << heavy_ranks[i] << endl;
|
||||
|
||||
// Step 1: Identify all blocks and their ranks
|
||||
std::vector<int> all_ranks;
|
||||
std::map<int, std::vector<Block*>> rank_to_blocks;
|
||||
|
||||
MyList<Patch> *PL = PatL;
|
||||
while (PL)
|
||||
{
|
||||
Patch *PP = PL->data;
|
||||
MyList<Block> *BP = PP->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *block = BP->data;
|
||||
all_ranks.push_back(block->rank);
|
||||
rank_to_blocks[block->rank].push_back(block);
|
||||
BP = BP->next;
|
||||
}
|
||||
PL = PL->next;
|
||||
}
|
||||
|
||||
// Step 2: Identify light ranks (not in heavy_ranks list)
|
||||
std::set<int> heavy_set(heavy_ranks, heavy_ranks + num_heavy);
|
||||
std::vector<int> light_ranks;
|
||||
for (int r : all_ranks)
|
||||
{
|
||||
if (heavy_set.find(r) == heavy_set.end())
|
||||
{
|
||||
light_ranks.push_back(r);
|
||||
}
|
||||
}
|
||||
|
||||
// Remove duplicates from light_ranks
|
||||
std::sort(light_ranks.begin(), light_ranks.end());
|
||||
light_ranks.erase(std::unique(light_ranks.begin(), light_ranks.end()), light_ranks.end());
|
||||
|
||||
cout << "Found " << light_ranks.size() << " light ranks (candidates for merging)" << endl;
|
||||
|
||||
// Step 3: Select 8 light ranks to merge (those with smallest workload)
|
||||
// For now, we select the first 8 light ranks
|
||||
int num_to_merge = 8;
|
||||
if (light_ranks.size() < num_to_merge)
|
||||
{
|
||||
cout << "WARNING: Not enough light ranks to merge. Found " << light_ranks.size()
|
||||
<< ", need " << num_to_merge << endl;
|
||||
num_to_merge = light_ranks.size();
|
||||
}
|
||||
|
||||
std::vector<int> ranks_to_merge(light_ranks.begin(), light_ranks.begin() + num_to_merge);
|
||||
|
||||
cout << "Light ranks to merge (8 -> 4 merged ranks):" << endl;
|
||||
for (int i = 0; i < num_to_merge; i++)
|
||||
cout << " Rank " << ranks_to_merge[i] << endl;
|
||||
|
||||
// Step 4: Analyze blocks that need to be split
|
||||
cout << "\n=== Analyzing blocks for splitting ===" << endl;
|
||||
|
||||
struct BlockSplitInfo {
|
||||
Block *original_block;
|
||||
int split_dim;
|
||||
int split_point;
|
||||
};
|
||||
|
||||
std::vector<BlockSplitInfo> blocks_to_split;
|
||||
|
||||
PL = PatL;
|
||||
while (PL)
|
||||
{
|
||||
Patch *PP = PL->data;
|
||||
MyList<Block> *BP = PP->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *block = BP->data;
|
||||
|
||||
// Check if this block belongs to a heavy rank
|
||||
for (int i = 0; i < num_heavy; i++)
|
||||
{
|
||||
if (block->rank == heavy_ranks[i])
|
||||
{
|
||||
// Find the largest dimension for splitting
|
||||
int max_dim = 0;
|
||||
int max_size = block->shape[0];
|
||||
for (int d = 1; d < dim; d++)
|
||||
{
|
||||
if (block->shape[d] > max_size)
|
||||
{
|
||||
max_size = block->shape[d];
|
||||
max_dim = d;
|
||||
}
|
||||
}
|
||||
|
||||
int split_point = max_size / 2;
|
||||
|
||||
BlockSplitInfo info;
|
||||
info.original_block = block;
|
||||
info.split_dim = max_dim;
|
||||
info.split_point = split_point;
|
||||
blocks_to_split.push_back(info);
|
||||
|
||||
cout << "Block at rank " << block->rank << " will be split" << endl;
|
||||
cout << " Shape: [" << block->shape[0] << ", " << block->shape[1] << ", " << block->shape[2] << "]" << endl;
|
||||
cout << " Split along dimension " << max_dim << " at index " << split_point << endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
BP = BP->next;
|
||||
}
|
||||
PL = PL->next;
|
||||
}
|
||||
|
||||
cout << "\nTotal blocks to split: " << blocks_to_split.size() << endl;
|
||||
|
||||
// Step 5: Calculate new rank assignments
|
||||
// Strategy:
|
||||
// - For each heavy rank, its blocks are split in half
|
||||
// - First half keeps the original rank
|
||||
// - Second half gets a new rank (from the freed light ranks)
|
||||
// - 8 light ranks are merged into 4 ranks, freeing up 4 ranks
|
||||
|
||||
std::vector<int> freed_ranks;
|
||||
for (size_t i = 0; i < ranks_to_merge.size(); i += 2)
|
||||
{
|
||||
// Merge pairs of light ranks: (ranks_to_merge[i], ranks_to_merge[i+1]) -> ranks_to_merge[i]
|
||||
// This frees up ranks_to_merge[i+1]
|
||||
if (i + 1 < ranks_to_merge.size())
|
||||
{
|
||||
freed_ranks.push_back(ranks_to_merge[i + 1]);
|
||||
cout << "Merging ranks " << ranks_to_merge[i] << " and " << ranks_to_merge[i + 1]
|
||||
<< " -> keeping rank " << ranks_to_merge[i] << ", freeing rank " << ranks_to_merge[i + 1] << endl;
|
||||
}
|
||||
}
|
||||
|
||||
cout << "\nFreed ranks available for split blocks: ";
|
||||
for (int r : freed_ranks)
|
||||
cout << r << " ";
|
||||
cout << endl;
|
||||
|
||||
// Step 6: Assign new ranks to split blocks
|
||||
int freed_idx = 0;
|
||||
for (size_t i = 0; i < blocks_to_split.size(); i++)
|
||||
{
|
||||
BlockSplitInfo &info = blocks_to_split[i];
|
||||
Block *original = info.original_block;
|
||||
|
||||
if (freed_idx < freed_ranks.size())
|
||||
{
|
||||
cout << "\nSplitting block at rank " << original->rank << endl;
|
||||
cout << " First half: keeps rank " << original->rank << endl;
|
||||
cout << " Second half: gets new rank " << freed_ranks[freed_idx] << endl;
|
||||
freed_idx++;
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "WARNING: Not enough freed ranks for all split blocks!" << endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
cout << "\n=== Load Balancing Analysis Complete ===" << endl;
|
||||
cout << "Next steps:" << endl;
|
||||
cout << " 1. Recompose the grid with new rank assignments" << endl;
|
||||
cout << " 2. Data migration will be handled by recompose_cgh" << endl;
|
||||
cout << " 3. Ghost zone communication will be updated automatically" << endl;
|
||||
}
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
#include <cmath>
|
||||
#include <new>
|
||||
using namespace std;
|
||||
|
||||
#include <memory.h>
|
||||
#include "Parallel_bam.h"
|
||||
#include "var.h"
|
||||
#include "MPatch.h"
|
||||
@@ -32,6 +32,25 @@ namespace Parallel
|
||||
int partition2(int *nxy, int split_size, int *min_width, int cpusize, int *shape); // special for 2 diemnsions
|
||||
int partition3(int *nxyz, int split_size, int *min_width, int cpusize, int *shape);
|
||||
MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks
|
||||
|
||||
MyList<Block> *distribute_hard(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks
|
||||
Block* splitHotspotBlock(MyList<Block>* &BlL, int _dim,
|
||||
int ib0_orig, int ib3_orig,
|
||||
int jb1_orig, int jb4_orig,
|
||||
int kb2_orig, int kb5_orig,
|
||||
Patch* PP, int r_1, int r_2,
|
||||
int ingfsi, int fngfsi, bool periodic,
|
||||
Block* &split_first_block, Block* &split_last_block);
|
||||
|
||||
Block* splitHotspotBlock(MyList<Block>* &BlL, int _dim,
|
||||
int ib0_orig, int ib3_orig,
|
||||
int jb1_orig, int jb4_orig,
|
||||
int kb2_orig, int kb5_orig,
|
||||
Patch* PP, int r_1, int r_2, int r_3, int r_4,
|
||||
int ingfsi, int fngfsi, bool periodic,
|
||||
Block* &split_first_block, Block* &split_last_block);
|
||||
Block* createMappedBlock(MyList<Block>* &BlL, int _dim, int* shape, double* bbox,
|
||||
int block_id, int ingfsi, int fngfsi, int lev);
|
||||
void KillBlocks(MyList<Patch> *PatchLIST);
|
||||
|
||||
void setfunction(MyList<Block> *BlL, var *vn, double func(double x, double y, double z));
|
||||
@@ -208,6 +227,18 @@ namespace Parallel
|
||||
#if (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
||||
MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
|
||||
bool periodic, int start_rank, int end_rank, int nodes = 0);
|
||||
|
||||
// Redistribute blocks with time statistics for load balancing
|
||||
MyList<Block> *distribute(MyList<Patch> *PatchLIST, MyList<Block> *OldBlockL,
|
||||
int cpusize, int ingfsi, int fngfsi,
|
||||
bool periodic, int start_rank, int end_rank, int nodes = 0);
|
||||
#endif
|
||||
}
|
||||
#endif /*PARALLEL_H */
|
||||
|
||||
// Dynamic load balancing: split blocks for heavy ranks
|
||||
void split_heavy_blocks(MyList<Patch> *PatL, int *heavy_ranks, int num_heavy,
|
||||
int split_factor, int cpusize, int ingfsi, int fngfsi);
|
||||
|
||||
// Check if load balancing is needed based on interpolation times
|
||||
bool check_load_balance_need(double *rank_times, int nprocs, int &num_heavy, int *heavy_ranks);
|
||||
}
|
||||
#endif /*PARALLEL_H */
|
||||
|
||||
@@ -43,6 +43,14 @@ cgh::cgh(int ingfsi, int fngfsi, int Symmetry, char *filename, int checkrun,
|
||||
end_rank = 0;
|
||||
#endif
|
||||
|
||||
// Initialize load balancing variables
|
||||
enable_load_balance = false;
|
||||
load_balance_check_interval = 10; // Check every 10 time steps
|
||||
current_time_step = 0;
|
||||
rank_interp_times = nullptr;
|
||||
heavy_ranks = nullptr;
|
||||
num_heavy_ranks = 0;
|
||||
|
||||
if (!checkrun)
|
||||
{
|
||||
read_bbox(Symmetry, filename);
|
||||
@@ -113,6 +121,12 @@ cgh::~cgh()
|
||||
delete[] Porgls[lev];
|
||||
}
|
||||
delete[] Porgls;
|
||||
|
||||
// Clean up load balancing memory
|
||||
if (rank_interp_times)
|
||||
delete[] rank_interp_times;
|
||||
if (heavy_ranks)
|
||||
delete[] heavy_ranks;
|
||||
}
|
||||
|
||||
//================================================================================================
|
||||
@@ -130,7 +144,7 @@ void cgh::compose_cgh(int nprocs)
|
||||
for (int lev = 0; lev < levels; lev++)
|
||||
{
|
||||
checkPatchList(PatL[lev], false);
|
||||
Parallel::distribute(PatL[lev], nprocs, ingfs, fngfs, false);
|
||||
Parallel::distribute_hard(PatL[lev], nprocs, ingfs, fngfs, false);
|
||||
#if (RPB == 1)
|
||||
// we need distributed box of PatL[lev] and PatL[lev-1]
|
||||
if (lev > 0)
|
||||
@@ -1705,3 +1719,121 @@ void cgh::settrfls(const int lev)
|
||||
{
|
||||
trfls = lev;
|
||||
}
|
||||
|
||||
//================================================================================================
|
||||
// Load Balancing Functions
|
||||
//================================================================================================
|
||||
|
||||
// Initialize load balancing
|
||||
void cgh::init_load_balance(int nprocs)
|
||||
{
|
||||
if (rank_interp_times)
|
||||
delete[] rank_interp_times;
|
||||
if (heavy_ranks)
|
||||
delete[] heavy_ranks;
|
||||
|
||||
rank_interp_times = new double[nprocs];
|
||||
heavy_ranks = new int[4]; // Maximum 4 heavy ranks
|
||||
num_heavy_ranks = 0;
|
||||
|
||||
for (int i = 0; i < nprocs; i++)
|
||||
rank_interp_times[i] = 0.0;
|
||||
}
|
||||
|
||||
// Update interpolation time for a rank
|
||||
void cgh::update_interp_time(int rank, double time)
|
||||
{
|
||||
if (rank_interp_times && rank >= 0)
|
||||
{
|
||||
rank_interp_times[rank] = time;
|
||||
}
|
||||
}
|
||||
|
||||
// Check and perform load balancing if needed
|
||||
bool cgh::check_and_rebalance(int nprocs, int lev,
|
||||
MyList<var> *OldList, MyList<var> *StateList,
|
||||
MyList<var> *FutureList, MyList<var> *tmList,
|
||||
int Symmetry, bool BB)
|
||||
{
|
||||
int myrank;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||
|
||||
// Only check at specified intervals
|
||||
current_time_step++;
|
||||
if (current_time_step % load_balance_check_interval != 0)
|
||||
return false;
|
||||
|
||||
if (myrank == 0)
|
||||
{
|
||||
cout << "\n=== Checking load balance at time step " << current_time_step << " ===" << endl;
|
||||
}
|
||||
|
||||
// Collect all rank times on rank 0
|
||||
double *all_times = nullptr;
|
||||
if (myrank == 0)
|
||||
{
|
||||
all_times = new double[nprocs];
|
||||
}
|
||||
|
||||
MPI_Gather(rank_interp_times, 1, MPI_DOUBLE, all_times, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
|
||||
|
||||
bool need_rebalance = false;
|
||||
|
||||
if (myrank == 0)
|
||||
{
|
||||
// Check if load balancing is needed
|
||||
need_rebalance = Parallel::check_load_balance_need(all_times, nprocs, num_heavy_ranks, heavy_ranks);
|
||||
|
||||
if (need_rebalance)
|
||||
{
|
||||
cout << "=== Load imbalance detected! Need to rebalance ===" << endl;
|
||||
cout << "Top " << num_heavy_ranks << " heavy ranks: ";
|
||||
for (int i = 0; i < num_heavy_ranks; i++)
|
||||
{
|
||||
cout << heavy_ranks[i] << " (" << all_times[heavy_ranks[i]] << " s) ";
|
||||
}
|
||||
cout << endl;
|
||||
|
||||
// Analyze blocks that need to be split
|
||||
Parallel::split_heavy_blocks(PatL[lev], heavy_ranks, num_heavy_ranks, 2, nprocs, ingfs, fngfs);
|
||||
|
||||
// Set lev_flag to trigger recompose_cgh
|
||||
cout << "=== Triggering recompose_cgh for level " << lev << " ===" << endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "=== Load is balanced, no rebalancing needed ===" << endl;
|
||||
}
|
||||
|
||||
delete[] all_times;
|
||||
}
|
||||
|
||||
// Broadcast the decision to all ranks
|
||||
MPI_Bcast(&need_rebalance, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
|
||||
|
||||
if (need_rebalance)
|
||||
{
|
||||
// Broadcast heavy ranks information
|
||||
MPI_Bcast(&num_heavy_ranks, 1, MPI_INT, 0, MPI_COMM_WORLD);
|
||||
MPI_Bcast(heavy_ranks, num_heavy_ranks, MPI_INT, 0, MPI_COMM_WORLD);
|
||||
|
||||
// Perform recompose_cgh on the specified level
|
||||
if (myrank == 0)
|
||||
{
|
||||
cout << "=== Performing recompose_cgh ===" << endl;
|
||||
}
|
||||
|
||||
// Call recompose_cgh_Onelevel for the specified level
|
||||
bool *lev_flag = new bool[1];
|
||||
lev_flag[0] = true;
|
||||
recompose_cgh_Onelevel(nprocs, lev, OldList, StateList, FutureList, tmList, Symmetry, BB);
|
||||
delete[] lev_flag;
|
||||
|
||||
// Reset time counter after rebalancing
|
||||
current_time_step = 0;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -87,6 +87,21 @@ public:
|
||||
#if (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
||||
void construct_mylev(int nprocs);
|
||||
#endif
|
||||
|
||||
// Load balancing support
|
||||
bool enable_load_balance; // Enable load balancing
|
||||
int load_balance_check_interval; // Check interval (in time steps)
|
||||
int current_time_step; // Current time step counter
|
||||
double *rank_interp_times; // Store interpolation times for each rank
|
||||
int *heavy_ranks; // Store heavy rank numbers
|
||||
int num_heavy_ranks; // Number of heavy ranks
|
||||
|
||||
void init_load_balance(int nprocs);
|
||||
void update_interp_time(int rank, double time);
|
||||
bool check_and_rebalance(int nprocs, int lev,
|
||||
MyList<var> *OldList, MyList<var> *StateList,
|
||||
MyList<var> *FutureList, MyList<var> *tmList,
|
||||
int Symmetry, bool BB);
|
||||
};
|
||||
|
||||
#endif /* CGH_H */
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
#include <strstream>
|
||||
#include <cmath>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
using namespace std;
|
||||
#else
|
||||
#include <iostream.h>
|
||||
@@ -238,9 +240,6 @@ void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *
|
||||
shellf = new double[n_tot * InList];
|
||||
|
||||
GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Nmin, Nmax);
|
||||
|
||||
//|~~~~~> Integrate the dot product of Dphi with the surface normal.
|
||||
|
||||
double *RP_out, *IP_out;
|
||||
RP_out = new double[NN];
|
||||
IP_out = new double[NN];
|
||||
|
||||
Reference in New Issue
Block a user