对各 rank 的运行时间进行统计。Interp_Points 的两个重载函数分别在不同的计算中被调用,因此我对这两个重载函数分别统计了 MPI 各 rank 的实际计算时间。
对于第一个重载(由 PatList_Interp_Points 调用的 Interp_Points),我取耗时排名前三的 rank,发现每次只有一个 rank 的时间较长,例如:Rank [ 52]: Calc 0.000012 s, Rank [ 20]: Calc 0.000003 s, Rank [ 35]: Calc 0.000003 s;以及 Rank [ 10]: Calc 0.000010 s, Rank [ 17]: Calc 0.000005 s, Rank [ 32]: Calc 0.000003 s。耗时最长的 rank 并不固定,一般是 rank 10 或 rank 52;不过,尽管这类输出有很多,其耗时与下面 Surf_Wave 的情形相比仍然少很多。
对于第二个重载(由 Surf_Wave 调用的 Interp_Points),我发现耗时最长的前四个 rank 比较固定,就是下面这四个:Rank [ 27]: Calc 0.331978 s, Rank [ 35]: Calc 0.242219 s, Rank [ 28]: Calc 0.242132 s, Rank [ 36]: Calc 0.197024 s。因此,Surf_Wave 这条调用路径才是性能优化的核心。
This commit is contained in:
@@ -66,7 +66,8 @@ if os.path.exists(File_directory):
|
||||
## Prompt whether to overwrite the existing directory
|
||||
while True:
|
||||
try:
|
||||
inputvalue = input()
|
||||
## inputvalue = input()
|
||||
inputvalue = "continue"
|
||||
## If the user agrees to overwrite, proceed and remove the existing directory
|
||||
if ( inputvalue == "continue" ):
|
||||
print( " Continue the calculation !!! " )
|
||||
|
||||
@@ -341,6 +341,8 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
double *Shellf, int Symmetry)
|
||||
{
|
||||
// NOTE: we do not synchronize variables here; make sure that has been done before calling this routine
|
||||
double t_calc_end, t_calc_total = 0;
|
||||
double t_calc_start = MPI_Wtime();
|
||||
int myrank, nprocs;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
||||
@@ -442,7 +444,8 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
Bp = Bp->next;
|
||||
}
|
||||
}
|
||||
|
||||
t_calc_end = MPI_Wtime();
|
||||
t_calc_total = t_calc_end - t_calc_start;
|
||||
// Replace MPI_Allreduce with per-owner MPI_Bcast:
|
||||
// Group consecutive points by owner rank and broadcast each group.
|
||||
// Since each point's data is non-zero only on the owner rank,
|
||||
@@ -498,6 +501,49 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
}
|
||||
|
||||
delete[] owner_rank;
|
||||
|
||||
|
||||
|
||||
// 4. 汇总并输出真正干活最慢的 Top 10
|
||||
struct RankStats {
|
||||
int rank;
|
||||
double calc_time; // 净计算时间
|
||||
double comm_time; // 等待时间
|
||||
};
|
||||
|
||||
// 创建当前进程的统计数据
|
||||
RankStats local_stat;
|
||||
local_stat.rank = myrank;
|
||||
local_stat.calc_time = t_calc_total;
|
||||
local_stat.comm_time = 0; // 此函数中未跟踪通信时间
|
||||
|
||||
// 为所有进程的统计数据分配内存
|
||||
RankStats *all_stats = nullptr;
|
||||
if (myrank == 0) {
|
||||
all_stats = new RankStats[nprocs];
|
||||
}
|
||||
|
||||
// 使用MPI_Gather收集所有进程的数据到rank 0
|
||||
MPI_Gather(&local_stat, sizeof(RankStats), MPI_BYTE,
|
||||
all_stats, sizeof(RankStats), MPI_BYTE,
|
||||
0, MPI_COMM_WORLD);
|
||||
|
||||
if (myrank == 0) {
|
||||
// 按 calc_time(净计算时间)排序
|
||||
std::sort(all_stats, all_stats + nprocs, [](const RankStats& a, const RankStats& b) {
|
||||
return a.calc_time > b.calc_time;
|
||||
});
|
||||
|
||||
printf("\n--- Top 10 Ranks by ACTIVE COMPUTATION (CPU Time) ---\n");
|
||||
int display_count = (nprocs < 10) ? nprocs : 10;
|
||||
for (int i = 0; i < display_count; i++) {
|
||||
printf("Rank [%4d]: Calc %.6f s\n",
|
||||
all_stats[i].rank, all_stats[i].calc_time);
|
||||
}
|
||||
|
||||
// 清理分配的内存
|
||||
delete[] all_stats;
|
||||
}
|
||||
}
|
||||
void Patch::Interp_Points(MyList<var> *VarList,
|
||||
int NN, double **XX,
|
||||
@@ -507,6 +553,8 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
// Targeted point-to-point overload: each owner sends each point only to
|
||||
// the one rank that needs it for integration (consumer), reducing
|
||||
// communication volume by ~nprocs times compared to the Bcast version.
|
||||
double t_calc_end, t_calc_total = 0;
|
||||
double t_calc_start = MPI_Wtime();
|
||||
int myrank, nprocs;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
||||
@@ -607,7 +655,8 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
Bp = Bp->next;
|
||||
}
|
||||
}
|
||||
|
||||
t_calc_end = MPI_Wtime();
|
||||
t_calc_total = t_calc_end - t_calc_start;
|
||||
// --- Error check for unfound points ---
|
||||
for (int j = 0; j < NN; j++)
|
||||
{
|
||||
@@ -764,6 +813,48 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
delete[] recv_count;
|
||||
delete[] consumer_rank;
|
||||
delete[] owner_rank;
|
||||
|
||||
// 4. 汇总并输出真正干活最慢的 Top 10
|
||||
struct RankStats {
|
||||
int rank;
|
||||
double calc_time; // 净计算时间
|
||||
double comm_time; // 等待时间
|
||||
};
|
||||
|
||||
// 创建当前进程的统计数据
|
||||
RankStats local_stat;
|
||||
local_stat.rank = myrank;
|
||||
local_stat.calc_time = t_calc_total;
|
||||
local_stat.comm_time = 0; // 此函数中未跟踪通信时间
|
||||
|
||||
// 为所有进程的统计数据分配内存
|
||||
RankStats *all_stats = nullptr;
|
||||
if (myrank == 0) {
|
||||
all_stats = new RankStats[nprocs];
|
||||
}
|
||||
|
||||
// 使用MPI_Gather收集所有进程的数据到rank 0
|
||||
MPI_Gather(&local_stat, sizeof(RankStats), MPI_BYTE,
|
||||
all_stats, sizeof(RankStats), MPI_BYTE,
|
||||
0, MPI_COMM_WORLD);
|
||||
|
||||
if (myrank == 0) {
|
||||
// 按 calc_time(净计算时间)排序
|
||||
std::sort(all_stats, all_stats + nprocs, [](const RankStats& a, const RankStats& b) {
|
||||
return a.calc_time > b.calc_time;
|
||||
});
|
||||
/*
|
||||
printf("\n--- Top 10 Ranks by ACTIVE COMPUTATION (CPU Time) ---\n");
|
||||
int display_count = (nprocs < 10) ? nprocs : 10;
|
||||
for (int i = 0; i < display_count; i++) {
|
||||
printf("Rank [%4d]: Calc %.6f s\n",
|
||||
all_stats[i].rank, all_stats[i].calc_time);
|
||||
}*/
|
||||
|
||||
// 清理分配的内存
|
||||
delete[] all_stats;
|
||||
}
|
||||
|
||||
}
|
||||
void Patch::Interp_Points(MyList<var> *VarList,
|
||||
int NN, double **XX,
|
||||
|
||||
Reference in New Issue
Block a user