对各 rank 的运行时间进行统计。两个重载的 Interp_Points 函数分别在不同的计算中被调用,因此我对这两个重载函数分别统计了 MPI 实际计算时间。

1. 第一个重载(由 PatList_Interp_Points 调用 Interp_Points):我取耗时排名前三的 rank,发现每次只有一个 rank 的时间相对较长,例如:
   Rank [ 52]: Calc 0.000012 s, Rank [ 20]: Calc 0.000003 s, Rank [ 35]: Calc 0.000003 s;
   Rank [ 10]: Calc 0.000010 s, Rank [ 17]: Calc 0.000005 s, Rank [ 32]: Calc 0.000003 s。
   耗时最长的 rank 并不固定,一般是 rank 10 或 rank 52。尽管这类调用有很多,其耗时与 Surf_Wave 的调用相比仍然少很多。

2. 第二个重载(由 Surf_Wave 调用 Interp_Points):耗时最长的前四个 rank 比较固定,就是下面这四个:
   Rank [ 27]: Calc 0.331978 s, Rank [ 35]: Calc 0.242219 s, Rank [ 28]: Calc 0.242132 s, Rank [ 36]: Calc 0.197024 s。

因此,后续优化的核心是 Surf_Wave 这一调用路径。
This commit is contained in:
@@ -66,7 +66,8 @@ if os.path.exists(File_directory):
|
|||||||
## Prompt whether to overwrite the existing directory
|
## Prompt whether to overwrite the existing directory
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
inputvalue = input()
|
## inputvalue = input()
|
||||||
|
inputvalue = "continue"
|
||||||
## If the user agrees to overwrite, proceed and remove the existing directory
|
## If the user agrees to overwrite, proceed and remove the existing directory
|
||||||
if ( inputvalue == "continue" ):
|
if ( inputvalue == "continue" ):
|
||||||
print( " Continue the calculation !!! " )
|
print( " Continue the calculation !!! " )
|
||||||
|
|||||||
@@ -341,6 +341,8 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
double *Shellf, int Symmetry)
|
double *Shellf, int Symmetry)
|
||||||
{
|
{
|
||||||
// NOTE: we do not Synchnize variables here, make sure of that before calling this routine
|
// NOTE: we do not Synchnize variables here, make sure of that before calling this routine
|
||||||
|
double t_calc_end, t_calc_total = 0;
|
||||||
|
double t_calc_start = MPI_Wtime();
|
||||||
int myrank, nprocs;
|
int myrank, nprocs;
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
||||||
@@ -442,7 +444,8 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
Bp = Bp->next;
|
Bp = Bp->next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
t_calc_end = MPI_Wtime();
|
||||||
|
t_calc_total = t_calc_end - t_calc_start;
|
||||||
// Replace MPI_Allreduce with per-owner MPI_Bcast:
|
// Replace MPI_Allreduce with per-owner MPI_Bcast:
|
||||||
// Group consecutive points by owner rank and broadcast each group.
|
// Group consecutive points by owner rank and broadcast each group.
|
||||||
// Since each point's data is non-zero only on the owner rank,
|
// Since each point's data is non-zero only on the owner rank,
|
||||||
@@ -498,6 +501,49 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
}
|
}
|
||||||
|
|
||||||
delete[] owner_rank;
|
delete[] owner_rank;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// 4. 汇总并输出真正干活最慢的 Top 10
|
||||||
|
// Per-rank timing record. One instance is filled in on every rank and the
// records are gathered onto rank 0 as raw bytes (sizeof(RankStats) x MPI_BYTE),
// where they are sorted by calc_time in descending order.
struct RankStats {
|
||||||
|
int rank;
|
||||||
|
double calc_time; // net computation time (t_calc_end - t_calc_start, taken with MPI_Wtime)
|
||||||
|
double comm_time; // wait/communication time; always set to 0 here because it is not tracked in this function
|
||||||
|
};
|
||||||
|
|
||||||
|
// 创建当前进程的统计数据
|
||||||
|
RankStats local_stat;
|
||||||
|
local_stat.rank = myrank;
|
||||||
|
local_stat.calc_time = t_calc_total;
|
||||||
|
local_stat.comm_time = 0; // 此函数中未跟踪通信时间
|
||||||
|
|
||||||
|
// 为所有进程的统计数据分配内存
|
||||||
|
RankStats *all_stats = nullptr;
|
||||||
|
if (myrank == 0) {
|
||||||
|
all_stats = new RankStats[nprocs];
|
||||||
|
}
|
||||||
|
|
||||||
|
// 使用MPI_Gather收集所有进程的数据到rank 0
|
||||||
|
MPI_Gather(&local_stat, sizeof(RankStats), MPI_BYTE,
|
||||||
|
all_stats, sizeof(RankStats), MPI_BYTE,
|
||||||
|
0, MPI_COMM_WORLD);
|
||||||
|
|
||||||
|
if (myrank == 0) {
|
||||||
|
// 按 calc_time(净计算时间)排序
|
||||||
|
std::sort(all_stats, all_stats + nprocs, [](const RankStats& a, const RankStats& b) {
|
||||||
|
return a.calc_time > b.calc_time;
|
||||||
|
});
|
||||||
|
|
||||||
|
printf("\n--- Top 10 Ranks by ACTIVE COMPUTATION (CPU Time) ---\n");
|
||||||
|
int display_count = (nprocs < 10) ? nprocs : 10;
|
||||||
|
for (int i = 0; i < display_count; i++) {
|
||||||
|
printf("Rank [%4d]: Calc %.6f s\n",
|
||||||
|
all_stats[i].rank, all_stats[i].calc_time);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 清理分配的内存
|
||||||
|
delete[] all_stats;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
void Patch::Interp_Points(MyList<var> *VarList,
|
void Patch::Interp_Points(MyList<var> *VarList,
|
||||||
int NN, double **XX,
|
int NN, double **XX,
|
||||||
@@ -507,6 +553,8 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
// Targeted point-to-point overload: each owner sends each point only to
|
// Targeted point-to-point overload: each owner sends each point only to
|
||||||
// the one rank that needs it for integration (consumer), reducing
|
// the one rank that needs it for integration (consumer), reducing
|
||||||
// communication volume by ~nprocs times compared to the Bcast version.
|
// communication volume by ~nprocs times compared to the Bcast version.
|
||||||
|
double t_calc_end, t_calc_total = 0;
|
||||||
|
double t_calc_start = MPI_Wtime();
|
||||||
int myrank, nprocs;
|
int myrank, nprocs;
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
||||||
@@ -607,7 +655,8 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
Bp = Bp->next;
|
Bp = Bp->next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
t_calc_end = MPI_Wtime();
|
||||||
|
t_calc_total = t_calc_end - t_calc_start;
|
||||||
// --- Error check for unfound points ---
|
// --- Error check for unfound points ---
|
||||||
for (int j = 0; j < NN; j++)
|
for (int j = 0; j < NN; j++)
|
||||||
{
|
{
|
||||||
@@ -764,6 +813,48 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
delete[] recv_count;
|
delete[] recv_count;
|
||||||
delete[] consumer_rank;
|
delete[] consumer_rank;
|
||||||
delete[] owner_rank;
|
delete[] owner_rank;
|
||||||
|
|
||||||
|
// 4. 汇总并输出真正干活最慢的 Top 10
|
||||||
|
// Per-rank timing record. One instance is filled in on every rank and the
// records are gathered onto rank 0 as raw bytes (sizeof(RankStats) x MPI_BYTE),
// where they are sorted by calc_time in descending order.
struct RankStats {
|
||||||
|
int rank;
|
||||||
|
double calc_time; // net computation time (t_calc_end - t_calc_start, taken with MPI_Wtime)
|
||||||
|
double comm_time; // wait/communication time; always set to 0 here because it is not tracked in this function
|
||||||
|
};
|
||||||
|
|
||||||
|
// 创建当前进程的统计数据
|
||||||
|
RankStats local_stat;
|
||||||
|
local_stat.rank = myrank;
|
||||||
|
local_stat.calc_time = t_calc_total;
|
||||||
|
local_stat.comm_time = 0; // 此函数中未跟踪通信时间
|
||||||
|
|
||||||
|
// 为所有进程的统计数据分配内存
|
||||||
|
RankStats *all_stats = nullptr;
|
||||||
|
if (myrank == 0) {
|
||||||
|
all_stats = new RankStats[nprocs];
|
||||||
|
}
|
||||||
|
|
||||||
|
// 使用MPI_Gather收集所有进程的数据到rank 0
|
||||||
|
MPI_Gather(&local_stat, sizeof(RankStats), MPI_BYTE,
|
||||||
|
all_stats, sizeof(RankStats), MPI_BYTE,
|
||||||
|
0, MPI_COMM_WORLD);
|
||||||
|
|
||||||
|
if (myrank == 0) {
|
||||||
|
// 按 calc_time(净计算时间)排序
|
||||||
|
std::sort(all_stats, all_stats + nprocs, [](const RankStats& a, const RankStats& b) {
|
||||||
|
return a.calc_time > b.calc_time;
|
||||||
|
});
|
||||||
|
/*
|
||||||
|
printf("\n--- Top 10 Ranks by ACTIVE COMPUTATION (CPU Time) ---\n");
|
||||||
|
int display_count = (nprocs < 10) ? nprocs : 10;
|
||||||
|
for (int i = 0; i < display_count; i++) {
|
||||||
|
printf("Rank [%4d]: Calc %.6f s\n",
|
||||||
|
all_stats[i].rank, all_stats[i].calc_time);
|
||||||
|
}*/
|
||||||
|
|
||||||
|
// 清理分配的内存
|
||||||
|
delete[] all_stats;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
void Patch::Interp_Points(MyList<var> *VarList,
|
void Patch::Interp_Points(MyList<var> *VarList,
|
||||||
int NN, double **XX,
|
int NN, double **XX,
|
||||||
|
|||||||
Reference in New Issue
Block a user