diff --git a/AMSS_NCKU_Program.py b/AMSS_NCKU_Program.py index 6a7952a..4d5adfa 100755 --- a/AMSS_NCKU_Program.py +++ b/AMSS_NCKU_Program.py @@ -66,7 +66,8 @@ if os.path.exists(File_directory): ## Prompt whether to overwrite the existing directory while True: try: - inputvalue = input() + ## inputvalue = input() + inputvalue = "continue" ## If the user agrees to overwrite, proceed and remove the existing directory if ( inputvalue == "continue" ): print( " Continue the calculation !!! " ) diff --git a/AMSS_NCKU_source/MPatch.C b/AMSS_NCKU_source/MPatch.C index 91ead8a..e712a74 100644 --- a/AMSS_NCKU_source/MPatch.C +++ b/AMSS_NCKU_source/MPatch.C @@ -341,6 +341,8 @@ void Patch::Interp_Points(MyList *VarList, double *Shellf, int Symmetry) { // NOTE: we do not Synchnize variables here, make sure of that before calling this routine + double t_calc_end, t_calc_total = 0; + double t_calc_start = MPI_Wtime(); int myrank, nprocs; MPI_Comm_rank(MPI_COMM_WORLD, &myrank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); @@ -442,7 +444,8 @@ void Patch::Interp_Points(MyList *VarList, Bp = Bp->next; } } - + t_calc_end = MPI_Wtime(); + t_calc_total = t_calc_end - t_calc_start; // Replace MPI_Allreduce with per-owner MPI_Bcast: // Group consecutive points by owner rank and broadcast each group. // Since each point's data is non-zero only on the owner rank, @@ -498,6 +501,49 @@ void Patch::Interp_Points(MyList *VarList, } delete[] owner_rank; + + + + // 4. 汇总并输出真正干活最慢的 Top 10 + struct RankStats { + int rank; + double calc_time; // 净计算时间 + double comm_time; // 等待时间 + }; + + // 创建当前进程的统计数据 + RankStats local_stat; + local_stat.rank = myrank; + local_stat.calc_time = t_calc_total; + local_stat.comm_time = 0; // 此函数中未跟踪通信时间 + + // 为所有进程的统计数据分配内存 + RankStats *all_stats = nullptr; + if (myrank == 0) { + all_stats = new RankStats[nprocs]; + } + + // 使用MPI_Gather收集所有进程的数据到rank 0 + MPI_Gather(&local_stat, sizeof(RankStats), MPI_BYTE, + all_stats, sizeof(RankStats), MPI_BYTE, + 0, MPI_COMM_WORLD); + + if (myrank == 0) { + // 按 calc_time(净计算时间)排序 + std::sort(all_stats, all_stats + nprocs, [](const RankStats& a, const RankStats& b) { + return a.calc_time > b.calc_time; + }); + + printf("\n--- Top 10 Ranks by ACTIVE COMPUTATION (CPU Time) ---\n"); + int display_count = (nprocs < 10) ? nprocs : 10; + for (int i = 0; i < display_count; i++) { + printf("Rank [%4d]: Calc %.6f s\n", + all_stats[i].rank, all_stats[i].calc_time); + } + + // 清理分配的内存 + delete[] all_stats; + } } void Patch::Interp_Points(MyList *VarList, int NN, double **XX, @@ -507,6 +553,8 @@ void Patch::Interp_Points(MyList *VarList, // Targeted point-to-point overload: each owner sends each point only to // the one rank that needs it for integration (consumer), reducing // communication volume by ~nprocs times compared to the Bcast version. + double t_calc_end, t_calc_total = 0; + double t_calc_start = MPI_Wtime(); int myrank, nprocs; MPI_Comm_rank(MPI_COMM_WORLD, &myrank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); @@ -607,7 +655,8 @@ void Patch::Interp_Points(MyList *VarList, Bp = Bp->next; } } - + t_calc_end = MPI_Wtime(); + t_calc_total = t_calc_end - t_calc_start; // --- Error check for unfound points --- for (int j = 0; j < NN; j++) { @@ -764,6 +813,48 @@ void Patch::Interp_Points(MyList *VarList, delete[] recv_count; delete[] consumer_rank; delete[] owner_rank; + + // 4. 汇总并输出真正干活最慢的 Top 10 + struct RankStats { + int rank; + double calc_time; // 净计算时间 + double comm_time; // 等待时间 + }; + + // 创建当前进程的统计数据 + RankStats local_stat; + local_stat.rank = myrank; + local_stat.calc_time = t_calc_total; + local_stat.comm_time = 0; // 此函数中未跟踪通信时间 + + // 为所有进程的统计数据分配内存 + RankStats *all_stats = nullptr; + if (myrank == 0) { + all_stats = new RankStats[nprocs]; + } + + // 使用MPI_Gather收集所有进程的数据到rank 0 + MPI_Gather(&local_stat, sizeof(RankStats), MPI_BYTE, + all_stats, sizeof(RankStats), MPI_BYTE, + 0, MPI_COMM_WORLD); + + if (myrank == 0) { + // 按 calc_time(净计算时间)排序 + std::sort(all_stats, all_stats + nprocs, [](const RankStats& a, const RankStats& b) { + return a.calc_time > b.calc_time; + }); +/* + printf("\n--- Top 10 Ranks by ACTIVE COMPUTATION (CPU Time) ---\n"); + int display_count = (nprocs < 10) ? nprocs : 10; + for (int i = 0; i < display_count; i++) { + printf("Rank [%4d]: Calc %.6f s\n", + all_stats[i].rank, all_stats[i].calc_time); + }*/ + + // 清理分配的内存 + delete[] all_stats; + } + } void Patch::Interp_Points(MyList *VarList, int NN, double **XX, @@ -1668,4 +1759,4 @@ bool Patch::Find_Point(double *XX) delete[] DH; return true; -} +} \ No newline at end of file