Files
mysysy/src/midend/Pass/Analysis/CallGraphAnalysis.cpp
2025-08-08 16:24:13 +08:00

418 lines
13 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include "CallGraphAnalysis.h"
#include "SysYIRPrinter.h"
#include <iostream>
#include <stack>
#include <unordered_set>
extern int DEBUG;
namespace sysy {
// 静态成员初始化
void* CallGraphAnalysisPass::ID = (void*)&CallGraphAnalysisPass::ID;
// ========== CallGraphAnalysisResult 实现 ==========
CallGraphAnalysisResult::Statistics CallGraphAnalysisResult::getStatistics() const {
Statistics stats = {};
stats.totalFunctions = nodes.size();
size_t totalCallEdges = 0;
size_t recursiveFunctions = 0;
size_t selfRecursiveFunctions = 0;
size_t totalCallers = 0;
size_t totalCallees = 0;
for (const auto& pair : nodes) {
const auto& node = pair.second;
totalCallEdges += node->callees.size();
totalCallers += node->callers.size();
totalCallees += node->callees.size();
if (node->isRecursive) recursiveFunctions++;
if (node->isSelfRecursive) selfRecursiveFunctions++;
}
stats.totalCallEdges = totalCallEdges;
stats.recursiveFunctions = recursiveFunctions;
stats.selfRecursiveFunctions = selfRecursiveFunctions;
stats.stronglyConnectedComponents = sccs.size();
// 计算最大SCC大小
size_t maxSCCSize = 0;
for (const auto& scc : sccs) {
maxSCCSize = std::max(maxSCCSize, scc.size());
}
stats.maxSCCSize = maxSCCSize;
// 计算平均值
if (stats.totalFunctions > 0) {
stats.avgCallersPerFunction = static_cast<double>(totalCallers) / stats.totalFunctions;
stats.avgCalleesPerFunction = static_cast<double>(totalCallees) / stats.totalFunctions;
}
return stats;
}
void CallGraphAnalysisResult::print() const {
std::cout << "---- Call Graph Analysis Results for Module ----\n";
// 打印基本统计信息
auto stats = getStatistics();
std::cout << " Statistics:\n";
std::cout << " Total Functions: " << stats.totalFunctions << "\n";
std::cout << " Total Call Edges: " << stats.totalCallEdges << "\n";
std::cout << " Recursive Functions: " << stats.recursiveFunctions << "\n";
std::cout << " Self-Recursive Functions: " << stats.selfRecursiveFunctions << "\n";
std::cout << " Strongly Connected Components: " << stats.stronglyConnectedComponents << "\n";
std::cout << " Max SCC Size: " << stats.maxSCCSize << "\n";
std::cout << " Avg Callers per Function: " << stats.avgCallersPerFunction << "\n";
std::cout << " Avg Callees per Function: " << stats.avgCalleesPerFunction << "\n";
// 打印拓扑排序结果
std::cout << " Topological Order (" << topologicalOrder.size() << "):\n";
for (size_t i = 0; i < topologicalOrder.size(); ++i) {
std::cout << " " << i << ": " << topologicalOrder[i]->getName() << "\n";
}
// 打印强连通分量
if (!sccs.empty()) {
std::cout << " Strongly Connected Components:\n";
for (size_t i = 0; i < sccs.size(); ++i) {
std::cout << " SCC " << i << " (size " << sccs[i].size() << "): ";
for (size_t j = 0; j < sccs[i].size(); ++j) {
if (j > 0) std::cout << ", ";
std::cout << sccs[i][j]->getName();
}
std::cout << "\n";
}
}
// 打印每个函数的详细信息
std::cout << " Function Details:\n";
for (const auto& pair : nodes) {
const auto& node = pair.second;
std::cout << " Function: " << node->function->getName();
if (node->isRecursive) {
std::cout << " (Recursive";
if (node->isSelfRecursive) std::cout << ", Self";
if (node->recursiveDepth >= 0) std::cout << ", Depth=" << node->recursiveDepth;
std::cout << ")";
}
std::cout << "\n";
if (!node->callers.empty()) {
std::cout << " Callers (" << node->callers.size() << "): ";
bool first = true;
for (Function* caller : node->callers) {
if (!first) std::cout << ", ";
std::cout << caller->getName();
first = false;
}
std::cout << "\n";
}
if (!node->callees.empty()) {
std::cout << " Callees (" << node->callees.size() << "): ";
bool first = true;
for (Function* callee : node->callees) {
if (!first) std::cout << ", ";
std::cout << callee->getName();
first = false;
}
std::cout << "\n";
}
}
std::cout << "--------------------------------------------------\n";
}
void CallGraphAnalysisResult::addNode(Function* F) {
if (nodes.find(F) == nodes.end()) {
nodes[F] = std::make_unique<CallGraphNode>(F);
}
}
void CallGraphAnalysisResult::addCallEdge(Function* caller, Function* callee) {
// 确保两个函数都有对应的节点
addNode(caller);
addNode(callee);
// 添加调用边
nodes[caller]->callees.insert(callee);
nodes[callee]->callers.insert(caller);
// 更新统计信息
nodes[caller]->totalCallees = nodes[caller]->callees.size();
nodes[callee]->totalCallers = nodes[callee]->callers.size();
// 检查自递归
if (caller == callee) {
nodes[caller]->isSelfRecursive = true;
nodes[caller]->isRecursive = true;
}
}
void CallGraphAnalysisResult::computeTopologicalOrder() {
topologicalOrder.clear();
std::unordered_set<Function*> visited;
// 对每个未访问的函数进行DFS
for (const auto& pair : nodes) {
Function* F = pair.first;
if (visited.find(F) == visited.end()) {
dfsTopological(F, visited, topologicalOrder);
}
}
// 反转结果(因为我们在后序遍历中添加)
std::reverse(topologicalOrder.begin(), topologicalOrder.end());
}
void CallGraphAnalysisResult::dfsTopological(Function* F, std::unordered_set<Function*>& visited,
std::vector<Function*>& result) {
visited.insert(F);
auto node = getNode(F);
if (node) {
// 先访问所有被调用的函数
for (Function* callee : node->callees) {
if (visited.find(callee) == visited.end()) {
dfsTopological(callee, visited, result);
}
}
}
// 后序遍历:访问完所有子节点后添加当前节点
result.push_back(F);
}
void CallGraphAnalysisResult::computeStronglyConnectedComponents() {
tarjanSCC();
// 为每个函数设置其所属的SCC
functionToSCC.clear();
for (size_t i = 0; i < sccs.size(); ++i) {
for (Function* F : sccs[i]) {
functionToSCC[F] = static_cast<int>(i);
}
}
}
void CallGraphAnalysisResult::tarjanSCC() {
sccs.clear();
std::vector<int> indices(nodes.size(), -1);
std::vector<int> lowlinks(nodes.size(), -1);
std::vector<Function*> stack;
std::unordered_set<Function*> onStack;
int index = 0;
// 为函数分配索引
std::map<Function*, int> functionIndex;
int idx = 0;
for (const auto& pair : nodes) {
functionIndex[pair.first] = idx++;
}
// 对每个未访问的函数运行Tarjan算法
for (const auto& pair : nodes) {
Function* F = pair.first;
int fIdx = functionIndex[F];
if (indices[fIdx] == -1) {
tarjanDFS(F, index, indices, lowlinks, stack, onStack);
}
}
}
void CallGraphAnalysisResult::tarjanDFS(Function* F, int& index, std::vector<int>& indices,
std::vector<int>& lowlinks, std::vector<Function*>& stack,
std::unordered_set<Function*>& onStack) {
// 这里需要函数到索引的映射,简化实现
// 在实际实现中应该维护一个全局的函数索引映射
static std::map<Function*, int> functionIndex;
static int nextIndex = 0;
if (functionIndex.find(F) == functionIndex.end()) {
functionIndex[F] = nextIndex++;
}
int fIdx = functionIndex[F];
// 确保向量足够大
if (fIdx >= static_cast<int>(indices.size())) {
indices.resize(fIdx + 1, -1);
lowlinks.resize(fIdx + 1, -1);
}
indices[fIdx] = index;
lowlinks[fIdx] = index;
index++;
stack.push_back(F);
onStack.insert(F);
auto node = getNode(F);
if (node) {
for (Function* callee : node->callees) {
int calleeIdx = functionIndex[callee];
// 确保向量足够大
if (calleeIdx >= static_cast<int>(indices.size())) {
indices.resize(calleeIdx + 1, -1);
lowlinks.resize(calleeIdx + 1, -1);
}
if (indices[calleeIdx] == -1) {
// 递归访问
tarjanDFS(callee, index, indices, lowlinks, stack, onStack);
lowlinks[fIdx] = std::min(lowlinks[fIdx], lowlinks[calleeIdx]);
} else if (onStack.find(callee) != onStack.end()) {
// 后向边
lowlinks[fIdx] = std::min(lowlinks[fIdx], indices[calleeIdx]);
}
}
}
// 如果F是SCC的根
if (lowlinks[fIdx] == indices[fIdx]) {
std::vector<Function*> scc;
Function* w;
do {
w = stack.back();
stack.pop_back();
onStack.erase(w);
scc.push_back(w);
} while (w != F);
sccs.push_back(std::move(scc));
}
}
void CallGraphAnalysisResult::analyzeRecursion() {
// 基于SCC分析递归
for (const auto& scc : sccs) {
if (scc.size() > 1) {
// 多函数的SCC标记为相互递归
for (Function* F : scc) {
auto* node = getMutableNode(F);
if (node) {
node->isRecursive = true;
node->recursiveDepth = -1; // 相互递归,深度未定义
}
}
} else if (scc.size() == 1) {
// 单函数SCC检查是否自递归
Function* F = scc[0];
auto* node = getMutableNode(F);
if (node && node->callees.count(F) > 0) {
node->isSelfRecursive = true;
node->isRecursive = true;
node->recursiveDepth = -1; // 简化:不计算递归深度
}
}
}
}
// ========== CallGraphAnalysisPass 实现 ==========
bool CallGraphAnalysisPass::runOnModule(Module* M, AnalysisManager& AM) {
if (DEBUG) {
std::cout << "Running Call Graph Analysis on module\n";
}
// 创建分析结果
CurrentResult = std::make_unique<CallGraphAnalysisResult>(M);
// 执行主要分析步骤
buildCallGraph(M);
CurrentResult->computeTopologicalOrder();
CurrentResult->computeStronglyConnectedComponents();
CurrentResult->analyzeRecursion();
if (DEBUG) {
CurrentResult->print();
}
return false; // 分析遍不修改IR
}
void CallGraphAnalysisPass::buildCallGraph(Module* M) {
// 1. 为所有函数创建节点(包括声明但未定义的函数)
for (auto& pair : M->getFunctions()) {
Function* F = pair.second.get();
if (!isLibraryFunction(F) && !isIntrinsicFunction(F)) {
CurrentResult->addNode(F);
}
}
// 2. 扫描所有函数的调用关系
for (auto& pair : M->getFunctions()) {
Function* F = pair.second.get();
if (!isLibraryFunction(F) && !isIntrinsicFunction(F)) {
scanFunctionCalls(F);
}
}
}
void CallGraphAnalysisPass::scanFunctionCalls(Function* F) {
// 遍历函数中的所有基本块和指令
for (auto& BB : F->getBasicBlocks_NoRange()) {
for (auto& I : BB->getInstructions()) {
if (CallInst* call = dynamic_cast<CallInst*>(I.get())) {
processCallInstruction(call, F);
}
}
}
}
void CallGraphAnalysisPass::processCallInstruction(CallInst* call, Function* caller) {
Function* callee = call->getCallee();
if (!callee) {
// 间接调用,无法静态确定目标函数
return;
}
if (isLibraryFunction(callee) || isIntrinsicFunction(callee)) {
// 跳过标准库函数和内置函数
return;
}
// 添加调用边
CurrentResult->addCallEdge(caller, callee);
// 更新调用点统计
auto* node = CurrentResult->getMutableNode(caller);
if (node) {
node->callSiteCount++;
}
}
bool CallGraphAnalysisPass::isLibraryFunction(Function* F) const {
std::string name = F->getName();
// SysY标准库函数
return name == "getint" || name == "getch" || name == "getfloat" ||
name == "getarray" || name == "getfarray" ||
name == "putint" || name == "putch" || name == "putfloat" ||
name == "putarray" || name == "putfarray" ||
name == "_sysy_starttime" || name == "_sysy_stoptime";
}
bool CallGraphAnalysisPass::isIntrinsicFunction(Function* F) const {
std::string name = F->getName();
// 编译器内置函数(后续可以增加某些内置函数)
return name.substr(0, 5) == "llvm." || name.substr(0, 5) == "sysy.";
}
void CallGraphAnalysisPass::printStatistics() const {
if (CurrentResult) {
CurrentResult->print();
}
}
} // namespace sysy