#include "SysYIRAnalyser.h"

#include <algorithm>
#include <cassert>
#include <iterator>
#include <map>
#include <set>
#include <type_traits>
#include <unordered_set>
#include <utility>
#include <vector>

namespace sysy {

// Creates a fresh, cleared BlockAnalysisInfo for every basic block and a
// fresh, cleared FunctionAnalysisInfo for every function of the module.
// NOTE(review): the records are allocated with raw `new`; they are presumably
// released by clear() (not visible in this file) - confirm.
void ControlFlowAnalysis::init() {
  auto &functions = pModule->getFunctions();
  for (const auto &function : functions) {
    auto func = function.second.get();
    auto basicBlocks = func->getBasicBlocks();
    for (auto &basicBlock : basicBlocks) {
      blockAnalysisInfo[basicBlock.get()] = new BlockAnalysisInfo();
      blockAnalysisInfo[basicBlock.get()]->clear();
    }
    functionAnalysisInfo[func] = new FunctionAnalysisInfo();
    functionAnalysisInfo[func]->clear();
  }
}

// Runs the complete control-flow analysis: dominator sets, dominator tree and
// dominance frontiers, in that order.
void ControlFlowAnalysis::runControlFlowAnalysis() {
  clear(); // drop the results of any previous run
  init();  // allocate fresh per-block / per-function records
  computeDomNode();
  computeDomTree();
  computeDomFrontierAllBlk();
}

// In-place set intersection: removes from `dom` every block that is not also
// contained in `other`.
void ControlFlowAnalysis::intersectOP4Dom(std::unordered_set<BasicBlock *> &dom,
                                          const std::unordered_set<BasicBlock *> &other) {
  for (auto it = dom.begin(); it != dom.end();) {
    if (other.find(*it) == other.end()) {
      it = dom.erase(it); // not present in `other`: drop it
    } else {
      ++it;
    }
  }
}

// Returns the nearest common ancestor of `a` and `b` in the (partially built)
// dominator tree by walking both blocks up their idom chains.
// Both blocks must already have an immediate dominator assigned; the walk
// dereferences the analysis record of every idom it visits.
// TODO: binary lifting would trade space for time here (low priority).
auto ControlFlowAnalysis::findCommonDominator(BasicBlock *a, BasicBlock *b) -> BasicBlock * {
  while (a != b) {
    BlockAnalysisInfo *infoA = blockAnalysisInfo[a];
    BlockAnalysisInfo *infoB = blockAnalysisInfo[b];
    // Lift the deeper block until both are at the same depth.
    while (infoA->getDomDepth() > infoB->getDomDepth()) {
      a = const_cast<BasicBlock *>(infoA->getIdom());
      infoA = blockAnalysisInfo[a];
    }
    while (infoB->getDomDepth() > infoA->getDomDepth()) {
      b = const_cast<BasicBlock *>(infoB->getIdom());
      infoB = blockAnalysisInfo[b];
    }
    if (a == b)
      break;
    // Same depth but different blocks: move both one level up.
    a = const_cast<BasicBlock *>(infoA->getIdom());
    b = const_cast<BasicBlock *>(infoB->getIdom());
  }
  return a;
}

// Computes the dominator set of every basic block of every function with the
// classic iterative data-flow formulation:
//   DOM[entry] = {entry}
//   DOM[B]     = {B} U ( intersection of DOM[P] for all P in pred(B) )
// The entry block is renamed to "Entry" as a side effect.
// TODO: Lengauer-Tarjan, or visiting blocks in CFG topological order, would
// converge faster.
void ControlFlowAnalysis::computeDomNode() {
  auto &functions = pModule->getFunctions();
  for (const auto &function : functions) {
    auto func = function.second.get();
    auto basicBlocks = func->getBasicBlocks();

    auto entry_block = func->getEntryBlock();
    entry_block->setName("Entry");
    blockAnalysisInfo[entry_block]->addDominants(entry_block);

    // Every non-entry block starts out "dominated by all blocks".
    std::unordered_set<BasicBlock *> domSetTmp;
    for (auto &basicBlock : basicBlocks) {
      domSetTmp.emplace(basicBlock.get());
    }
    for (auto &basicBlock : basicBlocks) {
      if (basicBlock.get() != entry_block) {
        blockAnalysisInfo[basicBlock.get()]->setDominants(domSetTmp);
      }
    }

    // Iterate until no dominator set changes any more.
    bool changed = true;
    while (changed) {
      changed = false;
      for (auto &basicBlock : basicBlocks) {
        if (basicBlock.get() == entry_block)
          continue;

        const auto &preds = basicBlock->getPredecessors();
        // A block without predecessors has nothing to intersect; calling
        // front() on the empty container would be undefined behaviour, so the
        // block simply keeps its initial "all blocks" set.
        if (preds.empty())
          continue;

        // Intersect the dominator sets of all predecessors, then add the
        // block itself.
        std::unordered_set<BasicBlock *> dom = blockAnalysisInfo[preds.front()]->getDominants();
        for (auto pred : preds) {
          intersectOP4Dom(dom, blockAnalysisInfo[pred]->getDominants());
        }
        dom.emplace(basicBlock.get());

        if (dom != blockAnalysisInfo[basicBlock.get()]->getDominants()) {
          blockAnalysisInfo[basicBlock.get()]->setDominants(dom);
          changed = true;
        }
      }
    }
  }
}

// Builds the dominator tree (immediate dominator, dominated children and tree
// depth of every block) by iterating to a fixed point: the idom of a block is
// the common dominator of all predecessors that already have an idom.
// The entry block is its own idom and has depth 0.
// TODO: switch to the SEMI-NCA algorithm.
void ControlFlowAnalysis::computeDomTree() {
  auto &functions = pModule->getFunctions();
  for (const auto &function : functions) {
    auto func = function.second.get();
    auto basicBlocks = func->getBasicBlocks();
    auto entry_block = func->getEntryBlock();

    blockAnalysisInfo[entry_block]->setIdom(entry_block);
    blockAnalysisInfo[entry_block]->setDomDepth(0);

    bool changed = true;
    while (changed) {
      changed = false;
      for (auto &basicBlock : basicBlocks) {
        if (basicBlock.get() == entry_block)
          continue;

        BasicBlock *new_idom = nullptr;
        for (auto pred : basicBlock->getPredecessors()) {
          // Skip predecessors that have not been processed yet.
          if (blockAnalysisInfo[pred]->getIdom() == nullptr)
            continue;
          if (new_idom == nullptr)
            new_idom = pred;
          else
            new_idom = findCommonDominator(new_idom, pred);
        }

        if (new_idom && new_idom != blockAnalysisInfo[basicBlock.get()]->getIdom()) {
          // Detach the block from its previous immediate dominator, if any.
          if (blockAnalysisInfo[basicBlock.get()]->getIdom()) {
            blockAnalysisInfo[const_cast<BasicBlock *>(blockAnalysisInfo[basicBlock.get()]->getIdom())]
                ->removeSdoms(basicBlock.get());
          }
          // std::cout << "Block: " << basicBlock->getName()
          //           << " New Idom: " << new_idom->getName() << std::endl;
          blockAnalysisInfo[basicBlock.get()]->setIdom(new_idom);
          blockAnalysisInfo[new_idom]->addSdoms(basicBlock.get());
          // depth = depth of the immediate dominator + 1
          blockAnalysisInfo[basicBlock.get()]->setDomDepth(blockAnalysisInfo[new_idom]->getDomDepth() + 1);
          changed = true;
        }
      }
    }
  }

  // Previous implementation (BFS over predecessors), kept for reference:
  // for (auto &basicBlock : basicBlocks) {
  //   if (basicBlock.get() != func->getEntryBlock()) {
  //     auto dominats =
  //         blockAnalysisInfo[basicBlock.get()]->getDominants();
  //     bool found = false;
  //     // start the search for the immediate dominator at the predecessors
  //     std::queue<BasicBlock *> q;
  //     for (auto pred : basicBlock->getPredecessors()) {
  //       q.push(pred);
  //     }
  //     // BFS over predecessors until the immediate dominator is found
  //     while (!found && !q.empty()) {
  //       auto curr = q.front();
  //       q.pop();
  //       if (curr == basicBlock.get())
  //         continue;
  //       if (dominats.count(curr) != 0U) {
  //         blockAnalysisInfo[basicBlock.get()]->setIdom(curr);
  //         blockAnalysisInfo[curr]->addSdoms(basicBlock.get());
  //         found = true;
  //       } else {
  //         for (auto pred : curr->getPredecessors()) {
  //           q.push(pred);
  //         }
  //       }
  //     }
  //   }
  // }
}

// Previous recursive dominance-frontier implementation, kept for reference:
// std::unordered_set<BasicBlock *> ControlFlowAnalysis::computeDomFrontier(BasicBlock *block) {
//   std::unordered_set<BasicBlock *> ret_list;
//   // local DF
//   for (auto local_successor : block->getSuccessors()) {
//     if (local_successor->getIdom() != block) {
//       ret_list.emplace(local_successor);
//     }
//   }
//   // up DF
//   for (auto up_successor : block->getSdoms()) {
//     auto childrenDF = computeDF(up_successor);
//     for (auto w : childrenDF) {
//       if (block != w->getIdom() || block == w) {
//         ret_list.emplace(w);
//       }
//     }
//   }
//   return ret_list;
// }

// Computes the dominance frontier of every block. Blocks are visited from the
// deepest dominator-tree level upwards so that the frontiers of all
// dominator-tree children are available when their parent is processed.
void ControlFlowAnalysis::computeDomFrontierAllBlk() {
  auto &functions = pModule->getFunctions();
  for (const auto &function : functions) {
    auto func = function.second.get();
    auto basicBlocks = func->getBasicBlocks();

    // Order the blocks by dominator-tree depth, deepest first.
    std::vector<BasicBlock *> orderedBlocks;
    for (auto &bb : basicBlocks) {
      orderedBlocks.push_back(bb.get());
    }
    std::sort(orderedBlocks.begin(), orderedBlocks.end(), [this](BasicBlock *a, BasicBlock *b) {
      return blockAnalysisInfo[a]->getDomDepth() > blockAnalysisInfo[b]->getDomDepth();
    });

    for (auto block : orderedBlocks) {
      std::unordered_set<BasicBlock *> df;

      // Local DF: direct successors whose immediate dominator is not `block`.
      for (auto succ : block->getSuccessors()) {
        if (blockAnalysisInfo[succ]->getIdom() != block) {
          df.insert(succ);
        }
      }

      // Up DF: frontier members inherited from the dominator-tree children,
      // unless `block` is their immediate dominator.
      for (auto child : blockAnalysisInfo[block]->getSdoms()) {
        for (auto w : blockAnalysisInfo[child]->getDomFrontiers()) {
          if (block != blockAnalysisInfo[w]->getIdom()) {
            df.insert(w);
          }
        }
      }

      blockAnalysisInfo[block]->setDomFrontiers(df);
    }
  }
}

// ==========================
// dataflow analysis utils
// ==========================

// Borrowed from an earlier student's implementation for now.
// TODO: drive the worklist in reverse post-order.

// Runs every registered forward analysis to a fixed point, one function at a
// time: all blocks are swept repeatedly, and an analysis stays in the work
// map only while its last sweep reported a change.
void DataFlowAnalysisUtils::forwardAnalyze(Module *pModule) {
  // Key type of the work map = element type of the analysis list.
  using AnalysisPtr = typename std::decay<decltype(*std::begin(forwardAnalysisList))>::type;
  std::map<AnalysisPtr, bool> workAnalysis;

  for (auto &dataflow : forwardAnalysisList) {
    dataflow->init(pModule);
  }

  for (const auto &function : pModule->getFunctions()) {
    for (auto &dataflow : forwardAnalysisList) {
      workAnalysis.emplace(dataflow, false);
    }
    while (!workAnalysis.empty()) {
      for (const auto &block : function.second->getBasicBlocks()) {
        for (auto &elem : workAnalysis) {
          if (elem.first->analyze(pModule, block.get())) {
            elem.second = true;
          }
        }
      }
      // Keep only the analyses that changed something during this sweep.
      std::map<AnalysisPtr, bool> tmp;
      std::remove_copy_if(workAnalysis.begin(), workAnalysis.end(), std::inserter(tmp, tmp.end()),
                          [](const std::pair<const AnalysisPtr, bool> &elem) -> bool { return !elem.second; });
      workAnalysis.swap(tmp);
      for (auto &elem : workAnalysis) {
        elem.second = false;
      }
    }
  }
}

// Runs every registered backward analysis to a fixed point, one function at a
// time. Same driver as forwardAnalyze(), applied to backwardAnalysisList.
// NOTE(review): blocks are still swept in their stored (forward) order; this
// is correct but may need more sweeps than a reverse order would.
void DataFlowAnalysisUtils::backwardAnalyze(Module *pModule) {
  // Key type of the work map = element type of the analysis list.
  using AnalysisPtr = typename std::decay<decltype(*std::begin(backwardAnalysisList))>::type;
  std::map<AnalysisPtr, bool> workAnalysis;

  for (auto &dataflow : backwardAnalysisList) {
    dataflow->init(pModule);
  }

  for (const auto &function : pModule->getFunctions()) {
    for (auto &dataflow : backwardAnalysisList) {
      workAnalysis.emplace(dataflow, false);
    }
    while (!workAnalysis.empty()) {
      for (const auto &block : function.second->getBasicBlocks()) {
        for (auto &elem : workAnalysis) {
          if (elem.first->analyze(pModule, block.get())) {
            elem.second = true;
          }
        }
      }
      // Keep only the analyses that changed something during this sweep.
      std::map<AnalysisPtr, bool> tmp;
      std::remove_copy_if(workAnalysis.begin(), workAnalysis.end(), std::inserter(tmp, tmp.end()),
                          [](const std::pair<const AnalysisPtr, bool> &elem) -> bool { return !elem.second; });
      workAnalysis.swap(tmp);
      for (auto &elem : workAnalysis) {
        elem.second = false;
      }
    }
  }
}

// Returns the set of values read ("used") by `inst`. Operands that are not
// User objects are dropped.
std::set<User *> ActiveVarAnalysis::getUsedSet(Instruction *inst) {
  using Kind = Instruction::Kind;

  std::vector<User *> operands;
  for (const auto &operand : inst->getOperands()) {
    operands.emplace_back(dynamic_cast<User *>(operand->getValue()));
  }

  std::set<User *> result;
  switch (inst->getKind()) {
  // phi / call: every operand except the first one is a use
  case Kind::kPhi:
  case Kind::kCall:
    result.insert(std::next(operands.begin()), operands.end());
    break;
  case Kind::kCondBr:
    result.insert(operands[0]);
    break;
  case Kind::kBr:
  case Kind::kAlloca:
    break;
  // memory operations
  case Kind::kStore:
    // Operand 0 is the stored value, operand 1 the destination variable; any
    // further operands are array indices.
    result.insert(operands[0]);
    result.insert(operands.begin() + 2, operands.end());
    break;
  case Kind::kLoad:
  case Kind::kLa: {
    // NOTE(review): the cast target types were reconstructed (AllocaInst and
    // GlobalValue by analogy with getDefine(), ConstantVariable by name) -
    // confirm against the header.
    auto variable = dynamic_cast<AllocaInst *>(operands[0]);
    auto global = dynamic_cast<GlobalValue *>(operands[0]);
    auto constArray = dynamic_cast<ConstantVariable *>(operands[0]);
    // The accessed object itself only counts as a use when it is a scalar.
    if ((variable != nullptr && variable->getNumDims() == 0) ||
        (global != nullptr && global->getNumDims() == 0) ||
        (constArray != nullptr && constArray->getNumDims() == 0)) {
      result.insert(operands[0]);
    }
    result.insert(std::next(operands.begin()), operands.end());
    break;
  }
  case Kind::kGetSubArray: {
    // Operands from index 2 on carry the array dimension information.
    for (unsigned i = 2; i < operands.size(); i++) {
      result.insert(operands[i]);
    }
    break;
  }
  case Kind::kMemset: {
    result.insert(std::next(operands.begin()), operands.end());
    break;
  }
  case Kind::kInvalid:
  // binary
  case Kind::kAdd:
  case Kind::kSub:
  case Kind::kMul:
  case Kind::kDiv:
  case Kind::kRem:
  case Kind::kICmpEQ:
  case Kind::kICmpNE:
  case Kind::kICmpLT:
  case Kind::kICmpLE:
  case Kind::kICmpGT:
  case Kind::kICmpGE:
  case Kind::kFAdd:
  case Kind::kFSub:
  case Kind::kFMul:
  case Kind::kFDiv:
  case Kind::kFCmpEQ:
  case Kind::kFCmpNE:
  case Kind::kFCmpLT:
  case Kind::kFCmpLE:
  case Kind::kFCmpGT:
  case Kind::kFCmpGE:
  case Kind::kAnd:
  case Kind::kOr:
  // unary
  case Kind::kNeg:
  case Kind::kNot:
  case Kind::kFNot:
  case Kind::kFNeg:
  case Kind::kFtoI:
  case Kind::kItoF:
  // terminator
  case Kind::kReturn:
    result.insert(operands.begin(), operands.end());
    break;
  default:
    assert(false);
    break;
  }

  result.erase(nullptr); // operands that were not User objects
  return result;
}

// Returns the value defined (written) by `inst`, or nullptr when the
// instruction defines nothing that is tracked by the liveness analysis.
User *ActiveVarAnalysis::getDefine(Instruction *inst) {
  User *result = nullptr;
  if (inst->isStore()) {
    StoreInst *store = dynamic_cast<StoreInst *>(inst);
    auto operand = store->getPointer();
    AllocaInst *variable = dynamic_cast<AllocaInst *>(operand);
    GlobalValue *global = dynamic_cast<GlobalValue *>(operand);
    if ((variable != nullptr && variable->getNumDims() != 0) ||
        (global != nullptr && global->getNumDims() != 0)) {
      // A store into an array (local or global) is not treated as a
      // definition.
      // TODO: support array variables.
      result = nullptr;
    } else {
      result = dynamic_cast<User *>(operand);
    }
  } else if (inst->isPhi()) {
    result = dynamic_cast<User *>(inst->getOperand(0));
  } else if (inst->isBinary() || inst->isUnary() || inst->isCall() || inst->isLoad() || inst->isLa()) {
    result = dynamic_cast<User *>(inst);
  }
  return result;
}

// Allocates, for every basic block, one empty live set per program point:
// index i is the point before instruction i, index getNumInstructions() is
// the block exit.
void ActiveVarAnalysis::init(Module *pModule) {
  for (const auto &function : pModule->getFunctions()) {
    for (const auto &block : function.second->getBasicBlocks()) {
      activeTable.emplace(block.get(), std::vector<std::set<User *>>{});
      auto &points = activeTable.at(block.get());
      for (unsigned i = 0; i < block->getNumInstructions() + 1; i++)
        points.emplace_back();
    }
  }
}

// One liveness step for `block`, called repeatedly by the data-flow driver.
// Returns true when any live set of the block changed.
//   OUT[B] = union of IN[S] for all S in succ(B)
//   IN[i]  = use_i U (OUT[i] - def_i)
bool ActiveVarAnalysis::analyze(Module *pModule, BasicBlock *block) {
  bool changed = false;
  std::set<User *> activeSet;

  // Step 1: OUT[B] is the union of the entry live sets of all successors.
  for (const auto &succ : block->getSuccessors()) {
    const auto &succIn = activeTable.at(succ).front();
    activeSet.insert(succIn.begin(), succIn.end());
  }

  // Step 2: store OUT[B] (slot numInstructions) if it changed.
  const auto &instructions = block->getInstructions();
  const auto numInstructions = instructions.size();
  auto &points = activeTable.at(block);
  if (activeSet != points[numInstructions]) {
    changed = true;
    points[numInstructions] = activeSet;
  }

  // Step 3: walk the instructions from last to first. The iterator is
  // decremented right before each use, so it never moves before begin() and
  // an empty block is handled without touching it at all.
  auto instructionIter = instructions.end();
  for (auto i = numInstructions; i > 0; --i) {
    --instructionIter;
    auto inst = instructionIter->get();
    auto used = getUsedSet(inst);
    User *defined = getDefine(inst);

    activeSet.erase(defined);                   // OUT[i] - def_i
    activeSet.insert(used.begin(), used.end()); // ... U use_i

    // Slot i - 1 is the program point in front of this instruction.
    if (activeSet != points[i - 1]) {
      changed = true;
      points[i - 1] = activeSet;
    }
  }

  return changed;
}

} // namespace sysy