#include "GVN.h"
#include "Dom.h"
#include "SysYIROptUtils.h"

// NOTE(review): the three system include targets were lost in extraction; the
// list below covers every standard facility this file uses.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <iostream>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

extern int DEBUG;

namespace sysy {

namespace {

// One (key, value-number) entry copied out of the value-number table.
using ValueNumberEntry = std::pair<Value *, Value *>;

// Copies the table's entries so callers can scan them while checkHashtable()
// inserts new mappings. Inserting into a hash-based map during a range-for can
// rehash and invalidate the live iterator.
template <typename Table>
std::vector<ValueNumberEntry> snapshotOf(const Table &table) {
  return std::vector<ValueNumberEntry>(table.begin(), table.end());
}

// Returns the zero-based position of `target` inside `insts`, or -1 when the
// instruction is not a member of that list.
template <typename InstList>
std::ptrdiff_t indexInBlock(const InstList &insts, const Instruction *target) {
  std::ptrdiff_t index = 0;
  for (const auto &ptr : insts) {
    if (ptr.get() == target) {
      return index;
    }
    ++index;
  }
  return -1;
}

} // namespace

// Static ID of the GVN pass.
void *GVN::ID = static_cast<void *>(&GVN::ID);

// ======================================================================
// GVN implementation
// ======================================================================

// Runs global value numbering on `func`.
// Returns true when at least one redundant instruction was removed.
bool GVN::runOnFunction(Function *func, AnalysisManager &AM) {
  if (func->getBasicBlocks().empty()) {
    return false;
  }

  if (DEBUG) {
    std::cout << "\n=== Running GVN on function: " << func->getName() << " ===" << std::endl;
  }

  bool changed = false;
  GVNContext context;
  context.run(func, &AM, changed);

  if (DEBUG) {
    if (changed) {
      std::cout << "GVN: Function " << func->getName() << " was modified" << std::endl;
    } else {
      std::cout << "GVN: Function " << func->getName() << " was not modified" << std::endl;
    }
    std::cout << "=== GVN completed for function: " << func->getName() << " ===" << std::endl;
  }

  return changed;
}

// Declares the analyses GVN needs. Nothing is added to analysisInvalidations:
// the pass only removes redundant computations and leaves the CFG untouched.
// NOTE(review): std::set<void *> is reconstructed from the insert() calls
// below; confirm against the declaration in GVN.h.
void GVN::getAnalysisUsage(std::set<void *> &analysisDependencies,
                           std::set<void *> &analysisInvalidations) const {
  // 1. Dominator tree: used to check that a replacement dominates its use.
  analysisDependencies.insert(&DominatorTreeAnalysisPass::ID);
  // 2. Side-effect analysis: used to decide whether a call may be numbered.
  analysisDependencies.insert(&SysYSideEffectAnalysisPass::ID);

  // analysisInvalidations intentionally stays empty.
  (void)analysisInvalidations;

  if (DEBUG) {
    std::cout << "GVN: Declared analysis dependencies (DominatorTree, SideEffectAnalysis)" << std::endl;
  }
}

// ======================================================================
// GVNContext implementation
// ======================================================================

// Drives the pass over one function: fetch analyses, visit every instruction
// in reverse post-order, then delete the instructions found redundant.
// `changed` is set to true when something is deleted; it is never reset here.
void GVNContext::run(Function *func, AnalysisManager *AM, bool &changed) {
  if (DEBUG) {
    std::cout << " Starting GVN analysis for function: " << func->getName() << std::endl;
  }

  // Fetch analysis results.
  if (AM) {
    // NOTE(review): template arguments reconstructed (<ResultType, PassType>);
    // confirm against AnalysisManager::getAnalysisResult.
    domTree = AM->getAnalysisResult<DominatorTree, DominatorTreeAnalysisPass>(func);
    sideEffectAnalysis = AM->getAnalysisResult<SideEffectAnalysisResult, SysYSideEffectAnalysisPass>();

    if (DEBUG) {
      if (domTree) {
        std::cout << " GVN: Using dominator tree analysis" << std::endl;
      } else {
        std::cout << " GVN: Warning - dominator tree analysis not available" << std::endl;
      }
      if (sideEffectAnalysis) {
        std::cout << " GVN: Using side effect analysis" << std::endl;
      } else {
        std::cout << " GVN: Warning - side effect analysis not available" << std::endl;
      }
    }
  }

  // Reset per-function state.
  hashtable.clear();
  visited.clear();
  rpoBlocks.clear();
  needRemove.clear();

  // Compute the reverse post-order of the CFG.
  computeRPO(func);

  if (DEBUG) {
    std::cout << " Computed RPO with " << rpoBlocks.size() << " blocks" << std::endl;
  }

  // Number instructions block by block in reverse post-order. Deletion is
  // deferred, so the instruction lists are not modified while iterating.
  int blockCount = 0;
  for (auto bb : rpoBlocks) {
    if (DEBUG) {
      std::cout << " Processing block " << ++blockCount << "/" << rpoBlocks.size() << ": " << bb->getName()
                << std::endl;
    }

    int instCount = 0;
    for (auto &instPtr : bb->getInstructions()) {
      if (DEBUG) {
        std::cout << " Processing instruction " << ++instCount << ": " << instPtr->getName() << std::endl;
      }
      visitInstruction(instPtr.get());
    }
  }

  if (DEBUG) {
    std::cout << " Found " << needRemove.size() << " redundant instructions to remove" << std::endl;
  }

  // Delete the redundant instructions.
  int removeCount = 0;
  for (auto inst : needRemove) {
    if (DEBUG) {
      std::cout << " Removing redundant instruction " << ++removeCount << "/" << needRemove.size() << ": "
                << inst->getName() << std::endl;
    }

    // Drop any remaining use edges before the instruction is destroyed.
    inst->replaceAllUsesWith(nullptr);
    SysYIROptUtils::usedelete(inst);
    changed = true;
  }

  if (DEBUG) {
    std::cout << " GVN analysis completed for function: " << func->getName() << std::endl;
    std::cout << " Total instructions analyzed: " << hashtable.size() << std::endl;
    std::cout << " Instructions eliminated: " << needRemove.size() << std::endl;
  }
}

// Fills rpoBlocks with the blocks reachable from the entry block, in reverse
// post-order. Leaves rpoBlocks empty when the function has no entry block.
void GVNContext::computeRPO(Function *func) {
  rpoBlocks.clear();
  visited.clear();

  auto entry = func->getEntryBlock();
  if (entry) {
    dfs(entry);
    std::reverse(rpoBlocks.begin(), rpoBlocks.end());
  }
}

// Post-order depth-first walk; appends `bb` after all of its unvisited
// successors have been appended.
void GVNContext::dfs(BasicBlock *bb) {
  if (!bb || visited.count(bb)) {
    return;
  }

  visited.insert(bb);

  for (auto succ : bb->getSuccessors()) {
    if (visited.find(succ) == visited.end()) {
      dfs(succ);
    }
  }

  rpoBlocks.push_back(bb);
}

// Returns the value number (representative value) of `value`, creating the
// mapping on first sight. A value with no known equivalent maps to itself.
Value *GVNContext::checkHashtable(Value *value) {
  // An existing mapping is returned directly; this also stops the mutual
  // recursion with getValueNumber().
  if (auto it = hashtable.find(value); it != hashtable.end()) {
    if (DEBUG >= 2) {
      std::cout << " Found " << value->getName() << " in hashtable, mapped to " << it->second->getName()
                << std::endl;
    }
    return it->second;
  }

  // Instructions may be equivalent to an already-numbered instruction.
  if (auto inst = dynamic_cast<Instruction *>(value)) {
    if (auto valueNumber = getValueNumber(inst)) {
      if (valueNumber != inst) {
        hashtable[value] = valueNumber;
        if (DEBUG >= 2) {
          std::cout << " Mapping " << value->getName() << " to equivalent value " << valueNumber->getName()
                    << std::endl;
        }
        return valueNumber;
      }
    }
  }

  // No equivalent found: the value represents itself.
  hashtable[value] = value;
  if (DEBUG >= 2) {
    std::cout << " Mapping " << value->getName() << " to itself (unique)" << std::endl;
  }
  return value;
}

// Dispatches to the overload for the concrete instruction kind.
// Returns nullptr for kinds that are never numbered (including calls that are
// not known to be pure, or any call when no side-effect analysis is available).
Value *GVNContext::getValueNumber(Instruction *inst) {
  if (auto binary = dynamic_cast<BinaryInst *>(inst)) {
    return getValueNumber(binary);
  } else if (auto unary = dynamic_cast<UnaryInst *>(inst)) {
    return getValueNumber(unary);
  } else if (auto gep = dynamic_cast<GetElementPtrInst *>(inst)) {
    return getValueNumber(gep);
  } else if (auto load = dynamic_cast<LoadInst *>(inst)) {
    return getValueNumber(load);
  } else if (auto call = dynamic_cast<CallInst *>(inst)) {
    // Only side-effect-free calls take part in GVN.
    if (sideEffectAnalysis && sideEffectAnalysis->isPureFunction(call->getCallee())) {
      return getValueNumber(call);
    }
    return nullptr;
  }

  return nullptr;
}

// Looks for an already-numbered binary instruction with the same kind and the
// same operand value numbers (in either order for commutative kinds) that may
// replace `inst`. Returns the match's value number, or `inst` when none exists.
Value *GVNContext::getValueNumber(BinaryInst *inst) {
  auto lhs = checkHashtable(inst->getLhs());
  auto rhs = checkHashtable(inst->getRhs());

  if (DEBUG) {
    std::cout << " Checking binary instruction: " << inst->getName()
              << " (kind: " << static_cast<int>(inst->getKind()) << ")" << std::endl;
  }

  // True when both values are loads and memory may have been written between
  // them; `later` is the operand on the side of the instruction being numbered.
  auto storeBetween = [this](Value *earlier, Value *later) {
    auto earlierLoad = dynamic_cast<LoadInst *>(earlier);
    auto laterLoad = dynamic_cast<LoadInst *>(later);
    return earlierLoad && laterLoad &&
           hasInterveningStore(earlierLoad, laterLoad, checkHashtable(laterLoad->getPointer()));
  };

  // Scan a snapshot: checkHashtable() below may insert into the live table.
  for (const auto &[key, value] : snapshotOf(hashtable)) {
    auto binary = dynamic_cast<BinaryInst *>(key);
    if (!binary) {
      continue;
    }

    auto binLhs = checkHashtable(binary->getLhs());
    auto binRhs = checkHashtable(binary->getRhs());
    if (binary->getKind() != inst->getKind()) {
      continue;
    }

    const bool sameOrder = (lhs == binLhs && rhs == binRhs);
    const bool swappedOrder = inst->isCommutative() && lhs == binRhs && rhs == binLhs;
    if (!sameOrder && !swappedOrder) {
      continue;
    }

    // The candidate must dominate `inst` for the replacement to be safe.
    if (!canReplace(inst, binary)) {
      if (DEBUG) {
        std::cout << " Found equivalent binary but dominance check failed: " << binary->getName() << std::endl;
      }
      continue;
    }

    // Extra conservative check when load operands are involved.
    bool hasIntervening = storeBetween(binLhs, lhs) || storeBetween(binRhs, rhs);
    if (!hasIntervening && inst->isCommutative()) {
      hasIntervening = storeBetween(binRhs, lhs) || storeBetween(binLhs, rhs);
    }
    if (hasIntervening) {
      if (DEBUG) {
        std::cout << " Found equivalent binary but load operands have intervening store, skipping"
                  << std::endl;
      }
      continue;
    }

    if (DEBUG) {
      std::cout << " Found equivalent binary instruction: " << binary->getName() << std::endl;
    }
    return value;
  }

  if (DEBUG) {
    std::cout << " No equivalent binary instruction found" << std::endl;
  }
  return inst;
}

// Looks for a numbered unary instruction of the same kind on the same operand
// value number whose representative may replace `inst`.
Value *GVNContext::getValueNumber(UnaryInst *inst) {
  auto operand = checkHashtable(inst->getOperand());

  for (const auto &[key, value] : snapshotOf(hashtable)) {
    auto unary = dynamic_cast<UnaryInst *>(key);
    if (!unary) {
      continue;
    }

    auto unOperand = checkHashtable(unary->getOperand());
    // Same dominance requirement as the binary/load paths, so a dominating
    // equivalent is not hidden behind a non-dominating one.
    if (unary->getKind() == inst->getKind() && operand == unOperand && canReplace(inst, value)) {
      return value;
    }
  }

  return inst;
}

// Looks for a numbered GEP with the same base pointer, index value numbers and
// result type whose representative may replace `inst`.
Value *GVNContext::getValueNumber(GetElementPtrInst *inst) {
  auto ptr = checkHashtable(inst->getBasePointer());
  const unsigned numIndices = inst->getNumIndices();

  std::vector<Value *> indices;
  indices.reserve(numIndices);
  for (unsigned i = 0; i < numIndices; ++i) {
    indices.push_back(checkHashtable(inst->getIndex(i)));
  }

  for (const auto &[key, value] : snapshotOf(hashtable)) {
    auto gep = dynamic_cast<GetElementPtrInst *>(key);
    if (!gep) {
      continue;
    }

    auto gepPtr = checkHashtable(gep->getBasePointer());
    if (ptr != gepPtr || gep->getNumIndices() != numIndices) {
      continue;
    }

    bool indicesMatch = true;
    for (unsigned i = 0; i < numIndices; ++i) {
      if (checkHashtable(gep->getIndex(i)) != indices[i]) {
        indicesMatch = false;
        break;
      }
    }

    if (indicesMatch && inst->getType() == gep->getType() && canReplace(inst, value)) {
      return value;
    }
  }

  return inst;
}

// Looks for a numbered load from the same pointer value number and of the same
// type that dominates `inst` with no memory write in between.
Value *GVNContext::getValueNumber(LoadInst *inst) {
  auto ptr = checkHashtable(inst->getPointer());

  if (DEBUG) {
    std::cout << " Checking load instruction: " << inst->getName() << " from address: " << ptr->getName()
              << std::endl;
  }

  for (const auto &[key, value] : snapshotOf(hashtable)) {
    auto load = dynamic_cast<LoadInst *>(key);
    if (!load) {
      continue;
    }

    auto loadPtr = checkHashtable(load->getPointer());
    if (ptr != loadPtr || inst->getType() != load->getType()) {
      continue;
    }

    if (DEBUG) {
      std::cout << " Found potential equivalent load: " << load->getName() << std::endl;
    }

    // The earlier load must dominate the current one.
    if (!canReplace(inst, load)) {
      if (DEBUG) {
        std::cout << " Equivalent load does not dominate current load, skipping" << std::endl;
      }
      continue;
    }

    // A write in between means the earlier value cannot be reused.
    if (hasInterveningStore(load, inst, ptr)) {
      if (DEBUG) {
        std::cout << " Found intervening store, cannot reuse load value" << std::endl;
      }
      continue;
    }

    if (DEBUG) {
      std::cout << " Can safely reuse load value from: " << load->getName() << std::endl;
    }
    return value;
  }

  if (DEBUG) {
    std::cout << " No equivalent load found" << std::endl;
  }
  return inst;
}

// Looks for a numbered call to the same callee with the same argument value
// numbers. The caller has already established that the callee is pure.
Value *GVNContext::getValueNumber(CallInst *inst) {
  for (const auto &[key, value] : snapshotOf(hashtable)) {
    auto call = dynamic_cast<CallInst *>(key);
    if (!call) {
      continue;
    }
    if (call->getCallee() != inst->getCallee() || call->getNumOperands() != inst->getNumOperands()) {
      continue;
    }

    bool argsMatch = true;
    // Operand 0 is the callee; arguments start at index 1.
    for (size_t i = 1; i < inst->getNumOperands(); ++i) {
      if (checkHashtable(inst->getOperand(i)) != checkHashtable(call->getOperand(i))) {
        argsMatch = false;
        break;
      }
    }

    if (argsMatch && canReplace(inst, value)) {
      return value;
    }
  }

  return inst;
}

// Numbers one instruction. When an equivalent, dominating instruction exists,
// all uses are redirected to it and `inst` is queued in needRemove.
void GVNContext::visitInstruction(Instruction *inst) {
  // Branches are never numbered.
  if (inst->isBranch()) {
    if (DEBUG) {
      std::cout << " Skipping branch instruction: " << inst->getName() << std::endl;
    }
    return;
  }

  // A store drops table entries for loads of the stored-to address.
  if (auto storeInst = dynamic_cast<StoreInst *>(inst)) {
    invalidateLoadsAffectedByStore(storeInst);
  }

  if (DEBUG) {
    std::cout << " Visiting instruction: " << inst->getName() << " (kind: " << static_cast<int>(inst->getKind())
              << ")" << std::endl;
  }

  auto value = checkHashtable(inst);

  if (inst == value) {
    if (DEBUG) {
      std::cout << " Instruction " << inst->getName() << " is unique" << std::endl;
    }
    return;
  }

  auto instValue = dynamic_cast<Instruction *>(value);
  if (!instValue) {
    return;
  }

  if (canReplace(inst, instValue)) {
    inst->replaceAllUsesWith(instValue);
    needRemove.insert(inst);

    if (DEBUG) {
      std::cout << " GVN: Replacing redundant instruction " << inst->getName() << " with existing instruction "
                << instValue->getName() << std::endl;
    }
  } else {
    if (DEBUG) {
      std::cout << " Cannot replace instruction " << inst->getName() << " with " << instValue->getName()
                << " (dominance check failed)" << std::endl;
    }
  }
}

// Returns true when `replacement` may stand in for `original`:
//  - a non-instruction replacement is always accepted;
//  - a call to a function not known to be pure must sit directly next to
//    `original` in the same block;
//  - within one block the replacement must come first;
//  - across blocks the replacement's block must be in the dominator set of the
//    original's block (false when no dominator tree is available).
bool GVNContext::canReplace(Instruction *original, Value *replacement) {
  auto replInst = dynamic_cast<Instruction *>(replacement);
  if (!replInst) {
    return true; // Replacing with a non-instruction value is always safe.
  }

  auto originalBB = original->getParent();
  auto replBB = replInst->getParent();

  // Calls with side effects get special treatment.
  if (auto callInst = dynamic_cast<CallInst *>(replInst)) {
    if (sideEffectAnalysis && !sideEffectAnalysis->isPureFunction(callInst->getCallee())) {
      if (originalBB != replBB) {
        return false;
      }

      auto &insts = originalBB->getInstructions();
      const std::ptrdiff_t origIdx = indexInBlock(insts, original);
      const std::ptrdiff_t replIdx = indexInBlock(insts, replInst);
      if (origIdx < 0 || replIdx < 0) {
        return false;
      }

      // Compare positions rather than calling std::distance(origIt, replIt),
      // whose range is invalid when the replacement precedes the original.
      const std::ptrdiff_t gap = origIdx > replIdx ? origIdx - replIdx : replIdx - origIdx;
      return gap == 1;
    }
  }

  // Same block: dominance reduces to instruction order.
  if (originalBB == replBB) {
    auto &insts = originalBB->getInstructions();
    const std::ptrdiff_t origIdx = indexInBlock(insts, original);
    const std::ptrdiff_t replIdx = indexInBlock(insts, replInst);

    if (origIdx < 0 || replIdx < 0) {
      if (DEBUG) {
        std::cout << " Cannot find instructions in basic block for dominance check" << std::endl;
      }
      return false;
    }

    // The replacement must come strictly before the original.
    const bool canRepl = replIdx < origIdx;
    if (DEBUG) {
      std::cout << " Same block dominance check: " << (canRepl ? "PASS" : "FAIL") << " (repl at " << replIdx
                << ", orig at " << origIdx << ")" << std::endl;
    }
    return canRepl;
  }

  // Different blocks: consult the dominator tree when available.
  if (domTree) {
    auto dominators = domTree->getDominators(originalBB);
    if (dominators && dominators->count(replBB)) {
      return true;
    }
  }

  return false;
}

// Returns true when memory at `ptr` may have been written between the two
// loads. Loads in different blocks are conservatively reported as separated by
// a store. Within one block the instructions strictly between the two loads
// are scanned, whichever of the two comes first.
// TODO: only stores whose pointer value number equals `ptr` are detected; an
// aliasing store through a different pointer, or a side-effecting call writing
// through a pointer argument, is not. This needs alias analysis.
bool GVNContext::hasInterveningStore(LoadInst *earlierLoad, LoadInst *laterLoad, Value *ptr) {
  // The same instruction has nothing between itself and itself. Without this
  // guard the scan below would start after its own end marker and run past the
  // end of the instruction list.
  if (earlierLoad == laterLoad) {
    return false;
  }

  auto earlierBB = earlierLoad->getParent();
  auto laterBB = laterLoad->getParent();

  if (earlierBB != laterBB) {
    // Cross-block case: stay conservative and assume a store intervenes.
    if (DEBUG) {
      std::cout << " Cross-block load optimization: conservatively assuming intervening store" << std::endl;
    }
    return true;
  }

  // Same block: locate both loads.
  auto &insts = earlierBB->getInstructions();
  const std::ptrdiff_t earlierIdx = indexInBlock(insts, earlierLoad);
  const std::ptrdiff_t laterIdx = indexInBlock(insts, laterLoad);

  if (earlierIdx < 0 || laterIdx < 0) {
    if (DEBUG) {
      std::cout << " Could not find load instructions in basic block" << std::endl;
    }
    return true; // Cannot locate the loads: stay conservative.
  }

  // Establish the real execution order.
  std::ptrdiff_t firstIdx = earlierIdx;
  std::ptrdiff_t secondIdx = laterIdx;
  if (earlierIdx > laterIdx) {
    std::swap(firstIdx, secondIdx);
    if (DEBUG) {
      std::cout << " Swapped load order: " << laterLoad->getName() << " actually comes before "
                << earlierLoad->getName() << std::endl;
    }
  }

  // Inspect every instruction strictly between the two loads.
  std::ptrdiff_t position = 0;
  for (const auto &instPtr : insts) {
    const std::ptrdiff_t current = position++;
    if (current <= firstIdx) {
      continue;
    }
    if (current >= secondIdx) {
      break;
    }

    auto inst = instPtr.get();

    if (auto storeInst = dynamic_cast<StoreInst *>(inst)) {
      auto storePtr = checkHashtable(storeInst->getPointer());
      // A store to the very same address changes the loaded memory.
      if (storePtr == ptr) {
        if (DEBUG) {
          std::cout << " Found intervening store to same address: " << storeInst->getName() << std::endl;
        }
        return true;
      }
    }

    if (auto callInst = dynamic_cast<CallInst *>(inst)) {
      if (sideEffectAnalysis && !sideEffectAnalysis->isPureFunction(callInst->getCallee())) {
        // A side-effecting call may modify a global variable.
        // NOTE(review): the cast target was lost in extraction; GlobalValue is
        // inferred from the debug message below — confirm against the IR.
        if (dynamic_cast<GlobalValue *>(ptr) != nullptr) {
          if (DEBUG) {
            std::cout << " Found function call that may modify global variable: " << callInst->getName()
                      << std::endl;
          }
          return true;
        }
      }
    }
  }

  if (DEBUG) {
    std::cout << " No intervening store found between loads" << std::endl;
  }
  return false;
}

// Removes from the value-number table every load whose pointer value number
// equals the store's, plus every binary/unary/GEP entry that (transitively)
// uses an invalidated entry as an operand.
void GVNContext::invalidateLoadsAffectedByStore(StoreInst *storeInst) {
  auto storePtr = checkHashtable(storeInst->getPointer());

  if (DEBUG) {
    std::cout << " Invalidating loads affected by store to address" << std::endl;
  }

  std::vector<Value *> toRemove;
  // Every key scheduled for removal; doubles as the "already handled" set.
  std::set<Value *> invalidatedLoads;

  // Step 1: loads that read the stored-to address. A snapshot is scanned
  // because checkHashtable() may insert into the live table.
  for (const auto &[key, value] : snapshotOf(hashtable)) {
    (void)value;
    auto loadInst = dynamic_cast<LoadInst *>(key);
    if (!loadInst) {
      continue;
    }

    auto loadPtr = checkHashtable(loadInst->getPointer());
    if (loadPtr == storePtr) {
      toRemove.push_back(key);
      invalidatedLoads.insert(key);
      if (DEBUG) {
        std::cout << " Invalidating load from same address: " << loadInst->getName() << std::endl;
      }
    }
  }
  const std::size_t directLoadCount = toRemove.size();

  // Step 2: propagate to dependents until a full pass finds nothing new.
  const auto entries = snapshotOf(hashtable);
  bool foundMore = true;
  while (foundMore) {
    foundMore = false;

    for (const auto &[key, value] : entries) {
      (void)value;
      // Skip entries that are already scheduled for removal.
      if (invalidatedLoads.count(key)) {
        continue;
      }

      bool shouldInvalidate = false;

      if (auto binaryInst = dynamic_cast<BinaryInst *>(key)) {
        auto lhs = checkHashtable(binaryInst->getLhs());
        auto rhs = checkHashtable(binaryInst->getRhs());
        if (invalidatedLoads.count(lhs) || invalidatedLoads.count(rhs)) {
          shouldInvalidate = true;
          if (DEBUG) {
            std::cout << " Invalidating binary instruction due to invalidated operand: "
                      << binaryInst->getName() << std::endl;
          }
        }
      } else if (auto unaryInst = dynamic_cast<UnaryInst *>(key)) {
        auto operand = checkHashtable(unaryInst->getOperand());
        if (invalidatedLoads.count(operand)) {
          shouldInvalidate = true;
          if (DEBUG) {
            std::cout << " Invalidating unary instruction due to invalidated operand: "
                      << unaryInst->getName() << std::endl;
          }
        }
      } else if (auto gepInst = dynamic_cast<GetElementPtrInst *>(key)) {
        auto basePtr = checkHashtable(gepInst->getBasePointer());
        if (invalidatedLoads.count(basePtr)) {
          shouldInvalidate = true;
        } else {
          for (unsigned i = 0; i < gepInst->getNumIndices(); ++i) {
            if (invalidatedLoads.count(checkHashtable(gepInst->getIndex(i)))) {
              shouldInvalidate = true;
              break;
            }
          }
        }

        if (shouldInvalidate && DEBUG) {
          std::cout << " Invalidating GEP instruction due to invalidated operand: " << gepInst->getName()
                    << std::endl;
        }
      }

      if (shouldInvalidate) {
        toRemove.push_back(key);
        invalidatedLoads.insert(key);
        foundMore = true;
      }
    }
  }

  // Drop all affected entries from the table.
  for (auto key : toRemove) {
    hashtable.erase(key);
  }

  // Report dependents separately from the directly invalidated loads.
  if (DEBUG && toRemove.size() > directLoadCount) {
    std::cout << " Total invalidated instructions: " << toRemove.size() << " (including "
              << (toRemove.size() - directLoadCount) << " dependent instructions)" << std::endl;
  }
}

// Builds a textual key for binary, unary and GEP instructions from the kind
// and the operands' value-number pointers. Other kinds yield an empty string.
std::string GVNContext::getCanonicalExpression(Instruction *inst) {
  std::ostringstream oss;

  if (auto binary = dynamic_cast<BinaryInst *>(inst)) {
    oss << "binary_" << static_cast<int>(binary->getKind()) << "_";
    oss << checkHashtable(binary->getLhs()) << "_";
    oss << checkHashtable(binary->getRhs());
  } else if (auto unary = dynamic_cast<UnaryInst *>(inst)) {
    oss << "unary_" << static_cast<int>(unary->getKind()) << "_";
    oss << checkHashtable(unary->getOperand());
  } else if (auto gep = dynamic_cast<GetElementPtrInst *>(inst)) {
    oss << "gep_" << checkHashtable(gep->getBasePointer());
    for (unsigned i = 0; i < gep->getNumIndices(); ++i) {
      oss << "_" << checkHashtable(gep->getIndex(i));
    }
  }

  return oss.str();
}

} // namespace sysy