Files
mysysy/src/midend/Pass/Optimize/GVN.cpp

774 lines
26 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include "GVN.h"
#include "Dom.h"
#include "SysYIROptUtils.h"
#include <algorithm>
#include <cassert>
#include <functional>
#include <iostream>
// Debug verbosity switch defined in another translation unit. In this file any
// non-zero value enables trace output, and values >= 2 additionally print the
// value-number table lookups performed by GVNContext::checkHashtable.
extern int DEBUG;
namespace sysy {
// Static ID of the GVN pass. The variable is initialized with its own address,
// which gives the pass a distinct identifier value.
void *GVN::ID = (void *)&GVN::ID;
// ======================================================================
// Implementation of the GVN class
// ======================================================================
// Pass entry point: runs global value numbering over `func`.
// Returns true when at least one redundant instruction was eliminated.
// Functions without basic blocks are skipped and reported as unchanged.
bool GVN::runOnFunction(Function *func, AnalysisManager &AM) {
  const bool hasBody = !func->getBasicBlocks().empty();
  if (!hasBody) {
    return false;
  }

  if (DEBUG) {
    std::cout << "\n=== Running GVN on function: " << func->getName() << " ===" << std::endl;
  }

  // A fresh context per function keeps value numbers from leaking between functions.
  GVNContext context;
  bool modified = false;
  context.run(func, &AM, modified);

  if (DEBUG) {
    std::cout << "GVN: Function " << func->getName()
              << (modified ? " was modified" : " was not modified") << std::endl;
    std::cout << "=== GVN completed for function: " << func->getName() << " ===" << std::endl;
  }
  return modified;
}
// Declares which analyses GVN needs and which ones it invalidates.
//
// Required:
//   1. Dominator tree analysis - used to check dominance between instructions
//      so that a replacement is safe.
//   2. Side-effect analysis - used to decide whether a call may be numbered.
//
// Invalidated: none. The pass only removes redundant computations and leaves
// the CFG untouched, so `analysisInvalidations` is intentionally left as is.
void GVN::getAnalysisUsage(std::set<void *> &analysisDependencies, std::set<void *> &analysisInvalidations) const {
  void *const dominatorTreeId = &DominatorTreeAnalysisPass::ID;
  void *const sideEffectId = &SysYSideEffectAnalysisPass::ID;

  analysisDependencies.insert(dominatorTreeId);
  analysisDependencies.insert(sideEffectId);

  if (DEBUG) {
    std::cout << "GVN: Declared analysis dependencies (DominatorTree, SideEffectAnalysis)" << std::endl;
  }
}
// ======================================================================
// Implementation of the GVNContext class
// ======================================================================
// Runs value numbering over one function.
//
// Steps: fetch the analyses from `AM` (when given), reset the per-function
// state, visit every instruction of every reachable block in reverse
// post-order, then delete the instructions that were found redundant.
// `changed` is set to true when at least one instruction is deleted; it is
// never reset to false here.
void GVNContext::run(Function *func, AnalysisManager *AM, bool &changed) {
  if (DEBUG) {
    std::cout << " Starting GVN analysis for function: " << func->getName() << std::endl;
  }

  // Fetch analysis results; without an analysis manager the members keep
  // whatever value they already had.
  if (AM != nullptr) {
    domTree = AM->getAnalysisResult<DominatorTree, DominatorTreeAnalysisPass>(func);
    sideEffectAnalysis = AM->getAnalysisResult<SideEffectAnalysisResult, SysYSideEffectAnalysisPass>();
    if (DEBUG) {
      std::cout << (domTree ? " GVN: Using dominator tree analysis"
                            : " GVN: Warning - dominator tree analysis not available")
                << std::endl;
      std::cout << (sideEffectAnalysis ? " GVN: Using side effect analysis"
                                       : " GVN: Warning - side effect analysis not available")
                << std::endl;
    }
  }

  // Reset per-function state.
  hashtable.clear();
  visited.clear();
  rpoBlocks.clear();
  needRemove.clear();

  // Reverse post-order of the reachable blocks.
  computeRPO(func);
  if (DEBUG) {
    std::cout << " Computed RPO with " << rpoBlocks.size() << " blocks" << std::endl;
  }

  // Number every instruction, block by block, in reverse post-order.
  int blockNo = 0;
  for (auto block : rpoBlocks) {
    if (DEBUG) {
      ++blockNo;
      std::cout << " Processing block " << blockNo << "/" << rpoBlocks.size()
                << ": " << block->getName() << std::endl;
    }
    int instNo = 0;
    for (auto &owned : block->getInstructions()) {
      if (DEBUG) {
        ++instNo;
        std::cout << " Processing instruction " << instNo
                  << ": " << owned->getName() << std::endl;
      }
      visitInstruction(owned.get());
    }
  }

  if (DEBUG) {
    std::cout << " Found " << needRemove.size() << " redundant instructions to remove" << std::endl;
  }

  // Deletion is deferred until here so the instruction lists are not mutated
  // while they are being iterated above.
  int removedNo = 0;
  for (auto redundant : needRemove) {
    if (DEBUG) {
      ++removedNo;
      std::cout << " Removing redundant instruction " << removedNo
                << "/" << needRemove.size() << ": " << redundant->getName() << std::endl;
    }
    // Detach any remaining users before the instruction is destroyed.
    redundant->replaceAllUsesWith(nullptr);
    SysYIROptUtils::usedelete(redundant);
    changed = true;
  }

  if (DEBUG) {
    std::cout << " GVN analysis completed for function: " << func->getName() << std::endl;
    std::cout << " Total instructions analyzed: " << hashtable.size() << std::endl;
    std::cout << " Instructions eliminated: " << needRemove.size() << std::endl;
  }
}
// Fills `rpoBlocks` with the blocks reachable from the entry block in reverse
// post-order. Leaves `rpoBlocks` empty when the function has no entry block.
void GVNContext::computeRPO(Function *func) {
  rpoBlocks.clear();
  visited.clear();

  auto entryBlock = func->getEntryBlock();
  if (!entryBlock) {
    return;
  }

  // dfs() appends blocks in post-order; reversing yields reverse post-order.
  dfs(entryBlock);
  std::reverse(rpoBlocks.begin(), rpoBlocks.end());
}
// Depth-first walk over the CFG starting at `bb`. Each block is appended to
// `rpoBlocks` after all of its not-yet-visited successors (post-order).
// Null blocks and blocks already in `visited` are ignored.
void GVNContext::dfs(BasicBlock *bb) {
  const bool skip = (bb == nullptr) || (visited.count(bb) != 0);
  if (skip) {
    return;
  }
  visited.insert(bb);

  for (auto successor : bb->getSuccessors()) {
    if (visited.count(successor) == 0) {
      dfs(successor);
    }
  }

  rpoBlocks.push_back(bb);
}
// Returns the representative (value number) of `value`, computing and caching
// it on first use.
//
// A value already present in `hashtable` returns its cached mapping, which
// also stops repeated work on the same value. Otherwise an instruction is
// handed to getValueNumber(); when that yields a different, non-null value the
// instruction is mapped to it. In every other case the value is mapped to
// itself.
Value *GVNContext::checkHashtable(Value *value) {
  const auto cached = hashtable.find(value);
  if (cached != hashtable.end()) {
    if (DEBUG >= 2) {
      std::cout << " Found " << value->getName() << " in hashtable, mapped to "
                << cached->second->getName() << std::endl;
    }
    return cached->second;
  }

  // Default: the value is its own representative.
  Value *representative = value;
  if (auto inst = dynamic_cast<Instruction *>(value)) {
    Value *numbered = getValueNumber(inst);
    if (numbered != nullptr && numbered != inst) {
      representative = numbered;
    }
  }

  hashtable[value] = representative;
  if (DEBUG >= 2) {
    if (representative != value) {
      std::cout << " Mapping " << value->getName() << " to equivalent value "
                << representative->getName() << std::endl;
    } else {
      std::cout << " Mapping " << value->getName() << " to itself (unique)" << std::endl;
    }
  }
  return representative;
}
// Dispatches to the overload matching the dynamic type of `inst`.
// Returns nullptr for instruction kinds that are not value-numbered, and for
// calls whose callee is not known to be pure (including when no side-effect
// analysis is available).
Value *GVNContext::getValueNumber(Instruction *inst) {
  if (auto asBinary = dynamic_cast<BinaryInst *>(inst)) {
    return getValueNumber(asBinary);
  }
  if (auto asUnary = dynamic_cast<UnaryInst *>(inst)) {
    return getValueNumber(asUnary);
  }
  if (auto asGep = dynamic_cast<GetElementPtrInst *>(inst)) {
    return getValueNumber(asGep);
  }
  if (auto asLoad = dynamic_cast<LoadInst *>(inst)) {
    return getValueNumber(asLoad);
  }
  if (auto asCall = dynamic_cast<CallInst *>(inst)) {
    // Only calls to side-effect-free functions take part in GVN.
    const bool calleeIsPure = sideEffectAnalysis && sideEffectAnalysis->isPureFunction(asCall->getCallee());
    if (!calleeIsPure) {
      return nullptr;
    }
    return getValueNumber(asCall);
  }
  return nullptr;
}
// Looks for an already numbered binary instruction that computes the same
// value as `inst`.
//
// Two binary instructions are equivalent when they have the same kind and
// their value-numbered operands are pointer-identical, either in the same
// order or, for commutative kinds, swapped. A candidate is accepted only when
// canReplace() reports that it may stand in for `inst`.
//
// Returns the table entry of the first acceptable candidate, or `inst` itself
// when there is none.
//
// No memory check is made on load operands: a match requires the operands to
// be the very same SSA values, and an SSA value does not change after a later
// store. The earlier re-check called hasInterveningStore() with the same load
// on both sides, which made that function's scan run past the end of the
// block, and for commutative kinds it also compared unrelated loads and
// rejected valid matches.
Value *GVNContext::getValueNumber(BinaryInst *inst) {
  auto lhs = checkHashtable(inst->getLhs());
  auto rhs = checkHashtable(inst->getRhs());
  if (DEBUG) {
    std::cout << " Checking binary instruction: " << inst->getName()
              << " (kind: " << static_cast<int>(inst->getKind()) << ")" << std::endl;
  }

  for (auto [key, value] : hashtable) {
    auto binary = dynamic_cast<BinaryInst *>(key);
    if (!binary || binary->getKind() != inst->getKind()) {
      continue;
    }

    // Operands are only numbered for candidates of the right kind.
    auto binLhs = checkHashtable(binary->getLhs());
    auto binRhs = checkHashtable(binary->getRhs());

    const bool sameOrder = (lhs == binLhs && rhs == binRhs);
    const bool operandsMatch =
        sameOrder || (inst->isCommutative() && lhs == binRhs && rhs == binLhs);
    if (!operandsMatch) {
      continue;
    }

    // The candidate must be usable at the position of `inst`.
    if (!canReplace(inst, binary)) {
      if (DEBUG) {
        std::cout << " Found equivalent binary but dominance check failed: " << binary->getName() << std::endl;
      }
      continue;
    }

    if (DEBUG) {
      std::cout << " Found equivalent binary instruction: " << binary->getName() << std::endl;
    }
    return value;
  }

  if (DEBUG) {
    std::cout << " No equivalent binary instruction found" << std::endl;
  }
  return inst;
}
// Looks for an already numbered unary instruction with the same kind and the
// same value-numbered operand as `inst`. Returns the table entry of the first
// such instruction, or `inst` itself when there is none. No dominance check is
// made here; visitInstruction() performs it before replacing anything.
Value *GVNContext::getValueNumber(UnaryInst *inst) {
  Value *const wantedOperand = checkHashtable(inst->getOperand());

  for (auto [key, value] : hashtable) {
    auto candidate = dynamic_cast<UnaryInst *>(key);
    if (!candidate) {
      continue;
    }
    Value *const candidateOperand = checkHashtable(candidate->getOperand());
    const bool sameKind = candidate->getKind() == inst->getKind();
    if (sameKind && wantedOperand == candidateOperand) {
      return value;
    }
  }
  return inst;
}
// Looks for an already numbered GEP that has the same value-numbered base
// pointer, the same number of indices, pairwise identical value-numbered
// indices and the same result type as `inst`. Returns the table entry of the
// first such GEP, or `inst` itself when there is none.
Value *GVNContext::getValueNumber(GetElementPtrInst *inst) {
  Value *const base = checkHashtable(inst->getBasePointer());

  // Value numbers of this GEP's indices, computed once up front.
  std::vector<Value *> numberedIndices;
  for (unsigned i = 0; i < inst->getNumIndices(); ++i) {
    numberedIndices.push_back(checkHashtable(inst->getIndex(i)));
  }

  for (auto [key, value] : hashtable) {
    auto candidate = dynamic_cast<GetElementPtrInst *>(key);
    if (!candidate) {
      continue;
    }
    Value *const candidateBase = checkHashtable(candidate->getBasePointer());
    const bool sameShape = (base == candidateBase) && (candidate->getNumIndices() == inst->getNumIndices());
    if (!sameShape) {
      continue;
    }

    // Advance while the indices agree; reaching the end means all matched.
    unsigned i = 0;
    while (i < inst->getNumIndices() && checkHashtable(candidate->getIndex(i)) == numberedIndices[i]) {
      ++i;
    }
    const bool allIndicesMatch = !(i < inst->getNumIndices());
    if (allIndicesMatch && inst->getType() == candidate->getType()) {
      return value;
    }
  }
  return inst;
}
// Looks for an already numbered load whose result can be reused for `inst`.
//
// A candidate must read from the same value-numbered address with the same
// type, must pass canReplace(), and hasInterveningStore() must report that
// nothing may have written the address in between. Returns the table entry of
// the first such load, or `inst` itself when there is none.
Value *GVNContext::getValueNumber(LoadInst *inst) {
  Value *const address = checkHashtable(inst->getPointer());
  if (DEBUG) {
    std::cout << " Checking load instruction: " << inst->getName()
              << " from address: " << address->getName() << std::endl;
  }

  for (auto [key, value] : hashtable) {
    auto candidate = dynamic_cast<LoadInst *>(key);
    if (!candidate) {
      continue;
    }
    Value *const candidateAddress = checkHashtable(candidate->getPointer());
    const bool sameLocation = (address == candidateAddress) && (inst->getType() == candidate->getType());
    if (!sameLocation) {
      continue;
    }
    if (DEBUG) {
      std::cout << " Found potential equivalent load: " << candidate->getName() << std::endl;
    }

    // The earlier load has to dominate this one.
    if (!canReplace(inst, candidate)) {
      if (DEBUG) {
        std::cout << " Equivalent load does not dominate current load, skipping" << std::endl;
      }
      continue;
    }

    // A write in between means the earlier result may be stale.
    if (hasInterveningStore(candidate, inst, address)) {
      if (DEBUG) {
        std::cout << " Found intervening store, cannot reuse load value" << std::endl;
      }
      continue;
    }

    if (DEBUG) {
      std::cout << " Can safely reuse load value from: " << candidate->getName() << std::endl;
    }
    return value;
  }

  if (DEBUG) {
    std::cout << " No equivalent load found" << std::endl;
  }
  return inst;
}
// Looks for an already numbered call to the same callee with the same operand
// count and pairwise identical value-numbered arguments. The caller has
// already established that the callee is free of side effects. Returns the
// table entry of the first such call, or `inst` itself when there is none.
Value *GVNContext::getValueNumber(CallInst *inst) {
  for (auto [key, value] : hashtable) {
    auto candidate = dynamic_cast<CallInst *>(key);
    if (!candidate) {
      continue;
    }
    const bool sameSignature = candidate->getCallee() == inst->getCallee() &&
                               candidate->getNumOperands() == inst->getNumOperands();
    if (!sameSignature) {
      continue;
    }

    // Operand 0 is the callee itself, so argument comparison starts at 1.
    size_t i = 1;
    while (i < inst->getNumOperands() &&
           checkHashtable(inst->getOperand(i)) == checkHashtable(candidate->getOperand(i))) {
      ++i;
    }
    if (!(i < inst->getNumOperands())) {
      return value;
    }
  }
  return inst;
}
// Numbers a single instruction and, when an equivalent earlier instruction
// exists and may replace it, redirects all uses to that instruction and
// queues `inst` for deletion.
//
// Branch instructions are skipped. A store first invalidates the table entries
// that may be affected by it, and is then numbered like any other instruction.
void GVNContext::visitInstruction(Instruction *inst) {
  if (inst->isBranch()) {
    if (DEBUG) {
      std::cout << " Skipping branch instruction: " << inst->getName() << std::endl;
    }
    return;
  }

  // A store may make previously numbered loads stale; drop them first.
  if (auto storeInst = dynamic_cast<StoreInst*>(inst)) {
    invalidateLoadsAffectedByStore(storeInst);
  }

  if (DEBUG) {
    std::cout << " Visiting instruction: " << inst->getName()
              << " (kind: " << static_cast<int>(inst->getKind()) << ")" << std::endl;
  }

  Value *const representative = checkHashtable(inst);
  if (representative == inst) {
    if (DEBUG) {
      std::cout << " Instruction " << inst->getName() << " is unique" << std::endl;
    }
    return;
  }

  // Only another instruction is used as a replacement here.
  auto leader = dynamic_cast<Instruction *>(representative);
  if (!leader) {
    return;
  }

  if (!canReplace(inst, leader)) {
    if (DEBUG) {
      std::cout << " Cannot replace instruction " << inst->getName()
                << " with " << leader->getName() << " (dominance check failed)" << std::endl;
    }
    return;
  }

  inst->replaceAllUsesWith(leader);
  needRemove.insert(inst);
  if (DEBUG) {
    std::cout << " GVN: Replacing redundant instruction " << inst->getName()
              << " with existing instruction " << leader->getName() << std::endl;
  }
}
// Decides whether `replacement` may be used in place of `original`.
//
// - A replacement that is not an instruction (e.g. a constant) is always
//   accepted.
// - A replacement that is a call to a function the side-effect analysis does
//   not consider pure is accepted only when both instructions sit next to each
//   other in the same basic block.
// - Within one basic block the replacement must appear before `original`.
// - Across basic blocks the replacement's block must be among the dominators
//   of `original`'s block; without a dominator tree the answer is false.
//
// Positions are measured from the start of the instruction list. Measuring
// the distance directly from one found iterator to the other is undefined for
// non-random-access iterators whenever the second one comes first.
bool GVNContext::canReplace(Instruction *original, Value *replacement) {
  auto replInst = dynamic_cast<Instruction *>(replacement);
  if (!replInst) {
    return true; // replacing with a non-instruction value (e.g. a constant) is always safe
  }

  auto originalBB = original->getParent();
  auto replBB = replInst->getParent();
  const bool sameBlock = (originalBB == replBB);

  // Calls that are not known to be pure get the stricter adjacency rule.
  bool impureCallReplacement = false;
  if (auto callInst = dynamic_cast<CallInst *>(replInst)) {
    impureCallReplacement = sideEffectAnalysis && !sideEffectAnalysis->isPureFunction(callInst->getCallee());
  }
  if (impureCallReplacement && !sameBlock) {
    return false;
  }

  if (sameBlock) {
    auto &insts = originalBB->getInstructions();
    auto origIt =
        std::find_if(insts.begin(), insts.end(), [original](const auto &ptr) { return ptr.get() == original; });
    auto replIt =
        std::find_if(insts.begin(), insts.end(), [replInst](const auto &ptr) { return ptr.get() == replInst; });
    if (origIt == insts.end() || replIt == insts.end()) {
      if (DEBUG && !impureCallReplacement) {
        std::cout << " Cannot find instructions in basic block for dominance check" << std::endl;
      }
      return false;
    }

    // Offsets from begin() are well-defined for every iterator category.
    const auto origPos = std::distance(insts.begin(), origIt);
    const auto replPos = std::distance(insts.begin(), replIt);

    if (impureCallReplacement) {
      const auto gap = origPos - replPos;
      return gap == 1 || gap == -1;
    }

    // The replacement has to come first so that it dominates the original.
    const bool canRepl = replPos < origPos;
    if (DEBUG) {
      std::cout << " Same block dominance check: " << (canRepl ? "PASS" : "FAIL")
                << " (repl at " << replPos
                << ", orig at " << origPos << ")" << std::endl;
    }
    return canRepl;
  }

  // Different blocks: rely on the dominator tree when it is available.
  if (domTree) {
    auto dominators = domTree->getDominators(originalBB);
    if (dominators && dominators->count(replBB)) {
      return true;
    }
  }
  return false;
}
// Conservatively decides whether the memory at `ptr` may have been written
// between two loads. Returns true when reuse of the earlier load's result is
// not known to be safe.
//
// - The same load passed twice has nothing in between: returns false. Without
//   this guard the scan below would start after the load and never meet its
//   end position, running past the end of the block.
// - Loads in different basic blocks are always treated as separated by a
//   store.
// - Within one block, every instruction strictly between the two loads is
//   inspected:
//     * a store whose value-numbered address equals `ptr` counts;
//     * a call counts when `ptr` is a global or a GEP chain rooted at a
//       global, unless the side-effect analysis says the callee is pure. With
//       no analysis available every call is treated as impure, matching how
//       calls are handled when they are value-numbered.
//
// NOTE(review): stores through a different pointer value that aliases `ptr`,
// and calls writing through pointer arguments, are still not detected.
bool GVNContext::hasInterveningStore(LoadInst* earlierLoad, LoadInst* laterLoad, Value* ptr) {
  if (earlierLoad == laterLoad) {
    return false;
  }

  auto earlierBB = earlierLoad->getParent();
  auto laterBB = laterLoad->getParent();
  if (earlierBB != laterBB) {
    // Cross-block case: be conservative and assume a store in between. This
    // may miss optimizations but keeps the transformation correct.
    if (DEBUG) {
      std::cout << " Cross-block load optimization: conservatively assuming intervening store" << std::endl;
    }
    return true;
  }

  // Same block: locate both loads in the instruction list.
  auto &insts = earlierBB->getInstructions();
  auto earlierIt = std::find_if(insts.begin(), insts.end(),
                                [earlierLoad](const auto &ptr) { return ptr.get() == earlierLoad; });
  auto laterIt = std::find_if(insts.begin(), insts.end(),
                              [laterLoad](const auto &ptr) { return ptr.get() == laterLoad; });
  if (earlierIt == insts.end() || laterIt == insts.end()) {
    if (DEBUG) {
      std::cout << " Could not find load instructions in basic block" << std::endl;
    }
    return true; // cannot locate the loads: stay conservative
  }

  // Establish the real execution order of the two loads.
  auto firstIt = earlierIt;
  auto secondIt = laterIt;
  if (std::distance(insts.begin(), earlierIt) > std::distance(insts.begin(), laterIt)) {
    firstIt = laterIt;
    secondIt = earlierIt;
    if (DEBUG) {
      std::cout << " Swapped load order: " << laterLoad->getName()
                << " actually comes before " << earlierLoad->getName() << std::endl;
    }
  }

  // Strip GEPs to find the object the address is derived from.
  Value* baseObject = ptr;
  while (auto gep = dynamic_cast<GetElementPtrInst*>(baseObject)) {
    baseObject = gep->getBasePointer();
  }
  const bool readsGlobalMemory = dynamic_cast<GlobalValue*>(baseObject) != nullptr;

  // Inspect everything strictly between the two loads.
  for (auto it = std::next(firstIt); it != secondIt; ++it) {
    auto inst = it->get();

    if (auto storeInst = dynamic_cast<StoreInst*>(inst)) {
      auto storePtr = checkHashtable(storeInst->getPointer());
      // Same value-numbered address: the memory was overwritten.
      if (storePtr == ptr) {
        if (DEBUG) {
          std::cout << " Found intervening store to same address: " << storeInst->getName() << std::endl;
        }
        return true;
      }
      // TODO: consult alias analysis; only exact address matches are detected.
    }

    if (auto callInst = dynamic_cast<CallInst*>(inst)) {
      const bool mayWriteMemory =
          !sideEffectAnalysis || !sideEffectAnalysis->isPureFunction(callInst->getCallee());
      if (mayWriteMemory && readsGlobalMemory) {
        if (DEBUG) {
          std::cout << " Found function call that may modify global variable: " << callInst->getName() << std::endl;
        }
        return true;
      }
      // TODO: also account for memory reachable through pointer arguments.
    }
  }

  if (DEBUG) {
    std::cout << " No intervening store found between loads" << std::endl;
  }
  return false; // nothing found that may modify the memory
}
// Drops from `hashtable` every entry that `storeInst` may make stale.
//
// Step 1 collects the loads whose value-numbered address equals the store's
// value-numbered address. Step 2 repeatedly collects binary, unary and GEP
// instructions that have a value-numbered operand among the entries collected
// so far, until a pass finds nothing new. All collected keys are then erased.
//
// Membership is tested against the `invalidated` set rather than by scanning
// the removal list. The number of directly hit loads is remembered so the
// final debug line can report how many dependent instructions were added;
// comparing the list and set sizes could not, as they always hold the same
// elements.
//
// NOTE(review): loads addressed through an invalidated GEP and calls using an
// invalidated value are not collected as dependents - confirm this is intended.
void GVNContext::invalidateLoadsAffectedByStore(StoreInst* storeInst) {
  auto storePtr = checkHashtable(storeInst->getPointer());
  if (DEBUG) {
    std::cout << " Invalidating loads affected by store to address" << std::endl;
  }

  std::vector<Value*> toRemove;  // keys to erase, in discovery order
  std::set<Value*> invalidated;  // same keys, for fast membership tests

  // Step 1: loads reading the address that is being written.
  for (auto& entry : hashtable) {
    Value* key = entry.first;
    auto loadInst = dynamic_cast<LoadInst*>(key);
    if (!loadInst) {
      continue;
    }
    if (checkHashtable(loadInst->getPointer()) != storePtr) {
      continue;
    }
    toRemove.push_back(key);
    invalidated.insert(key);
    if (DEBUG) {
      std::cout << " Invalidating load from same address: " << loadInst->getName() << std::endl;
    }
  }
  const auto directLoadCount = toRemove.size();

  // Step 2: instructions depending on anything invalidated so far.
  bool foundMore = true;
  while (foundMore) {
    foundMore = false;
    for (auto& entry : hashtable) {
      Value* key = entry.first;
      // Already scheduled for removal.
      if (invalidated.count(key)) {
        continue;
      }

      bool shouldInvalidate = false;
      if (auto binaryInst = dynamic_cast<BinaryInst*>(key)) {
        auto lhs = checkHashtable(binaryInst->getLhs());
        auto rhs = checkHashtable(binaryInst->getRhs());
        if (invalidated.count(lhs) || invalidated.count(rhs)) {
          shouldInvalidate = true;
          if (DEBUG) {
            std::cout << " Invalidating binary instruction due to invalidated operand: "
                      << binaryInst->getName() << std::endl;
          }
        }
      } else if (auto unaryInst = dynamic_cast<UnaryInst*>(key)) {
        auto operand = checkHashtable(unaryInst->getOperand());
        if (invalidated.count(operand)) {
          shouldInvalidate = true;
          if (DEBUG) {
            std::cout << " Invalidating unary instruction due to invalidated operand: "
                      << unaryInst->getName() << std::endl;
          }
        }
      } else if (auto gepInst = dynamic_cast<GetElementPtrInst*>(key)) {
        auto basePtr = checkHashtable(gepInst->getBasePointer());
        if (invalidated.count(basePtr)) {
          shouldInvalidate = true;
        } else {
          // The base is fine; check the index operands as well.
          for (unsigned i = 0; i < gepInst->getNumIndices(); ++i) {
            if (invalidated.count(checkHashtable(gepInst->getIndex(i)))) {
              shouldInvalidate = true;
              break;
            }
          }
        }
        if (shouldInvalidate && DEBUG) {
          std::cout << " Invalidating GEP instruction due to invalidated operand: "
                    << gepInst->getName() << std::endl;
        }
      }

      if (shouldInvalidate) {
        toRemove.push_back(key);
        invalidated.insert(key);
        foundMore = true;
      }
    }
  }

  // Erase only after iteration is finished.
  for (auto key : toRemove) {
    hashtable.erase(key);
  }

  if (DEBUG && toRemove.size() > directLoadCount) {
    std::cout << " Total invalidated instructions: " << toRemove.size()
              << " (including " << (toRemove.size() - directLoadCount) << " dependent instructions)" << std::endl;
  }
}
// Builds a textual key for `inst` from its kind and the value numbers of its
// operands (printed as pointers), so that equivalent expressions yield equal
// strings.
//
// Formats:
//   binary: "binary_<kind>_<op>_<op>"
//   unary:  "unary_<kind>_<operand>"
//   GEP:    "gep_<base>[_<index>]..."
// Any other instruction yields an empty string.
//
// For commutative binary kinds the two operands are written in a fixed order
// (by address), so `a op b` and `b op a` produce the same key, in line with
// how getValueNumber(BinaryInst *) treats swapped operands.
std::string GVNContext::getCanonicalExpression(Instruction *inst) {
  std::ostringstream oss;
  if (auto binary = dynamic_cast<BinaryInst *>(inst)) {
    Value *first = checkHashtable(binary->getLhs());
    Value *second = checkHashtable(binary->getRhs());
    // std::less gives a total order on pointers, unlike a raw `<`.
    if (binary->isCommutative() && std::less<Value *>{}(second, first)) {
      Value *const held = first;
      first = second;
      second = held;
    }
    oss << "binary_" << static_cast<int>(binary->getKind()) << "_";
    oss << first << "_";
    oss << second;
  } else if (auto unary = dynamic_cast<UnaryInst *>(inst)) {
    oss << "unary_" << static_cast<int>(unary->getKind()) << "_";
    oss << checkHashtable(unary->getOperand());
  } else if (auto gep = dynamic_cast<GetElementPtrInst *>(inst)) {
    oss << "gep_" << checkHashtable(gep->getBasePointer());
    for (unsigned i = 0; i < gep->getNumIndices(); ++i) {
      oss << "_" << checkHashtable(gep->getIndex(i));
    }
  }
  return oss.str();
}
} // namespace sysy