From 5a6cfbee1e68f97e0db306b6d221845c29cdf788 Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Wed, 16 Jul 2025 14:21:00 +0800 Subject: [PATCH 01/35] =?UTF-8?q?[SysYIROptUtils]=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=E9=80=9A=E7=94=A8=E4=BC=98=E5=8C=96=E5=B7=A5=E5=85=B7=E7=B1=BB?= =?UTF-8?q?=EF=BC=8C=E4=BF=AE=E6=94=B9=E7=9B=B8=E5=85=B3=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/DeadCodeElimination.cpp | 31 ++++----------- src/Mem2Reg.cpp | 63 ++++++++++--------------------- src/Reg2Mem.cpp | 9 +---- src/SysYIROptPre.cpp | 29 +++++--------- src/include/DeadCodeElimination.h | 5 +-- src/include/Mem2Reg.h | 4 +- src/include/Reg2Mem.h | 3 +- src/include/SysYIROptPre.h | 6 ++- src/include/SysYIROptUtils.h | 33 ++++++++++++++++ 9 files changed, 77 insertions(+), 106 deletions(-) create mode 100644 src/include/SysYIROptUtils.h diff --git a/src/DeadCodeElimination.cpp b/src/DeadCodeElimination.cpp index ffe6022..a986b18 100644 --- a/src/DeadCodeElimination.cpp +++ b/src/DeadCodeElimination.cpp @@ -37,7 +37,7 @@ void DeadCodeElimination::eliminateDeadStores(Function* func, bool& changed) { auto storeInst = dynamic_cast(inst); auto pointer = storeInst->getPointer(); // 如果是全局变量或者是函数的数组参数 - if (isGlobal(pointer) || (isArr(pointer) && + if (SysYIROptUtils::isGlobal(pointer) || (SysYIROptUtils::isArr(pointer) && std::find(func->getEntryBlock()->getArguments().begin(), func->getEntryBlock()->getArguments().end(), pointer) != func->getEntryBlock()->getArguments().end())) { @@ -63,7 +63,7 @@ void DeadCodeElimination::eliminateDeadStores(Function* func, bool& changed) { std::cout << "=== Dead Store Found ===\n"; SysYPrinter::printInst(storeInst); } - usedelete(storeInst); + SysYIROptUtils::usedelete(storeInst); iter = instrs.erase(iter); } else { ++iter; @@ -85,7 +85,7 @@ void DeadCodeElimination::eliminateDeadLoads(Function* func, bool& changed) { std::cout << "=== Dead Load Binary Unary Found 
===\n"; SysYPrinter::printInst(inst); } - usedelete(inst); + SysYIROptUtils::usedelete(inst); iter = instrs.erase(iter); continue; } @@ -114,7 +114,7 @@ void DeadCodeElimination::eliminateDeadAllocas(Function* func, bool& changed) { std::cout << "=== Dead Alloca Found ===\n"; SysYPrinter::printInst(inst); } - usedelete(inst); + SysYIROptUtils::usedelete(inst); iter = instrs.erase(iter); continue; } @@ -183,7 +183,7 @@ void DeadCodeElimination::eliminateDeadRedundantLoadStore(Function* func, bool& /// 如果 pointer 仅被该 phi 使用,可以删除 ph if (tag) { changed = true; - usedelete(inst); + SysYIROptUtils::usedelete(inst); iter = instrs.erase(iter); continue; } @@ -193,7 +193,7 @@ void DeadCodeElimination::eliminateDeadRedundantLoadStore(Function* func, bool& auto pointer = memsetInst->getPointer(); if (pointer->getUses().empty()) { changed = true; - usedelete(inst); + SysYIROptUtils::usedelete(inst); iter = instrs.erase(iter); continue; } @@ -234,7 +234,7 @@ void DeadCodeElimination::eliminateDeadRedundantLoadStore(Function* func, bool& SysYPrinter::printInst(loadInst); SysYPrinter::printInst(nextStore); } - usedelete(loadInst); + SysYIROptUtils::usedelete(loadInst); iter = instrs.erase(iter); // 删除 prevStore 这里是不是可以留给删除无用store处理? 
// if (prevStore->getUses().empty()) { @@ -256,21 +256,4 @@ void DeadCodeElimination::eliminateDeadRedundantLoadStore(Function* func, bool& } -bool DeadCodeElimination::isGlobal(Value *val){ - auto gval = dynamic_cast(val); - return gval != nullptr; -} - -bool DeadCodeElimination::isArr(Value *val){ - auto aval = dynamic_cast(val); - return aval != nullptr && aval->getNumDims() != 0; -} - -void DeadCodeElimination::usedelete(Instruction *instr){ - for (auto &use1 : instr->getOperands()) { - auto val1 = use1->getValue(); - val1->removeUse(use1); - } -} - } // namespace sysy \ No newline at end of file diff --git a/src/Mem2Reg.cpp b/src/Mem2Reg.cpp index db584ed..fd7a239 100644 --- a/src/Mem2Reg.cpp +++ b/src/Mem2Reg.cpp @@ -75,7 +75,7 @@ auto Mem2Reg::computeValue2Blocks() -> void { // std::cout << std::endl; if (instr->isAlloca()) { - if (!(isArr(instr.get()) || isGlobal(instr.get()))) { + if (!(SysYIROptUtils::isArr(instr.get()) || SysYIROptUtils::isGlobal(instr.get()))) { // std::cout << " Found alloca: "; // printer.printInst(instr.get()); // std::cout << " -> Adding to allocBlocks" << std::endl; @@ -92,7 +92,7 @@ auto Mem2Reg::computeValue2Blocks() -> void { // std::cout << " Store target: "; // printer.printInst(dynamic_cast(val)); - if (!(isArr(val) || isGlobal(val))) { + if (!(SysYIROptUtils::isArr(val) || SysYIROptUtils::isGlobal(val))) { // std::cout << " Adding store to defBlocks for value: "; // printer.printInst(dynamic_cast(instr.get())); // std::cout << std::endl; @@ -108,7 +108,7 @@ auto Mem2Reg::computeValue2Blocks() -> void { // printer.printInst(dynamic_cast(val)); // std::cout << std::endl; - if (!(isArr(val) || isGlobal(val))) { + if (!(SysYIROptUtils::isArr(val) || SysYIROptUtils::isGlobal(val))) { // std::cout << " Adding load to useBlocks for value: "; // printer.printInst(dynamic_cast(val)); // std::cout << std::endl; @@ -199,7 +199,7 @@ auto Mem2Reg::cascade(Instruction *instr, bool &changed, Function *func, BasicBl auto tofind = 
std::find_if(instrs.begin(), instrs.end(), [&top](const auto &instr) { return instr.get() == top; }); assert(tofind != instrs.end()); - usedelete(tofind->get()); + SysYIROptUtils::usedelete(tofind->get()); instrs.erase(tofind); } } @@ -291,7 +291,7 @@ auto Mem2Reg::preOptimize1() -> void { continue; } - usedelete(tofind->get()); + SysYIROptUtils::usedelete(tofind->get()); bb->getInstructions().erase(tofind); iter = vToAllocB.erase(iter); } else { @@ -334,7 +334,7 @@ auto Mem2Reg::preOptimize1() -> void { std::cout << std::endl; auto valUsedByStore = dynamic_cast((*it)->getOperand(0)); - usedelete(it->get()); + SysYIROptUtils::usedelete(it->get()); if (valUsedByStore != nullptr && valUsedByStore->getUses().size() == 1 && @@ -370,7 +370,7 @@ auto Mem2Reg::preOptimize1() -> void { return instr.get() == val; }); if (tofind != bb->getInstructions().end()) { - usedelete(tofind->get()); + SysYIROptUtils::usedelete(tofind->get()); bb->getInstructions().erase(tofind); } else { std::cerr << "ERROR: Alloca not found in BB!" 
<< std::endl; @@ -423,7 +423,7 @@ auto Mem2Reg::preOptimize2() -> void { for (auto curit = std::next(it); curit != instrs.end();) { if ((*curit)->isLoad() && (*curit)->getOperand(0) == val) { curit->get()->replaceAllUsesWith(propogationVal); - usedelete(curit->get()); + SysYIROptUtils::usedelete(curit->get()); curit = instrs.erase(curit); funcInfo->removeValue2UseBlock(val, block); } else { @@ -454,7 +454,7 @@ auto Mem2Reg::preOptimize2() -> void { for (auto childIter = childInstrs.begin(); childIter != childInstrs.end();) { if ((*childIter)->isLoad() && (*childIter)->getOperand(0) == val) { childIter->get()->replaceAllUsesWith(propogationVal); - usedelete(childIter->get()); + SysYIROptUtils::usedelete(childIter->get()); childIter = childInstrs.erase(childIter); funcInfo->removeValue2UseBlock(val, child); } else { @@ -465,7 +465,7 @@ auto Mem2Reg::preOptimize2() -> void { // 如果对该val的所有load均替换掉了,那么对于该val的defining block中的最后一个define也可以删除了 // 同时该块中前面对于该val的define也变成死代码了,可调用preOptimize1进行删除 if (funcInfo->getUseBlocksByValue(val).empty()) { - usedelete(it->get()); + SysYIROptUtils::usedelete(it->get()); instrs.erase(it); auto change = funcInfo->removeValue2DefBlock(val, block); if (change) { @@ -476,7 +476,7 @@ auto Mem2Reg::preOptimize2() -> void { assert(bb != nullptr); auto tofind = std::find_if(bb->getInstructions().begin(), bb->getInstructions().end(), [val](const auto &instr) { return instr.get() == val; }); - usedelete(tofind->get()); + SysYIROptUtils::usedelete(tofind->get()); bb->getInstructions().erase(tofind); funcInfo->removeValue2AllocBlock(val); } @@ -529,7 +529,7 @@ auto Mem2Reg::preOptimize3() -> void { for (auto curit = std::next(it); curit != last;) { if ((*curit)->isLoad() && (*curit)->getOperand(0) == val) { curit->get()->replaceAllUsesWith(propogationVal); - usedelete(curit->get()); + SysYIROptUtils::usedelete(curit->get()); curit = instrs.erase(curit); funcInfo->removeValue2UseBlock(val, block); } else { @@ -541,14 +541,14 @@ auto 
Mem2Reg::preOptimize3() -> void { [val](const auto &instr) { return instr == val; }) != func->getEntryBlock()->getArguments().end()) && last == instrs.end()) { - usedelete(it->get()); + SysYIROptUtils::usedelete(it->get()); it = instrs.erase(it); if (funcInfo->removeValue2DefBlock(val, block)) { auto bb = funcInfo->getAllocBlockByValue(val); if (bb != nullptr) { auto tofind = std::find_if(bb->getInstructions().begin(), bb->getInstructions().end(), [val](const auto &instr) { return instr.get() == val; }); - usedelete(tofind->get()); + SysYIROptUtils::usedelete(tofind->get()); bb->getInstructions().erase(tofind); funcInfo->removeValue2AllocBlock(val); } @@ -610,7 +610,7 @@ auto Mem2Reg::rename(BasicBlock *block, std::unordered_map &count, // 对于load指令,变量用最新的那个 if (instr->isLoad()) { auto val = instr->getOperand(0); - if (!(isArr(val) || isGlobal(val))) { + if (!(SysYIROptUtils::isArr(val) || SysYIROptUtils::isGlobal(val))) { if (!stacks[val].empty()) { instr->replaceOperand(0, stacks[val].top()); } @@ -621,7 +621,7 @@ auto Mem2Reg::rename(BasicBlock *block, std::unordered_map &count, if (instr->isAlloca()) { // alloca指令名字不改了,命名就按x,x_1,x_2...来就行 auto val = instr; - if (!(isArr(val) || isGlobal(val))) { + if (!(SysYIROptUtils::isArr(val) || SysYIROptUtils::isGlobal(val))) { ++valPop[val]; stacks[val].push(val); ++count[val]; @@ -629,11 +629,11 @@ auto Mem2Reg::rename(BasicBlock *block, std::unordered_map &count, } else if (instr->isPhi()) { // Phi指令也是一条特殊的define指令 auto val = dynamic_cast(instr)->getMapVal(); - if (!(isArr(val) || isGlobal(val))) { + if (!(SysYIROptUtils::isArr(val) || SysYIROptUtils::isGlobal(val))) { auto i = count[val]; if (i == 0) { // 对还未alloca就有phi的指令的处理,直接删除 - usedelete(iter->get()); + SysYIROptUtils::usedelete(iter->get()); iter = instrs.erase(iter); continue; } @@ -649,7 +649,7 @@ auto Mem2Reg::rename(BasicBlock *block, std::unordered_map &count, } else { // store指令看operand的名字,我们的实现是规定变量在operand的第二位,用一个新的alloca x_i代替 auto val = 
instr->getOperand(1); - if (!(isArr(val) || isGlobal(val))) { + if (!(SysYIROptUtils::isArr(val) || SysYIROptUtils::isGlobal(val))) { auto i = count[val]; auto newname = dynamic_cast(val)->getName() + "_" + std::to_string(i); auto newalloca = pBuilder->createAllocaInstWithoutInsert(val->getType(), {}, block, newname); @@ -773,29 +773,4 @@ auto Mem2Reg::getPredIndex(BasicBlock *n, BasicBlock *s) -> int { return index; } -/** - * 判断一个value是不是全局变量 - */ -auto Mem2Reg::isGlobal(Value *val) -> bool { - auto gval = dynamic_cast(val); - return gval != nullptr; -} - -/** - * 判断一个value是不是数组 - */ -auto Mem2Reg::isArr(Value *val) -> bool { - auto aval = dynamic_cast(val); - return aval != nullptr && aval->getNumDims() != 0; -} - -/** - * 删除一个指令的operand对应的value的该条use - */ -auto Mem2Reg::usedelete(Instruction *instr) -> void { - for (auto &use : instr->getOperands()) { - auto val = use->getValue(); - val->removeUse(use); - } -} } // namespace sysy diff --git a/src/Reg2Mem.cpp b/src/Reg2Mem.cpp index d44d1c8..70bdd14 100644 --- a/src/Reg2Mem.cpp +++ b/src/Reg2Mem.cpp @@ -103,7 +103,7 @@ void Reg2Mem::DeletePhiInst(){ } // 删除phi指令 auto &instructions = basicBlock->getInstructions(); - usedelete(iter->get()); + SysYIROptUtils::usedelete(iter->get()); iter = instructions.erase(iter); if (basicBlock->getNumInstructions() == 0) { if (basicBlock->getNumSuccessors() == 1) { @@ -119,11 +119,4 @@ void Reg2Mem::DeletePhiInst(){ } } -void Reg2Mem::usedelete(Instruction *instr) { - for (auto &use : instr->getOperands()) { - auto val = use->getValue(); - val->removeUse(use); - } -} - } // namespace sysy diff --git a/src/SysYIROptPre.cpp b/src/SysYIROptPre.cpp index fb05cb7..9eb495c 100644 --- a/src/SysYIROptPre.cpp +++ b/src/SysYIROptPre.cpp @@ -1,4 +1,5 @@ #include "SysYIROptPre.h" +#include "SysYIROptUtils.h" #include #include #include @@ -10,18 +11,6 @@ namespace sysy { -/** - * use删除operand,以免扰乱后续分析 - * instr: 要删除的指令 - */ -void SysYOptPre::usedelete(Instruction *instr) { - for (auto &use : 
instr->getOperands()) { - Value* val = use->getValue(); - // std::cout << delete << val->getName() << std::endl; - val->removeUse(use); - } -} - // 删除br后的无用指令 void SysYOptPre::SysYDelInstAfterBr() { @@ -34,7 +23,7 @@ void SysYOptPre::SysYDelInstAfterBr() { auto Branchiter = instructions.end(); for (auto iter = instructions.begin(); iter != instructions.end(); ++iter) { if (Branch) - usedelete(iter->get()); + SysYIROptUtils::usedelete(iter->get()); else if ((*iter)->isTerminator()){ Branch = true; Branchiter = iter; @@ -69,7 +58,7 @@ void SysYOptPre::SysYDelInstAfterBr() { } } - +// 合并空基本块 void SysYOptPre::SysYBlockMerge() { auto &functions = pModule->getFunctions(); //std::map> for (auto &function : functions) { @@ -91,12 +80,12 @@ void SysYOptPre::SysYBlockMerge() { auto thelastinstinst = block->end(); (--thelastinstinst); if (thelastinstinst->get()->isUnconditional()) { - usedelete(thelastinstinst->get()); + SysYIROptUtils::usedelete(thelastinstinst->get()); block->getInstructions().erase(thelastinstinst); } else if (thelastinstinst->get()->isConditional()) { // 如果是条件分支,判断条件是否相同,主要优化相同布尔表达式 if (thelastinstinst->get()->getOperand(1)->getName() == thelastinstinst->get()->getOperand(1)->getName()) { - usedelete(thelastinstinst->get()); + SysYIROptUtils::usedelete(thelastinstinst->get()); block->getInstructions().erase(thelastinstinst); } } @@ -170,7 +159,7 @@ void SysYOptPre::SysYDelNoPreBLock() { if (!blockIter->get()->getreachable()) for (auto &iterInst : blockIter->get()->getInstructions()) - usedelete(iterInst.get()); + SysYIROptUtils::usedelete(iterInst.get()); } @@ -303,7 +292,7 @@ void SysYOptPre::SysYDelEmptyBlock() { if (dynamic_cast(thelastinst->get()->getOperand(1)) == dynamic_cast(thelastinst->get()->getOperand(2))) { auto thebrBlock = dynamic_cast(thelastinst->get()->getOperand(1)); - usedelete(thelastinst->get()); + SysYIROptUtils::usedelete(thelastinst->get()); thelastinst = basicBlock->getInstructions().erase(thelastinst); 
pBuilder->setPosition(basicBlock.get(), basicBlock->end()); pBuilder->createUncondBrInst(thebrBlock, {}); @@ -344,7 +333,7 @@ void SysYOptPre::SysYDelEmptyBlock() { if (dynamic_cast(thelastinst->get()->getOperand(1)) == dynamic_cast(thelastinst->get()->getOperand(2))) { auto thebrBlock = dynamic_cast(thelastinst->get()->getOperand(1)); - usedelete(thelastinst->get()); + SysYIROptUtils::usedelete(thelastinst->get()); thelastinst = basicBlock->getInstructions().erase(thelastinst); pBuilder->setPosition(basicBlock.get(), basicBlock->end()); pBuilder->createUncondBrInst(thebrBlock, {}); @@ -420,7 +409,7 @@ void SysYOptPre::SysYDelEmptyBlock() { } for (auto &iterInst : iter->get()->getInstructions()) - usedelete(iterInst.get()); + SysYIROptUtils::usedelete(iterInst.get()); // 删除不可达基本块的phi指令的操作数 for (auto &succ : iter->get()->getSuccessors()) { int index = 0; diff --git a/src/include/DeadCodeElimination.h b/src/include/DeadCodeElimination.h index 72b9935..9864a2d 100644 --- a/src/include/DeadCodeElimination.h +++ b/src/include/DeadCodeElimination.h @@ -3,6 +3,7 @@ #include "IR.h" #include "SysYIRAnalyser.h" #include "SysYIRPrinter.h" +#include "SysYIROptUtils.h" namespace sysy { @@ -31,9 +32,5 @@ class DeadCodeElimination { void eliminateDeadGlobals(bool& changed); // 消除无用全局变量 void eliminateDeadIndirectiveAllocas(Function* func, bool& changed); // 消除无用间接内存分配(phi节点) void eliminateDeadRedundantLoadStore(Function* func, bool& changed); // 消除冗余加载和存储 - bool isGlobal(Value *val); - bool isArr(Value *val); - void usedelete(Instruction *instr); - }; } // namespace sysy diff --git a/src/include/Mem2Reg.h b/src/include/Mem2Reg.h index 0004708..919886a 100644 --- a/src/include/Mem2Reg.h +++ b/src/include/Mem2Reg.h @@ -8,6 +8,7 @@ #include "IR.h" #include "IRBuilder.h" #include "SysYIRAnalyser.h" +#include "SysYIROptUtils.h" namespace sysy { /** @@ -51,9 +52,6 @@ private: auto getPredIndex(BasicBlock *n, BasicBlock *s) -> int; ///< 获取前驱索引 auto cascade(Instruction *instr, bool 
&changed, Function *func, BasicBlock *block, std::list> &instrs) -> void; ///< 消除级联关系 - auto isGlobal(Value *val) -> bool; ///< 判断是否是全局变量 - auto isArr(Value *val) -> bool; ///< 判断是否是数组 - auto usedelete(Instruction *instr) -> void; ///< 删除指令相关的value-use-user关系 }; } // namespace sysy diff --git a/src/include/Reg2Mem.h b/src/include/Reg2Mem.h index 6249d71..8eec1b6 100644 --- a/src/include/Reg2Mem.h +++ b/src/include/Reg2Mem.h @@ -2,6 +2,7 @@ #include "IR.h" #include "IRBuilder.h" +#include "SysYIROptUtils.h" namespace sysy { /** @@ -16,8 +17,6 @@ public: Reg2Mem(Module *pMoudle, IRBuilder *pBuilder) : pModule(pMoudle), pBuilder(pBuilder) {} void DeletePhiInst(); - // 删除UD关系, 因为删除了phi指令会修改ud关系 - void usedelete(Instruction *instr); }; } // namespace sysy \ No newline at end of file diff --git a/src/include/SysYIROptPre.h b/src/include/SysYIROptPre.h index 4f0bdca..5cb0b34 100644 --- a/src/include/SysYIROptPre.h +++ b/src/include/SysYIROptPre.h @@ -10,6 +10,11 @@ namespace sysy { // 这些操作可以在SysY IR生成时就完成,但为了简化IR生成过程, // 这里将其放在SysY IR生成后进行预处理 // 同时兼容phi节点的处理,可以再mem2reg后再次调用优化 + +//TODO: 可增加的CFG优化 +// - 简化条件分支(Branch Simplification),如条件恒真/恒假转为直接跳转 +// - 合并连续的跳转指令(Jump Threading)在合并不可达块中似乎已经实现了 +// - 基本块重排序(Block Reordering),提升局部性 class SysYOptPre { private: Module *pModule; @@ -31,7 +36,6 @@ class SysYOptPre { void SysYBlockMerge(); // 合并基本块(主要针对嵌套if while的exit块, // 也可以修改IR生成实现回填机制 void SysYAddReturn(); // 添加return指令(主要针对Void函数) - void usedelete(Instruction *instr); // use删除 }; } // namespace sysy diff --git a/src/include/SysYIROptUtils.h b/src/include/SysYIROptUtils.h new file mode 100644 index 0000000..d2d2e55 --- /dev/null +++ b/src/include/SysYIROptUtils.h @@ -0,0 +1,33 @@ +#pragma once + +#include "IR.h" + +namespace sysy { + +// 优化工具类,包含一些通用的优化方法 +// 这些方法可以在不同的优化 pass 中复用 +// 例如:删除use关系,判断是否是全局变量等 +class SysYIROptUtils{ + +public: + // 删除use关系 + static void usedelete(Instruction *instr) { + for (auto &use : instr->getOperands()) { + Value* val = use->getValue(); + 
val->removeUse(use); + } + } + + // 判断是否是全局变量 + static bool isGlobal(Value *val) { + auto gval = dynamic_cast(val); + return gval != nullptr; + } + // 判断是否是数组 + static bool isArr(Value *val) { + auto aval = dynamic_cast(val); + return aval != nullptr && aval->getNumDims() != 0; + } +}; + +}// namespace sysy \ No newline at end of file From 00348c1931178bc88fbefdfb8bfc0c3e14247b95 Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Wed, 16 Jul 2025 21:54:36 +0800 Subject: [PATCH 02/35] =?UTF-8?q?=E4=BF=AE=E6=94=B9CFG=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E7=9A=84=E6=96=87=E4=BB=B6=E5=90=8D=EF=BC=8C=E4=BF=AE=E6=94=B9?= =?UTF-8?q?phidel=E6=A0=87=E7=AD=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/CMakeLists.txt | 2 +- src/Reg2Mem.cpp | 2 +- src/{SysYIROptPre.cpp => SysYIRCFGOpt.cpp} | 12 ++++++------ src/include/{SysYIROptPre.h => SysYIRCFGOpt.h} | 10 ++++++---- src/sysyc.cpp | 6 +++--- 5 files changed, 17 insertions(+), 15 deletions(-) rename src/{SysYIROptPre.cpp => SysYIRCFGOpt.cpp} (98%) rename src/include/{SysYIROptPre.h => SysYIRCFGOpt.h} (84%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c89b3b0..f29dec4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -21,7 +21,7 @@ add_executable(sysyc IR.cpp SysYIRGenerator.cpp SysYIRPrinter.cpp - SysYIROptPre.cpp + SysYIRCFGOpt.cpp SysYIRAnalyser.cpp DeadCodeElimination.cpp Mem2Reg.cpp diff --git a/src/Reg2Mem.cpp b/src/Reg2Mem.cpp index 70bdd14..d90812a 100644 --- a/src/Reg2Mem.cpp +++ b/src/Reg2Mem.cpp @@ -38,7 +38,7 @@ void Reg2Mem::DeletePhiInst(){ // 创建一个basicblock auto newbasicBlock = function.second->addBasicBlock(); std::stringstream ss; - ss << " phidel.L" << pBuilder->getLabelIndex(); + ss << "phidel.L" << pBuilder->getLabelIndex(); newbasicBlock->setName(ss.str()); ss.str(""); // // 修改前驱后继关系 diff --git a/src/SysYIROptPre.cpp b/src/SysYIRCFGOpt.cpp similarity index 98% rename from src/SysYIROptPre.cpp rename to 
src/SysYIRCFGOpt.cpp index 9eb495c..e141b67 100644 --- a/src/SysYIROptPre.cpp +++ b/src/SysYIRCFGOpt.cpp @@ -1,4 +1,4 @@ -#include "SysYIROptPre.h" +#include "SysYIRCFGOpt.h" #include "SysYIROptUtils.h" #include #include @@ -13,7 +13,7 @@ namespace sysy { // 删除br后的无用指令 -void SysYOptPre::SysYDelInstAfterBr() { +void SysYCFGOpt::SysYDelInstAfterBr() { auto &functions = pModule->getFunctions(); for (auto &function : functions) { auto basicBlocks = function.second->getBasicBlocks(); @@ -59,7 +59,7 @@ void SysYOptPre::SysYDelInstAfterBr() { } // 合并空基本块 -void SysYOptPre::SysYBlockMerge() { +void SysYCFGOpt::SysYBlockMerge() { auto &functions = pModule->getFunctions(); //std::map> for (auto &function : functions) { // auto basicBlocks = function.second->getBasicBlocks(); @@ -129,7 +129,7 @@ void SysYOptPre::SysYBlockMerge() { } // 删除无前驱块,兼容SSA后的处理 -void SysYOptPre::SysYDelNoPreBLock() { +void SysYCFGOpt::SysYDelNoPreBLock() { auto &functions = pModule->getFunctions(); // std::map> for (auto &function : functions) { @@ -190,7 +190,7 @@ void SysYOptPre::SysYDelNoPreBLock() { } } -void SysYOptPre::SysYDelEmptyBlock() { +void SysYCFGOpt::SysYDelEmptyBlock() { auto &functions = pModule->getFunctions(); for (auto &function : functions) { // 收集不可达基本块 @@ -438,7 +438,7 @@ void SysYOptPre::SysYDelEmptyBlock() { } // 如果函数没有返回指令,则添加一个默认返回指令(主要解决void函数没有返回指令的问题) -void SysYOptPre::SysYAddReturn() { +void SysYCFGOpt::SysYAddReturn() { auto &functions = pModule->getFunctions(); for (auto &function : functions) { auto &func = function.second; diff --git a/src/include/SysYIROptPre.h b/src/include/SysYIRCFGOpt.h similarity index 84% rename from src/include/SysYIROptPre.h rename to src/include/SysYIRCFGOpt.h index 5cb0b34..9c791aa 100644 --- a/src/include/SysYIROptPre.h +++ b/src/include/SysYIRCFGOpt.h @@ -11,17 +11,19 @@ namespace sysy { // 这里将其放在SysY IR生成后进行预处理 // 同时兼容phi节点的处理,可以再mem2reg后再次调用优化 -//TODO: 可增加的CFG优化 +//TODO: 可增加的CFG优化和方法 +// - 检查基本块跳转关系正确性 // - 简化条件分支(Branch 
Simplification),如条件恒真/恒假转为直接跳转 // - 合并连续的跳转指令(Jump Threading)在合并不可达块中似乎已经实现了 // - 基本块重排序(Block Reordering),提升局部性 -class SysYOptPre { + +class SysYCFGOpt { private: Module *pModule; IRBuilder *pBuilder; public: - SysYOptPre(Module *pMoudle, IRBuilder *pBuilder) : pModule(pMoudle), pBuilder(pBuilder) {} + SysYCFGOpt(Module *pMoudle, IRBuilder *pBuilder) : pModule(pMoudle), pBuilder(pBuilder) {} void SysYOptimizateAfterIR(){ SysYDelInstAfterBr(); @@ -32,7 +34,7 @@ class SysYOptPre { } void SysYDelInstAfterBr(); // 删除br后面的指令 void SysYDelEmptyBlock(); // 空块删除 - void SysYDelNoPreBLock(); // 删除无前驱块 + void SysYDelNoPreBLock(); // 删除无前驱块(不可达块) void SysYBlockMerge(); // 合并基本块(主要针对嵌套if while的exit块, // 也可以修改IR生成实现回填机制 void SysYAddReturn(); // 添加return指令(主要针对Void函数) diff --git a/src/sysyc.cpp b/src/sysyc.cpp index edd9318..c34a801 100644 --- a/src/sysyc.cpp +++ b/src/sysyc.cpp @@ -13,7 +13,7 @@ using namespace antlr4; #include "SysYIRGenerator.h" #include "SysYIRPrinter.h" -#include "SysYIROptPre.h" +#include "SysYIRCFGOpt.h" #include "RISCv64Backend.h" #include "SysYIRAnalyser.h" #include "DeadCodeElimination.h" @@ -127,8 +127,8 @@ int main(int argc, char **argv) { cout << "Applying middle-end optimizations (level -O" << optLevel << ")...\n"; // 默认优化 pass (在所有优化级别都会执行) - SysYOptPre optPre(moduleIR, builder); - optPre.SysYOptimizateAfterIR(); + SysYCFGOpt cfgopt(moduleIR, builder); + cfgopt.SysYOptimizateAfterIR(); ControlFlowAnalysis cfa(moduleIR); cfa.init(); From f7e318e623e787ed67894ff0142ec099c5c0f16d Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Wed, 16 Jul 2025 21:55:28 +0800 Subject: [PATCH 03/35] =?UTF-8?q?[SCCP]=E5=88=9D=E6=AD=A5=E6=9E=84?= =?UTF-8?q?=E5=BB=BASCCP=EF=BC=8C.cpp=E4=BB=8D=E4=B8=8D=E5=AE=8C=E5=96=84?= =?UTF-8?q?=E6=9A=82=E4=B8=8Dcommit?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/include/SCCP.h | 196 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 196 insertions(+) 
create mode 100644 src/include/SCCP.h diff --git a/src/include/SCCP.h b/src/include/SCCP.h new file mode 100644 index 0000000..da6452e --- /dev/null +++ b/src/include/SCCP.h @@ -0,0 +1,196 @@ +#pragma once + +#include "IR.h" + +namespace sysy { + +// 稀疏条件常量传播类 +// Sparse Conditional Constant Propagation +/* +伪代码 +function SCCP_Optimization(Module): + for each Function in Module: + changed = true + while changed: + changed = false + // 阶段1: 常量传播与折叠 + changed |= PropagateConstants(Function) + // 阶段2: 控制流简化 + changed |= SimplifyControlFlow(Function) + end while + end for + +function PropagateConstants(Function): + // 初始化 + executableBlocks = {entryBlock} + valueState = map // 值->状态映射 + instWorkList = Queue() + edgeWorkList = Queue() + + // 初始化工作列表 + for each inst in entryBlock: + instWorkList.push(inst) + + // 迭代处理 + while !instWorkList.empty() || !edgeWorkList.empty(): + // 处理指令工作列表 + while !instWorkList.empty(): + inst = instWorkList.pop() + // 如果指令是可执行基本块中的 + if executableBlocks.contains(inst.parent): + ProcessInstruction(inst) + + // 处理边工作列表 + while !edgeWorkList.empty(): + edge = edgeWorkList.pop() + ProcessEdge(edge) + + // 应用常量替换 + for each inst in Function: + if valueState[inst] == CONSTANT: + ReplaceWithConstant(inst, valueState[inst].constant) + changed = true + + return changed + +function ProcessInstruction(Instruction inst): + switch inst.type: + //二元操作 + case BINARY_OP: + lhs = GetValueState(inst.operands[0]) + rhs = GetValueState(inst.operands[1]) + if lhs == CONSTANT && rhs == CONSTANT: + newState = ComputeConstant(inst.op, lhs.value, rhs.value) + UpdateState(inst, newState) + else if lhs == BOTTOM || rhs == BOTTOM: + UpdateState(inst, BOTTOM) + //phi + case PHI: + mergedState = ⊤ + for each incoming in inst.incomings: + // 检查每个输入的状态 + if executableBlocks.contains(incoming.block): + incomingState = GetValueState(incoming.value) + mergedState = Meet(mergedState, incomingState) + UpdateState(inst, mergedState) + // 条件分支 + case COND_BRANCH: + cond = 
GetValueState(inst.condition) + if cond == CONSTANT: + // 判断条件分支 + if cond.value == true: + AddEdgeToWorkList(inst.parent, inst.trueTarget) + else: + AddEdgeToWorkList(inst.parent, inst.falseTarget) + else if cond == BOTTOM: + AddEdgeToWorkList(inst.parent, inst.trueTarget) + AddEdgeToWorkList(inst.parent, inst.falseTarget) + + case UNCOND_BRANCH: + AddEdgeToWorkList(inst.parent, inst.target) + + // 其他指令处理... + +function ProcessEdge(Edge edge): + fromBB, toBB = edge + if !executableBlocks.contains(toBB): + executableBlocks.add(toBB) + for each inst in toBB: + if inst is PHI: + instWorkList.push(inst) + else: + instWorkList.push(inst) // 非PHI指令 + + // 更新PHI节点的输入 + for each phi in toBB.phis: + instWorkList.push(phi) + +function SimplifyControlFlow(Function): + changed = false + // 标记可达基本块 + ReachableBBs = FindReachableBlocks(Function.entry) + + // 删除不可达块 + for each bb in Function.blocks: + if !ReachableBBs.contains(bb): + RemoveDeadBlock(bb) + changed = true + + // 简化条件分支 + for each bb in Function.blocks: + terminator = bb.terminator + if terminator is COND_BRANCH: + cond = GetValueState(terminator.condition) + if cond == CONSTANT: + SimplifyBranch(terminator, cond.value) + changed = true + + return changed + +function RemoveDeadBlock(BasicBlock bb): + // 1. 更新前驱块的分支指令 + for each pred in bb.predecessors: + UpdateTerminator(pred, bb) + + // 2. 更新后继块的PHI节点 + for each succ in bb.successors: + RemovePhiIncoming(succ, bb) + + // 3. 删除块内所有指令 + for each inst in bb.instructions: + inst.remove() + + // 4. 
从函数中移除基本块 + Function.removeBlock(bb) + +function Meet(State a, State b): + if a == ⊤: return b + if b == ⊤: return a + if a == ⊥ || b == ⊥: return ⊥ + if a.value == b.value: return a + return ⊥ + +function UpdateState(Value v, State newState): + oldState = valueState.get(v, ⊤) + if newState != oldState: + valueState[v] = newState + for each user in v.users: + if user is Instruction: + instWorkList.push(user) + +*/ + +enum class LatticeValue { + Top, // ⊤ (Unknown) + Constant, // c (Constant) + Bottom // ⊥ (Undefined / Varying) +}; +// LatticeValue: 用于表示值的状态,Top表示未知,Constant表示常量,Bottom表示未定义或变化的值。 +// 这里的LatticeValue用于跟踪每个SSA值(变量、指令结果)的状态, +// 以便在SCCP过程中进行常量传播和控制流简化。 + +//TODO: 下列数据结构考虑集成到类中,避免重命名问题 +static std::set Worklist; +static std::unordered_set Executable_Blocks; +static std::unordered_set > Executable_Edges; +static std::map valueState; + +class SCCP { +private: + Module *pModule; + +public: + SCCP(Module *pMoudle) : pModule(pMoudle) {} + + void run(); + bool PropagateConstants(Function *function); + bool SimplifyControlFlow(Function *function); + void ProcessInstruction(Instruction *inst); + void ProcessEdge(const std::pair &edge); + void RemoveDeadBlock(BasicBlock *bb); + void UpdateState(Value *v, LatticeValue newState); + LatticeValue Meet(LatticeValue a, LatticeValue b); + LatticeValue GetValueState(Value *v); +}; + +} // namespace sysy From 009f54863e22c5eb0db4e312f5f6d22d2d6e6008 Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Thu, 17 Jul 2025 15:54:37 +0800 Subject: [PATCH 04/35] =?UTF-8?q?[CFG]CFG=E4=BC=98=E5=8C=96=E6=96=B9?= =?UTF-8?q?=E6=B3=95=E8=BD=AC=E6=8D=A2=E4=B8=BA=E9=9D=99=E6=80=81=E6=96=B9?= =?UTF-8?q?=E6=B3=95=EF=BC=8C=E6=96=B9=E4=BE=BF=E5=85=B6=E4=BB=96=E4=BC=98?= =?UTF-8?q?=E5=8C=96=E9=81=8D=E8=B0=83=E7=94=A8=EF=BC=8CTODO=EF=BC=9A?= =?UTF-8?q?=E7=AE=80=E5=8C=96=E6=9D=A1=E4=BB=B6=E5=88=86=E6=94=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/SysYIRCFGOpt.cpp | 776 
+++++++++++++++++++------------------ src/include/SysYIRCFGOpt.h | 34 +- 2 files changed, 415 insertions(+), 395 deletions(-) diff --git a/src/SysYIRCFGOpt.cpp b/src/SysYIRCFGOpt.cpp index e141b67..4098008 100644 --- a/src/SysYIRCFGOpt.cpp +++ b/src/SysYIRCFGOpt.cpp @@ -13,248 +13,374 @@ namespace sysy { // 删除br后的无用指令 -void SysYCFGOpt::SysYDelInstAfterBr() { - auto &functions = pModule->getFunctions(); - for (auto &function : functions) { - auto basicBlocks = function.second->getBasicBlocks(); - for (auto &basicBlock : basicBlocks) { - bool Branch = false; - auto &instructions = basicBlock->getInstructions(); - auto Branchiter = instructions.end(); - for (auto iter = instructions.begin(); iter != instructions.end(); ++iter) { - if (Branch) - SysYIROptUtils::usedelete(iter->get()); - else if ((*iter)->isTerminator()){ - Branch = true; - Branchiter = iter; - } +bool SysYCFGOpt::SysYDelInstAfterBr(Function *func) { + bool changed = false; + + auto basicBlocks = func->getBasicBlocks(); + for (auto &basicBlock : basicBlocks) { + bool Branch = false; + auto &instructions = basicBlock->getInstructions(); + auto Branchiter = instructions.end(); + for (auto iter = instructions.begin(); iter != instructions.end(); ++iter) { + if (Branch) + SysYIROptUtils::usedelete(iter->get()); + else if ((*iter)->isTerminator()){ + Branch = true; + Branchiter = iter; } - if (Branchiter != instructions.end()) ++Branchiter; - while (Branchiter != instructions.end()) - Branchiter = instructions.erase(Branchiter); - - if (Branch) { // 更新前驱后继关系 - auto thelastinstinst = basicBlock->getInstructions().end(); - --thelastinstinst; - auto &Successors = basicBlock->getSuccessors(); - for (auto iterSucc = Successors.begin(); iterSucc != Successors.end();) { - (*iterSucc)->removePredecessor(basicBlock.get()); - basicBlock->removeSuccessor(*iterSucc); - } - if (thelastinstinst->get()->isUnconditional()) { - BasicBlock* branchBlock = dynamic_cast(thelastinstinst->get()->getOperand(0)); - 
basicBlock->addSuccessor(branchBlock); - branchBlock->addPredecessor(basicBlock.get()); - } else if (thelastinstinst->get()->isConditional()) { - BasicBlock* thenBlock = dynamic_cast(thelastinstinst->get()->getOperand(1)); - BasicBlock* elseBlock = dynamic_cast(thelastinstinst->get()->getOperand(2)); - basicBlock->addSuccessor(thenBlock); - basicBlock->addSuccessor(elseBlock); - thenBlock->addPredecessor(basicBlock.get()); - elseBlock->addPredecessor(basicBlock.get()); - } + } + if (Branchiter != instructions.end()) ++Branchiter; + while (Branchiter != instructions.end()) { + changed = true; + Branchiter = instructions.erase(Branchiter); + } + + if (Branch) { // 更新前驱后继关系 + auto thelastinstinst = basicBlock->getInstructions().end(); + --thelastinstinst; + auto &Successors = basicBlock->getSuccessors(); + for (auto iterSucc = Successors.begin(); iterSucc != Successors.end();) { + (*iterSucc)->removePredecessor(basicBlock.get()); + basicBlock->removeSuccessor(*iterSucc); + } + if (thelastinstinst->get()->isUnconditional()) { + BasicBlock* branchBlock = dynamic_cast(thelastinstinst->get()->getOperand(0)); + basicBlock->addSuccessor(branchBlock); + branchBlock->addPredecessor(basicBlock.get()); + } else if (thelastinstinst->get()->isConditional()) { + BasicBlock* thenBlock = dynamic_cast(thelastinstinst->get()->getOperand(1)); + BasicBlock* elseBlock = dynamic_cast(thelastinstinst->get()->getOperand(2)); + basicBlock->addSuccessor(thenBlock); + basicBlock->addSuccessor(elseBlock); + thenBlock->addPredecessor(basicBlock.get()); + elseBlock->addPredecessor(basicBlock.get()); } } } + + return changed; } // 合并空基本块 -void SysYCFGOpt::SysYBlockMerge() { - auto &functions = pModule->getFunctions(); //std::map> - for (auto &function : functions) { - // auto basicBlocks = function.second->getBasicBlocks(); - auto &func = function.second; - for (auto blockiter = func->getBasicBlocks().begin(); - blockiter != func->getBasicBlocks().end();) { - if 
(blockiter->get()->getNumSuccessors() == 1) { - // 如果当前块只有一个后继块 - // 且后继块只有一个前驱块 - // 则将当前块和后继块合并 - if (((blockiter->get())->getSuccessors()[0])->getNumPredecessors() == 1) { - // std::cout << "merge block: " << blockiter->get()->getName() << std::endl; - BasicBlock* block = blockiter->get(); - BasicBlock* nextBlock = blockiter->get()->getSuccessors()[0]; - auto nextarguments = nextBlock->getArguments(); - // 删除br指令 - if (block->getNumInstructions() != 0) { - auto thelastinstinst = block->end(); - (--thelastinstinst); - if (thelastinstinst->get()->isUnconditional()) { +bool SysYCFGOpt::SysYBlockMerge(Function *func) { + bool changed = false; + + for (auto blockiter = func->getBasicBlocks().begin(); + blockiter != func->getBasicBlocks().end();) { + if (blockiter->get()->getNumSuccessors() == 1) { + // 如果当前块只有一个后继块 + // 且后继块只有一个前驱块 + // 则将当前块和后继块合并 + if (((blockiter->get())->getSuccessors()[0])->getNumPredecessors() == 1) { + // std::cout << "merge block: " << blockiter->get()->getName() << std::endl; + BasicBlock* block = blockiter->get(); + BasicBlock* nextBlock = blockiter->get()->getSuccessors()[0]; + auto nextarguments = nextBlock->getArguments(); + // 删除br指令 + if (block->getNumInstructions() != 0) { + auto thelastinstinst = block->end(); + (--thelastinstinst); + if (thelastinstinst->get()->isUnconditional()) { + SysYIROptUtils::usedelete(thelastinstinst->get()); + block->getInstructions().erase(thelastinstinst); + } else if (thelastinstinst->get()->isConditional()) { + // 如果是条件分支,判断条件是否相同,主要优化相同布尔表达式 + if (thelastinstinst->get()->getOperand(1)->getName() == thelastinstinst->get()->getOperand(1)->getName()) { SysYIROptUtils::usedelete(thelastinstinst->get()); block->getInstructions().erase(thelastinstinst); - } else if (thelastinstinst->get()->isConditional()) { - // 如果是条件分支,判断条件是否相同,主要优化相同布尔表达式 - if (thelastinstinst->get()->getOperand(1)->getName() == thelastinstinst->get()->getOperand(1)->getName()) { - SysYIROptUtils::usedelete(thelastinstinst->get()); - 
block->getInstructions().erase(thelastinstinst); - } } } - // 将后继块的指令移动到当前块 - // 并将后继块的父指针改为当前块 - for (auto institer = nextBlock->begin(); institer != nextBlock->end();) { - institer->get()->setParent(block); - block->getInstructions().emplace_back(institer->release()); - institer = nextBlock->getInstructions().erase(institer); - } - // 合并参数 - // TODO:是否需要去重? - for (auto &argm : nextarguments) { - argm->setParent(block); - block->insertArgument(argm); - } - // 更新前驱后继关系,类似树节点操作 - block->removeSuccessor(nextBlock); - nextBlock->removePredecessor(block); - std::list succshoulddel; - for (auto &succ : nextBlock->getSuccessors()) { - block->addSuccessor(succ); - succ->replacePredecessor(nextBlock, block); - succshoulddel.push_back(succ); - } - for (auto del : succshoulddel) { - nextBlock->removeSuccessor(del); - } - - func->removeBasicBlock(nextBlock); - - } else { - blockiter++; } + // 将后继块的指令移动到当前块 + // 并将后继块的父指针改为当前块 + for (auto institer = nextBlock->begin(); institer != nextBlock->end();) { + institer->get()->setParent(block); + block->getInstructions().emplace_back(institer->release()); + institer = nextBlock->getInstructions().erase(institer); + } + // 合并参数 + // TODO:是否需要去重? 
+ for (auto &argm : nextarguments) { + argm->setParent(block); + block->insertArgument(argm); + } + // 更新前驱后继关系,类似树节点操作 + block->removeSuccessor(nextBlock); + nextBlock->removePredecessor(block); + std::list succshoulddel; + for (auto &succ : nextBlock->getSuccessors()) { + block->addSuccessor(succ); + succ->replacePredecessor(nextBlock, block); + succshoulddel.push_back(succ); + } + for (auto del : succshoulddel) { + nextBlock->removeSuccessor(del); + } + + func->removeBasicBlock(nextBlock); + changed = true; + } else { blockiter++; } + } else { + blockiter++; } } + + return changed; } // 删除无前驱块,兼容SSA后的处理 -void SysYCFGOpt::SysYDelNoPreBLock() { +bool SysYCFGOpt::SysYDelNoPreBLock(Function *func) { - auto &functions = pModule->getFunctions(); // std::map> - for (auto &function : functions) { - auto &func = function.second; + bool changed = false; - for (auto &block : func->getBasicBlocks()) { - block->setreachableFalse(); - } - // 对函数基本块做一个拓扑排序,排查不可达基本块 - auto entryBlock = func->getEntryBlock(); - entryBlock->setreachableTrue(); - std::queue blockqueue; - blockqueue.push(entryBlock); - while (!blockqueue.empty()) { - auto block = blockqueue.front(); - blockqueue.pop(); - for (auto &succ : block->getSuccessors()) { - if (!succ->getreachable()) { - succ->setreachableTrue(); - blockqueue.push(succ); - } - } - } - - // 删除不可达基本块指令 - for (auto blockIter = func->getBasicBlocks().begin(); blockIter != func->getBasicBlocks().end();blockIter++) { - - if (!blockIter->get()->getreachable()) - for (auto &iterInst : blockIter->get()->getInstructions()) - SysYIROptUtils::usedelete(iterInst.get()); - - } - - - for (auto blockIter = func->getBasicBlocks().begin(); blockIter != func->getBasicBlocks().end();) { - if (!blockIter->get()->getreachable()) { - for (auto succblock : blockIter->get()->getSuccessors()) { - int indexphi = 1; - for (auto pred : succblock->getPredecessors()) { - if (pred == blockIter->get()) { - break; - } - indexphi++; - } - for (auto &phiinst : 
succblock->getInstructions()) { - if (phiinst->getKind() != Instruction::kPhi) { - break; - } - phiinst->removeOperand(indexphi); - } - } - // 删除不可达基本块,注意迭代器不可达问题 - func->removeBasicBlock((blockIter++)->get()); - } else { - blockIter++; + for (auto &block : func->getBasicBlocks()) { + block->setreachableFalse(); + } + // 对函数基本块做一个拓扑排序,排查不可达基本块 + auto entryBlock = func->getEntryBlock(); + entryBlock->setreachableTrue(); + std::queue blockqueue; + blockqueue.push(entryBlock); + while (!blockqueue.empty()) { + auto block = blockqueue.front(); + blockqueue.pop(); + for (auto &succ : block->getSuccessors()) { + if (!succ->getreachable()) { + succ->setreachableTrue(); + blockqueue.push(succ); } } } -} -void SysYCFGOpt::SysYDelEmptyBlock() { - auto &functions = pModule->getFunctions(); - for (auto &function : functions) { - // 收集不可达基本块 - // 这里的不可达基本块是指没有实际指令的基本块 - // 当一个基本块没有实际指令例如只有phi指令和一个uncondbr指令时,也会被视作不可达 - auto basicBlocks = function.second->getBasicBlocks(); - std::map EmptyBlocks; - // 空块儿和后继的基本块的映射 - for (auto &basicBlock : basicBlocks) { - if (basicBlock->getNumInstructions() == 0) { - if (basicBlock->getNumSuccessors() == 1) { - EmptyBlocks[basicBlock.get()] = basicBlock->getSuccessors().front(); + // 删除不可达基本块指令 + for (auto blockIter = func->getBasicBlocks().begin(); blockIter != func->getBasicBlocks().end();blockIter++) { + if (!blockIter->get()->getreachable()) + for (auto &iterInst : blockIter->get()->getInstructions()) + SysYIROptUtils::usedelete(iterInst.get()); + + } + + + for (auto blockIter = func->getBasicBlocks().begin(); blockIter != func->getBasicBlocks().end();) { + if (!blockIter->get()->getreachable()) { + for (auto succblock : blockIter->get()->getSuccessors()) { + int indexphi = 1; + for (auto pred : succblock->getPredecessors()) { + if (pred == blockIter->get()) { + break; + } + indexphi++; + } + for (auto &phiinst : succblock->getInstructions()) { + if (phiinst->getKind() != Instruction::kPhi) { + break; + } + 
phiinst->removeOperand(indexphi); } } - else{ - // 如果只有phi指令和一个uncondbr。(phi)*(uncondbr)? - // 判断除了最后一个指令之外是不是只有phi指令 - bool onlyPhi = true; - for (auto &inst : basicBlock->getInstructions()) { - if (!inst->isPhi() && !inst->isUnconditional()) { - onlyPhi = false; + // 删除不可达基本块,注意迭代器不可达问题 + func->removeBasicBlock((blockIter++)->get()); + changed = true; + } else { + blockIter++; + } + } + + return changed; +} + +// 删除空块 +bool SysYCFGOpt::SysYDelEmptyBlock(Function *func, IRBuilder* pBuilder) { + bool changed = false; + + // 收集不可达基本块 + // 这里的不可达基本块是指没有实际指令的基本块 + // 当一个基本块没有实际指令例如只有phi指令和一个uncondbr指令时,也会被视作不可达 + auto basicBlocks = func->getBasicBlocks(); + std::map EmptyBlocks; + // 空块儿和后继的基本块的映射 + for (auto &basicBlock : basicBlocks) { + if (basicBlock->getNumInstructions() == 0) { + if (basicBlock->getNumSuccessors() == 1) { + EmptyBlocks[basicBlock.get()] = basicBlock->getSuccessors().front(); + } + } + else{ + // 如果只有phi指令和一个uncondbr。(phi)*(uncondbr)? + // 判断除了最后一个指令之外是不是只有phi指令 + bool onlyPhi = true; + for (auto &inst : basicBlock->getInstructions()) { + if (!inst->isPhi() && !inst->isUnconditional()) { + onlyPhi = false; + break; + } + } + if(onlyPhi) + EmptyBlocks[basicBlock.get()] = basicBlock->getSuccessors().front(); + } + + + } + // 更新基本块信息,增加必要指令 + for (auto &basicBlock : basicBlocks) { + // 把空块转换成只有跳转指令的不可达块 + if (distance(basicBlock->begin(), basicBlock->end()) == 0) { + if (basicBlock->getNumSuccessors() == 0) { + continue; + } + if (basicBlock->getNumSuccessors() > 1) { + assert(""); + } + pBuilder->setPosition(basicBlock.get(), basicBlock->end()); + pBuilder->createUncondBrInst(basicBlock->getSuccessors()[0], {}); + continue; + } + + auto thelastinst = basicBlock->getInstructions().end(); + --thelastinst; + + // 根据br指令传递的后继块信息,跳过空块链 + if (thelastinst->get()->isUnconditional()) { + BasicBlock* OldBrBlock = dynamic_cast(thelastinst->get()->getOperand(0)); + BasicBlock *thelastBlockOld = nullptr; + // 如果空块链表为多个块 + while 
(EmptyBlocks.find(dynamic_cast(thelastinst->get()->getOperand(0))) != + EmptyBlocks.end()) { + thelastBlockOld = dynamic_cast(thelastinst->get()->getOperand(0)); + thelastinst->get()->replaceOperand(0, EmptyBlocks[thelastBlockOld]); + } + + basicBlock->removeSuccessor(OldBrBlock); + OldBrBlock->removePredecessor(basicBlock.get()); + basicBlock->addSuccessor(dynamic_cast(thelastinst->get()->getOperand(0))); + dynamic_cast(thelastinst->get()->getOperand(0))->addPredecessor(basicBlock.get()); + + if (thelastBlockOld != nullptr) { + int indexphi = 0; + for (auto &pred : dynamic_cast(thelastinst->get()->getOperand(0))->getPredecessors()) { + if (pred == thelastBlockOld) { + break; + } + indexphi++; + } + + // 更新phi指令的操作数 + // 移除thelastBlockOld对应的phi操作数 + for (auto &InstInNew : dynamic_cast(thelastinst->get()->getOperand(0))->getInstructions()) { + if (InstInNew->isPhi()) { + dynamic_cast(InstInNew.get())->removeOperand(indexphi + 1); + } else { break; } } - if(onlyPhi) - EmptyBlocks[basicBlock.get()] = basicBlock->getSuccessors().front(); } - - - } - // 更新基本块信息,增加必要指令 - for (auto &basicBlock : basicBlocks) { - // 把空块转换成只有跳转指令的不可达块 - if (distance(basicBlock->begin(), basicBlock->end()) == 0) { - if (basicBlock->getNumSuccessors() == 0) { - continue; - } - if (basicBlock->getNumSuccessors() > 1) { - assert(""); - } + + } else if (thelastinst->get()->getKind() == Instruction::kCondBr) { + auto OldThenBlock = dynamic_cast(thelastinst->get()->getOperand(1)); + auto OldElseBlock = dynamic_cast(thelastinst->get()->getOperand(2)); + + BasicBlock *thelastBlockOld = nullptr; + while (EmptyBlocks.find(dynamic_cast(thelastinst->get()->getOperand(1))) != + EmptyBlocks.end()) { + thelastBlockOld = dynamic_cast(thelastinst->get()->getOperand(1)); + thelastinst->get()->replaceOperand( + 1, EmptyBlocks[dynamic_cast(thelastinst->get()->getOperand(1))]); + } + basicBlock->removeSuccessor(OldThenBlock); + OldThenBlock->removePredecessor(basicBlock.get()); + // 处理 then 和 else 分支合并的情况 + if 
(dynamic_cast(thelastinst->get()->getOperand(1)) == + dynamic_cast(thelastinst->get()->getOperand(2))) { + auto thebrBlock = dynamic_cast(thelastinst->get()->getOperand(1)); + SysYIROptUtils::usedelete(thelastinst->get()); + thelastinst = basicBlock->getInstructions().erase(thelastinst); pBuilder->setPosition(basicBlock.get(), basicBlock->end()); - pBuilder->createUncondBrInst(basicBlock->getSuccessors()[0], {}); + pBuilder->createUncondBrInst(thebrBlock, {}); continue; } + basicBlock->addSuccessor(dynamic_cast(thelastinst->get()->getOperand(1))); + dynamic_cast(thelastinst->get()->getOperand(1))->addPredecessor(basicBlock.get()); + // auto indexInNew = dynamic_cast(thelastinst->get()->getOperand(0))->getPredecessors(). - auto thelastinst = basicBlock->getInstructions().end(); - --thelastinst; - - // 根据br指令传递的后继块信息,跳过空块链 - if (thelastinst->get()->isUnconditional()) { - BasicBlock* OldBrBlock = dynamic_cast(thelastinst->get()->getOperand(0)); - BasicBlock *thelastBlockOld = nullptr; - // 如果空块链表为多个块 - while (EmptyBlocks.find(dynamic_cast(thelastinst->get()->getOperand(0))) != - EmptyBlocks.end()) { - thelastBlockOld = dynamic_cast(thelastinst->get()->getOperand(0)); - thelastinst->get()->replaceOperand(0, EmptyBlocks[thelastBlockOld]); + if (thelastBlockOld != nullptr) { + int indexphi = 0; + for (auto &pred : dynamic_cast(thelastinst->get()->getOperand(1))->getPredecessors()) { + if (pred == thelastBlockOld) { + break; + } + indexphi++; } + for (auto &InstInNew : dynamic_cast(thelastinst->get()->getOperand(1))->getInstructions()) { + if (InstInNew->isPhi()) { + dynamic_cast(InstInNew.get())->removeOperand(indexphi + 1); + } else { + break; + } + } + } + + thelastBlockOld = nullptr; + while (EmptyBlocks.find(dynamic_cast(thelastinst->get()->getOperand(2))) != + EmptyBlocks.end()) { + thelastBlockOld = dynamic_cast(thelastinst->get()->getOperand(2)); + thelastinst->get()->replaceOperand( + 2, EmptyBlocks[dynamic_cast(thelastinst->get()->getOperand(2))]); + } + 
basicBlock->removeSuccessor(OldElseBlock); + OldElseBlock->removePredecessor(basicBlock.get()); + // 处理 then 和 else 分支合并的情况 + if (dynamic_cast(thelastinst->get()->getOperand(1)) == + dynamic_cast(thelastinst->get()->getOperand(2))) { + auto thebrBlock = dynamic_cast(thelastinst->get()->getOperand(1)); + SysYIROptUtils::usedelete(thelastinst->get()); + thelastinst = basicBlock->getInstructions().erase(thelastinst); + pBuilder->setPosition(basicBlock.get(), basicBlock->end()); + pBuilder->createUncondBrInst(thebrBlock, {}); + continue; + } + basicBlock->addSuccessor(dynamic_cast(thelastinst->get()->getOperand(2))); + dynamic_cast(thelastinst->get()->getOperand(2))->addPredecessor(basicBlock.get()); + + if (thelastBlockOld != nullptr) { + int indexphi = 0; + for (auto &pred : dynamic_cast(thelastinst->get()->getOperand(2))->getPredecessors()) { + if (pred == thelastBlockOld) { + break; + } + indexphi++; + } + for (auto &InstInNew : dynamic_cast(thelastinst->get()->getOperand(2))->getInstructions()) { + if (InstInNew->isPhi()) { + dynamic_cast(InstInNew.get())->removeOperand(indexphi + 1); + } else { + break; + } + } + } + } else { + if (basicBlock->getNumSuccessors() == 1) { + pBuilder->setPosition(basicBlock.get(), basicBlock->end()); + pBuilder->createUncondBrInst(basicBlock->getSuccessors()[0], {}); + auto thelastinst = basicBlock->getInstructions().end(); + (--thelastinst); + auto OldBrBlock = dynamic_cast(thelastinst->get()->getOperand(0)); + sysy::BasicBlock *thelastBlockOld = nullptr; + while (EmptyBlocks.find(dynamic_cast(thelastinst->get()->getOperand(0))) != + EmptyBlocks.end()) { + thelastBlockOld = dynamic_cast(thelastinst->get()->getOperand(0)); + + thelastinst->get()->replaceOperand( + 0, EmptyBlocks[dynamic_cast(thelastinst->get()->getOperand(0))]); + } + basicBlock->removeSuccessor(OldBrBlock); OldBrBlock->removePredecessor(basicBlock.get()); basicBlock->addSuccessor(dynamic_cast(thelastinst->get()->getOperand(0))); 
dynamic_cast(thelastinst->get()->getOperand(0))->addPredecessor(basicBlock.get()); - if (thelastBlockOld != nullptr) { int indexphi = 0; for (auto &pred : dynamic_cast(thelastinst->get()->getOperand(0))->getPredecessors()) { @@ -264,8 +390,6 @@ void SysYCFGOpt::SysYDelEmptyBlock() { indexphi++; } - // 更新phi指令的操作数 - // 移除thelastBlockOld对应的phi操作数 for (auto &InstInNew : dynamic_cast(thelastinst->get()->getOperand(0))->getInstructions()) { if (InstInNew->isPhi()) { dynamic_cast(InstInNew.get())->removeOperand(indexphi + 1); @@ -274,200 +398,82 @@ void SysYCFGOpt::SysYDelEmptyBlock() { } } } - - } else if (thelastinst->get()->getKind() == Instruction::kCondBr) { - auto OldThenBlock = dynamic_cast(thelastinst->get()->getOperand(1)); - auto OldElseBlock = dynamic_cast(thelastinst->get()->getOperand(2)); - - BasicBlock *thelastBlockOld = nullptr; - while (EmptyBlocks.find(dynamic_cast(thelastinst->get()->getOperand(1))) != - EmptyBlocks.end()) { - thelastBlockOld = dynamic_cast(thelastinst->get()->getOperand(1)); - thelastinst->get()->replaceOperand( - 1, EmptyBlocks[dynamic_cast(thelastinst->get()->getOperand(1))]); - } - basicBlock->removeSuccessor(OldThenBlock); - OldThenBlock->removePredecessor(basicBlock.get()); - // 处理 then 和 else 分支合并的情况 - if (dynamic_cast(thelastinst->get()->getOperand(1)) == - dynamic_cast(thelastinst->get()->getOperand(2))) { - auto thebrBlock = dynamic_cast(thelastinst->get()->getOperand(1)); - SysYIROptUtils::usedelete(thelastinst->get()); - thelastinst = basicBlock->getInstructions().erase(thelastinst); - pBuilder->setPosition(basicBlock.get(), basicBlock->end()); - pBuilder->createUncondBrInst(thebrBlock, {}); - continue; - } - basicBlock->addSuccessor(dynamic_cast(thelastinst->get()->getOperand(1))); - dynamic_cast(thelastinst->get()->getOperand(1))->addPredecessor(basicBlock.get()); - // auto indexInNew = dynamic_cast(thelastinst->get()->getOperand(0))->getPredecessors(). 
- - if (thelastBlockOld != nullptr) { - int indexphi = 0; - for (auto &pred : dynamic_cast(thelastinst->get()->getOperand(1))->getPredecessors()) { - if (pred == thelastBlockOld) { - break; - } - indexphi++; - } - - for (auto &InstInNew : dynamic_cast(thelastinst->get()->getOperand(1))->getInstructions()) { - if (InstInNew->isPhi()) { - dynamic_cast(InstInNew.get())->removeOperand(indexphi + 1); - } else { - break; - } - } - } - - thelastBlockOld = nullptr; - while (EmptyBlocks.find(dynamic_cast(thelastinst->get()->getOperand(2))) != - EmptyBlocks.end()) { - thelastBlockOld = dynamic_cast(thelastinst->get()->getOperand(2)); - thelastinst->get()->replaceOperand( - 2, EmptyBlocks[dynamic_cast(thelastinst->get()->getOperand(2))]); - } - basicBlock->removeSuccessor(OldElseBlock); - OldElseBlock->removePredecessor(basicBlock.get()); - // 处理 then 和 else 分支合并的情况 - if (dynamic_cast(thelastinst->get()->getOperand(1)) == - dynamic_cast(thelastinst->get()->getOperand(2))) { - auto thebrBlock = dynamic_cast(thelastinst->get()->getOperand(1)); - SysYIROptUtils::usedelete(thelastinst->get()); - thelastinst = basicBlock->getInstructions().erase(thelastinst); - pBuilder->setPosition(basicBlock.get(), basicBlock->end()); - pBuilder->createUncondBrInst(thebrBlock, {}); - continue; - } - basicBlock->addSuccessor(dynamic_cast(thelastinst->get()->getOperand(2))); - dynamic_cast(thelastinst->get()->getOperand(2))->addPredecessor(basicBlock.get()); - - if (thelastBlockOld != nullptr) { - int indexphi = 0; - for (auto &pred : dynamic_cast(thelastinst->get()->getOperand(2))->getPredecessors()) { - if (pred == thelastBlockOld) { - break; - } - indexphi++; - } - for (auto &InstInNew : dynamic_cast(thelastinst->get()->getOperand(2))->getInstructions()) { - if (InstInNew->isPhi()) { - dynamic_cast(InstInNew.get())->removeOperand(indexphi + 1); - } else { - break; - } - } - } - } else { - if (basicBlock->getNumSuccessors() == 1) { - pBuilder->setPosition(basicBlock.get(), basicBlock->end()); - 
pBuilder->createUncondBrInst(basicBlock->getSuccessors()[0], {}); - auto thelastinst = basicBlock->getInstructions().end(); - (--thelastinst); - auto OldBrBlock = dynamic_cast(thelastinst->get()->getOperand(0)); - sysy::BasicBlock *thelastBlockOld = nullptr; - while (EmptyBlocks.find(dynamic_cast(thelastinst->get()->getOperand(0))) != - EmptyBlocks.end()) { - thelastBlockOld = dynamic_cast(thelastinst->get()->getOperand(0)); - - thelastinst->get()->replaceOperand( - 0, EmptyBlocks[dynamic_cast(thelastinst->get()->getOperand(0))]); - } - - basicBlock->removeSuccessor(OldBrBlock); - OldBrBlock->removePredecessor(basicBlock.get()); - basicBlock->addSuccessor(dynamic_cast(thelastinst->get()->getOperand(0))); - dynamic_cast(thelastinst->get()->getOperand(0))->addPredecessor(basicBlock.get()); - if (thelastBlockOld != nullptr) { - int indexphi = 0; - for (auto &pred : dynamic_cast(thelastinst->get()->getOperand(0))->getPredecessors()) { - if (pred == thelastBlockOld) { - break; - } - indexphi++; - } - - for (auto &InstInNew : dynamic_cast(thelastinst->get()->getOperand(0))->getInstructions()) { - if (InstInNew->isPhi()) { - dynamic_cast(InstInNew.get())->removeOperand(indexphi + 1); - } else { - break; - } - } - } - } - } - } - - for (auto iter = function.second->getBasicBlocks().begin(); iter != function.second->getBasicBlocks().end();) { - - if (EmptyBlocks.find(iter->get()) != EmptyBlocks.end()) { - // EntryBlock跳过 - if (iter->get() == function.second->getEntryBlock()) { - ++iter; - continue; - } - - for (auto &iterInst : iter->get()->getInstructions()) - SysYIROptUtils::usedelete(iterInst.get()); - // 删除不可达基本块的phi指令的操作数 - for (auto &succ : iter->get()->getSuccessors()) { - int index = 0; - for (auto &pred : succ->getPredecessors()) { - if (pred == iter->get()) { - break; - } - index++; - } - - for (auto &instinsucc : succ->getInstructions()) { - if (instinsucc->isPhi()) { - dynamic_cast(instinsucc.get())->removeOperand(index); - } else { - break; - } - } - } - - 
function.second->removeBasicBlock((iter++)->get()); - } else { - ++iter; } } } + + for (auto iter = func->getBasicBlocks().begin(); iter != func->getBasicBlocks().end();) { + + if (EmptyBlocks.find(iter->get()) != EmptyBlocks.end()) { + // EntryBlock跳过 + if (iter->get() == func->getEntryBlock()) { + ++iter; + continue; + } + + for (auto &iterInst : iter->get()->getInstructions()) + SysYIROptUtils::usedelete(iterInst.get()); + // 删除不可达基本块的phi指令的操作数 + for (auto &succ : iter->get()->getSuccessors()) { + int index = 0; + for (auto &pred : succ->getPredecessors()) { + if (pred == iter->get()) { + break; + } + index++; + } + + for (auto &instinsucc : succ->getInstructions()) { + if (instinsucc->isPhi()) { + dynamic_cast(instinsucc.get())->removeOperand(index); + } else { + break; + } + } + } + + func->removeBasicBlock((iter++)->get()); + changed = true; + } else { + ++iter; + } + } + + return changed; + } // 如果函数没有返回指令,则添加一个默认返回指令(主要解决void函数没有返回指令的问题) -void SysYCFGOpt::SysYAddReturn() { - auto &functions = pModule->getFunctions(); - for (auto &function : functions) { - auto &func = function.second; - auto basicBlocks = func->getBasicBlocks(); - for (auto &block : basicBlocks) { - if (block->getNumSuccessors() == 0) { - // 如果基本块没有后继块,则添加一个返回指令 - if (block->getNumInstructions() == 0) { - pBuilder->setPosition(block.get(), block->end()); - pBuilder->createReturnInst(); - } - auto thelastinst = block->getInstructions().end(); - --thelastinst; - if (thelastinst->get()->getKind() != Instruction::kReturn) { - // std::cout << "Warning: Function " << func->getName() << " has no return instruction, adding default return." 
<< std::endl; +bool SysYCFGOpt::SysYAddReturn(Function *func, IRBuilder* pBuilder) { + bool changed = false; + auto basicBlocks = func->getBasicBlocks(); + for (auto &block : basicBlocks) { + if (block->getNumSuccessors() == 0) { + changed = true; + // 如果基本块没有后继块,则添加一个返回指令 + if (block->getNumInstructions() == 0) { + pBuilder->setPosition(block.get(), block->end()); + pBuilder->createReturnInst(); + } + auto thelastinst = block->getInstructions().end(); + --thelastinst; + if (thelastinst->get()->getKind() != Instruction::kReturn) { + // std::cout << "Warning: Function " << func->getName() << " has no return instruction, adding default return." << std::endl; - pBuilder->setPosition(block.get(), block->end()); - // TODO: 如果int float函数缺少返回值是否需要报错 - if (func->getReturnType()->isInt()) { - pBuilder->createReturnInst(ConstantValue::get(0)); - } else if (func->getReturnType()->isFloat()) { - pBuilder->createReturnInst(ConstantValue::get(0.0F)); - } else { - pBuilder->createReturnInst(); - } + pBuilder->setPosition(block.get(), block->end()); + // TODO: 如果int float函数缺少返回值是否需要报错 + if (func->getReturnType()->isInt()) { + pBuilder->createReturnInst(ConstantValue::get(0)); + } else if (func->getReturnType()->isFloat()) { + pBuilder->createReturnInst(ConstantValue::get(0.0F)); + } else { + pBuilder->createReturnInst(); } } } } + + return changed; } } // namespace sysy diff --git a/src/include/SysYIRCFGOpt.h b/src/include/SysYIRCFGOpt.h index 9c791aa..bf0e524 100644 --- a/src/include/SysYIRCFGOpt.h +++ b/src/include/SysYIRCFGOpt.h @@ -26,18 +26,32 @@ class SysYCFGOpt { SysYCFGOpt(Module *pMoudle, IRBuilder *pBuilder) : pModule(pMoudle), pBuilder(pBuilder) {} void SysYOptimizateAfterIR(){ - SysYDelInstAfterBr(); - SysYBlockMerge(); - SysYDelNoPreBLock(); - SysYDelEmptyBlock(); - SysYAddReturn(); + + auto &functions = pModule->getFunctions(); + for (auto &function : functions) { + bool changed = false; + while(changed){ + // 删除br后面的无用指令 + changed |= 
SysYDelInstAfterBr(function.second.get()); + // 合并空基本块 + changed |= SysYBlockMerge(function.second.get()); + // 删除无前驱块 + changed |= SysYDelNoPreBLock(function.second.get()); + // 删除空块 + changed |= SysYDelEmptyBlock(function.second.get(), pBuilder); + // 添加return指令 + changed |= SysYAddReturn(function.second.get(), pBuilder); + } + } } - void SysYDelInstAfterBr(); // 删除br后面的指令 - void SysYDelEmptyBlock(); // 空块删除 - void SysYDelNoPreBLock(); // 删除无前驱块(不可达块) - void SysYBlockMerge(); // 合并基本块(主要针对嵌套if while的exit块, + +public: + static bool SysYDelInstAfterBr(Function *func); // 删除br后面的指令 + static bool SysYDelEmptyBlock(Function *func, IRBuilder* pBuilder); // 空块删除 + static bool SysYDelNoPreBLock(Function *func); // 删除无前驱块(不可达块) + static bool SysYBlockMerge(Function *func); // 合并基本块(主要针对嵌套if while的exit块, // 也可以修改IR生成实现回填机制 - void SysYAddReturn(); // 添加return指令(主要针对Void函数) + static bool SysYAddReturn(Function *func, IRBuilder* pBuilder); // 添加return指令(主要针对Void函数) }; } // namespace sysy From 77fae4d6625e7d54ab9d6712cd0680ebbd5be00c Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Thu, 17 Jul 2025 16:50:09 +0800 Subject: [PATCH 05/35] =?UTF-8?q?[CFG]=E5=A2=9E=E5=8A=A0=E5=88=86=E6=94=AF?= =?UTF-8?q?=E4=BC=98=E5=8C=96=EF=BC=8C=E4=B8=BASCCP=E8=B0=83=E7=94=A8?= =?UTF-8?q?=E5=81=9A=E9=93=BA=E5=9E=AB=EF=BC=8C=E9=A2=84=E5=A4=87=E4=BF=AE?= =?UTF-8?q?=E6=94=B9phi=E5=AE=9A=E4=B9=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/SysYIRCFGOpt.cpp | 86 ++++++++++++++++++++++++++++++++++++++ src/include/SCCP.h | 2 +- src/include/SysYIRCFGOpt.h | 3 ++ 3 files changed, 90 insertions(+), 1 deletion(-) diff --git a/src/SysYIRCFGOpt.cpp b/src/SysYIRCFGOpt.cpp index 4098008..4a84497 100644 --- a/src/SysYIRCFGOpt.cpp +++ b/src/SysYIRCFGOpt.cpp @@ -476,4 +476,90 @@ bool SysYCFGOpt::SysYAddReturn(Function *func, IRBuilder* pBuilder) { return changed; } +// 条件分支转换为无条件分支 +// 主要针对已知条件值的分支转换为无条件分支 +// 例如 if (cond) { ... } else { ... 
} 中的 cond 已经 +// 确定为 true 或 false 的情况 +bool SysYCFGOpt::SysYCondBr2Br(Function *func, IRBuilder* pBuilder) { + bool changed = false; + + for (auto &basicblock : func->getBasicBlocks()) { + if (basicblock->getNumInstructions() == 0) + continue; + + auto thelast = basicblock->getInstructions().end(); + --thelast; + + if (thelast->get()->isConditional()){ + ConstantValue *constOperand = dynamic_cast(thelast->get()->getOperand(0)); + std::string opname; + int constint = 0; + float constfloat = 0.0F; + bool constint_Use = false; + bool constfloat_Use = false; + if (constOperand != nullptr) { + if (constOperand->isFloat()) { + constfloat = constOperand->getFloat(); + constfloat_Use = true; + } else { + constint = constOperand->getInt(); + constint_Use = true; + } + } + // 如果可以计算 + if (constfloat_Use || constint_Use) { + changed = true; + + auto thenBlock = dynamic_cast(thelast->get()->getOperand(1)); + auto elseBlock = dynamic_cast(thelast->get()->getOperand(2)); + SysYIROptUtils::usedelete(thelast->get()); + thelast = basicblock->getInstructions().erase(thelast); + if ((constfloat_Use && constfloat == 1.0F) || (constint_Use && constint == 1)) { + + pBuilder->setPosition(basicblock.get(), basicblock->end()); + pBuilder->createUncondBrInst(thenBlock, {}); + int phiindex = 0; + for (auto pred : elseBlock->getPredecessors()) { + phiindex++; + if (pred == basicblock.get()) { + break; + } + } + + for (auto &phiinst : elseBlock->getInstructions()) { + if (phiinst->getKind() != Instruction::kPhi) { + break; + } + phiinst->removeOperand(phiindex); + } + basicblock->removeSuccessor(elseBlock); + elseBlock->removePredecessor(basicblock.get()); + } else { + + pBuilder->setPosition(basicblock.get(), basicblock->end()); + pBuilder->createUncondBrInst(elseBlock, {}); + int phiindex = 0; + for (auto pred : thenBlock->getPredecessors()) { + phiindex++; + if (pred == basicblock.get()) { + break; + } + } + + for (auto &phiinst : thenBlock->getInstructions()) { + if (phiinst->getKind() != 
Instruction::kPhi) { + break; + } + phiinst->removeOperand(phiindex); + } + basicblock->removeSuccessor(thenBlock); + thenBlock->removePredecessor(basicblock.get()); + } + } + } + } + + return changed; +} + } // namespace sysy diff --git a/src/include/SCCP.h b/src/include/SCCP.h index da6452e..7db0a7b 100644 --- a/src/include/SCCP.h +++ b/src/include/SCCP.h @@ -172,7 +172,7 @@ enum class LatticeValue { //TODO: 下列数据结构考虑集成到类中,避免重命名问题 static std::set Worklist; static std::unordered_set Executable_Blocks; -static std::unordered_set > Executable_Edges; +static std::queue > Executable_Edges; static std::map valueState; class SCCP { diff --git a/src/include/SysYIRCFGOpt.h b/src/include/SysYIRCFGOpt.h index bf0e524..6b13a3f 100644 --- a/src/include/SysYIRCFGOpt.h +++ b/src/include/SysYIRCFGOpt.h @@ -31,6 +31,8 @@ class SysYCFGOpt { for (auto &function : functions) { bool changed = false; while(changed){ + changed = false; + changed |= SysYCondBr2Br(function.second.get(), pBuilder); // 删除br后面的无用指令 changed |= SysYDelInstAfterBr(function.second.get()); // 合并空基本块 @@ -52,6 +54,7 @@ public: static bool SysYBlockMerge(Function *func); // 合并基本块(主要针对嵌套if while的exit块, // 也可以修改IR生成实现回填机制 static bool SysYAddReturn(Function *func, IRBuilder* pBuilder); // 添加return指令(主要针对Void函数) + static bool SysYCondBr2Br(Function *func, IRBuilder* pBuilder); // 条件分支(已知cond的值)转换为无条件分支 }; } // namespace sysy From 631ef80de2d45f0ac1a72b5b04854fce0658e7ed Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Thu, 17 Jul 2025 19:01:02 +0800 Subject: [PATCH 06/35] =?UTF-8?q?[IR]phi=E6=8C=87=E4=BB=A4=E9=87=8D?= =?UTF-8?q?=E6=9E=84=EF=BC=8C=E5=B0=86block=E4=BF=A1=E6=81=AF=E5=8A=A0?= =?UTF-8?q?=E5=85=A5=E5=B9=B6=E6=8F=90=E4=BE=9B=E6=96=B0=E6=96=B9=E6=B3=95?= =?UTF-8?q?=EF=BC=8C=E5=90=8E=E7=BB=AD=E9=9C=80=E6=9B=B4=E6=94=B9phi?= =?UTF-8?q?=E7=9B=B8=E5=85=B3=E6=8C=87=E4=BB=A4=E6=9E=84=E5=BB=BA=E9=80=BB?= =?UTF-8?q?=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- src/IR.cpp | 77 ++++++++++++++++++++++++++++++++++++++++++++++++ src/include/IR.h | 48 +++++++++++++++++++++++------- 2 files changed, 115 insertions(+), 10 deletions(-) diff --git a/src/IR.cpp b/src/IR.cpp index 540f974..5d2765d 100644 --- a/src/IR.cpp +++ b/src/IR.cpp @@ -545,6 +545,83 @@ void User::replaceOperand(unsigned index, Value *value) { value->addUse(use); } +/** + * phi相关函数 + */ + + Value* PhiInst::getvalfromBlk(BasicBlock* blk){ + refreshB2VMap(); + if( blk2val.find(blk) != blk2val.end()) { + return blk2val.at(blk); + } + return nullptr; +} + +BasicBlock* PhiInst::getBlkfromVal(Value* val){ + // 返回第一个值对应的基本块 + for(unsigned i = 0; i < vsize; i++) { + if(getValue(i) == val) { + return getBlock(i); + } + } + return nullptr; +} + +void PhiInst::delValue(Value* val){ + //根据value删除对应的基本块和值 + unsigned i = 0; + BasicBlock* blk = getBlkfromVal(val); + for(i = 0; i < vsize; i++) { + if(getValue(i) == val) { + break; + } + } + removeOperand(2 * i + 1); // 删除blk + removeOperand(2 * i); // 删除val + vsize--; + blk2val.erase(blk); // 删除blk2val映射 +} + +void PhiInst::delBlk(BasicBlock* blk){ + //根据Blk删除对应的基本块和值 + unsigned i = 0; + Value* val = getvalfromBlk(blk); + for(i = 0; i < vsize; i++) { + if(getBlock(i) == blk) { + break; + } + } + removeOperand(2 * i + 1); // 删除blk + removeOperand(2 * i); // 删除val + vsize--; + blk2val.erase(blk); // 删除blk2val映射 +} + +void PhiInst::replaceBlk(BasicBlock* newBlk, unsigned k){ + refreshB2VMap(); + Value* val = blk2val.at(getBlock(k)); + // 替换基本块 + setOperand(2 * k + 1, newBlk); + // 替换blk2val映射 + blk2val.erase(getBlock(k)); + blk2val.emplace(newBlk, val); +} + +void PhiInst::replaceold2new(BasicBlock* oldBlk, BasicBlock* newBlk){ + refreshB2VMap(); + Value* val = blk2val.at(oldBlk); + // 替换基本块 + delBlk(oldBlk); + addIncoming(val, newBlk); +} + +void PhiInst::refreshB2VMap(){ + blk2val.clear(); + for(unsigned i = 0; i < vsize; i++) { + blk2val.emplace(getBlock(i), getValue(i)); + } +} + 
CallInst::CallInst(Function *callee, const std::vector &args, BasicBlock *parent, const std::string &name) : Instruction(kCall, callee->getReturnType(), parent, name) { addOperand(callee); diff --git a/src/include/IR.h b/src/include/IR.h index 1b4c702..1154d4e 100644 --- a/src/include/IR.h +++ b/src/include/IR.h @@ -755,22 +755,50 @@ class LaInst : public Instruction { class PhiInst : public Instruction { friend class IRBuilder; friend class Function; - friend class SysySSA; protected: - Value *map_val; // Phi的旧值 - PhiInst(Type *type, Value *lhs, const std::vector &rhs, Value *mval, BasicBlock *parent, + std::unordered_map blk2val; ///< 存储每个基本块对应的值 + unsigned vsize; ///< 存储值的数量 + + PhiInst(Type *type, + const std::vector &rhs = {}, + const std::vector &Blocks = {}, + BasicBlock *parent, const std::string &name = "") - : Instruction(Kind::kPhi, type, parent, name) { - map_val = mval; - addOperand(lhs); - addOperands(rhs); + : Instruction(Kind::kPhi, type, parent, name), vsize(rhs.size()) { + assert(rhs.size() == Blocks.size() && "PhiInst: rhs and Blocks must have the same size"); + for(size_t i = 0; i < rhs.size(); ++i) { + addOperand(rhs[i]); + blk2val[Blocks[i]] = rhs[i]; + } } public: - Value* getMapVal() { return map_val; } - Value* getPointer() const { return getOperand(0); } + Value* getValue(unsigned k) const {return getOperand(2 * k);} ///< 获取位置为k的值 + BasicBlock* getBlock(unsigned k) const {return dynamic_cast(getOperand(2 * k + 1));} + + auto& getincomings() const {return blk2val;} ///< 获取所有的基本块和对应的值 + + Value* getvalfromBlk(BasicBlock* blk); + BasicBlock* getBlkfromVal(Value* val); + + unsigned getNumIncomingValues() const { return vsize; } ///< 获取传入值的数量 + void addIncoming(Value *value, BasicBlock *block) { + assert(value && block && "PhiInst: value and block must not be null"); + addOperand(value); + addOperand(block); + blk2val[block] = value; + vsize++; + } ///< 添加传入值和对应的基本块 + + void delValue(Value* val); + void delBlk(BasicBlock* blk); + + void 
replaceBlk(BasicBlock* newBlk, unsigned k); + void replaceold2new(BasicBlock* oldBlk, BasicBlock* newBlk); + void refreshB2VMap(); + auto getValues() { return make_range(std::next(operand_begin()), operand_end()); } Value* getValue(unsigned index) const { return getOperand(index + 1); } }; @@ -884,7 +912,7 @@ public: } } ///< 根据指令类型进行二元计算,eval template模板实现 static BinaryInst* create(Kind kind, Type *type, Value *lhs, Value *rhs, BasicBlock *parent, const std::string &name = "") { - // 后端处理数组访存操作时需要创建计算地址的指令,需要在外部构造 BinaryInst 对象,所以写了个public的方法。 + // 后端处理数组访存操作时需要创建计算地址的指令,需要在外部构造 BinaryInst 对象 return new BinaryInst(kind, type, lhs, rhs, parent, name); } }; // class BinaryInst From 725da2858dca122822bb93163e5d1162dbda81f1 Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Thu, 17 Jul 2025 21:34:19 +0800 Subject: [PATCH 07/35] =?UTF-8?q?[IR]=E6=8C=87=E4=BB=A4=E6=9E=84=E9=80=A0?= =?UTF-8?q?=E5=99=A8=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/include/IRBuilder.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/include/IRBuilder.h b/src/include/IRBuilder.h index aab9a1d..6df82e7 100644 --- a/src/include/IRBuilder.h +++ b/src/include/IRBuilder.h @@ -333,15 +333,11 @@ class IRBuilder { block->getInstructions().emplace(position, inst); return inst; } ///< 创建store指令 - PhiInst * createPhiInst(Type *type, Value *lhs, BasicBlock *parent, const std::string &name = "") { - auto predNum = parent->getNumPredecessors(); - std::vector rhs; - for (size_t i = 0; i < predNum; i++) { - rhs.push_back(lhs); - } - auto inst = new PhiInst(type, lhs, rhs, lhs, parent, name); + PhiInst * createPhiInst(Type *type, const std::vector &vals = {}, const std::vector &blks = {}, const std::string &name = "") { + auto predNum = block->getNumPredecessors(); + auto inst = new PhiInst(type, vals, blks, block, name); assert(inst); - parent->getInstructions().emplace(parent->begin(), 
inst); + block->getInstructions().emplace(block->begin(), inst); return inst; } ///< 创建Phi指令 }; From fdc946c1b528ac4659be665bb6bf63a51fdc13d1 Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Fri, 18 Jul 2025 16:40:16 +0800 Subject: [PATCH 08/35] =?UTF-8?q?[IR]=E9=87=8D=E6=9E=84=E5=B8=B8=E9=87=8F?= =?UTF-8?q?=E5=AE=9A=E4=B9=89=EF=BC=8C=E5=BC=95=E5=85=A5undefvalue?= =?UTF-8?q?=E5=AE=9A=E4=B9=89=EF=BC=8C=E4=BF=AE=E6=94=B9=E5=B8=B8=E9=87=8F?= =?UTF-8?q?=E6=96=B9=E6=B3=95=E4=BD=BF=E7=94=A8=E5=B0=BD=E9=87=8F=E9=80=82?= =?UTF-8?q?=E9=85=8D=E6=97=A7=E7=89=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/SysYIRGenerator.cpp | 101 +++++++-------- src/include/IR.h | 266 ++++++++++++++++++++++++++++++++++------ 2 files changed, 277 insertions(+), 90 deletions(-) diff --git a/src/SysYIRGenerator.cpp b/src/SysYIRGenerator.cpp index 7520891..afaf24b 100644 --- a/src/SysYIRGenerator.cpp +++ b/src/SysYIRGenerator.cpp @@ -145,8 +145,8 @@ std::any SysYIRGenerator::visitVarDecl(SysYParser::VarDeclContext *ctx) { for (size_t i = 0; i < counterNumbers.size(); i++) { builder.createMemsetInst( - alloca, ConstantValue::get(static_cast(begin)), - ConstantValue::get(static_cast(counterNumbers[i])), + alloca, ConstantInteger::get(begin), + ConstantInteger::get(static_cast(counterNumbers[i])), counterValues[i]); begin += counterNumbers[i]; } @@ -218,7 +218,7 @@ std::any SysYIRGenerator::visitFuncDef(SysYParser::FuncDefContext *ctx){ paramNames.push_back(param->Ident()->getText()); std::vector dims = {}; if (!param->LBRACK().empty()) { - dims.push_back(ConstantValue::get(-1)); // 第一个维度不确定 + dims.push_back(ConstantInteger::get(-1)); // 第一个维度不确定 for (const auto &exp : param->exp()) { dims.push_back(std::any_cast(visitExp(exp))); } @@ -247,9 +247,9 @@ std::any SysYIRGenerator::visitFuncDef(SysYParser::FuncDefContext *ctx){ if(HasReturnInst == false) { // 如果没有return语句,则默认返回0 if (returnType != Type::getVoidType()) { - Value* 
returnValue = ConstantValue::get(0); + Value* returnValue = ConstantInteger::get(0); if (returnType == Type::getFloatType()) { - returnValue = ConstantValue::get(0.0f); + returnValue = ConstantFloating::get(0.0f); } builder.createReturnInst(returnValue); } else { @@ -286,9 +286,9 @@ std::any SysYIRGenerator::visitAssignStmt(SysYParser::AssignStmtContext *ctx) { ConstantValue * constValue = dynamic_cast(value); if (constValue != nullptr) { if (variableType == Type::getFloatType()) { - value = ConstantValue::get(static_cast(constValue->getInt())); + value = ConstantInteger::get(static_cast(constValue->getInt())); } else { - value = ConstantValue::get(static_cast(constValue->getFloat())); + value = ConstantFloating::get(static_cast(constValue->getFloat())); } } else { if (variableType == Type::getFloatType()) { @@ -478,9 +478,9 @@ std::any SysYIRGenerator::visitReturnStmt(SysYParser::ReturnStmtContext *ctx) { ConstantValue * constValue = dynamic_cast(returnValue); if (constValue != nullptr) { if (funcType == Type::getFloatType()) { - returnValue = ConstantValue::get(static_cast(constValue->getInt())); + returnValue = ConstantInteger::get(static_cast(constValue->getInt())); } else { - returnValue = ConstantValue::get(static_cast(constValue->getFloat())); + returnValue = ConstantFloating::get(static_cast(constValue->getFloat())); } } else { if (funcType == Type::getFloatType()) { @@ -560,10 +560,10 @@ std::any SysYIRGenerator::visitPrimaryExp(SysYParser::PrimaryExpContext *ctx) { std::any SysYIRGenerator::visitNumber(SysYParser::NumberContext *ctx) { if (ctx->ILITERAL() != nullptr) { int value = std::stol(ctx->ILITERAL()->getText(), nullptr, 0); - return static_cast(ConstantValue::get(value)); + return static_cast(ConstantInteger::get(value)); } else if (ctx->FLITERAL() != nullptr) { float value = std::stof(ctx->FLITERAL()->getText()); - return static_cast(ConstantValue::get(value)); + return static_cast(ConstantFloating::get(value)); } throw std::runtime_error("Unknown 
number type."); return std::any(); // 不会到达这里 @@ -599,9 +599,9 @@ std::any SysYIRGenerator::visitCall(SysYParser::CallContext *ctx) { ConstantValue * constValue = dynamic_cast(args[i]); if (constValue != nullptr) { if (params[i]->getType() == Type::getPointerType(Type::getFloatType())) { - args[i] = ConstantValue::get(static_cast(constValue->getInt())); + args[i] = ConstantInteger::get(static_cast(constValue->getInt())); } else { - args[i] = ConstantValue::get(static_cast(constValue->getFloat())); + args[i] = ConstantFloating::get(static_cast(constValue->getFloat())); } } else { if (params[i]->getType() == Type::getPointerType(Type::getFloatType())) { @@ -629,9 +629,9 @@ std::any SysYIRGenerator::visitUnaryExp(SysYParser::UnaryExpContext *ctx) { ConstantValue * constValue = dynamic_cast(value); if (constValue != nullptr) { if (constValue->isFloat()) { - result = ConstantValue::get(-constValue->getFloat()); + result = ConstantFloating::get(-constValue->getFloat()); } else { - result = ConstantValue::get(-constValue->getInt()); + result = ConstantInteger::get(-constValue->getInt()); } } else if (value != nullptr) { if (value->getType() == Type::getIntType()) { @@ -648,9 +648,9 @@ std::any SysYIRGenerator::visitUnaryExp(SysYParser::UnaryExpContext *ctx) { if (constValue != nullptr) { if (constValue->isFloat()) { result = - ConstantValue::get(1 - (constValue->getFloat() != 0.0F ? 1 : 0)); + ConstantFloating::get(1 - (constValue->getFloat() != 0.0F ? 1 : 0)); } else { - result = ConstantValue::get(1 - (constValue->getInt() != 0 ? 1 : 0)); + result = ConstantInteger::get(1 - (constValue->getInt() != 0 ? 
1 : 0)); } } else if (value != nullptr) { if (value->getType() == Type::getIntType()) { @@ -692,13 +692,13 @@ std::any SysYIRGenerator::visitMulExp(SysYParser::MulExpContext *ctx) { if (operandType != floatType) { ConstantValue * constValue = dynamic_cast(operand); if (constValue != nullptr) - operand = ConstantValue::get(static_cast(constValue->getInt())); + operand = ConstantFloating::get(static_cast(constValue->getInt())); else operand = builder.createIToFInst(operand); } else if (resultType != floatType) { ConstantValue* constResult = dynamic_cast(result); if (constResult != nullptr) - result = ConstantValue::get(static_cast(constResult->getInt())); + result = ConstantFloating::get(static_cast(constResult->getInt())); else result = builder.createIToFInst(result); } @@ -707,14 +707,14 @@ std::any SysYIRGenerator::visitMulExp(SysYParser::MulExpContext *ctx) { ConstantValue* constOperand = dynamic_cast(operand); if (opType == SysYParser::MUL) { if ((constOperand != nullptr) && (constResult != nullptr)) { - result = ConstantValue::get(constResult->getFloat() * + result = ConstantFloating::get(constResult->getFloat() * constOperand->getFloat()); } else { result = builder.createFMulInst(result, operand); } } else if (opType == SysYParser::DIV) { if ((constOperand != nullptr) && (constResult != nullptr)) { - result = ConstantValue::get(constResult->getFloat() / + result = ConstantFloating::get(constResult->getFloat() / constOperand->getFloat()); } else { result = builder.createFDivInst(result, operand); @@ -729,17 +729,17 @@ std::any SysYIRGenerator::visitMulExp(SysYParser::MulExpContext *ctx) { ConstantValue * constOperand = dynamic_cast(operand); if (opType == SysYParser::MUL) { if ((constOperand != nullptr) && (constResult != nullptr)) - result = ConstantValue::get(constResult->getInt() * constOperand->getInt()); + result = ConstantInteger::get(constResult->getInt() * constOperand->getInt()); else result = builder.createMulInst(result, operand); } else if (opType 
== SysYParser::DIV) { if ((constOperand != nullptr) && (constResult != nullptr)) - result = ConstantValue::get(constResult->getInt() / constOperand->getInt()); + result = ConstantInteger::get(constResult->getInt() / constOperand->getInt()); else result = builder.createDivInst(result, operand); } else { if ((constOperand != nullptr) && (constResult != nullptr)) - result = ConstantValue::get(constResult->getInt() % constOperand->getInt()); + result = ConstantInteger::get(constResult->getInt() % constOperand->getInt()); else result = builder.createRemInst(result, operand); } @@ -767,13 +767,13 @@ std::any SysYIRGenerator::visitAddExp(SysYParser::AddExpContext *ctx) { if (operandType != floatType) { ConstantValue * constOperand = dynamic_cast(operand); if (constOperand != nullptr) - operand = ConstantValue::get(static_cast(constOperand->getInt())); + operand = ConstantFloating::get(static_cast(constOperand->getInt())); else operand = builder.createIToFInst(operand); } else if (resultType != floatType) { ConstantValue * constResult = dynamic_cast(result); if (constResult != nullptr) - result = ConstantValue::get(static_cast(constResult->getInt())); + result = ConstantFloating::get(static_cast(constResult->getInt())); else result = builder.createIToFInst(result); } @@ -782,12 +782,12 @@ std::any SysYIRGenerator::visitAddExp(SysYParser::AddExpContext *ctx) { ConstantValue * constOperand = dynamic_cast(operand); if (opType == SysYParser::ADD) { if ((constResult != nullptr) && (constOperand != nullptr)) - result = ConstantValue::get(constResult->getFloat() + constOperand->getFloat()); + result = ConstantFloating::get(constResult->getFloat() + constOperand->getFloat()); else result = builder.createFAddInst(result, operand); } else { if ((constResult != nullptr) && (constOperand != nullptr)) - result = ConstantValue::get(constResult->getFloat() - constOperand->getFloat()); + result = ConstantFloating::get(constResult->getFloat() - constOperand->getFloat()); else result = 
builder.createFSubInst(result, operand); } @@ -796,12 +796,12 @@ std::any SysYIRGenerator::visitAddExp(SysYParser::AddExpContext *ctx) { ConstantValue * constOperand = dynamic_cast(operand); if (opType == SysYParser::ADD) { if ((constResult != nullptr) && (constOperand != nullptr)) - result = ConstantValue::get(constResult->getInt() + constOperand->getInt()); + result = ConstantInteger::get(constResult->getInt() + constOperand->getInt()); else result = builder.createAddInst(result, operand); } else { if ((constResult != nullptr) && (constOperand != nullptr)) - result = ConstantValue::get(constResult->getInt() - constOperand->getInt()); + result = ConstantInteger::get(constResult->getInt() - constOperand->getInt()); else result = builder.createSubInst(result, operand); } @@ -833,10 +833,10 @@ std::any SysYIRGenerator::visitRelExp(SysYParser::RelExpContext *ctx) { auto operand2 = constOperand->isFloat() ? constOperand->getFloat() : constOperand->getInt(); - if (opType == SysYParser::LT) result = ConstantValue::get(operand1 < operand2 ? 1 : 0); - else if (opType == SysYParser::GT) result = ConstantValue::get(operand1 > operand2 ? 1 : 0); - else if (opType == SysYParser::LE) result = ConstantValue::get(operand1 <= operand2 ? 1 : 0); - else if (opType == SysYParser::GE) result = ConstantValue::get(operand1 >= operand2 ? 1 : 0); + if (opType == SysYParser::LT) result = ConstantInteger::get(operand1 < operand2 ? 1 : 0); + else if (opType == SysYParser::GT) result = ConstantInteger::get(operand1 > operand2 ? 1 : 0); + else if (opType == SysYParser::LE) result = ConstantInteger::get(operand1 <= operand2 ? 1 : 0); + else if (opType == SysYParser::GE) result = ConstantInteger::get(operand1 >= operand2 ? 
1 : 0); else assert(false); } else { @@ -848,14 +848,14 @@ std::any SysYIRGenerator::visitRelExp(SysYParser::RelExpContext *ctx) { if (resultType == floatType || operandType == floatType) { if (resultType != floatType) { if (constResult != nullptr) - result = ConstantValue::get(static_cast(constResult->getInt())); + result = ConstantFloating::get(static_cast(constResult->getInt())); else result = builder.createIToFInst(result); } if (operandType != floatType) { if (constOperand != nullptr) - operand = ConstantValue::get(static_cast(constOperand->getInt())); + operand = ConstantFloating::get(static_cast(constOperand->getInt())); else operand = builder.createIToFInst(operand); @@ -901,8 +901,8 @@ std::any SysYIRGenerator::visitEqExp(SysYParser::EqExpContext *ctx) { auto operand2 = constOperand->isFloat() ? constOperand->getFloat() : constOperand->getInt(); - if (opType == SysYParser::EQ) result = ConstantValue::get(operand1 == operand2 ? 1 : 0); - else if (opType == SysYParser::NE) result = ConstantValue::get(operand1 != operand2 ? 1 : 0); + if (opType == SysYParser::EQ) result = ConstantInteger::get(operand1 == operand2 ? 1 : 0); + else if (opType == SysYParser::NE) result = ConstantInteger::get(operand1 != operand2 ? 
1 : 0); else assert(false); } else { @@ -913,13 +913,13 @@ std::any SysYIRGenerator::visitEqExp(SysYParser::EqExpContext *ctx) { if (resultType == floatType || operandType == floatType) { if (resultType != floatType) { if (constResult != nullptr) - result = ConstantValue::get(static_cast(constResult->getInt())); + result = ConstantFloating::get(static_cast(constResult->getInt())); else result = builder.createIToFInst(result); } if (operandType != floatType) { if (constOperand != nullptr) - operand = ConstantValue::get(static_cast(constOperand->getInt())); + operand = ConstantFloating::get(static_cast(constOperand->getInt())); else operand = builder.createIToFInst(operand); } @@ -943,9 +943,9 @@ std::any SysYIRGenerator::visitEqExp(SysYParser::EqExpContext *ctx) { // 如果只有一个关系表达式,则将结果转换为0或1 if (constResult != nullptr) { if (constResult->isFloat()) - result = ConstantValue::get(constResult->getFloat() != 0.0F ? 1 : 0); + result = ConstantInteger::get(constResult->getFloat() != 0.0F ? 1 : 0); else - result = ConstantValue::get(constResult->getInt() != 0 ? 1 : 0); + result = ConstantInteger::get(constResult->getInt() != 0 ? 
1 : 0); } } @@ -1013,6 +1013,7 @@ void Utils::tree2Array(Type *type, ArrayValueTree *root, ValueCounter &result, IRBuilder *builder) { Value* value = root->getValue(); auto &children = root->getChildren(); + // 类型转换 if (value != nullptr) { if (type == value->getType()) { result.push_back(value); @@ -1020,14 +1021,14 @@ void Utils::tree2Array(Type *type, ArrayValueTree *root, if (type == Type::getFloatType()) { ConstantValue* constValue = dynamic_cast(value); if (constValue != nullptr) - result.push_back(ConstantValue::get(static_cast(constValue->getInt()))); + result.push_back(ConstantFloating::get(static_cast(constValue->getInt()))); else result.push_back(builder->createIToFInst(value)); } else { ConstantValue* constValue = dynamic_cast(value); if (constValue != nullptr) - result.push_back(ConstantValue::get(static_cast(constValue->getFloat()))); + result.push_back(ConstantInteger::get(static_cast(constValue->getFloat()))); else result.push_back(builder->createFtoIInst(value)); @@ -1061,9 +1062,9 @@ void Utils::tree2Array(Type *type, ArrayValueTree *root, int num = blockSize - afterSize + beforeSize; if (num > 0) { if (type == Type::getFloatType()) - result.push_back(ConstantValue::get(0.0F), num); + result.push_back(ConstantFloating::get(0.0F), num); else - result.push_back(ConstantValue::get(0), num); + result.push_back(ConstantInteger::get(0), num); } } @@ -1101,7 +1102,7 @@ void Utils::initExternalFunction(Module *pModule, IRBuilder *pBuilder) { funcName, pModule, pBuilder); paramTypes.push_back(Type::getIntType()); paramNames.emplace_back("x"); - paramDims.push_back(std::vector{ConstantValue::get(-1)}); + paramDims.push_back(std::vector{ConstantInteger::get(-1)}); funcName = "getarray"; Utils::createExternalFunction(paramTypes, paramNames, paramDims, returnType, funcName, pModule, pBuilder); @@ -1117,7 +1118,7 @@ void Utils::initExternalFunction(Module *pModule, IRBuilder *pBuilder) { returnType = Type::getIntType(); 
paramTypes.push_back(Type::getFloatType()); paramNames.emplace_back("x"); - paramDims.push_back(std::vector{ConstantValue::get(-1)}); + paramDims.push_back(std::vector{ConstantInteger::get(-1)}); funcName = "getfarray"; Utils::createExternalFunction(paramTypes, paramNames, paramDims, returnType, funcName, pModule, pBuilder); @@ -1141,7 +1142,7 @@ void Utils::initExternalFunction(Module *pModule, IRBuilder *pBuilder) { paramTypes.push_back(Type::getIntType()); paramDims.clear(); paramDims.emplace_back(); - paramDims.push_back(std::vector{ConstantValue::get(-1)}); + paramDims.push_back(std::vector{ConstantInteger::get(-1)}); paramNames.clear(); paramNames.emplace_back("n"); paramNames.emplace_back("a"); @@ -1164,7 +1165,7 @@ void Utils::initExternalFunction(Module *pModule, IRBuilder *pBuilder) { paramTypes.push_back(Type::getFloatType()); paramDims.clear(); paramDims.emplace_back(); - paramDims.push_back(std::vector{ConstantValue::get(-1)}); + paramDims.push_back(std::vector{ConstantInteger::get(-1)}); paramNames.clear(); paramNames.emplace_back("n"); paramNames.emplace_back("a"); diff --git a/src/include/IR.h b/src/include/IR.h index 1154d4e..b23689a 100644 --- a/src/include/IR.h +++ b/src/include/IR.h @@ -268,6 +268,51 @@ class ValueCounter { } ///< 清空ValueCounter }; + +// --- Refactored ConstantValue and related classes start here --- + +using ConstantValVariant = std::variant; + +// Helper for hashing std::variant +struct VariantHash { + template + std::size_t operator()(const T& val) const { + return std::hash{}(val); + } + std::size_t operator()(const ConstantValVariant& v) const { + return std::visit(*this, v); + } +}; + +struct ConstantValueKey { + Type* type; + ConstantValVariant val; + + bool operator==(const ConstantValueKey& other) const { + // Assuming Type objects are canonicalized, or add Type::isSame() + // If Type::isSame() is not available and Type objects are not canonicalized, + // this comparison might not be robust enough for structural 
equivalence of types. + return type == other.type && val == other.val; + } +}; + +struct ConstantValueHash { + std::size_t operator()(const ConstantValueKey& key) const { + std::size_t typeHash = std::hash{}(key.type); + std::size_t valHash = VariantHash{}(key.val); + // A simple way to combine hashes + return typeHash ^ (valHash << 1); + } +}; + +struct ConstantValueEqual { + bool operator()(const ConstantValueKey& lhs, const ConstantValueKey& rhs) const { + // Assuming Type objects are canonicalized (e.g., Type::getIntType() always returns same pointer) + // If not, and Type::isSame() is intended, it should be added to Type class. + return lhs.type == rhs.type && lhs.val == rhs.val; + } +}; + /*! * Static constants known at compile time. * @@ -275,46 +320,178 @@ class ValueCounter { * `Value`s. It's type is either `int` or `float`. * `ConstantValue`并不由指令定义, 也不使用任何Value。它的类型为int/float。 */ - - class ConstantValue : public Value { - protected: - /// 定义字面量类型的聚合类型 - union { - int iScalar; - float fScalar; - }; +protected: + static std::unordered_map mConstantPool; - protected: - explicit ConstantValue(int value, const std::string &name = "") : Value(Type::getIntType(), name), iScalar(value) {} - explicit ConstantValue(float value, const std::string &name = "") - : Value(Type::getFloatType(), name), fScalar(value) {} +public: + explicit ConstantValue(Type* type, const std::string& name = "") : Value(type, name) {} + virtual ~ConstantValue() = default; - public: - static ConstantValue* get(int value); ///< 获取一个int类型的ConstValue *,其值为value - static ConstantValue* get(float value); ///< 获取一个float类型的ConstValue *,其值为value + virtual size_t hash() const = 0; + virtual ConstantValVariant getValue() const = 0; - public: + // Static factory method to get a canonical ConstantValue from the pool + static ConstantValue* get(Type* type, ConstantValVariant val); + + // Helper methods to access constant values with appropriate casting int getInt() const { - assert(isInt()); - return 
iScalar; - } ///< 返回int类型的值 + assert(getType()->isInt() && "Calling getInt() on non-integer type"); + return std::get(getValue()); + } float getFloat() const { - assert(isFloat()); - return fScalar; - } ///< 返回float类型的值 - template + assert(getType()->isFloat() && "Calling getFloat() on non-float type"); + return std::get(getValue()); + } + + template T getValue() const { - if (std::is_same::value && isInt()) { - return getInt(); - } - if (std::is_same::value && isFloat()) { - return getFloat(); - } - throw std::bad_cast(); // 或者其他适当的异常处理 - } ///< 返回值,getInt和getFloat统一化,整数返回整形,浮点返回浮点型 + if constexpr (std::is_same_v) { + return getInt(); + } else if constexpr (std::is_same_v) { + return getFloat(); + } else { + // This ensures a compilation error if an unsupported type is used + static_assert(std::always_false_v, "Unsupported type for ConstantValue::getValue()"); + } + } + + virtual bool isZero() const = 0; + virtual bool isOne() const = 0; }; +class ConstantInteger : public ConstantValue { + int constVal; +public: + explicit ConstantInteger(Type* type, int val, const std::string& name = "") + : ConstantValue(type, name), constVal(val) {} + + size_t hash() const override { + std::size_t typeHash = std::hash{}(getType()); + std::size_t valHash = std::hash{}(constVal); + return typeHash ^ (valHash << 1); + } + int getInt() const { return constVal; } + ConstantValVariant getValue() const override { return constVal; } + + static ConstantInteger* get(Type* type, int val); + static ConstantInteger* get(int val) { return get(Type::getIntType(), val); } + + ConstantInteger* getNeg() const { + assert(getType()->isInt() && "Cannot negate non-integer constant"); + return ConstantInteger::get(-constVal); + } + + bool isZero() const override { return constVal == 0; } + bool isOne() const override { return constVal == 1; } +}; + +class ConstantFloating : public ConstantValue { + float constFVal; +public: + explicit ConstantFloating(Type* type, float val, const std::string& name = 
"") + : ConstantValue(type, name), constFVal(val) {} + + size_t hash() const override { + std::size_t typeHash = std::hash{}(getType()); + std::size_t valHash = std::hash{}(constFVal); + return typeHash ^ (valHash << 1); + } + float getFloat() const { return constFVal; } + ConstantValVariant getValue() const override { return constFVal; } + + static ConstantFloating* get(Type* type, float val); + static ConstantFloating* get(float val) { return get(Type::getFloatType(), val); } + + ConstantFloating* getNeg() const { + assert(getType()->isFloat() && "Cannot negate non-float constant"); + return ConstantFloating::get(-constFVal); + } + + bool isZero() const override { return constFVal == 0.0f; } + bool isOne() const override { return constFVal == 1.0f; } +}; + +class UndefinedValue : public ConstantValue { +private: + static std::unordered_map UndefValues; + +protected: + explicit UndefinedValue(Type* type, const std::string& name = "") + : ConstantValue(type, name) { + assert(!type->isVoid() && "Cannot create UndefinedValue of void type!"); + } + +public: + static UndefinedValue* get(Type* type); + + size_t hash() const override { + return std::hash{}(getType()); + } + + ConstantValVariant getValue() const override { + if (getType()->isInt()) { + return 0; // Return 0 for undefined integer + } else if (getType()->isFloat()) { + return 0.0f; // Return 0.0f for undefined float + } + assert(false && "UndefinedValue has unexpected type for getValue()"); + return 0; // Should not be reached + } + + bool isZero() const override { return false; } + bool isOne() const override { return false; } +}; + +// Implementations for static members (typically in .cpp, but for single-file, put here) + +std::unordered_map ConstantValue::mConstantPool; +std::unordered_map UndefinedValue::UndefValues; + +ConstantValue* ConstantValue::get(Type* type, ConstantValVariant val) { + ConstantValueKey key = {type, val}; + auto it = mConstantPool.find(key); + if (it != mConstantPool.end()) { + 
return it->second; + } + + ConstantValue* newConstant = nullptr; + if (std::holds_alternative(val)) { + newConstant = new ConstantInteger(type, std::get(val)); + } else if (std::holds_alternative(val)) { + newConstant = new ConstantFloating(type, std::get(val)); + } else { + assert(false && "Unsupported ConstantValVariant type"); + } + + mConstantPool[key] = newConstant; + return newConstant; +} + +ConstantInteger* ConstantInteger::get(Type* type, int val) { + return dynamic_cast(ConstantValue::get(type, val)); +} + +ConstantFloating* ConstantFloating::get(Type* type, float val) { + return dynamic_cast(ConstantValue::get(type, val)); +} + +UndefinedValue* UndefinedValue::get(Type* type) { + assert(!type->isVoid() && "Cannot get UndefinedValue of void type!"); + + auto it = UndefValues.find(type); + if (it != UndefValues.end()) { + return it->second; + } + + UndefinedValue* newUndef = new UndefinedValue(type); + UndefValues[type] = newUndef; + return newUndef; +} + +// --- End of refactored ConstantValue and related classes --- + + class Instruction; class Function; class BasicBlock; @@ -562,8 +739,8 @@ class Instruction : public User { kLa = 0x1UL << 36, kMemset = 0x1UL << 37, kGetSubArray = 0x1UL << 38, - // constant - kConstant = 0x1UL << 37, + // Constant Kind removed as Constants are now Values, not Instructions. 
+ // kConstant = 0x1UL << 37, // Conflicts with kMemset if kept as is // phi kPhi = 0x1UL << 39, kBitItoF = 0x1UL << 40, @@ -1258,12 +1435,15 @@ protected: if (init.size() == 0) { unsigned num = 1; for (unsigned i = 0; i < numDims; i++) { - num *= dynamic_cast(dims[i])->getInt(); + // Assume dims elements are ConstantInteger and cast appropriately + auto dim_val = dynamic_cast(dims[i]); + assert(dim_val && "GlobalValue dims must be constant integers"); + num *= dim_val->getInt(); } if (dynamic_cast(type)->getBaseType() == Type::getFloatType()) { - init.push_back(ConstantValue::get(0.0F), num); + init.push_back(ConstantFloating::get(0.0F), num); // Use new constant factory } else { - init.push_back(ConstantValue::get(0), num); + init.push_back(ConstantInteger::get(0), num); // Use new constant factory } } initValues = init; @@ -1289,8 +1469,11 @@ public: Value* getByIndices(const std::vector &indices) const { int index = 0; for (size_t i = 0; i < indices.size(); i++) { - index = dynamic_cast(getDim(i))->getInt() * index + - dynamic_cast(indices[i])->getInt(); + // Ensure dims[i] and indices[i] are ConstantInteger and retrieve their values correctly + auto dim_val = dynamic_cast(getDim(i)); + auto idx_val = dynamic_cast(indices[i]); + assert(dim_val && idx_val && "Dims and indices must be constant integers"); + index = dim_val->getInt() * index + idx_val->getInt(); } return getByIndex(index); } ///< 通过多维索引indices获取初始值 @@ -1331,8 +1514,11 @@ class ConstantVariable : public User, public LVal { int index = 0; // 计算偏移量 for (size_t i = 0; i < indices.size(); i++) { - index = dynamic_cast(getDim(i))->getInt() * index + - dynamic_cast(indices[i])->getInt(); + // Ensure dims[i] and indices[i] are ConstantInteger and retrieve their values correctly + auto dim_val = dynamic_cast(getDim(i)); + auto idx_val = dynamic_cast(indices[i]); + assert(dim_val && idx_val && "Dims and indices must be constant integers"); + index = dim_val->getInt() * index + idx_val->getInt(); } return 
getByIndex(index); From a5d97185e1877e79da583cc264d7496fbde28005 Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Fri, 18 Jul 2025 18:17:22 +0800 Subject: [PATCH 09/35] =?UTF-8?q?[IR]=E4=BF=AE=E5=A4=8DIR=E6=8A=A5?= =?UTF-8?q?=E9=94=99=EF=BC=8C=E8=B0=83=E6=95=B4=E7=BB=93=E6=9E=84=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/IR.cpp | 62 ++++++++++++++++++++++++----------- src/include/IR.h | 85 ++++++++++++------------------------------------ 2 files changed, 64 insertions(+), 83 deletions(-) diff --git a/src/IR.cpp b/src/IR.cpp index 5d2765d..5f4e0c5 100644 --- a/src/IR.cpp +++ b/src/IR.cpp @@ -102,30 +102,54 @@ void Value::replaceAllUsesWith(Value *value) { uses.clear(); } -ConstantValue* ConstantValue::get(int value) { - static std::map> intConstants; - auto iter = intConstants.find(value); - if (iter != intConstants.end()) { - return iter->second.get(); + +// Implementations for static members + +std::unordered_map ConstantValue::mConstantPool; +std::unordered_map UndefinedValue::UndefValues; + +ConstantValue* ConstantValue::get(Type* type, ConstantValVariant val) { + ConstantValueKey key = {type, val}; + auto it = mConstantPool.find(key); + if (it != mConstantPool.end()) { + return it->second; } - auto inst = new ConstantValue(value); - assert(inst); - auto result = intConstants.emplace(value, inst); - return result.first->second.get(); + + ConstantValue* newConstant = nullptr; + if (std::holds_alternative(val)) { + newConstant = new ConstantInteger(type, std::get(val)); + } else if (std::holds_alternative(val)) { + newConstant = new ConstantFloating(type, std::get(val)); + } else { + assert(false && "Unsupported ConstantValVariant type"); + } + + mConstantPool[key] = newConstant; + return newConstant; } -ConstantValue* ConstantValue::get(float value) { - static std::map> floatConstants; - auto iter = floatConstants.find(value); - if (iter != floatConstants.end()) { - return 
iter->second.get(); - } - auto inst = new ConstantValue(value); - assert(inst); - auto result = floatConstants.emplace(value, inst); - return result.first->second.get(); +ConstantInteger* ConstantInteger::get(Type* type, int val) { + return dynamic_cast(ConstantValue::get(type, val)); } +ConstantFloating* ConstantFloating::get(Type* type, float val) { + return dynamic_cast(ConstantValue::get(type, val)); +} + +UndefinedValue* UndefinedValue::get(Type* type) { + assert(!type->isVoid() && "Cannot get UndefinedValue of void type!"); + + auto it = UndefValues.find(type); + if (it != UndefValues.end()) { + return it->second; + } + + UndefinedValue* newUndef = new UndefinedValue(type); + UndefValues[type] = newUndef; + return newUndef; +} + + auto Function::getCalleesWithNoExternalAndSelf() -> std::set { std::set result; for (auto callee : callees) { diff --git a/src/include/IR.h b/src/include/IR.h index b23689a..060bdc5 100644 --- a/src/include/IR.h +++ b/src/include/IR.h @@ -271,7 +271,7 @@ class ValueCounter { // --- Refactored ConstantValue and related classes start here --- -using ConstantValVariant = std::variant; +using ConstantValVariant = std::variant; // Helper for hashing std::variant struct VariantHash { @@ -320,6 +320,10 @@ struct ConstantValueEqual { * `Value`s. It's type is either `int` or `float`. 
* `ConstantValue`并不由指令定义, 也不使用任何Value。它的类型为int/float。 */ + +template struct always_false : std::false_type {}; +template constexpr bool always_false_v = always_false::value; + class ConstantValue : public Value { protected: static std::unordered_map mConstantPool; @@ -329,7 +333,7 @@ public: virtual ~ConstantValue() = default; virtual size_t hash() const = 0; - virtual ConstantValVariant getValue() const = 0; + virtual ConstantValVariant getVal() const = 0; // Static factory method to get a canonical ConstantValue from the pool static ConstantValue* get(Type* type, ConstantValVariant val); @@ -337,23 +341,23 @@ public: // Helper methods to access constant values with appropriate casting int getInt() const { assert(getType()->isInt() && "Calling getInt() on non-integer type"); - return std::get(getValue()); + return std::get(getVal()); } float getFloat() const { assert(getType()->isFloat() && "Calling getFloat() on non-float type"); - return std::get(getValue()); + return std::get(getVal()); } template - T getValue() const { - if constexpr (std::is_same_v) { - return getInt(); - } else if constexpr (std::is_same_v) { - return getFloat(); - } else { - // This ensures a compilation error if an unsupported type is used - static_assert(std::always_false_v, "Unsupported type for ConstantValue::getValue()"); - } + T getVal() const { + if constexpr (std::is_same_v) { + return getInt(); + } else if constexpr (std::is_same_v) { + return getFloat(); + } else { + // This ensures a compilation error if an unsupported type is used + static_assert(always_false_v, "Unsupported type for ConstantValue::getValue()"); + } } virtual bool isZero() const = 0; @@ -372,7 +376,7 @@ public: return typeHash ^ (valHash << 1); } int getInt() const { return constVal; } - ConstantValVariant getValue() const override { return constVal; } + ConstantValVariant getVal() const override { return constVal; } static ConstantInteger* get(Type* type, int val); static ConstantInteger* get(int val) { return 
get(Type::getIntType(), val); } @@ -398,7 +402,7 @@ public: return typeHash ^ (valHash << 1); } float getFloat() const { return constFVal; } - ConstantValVariant getValue() const override { return constFVal; } + ConstantValVariant getVal() const override { return constFVal; } static ConstantFloating* get(Type* type, float val); static ConstantFloating* get(float val) { return get(Type::getFloatType(), val); } @@ -429,7 +433,7 @@ public: return std::hash{}(getType()); } - ConstantValVariant getValue() const override { + ConstantValVariant getVal() const override { if (getType()->isInt()) { return 0; // Return 0 for undefined integer } else if (getType()->isFloat()) { @@ -443,52 +447,6 @@ public: bool isOne() const override { return false; } }; -// Implementations for static members (typically in .cpp, but for single-file, put here) - -std::unordered_map ConstantValue::mConstantPool; -std::unordered_map UndefinedValue::UndefValues; - -ConstantValue* ConstantValue::get(Type* type, ConstantValVariant val) { - ConstantValueKey key = {type, val}; - auto it = mConstantPool.find(key); - if (it != mConstantPool.end()) { - return it->second; - } - - ConstantValue* newConstant = nullptr; - if (std::holds_alternative(val)) { - newConstant = new ConstantInteger(type, std::get(val)); - } else if (std::holds_alternative(val)) { - newConstant = new ConstantFloating(type, std::get(val)); - } else { - assert(false && "Unsupported ConstantValVariant type"); - } - - mConstantPool[key] = newConstant; - return newConstant; -} - -ConstantInteger* ConstantInteger::get(Type* type, int val) { - return dynamic_cast(ConstantValue::get(type, val)); -} - -ConstantFloating* ConstantFloating::get(Type* type, float val) { - return dynamic_cast(ConstantValue::get(type, val)); -} - -UndefinedValue* UndefinedValue::get(Type* type) { - assert(!type->isVoid() && "Cannot get UndefinedValue of void type!"); - - auto it = UndefValues.find(type); - if (it != UndefValues.end()) { - return it->second; - } - 
- UndefinedValue* newUndef = new UndefinedValue(type); - UndefValues[type] = newUndef; - return newUndef; -} - // --- End of refactored ConstantValue and related classes --- @@ -941,7 +899,7 @@ class PhiInst : public Instruction { PhiInst(Type *type, const std::vector &rhs = {}, const std::vector &Blocks = {}, - BasicBlock *parent, + BasicBlock *parent = nullptr, const std::string &name = "") : Instruction(Kind::kPhi, type, parent, name), vsize(rhs.size()) { assert(rhs.size() == Blocks.size() && "PhiInst: rhs and Blocks must have the same size"); @@ -977,7 +935,6 @@ class PhiInst : public Instruction { void refreshB2VMap(); auto getValues() { return make_range(std::next(operand_begin()), operand_end()); } - Value* getValue(unsigned index) const { return getOperand(index + 1); } }; From 34b5a93aafb844af9946cd02e1677c27e0f0eb48 Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Fri, 18 Jul 2025 18:17:45 +0800 Subject: [PATCH 10/35] =?UTF-8?q?[Mem2Reg]=E9=87=8D=E6=9E=84SSA=E6=8F=90?= =?UTF-8?q?=E5=8D=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Mem2Reg.cpp | 1219 ++++++++++++++++------------------------- src/include/Mem2Reg.h | 90 +-- 2 files changed, 535 insertions(+), 774 deletions(-) diff --git a/src/Mem2Reg.cpp b/src/Mem2Reg.cpp index fd7a239..2daef27 100644 --- a/src/Mem2Reg.cpp +++ b/src/Mem2Reg.cpp @@ -1,490 +1,154 @@ #include "Mem2Reg.h" +#include "SysYIRPrinter.h" #include #include -#include #include #include #include -#include #include -#include -#include "IR.h" -#include "SysYIRAnalyser.h" -#include "SysYIRPrinter.h" +#include +#include namespace sysy { -// 计算给定变量的定义块集合的迭代支配边界 -// TODO:优化Semi-Naive IDF -std::unordered_set Mem2Reg::computeIterDf(const std::unordered_set &blocks) { - std::unordered_set workList; - std::unordered_set ret_list; - workList.insert(blocks.begin(), blocks.end()); +// --- 私有成员函数实现 --- - while (!workList.empty()) { - auto n = workList.begin(); - 
BlockAnalysisInfo* blockInfo = controlFlowAnalysis->getBlockAnalysisInfo(*n); - auto DFs = blockInfo->getDomFrontiers(); - for (auto c : DFs) { - // 如果c不在ret_list中,则将其加入ret_list和workList - // 这里的c是n的支配边界 - // 也就是n的支配边界中的块 - // 需要注意的是,支配边界是一个集合,所以可能会有重复 - if (ret_list.count(c) == 0U) { - ret_list.emplace(c); - workList.emplace(c); +// 计算给定定义块集合的迭代支配边界 +std::unordered_set Mem2Reg::computeIteratedDomFrontiers(const std::unordered_set& blocks) { + std::unordered_set result; + std::queue worklist; // 使用队列进行 BFS-like 遍历 + + for (auto* block : blocks) + worklist.push(block); + + while (!worklist.empty()) { + auto* block = worklist.front(); + worklist.pop(); + + auto* blockInfo = controlFlowAnalysis->getBlockAnalysisInfo(block); + if (!blockInfo) continue; + + for (auto* df : blockInfo->getDomFrontiers()) { + if (result.find(df) == result.end()) { // If not already in result + result.insert(df); + worklist.push(df); } } - workList.erase(n); } - return ret_list; + return result; } -/** - * 计算value2Blocks的映射,包括value2AllocBlocks、value2DefBlocks以及value2UseBlocks - * 其中value2DefBlocks可用于计算迭代支配边界来插入相应变量的phi结点 - * 这里的value2AllocBlocks、value2DefBlocks和value2UseBlocks改变了函数级别的分析信息 - */ -auto Mem2Reg::computeValue2Blocks() -> void { - SysYPrinter printer(pModule); // 初始化打印机 - // std::cout << "===== Start computeValue2Blocks =====" << std::endl; - - auto &functions = pModule->getFunctions(); - for (const auto &function : functions) { - auto func = function.second.get(); - // std::cout << "\nProcessing function: " << func->getName() << std::endl; - - FunctionAnalysisInfo* funcInfo = controlFlowAnalysis->getFunctionAnalysisInfo(func); - if (!funcInfo) { - std::cerr << "ERROR: No analysis info for function " << func->getName() << std::endl; - continue; +// 分析一个 alloca 的所有 uses,填充 allocaDefsBlock 和 allocaUsesBlock +void Mem2Reg::allocaAnalysis(AllocaInst* alloca) { + allocaDefsBlock[alloca].clear(); + allocaUsesBlock[alloca].clear(); + + for (auto use : alloca->getUses()) { + Instruction* 
userInst = dynamic_cast(use->getUser()); + if (!userInst) continue; + + if (StoreInst* store = dynamic_cast(userInst)) { + if (store->getOperand(1) == alloca) { // Store's second operand is the pointer + allocaDefsBlock[alloca].insert(store->getParent()); // Store's parent is the defining block + } + } else if (LoadInst* load = dynamic_cast(userInst)) { + if (load->getOperand(0) == alloca) { // Load's first operand is the pointer + allocaUsesBlock[alloca].insert(load->getParent()); // Load's parent is the using block + } + } + } +} + +// 判断一个 alloca 是否可以被提升为寄存器 (无地址逃逸,标量类型) +bool Mem2Reg::is_promoted(AllocaInst* alloca) { + // 检查是否是标量类型 (非数组、非全局变量等) + if(!(SysYIROptUtils::isArr(alloca) || SysYIROptUtils::isGlobal(alloca))){ + return false; // 只有标量类型的 alloca 才能被提升 + } + + // 获取 alloca 指向的基类型 + PointerType* ptrType = dynamic_cast(alloca->getType()); + if (!ptrType) return false; // Should always be a pointer type + Type* allocabaseType = ptrType->getBaseType(); + + for (const auto& use : alloca->getUses()) { + Instruction* userInst = dynamic_cast(use->getUser()); + if (!userInst) { + // 如果不是指令的 use,比如作为全局变量的初始值等,通常认为逃逸 + return false; } - auto basicBlocks = func->getBasicBlocks(); - // std::cout << "BasicBlocks count: " << basicBlocks.size() << std::endl; - - for (auto &it : basicBlocks) { - auto basicBlock = it.get(); - // std::cout << "\nProcessing BB: " << basicBlock->getName() << std::endl; - // printer.printBlock(basicBlock); // 打印基本块内容 - - auto &instrs = basicBlock->getInstructions(); - for (auto &instr : instrs) { - // std::cout << " Analyzing instruction: "; - // printer.printInst(instr.get()); - // std::cout << std::endl; + if (LoadInst* load = dynamic_cast(userInst)) { + // Load 指令结果的类型必须与 alloca 的基类型一致 + if (load->getType() != allocabaseType) { + return false; + } + } else if (StoreInst* store = dynamic_cast(userInst)) { + // Store 指令的值操作数类型必须与 alloca 的基类型一致 + // 且 store 的指针操作数必须是当前 alloca + if (store->getOperand(1) != alloca || 
store->getOperand(0)->getType() != allocabaseType) { + return false; + } + } else if (userInst->isGetSubArray()) { + // GSA 指令表示对数组的访问 + // 这意味着地址逃逸,不能简单提升为单个寄存器 + return false; + } else if (userInst->isCall()) { + // 如果 alloca 作为函数参数传递,通常认为地址逃逸 + return false; + } + // 如果有其他类型的指令使用 alloca 的地址,也需要判断是否是逃逸 + // 例如:BitCastInst, PtrToIntInst, 如果这些操作将地址暴露,则不能提升 + } + return true; +} - if (instr->isAlloca()) { - if (!(SysYIROptUtils::isArr(instr.get()) || SysYIROptUtils::isGlobal(instr.get()))) { - // std::cout << " Found alloca: "; - // printer.printInst(instr.get()); - // std::cout << " -> Adding to allocBlocks" << std::endl; - - funcInfo->addValue2AllocBlocks(instr.get(), basicBlock); - } else { - // std::cout << " Skip array/global alloca: "; - // printer.printInst(instr.get()); - // std::cout << std::endl; - } - } - else if (instr->isStore()) { - auto val = instr->getOperand(1); - // std::cout << " Store target: "; - // printer.printInst(dynamic_cast(val)); +// 在迭代支配边界处插入 Phi 指令 +void Mem2Reg::insertPhiNodes(Function* func) { + // 清空上次 Phi 插入的结果 + phiMap.clear(); + allPhiInstructions.clear(); + + std::unordered_set phiPlacementBlocks; // 存放需要插入 Phi 的块 + std::queue workQueue; // BFS 队列,用于迭代支配边界计算 + + // 遍历所有可提升的 alloca + for (AllocaInst* alloca : currentFunctionAllocas) { + phiPlacementBlocks.clear(); // 为每个 alloca 重新计算 Phi 放置位置 + + // 初始化工作队列,放入所有定义该 alloca 的基本块 + for (BasicBlock* defBB : allocaDefsBlock[alloca]) { + workQueue.push(defBB); + } + + while (!workQueue.empty()) { + BasicBlock* currentBB = workQueue.front(); + workQueue.pop(); + + auto* blockInfo = controlFlowAnalysis->getBlockAnalysisInfo(currentBB); + if (!blockInfo) continue; + + // 遍历当前块的支配边界 + for (BasicBlock* domFrontierBB : blockInfo->getDomFrontiers()) { + // 如果这个支配边界块还没有为当前 alloca 插入 Phi 指令 + if (phiPlacementBlocks.find(domFrontierBB) == phiPlacementBlocks.end()) { + // 获取 alloca 的基类型,作为 Phi 指令的结果类型 + Type* phiType = dynamic_cast(alloca->getType())->getBaseType(); - if 
(!(SysYIROptUtils::isArr(val) || SysYIROptUtils::isGlobal(val))) { - // std::cout << " Adding store to defBlocks for value: "; - // printer.printInst(dynamic_cast(instr.get())); - // std::cout << std::endl; - // 将store的目标值添加到defBlocks中 - funcInfo->addValue2DefBlocks(val, basicBlock); - } else { - // std::cout << " Skip array/global store" << std::endl; - } - } - else if (instr->isLoad()) { - auto val = instr->getOperand(0); - // std::cout << " Load source: "; - // printer.printInst(dynamic_cast(val)); - // std::cout << std::endl; + // 在支配边界块的开头插入 Phi 指令 + pBuilder->setPosition(domFrontierBB->begin()); + PhiInst* newPhi = pBuilder->createPhiInst(phiType, {}, {}); // 初始入边为空 - if (!(SysYIROptUtils::isArr(val) || SysYIROptUtils::isGlobal(val))) { - // std::cout << " Adding load to useBlocks for value: "; - // printer.printInst(dynamic_cast(val)); - // std::cout << std::endl; - - funcInfo->addValue2UseBlocks(val, basicBlock); - } else { - // std::cout << " Skip array/global load" << std::endl; - } - } - } - } - - // 打印分析结果 - // std::cout << "\nAnalysis results for function " << func->getName() << ":" << std::endl; - - // auto &allocMap = funcInfo->getValue2AllocBlocks(); - // std::cout << "AllocBlocks (" << allocMap.size() << "):" << std::endl; - // for (auto &[val, bb] : allocMap) { - // std::cout << " "; - // printer.printInst(dynamic_cast(val)); - // std::cout << " in BB: " << bb->getName() << std::endl; - // } - - // auto &defMap = funcInfo->getValue2DefBlocks(); - // std::cout << "DefBlocks (" << defMap.size() << "):" << std::endl; - // for (auto &[val, bbs] : defMap) { - // std::cout << " "; - // printer.printInst(dynamic_cast(val)); - // for (const auto &[bb, count] : bbs) { - // std::cout << " in BB: " << bb->getName() << " (count: " << count << ")"; - // } - // } - - // auto &useMap = funcInfo->getValue2UseBlocks(); - // std::cout << "UseBlocks (" << useMap.size() << "):" << std::endl; - // for (auto &[val, bbs] : useMap) { - // std::cout << " "; - // 
printer.printInst(dynamic_cast(val)); - // for (const auto &[bb, count] : bbs) { - // std::cout << " in BB: " << bb->getName() << " (count: " << count << ")"; - // } - // } - } - // std::cout << "===== End computeValue2Blocks =====" << std::endl; -} + allPhiInstructions.push_back(newPhi); // 记录所有 Phi + phiPlacementBlocks.insert(domFrontierBB); // 标记已插入 + // 将 Phi 指令映射到它所代表的原始 alloca + phiMap[domFrontierBB][newPhi] = alloca; -/** - * @brief 级联关系的顺带消除,用于llvm mem2reg类预优化1 - * - * 采用队列进行模拟,从某种程度上来看其实可以看作是UD链的反向操作; - * - * @param [in] instr store指令使用的指令 - * @param [in] changed 不动点法的判断标准,地址传递 - * @param [in] func 指令所在函数 - * @param [in] block 指令所在基本块 - * @param [in] instrs 基本块所在指令集合,地址传递 - * @return 无返回值,但满足条件的情况下会对指令进行删除 - */ -auto Mem2Reg::cascade(Instruction *instr, bool &changed, Function *func, BasicBlock *block, - std::list> &instrs) -> void { - if (instr != nullptr) { - if (instr->isUnary() || instr->isBinary() || instr->isLoad()) { - std::queue toRemove; - toRemove.push(instr); - while (!toRemove.empty()) { - auto top = toRemove.front(); - toRemove.pop(); - auto operands = top->getOperands(); - for (const auto &operand : operands) { - auto elem = dynamic_cast(operand->getValue()); - if (elem != nullptr) { - if ((elem->isUnary() || elem->isBinary() || elem->isLoad()) && elem->getUses().size() == 1 && - elem->getUses().front()->getUser() == top) { - toRemove.push(elem); - } else if (elem->isAlloca()) { - // value2UseBlock中该block对应次数-1,如果该变量的该useblock中count减为0了,则意味着 - // 该block其他地方也没用到该alloc了,故从value2UseBlock中删除 - FunctionAnalysisInfo* funcInfo = controlFlowAnalysis->getFunctionAnalysisInfo(func); - auto res = funcInfo->removeValue2UseBlock(elem, block); - // 只要有一次返回了true,就说明有变化 - if (res) { - changed = true; - } - } - } - } - auto tofind = - std::find_if(instrs.begin(), instrs.end(), [&top](const auto &instr) { return instr.get() == top; }); - assert(tofind != instrs.end()); - SysYIROptUtils::usedelete(tofind->get()); - instrs.erase(tofind); - } - } - } -} - -/** - 
* llvm mem2reg预优化1: 删除不含load的alloc和store - * - * 1. 删除不含load的alloc和store; - * 2. 删除store指令,之前的用于作store指令第0个操作数的那些级联指令就冗余了,也要删除; - * 3. 删除之后,可能有些变量的load使用恰好又没有了,因此再次从第一步开始循环,这里使用不动点法 - * - * 由于删除了级联关系,所以这里的方法有点儿激进; - * 同时也考虑了级联关系时如果调用了函数,可能会有side effect,所以没有删除调用函数的级联关系; - * 而且关于函数参数的alloca不会在指令中删除,也不会在value2Alloca中删除; - * 同样地,我们不考虑数组和global,不过这里的代码是基于value2blocks的,在value2blocks中已经考虑了,所以不用显式指明 - *= - */ -auto Mem2Reg::preOptimize1() -> void { - SysYPrinter printer(pModule); // 初始化打印机 - - auto &functions = pModule->getFunctions(); - // std::cout << "===== Start preOptimize1 =====" << std::endl; - - for (const auto &function : functions) { - auto func = function.second.get(); - // std::cout << "\nProcessing function: " << func->getName() << std::endl; - - FunctionAnalysisInfo* funcInfo = controlFlowAnalysis->getFunctionAnalysisInfo(func); - if (!funcInfo) { - // std::cerr << "ERROR: No analysis info for function " << func->getName() << std::endl; - continue; - } - - auto &vToDefB = funcInfo->getValue2DefBlocks(); - auto &vToUseB = funcInfo->getValue2UseBlocks(); - auto &vToAllocB = funcInfo->getValue2AllocBlocks(); - - // 打印初始状态 - // std::cout << "Initial allocas: " << vToAllocB.size() << std::endl; - // for (auto &[val, bb] : vToAllocB) { - // std::cout << " Alloca: "; - // printer.printInst(dynamic_cast(val)); - // std::cout << " in BB: " << bb->getName() << std::endl; - // } - - // 阶段1:删除无store的alloca - // std::cout << "\nPhase 1: Remove unused allocas" << std::endl; - for (auto iter = vToAllocB.begin(); iter != vToAllocB.end();) { - auto val = iter->first; - auto bb = iter->second; - - // std::cout << "Checking alloca: "; - // printer.printInst(dynamic_cast(val)); - // std::cout << " in BB: " << bb->getName() << std::endl; - - // 如果该alloca没有对应的store指令,且不在函数参数中 - // 这里的vToDefB是value2DefBlocks,vToUseB是value2UseBlocks - - // 打印vToDefB - // std::cout << "DefBlocks (" << vToDefB.size() << "):" << std::endl; - // for (auto &[val, bbs] : vToDefB) { - // std::cout << " "; 
- // printer.printInst(dynamic_cast(val)); - // for (const auto &[bb, count] : bbs) { - // std::cout << " in BB: " << bb->getName() << " (count: " << count << ")" << std::endl; - // } - // } - // std::cout << vToDefB.count(val) << std::endl; - - if (vToDefB.count(val) == 0U && - std::find(func->getEntryBlock()->getArguments().begin(), - func->getEntryBlock()->getArguments().end(), - val) == func->getEntryBlock()->getArguments().end()) { - - // std::cout << " Removing unused alloca: "; - // printer.printInst(dynamic_cast(val)); - // std::cout << std::endl; - - auto tofind = std::find_if(bb->getInstructions().begin(), - bb->getInstructions().end(), - [val](const auto &instr) { - return instr.get() == val; - }); - if (tofind == bb->getInstructions().end()) { - // std::cerr << "ERROR: Alloca not found in BB!" << std::endl; - ++iter; - continue; - } - - SysYIROptUtils::usedelete(tofind->get()); - bb->getInstructions().erase(tofind); - iter = vToAllocB.erase(iter); - } else { - ++iter; - } - } - - // 阶段2:删除无load的store - // std::cout << "\nPhase 2: Remove dead stores" << std::endl; - bool changed = true; - int iteration = 0; - - while (changed) { - changed = false; - iteration++; - // std::cout << "\nIteration " << iteration << std::endl; - - for (auto iter = vToDefB.begin(); iter != vToDefB.end();) { - auto val = iter->first; - - // std::cout << "Checking value: "; - // printer.printInst(dynamic_cast(val)); - // std::cout << std::endl; - - if (vToUseB.count(val) == 0U) { - // std::cout << " Found dead store for value: "; - // printer.printInst(dynamic_cast(val)); - // std::cout << std::endl; - - auto blocks = funcInfo->getDefBlocksByValue(val); - for (auto block : blocks) { - // std::cout << " Processing BB: " << block->getName() << std::endl; - // printer.printBlock(block); // 打印基本块内容 - - auto &instrs = block->getInstructions(); - for (auto it = instrs.begin(); it != instrs.end();) { - if ((*it)->isStore() && (*it)->getOperand(1) == val) { - // std::cout << " Removing 
store: "; - // printer.printInst(it->get()); - std::cout << std::endl; - - auto valUsedByStore = dynamic_cast((*it)->getOperand(0)); - SysYIROptUtils::usedelete(it->get()); - - if (valUsedByStore != nullptr && - valUsedByStore->getUses().size() == 1 && - valUsedByStore->getUses().front()->getUser() == (*it).get()) { - // std::cout << " Cascade deleting: "; - // printer.printInst(valUsedByStore); - // std::cout << std::endl; - - cascade(valUsedByStore, changed, func, block, instrs); - } - it = instrs.erase(it); - changed = true; - } else { - ++it; - } - } - } - - // 删除对应的alloca - if (std::find(func->getEntryBlock()->getArguments().begin(), - func->getEntryBlock()->getArguments().end(), - val) == func->getEntryBlock()->getArguments().end()) { - auto bb = funcInfo->getAllocBlockByValue(val); - if (bb != nullptr) { - // std::cout << " Removing alloca: "; - // printer.printInst(dynamic_cast(val)); - // std::cout << " in BB: " << bb->getName() << std::endl; - - funcInfo->removeValue2AllocBlock(val); - auto tofind = std::find_if(bb->getInstructions().begin(), - bb->getInstructions().end(), - [val](const auto &instr) { - return instr.get() == val; - }); - if (tofind != bb->getInstructions().end()) { - SysYIROptUtils::usedelete(tofind->get()); - bb->getInstructions().erase(tofind); - } else { - std::cerr << "ERROR: Alloca not found in BB!" << std::endl; - } - } - } - iter = vToDefB.erase(iter); - } else { - ++iter; - } - } - } - } - // std::cout << "===== End preOptimize1 =====" << std::endl; -} - -/** - * llvm mem2reg预优化2: 针对某个变量的Defblocks只有一个块的情况 - * - * 1. 该基本块最后一次对该变量的store指令后的所有对该变量的load指令都可以替换为该基本块最后一次store指令的第0个操作数; - * 2. 以该基本块为必经结点的结点集合中的对该变量的load指令都可以替换为该基本块最后一次对该变量的store指令的第0个操作数; - * 3. - * 如果对该变量的所有load均替换掉了,删除该基本块中最后一次store指令,如果这个store指令是唯一的define,那么再删除alloca指令(不删除参数的alloca); - * 4. 
- * 如果对该value的所有load都替换掉了,对于该变量剩下还有store的话,就转换成了preOptimize1的情况,再调用preOptimize1进行删除; - * - * 同样不考虑数组和全局变量,因为这些变量不会被mem2reg优化,在value2blocks中已经考虑了,所以不用显式指明; - * 替换的操作采用了UD链进行简化和效率的提升 - * - */ -auto Mem2Reg::preOptimize2() -> void { - auto &functions = pModule->getFunctions(); - for (const auto &function : functions) { - auto func = function.second.get(); - FunctionAnalysisInfo* funcInfo = controlFlowAnalysis->getFunctionAnalysisInfo(func); - auto values = funcInfo->getValuesOfDefBlock(); - for (auto val : values) { - auto blocks = funcInfo->getDefBlocksByValue(val); - // 该val只有一个defining block - if (blocks.size() == 1) { - auto block = *blocks.begin(); - auto &instrs = block->getInstructions(); - auto rit = std::find_if(instrs.rbegin(), instrs.rend(), - [val](const auto &instr) { return instr->isStore() && instr->getOperand(1) == val; }); - // 注意reverse_iterator求base后是指向下一个指令,因此要减一才是原来的指令 - assert(rit != instrs.rend()); - auto it = --rit.base(); - auto propogationVal = (*it)->getOperand(0); - // 其实该块中it后对该val的load指令也可以替换掉了 - for (auto curit = std::next(it); curit != instrs.end();) { - if ((*curit)->isLoad() && (*curit)->getOperand(0) == val) { - curit->get()->replaceAllUsesWith(propogationVal); - SysYIROptUtils::usedelete(curit->get()); - curit = instrs.erase(curit); - funcInfo->removeValue2UseBlock(val, block); - } else { - ++curit; - } - } - // 在支配树后继结点中替换load指令的操作数 - BlockAnalysisInfo* blockInfo = controlFlowAnalysis->getBlockAnalysisInfo(block); - std::vector blkchildren; - // 获取该块的支配树后继结点 - std::queue q; - auto sdoms = blockInfo->getSdoms(); - for (auto sdom : sdoms) { - q.push(sdom); - blkchildren.push_back(sdom); - } - while (!q.empty()) { - auto blk = q.front(); - q.pop(); - BlockAnalysisInfo* blkInfo = controlFlowAnalysis->getBlockAnalysisInfo(blk); - for (auto sdom : blkInfo->getSdoms()) { - q.push(sdom); - blkchildren.push_back(sdom); - } - } - for (auto child : blkchildren) { - auto &childInstrs = child->getInstructions(); - for (auto childIter = 
childInstrs.begin(); childIter != childInstrs.end();) { - if ((*childIter)->isLoad() && (*childIter)->getOperand(0) == val) { - childIter->get()->replaceAllUsesWith(propogationVal); - SysYIROptUtils::usedelete(childIter->get()); - childIter = childInstrs.erase(childIter); - funcInfo->removeValue2UseBlock(val, child); - } else { - ++childIter; - } - } - } - // 如果对该val的所有load均替换掉了,那么对于该val的defining block中的最后一个define也可以删除了 - // 同时该块中前面对于该val的define也变成死代码了,可调用preOptimize1进行删除 - if (funcInfo->getUseBlocksByValue(val).empty()) { - SysYIROptUtils::usedelete(it->get()); - instrs.erase(it); - auto change = funcInfo->removeValue2DefBlock(val, block); - if (change) { - // 如果define是唯一的,且不是函数参数的alloca,直接删alloca - if (std::find(func->getEntryBlock()->getArguments().begin(), func->getEntryBlock()->getArguments().end(), - val) == func->getEntryBlock()->getArguments().end()) { - auto bb = funcInfo->getAllocBlockByValue(val); - assert(bb != nullptr); - auto tofind = std::find_if(bb->getInstructions().begin(), bb->getInstructions().end(), - [val](const auto &instr) { return instr.get() == val; }); - SysYIROptUtils::usedelete(tofind->get()); - bb->getInstructions().erase(tofind); - funcInfo->removeValue2AllocBlock(val); - } - } else { - // 如果该变量还有其他的define,那么前面的define也变成死代码了 - assert(!funcInfo->getDefBlocksByValue(val).empty()); - assert(funcInfo->getUseBlocksByValue(val).empty()); - preOptimize1(); + // 如果支配边界块本身没有定义该 alloca,则其支配边界也可能需要 Phi + // 只有当这个块不是当前alloca的定义块时,才将其加入workQueue,以计算其DF。 + if (allocaDefsBlock[alloca].find(domFrontierBB) == allocaDefsBlock[alloca].end()) { + workQueue.push(domFrontierBB); } } } @@ -492,285 +156,360 @@ auto Mem2Reg::preOptimize2() -> void { } } -/** - * @brief llvm mem2reg类预优化3:针对某个变量的所有读写都在同一个块中的情况 - * - * 1. 将每一个load替换成前一个store的值,并删除该load; - * 2. 如果在load前没有对该变量的store,则不删除该load; - * 3. 
如果一个store后没有任何对改变量的load,则删除该store; - * - * @note 额外说明:第二点不用显式处理,因为我们的方法是从找到第一个store开始; - * 第三点其实可以更激进一步地理解,即每次替换了load之后,它对应地那个store也可以删除了,同时注意这里不要使用preoptimize1进行处理,因为他们的级联关系是有用的:即用来求load的替换值; - * 同样地,我们这里不考虑数组和全局变量,因为这些变量不会被mem2reg优化,不过这里在计算value2DefBlocks时已经跳过了,所以不需要再显式处理了; - * 替换的操作采用了UD链进行简化和效率的提升 - * - * @param [in] void - * @return 无返回值,但满足条件的情况下会对指令的操作数进行替换以及对指令进行删除 - */ -auto Mem2Reg::preOptimize3() -> void { - auto &functions = pModule->getFunctions(); - for (const auto &function : functions) { - auto func = function.second.get(); - FunctionAnalysisInfo* funcInfo = controlFlowAnalysis->getFunctionAnalysisInfo(func); - auto values = funcInfo->getValuesOfDefBlock(); - for (auto val : values) { - auto sblocks = funcInfo->getDefBlocksByValue(val); - auto lblocks = funcInfo->getUseBlocksByValue(val); - if (sblocks.size() == 1 && lblocks.size() == 1 && *sblocks.begin() == *lblocks.begin()) { - auto block = *sblocks.begin(); - auto &instrs = block->getInstructions(); - auto it = std::find_if(instrs.begin(), instrs.end(), - [val](const auto &instr) { return instr->isStore() && instr->getOperand(1) == val; }); - while (it != instrs.end()) { - auto propogationVal = (*it)->getOperand(0); - auto last = std::find_if(std::next(it), instrs.end(), [val](const auto &instr) { - return instr->isStore() && instr->getOperand(1) == val; - }); - for (auto curit = std::next(it); curit != last;) { - if ((*curit)->isLoad() && (*curit)->getOperand(0) == val) { - curit->get()->replaceAllUsesWith(propogationVal); - SysYIROptUtils::usedelete(curit->get()); - curit = instrs.erase(curit); - funcInfo->removeValue2UseBlock(val, block); - } else { - ++curit; - } - } - // 替换了load之后,它对应地那个store也可以删除了 - if (!(std::find_if(func->getEntryBlock()->getArguments().begin(), func->getEntryBlock()->getArguments().end(), - [val](const auto &instr) { return instr == val; }) != - func->getEntryBlock()->getArguments().end()) && - last == instrs.end()) { - SysYIROptUtils::usedelete(it->get()); - it = 
instrs.erase(it); - if (funcInfo->removeValue2DefBlock(val, block)) { - auto bb = funcInfo->getAllocBlockByValue(val); - if (bb != nullptr) { - auto tofind = std::find_if(bb->getInstructions().begin(), bb->getInstructions().end(), - [val](const auto &instr) { return instr.get() == val; }); - SysYIROptUtils::usedelete(tofind->get()); - bb->getInstructions().erase(tofind); - funcInfo->removeValue2AllocBlock(val); - } - } - } - it = last; - } - } - } - } -} - -/** - * 为所有变量的定义块集合的迭代支配边界插入phi结点 - * - * insertPhi是mem2reg的核心之一,这里是对所有变量的迭代支配边界的phi结点插入,无参数也无返回值; - * 同样跳过对数组和全局变量的处理,因为这些变量不会被mem2reg优化,刚好这里在计算value2DefBlocks时已经跳过了,所以不需要再显式处理了; - * 同时我们进行了剪枝处理,只有在基本块入口活跃的变量,才插入phi函数 - * - */ -auto Mem2Reg::insertPhi() -> void { - auto &functions = pModule->getFunctions(); - for (const auto &function : functions) { - auto func = function.second.get(); - FunctionAnalysisInfo* funcInfo = controlFlowAnalysis->getFunctionAnalysisInfo(func); - const auto &vToDefB = funcInfo->getValue2DefBlocks(); - for (const auto &map_pair : vToDefB) { - // 首先为每个变量找到迭代支配边界 - auto val = map_pair.first; - auto blocks = funcInfo->getDefBlocksByValue(val); - auto itDFs = computeIterDf(blocks); - // 然后在每个变量相应的迭代支配边界上插入phi结点 - for (auto basicBlock : itDFs) { - const auto &actiTable = activeVarAnalysis->getActiveTable(); - auto dval = dynamic_cast(val); - // 只有在基本块入口活跃的变量,才插入phi函数 - if (actiTable.at(basicBlock).front().count(dval) != 0U) { - pBuilder->createPhiInst(val->getType(), val, basicBlock); - } - } - } - } -} - -/** - * 重命名 - * - * 重命名是mem2reg的核心之二,这里是对单个块的重命名,递归实现 - * 同样跳过对数组和全局变量的处理,因为这些变量不会被mem2reg优化 - * - */ -auto Mem2Reg::rename(BasicBlock *block, std::unordered_map &count, - std::unordered_map> &stacks) -> void { - auto &instrs = block->getInstructions(); - std::unordered_map valPop; - // 第一大步:对块中的所有指令遍历处理 - for (auto iter = instrs.begin(); iter != instrs.end();) { - auto instr = iter->get(); - // 对于load指令,变量用最新的那个 - if (instr->isLoad()) { - auto val = instr->getOperand(0); - if 
(!(SysYIROptUtils::isArr(val) || SysYIROptUtils::isGlobal(val))) { - if (!stacks[val].empty()) { - instr->replaceOperand(0, stacks[val].top()); - } - } - } - // 然后对于define的情况,看alloca、store和phi指令 - if (instr->isDefine()) { - if (instr->isAlloca()) { - // alloca指令名字不改了,命名就按x,x_1,x_2...来就行 - auto val = instr; - if (!(SysYIROptUtils::isArr(val) || SysYIROptUtils::isGlobal(val))) { - ++valPop[val]; - stacks[val].push(val); - ++count[val]; - } - } else if (instr->isPhi()) { - // Phi指令也是一条特殊的define指令 - auto val = dynamic_cast(instr)->getMapVal(); - if (!(SysYIROptUtils::isArr(val) || SysYIROptUtils::isGlobal(val))) { - auto i = count[val]; - if (i == 0) { - // 对还未alloca就有phi的指令的处理,直接删除 - SysYIROptUtils::usedelete(iter->get()); - iter = instrs.erase(iter); - continue; - } - auto newname = dynamic_cast(val)->getName() + "_" + std::to_string(i); - auto newalloca = pBuilder->createAllocaInstWithoutInsert(val->getType(), {}, block, newname); - FunctionAnalysisInfo* ParentfuncInfo = controlFlowAnalysis->getFunctionAnalysisInfo(block->getParent()); - ParentfuncInfo->addIndirectAlloca(newalloca); - instr->replaceOperand(0, newalloca); - ++valPop[val]; - stacks[val].push(newalloca); - ++count[val]; - } - } else { - // store指令看operand的名字,我们的实现是规定变量在operand的第二位,用一个新的alloca x_i代替 - auto val = instr->getOperand(1); - if (!(SysYIROptUtils::isArr(val) || SysYIROptUtils::isGlobal(val))) { - auto i = count[val]; - auto newname = dynamic_cast(val)->getName() + "_" + std::to_string(i); - auto newalloca = pBuilder->createAllocaInstWithoutInsert(val->getType(), {}, block, newname); - FunctionAnalysisInfo* ParentfuncInfo = controlFlowAnalysis->getFunctionAnalysisInfo(block->getParent()); - ParentfuncInfo->addIndirectAlloca(newalloca); - // block->getParent()->addIndirectAlloca(newalloca); - instr->replaceOperand(1, newalloca); - ++valPop[val]; - stacks[val].push(newalloca); - ++count[val]; - } - } - } - ++iter; - } - // 第二大步:把所有CFG中的该块的successor的phi指令的相应operand确定 - for (auto succ : 
block->getSuccessors()) { - auto position = getPredIndex(block, succ); - for (auto &instr : succ->getInstructions()) { - if (instr->isPhi()) { - auto val = dynamic_cast(instr.get())->getMapVal(); - if (!stacks[val].empty()) { - instr->replaceOperand(position + 1, stacks[val].top()); - } - } else { - // phi指令是添加在块的最前面的,因此过了之后就不会有phi了,直接break - break; - } - } - } - // 第三大步:递归支配树的后继,支配树才能表示define-use关系 - BlockAnalysisInfo* blockInfo = controlFlowAnalysis->getBlockAnalysisInfo(block); - for (auto sdom : blockInfo->getSdoms()) { - rename(sdom, count, stacks); - } - // 第四大步:遍历块中的所有指令,如果涉及到define,就弹栈,这一步是必要的,可以从递归的整体性来思考原因 - // 注意这里count没清理,因为平级之间计数仍然是一直增加的,但是stack要清理,因为define-use关系来自直接 - // 支配结点而不是平级之间,不清理栈会被污染 - // 提前优化:知道变量对应的要弹栈的次数就可以了,没必要遍历所有instr. - for (auto val_pair : valPop) { - auto val = val_pair.first; - for (int i = 0; i < val_pair.second; ++i) { - stacks[val].pop(); - } - } -} - -/** - * 重命名所有块 - * - * 调用rename,自上而下实现所有rename - * - */ -auto Mem2Reg::renameAll() -> void { - auto &functions = pModule->getFunctions(); - for (const auto &function : functions) { - auto func = function.second.get(); - // 对于每个function都要SSA化,所以count和stacks定义在这并初始化 - std::unordered_map count; - std::unordered_map> stacks; - FunctionAnalysisInfo* funcInfo = controlFlowAnalysis->getFunctionAnalysisInfo(func); - for (const auto &map_pair : funcInfo->getValue2DefBlocks()) { - auto val = map_pair.first; - count[val] = 0; - } - rename(func->getEntryBlock(), count, stacks); - } -} - -/** - * mem2reg,对外的接口 - * - * 静态单一赋值 + mem2reg等pass的逻辑组合 - * - */ -auto Mem2Reg::mem2regPipeline() -> void { - // 首先进行mem2reg的前置分析 - controlFlowAnalysis->clear(); - controlFlowAnalysis->runControlFlowAnalysis(); - // 活跃变量分析 - activeVarAnalysis->clear(); - dataFlowAnalysisUtils.addBackwardAnalyzer(activeVarAnalysis); - dataFlowAnalysisUtils.backwardAnalyze(pModule); - - // 计算所有valueToBlocks的定义映射 - computeValue2Blocks(); - // SysYPrinter printer(pModule); - // 参考llvm的mem2reg遍,在插入phi结点之前,先做些优化 - preOptimize1(); - 
// printer.printIR(); - preOptimize2(); - // printer.printIR(); - // 优化三 可能会针对局部变量优化而删除整个块的alloca/store - preOptimize3(); - //再进行活跃变量分析 - // 报错? - - // printer.printIR(); - dataFlowAnalysisUtils.backwardAnalyze(pModule); - // 为所有变量插入phi结点 - insertPhi(); - // 重命名 - renameAll(); -} - -/** - * 计算块n是块s的第几个前驱 - * - * helperfunction,没有返回值,但是会将dom和other的交集赋值给dom - * - */ -auto Mem2Reg::getPredIndex(BasicBlock *n, BasicBlock *s) -> int { +// 获取前驱块在后继块前驱列表中的索引 +int Mem2Reg::getPredIndex(BasicBlock* pred, BasicBlock* succ) { int index = 0; - for (auto elem : s->getPredecessors()) { - if (elem == n) { - break; + for (auto* elem : succ->getPredecessors()) { + if (elem == pred) { + return index; } ++index; } - assert(index < static_cast(s->getPredecessors().size()) && "n is not a predecessor of s."); - return index; + // 断言通常在你的 IR 框架中应该确保前驱是存在的 + // assert(false && "Predecessor not found in successor's predecessor list"); + return -1; // 应该不会发生 } -} // namespace sysy +// 递归地重命名基本块中的变量并填充 Phi 指令 +void Mem2Reg::renameBlock(BasicBlock* block, + std::unordered_map& currentIncomings, + std::unordered_set& visitedBlocks) { + + // 记录在此块中发生的定义,以便在退出时将它们从栈中弹出 + std::unordered_map definitionsInBlockCount; + + // 如果已经访问过这个块,直接返回(防止无限循环或重复处理,在DFS中尤其重要) + if (visitedBlocks.count(block)) { + return; + } + visitedBlocks.insert(block); + + // --- 1. 
处理当前基本块内的指令 --- + // 使用迭代器安全地遍历和删除指令 + for (auto it = block->getInstructions().begin(); it != block->getInstructions().end(); ) { + Instruction* currentInst = it->get(); + + if (AllocaInst* alloca = dynamic_cast(currentInst)) { + // 如果是可提升的 alloca,标记为删除 + if (std::find(currentFunctionAllocas.begin(), currentFunctionAllocas.end(), alloca) != currentFunctionAllocas.end()) { + SysYIROptUtils::usedelete(currentInst); // 标记为删除(或直接删除取决于你的 IR 管理) + it = block->getInstructions().erase(it); // 从列表中移除 + continue; // 继续下一个指令 + } + } else if (LoadInst* load = dynamic_cast(currentInst)) { + AllocaInst* originalAlloca = dynamic_cast(load->getOperand(0)); // load 的第一个操作数是指针 + if (originalAlloca && std::find(currentFunctionAllocas.begin(), currentFunctionAllocas.end(), originalAlloca) != currentFunctionAllocas.end()) { + // 如果是可提升 alloca 的 load 指令 + Value* incomingVal = nullptr; + if (currentIncomings.count(originalAlloca)) { + incomingVal = currentIncomings[originalAlloca]; + } else { + // 如果在当前路径上没有找到定义,则使用 UndefinedValue + incomingVal = UndefinedValue::get(originalAlloca->getType()->isPointer() ? 
+ dynamic_cast(originalAlloca->getType())->getBaseType() : + originalAlloca->getType()); + } + + load->replaceAllUsesWith(incomingVal); // 用最新值替换所有 load 的用途 + SysYIROptUtils::usedelete(currentInst); + it = block->getInstructions().erase(it); + continue; + } + } else if (StoreInst* store = dynamic_cast(currentInst)) { + AllocaInst* originalAlloca = dynamic_cast(store->getOperand(1)); // store 的第二个操作数是指针 + if (originalAlloca && std::find(currentFunctionAllocas.begin(), currentFunctionAllocas.end(), originalAlloca) != currentFunctionAllocas.end()) { + // 如果是可提升 alloca 的 store 指令,更新当前值 + currentIncomings[originalAlloca] = store->getOperand(0); // store 的第一个操作数是值 + definitionsInBlockCount[originalAlloca]++; // 记录在该块中进行的定义数量 + SysYIROptUtils::usedelete(currentInst); + it = block->getInstructions().erase(it); + continue; + } + } else if (PhiInst* phi = dynamic_cast(currentInst)) { + // 如果是 Mem2Reg 插入的 Phi 指令 (通过 phiMap 判断) + if (phiMap[block].count(phi)) { + AllocaInst* originalAlloca = phiMap[block][phi]; + currentIncomings[originalAlloca] = phi; // Phi 指令本身成为该变量的新定义 + definitionsInBlockCount[originalAlloca]++; // 记录该 Phi 的定义 + } + } + ++it; // 移动到下一个指令 + } + + // --- 2. 填充后继基本块中 Phi 指令的入边 --- + for (BasicBlock* successorBB : block->getSuccessors()) { + int predIndex = getPredIndex(block, successorBB); + if (predIndex == -1) continue; + + // Phi 指令总是在基本块的开头 + for (auto& inst_ptr : successorBB->getInstructions()) { + if (PhiInst* phi = dynamic_cast(inst_ptr.get())) { + if (phiMap[successorBB].count(phi)) { // 确保这是我们关心的 Phi 指令 + AllocaInst* originalAlloca = phiMap[successorBB][phi]; + Value* incomingValue = nullptr; + + if (currentIncomings.count(originalAlloca)) { + incomingValue = currentIncomings[originalAlloca]; + } else { + // 如果在当前块没有找到对应的定义,使用 UndefinedValue + incomingValue = UndefinedValue::get(originalAlloca->getType()->isPointer() ? 
+ dynamic_cast(originalAlloca->getType())->getBaseType() : + originalAlloca->getType()); + } + + if (incomingValue) { + phi->addIncoming(incomingValue, block); // 添加 (值, 前驱块) 对 + } + } + } else { + // 遇到非 Phi 指令,说明已经处理完所有 Phi,可以跳出 + break; + } + } + } + + // --- 3. 递归调用支配树的子节点 --- + auto* blockInfo = controlFlowAnalysis->getBlockAnalysisInfo(block); + if (blockInfo) { + for (BasicBlock* dominatedChildBB : blockInfo->getSdoms()) { // getSdoms 获取直接支配的子节点 + // 递归调用,传递当前 Incomings 的副本(或通过值传递以实现回溯) + // 注意:这里是传递 `currentIncomings` 的拷贝,以便递归返回后可以恢复。 + // 但如果 `currentIncomings` 是引用传递,则这里需要回溯逻辑。 + // 鉴于它是值传递,此处的 `definitionsInBlockCount` 仅用于统计,无需实际操作 `currentIncomings`。 + renameBlock(dominatedChildBB, currentIncomings, visitedBlocks); + } + } + + // --- 4. 回溯:从栈中弹出在此块中创建的所有定义 --- + for (auto const& [alloca, count] : definitionsInBlockCount) { + // 在我们的实现中,`currentIncomings` 是通过值传递的,每次递归都收到一个新的拷贝。 + // 因此,不需要显式地 "pop" 栈。`currentIncomings` 在函数返回时会自动销毁。 + // 这种方式模拟了 "SSA 栈" 的行为,每个函数调用帧有自己的局部定义环境。 + } +} + +// 简化冗余的 Phi 指令 (当所有输入都相同时) +void Mem2Reg::simplifyphi(PhiInst* phi) { + BasicBlock* phifromblock = phi->getParent(); + if (!phifromblock) return; // 指令可能已经被删除 + + Value* commonValue = nullptr; + bool allSame = true; + + // Phi 指令的操作数是 Value, BasicBlock 交替出现,所以是 getOperandSize() / 2 个入边 + if (phi->getNumOperands() == 0) { // 空 Phi,通常是无效的,直接删除 + phi->replaceAllUsesWith(UndefinedValue::get(phi->getType())); // 用 UndefinedValue 替换所有用途 + // phi->getParent()->delete_inst(phi); + // 删除 Phi 指令后直接返回 + // phi指令在开头一个比较快 + // TODO:后续可优化查找 + auto tofind = std::find_if(phifromblock->getInstructions().begin(), phifromblock->getInstructions().end(), + [phi](const auto &instr) { return instr.get() == phi; }); + SysYIROptUtils::usedelete(phi); // 使用 SysYIROptUtils 删除指令 + phifromblock->getInstructions().erase(tofind); + // 从基本块中删除 Phi 指令 + return; + } + + for (size_t i = 0; i < phi->getNumIncomingValues(); ++i) { + Value* incomingVal = phi->getOperand(2 * i); // 值位于偶数索引 + + if (incomingVal == 
phi) { // 如果 Phi 指令引用自身 (循环变量) + // 这种情况下,Phi 暂时不能简化,除非所有入边都是它自己,这通常通过其他优化处理 + // 为避免复杂性,我们在此处不处理自引用 Phi 的简化,除非它是唯一选择。 + // 更好的做法是,如果所有入边都指向自身,则该Phi是冗余的,可以替换为undef或其第一个实际值 + // 但这需要更复杂的分析来确定循环的初始值。目前简单返回。 + // TODO:留到后续循环优化处理 + return; + } + + if (commonValue == nullptr) { + commonValue = incomingVal; + } else if (commonValue != incomingVal) { + allSame = false; + break; // 发现不同的入边值 + } + } + + if (allSame && commonValue != nullptr) { + // 所有入边值都相同,用这个值替换 Phi 指令的所有用途 + phi->replaceAllUsesWith(commonValue); + // 从基本块中删除 Phi 指令 + auto tofind = std::find_if(phifromblock->getInstructions().begin(), phifromblock->getInstructions().end(), + [phi](const auto &instr) { return instr.get() == phi; }); + SysYIROptUtils::usedelete(phi); // 使用 SysYIROptUtils 删除指令 + phifromblock->getInstructions().erase(tofind); + } +} + +// 对单个函数执行内存到寄存器的提升 +bool Mem2Reg::promoteMemoryToRegisters(Function* func) { + bool changed = false; + + // 每次开始对一个函数进行 Mem2Reg 时,清空所有上下文信息 + currentFunctionAllocas.clear(); + allocaDefsBlock.clear(); + allocaUsesBlock.clear(); + phiMap.clear(); + allPhiInstructions.clear(); + + // 1. 收集所有可提升的 AllocaInst,并进行初步分析 + BasicBlock* entryBB = func->getEntryBlock(); + if (!entryBB) return false; + + // 逆序遍历入口块的指令,安全地识别 Alloca + for (auto it = entryBB->getInstructions().rbegin(); it != entryBB->getInstructions().rend(); ++it) { + if (AllocaInst* alloca = dynamic_cast(it->get())) { + if (is_promoted(alloca)) { + currentFunctionAllocas.push_back(alloca); + } + } + } + // 收集后反转,使其按原始顺序排列 (如果需要的话,但对后续分析影响不大) + std::reverse(currentFunctionAllocas.begin(), currentFunctionAllocas.end()); + + // 对收集到的所有 alloca 进行 DefsBlock 和 UsesBlock 分析 + for (AllocaInst* alloca : currentFunctionAllocas) { + allocaAnalysis(alloca); + } + + // 2. 
预处理:删除无用的 AllocaInst (没有 Load 和 Store) + // 迭代 currentFunctionAllocas,安全删除 + for (unsigned int i = 0; i < currentFunctionAllocas.size(); ) { + AllocaInst* alloca = currentFunctionAllocas[i]; + + bool hasRelevantUse = false; + // 检查 alloca 的 uses 列表,看是否有 Load 或 Store + // 只要有 Load/Store,就认为是"相关用途",不删除 + for (auto use_ptr : alloca->getUses()) { + Instruction* user_inst = dynamic_cast(use_ptr->getUser()); + if (user_inst && (dynamic_cast(user_inst) || dynamic_cast(user_inst))) { + hasRelevantUse = true; + break; + } + } + + // 如果没有相关用途(没有 Load 和 Store),则 alloca 是死代码 + if (!hasRelevantUse && allocaDefsBlock[alloca].empty() && allocaUsesBlock[alloca].empty()) { + if (alloca->getParent()) { + // alloca->getParent()->delete_inst(alloca); // 从其所在块删除 alloca 指令 + auto tofind = std::find_if(alloca->getParent()->getInstructions().begin(), alloca->getParent()->getInstructions().end(), + [alloca](const auto &instr) { return instr.get() == alloca; }); + SysYIROptUtils::usedelete(alloca); + alloca->getParent()->getInstructions().erase(tofind); + } + currentFunctionAllocas.erase(currentFunctionAllocas.begin() + i); // 从列表中移除 + changed = true; // 发生了改变 + } else { + i++; // 否则,移动到下一个 alloca + } + } + + // 如果没有可提升的 alloca 了,直接返回 + if (currentFunctionAllocas.empty()) { + return changed; + } + + // 3. 插入 Phi 指令 + insertPhiNodes(func); + if (!allPhiInstructions.empty()) changed = true; + + // 4. 重命名变量,转换为 SSA 形式并填充 Phi 指令 + std::unordered_map initialIncomings; + std::unordered_set visitedBlocks; // 用于 DFS 遍历,防止循环 + + // 初始化 entry block 的 Incomings 状态 + for (AllocaInst* alloca : currentFunctionAllocas) { + initialIncomings[alloca] = UndefinedValue::get(dynamic_cast(alloca->getType())->getBaseType()); + } + + // 从入口块开始递归重命名 + renameBlock(entryBB, initialIncomings, visitedBlocks); + + // 5. 
简化 Phi 指令 + // 由于 renameBlock 可能会删除 Phi,这里复制一份列表以安全迭代 + std::vector phisToSimplify = allPhiInstructions; + for (PhiInst* phi : phisToSimplify) { + // 检查 phi 是否还在 IR 中 (可能已被其他优化删除) + // 一个简单检查是看它是否有父块 + if (phi->getParent()) { + simplifyphi(phi); + // simplifyphi 内部会删除 Phi,所以这里不需要再处理 allPhiInstructions + // 最终的 allPhiInstructions 清理将在 promoteMemoryToRegisters 结束后进行 + } + } + + // 清理所有 Phi 的列表和映射 + // 遍历 allPhiInstructions,删除那些在 simplifyphi 后可能仍然存在的、但已经没有 uses 的 Phi + std::vector remainingPhis; + for(PhiInst* phi : allPhiInstructions) { + if(phi->getParent() && phi->getUses().empty()){ // 如果还在IR中但没有用处 + + // phi->getParent()->delete_inst(phi); + // 找到phi节点对应的迭代器 + auto tofind = std::find_if(phi->getParent()->getInstructions().begin(), phi->getParent()->getInstructions().end(), + [phi](const auto &instr) { return instr.get() == phi; }); + SysYIROptUtils::usedelete(phi); // 使用 SysYIROptUtils 删除指令 + phi->getParent()->getInstructions().erase(tofind); + + changed = true; + } else if (phi->getParent()) { // 仍在IR中且有uses + remainingPhis.push_back(phi); + } + } + allPhiInstructions = remainingPhis; // 更新为仅包含未被删除的 Phi + + // 重新清理 phiMap 中已经删除的 Phi 指令项 + for (auto& pairBBPhiMap : phiMap) { + std::vector phisToRemoveFromMap; + for (auto& pairPhiAlloca : pairBBPhiMap.second) { + if (!pairPhiAlloca.first->getParent()) { // 如果 Phi 已经被删除 + phisToRemoveFromMap.push_back(pairPhiAlloca.first); + } + } + for (PhiInst* phi : phisToRemoveFromMap) { + pairBBPhiMap.second.erase(phi); + } + } + + + return changed; +} + +// --- run函数实现 --- +void Mem2Reg::run() { + // 每次运行整个 Mem2Reg Pass 时,重新进行分析 + controlFlowAnalysis->clear(); + controlFlowAnalysis->runControlFlowAnalysis(); + activeVarAnalysis->clear(); + // 假设 dataFlowAnalysisUtils 可以管理和运行各个分析器 + dataFlowAnalysisUtils.addBackwardAnalyzer(activeVarAnalysis); + dataFlowAnalysisUtils.backwardAnalyze(pModule); // 运行活跃变量分析 + + bool globalChanged = false; + // 循环直到没有更多的 alloca 可以被提升 + // 每次 promoteMemoryToRegisters 会尝试在一个函数内完成所有 Mem2Reg 优化 + do { 
+ globalChanged = false; + for (const auto& [_, func] : pModule->getFunctions()) { + // 对每个函数执行 Mem2Reg + if (promoteMemoryToRegisters(func.get())) { + globalChanged = true; + // 如果一个函数发生改变,可能影响其他函数或需要重新分析 + // 因此需要重新运行控制流和活跃变量分析,以备下一次循环 + controlFlowAnalysis->clear(); + controlFlowAnalysis->runControlFlowAnalysis(); + activeVarAnalysis->clear(); + dataFlowAnalysisUtils.backwardAnalyze(pModule); // 重新分析活跃变量 + } + } + } while (globalChanged); // 如果全局有任何函数发生改变,则继续迭代 + + // 最终清理和重新分析 + controlFlowAnalysis->clear(); + controlFlowAnalysis->runControlFlowAnalysis(); + activeVarAnalysis->clear(); + dataFlowAnalysisUtils.backwardAnalyze(pModule); +} + +} // namespace sysy \ No newline at end of file diff --git a/src/include/Mem2Reg.h b/src/include/Mem2Reg.h index 919886a..2c65583 100644 --- a/src/include/Mem2Reg.h +++ b/src/include/Mem2Reg.h @@ -1,57 +1,79 @@ -#pragma once +// 假设 Mem2Reg.h 看起来像这样 (你需要根据实际情况调整) +#ifndef SYSY_MEM2REG_H +#define SYSY_MEM2REG_H -#include -#include -#include +#include #include #include +#include +#include // For computeIteratedDomFrontiers + +// Include your IR and analysis headers #include "IR.h" #include "IRBuilder.h" #include "SysYIRAnalyser.h" #include "SysYIROptUtils.h" namespace sysy { -/** - * 实现静态单变量赋值核心类 mem2reg - */ + class Mem2Reg { private: - Module *pModule; - IRBuilder *pBuilder; - ControlFlowAnalysis *controlFlowAnalysis; // 控制流分析 - ActiveVarAnalysis *activeVarAnalysis; // 活跃变量分析 - DataFlowAnalysisUtils dataFlowAnalysisUtils; + Module* pModule; + IRBuilder* pBuilder; + ControlFlowAnalysis* controlFlowAnalysis; + ActiveVarAnalysis* activeVarAnalysis; + DataFlowAnalysisUtils dataFlowAnalysisUtils; // If this is part of Mem2Reg or an external helper public: - Mem2Reg(Module *pMoudle, IRBuilder *pBuilder, - ControlFlowAnalysis *pCFA = nullptr, ActiveVarAnalysis *pAVA = nullptr) : - pModule(pMoudle), pBuilder(pBuilder), controlFlowAnalysis(pCFA), activeVarAnalysis(pAVA), dataFlowAnalysisUtils() - {} // 初始化函数 + Mem2Reg(Module* module, 
IRBuilder* builder, ControlFlowAnalysis* cfa, ActiveVarAnalysis* ava) + : pModule(module), pBuilder(builder), controlFlowAnalysis(cfa), activeVarAnalysis(ava) {} + // Constructor initializes members + void run(); - void mem2regPipeline(); ///< mem2reg + // --- 新增的私有成员变量和方法,用于SSA转换上下文 --- + // 这是核心,用于存储 SSA 转换过程中的状态 + std::vector currentFunctionAllocas; // 当前函数中所有可提升的 alloca + // alloca -> set of BasicBlocks where it's defined (stored into) + std::unordered_map> allocaDefsBlock; + // alloca -> set of BasicBlocks where it's used (loaded from) + std::unordered_map> allocaUsesBlock; -private: + // BasicBlock -> Map of (PhiInst, Original AllocaInst) + // 用于在 rename 阶段通过 phi 指令找到它代表的原始 alloca + std::unordered_map> phiMap; + std::vector allPhiInstructions; // 收集所有创建的 Phi 指令以便后续简化和清理 - // phi节点的插入需要计算IDF - std::unordered_set computeIterDf(const std::unordered_set &blocks); ///< 计算定义块集合的迭代支配边界 + // --- 核心 SSA 转换辅助函数 --- + // 计算给定定义块集合的迭代支配边界 + std::unordered_set computeIteratedDomFrontiers(const std::unordered_set& blocks); - auto computeValue2Blocks() -> void; ///< 计算value2block的映射(不包括数组和global) + // 分析一个 alloca 的所有 uses,填充 allocaDefsBlock 和 allocaUsesBlock + void allocaAnalysis(AllocaInst* alloca); - auto preOptimize1() -> void; ///< llvm memtoreg预优化1: 删除不含load的alloc和store - auto preOptimize2() -> void; ///< llvm memtoreg预优化2: 针对某个变量的Defblocks只有一个块的情况 - auto preOptimize3() -> void; ///< llvm memtoreg预优化3: 针对某个变量的所有读写都在同一个块中的情况 + // 判断一个 alloca 是否可以被提升为寄存器 (无地址逃逸,标量类型) + bool is_promoted(AllocaInst* alloca); - auto insertPhi() -> void; ///< 为所有变量的迭代支配边界插入phi结点 + // 在迭代支配边界处插入 Phi 指令 + void insertPhiNodes(Function* func); - auto rename(BasicBlock *block, std::unordered_map &count, - std::unordered_map> &stacks) -> void; ///< 单个块的重命名 - auto renameAll() -> void; ///< 重命名所有块 + // 递归地重命名基本块中的变量并填充 Phi 指令 + // 这里的 `count` 和 `stacks` 是临时的,用于 DFS 过程中传递状态 + void renameBlock(BasicBlock* block, + std::unordered_map& currentIncomings, + std::unordered_set& visitedBlocks); // 修改为传递 map 
和 set - // private helper function. -private: - auto getPredIndex(BasicBlock *n, BasicBlock *s) -> int; ///< 获取前驱索引 - auto cascade(Instruction *instr, bool &changed, Function *func, BasicBlock *block, - std::list> &instrs) -> void; ///< 消除级联关系 + // 简化冗余的 Phi 指令 (当所有输入都相同时) + void simplifyphi(PhiInst* phi); + + // 获取前驱块在后继块前驱列表中的索引,用于 Phi 指令入边 + int getPredIndex(BasicBlock* pred, BasicBlock* succ); + + // --- Mem2Reg 的主要工作流函数 --- + // 对单个函数执行内存到寄存器的提升 + bool promoteMemoryToRegisters(Function* func); }; -} // namespace sysy + +} // namespace sysy + +#endif // SYSY_MEM2REG_H \ No newline at end of file From 10b011a1de6950709856659f576277942118d28f Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Fri, 18 Jul 2025 21:28:36 +0800 Subject: [PATCH 11/35] =?UTF-8?q?[fix]=E4=BF=AE=E5=A4=8D=E9=83=A8=E5=88=86?= =?UTF-8?q?=E5=B8=B8=E9=87=8F=E6=9E=84=E5=BB=BA=EF=BC=8C[Pass]=E5=BB=BA?= =?UTF-8?q?=E7=AB=8BPass=E5=9F=BA=E7=B1=BB=E5=92=8C=E7=AE=A1=E7=90=86?= =?UTF-8?q?=E5=99=A8=EF=BC=8C=E9=A2=84=E9=87=8D=E6=9E=84=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E9=81=8D=E7=BB=93=E6=9E=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/SysYIRCFGOpt.cpp | 4 +-- src/include/SysYIRPass.h | 59 +++++++++++++++++++++++++++++++++ src/include/SysYIRPassManager.h | 58 ++++++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+), 2 deletions(-) create mode 100644 src/include/SysYIRPass.h create mode 100644 src/include/SysYIRPassManager.h diff --git a/src/SysYIRCFGOpt.cpp b/src/SysYIRCFGOpt.cpp index 4a84497..c386379 100644 --- a/src/SysYIRCFGOpt.cpp +++ b/src/SysYIRCFGOpt.cpp @@ -463,9 +463,9 @@ bool SysYCFGOpt::SysYAddReturn(Function *func, IRBuilder* pBuilder) { pBuilder->setPosition(block.get(), block->end()); // TODO: 如果int float函数缺少返回值是否需要报错 if (func->getReturnType()->isInt()) { - pBuilder->createReturnInst(ConstantValue::get(0)); + pBuilder->createReturnInst(ConstantInteger::get(0)); } else if (func->getReturnType()->isFloat()) { 
- pBuilder->createReturnInst(ConstantValue::get(0.0F)); + pBuilder->createReturnInst(ConstantFloating::get(0.0F)); } else { pBuilder->createReturnInst(); } diff --git a/src/include/SysYIRPass.h b/src/include/SysYIRPass.h new file mode 100644 index 0000000..cb6e6a9 --- /dev/null +++ b/src/include/SysYIRPass.h @@ -0,0 +1,59 @@ +#pragma once + +#include "IR.h" + +namespace sysy { + +// 前置声明 +class FunctionPass; +class ModulePass; +class AnalysisPass; +class PassManager; + +// 抽象基类 Pass +class Pass { +public: + enum PassKind { + PK_Function, + PK_Module, + PK_Analysis + }; + + Pass(PassKind kind, const std::string& name) : Kind(kind), Name(name) {} + virtual ~Pass() = default; + + PassKind getPassKind() const { return Kind; } + const std::string& getPassName() const { return Name; } + + // 每个Pass需要实现此方法来执行其逻辑 + // 具体的run方法将根据Pass类型在FunctionPass和ModulePass中定义 +protected: + PassKind Kind; + std::string Name; +}; + +// 针对函数的优化遍 +class FunctionPass : public Pass { +public: + FunctionPass(const std::string& name) : Pass(PK_Function, name) {} + // 真正的优化逻辑将在此方法中实现 + virtual bool runOnFunction(Function& F) = 0; +}; + +// 针对模块的优化遍 +class ModulePass : public Pass { +public: + ModulePass(const std::string& name) : Pass(PK_Module, name) {} + // 真正的优化逻辑将在此方法中实现 + virtual bool runOnModule(Module& M) = 0; +}; + +// 分析遍 +class AnalysisPass : public Pass { +public: + AnalysisPass(const std::string& name) : Pass(PK_Analysis, name) {} + // 分析遍通常需要一个模块或函数作为输入,并计算出分析结果 + // 具体分析结果的存储和访问方式需要设计 +}; + +} // namespace sysy \ No newline at end of file diff --git a/src/include/SysYIRPassManager.h b/src/include/SysYIRPassManager.h new file mode 100644 index 0000000..310b50f --- /dev/null +++ b/src/include/SysYIRPassManager.h @@ -0,0 +1,58 @@ +// PassManager.h +#pragma once + +#include +#include +#include // For std::type_index +#include +#include "SysYIRPass.h" +#include "IR.h" // 假设你的IR.h定义了Module, Function等 + +namespace sysy { + +class PassManager { +public: + PassManager() = default; + + // 
添加一个FunctionPass + void addPass(std::unique_ptr pass) { + functionPasses.push_back(std::move(pass)); + } + + // 添加一个ModulePass + void addPass(std::unique_ptr pass) { + modulePasses.push_back(std::move(pass)); + } + + // 添加一个AnalysisPass + template + T* addAnalysisPass(Args&&... args) { + static_assert(std::is_base_of::value, "T must derive from AnalysisPass"); + auto analysis = std::make_unique(std::forward(args)...); + T* rawPtr = analysis.get(); + analysisPasses[std::type_index(typeid(T))] = std::move(analysis); + return rawPtr; + } + + // 获取分析结果(用于其他Pass访问) + template + T* getAnalysis() { + static_assert(std::is_base_of::value, "T must derive from AnalysisPass"); + auto it = analysisPasses.find(std::type_index(typeid(T))); + if (it != analysisPasses.end()) { + return static_cast(it->second.get()); + } + return nullptr; // 或者抛出异常 + } + + // 运行所有注册的遍 + void run(Module& M); + +private: + std::vector> functionPasses; + std::vector> modulePasses; + std::unordered_map> analysisPasses; + // 未来可以添加AnalysisPass的缓存机制 +}; + +} // namespace sysy \ No newline at end of file From 8f1e477e73bc2abdd89909ed70df914e9cbc579b Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Sat, 19 Jul 2025 14:23:57 +0800 Subject: [PATCH 12/35] =?UTF-8?q?=E6=9A=82=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/SysYIRAnalyser.cpp | 3 - src/SysYIRPassManager.cpp | 36 ++++ src/include/SysYFormatter.h | 340 ------------------------------------ 3 files changed, 36 insertions(+), 343 deletions(-) create mode 100644 src/SysYIRPassManager.cpp delete mode 100644 src/include/SysYFormatter.h diff --git a/src/SysYIRAnalyser.cpp b/src/SysYIRAnalyser.cpp index 0761c0a..51e2b27 100644 --- a/src/SysYIRAnalyser.cpp +++ b/src/SysYIRAnalyser.cpp @@ -523,9 +523,6 @@ bool ActiveVarAnalysis::analyze(Module *pModule, BasicBlock *block) { } -auto ActiveVarAnalysis::getActiveTable() const -> const std::map>> & { - return activeTable; -} } // 
namespace sysy diff --git a/src/SysYIRPassManager.cpp b/src/SysYIRPassManager.cpp new file mode 100644 index 0000000..f66f74a --- /dev/null +++ b/src/SysYIRPassManager.cpp @@ -0,0 +1,36 @@ +// PassManager.cpp +#include "SysYIRPassManager.h" +#include + +namespace sysy { + +void PassManager::run(Module& M) { + // 首先运行Module级别的Pass + for (auto& pass : modulePasses) { + std::cout << "Running Module Pass: " << pass->getPassName() << std::endl; + pass->runOnModule(M); + } + + // 然后对每个函数运行Function级别的Pass + auto& functions = M.getFunctions(); + for (auto& pair : functions) { + Function& F = *(pair.second); // 获取Function的引用 + std::cout << " Processing Function: " << F.getName() << std::endl; + + // 在每个函数上运行FunctionPasses + bool changedInFunction; + do { + changedInFunction = false; + for (auto& pass : functionPasses) { + // 对于FunctionPasses,可以考虑一个迭代执行的循环,直到稳定 + std::cout << " Running Function Pass: " << pass->getPassName() << std::endl; + changedInFunction |= pass->runOnFunction(F); + } + } while (changedInFunction); // 循环直到函数稳定,这模拟了您SysYCFGOpt的while(changed)逻辑 + } + + // 分析Pass的运行可以在其他Pass需要时触发,或者在特定的PassManager阶段触发 + // 对于依赖于分析结果的Pass,可以在其run方法中通过PassManager::getAnalysis()来获取 +} + +} // namespace sysy \ No newline at end of file diff --git a/src/include/SysYFormatter.h b/src/include/SysYFormatter.h deleted file mode 100644 index d4c9fb7..0000000 --- a/src/include/SysYFormatter.h +++ /dev/null @@ -1,340 +0,0 @@ -#pragma once - -#include "SysYBaseVisitor.h" -#include "SysYParser.h" -#include - -namespace sysy { - -class SysYFormatter : public SysYBaseVisitor { -protected: - std::ostream &os; - int indent = 0; - -public: - SysYFormatter(std::ostream &os) : os(os), indent(0) {} - -protected: - struct Indentor { - static constexpr int TabSize = 2; - int &indent; - Indentor(int &indent) : indent(indent) { indent += TabSize; } - ~Indentor() { indent -= TabSize; } - }; - std::ostream &space() { return os << std::string(indent, ' '); } - template - std::ostream &interleave(const 
T &container, const std::string sep = ", ") { - auto b = container.begin(), e = container.end(); - (*b)->accept(this); - for (b = std::next(b); b != e; b = std::next(b)) { - os << sep; - (*b)->accept(this); - } - return os; - } - -public: - // virtual std::any visitModule(SysYParser::ModuleContext *ctx) override { - // return visitChildren(ctx); - // } - - virtual std::any visitBtype(SysYParser::BtypeContext *ctx) override { - os << ctx->getText(); - return 0; - } - - virtual std::any visitDecl(SysYParser::DeclContext *ctx) override { - space(); - if (ctx->CONST()) - os << ctx->CONST()->getText() << ' '; - ctx->btype()->accept(this); - os << ' '; - interleave(ctx->varDef(), ", ") << ';' << '\n'; - return 0; - } - - virtual std::any visitVarDef(SysYParser::VarDefContext *ctx) override { - ctx->lValue()->accept(this); - if (ctx->initValue()) { - os << ' ' << '=' << ' '; - ctx->initValue()->accept(this); - } - return 0; - } - - virtual std::any visitInitValue(SysYParser::InitValueContext *ctx) override { - if (not ctx->exp()) { - os << '{'; - auto values = ctx->initValue(); - if (values.size()) - interleave(values, ", "); - os << '}'; - } - return 0; - } - - virtual std::any visitFunc(SysYParser::FuncContext *ctx) override { - ctx->funcType()->accept(this); - os << ' ' << ctx->ID()->getText() << '('; - if (ctx->funcFParams()) - ctx->funcFParams()->accept(this); - os << ')' << ' '; - ctx->blockStmt()->accept(this); - os << '\n'; - return 0; - } - - virtual std::any visitFuncType(SysYParser::FuncTypeContext *ctx) override { - os << ctx->getText(); - return 0; - } - - virtual std::any - visitFuncFParams(SysYParser::FuncFParamsContext *ctx) override { - interleave(ctx->funcFParam(), ", "); - return 0; - } - - virtual std::any - visitFuncFParam(SysYParser::FuncFParamContext *ctx) override { - ctx->btype()->accept(this); - os << ' ' << ctx->ID()->getText(); - if (not ctx->LBRACKET().empty()) { - os << '['; - auto exp = ctx->exp(); - if (not exp.empty()) { - os << '['; - 
interleave(exp, "][") << ']'; - } - } - return 0; - } - - virtual std::any visitBlockStmt(SysYParser::BlockStmtContext *ctx) override { - os << '{' << '\n'; - { - Indentor indentor(indent); - auto items = ctx->blockItem(); - if (not items.empty()) - interleave(items, ""); - } - space() << ctx->RBRACE()->getText() << '\n'; - return 0; - } - - // virtual std::any visitBlockItem(SysYParser::BlockItemContext *ctx) - // override { - // return visitChildren(ctx); - // } - - // virtual std::any visitStmt(SysYParser::StmtContext *ctx) override { - // return visitChildren(ctx); - // } - - virtual std::any - visitAssignStmt(SysYParser::AssignStmtContext *ctx) override { - space(); - ctx->lValue()->accept(this); - os << " = "; - ctx->exp()->accept(this); - os << ';' << '\n'; - return 0; - } - - virtual std::any visitExpStmt(SysYParser::ExpStmtContext *ctx) override { - space(); - ctx->exp()->accept(this); - os << ';' << '\n'; - return 0; - } - - void wrapBlock(SysYParser::StmtContext *stmt) { - bool isBlock = stmt->blockStmt(); - if (isBlock) { - stmt->accept(this); - } else { - os << "{\n"; - { - Indentor indentor(indent); - stmt->accept(this); - } - space() << "}\n"; - } - }; - virtual std::any visitIfStmt(SysYParser::IfStmtContext *ctx) override { - space(); - os << ctx->IF()->getText() << " ("; - ctx->exp()->accept(this); - os << ") "; - auto stmt = ctx->stmt(); - auto ifStmt = stmt[0]; - wrapBlock(ifStmt); - if (stmt.size() == 2) { - auto elseStmt = stmt[1]; - wrapBlock(elseStmt); - } - return 0; - } - - virtual std::any visitWhileStmt(SysYParser::WhileStmtContext *ctx) override { - space(); - os << ctx->WHILE()->getText() << " ("; - ctx->exp()->accept(this); - os << ") "; - wrapBlock(ctx->stmt()); - return 0; - } - - virtual std::any visitBreakStmt(SysYParser::BreakStmtContext *ctx) override { - space() << ctx->BREAK()->getText() << ';' << '\n'; - return 0; - } - - virtual std::any - visitContinueStmt(SysYParser::ContinueStmtContext *ctx) override { - space() << 
ctx->CONTINUE()->getText() << ';' << '\n'; - return 0; - } - - virtual std::any - visitReturnStmt(SysYParser::ReturnStmtContext *ctx) override { - space() << ctx->RETURN()->getText(); - if (ctx->exp()) { - os << ' '; - ctx->exp()->accept(this); - } - os << ';' << '\n'; - return 0; - } - - // virtual std::any visitEmptyStmt(SysYParser::EmptyStmtContext *ctx) - // override { - // return visitChildren(ctx); - // } - - virtual std::any - visitRelationExp(SysYParser::RelationExpContext *ctx) override { - auto lhs = ctx->exp(0); - auto rhs = ctx->exp(1); - std::string op = - ctx->LT() ? "<" : (ctx->LE() ? "<=" : (ctx->GT() ? ">" : ">=")); - lhs->accept(this); - os << ' ' << op << ' '; - rhs->accept(this); - return 0; - } - - virtual std::any - visitMultiplicativeExp(SysYParser::MultiplicativeExpContext *ctx) override { - auto lhs = ctx->exp(0); - auto rhs = ctx->exp(1); - std::string op = ctx->MUL() ? "*" : (ctx->DIV() ? "/" : "%"); - lhs->accept(this); - os << ' ' << op << ' '; - rhs->accept(this); - return 0; - } - - // virtual std::any visitLValueExp(SysYParser::LValueExpContext *ctx) - // override { - // return visitChildren(ctx); - // } - - // virtual std::any visitNumberExp(SysYParser::NumberExpContext *ctx) - // override { - // return visitChildren(ctx); - // } - - virtual std::any visitAndExp(SysYParser::AndExpContext *ctx) override { - ctx->exp(0)->accept(this); - os << " && "; - ctx->exp(1)->accept(this); - return 0; - } - - virtual std::any visitUnaryExp(SysYParser::UnaryExpContext *ctx) override { - std::string op = ctx->ADD() ? "+" : (ctx->SUB() ? 
"-" : "!"); - os << op; - ctx->exp()->accept(this); - return 0; - } - - virtual std::any visitParenExp(SysYParser::ParenExpContext *ctx) override { - os << '('; - ctx->exp()->accept(this); - os << ')'; - return 0; - } - - virtual std::any visitStringExp(SysYParser::StringExpContext *ctx) override { - return visitChildren(ctx); - } - - virtual std::any visitOrExp(SysYParser::OrExpContext *ctx) override { - ctx->exp(0)->accept(this); - os << " || "; - ctx->exp(1)->accept(this); - return 0; - } - - // virtual std::any visitCallExp(SysYParser::CallExpContext *ctx) override { - // return visitChildren(ctx); - // } - - virtual std::any - visitAdditiveExp(SysYParser::AdditiveExpContext *ctx) override { - auto lhs = ctx->exp(0); - auto rhs = ctx->exp(1); - std::string op = ctx->ADD() ? "+" : "-"; - lhs->accept(this); - os << ' ' << op << ' '; - rhs->accept(this); - return 0; - } - - virtual std::any visitEqualExp(SysYParser::EqualExpContext *ctx) override { - auto lhs = ctx->exp(0); - auto rhs = ctx->exp(1); - std::string op = ctx->EQ() ? 
"==" : "!="; - lhs->accept(this); - os << ' ' << op << ' '; - rhs->accept(this); - return 0; - } - - virtual std::any visitCall(SysYParser::CallContext *ctx) override { - os << ctx->ID()->getText() << '('; - if (ctx->funcRParams()) - ctx->funcRParams()->accept(this); - os << ')'; - return 0; - } - - virtual std::any visitLValue(SysYParser::LValueContext *ctx) override { - os << ctx->ID()->getText(); - auto exp = ctx->exp(); - if (not exp.empty()) { - os << '['; - interleave(exp, "][") << ']'; - } - return 0; - } - - virtual std::any visitNumber(SysYParser::NumberContext *ctx) override { - os << ctx->getText(); - return 0; - } - - virtual std::any visitString(SysYParser::StringContext *ctx) override { - os << ctx->getText(); - return 0; - } - - virtual std::any - visitFuncRParams(SysYParser::FuncRParamsContext *ctx) override { - interleave(ctx->exp(), ", "); - return 0; - } -}; - -} // namespace sysy From 20cd16bf5213317f5ccfb6c3100ee203ba162c50 Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Sun, 20 Jul 2025 12:54:19 +0800 Subject: [PATCH 13/35] =?UTF-8?q?=E6=9A=82=E5=AD=982?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/SysYIRGenerator.cpp | 120 +++++++++++++++++++++++++++++++++------- 1 file changed, 100 insertions(+), 20 deletions(-) diff --git a/src/SysYIRGenerator.cpp b/src/SysYIRGenerator.cpp index afaf24b..40b5f59 100644 --- a/src/SysYIRGenerator.cpp +++ b/src/SysYIRGenerator.cpp @@ -10,10 +10,9 @@ #include #include #include -using namespace std; #include "SysYIRGenerator.h" - +using namespace std; namespace sysy { /* @@ -130,30 +129,111 @@ std::any SysYIRGenerator::visitVarDecl(SysYParser::VarDeclContext *ctx) { delete root; if (dims.empty()) { builder.createStoreInst(values.getValue(0), alloca); - } else { - // 对于多维数组,使用memset初始化 - // 计算每个维度的大小 - // 这里的values.getNumbers()返回的是每个维度的大小 - // 这里的values.getValues()返回的是每个维度对应的值 - // 例如:对于一个二维数组,values.getNumbers()可能是[3, 4],表示3行4列 - // 
values.getValues()可能是[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] - // 对于每个维度,使用memset将对应的值填充到数组中 - // 这里的alloca是一个指向数组的指针 - const std::vector & counterNumbers = values.getNumbers(); - const std::vector & counterValues = values.getValues(); - unsigned begin = 0; - for (size_t i = 0; i < counterNumbers.size(); i++) { - + } else{ + // **数组变量初始化** + const std::vector &counterValues = values.getValues(); + + // 计算数组的**总元素数量**和**总字节大小** + int numElements = 1; + // 存储每个维度的实际整数大小,用于索引计算 + std::vector dimSizes; + for (Value *dimVal : dims) { + if (ConstantInteger *constInt = dynamic_cast(dimVal)) { + int dimSize = constInt->getInt(); + numElements *= dimSize; + dimSizes.push_back(dimSize); + } + // TODO else 错误处理:数组维度必须是常量(对于静态分配) + } + unsigned int elementSizeInBytes = type->getSize(); // 获取单个元素的大小(字节) + unsigned int totalSizeInBytes = numElements * elementSizeInBytes; + + // **判断是否可以进行全零初始化优化** + bool allValuesAreZero = false; + if (counterValues.empty()) { // 例如 int arr[3] = {}; 或 int arr[3][4] = {}; + allValuesAreZero = true; + } + else { + allValuesAreZero = true; + for (Value *val : counterValues){ + if (ConstantInteger *constInt = dynamic_cast(val)){ + if (constInt->getInt() != 0){ + allValuesAreZero = false; + break; + } + } + else{ + // 如果值不是常量,我们通常不能确定它是否为零,所以不进行 memset 优化 + allValuesAreZero = false; + break; + } + } + } + + if (allValuesAreZero) { + // 如果所有初始化值都是零(或没有明确初始化但语法允许),使用 memset 优化 builder.createMemsetInst( - alloca, ConstantInteger::get(begin), - ConstantInteger::get(static_cast(counterNumbers[i])), - counterValues[i]); - begin += counterNumbers[i]; + alloca, // 目标数组的起始地址 + ConstantInteger::get(0), // 偏移量(通常为0),后续删除 + ConstantInteger::get(totalSizeInBytes), + ConstantInteger::get(0)); // 填充的总字节数 + } + else { + // **逐元素存储:遍历所有初始值,并为每个值生成一个 store 指令** + for (size_t k = 0; k < counterValues.size(); ++k) { + // 用于存储当前元素的索引列表 + std::vector currentIndices; + int tempLinearIndex = k; // 临时线性索引,用于计算多维索引 + + // **将线性索引转换为多维索引** + // 这个循环从最内层维度开始倒推,计算每个维度的索引 + // 
假设是行主序(row-major order),这是 C/C++ 数组的标准存储方式 + for (int dimIdx = dimSizes.size() - 1; dimIdx >= 0; --dimIdx) + { + // 计算当前维度的索引,并插入到列表的最前面 + currentIndices.insert(currentIndices.begin(), + ConstantInteger::get(static_cast(tempLinearIndex % dimSizes[dimIdx]))); + // 更新线性索引,用于计算下一个更高维度的索引 + tempLinearIndex /= dimSizes[dimIdx]; + } + + // **生成 store 指令,传入值、基指针和计算出的索引列表** + // 你的 builder.createStoreInst 签名需要能够接受这些参数 + // 假设你的 builder.createStoreInst(Value *val, Value *ptr, const std::vector &indices, ...) + builder.createStoreInst(counterValues[k], alloca, currentIndices); + } } } } + else + { // **如果没有显式初始化值,默认对数组进行零初始化** + if (!dims.empty()) + { // 只有数组才需要默认的零初始化 + int numElements = 1; + for (Value *dimVal : dims) + { + if (ConstantInteger *constInt = dynamic_cast(dimVal)) + { + numElements *= constInt->getInt(); + } + } + unsigned int elementSizeInBytes = type->getSize(); + unsigned int totalSizeInBytes = numElements * elementSizeInBytes; + + // 使用 memset 将整个数组清零 + builder.createMemsetInst( + alloca, + ConstantInteger::get(0), + ConstantInteger::get(totalSizeInBytes), + ConstantInteger::get(0) + ); // 填充的总字节数 + } + // 标量变量如果没有初始化值,通常不生成额外的初始化指令,因为其内存已分配但未赋值。 + } + module->addVariable(name, alloca); } + return std::any(); } From de696b2b5311cf5fd2051cb4fd3a986c7d42e3cf Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Sun, 20 Jul 2025 15:33:58 +0800 Subject: [PATCH 14/35] =?UTF-8?q?[IR]=E9=87=8D=E6=9E=84=E6=95=B0=E7=BB=84?= =?UTF-8?q?=E5=9C=B0=E5=9D=80=E7=9B=B8=E5=85=B3=E6=8C=87=E4=BB=A4=20?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0GEP=E6=8C=87=E4=BB=A4=E4=BB=A5=E5=8F=8A?= =?UTF-8?q?=E7=9B=B8=E5=85=B3=E6=96=B9=E6=B3=95=20=E6=96=B0=E5=A2=9E?= =?UTF-8?q?=E6=95=B0=E7=BB=84Array=20Type=20=E5=88=A0=E9=99=A4=E6=97=A0?= =?UTF-8?q?=E7=94=A8=E6=8C=87=E4=BB=A4(GetSubArray,LA)=20=E5=88=A0?= =?UTF-8?q?=E9=99=A4=E5=86=97=E4=BD=99=E7=B1=BB=E5=AE=9A=E4=B9=89(Lval)=20?= =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E4=B8=AD=E9=97=B4=E4=BB=A3=E7=A0=81=E7=94=9F?= 
=?UTF-8?q?=E6=88=90=E9=80=BB=E8=BE=91=20=E6=B5=8B=E8=AF=95=E9=80=9A?= =?UTF-8?q?=E8=BF=87=E6=89=80=E4=BB=A5test=E7=9B=AE=E5=BD=95=E4=B8=8B?= =?UTF-8?q?=E7=9A=84=E6=96=87=E4=BB=B6=20TODO:=E5=90=8E=E7=AB=AF=E5=B1=95?= =?UTF-8?q?=E5=BC=80=E6=95=B0=E7=BB=84=E8=AE=A1=E7=AE=97=E5=9C=B0=E5=9D=80?= =?UTF-8?q?=E4=BB=85=E9=9C=80=E8=A6=81=E9=92=88=E5=AF=B9GEP=E6=8C=87?= =?UTF-8?q?=E4=BB=A4=E5=B1=95=E5=BC=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/CMakeLists.txt | 2 +- src/IR.cpp | 53 ++---- src/SysYIRGenerator.cpp | 277 +++++++++++++++++++++++--------- src/SysYIRPrinter.cpp | 294 +++++++++++++++++----------------- src/include/IR.h | 222 +++++++------------------ src/include/IRBuilder.h | 58 +++---- src/include/SysYIRGenerator.h | 6 + src/sysyc.cpp | 10 +- 8 files changed, 455 insertions(+), 467 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 47d74ab..a052e5b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,7 +22,7 @@ add_executable(sysyc SysYIRGenerator.cpp SysYIRPrinter.cpp SysYIRCFGOpt.cpp - SysYIRAnalyser.cpp + # SysYIRAnalyser.cpp # DeadCodeElimination.cpp AddressCalculationExpansion.cpp # Mem2Reg.cpp diff --git a/src/IR.cpp b/src/IR.cpp index 5f4e0c5..c694839 100644 --- a/src/IR.cpp +++ b/src/IR.cpp @@ -49,6 +49,11 @@ auto Type::getFunctionType(Type *returnType, const std::vector ¶mTyp return FunctionType::get(returnType, paramTypes); } +auto Type::getArrayType(Type *elementType, unsigned numElements) -> Type * { + // forward to ArrayType + return ArrayType::get(elementType, numElements); +} + auto Type::getSize() const -> unsigned { switch (kind) { case kInt: @@ -58,6 +63,10 @@ auto Type::getSize() const -> unsigned { case kPointer: case kFunction: return 8; + case Kind::kArray: { + const ArrayType* arrType = static_cast(this); + return arrType->getElementType()->getSize() * arrType->getNumElements(); + } case kVoid: return 0; } @@ -95,6 +104,11 @@ 
FunctionType*FunctionType::get(Type *returnType, const std::vector ¶ return result.first->get(); } +ArrayType *ArrayType::get(Type *elementType, unsigned numElements) { + // TODO:可以考虑在这里添加缓存,避免重复创建相同的数组类型 + return new ArrayType(elementType, numElements); +} + void Value::replaceAllUsesWith(Value *value) { for (auto &use : uses) { use->getUser()->setOperand(use->getIndex(), value); @@ -465,44 +479,7 @@ Function * Function::clone(const std::string &suffix) const { break; } - case Instruction::kLa: { - auto oldLaInst = dynamic_cast(inst); - auto oldPointer = oldLaInst->getPointer(); - Value *newPointer; - std::vector newIndices; - newPointer = oldNewValueMap.at(oldPointer); - - for (const auto &index : oldLaInst->getIndices()) { - newIndices.emplace_back(oldNewValueMap.at(index->getValue())); - } - ss << oldLaInst->getName() << suffix; - auto newLaInst = new LaInst(newPointer, newIndices, oldNewBlockMap.at(oldLaInst->getParent()), ss.str()); - ss.str(""); - oldNewValueMap.emplace(oldLaInst, newLaInst); - break; - } - - case Instruction::kGetSubArray: { - auto oldGetSubArrayInst = dynamic_cast(inst); - auto oldFather = oldGetSubArrayInst->getFatherArray(); - auto oldChild = oldGetSubArrayInst->getChildArray(); - Value *newFather; - Value *newChild; - std::vector newIndices; - newFather = oldNewValueMap.at(oldFather); - newChild = oldNewValueMap.at(oldChild); - - for (const auto &index : oldGetSubArrayInst->getIndices()) { - newIndices.emplace_back(oldNewValueMap.at(index->getValue())); - } - ss << oldGetSubArrayInst->getName() << suffix; - auto newGetSubArrayInst = - new GetSubArrayInst(dynamic_cast(newFather), dynamic_cast(newChild), newIndices, - oldNewBlockMap.at(oldGetSubArrayInst->getParent()), ss.str()); - ss.str(""); - oldNewValueMap.emplace(oldGetSubArrayInst, newGetSubArrayInst); - break; - } + // TODO:复制GEP指令 case Instruction::kMemset: { auto oldMemsetInst = dynamic_cast(inst); diff --git a/src/SysYIRGenerator.cpp b/src/SysYIRGenerator.cpp index 
40b5f59..6302aa5 100644 --- a/src/SysYIRGenerator.cpp +++ b/src/SysYIRGenerator.cpp @@ -15,6 +15,73 @@ using namespace std; namespace sysy { + +Type* SysYIRGenerator::buildArrayType(Type* baseType, const std::vector& dims){ + Type* currentType = baseType; + // 从最内层维度开始构建 ArrayType + // 例如对于 int arr[2][3],先处理 [3],再处理 [2] + // 注意:SysY 的 dims 是从最外层到最内层,所以我们需要反向迭代 + // 或者调整逻辑,使得从内到外构建 ArrayType + // 假设 dims 列表是 [dim1, dim2, dim3...] (例如 [2, 3] for int[2][3]) + // 我们需要从最内层维度开始向外构建 ArrayType + for (int i = dims.size() - 1; i >= 0; --i) { + // 维度大小必须是常量,否则无法构建 ArrayType + ConstantInteger* constDim = dynamic_cast(dims[i]); + if (constDim == nullptr) { + // 如果维度不是常量,可能需要特殊处理,例如将其视为指针 + // 对于函数参数 int arr[] 这种,第一个维度可以为未知 + // 在这里,我们假设所有声明的数组维度都是常量 + assert(false && "Array dimension must be a constant integer!"); + return nullptr; + } + unsigned dimSize = constDim->getInt(); + currentType = Type::getArrayType(currentType, dimSize); + } + return currentType; +} + +Value* SysYIRGenerator::getGEPAddressInst(Value* basePointer, const std::vector& indices) { + // 检查 basePointer 是否为指针类型 + if (!basePointer->getType()->isPointer()) { + assert(false && "GEP base pointer must be a pointer type!"); + } + + // 获取基指针所指向的实际类型 (例如 int* 指向 int, int[2][3]* 指向 int[2][3]) + Type* currentElementType = basePointer->getType()->as()->getBaseType(); + + std::vector actualGEPIndices; + // GEP 指令的第一个索引通常是0,用于“跳过”基指针指向的聚合类型本身,直接指向其第一个元素。 + // 例如,对于 AllocaInst 返回的 `int[2][3]*`,第一个 `0` 索引表示从数组的开始而不是指针本身开始索引。 + actualGEPIndices.push_back(ConstantInteger::get(0)); + + // 将用户提供的索引添加到 GEP 操作数中 + for (Value* index : indices) { + actualGEPIndices.push_back(index); + } + + // 根据索引链计算最终的元素类型 + Type* finalTargetType = currentElementType; + + // 遍历用户提供的索引(不包括我们添加的第一个0),逐步确定 GEP 的最终结果类型 + // 每个索引都“深入”一个维度 + for (size_t i = 0; i < indices.size(); ++i) { // 这里遍历的是用户提供的索引 + if (finalTargetType && finalTargetType->isArray()) { + finalTargetType = finalTargetType->as()->getElementType(); + } else { + // 
如果索引链还在继续,但当前类型已经不是数组或聚合类型,这通常是一个错误 + // 或者表示访问的是标量,后续索引无效。此时,finalTargetType 已经是最终的标量类型,不能再深入。 + // 例如,对 int arr[5]; 访问 arr[i][j] (j 是多余的),这里会停止类型推断。 + break; + } + } + + // GEP 的结果总是指针类型,指向最终计算出的元素 + Type* gepResultType = Type::getPointerType(finalTargetType); + + // 创建 GEP 指令。假设 builder.createGetElementPtrInst 的签名为 + // (Type* resultType, Value* basePointer, const std::vector& indices) + return builder.createGetElementPtrInst(basePointer, actualGEPIndices); +} /* * @brief: visit compUnit * @details: @@ -118,24 +185,28 @@ std::any SysYIRGenerator::visitVarDecl(SysYParser::VarDeclContext *ctx) { } } + Type* variableType = type; + if (!dims.empty()) { // 如果有维度,说明是数组 + variableType = buildArrayType(type, dims); // 构建完整的 ArrayType + } + + // 对于数组,alloca 的类型将是指针指向数组类型,例如 `int[2][3]*` + // 对于标量,alloca 的类型将是指针指向标量类型,例如 `int*` AllocaInst* alloca = builder.createAllocaInst(Type::getPointerType(type), dims, name); if (varDef->initVal() != nullptr) { ValueCounter values; - // 这里的varDef->initVal()可能是ScalarInitValue或ArrayInitValue ArrayValueTree* root = std::any_cast(varDef->initVal()->accept(this)); Utils::tree2Array(type, root, dims, dims.size(), values, &builder); delete root; - if (dims.empty()) { + + if (dims.empty()) { // 标量变量初始化 builder.createStoreInst(values.getValue(0), alloca); - } else{ - // **数组变量初始化** + } else { // 数组变量初始化 const std::vector &counterValues = values.getValues(); - // 计算数组的**总元素数量**和**总字节大小** int numElements = 1; - // 存储每个维度的实际整数大小,用于索引计算 std::vector dimSizes; for (Value *dimVal : dims) { if (ConstantInteger *constInt = dynamic_cast(dimVal)) { @@ -145,12 +216,11 @@ std::any SysYIRGenerator::visitVarDecl(SysYParser::VarDeclContext *ctx) { } // TODO else 错误处理:数组维度必须是常量(对于静态分配) } - unsigned int elementSizeInBytes = type->getSize(); // 获取单个元素的大小(字节) + unsigned int elementSizeInBytes = type->getSize(); unsigned int totalSizeInBytes = numElements * elementSizeInBytes; - // **判断是否可以进行全零初始化优化** bool allValuesAreZero = false; - if (counterValues.empty()) { 
// 例如 int arr[3] = {}; 或 int arr[3][4] = {}; + if (counterValues.empty()) { allValuesAreZero = true; } else { @@ -163,7 +233,6 @@ std::any SysYIRGenerator::visitVarDecl(SysYParser::VarDeclContext *ctx) { } } else{ - // 如果值不是常量,我们通常不能确定它是否为零,所以不进行 memset 优化 allValuesAreZero = false; break; } @@ -171,64 +240,51 @@ std::any SysYIRGenerator::visitVarDecl(SysYParser::VarDeclContext *ctx) { } if (allValuesAreZero) { - // 如果所有初始化值都是零(或没有明确初始化但语法允许),使用 memset 优化 builder.createMemsetInst( - alloca, // 目标数组的起始地址 - ConstantInteger::get(0), // 偏移量(通常为0),后续删除 + alloca, + ConstantInteger::get(0), ConstantInteger::get(totalSizeInBytes), - ConstantInteger::get(0)); // 填充的总字节数 + ConstantInteger::get(0)); } else { - // **逐元素存储:遍历所有初始值,并为每个值生成一个 store 指令** for (size_t k = 0; k < counterValues.size(); ++k) { - // 用于存储当前元素的索引列表 std::vector currentIndices; - int tempLinearIndex = k; // 临时线性索引,用于计算多维索引 + int tempLinearIndex = k; - // **将线性索引转换为多维索引** - // 这个循环从最内层维度开始倒推,计算每个维度的索引 - // 假设是行主序(row-major order),这是 C/C++ 数组的标准存储方式 + // 将线性索引转换为多维索引 for (int dimIdx = dimSizes.size() - 1; dimIdx >= 0; --dimIdx) { - // 计算当前维度的索引,并插入到列表的最前面 currentIndices.insert(currentIndices.begin(), ConstantInteger::get(static_cast(tempLinearIndex % dimSizes[dimIdx]))); - // 更新线性索引,用于计算下一个更高维度的索引 tempLinearIndex /= dimSizes[dimIdx]; } - // **生成 store 指令,传入值、基指针和计算出的索引列表** - // 你的 builder.createStoreInst 签名需要能够接受这些参数 - // 假设你的 builder.createStoreInst(Value *val, Value *ptr, const std::vector &indices, ...) 
- builder.createStoreInst(counterValues[k], alloca, currentIndices); + // 计算元素的地址 + Value* elementAddress = getGEPAddressInst(alloca, currentIndices); + // 生成 store 指令 (假设 createStoreInst 接受 Value* value, Value* pointer) + builder.createStoreInst(counterValues[k], elementAddress); } } } } - else - { // **如果没有显式初始化值,默认对数组进行零初始化** - if (!dims.empty()) - { // 只有数组才需要默认的零初始化 + else { // 如果没有显式初始化值,默认对数组进行零初始化 + if (!dims.empty()) { // 只有数组才需要默认的零初始化 int numElements = 1; - for (Value *dimVal : dims) - { - if (ConstantInteger *constInt = dynamic_cast(dimVal)) - { + for (Value *dimVal : dims) { + if (ConstantInteger *constInt = dynamic_cast(dimVal)) { numElements *= constInt->getInt(); } } unsigned int elementSizeInBytes = type->getSize(); unsigned int totalSizeInBytes = numElements * elementSizeInBytes; - // 使用 memset 将整个数组清零 builder.createMemsetInst( alloca, ConstantInteger::get(0), ConstantInteger::get(totalSizeInBytes), ConstantInteger::get(0) - ); // 填充的总字节数 + ); } - // 标量变量如果没有初始化值,通常不生成额外的初始化指令,因为其内存已分配但未赋值。 } module->addVariable(name, alloca); @@ -356,29 +412,56 @@ std::any SysYIRGenerator::visitAssignStmt(SysYParser::AssignStmtContext *ctx) { for (const auto &exp : lVal->exp()) { dims.push_back(std::any_cast(visitExp(exp))); } + + auto variable = module->getVariable(name); // 获取 AllocaInst 或 GlobalValue + Value* value = std::any_cast(visitExp(ctx->exp())); // 右值 - auto variable = module->getVariable(name); - Value* value = std::any_cast(visitExp(ctx->exp())); - Type* variableType = dynamic_cast(variable->getType())->getBaseType(); + if (variable == nullptr) { + throw std::runtime_error("Variable " + name + " not found in assignment."); + } - // 左值右值类型不同处理 - if (variableType != value->getType()) { + // 计算最终赋值目标元素的类型 + // variable 本身应该是一个指针类型 (例如 int* 或 int[2][3]*) + if (!variable->getType()->isPointer()) { + assert(false && "Variable to be assigned must be a pointer type!"); + return std::any(); + } + Type* targetElementType = 
variable->getType()->as()->getBaseType(); // 从基指针指向的类型开始 + + // 模拟 GEP 路径,根据 dims 确定最终元素的类型 + for (size_t i = 0; i < dims.size(); ++i) { + if (targetElementType && targetElementType->isArray()) { + targetElementType = targetElementType->as()->getElementType(); + } else { + break; // 如果不是数组类型但还有索引,或者索引超出维度,则停止推断 + } + } + + // 左值右值类型不同处理:根据最终元素类型进行转换 + if (targetElementType != value->getType()) { ConstantValue * constValue = dynamic_cast(value); if (constValue != nullptr) { - if (variableType == Type::getFloatType()) { - value = ConstantInteger::get(static_cast(constValue->getInt())); - } else { - value = ConstantFloating::get(static_cast(constValue->getFloat())); + if (targetElementType == Type::getFloatType()) { + value = ConstantFloating::get(static_cast(constValue->getInt())); + } else { // 假设如果不是浮点型,就是整型 + value = ConstantInteger::get(static_cast(constValue->getFloat())); } } else { - if (variableType == Type::getFloatType()) { + if (targetElementType == Type::getFloatType()) { value = builder.createIToFInst(value); - } else { + } else { // 假设如果不是浮点型,就是整型 value = builder.createFtoIInst(value); } } } - builder.createStoreInst(value, variable, dims, variable->getName()); + + // 计算目标地址:如果 dims 为空,就是变量本身地址;否则通过 GEP 计算 + Value* targetAddress = variable; + if (!dims.empty()) { + targetAddress = getGEPAddressInst(variable, dims); + } + + builder.createStoreInst(value, targetAddress); return std::any(); } @@ -576,51 +659,89 @@ std::any SysYIRGenerator::visitReturnStmt(SysYParser::ReturnStmtContext *ctx) { } +// SysYIRGenerator.cpp (修改部分) + std::any SysYIRGenerator::visitLValue(SysYParser::LValueContext *ctx) { std::string name = ctx->Ident()->getText(); User* variable = module->getVariable(name); Value* value = nullptr; + if (variable == nullptr) { + throw std::runtime_error("Variable " + name + " not found."); + } std::vector dims; for (const auto &exp : ctx->exp()) { dims.push_back(std::any_cast(visitExp(exp))); } - if (variable == nullptr) { - throw 
std::runtime_error("Variable " + name + " not found."); - } - - bool indicesConstant = true; - for (const auto &dim : dims) { - if (dynamic_cast(dim) == nullptr) { - indicesConstant = false; - break; - } + // 1. 获取变量的声明维度数量 + unsigned declaredNumDims = 0; + if (AllocaInst* alloc = dynamic_cast(variable)) { + declaredNumDims = alloc->getNumDims(); + } else if (GlobalValue* glob = dynamic_cast(variable)) { + declaredNumDims = glob->getNumDims(); + } else if (ConstantVariable* constV = dynamic_cast(variable)) { + declaredNumDims = constV->getNumDims(); } + // 2. 处理常量变量 (ConstantVariable) 且所有索引都是常量的情况 ConstantVariable* constVar = dynamic_cast(variable); - GlobalValue* globalVar = dynamic_cast(variable); - AllocaInst* localVar = dynamic_cast(variable); - if (constVar != nullptr && indicesConstant) { - // 如果是常量变量,且索引是常量,则直接获取子数组 - value = constVar->getByIndices(dims); - } else if (module->isInGlobalArea() && (globalVar != nullptr)) { - assert(indicesConstant); - value = globalVar->getByIndices(dims); - } else { - if ((globalVar != nullptr && globalVar->getNumDims() > dims.size()) || - (localVar != nullptr && localVar->getNumDims() > dims.size()) || - (constVar != nullptr && constVar->getNumDims() > dims.size())) { - // value = builder.createLaInst(variable, indices); - // 如果变量是全局变量或局部变量,且索引数量小于维度数量,则创建createGetSubArray获取子数组 - auto getArrayInst = - builder.createGetSubArray(dynamic_cast(variable), dims); - value = getArrayInst->getChildArray(); - } else { - value = builder.createLoadInst(variable, dims); + if (constVar != nullptr) { + bool allIndicesConstant = true; + for (const auto &dim : dims) { + if (dynamic_cast(dim) == nullptr) { + allIndicesConstant = false; + break; + } + } + if (allIndicesConstant) { + // 如果是常量变量且所有索引都是常量,直接通过 getByIndices 获取编译时值 + // 这个方法会根据索引深度返回最终的标量值或指向子数组的指针 (作为 ConstantValue/Variable) + return constVar->getByIndices(dims); } } + // 3. 
处理可变变量 (AllocaInst/GlobalValue) 或带非常量索引的常量变量 + // 这里区分标量访问和数组元素/子数组访问 + + // 检查是否是访问标量变量本身(没有索引,且声明维度为0) + if (dims.empty() && declaredNumDims == 0) { + // 对于标量变量,直接加载其值。 + // variable 本身就是指向标量的指针 (e.g., int* %a) + if (dynamic_cast(variable) || dynamic_cast(variable)) { + value = builder.createLoadInst(variable); + } else { + // 如果走到这里且不是AllocaInst/GlobalValue,但dims为空且declaredNumDims为0, + // 且又不是ConstantVariable (前面已处理),则可能是错误情况。 + assert(false && "Unhandled scalar variable type in LValue access."); + return static_cast(nullptr); + } + } else { + // 访问数组元素或子数组(有索引,或变量本身是数组/多维指针) + Value* targetAddress = nullptr; + + // GEP 的基指针就是变量本身(它是一个指向内存的指针) + if (dynamic_cast(variable) || dynamic_cast(variable) || (constVar != nullptr)) { + // 允许对 ConstantVariable (如果它代表全局数组常量) 进行 GEP + targetAddress = getGEPAddressInst(variable, dims); + } else { + // 其他情况(例如尝试对非指针类型或不支持的 LValue 进行 GEP)应报错 + assert(false && "LValue variable type not supported for GEP or dynamic load."); + return static_cast(nullptr); + } + + // 现在 targetAddress 持有元素或子数组的地址。 + // 需要判断是加载值,还是返回子数组的地址。 + + // 如果提供的索引数量少于声明的维度数量,则表示访问的是子数组,返回其地址 + if (dims.size() < declaredNumDims) { + value = targetAddress; + } else { + // 否则,表示访问的是最终的标量元素,加载其值 + // 假设 createLoadInst 接受 Value* pointer + value = builder.createLoadInst(targetAddress); + } + } return value; } diff --git a/src/SysYIRPrinter.cpp b/src/SysYIRPrinter.cpp index 7d92a0f..952d500 100644 --- a/src/SysYIRPrinter.cpp +++ b/src/SysYIRPrinter.cpp @@ -3,12 +3,11 @@ #include #include #include -#include "IR.h" +#include "IR.h" // 确保IR.h包含了ArrayType、GetElementPtrInst等的定义 namespace sysy { void SysYPrinter::printIR() { - const auto &functions = pModule->getFunctions(); //TODO: Print target datalayout and triple (minimal required by LLVM) @@ -36,11 +35,18 @@ std::string SysYPrinter::getTypeString(Type *type) { return "i32"; } else if (type->isFloat()) { return "float"; - } else if (auto ptrType = dynamic_cast(type)) { + // 递归打印指针指向的类型,然后加上 '*' return 
getTypeString(ptrType->getBaseType()) + "*"; - } else if (auto ptrType = dynamic_cast(type)) { - return getTypeString(ptrType->getReturnType()); + } else if (auto funcType = dynamic_cast(type)) { + // 对于函数类型,打印其返回类型 + // 注意:这里可能需要更完整的函数签名打印,取决于你的IR表示方式 + // 比如:`retType (paramType1, paramType2, ...)` + // 但为了简化和LLVM IR兼容性,通常在定义时完整打印 + return getTypeString(funcType->getReturnType()); + } else if (auto arrayType = dynamic_cast(type)) { // 新增:处理数组类型 + // 打印格式为 [num_elements x element_type] + return "[" + std::to_string(arrayType->getNumElements()) + " x " + getTypeString(arrayType->getElementType()) + "]"; } assert(false && "Unsupported type"); return ""; @@ -51,15 +57,23 @@ std::string SysYPrinter::getValueName(Value *value) { return "@" + global->getName(); } else if (auto inst = dynamic_cast(value)) { return "%" + inst->getName(); - } else if (auto constVal = dynamic_cast(value)) { - if (constVal->isFloat()) { - return std::to_string(constVal->getFloat()); + } else if (auto constInt = dynamic_cast(value)) { // 优先匹配具体的常量类型 + return std::to_string(constInt->getInt()); + } else if (auto constFloat = dynamic_cast(value)) { // 优先匹配具体的常量类型 + return std::to_string(constFloat->getFloat()); + } else if (auto constUndef = dynamic_cast(value)) { // 如果有Undef类型 + return "undef"; + } else if (auto constVal = dynamic_cast(value)) { // fallback for generic ConstantValue + // 这里的逻辑可能需要根据你ConstantValue的实际设计调整 + // 确保它能处理所有可能的ConstantValue + if (constVal->getType()->isFloat()) { + return std::to_string(constVal->getFloat()); } return std::to_string(constVal->getInt()); } else if (auto constVar = dynamic_cast(value)) { - return constVar->getName(); + return constVar->getName(); // 假设ConstantVariable有自己的名字或通过getByIndices获取值 } - assert(false && "Unknown value type"); + assert(false && "Unknown value type or unable to get value name"); return ""; } @@ -77,44 +91,35 @@ void SysYPrinter::printGlobalVariable() { for (const auto &global : globals) { std::cout << "@" << global->getName() << " 
= global "; - auto baseType = dynamic_cast(global->getType())->getBaseType(); - printType(baseType); - - if (global->getNumDims() > 0) { - // Array type - std::cout << " ["; - for (unsigned i = 0; i < global->getNumDims(); i++) { - if (i > 0) std::cout << " x "; - std::cout << getValueName(global->getDim(i)); - } - std::cout << "]"; - } + // 全局变量的类型是一个指针,指向其基类型 (可能是 ArrayType 或 Integer/FloatType) + auto globalVarBaseType = dynamic_cast(global->getType())->getBaseType(); + printType(globalVarBaseType); // 打印全局变量的实际类型 (例如 i32 或 [10 x i32]) std::cout << " "; - if (global->getNumDims() > 0) { - // Array initializer - std::cout << "["; - auto values = global->getInitValues(); - auto counterValues = values.getValues(); - auto counterNumbers = values.getNumbers(); + // 检查是否是数组类型 (通过检查 globalVarBaseType 是否是 ArrayType) + if (globalVarBaseType->isArray()) { + // 数组初始化器 + std::cout << "["; // LLVM IR 数组初始化器格式: [type value, type value, ...] + auto values = global->getInitValues(); // 假设 getInitValues() 返回一个 ValueCounter + const std::vector &counterValues = values.getValues(); // 获取所有值 - for (size_t i = 0; i < counterNumbers.size(); i++) { + for (size_t i = 0; i < counterValues.size(); i++) { if (i > 0) std::cout << ", "; - if (baseType->isFloat()) { - std::cout << "float " << dynamic_cast(counterValues[i])->getFloat(); - } else { - std::cout << "i32 " << dynamic_cast(counterValues[i])->getInt(); - } + // 打印元素类型,这个元素类型应该是数组的最终元素类型,例如 i32 或 float + // 可以从 globalVarBaseType 逐层剥离得到最终元素类型,但这里简化为直接从值获取 + printType(counterValues[i]->getType()); + std::cout << " "; + printValue(counterValues[i]); } std::cout << "]"; } else { - // Scalar initializer - if (baseType->isFloat()) { - std::cout << "float " << dynamic_cast(global->getByIndex(0))->getFloat(); - } else { - std::cout << "i32 " << dynamic_cast(global->getByIndex(0))->getInt(); - } + // 标量初始化器 + // 假设标量全局变量的初始化值通过 getByIndex(0) 获取 + Value* initVal = global->getByIndex(0); + printType(initVal->getType()); // 打印标量值的类型 + std::cout 
<< " "; + printValue(initVal); // 打印标量值 } std::cout << ", align 4" << std::endl; @@ -209,19 +214,19 @@ void SysYPrinter::printInst(Instruction *pInst) { case Kind::kFDiv: std::cout << "fdiv"; break; case Kind::kICmpEQ: std::cout << "icmp eq"; break; case Kind::kICmpNE: std::cout << "icmp ne"; break; - case Kind::kICmpLT: std::cout << "icmp slt"; break; + case Kind::kICmpLT: std::cout << "icmp slt"; break; // LLVM uses slt/sgt for signed less/greater than case Kind::kICmpGT: std::cout << "icmp sgt"; break; case Kind::kICmpLE: std::cout << "icmp sle"; break; case Kind::kICmpGE: std::cout << "icmp sge"; break; - case Kind::kFCmpEQ: std::cout << "fcmp oeq"; break; - case Kind::kFCmpNE: std::cout << "fcmp one"; break; - case Kind::kFCmpLT: std::cout << "fcmp olt"; break; - case Kind::kFCmpGT: std::cout << "fcmp ogt"; break; - case Kind::kFCmpLE: std::cout << "fcmp ole"; break; - case Kind::kFCmpGE: std::cout << "fcmp oge"; break; + case Kind::kFCmpEQ: std::cout << "fcmp oeq"; break; // oeq for ordered equal + case Kind::kFCmpNE: std::cout << "fcmp one"; break; // one for ordered not equal + case Kind::kFCmpLT: std::cout << "fcmp olt"; break; // olt for ordered less than + case Kind::kFCmpGT: std::cout << "fcmp ogt"; break; // ogt for ordered greater than + case Kind::kFCmpLE: std::cout << "fcmp ole"; break; // ole for ordered less than or equal + case Kind::kFCmpGE: std::cout << "fcmp oge"; break; // oge for ordered greater than or equal case Kind::kAnd: std::cout << "and"; break; case Kind::kOr: std::cout << "or"; break; - default: break; + default: break; // Should not reach here } // Types and operands @@ -238,7 +243,6 @@ void SysYPrinter::printInst(Instruction *pInst) { case Kind::kNeg: case Kind::kNot: case Kind::kFNeg: - case Kind::kFNot: case Kind::kFtoI: case Kind::kBitFtoI: case Kind::kItoF: @@ -250,31 +254,39 @@ void SysYPrinter::printInst(Instruction *pInst) { } switch (pInst->getKind()) { - case Kind::kNeg: std::cout << "sub "; break; - case Kind::kNot: 
std::cout << "not "; break; - case Kind::kFNeg: std::cout << "fneg "; break; - case Kind::kFNot: std::cout << "fneg "; break; // FNot not standard, map to fneg - case Kind::kFtoI: std::cout << "fptosi "; break; - case Kind::kBitFtoI: std::cout << "bitcast "; break; - case Kind::kItoF: std::cout << "sitofp "; break; - case Kind::kBitItoF: std::cout << "bitcast "; break; - default: break; + case Kind::kNeg: std::cout << "sub "; break; // integer negation is `sub i32 0, operand` + case Kind::kNot: std::cout << "xor "; break; // logical/bitwise NOT is `xor i32 -1, operand` or `xor i1 true, operand` + case Kind::kFNeg: std::cout << "fneg "; break; // float negation + case Kind::kFtoI: std::cout << "fptosi "; break; // float to signed integer + case Kind::kBitFtoI: std::cout << "bitcast "; break; // bitcast float to int + case Kind::kItoF: std::cout << "sitofp "; break; // signed integer to float + case Kind::kBitItoF: std::cout << "bitcast "; break; // bitcast int to float + default: break; // Should not reach here } - printType(unyInst->getType()); + printType(unyInst->getOperand()->getType()); // Print operand type std::cout << " "; - // Special handling for negation - if (pInst->getKind() == Kind::kNeg || pInst->getKind() == Kind::kNot) { - std::cout << "i32 0, "; + // Special handling for integer negation and logical NOT + if (pInst->getKind() == Kind::kNeg) { + std::cout << "0, "; // for 'sub i32 0, operand' + } else if (pInst->getKind() == Kind::kNot) { + // For logical NOT (i1 -> i1), use 'xor i1 true, operand' + // For bitwise NOT (i32 -> i32), use 'xor i32 -1, operand' + if (unyInst->getOperand()->getType()->isInt()) { // Assuming i32 for bitwise NOT + std::cout << "NOT, "; // or specific bitmask for NOT + } else { // Assuming i1 for logical NOT + std::cout << "true, "; + } } printValue(pInst->getOperand(0)); - // For bitcast, need to specify destination type - if (pInst->getKind() == Kind::kBitFtoI || pInst->getKind() == Kind::kBitItoF) { + // For type 
conversions (fptosi, sitofp, bitcast), need to specify destination type + if (pInst->getKind() == Kind::kFtoI || pInst->getKind() == Kind::kItoF || + pInst->getKind() == Kind::kBitFtoI || pInst->getKind() == Kind::kBitItoF) { std::cout << " to "; - printType(unyInst->getType()); + printType(unyInst->getType()); // Print result type } std::cout << std::endl; @@ -289,7 +301,7 @@ void SysYPrinter::printInst(Instruction *pInst) { } std::cout << "call "; - printType(callInst->getType()); + printType(callInst->getType()); // Return type of the call std::cout << " @" << function->getName() << "("; auto params = callInst->getArguments(); @@ -297,9 +309,9 @@ void SysYPrinter::printInst(Instruction *pInst) { for (auto ¶m : params) { if (!first) std::cout << ", "; first = false; - printType(param->getValue()->getType()); + printType(param->getValue()->getType()); // Type of argument std::cout << " "; - printValue(param->getValue()); + printValue(param->getValue()); // Value of argument } std::cout << ")" << std::endl; @@ -307,7 +319,7 @@ void SysYPrinter::printInst(Instruction *pInst) { case Kind::kCondBr: { auto condBrInst = dynamic_cast(pInst); - std::cout << "br i1 "; + std::cout << "br i1 "; // Condition type should be i1 printValue(condBrInst->getCondition()); std::cout << ", label %" << condBrInst->getThenBlock()->getName(); std::cout << ", label %" << condBrInst->getElseBlock()->getName(); @@ -337,14 +349,17 @@ void SysYPrinter::printInst(Instruction *pInst) { auto allocaInst = dynamic_cast(pInst); std::cout << "%" << allocaInst->getName() << " = alloca "; - auto baseType = dynamic_cast(allocaInst->getType())->getBaseType(); - printType(baseType); + // AllocaInst 的类型现在应该是一个 PointerType,指向正确的 ArrayType 或 ScalarType + // 例如:alloca i32, align 4 或者 alloca [10 x i32], align 4 + auto allocatedType = dynamic_cast(allocaInst->getType())->getBaseType(); + printType(allocatedType); - if (allocaInst->getNumDims() > 0) { + // 仍然打印维度信息,如果存在的话 + if (allocaInst->getNumDims() > 0) { 
std::cout << ", "; for (size_t i = 0; i < allocaInst->getNumDims(); i++) { if (i > 0) std::cout << ", "; - printType(Type::getIntType()); + printType(Type::getIntType()); // 维度大小通常是 i32 类型 std::cout << " "; printValue(allocaInst->getDim(i)); } @@ -356,70 +371,74 @@ void SysYPrinter::printInst(Instruction *pInst) { case Kind::kLoad: { auto loadInst = dynamic_cast(pInst); std::cout << "%" << loadInst->getName() << " = load "; - printType(loadInst->getType()); + printType(loadInst->getType()); // 加载的结果类型 std::cout << ", "; - printType(loadInst->getPointer()->getType()); + printType(loadInst->getPointer()->getType()); // 指针类型 std::cout << " "; - printValue(loadInst->getPointer()); + printValue(loadInst->getPointer()); // 要加载的地址 + // 仍然打印索引信息,如果存在的话 if (loadInst->getNumIndices() > 0) { - std::cout << ", "; + std::cout << ", indices "; // 或者其他分隔符,取决于你期望的格式 for (size_t i = 0; i < loadInst->getNumIndices(); i++) { - if (i > 0) std::cout << ", "; - printType(Type::getIntType()); - std::cout << " "; - printValue(loadInst->getIndex(i)); + if (i > 0) std::cout << ", "; + printType(loadInst->getIndex(i)->getType()); + std::cout << " "; + printValue(loadInst->getIndex(i)); } } std::cout << ", align 4" << std::endl; } break; - case Kind::kLa: { - auto laInst = dynamic_cast(pInst); - std::cout << "%" << laInst->getName() << " = getelementptr inbounds "; - - auto ptrType = dynamic_cast(laInst->getPointer()->getType()); - printType(ptrType->getBaseType()); - std::cout << ", "; - printType(laInst->getPointer()->getType()); - std::cout << " "; - printValue(laInst->getPointer()); - std::cout << ", "; - - for (size_t i = 0; i < laInst->getNumIndices(); i++) { - if (i > 0) std::cout << ", "; - printType(Type::getIntType()); - std::cout << " "; - printValue(laInst->getIndex(i)); - } - - std::cout << std::endl; - } break; - case Kind::kStore: { auto storeInst = dynamic_cast(pInst); std::cout << "store "; - printType(storeInst->getValue()->getType()); + 
printType(storeInst->getValue()->getType()); // 要存储的值的类型 std::cout << " "; - printValue(storeInst->getValue()); + printValue(storeInst->getValue()); // 要存储的值 std::cout << ", "; - printType(storeInst->getPointer()->getType()); + printType(storeInst->getPointer()->getType()); // 目标指针的类型 std::cout << " "; - printValue(storeInst->getPointer()); + printValue(storeInst->getPointer()); // 目标地址 + // 仍然打印索引信息,如果存在的话 if (storeInst->getNumIndices() > 0) { - std::cout << ", "; + std::cout << ", indices "; // 或者其他分隔符 for (size_t i = 0; i < storeInst->getNumIndices(); i++) { - if (i > 0) std::cout << ", "; - printType(Type::getIntType()); - std::cout << " "; - printValue(storeInst->getIndex(i)); + if (i > 0) std::cout << ", "; + printType(storeInst->getIndex(i)->getType()); + std::cout << " "; + printValue(storeInst->getIndex(i)); } } std::cout << ", align 4" << std::endl; } break; + + case Kind::kGetElementPtr: { // 新增:GetElementPtrInst 打印 + auto gepInst = dynamic_cast(pInst); + std::cout << "%" << gepInst->getName() << " = getelementptr inbounds "; // 假设总是 inbounds + + // GEP 的第一个操作数是基指针,其类型是一个指向聚合类型的指针 + // 第一个参数是基指针所指向的聚合类型的类型 (e.g., [10 x i32]) + auto basePtrType = dynamic_cast(gepInst->getBasePointer()->getType()); + printType(basePtrType->getBaseType()); // 打印基指针指向的类型 + + std::cout << ", "; + printType(gepInst->getBasePointer()->getType()); // 打印基指针自身的类型 (e.g., [10 x i32]*) + std::cout << " "; + printValue(gepInst->getBasePointer()); // 打印基指针 + + // 打印所有索引 + for (auto indexVal : gepInst->getIndices()) { // 使用 getIndices() 迭代器 + std::cout << ", "; + printType(indexVal->getValue()->getType()); // 打印索引的类型 (通常是 i32) + std::cout << " "; + printValue(indexVal->getValue()); // 打印索引值 + } + std::cout << std::endl; + } break; case Kind::kMemset: { auto memsetInst = dynamic_cast(pInst); @@ -433,51 +452,40 @@ void SysYPrinter::printInst(Instruction *pInst) { printValue(memsetInst->getValue()); std::cout << ", i32 "; printValue(memsetInst->getSize()); - std::cout << ", i1 false)" << 
std::endl; + std::cout << ", i1 false)" << std::endl; // alignment for memset is typically i1 } break; case Kind::kPhi: { auto phiInst = dynamic_cast(pInst); - printValue(phiInst->getOperand(0)); - std::cout << " = phi "; - printType(phiInst->getType()); + // Phi 指令的名称通常是结果变量 + std::cout << "%" << phiInst->getName() << " = phi "; + printType(phiInst->getType()); // Phi 结果类型 - for (unsigned i = 1; i < phiInst->getNumOperands(); i++) { - if (i > 0) std::cout << ", "; + // Phi 指令的操作数是成对的 [value, basic_block] + // 这里假设 getOperands() 返回的是 (val1, block1, val2, block2...) + // 如果你的 PhiInst 存储方式是 getIncomingValues() 和 getIncomingBlocks(),请相应调整 + // LLVM IR 格式: phi type [value1, block1], [value2, block2] + bool firstPair = true; + for (unsigned i = 0; i < phiInst->getNumOperands() / 2; ++i) { // 遍历成对的操作数 + if (!firstPair) std::cout << ", "; + firstPair = false; std::cout << "[ "; - printValue(phiInst->getOperand(i)); + printValue(phiInst->getOperand(i * 2)); // value + std::cout << ", %"; + printValue(phiInst->getOperand(i * 2 + 1)); // block std::cout << " ]"; } std::cout << std::endl; } break; - case Kind::kGetSubArray: { - auto getSubArrayInst = dynamic_cast(pInst); - std::cout << "%" << getSubArrayInst->getName() << " = getelementptr inbounds "; - - auto ptrType = dynamic_cast(getSubArrayInst->getFatherArray()->getType()); - printType(ptrType->getBaseType()); - std::cout << ", "; - printType(getSubArrayInst->getFatherArray()->getType()); - std::cout << " "; - printValue(getSubArrayInst->getFatherArray()); - std::cout << ", "; - bool firstIndex = true; - for (auto &index : getSubArrayInst->getIndices()) { - if (!firstIndex) std::cout << ", "; - firstIndex = false; - printType(Type::getIntType()); - std::cout << " "; - printValue(index->getValue()); - } - - std::cout << std::endl; - } break; + // 以下两个 Kind 应该删除或替换为 kGEP + // case Kind::kLa: { /* REMOVED */ } break; + // case Kind::kGetSubArray: { /* REMOVED */ } break; default: - assert(false && "Unsupported instruction 
kind"); + assert(false && "Unsupported instruction kind in SysYPrinter"); break; } } -} // namespace sysy +} // namespace sysy \ No newline at end of file diff --git a/src/include/IR.h b/src/include/IR.h index 060bdc5..6e35715 100644 --- a/src/include/IR.h +++ b/src/include/IR.h @@ -49,6 +49,7 @@ class Type { kLabel, kPointer, kFunction, + kArray, }; Kind kind; ///< 表示具体类型的变量 @@ -65,6 +66,7 @@ class Type { static Type* getPointerType(Type *baseType); ///< 返回表示指向baseType类型的Pointer类型的Type指针 static Type* getFunctionType(Type *returnType, const std::vector ¶mTypes = {}); ///< 返回表示返回类型为returnType,形参类型列表为paramTypes的函数类型的Type指针 + static Type* getArrayType(Type *elementType, unsigned numElements); public: Kind getKind() const { return kind; } ///< 返回Type对象代表原始标量类型 @@ -74,6 +76,7 @@ class Type { bool isLabel() const { return kind == kLabel; } ///< 判定是否为Label类型 bool isPointer() const { return kind == kPointer; } ///< 判定是否为Pointer类型 bool isFunction() const { return kind == kFunction; } ///< 判定是否为Function类型 + bool isArray() const { return kind == Kind::kArray; } unsigned getSize() const; ///< 返回类型所占的空间大小(字节) /// 尝试将一个变量转换为给定的Type及其派生类类型的变量 template @@ -115,6 +118,22 @@ class FunctionType : public Type { unsigned getNumParams() const { return paramTypes.size(); } ///< 获取形参数量 }; +class ArrayType : public Type { + public: + // elements:数组的元素类型 (例如,int[3] 的 elementType 是 int) + // numElements:该维度的大小 (例如,int[3] 的 numElements 是 3) + static ArrayType *get(Type *elementType, unsigned numElements); + + Type *getElementType() const { return elementType; } + unsigned getNumElements() const { return numElements; } + + protected: + ArrayType(Type *elementType, unsigned numElements) + : Type(Kind::kArray), elementType(elementType), numElements(numElements) {} + Type *elementType; + unsigned numElements; // 当前维度的大小 +}; + /*! 
* @} */ @@ -602,49 +621,6 @@ class User : public Value { void setOperand(unsigned index, Value *value); ///< 设置操作数 }; -class GetSubArrayInst; -/** - * 左值 具有地址的对象 - */ -class LVal { - friend class GetSubArrayInst; - - protected: - LVal *fatherLVal{}; ///< 父左值 - std::list> childrenLVals; ///< 子左值 - GetSubArrayInst *defineInst{}; /// 定义该左值的GetSubArray指令 - - protected: - LVal() = default; - - public: - virtual ~LVal() = default; - virtual std::vector getLValDims() const = 0; ///< 获取左值的维度 - virtual unsigned getLValNumDims() const = 0; ///< 获取左值的维度数量 - - public: - LVal* getFatherLVal() const { return fatherLVal; } ///< 获取父左值 - const std::list>& getChildrenLVals() const { - return childrenLVals; - } ///< 获取子左值列表 - LVal* getAncestorLVal() const { - auto curLVal = const_cast(this); - while (curLVal->getFatherLVal() != nullptr) { - curLVal = curLVal->getFatherLVal(); - } - return curLVal; - } ///< 获取祖先左值 - void setFatherLVal(LVal *father) { fatherLVal = father; } ///< 设置父左值 - void setDefineInst(GetSubArrayInst *inst) { defineInst = inst; } ///< 设置定义指令 - void addChild(LVal *child) { childrenLVals.emplace_back(child); } ///< 添加子左值 - void removeChild(LVal *child) { - auto iter = std::find_if(childrenLVals.begin(), childrenLVals.end(), - [child](const std::unique_ptr &ptr) { return ptr.get() == child; }); - childrenLVals.erase(iter); - } ///< 移除子左值 - GetSubArrayInst* getDefineInst() const { return defineInst; } ///< 获取定义指令 -}; - /*! * Base of all concrete instruction types. */ @@ -694,15 +670,15 @@ class Instruction : public User { kAlloca = 0x1UL << 33, kLoad = 0x1UL << 34, kStore = 0x1UL << 35, - kLa = 0x1UL << 36, + kGetElementPtr = 0x1UL << 36, kMemset = 0x1UL << 37, - kGetSubArray = 0x1UL << 38, + // kGetSubArray = 0x1UL << 38, // Constant Kind removed as Constants are now Values, not Instructions. 
// kConstant = 0x1UL << 37, // Conflicts with kMemset if kept as is // phi kPhi = 0x1UL << 39, kBitItoF = 0x1UL << 40, - kBitFtoI = 0x1UL << 41 + kBitFtoI = 0x1UL << 41, }; protected: @@ -793,14 +769,12 @@ public: return "Load"; case kStore: return "Store"; - case kLa: - return "La"; + case kGetElementPtr: + return "GetElementPtr"; case kMemset: return "Memset"; case kPhi: return "Phi"; - case kGetSubArray: - return "GetSubArray"; default: return "Unknown"; } @@ -853,9 +827,8 @@ public: bool isAlloca() const { return kind == kAlloca; } bool isLoad() const { return kind == kLoad; } bool isStore() const { return kind == kStore; } - bool isLa() const { return kind == kLa; } + bool isGetElementPtr() const { return kind == kGetElementPtr; } bool isMemset() const { return kind == kMemset; } - bool isGetSubArray() const { return kind == kGetSubArray; } bool isCall() const { return kind == kCall; } bool isReturn() const { return kind == kReturn; } bool isDefine() const { @@ -867,26 +840,6 @@ public: class Function; //! Function call. -class LaInst : public Instruction { - friend class Function; - friend class IRBuilder; - - protected: - explicit LaInst(Value *pointer, const std::vector &indices = {}, BasicBlock *parent = nullptr, - const std::string &name = "") - : Instruction(Kind::kLa, pointer->getType(), parent, name) { - assert(pointer); - addOperand(pointer); - addOperands(indices); - } - - public: - unsigned getNumIndices() const { return getNumOperands() - 1; } ///< 获取索引长度 - Value* getPointer() const { return getOperand(0); } ///< 获取目标变量的Value指针 - auto getIndices() const { return make_range(std::next(operand_begin()), operand_end()); } ///< 获取索引列表 - Value* getIndex(unsigned index) const { return getOperand(index + 1); } ///< 获取位置为index的索引分量 -}; - class PhiInst : public Instruction { friend class IRBuilder; friend class Function; @@ -1134,7 +1087,7 @@ public: }; // class CondBrInst //! 
Allocate memory for stack variables, used for non-global variable declartion -class AllocaInst : public Instruction , public LVal { +class AllocaInst : public Instruction { friend class IRBuilder; friend class Function; protected: @@ -1145,14 +1098,6 @@ protected: } public: - std::vector getLValDims() const override { - std::vector dims; - for (const auto &dim : getOperands()) { - dims.emplace_back(dim->getValue()); - } - return dims; - } ///< 获取作为左值的维度数组 - unsigned getLValNumDims() const override { return getNumOperands(); } int getNumDims() const { return getNumOperands(); } auto getDims() const { return getOperands(); } @@ -1161,37 +1106,40 @@ public: }; // class AllocaInst -class GetSubArrayInst : public Instruction { - friend class IRBuilder; - friend class Function; +class GetElementPtrInst : public Instruction { + friend class IRBuilder; // 如果您有IRBuilder来创建指令,需要friend - public: - GetSubArrayInst(LVal *fatherArray, LVal *childArray, const std::vector &indices, - BasicBlock *parent = nullptr, const std::string &name = "") - : Instruction(Kind::kGetSubArray, Type::getVoidType(), parent, name) { - auto predicate = [childArray](const std::unique_ptr &child) -> bool { return child.get() == childArray; }; - if (std::find_if(fatherArray->childrenLVals.begin(), fatherArray->childrenLVals.end(), predicate) == - fatherArray->childrenLVals.end()) { - fatherArray->childrenLVals.emplace_back(childArray); - } - childArray->fatherLVal = fatherArray; - childArray->defineInst = this; - auto fatherArrayValue = dynamic_cast(fatherArray); - auto childArrayValue = dynamic_cast(childArray); - assert(fatherArrayValue); - assert(childArrayValue); - addOperand(fatherArrayValue); - addOperand(childArrayValue); - addOperands(indices); +protected: + // GEP的构造函数: + // resultType: GEP计算出的地址的类型 (通常是指向目标元素类型的指针) + // basePointer: 基指针 (第一个操作数) + // indices: 索引列表 (后续操作数) + GetElementPtrInst(Value *basePointer, + const std::vector &indices = {}, + BasicBlock *parent = nullptr, const 
std::string &name = "") + : Instruction(Kind::kGetElementPtr, basePointer->getType(), parent, name) { + assert(basePointer && "GEP base pointer cannot be null!"); + // TODO : 安全检查 + assert(basePointer->getType()->isPointer() ); + addOperand(basePointer); // 第一个操作数是基指针 + addOperands(indices); // 随后的操作数是索引 } - public: - Value* getFatherArray() const { return getOperand(0); } ///< 获取父数组 - Value* getChildArray() const { return getOperand(1); } ///< 获取子数组 - LVal* getFatherLVal() const { return dynamic_cast(getOperand(0)); } ///< 获取父左值 - LVal* getChildLVal() const { return dynamic_cast(getOperand(1)); } ///< 获取子左值 - auto getIndices() const { return make_range(std::next(operand_begin(), 2), operand_end()); } ///< 获取索引 - unsigned getNumIndices() const { return getNumOperands() - 2; } ///< 获取索引数量 +public: + Value* getBasePointer() const { return getOperand(0); } + unsigned getNumIndices() const { return getNumOperands() - 1; } + auto getIndices() const { return make_range(std::next(operand_begin()), operand_end());} + Value* getIndex(unsigned index) const { + assert(index < getNumIndices() && "Index out of bounds for GEP!"); + return getOperand(index + 1); + } + + // 静态工厂方法,用于创建GEP指令 (如果需要外部直接创建而非通过IRBuilder) + static GetElementPtrInst* create(Type *resultType, Value *basePointer, + const std::vector &indices = {}, + BasicBlock *parent = nullptr, const std::string &name = "") { + return new GetElementPtrInst(basePointer, indices, parent, name); + } }; //! 
Load a value from memory address specified by a pointer value @@ -1215,22 +1163,7 @@ public: return make_range(std::next(operand_begin()), operand_end()); } Value* getIndex(int index) const { return getOperand(index + 1); } - std::list getAncestorIndices() const { - std::list indices; - for (const auto &index : getIndices()) { - indices.emplace_back(index->getValue()); - } - auto curPointer = dynamic_cast(getPointer()); - while (curPointer->getFatherLVal() != nullptr) { - auto inserter = std::next(indices.begin()); - for (const auto &index : curPointer->getDefineInst()->getIndices()) { - indices.insert(inserter, index->getValue()); - } - curPointer = curPointer->getFatherLVal(); - } - - return indices; - } ///< 获取相对于祖先数组的索引列表 + }; // class LoadInst //! Store a value to memory address specified by a pointer value @@ -1256,22 +1189,6 @@ public: return make_range(std::next(operand_begin(), 2), operand_end()); } Value* getIndex(int index) const { return getOperand(index + 2); } - std::list getAncestorIndices() const { - std::list indices; - for (const auto &index : getIndices()) { - indices.emplace_back(index->getValue()); - } - auto curPointer = dynamic_cast(getPointer()); - while (curPointer->getFatherLVal() != nullptr) { - auto inserter = std::next(indices.begin()); - for (const auto &index : curPointer->getDefineInst()->getIndices()) { - indices.insert(inserter, index->getValue()); - } - curPointer = curPointer->getFatherLVal(); - } - - return indices; - } ///< 获取相对于祖先数组的索引列表 }; // class StoreInst @@ -1373,7 +1290,7 @@ protected: }; //! 
Global value declared at file scope -class GlobalValue : public User, public LVal { +class GlobalValue : public User { friend class Module; protected: @@ -1407,16 +1324,6 @@ protected: } public: - unsigned getLValNumDims() const override { return numDims; } ///< 获取作为左值的维度数量 - std::vector getLValDims() const override { - std::vector dims; - for (const auto &dim : getOperands()) { - dims.emplace_back(dim->getValue()); - } - - return dims; - } ///< 获取作为左值的维度列表 - unsigned getNumDims() const { return numDims; } ///< 获取维度数量 Value* getDim(unsigned index) const { return getOperand(index); } ///< 获取位置为index的维度 auto getDims() const { return getOperands(); } ///< 获取维度列表 @@ -1438,7 +1345,7 @@ public: }; // class GlobalValue -class ConstantVariable : public User, public LVal { +class ConstantVariable : public User { friend class Module; protected: @@ -1457,15 +1364,6 @@ class ConstantVariable : public User, public LVal { } public: - unsigned getLValNumDims() const override { return numDims; } ///< 获取作为左值的维度数量 - std::vector getLValDims() const override { - std::vector dims; - for (const auto &dim : getOperands()) { - dims.emplace_back(dim->getValue()); - } - - return dims; - } ///< 获取作为左值的维度列表 Value* getByIndex(unsigned index) const { return initValues.getValue(index); } ///< 通过一维位置index获取值 Value* getByIndices(const std::vector &indices) const { int index = 0; diff --git a/src/include/IRBuilder.h b/src/include/IRBuilder.h index 6df82e7..03df66a 100644 --- a/src/include/IRBuilder.h +++ b/src/include/IRBuilder.h @@ -280,46 +280,6 @@ class IRBuilder { block->getInstructions().emplace(position, inst); return inst; } ///< 创建load指令 - LaInst * createLaInst(Value *pointer, const std::vector &indices = {}, const std::string &name = "") { - std::string newName; - if (name.empty()) { - std::stringstream ss; - ss << tmpIndex; - newName = ss.str(); - tmpIndex++; - } else { - newName = name; - } - - auto inst = new LaInst(pointer, indices, block, newName); - assert(inst); - 
block->getInstructions().emplace(position, inst); - return inst; - } ///< 创建la指令 - GetSubArrayInst * createGetSubArray(LVal *fatherArray, const std::vector &indices, const std::string &name = "") { - assert(fatherArray->getLValNumDims() > indices.size()); - std::vector subDims; - auto dims = fatherArray->getLValDims(); - auto iter = std::next(dims.begin(), indices.size()); - while (iter != dims.end()) { - subDims.emplace_back(*iter); - iter++; - } - - std::string childArrayName; - std::stringstream ss; - ss << "A" - << "%" << tmpIndex; - childArrayName = ss.str(); - tmpIndex++; - - auto fatherArrayValue = dynamic_cast(fatherArray); - auto childArray = new AllocaInst(fatherArrayValue->getType(), subDims, block, childArrayName); - auto inst = new GetSubArrayInst(fatherArray, childArray, indices, block, childArrayName); - assert(inst); - block->getInstructions().emplace(position, inst); - return inst; - } ///< 创建获取部分数组指令 MemsetInst * createMemsetInst(Value *pointer, Value *begin, Value *size, Value *value, const std::string &name = "") { auto inst = new MemsetInst(pointer, begin, size, value, block, name); assert(inst); @@ -340,6 +300,24 @@ class IRBuilder { block->getInstructions().emplace(block->begin(), inst); return inst; } ///< 创建Phi指令 + GetElementPtrInst* createGetElementPtrInst(Value *basePointer, + const std::vector &indices = {}, + const std::string &name = "") { + std::string newName; + if (name.empty()) { + std::stringstream ss; + ss << tmpIndex; + newName = ss.str(); + tmpIndex++; + } else { + newName = name; + } + + auto inst = new GetElementPtrInst(basePointer, indices, block, newName); + assert(inst); + block->getInstructions().emplace(position, inst); + return inst; + } }; } // namespace sysy diff --git a/src/include/SysYIRGenerator.h b/src/include/SysYIRGenerator.h index fe309e8..9dfd7c5 100644 --- a/src/include/SysYIRGenerator.h +++ b/src/include/SysYIRGenerator.h @@ -68,6 +68,7 @@ public: Module *get() const { return module.get(); } IRBuilder 
*getBuilder(){ return &builder; } public: + std::any visitCompUnit(SysYParser::CompUnitContext *ctx) override; std::any visitGlobalConstDecl(SysYParser::GlobalConstDeclContext *ctx) override; @@ -134,6 +135,11 @@ public: // std::any visitConstExp(SysYParser::ConstExpContext *ctx) override; +public: + // 获取GEP指令的地址 + Value* getGEPAddressInst(Value* basePointer, const std::vector& indices); + // 构建数组类型 + Type* buildArrayType(Type* baseType, const std::vector& dims); }; // class SysYIRGenerator diff --git a/src/sysyc.cpp b/src/sysyc.cpp index 4fdc7cc..1afe7c9 100644 --- a/src/sysyc.cpp +++ b/src/sysyc.cpp @@ -15,7 +15,7 @@ using namespace antlr4; #include "SysYIRPrinter.h" #include "SysYIRCFGOpt.h" #include "RISCv64Backend.h" -#include "SysYIRAnalyser.h" +// #include "SysYIRAnalyser.h" // #include "DeadCodeElimination.h" #include "AddressCalculationExpansion.h" // #include "Mem2Reg.h" @@ -135,10 +135,10 @@ int main(int argc, char **argv) { SysYCFGOpt cfgopt(moduleIR, builder); cfgopt.SysYOptimizateAfterIR(); - ControlFlowAnalysis cfa(moduleIR); - cfa.init(); - ActiveVarAnalysis ava; - ava.init(moduleIR); + // ControlFlowAnalysis cfa(moduleIR); + // cfa.init(); + // ActiveVarAnalysis ava; + // ava.init(moduleIR); if (DEBUG) { cout << "=== After CFA & AVA (Default) ===\n"; From 88604c1f94449b0089f7de6884a5d20edafa0cad Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Sun, 20 Jul 2025 18:23:48 +0800 Subject: [PATCH 15/35] =?UTF-8?q?[IR]=E6=B6=88=E9=99=A4Falltrhough?= =?UTF-8?q?=E7=8E=B0=E8=B1=A1=20[IR]=E4=BC=98=E5=8C=96=E7=94=9F=E6=88=90Re?= =?UTF-8?q?t=E6=8C=87=E4=BB=A4=E9=80=BB=E8=BE=91=20[README]=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0TODO=E8=A1=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 11 ++++++++++- src/SysYIRGenerator.cpp | 35 ++++++++++++++++++++++++----------- src/include/SysYIRGenerator.h | 2 -- 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 
65660b9..085b55b 100644 --- a/README.md +++ b/README.md @@ -37,4 +37,13 @@ mysysy/ $ bash setup.sh ``` ### 配套脚本 - (TODO: 需要完善) \ No newline at end of file + (TODO: 需要完善) + + +### TODO_list: + +除开注释中的TODO后续时间充足可以考虑的TODO: + +- store load指令由于gep指令的引入, 维度信息的记录是非必须的, 考虑删除 + +- use def关系经过mem2reg和phi函数明确转换为ssa形式, 以及函数参数通过value数组明确定义, 使得基本块的args参数信息记录非必须, 考虑删除 \ No newline at end of file diff --git a/src/SysYIRGenerator.cpp b/src/SysYIRGenerator.cpp index 6302aa5..1122b3e 100644 --- a/src/SysYIRGenerator.cpp +++ b/src/SysYIRGenerator.cpp @@ -340,7 +340,6 @@ std::any SysYIRGenerator::visitFuncType(SysYParser::FuncTypeContext *ctx) { std::any SysYIRGenerator::visitFuncDef(SysYParser::FuncDefContext *ctx){ // 更新作用域 module->enterNewScope(); - HasReturnInst = false; auto name = ctx->Ident()->getText(); std::vector paramTypes; @@ -376,22 +375,34 @@ std::any SysYIRGenerator::visitFuncDef(SysYParser::FuncDefContext *ctx){ module->addVariable(paramNames[i], alloca); } + // 在处理函数体之前,创建一个新的基本块作为函数体的实际入口 + // 这样 entryBB 就可以在完成初始化后跳转到这里 + BasicBlock* funcBodyEntry = function->addBasicBlock("funcBodyEntry"); + + // 从 entryBB 无条件跳转到 funcBodyEntry + builder.createUncondBrInst(funcBodyEntry, {}); + builder.setPosition(funcBodyEntry,funcBodyEntry->end()); // 将插入点设置到 funcBodyEntry + for (auto item : ctx->blockStmt()->blockItem()) { visitBlockItem(item); } - if(HasReturnInst == false) { - // 如果没有return语句,则默认返回0 - if (returnType != Type::getVoidType()) { - Value* returnValue = ConstantInteger::get(0); - if (returnType == Type::getFloatType()) { - returnValue = ConstantFloating::get(0.0f); - } - builder.createReturnInst(returnValue); + // 如果函数没有显式的返回语句,且返回类型不是 void,则需要添加一个默认的返回值 + ReturnInst* retinst = nullptr; + retinst = dynamic_cast(builder.getBasicBlock()->terminator()->get()); + + if (!retinst) { + if (returnType->isVoid()) { + builder.createReturnInst(); + } else if (returnType->isInt()) { + builder.createReturnInst(ConstantInteger::get(0)); // 默认返回 0 + } else if (returnType->isFloat()) 
{ + builder.createReturnInst(ConstantFloating::get(0.0f)); // 默认返回 0.0f } else { - builder.createReturnInst(); + assert(false && "Function with no explicit return and non-void type should return a value."); } } + module->leaveScope(); return std::any(); @@ -549,6 +560,7 @@ std::any SysYIRGenerator::visitIfStmt(SysYParser::IfStmtContext *ctx) { ctx->stmt(0)->accept(this); module->leaveScope(); } + builder.createUncondBrInst(exitBlock, {}); BasicBlock::conectBlocks(builder.getBasicBlock(), exitBlock); labelstring << "if_exit.L" << builder.getLabelIndex(); @@ -570,6 +582,7 @@ std::any SysYIRGenerator::visitWhileStmt(SysYParser::WhileStmtContext *ctx) { labelstring << "while_head.L" << builder.getLabelIndex(); BasicBlock *headBlock = function->addBasicBlock(labelstring.str()); labelstring.str(""); + builder.createUncondBrInst(headBlock, {}); BasicBlock::conectBlocks(curBlock, headBlock); builder.setPosition(headBlock, headBlock->end()); @@ -654,7 +667,7 @@ std::any SysYIRGenerator::visitReturnStmt(SysYParser::ReturnStmtContext *ctx) { } } builder.createReturnInst(returnValue); - HasReturnInst = true; + return std::any(); } diff --git a/src/include/SysYIRGenerator.h b/src/include/SysYIRGenerator.h index 9dfd7c5..66ce11c 100644 --- a/src/include/SysYIRGenerator.h +++ b/src/include/SysYIRGenerator.h @@ -62,8 +62,6 @@ private: public: SysYIRGenerator() = default; - bool HasReturnInst; - public: Module *get() const { return module.get(); } IRBuilder *getBuilder(){ return &builder; } From 550f4017bed3c68e8ef485d73f085d8c1823c9e7 Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Mon, 21 Jul 2025 15:19:38 +0800 Subject: [PATCH 16/35] =?UTF-8?q?[midend]=E9=87=8D=E6=9E=84=E4=B8=AD?= =?UTF-8?q?=E7=AB=AF=EF=BC=8C=E5=BB=BA=E7=AB=8B=E9=81=8D=E7=AE=A1=E7=90=86?= =?UTF-8?q?=E5=99=A8=EF=BC=8C=E6=B3=A8=E5=86=8C=E5=99=A8=E7=AD=89=EF=BC=8C?= =?UTF-8?q?=E5=88=9D=E6=AD=A5=E6=9E=84=E5=BB=BA=E6=94=AF=E9=85=8D=E6=A0=91?= 
=?UTF-8?q?=E5=88=86=E6=9E=90=E9=81=8D=EF=BC=8C=E5=A2=9E=E5=8A=A0=E5=9F=BA?= =?UTF-8?q?=E6=9C=AC=E5=9D=97=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Dom.cpp | 181 ++++++++++++++++++++++ src/include/Dom.h | 52 +++++++ src/include/IR.h | 19 ++- src/include/Pass.h | 284 +++++++++++++++++++++++++++++++++++ src/include/SysYIROptUtils.h | 3 + 5 files changed, 538 insertions(+), 1 deletion(-) create mode 100644 src/Dom.cpp create mode 100644 src/include/Dom.h create mode 100644 src/include/Pass.h diff --git a/src/Dom.cpp b/src/Dom.cpp new file mode 100644 index 0000000..beb1d65 --- /dev/null +++ b/src/Dom.cpp @@ -0,0 +1,181 @@ +#include "Dom.h" +#include // for std::numeric_limits +#include + +namespace sysy { + +// 初始化 支配树静态 ID +char DominatorTreeAnalysisPass::ID = 0; + +// ============================================================== +// DominatorTree 结果类的实现 +// ============================================================== + +DominatorTree::DominatorTree(Function *F) : AssociatedFunction(F) { + // 构造时可以不计算,在分析遍运行里计算并填充 +} + +const std::set *DominatorTree::getDominators(BasicBlock *BB) const { + auto it = Dominators.find(BB); + if (it != Dominators.end()) { + return &(it->second); + } + return nullptr; +} + +BasicBlock *DominatorTree::getImmediateDominator(BasicBlock *BB) const { + auto it = IDoms.find(BB); + if (it != IDoms.end()) { + return it->second; + } + return nullptr; +} + +const std::set *DominatorTree::getDominanceFrontier(BasicBlock *BB) const { + auto it = DominanceFrontiers.find(BB); + if (it != DominanceFrontiers.end()) { + return &(it->second); + } + return nullptr; +} + +void DominatorTree::computeDominators(Function *F) { + // 经典的迭代算法计算支配者集合 + // TODO: 可以替换为更高效的算法,如 Lengauer-Tarjan 算法 + BasicBlock *entryBlock = F->getEntryBlock(); + + for (const auto &bb_ptr : F->getBasicBlocks()) { + BasicBlock *bb = bb_ptr.get(); + if (bb == entryBlock) { + Dominators[bb].insert(bb); + } else { 
+ for (const auto &all_bb_ptr : F->getBasicBlocks()) { + Dominators[bb].insert(all_bb_ptr.get()); + } + } + } + + bool changed = true; + while (changed) { + changed = false; + for (const auto &bb_ptr : F->getBasicBlocks()) { + BasicBlock *bb = bb_ptr.get(); + if (bb == entryBlock) + continue; + + std::set newDom; + bool firstPred = true; + for (BasicBlock *pred : bb->getPredecessors()) { + if (Dominators.count(pred)) { + if (firstPred) { + newDom = Dominators[pred]; + firstPred = false; + } else { + std::set intersection; + std::set_intersection(newDom.begin(), newDom.end(), Dominators[pred].begin(), Dominators[pred].end(), + std::inserter(intersection, intersection.begin())); + newDom = intersection; + } + } + } + newDom.insert(bb); + + if (newDom != Dominators[bb]) { + Dominators[bb] = newDom; + changed = true; + } + } + } +} + +void DominatorTree::computeIDoms(Function *F) { + // 采用与之前类似的简化实现。TODO:Lengauer-Tarjan等算法。 + BasicBlock *entryBlock = F->getEntryBlock(); + IDoms[entryBlock] = nullptr; + + for (const auto &bb_ptr : F->getBasicBlocks()) { + BasicBlock *bb = bb_ptr.get(); + if (bb == entryBlock) + continue; + + BasicBlock *currentIDom = nullptr; + const std::set *domsOfBB = getDominators(bb); + if (!domsOfBB) + continue; + + for (BasicBlock *D : *domsOfBB) { + if (D == bb) + continue; + + bool isCandidateIDom = true; + for (BasicBlock *candidate : *domsOfBB) { + if (candidate == bb || candidate == D) + continue; + const std::set *domsOfCandidate = getDominators(candidate); + if (domsOfCandidate && domsOfCandidate->count(D) == 0 && domsOfBB->count(candidate)) { + isCandidateIDom = false; + break; + } + } + if (isCandidateIDom) { + currentIDom = D; + break; + } + } + IDoms[bb] = currentIDom; + } +} + +void DominatorTree::computeDominanceFrontiers(Function *F) { + // 经典的支配边界计算算法 + for (const auto &bb_ptr_X : F->getBasicBlocks()) { + BasicBlock *X = bb_ptr_X.get(); + DominanceFrontiers[X].clear(); + + for (BasicBlock *Y : X->getSuccessors()) { + const std::set 
*domsOfY = getDominators(Y); + if (domsOfY && domsOfY->find(X) == domsOfY->end()) { + DominanceFrontiers[X].insert(Y); + } + } + + const std::set *domsOfX = getDominators(X); + if (!domsOfX) + continue; + for (const auto &bb_ptr_Z : F->getBasicBlocks()) { + BasicBlock *Z = bb_ptr_Z.get(); + if (Z == X) + continue; + const std::set *domsOfZ = getDominators(Z); + if (domsOfZ && domsOfZ->count(X) && Z != X) { + + for (BasicBlock *Y : Z->getSuccessors()) { + const std::set *domsOfY = getDominators(Y); + if (domsOfY && domsOfY->find(X) == domsOfY->end()) { + DominanceFrontiers[X].insert(Y); + } + } + } + } + } +} + +// ============================================================== +// DominatorTreeAnalysisPass 的实现 +// ============================================================== + + +bool DominatorTreeAnalysisPass::runOnFunction(Function* F) { + CurrentDominatorTree = std::make_unique(F); + CurrentDominatorTree->computeDominators(F); + CurrentDominatorTree->computeIDoms(F); + CurrentDominatorTree->computeDominanceFrontiers(F); + return false; +} + +std::unique_ptr DominatorTreeAnalysisPass::getResult() { + // 返回计算好的 DominatorTree 实例,所有权转移给 AnalysisManager + return std::move(CurrentDominatorTree); +} + +} // namespace sysy \ No newline at end of file diff --git a/src/include/Dom.h b/src/include/Dom.h new file mode 100644 index 0000000..6b38f83 --- /dev/null +++ b/src/include/Dom.h @@ -0,0 +1,52 @@ +#pragma once + +#include "Pass.h" // 包含 Pass 框架 +#include "IR.h" // 包含 IR 定义 +#include +#include +#include +#include + +namespace sysy { + +// 支配树分析结果类 (保持不变) +class DominatorTree : public AnalysisResultBase { +public: + DominatorTree(Function* F); + const std::set* getDominators(BasicBlock* BB) const; + BasicBlock* getImmediateDominator(BasicBlock* BB) const; + const std::set* getDominanceFrontier(BasicBlock* BB) const; + const std::map>& getDominatorsMap() const { return Dominators; } + const std::map& getIDomsMap() const { return IDoms; } + const std::map>& 
getDominanceFrontiersMap() const { return DominanceFrontiers; } + void computeDominators(Function* F); + void computeIDoms(Function* F); + void computeDominanceFrontiers(Function* F); +private: + Function* AssociatedFunction; + std::map> Dominators; + std::map IDoms; + std::map> DominanceFrontiers; +}; + + +// 支配树分析遍 +class DominatorTreeAnalysisPass : public AnalysisPass { +public: + // 唯一的 Pass ID + static char ID; // LLVM 风格的唯一 ID + + DominatorTreeAnalysisPass() : AnalysisPass("DominatorTreeAnalysis", Pass::Granularity::Function) {} + + // 实现 getPassID + void* getPassID() const override { return &ID; } + + bool runOnFunction(Function* F) override; + + std::unique_ptr getResult() override; + +private: + std::unique_ptr CurrentDominatorTree; +}; + +} // namespace sysy \ No newline at end of file diff --git a/src/include/IR.h b/src/include/IR.h index 6e35715..2abe3e1 100644 --- a/src/include/IR.h +++ b/src/include/IR.h @@ -522,12 +522,20 @@ public: void setParent(Function *func) { parent = func; } inst_list& getInstructions() { return instructions; } arg_list& getArguments() { return arguments; } - const block_list& getPredecessors() const { return predecessors; } + block_list& getPredecessors() { return predecessors; } + void clearPredecessors() { predecessors.clear(); } block_list& getSuccessors() { return successors; } + void clearSuccessors() { successors.clear(); } iterator begin() { return instructions.begin(); } iterator end() { return instructions.end(); } iterator terminator() { return std::prev(end()); } void insertArgument(AllocaInst *inst) { arguments.push_back(inst); } + bool hasSuccessor(BasicBlock *block) const { + return std::find(successors.begin(), successors.end(), block) != successors.end(); + } ///< 判断是否有后继块 + bool hasPredecessor(BasicBlock *block) const { + return std::find(predecessors.begin(), predecessors.end(), block) != predecessors.end(); + } ///< 判断是否有前驱块 void addPredecessor(BasicBlock *block) { if (std::find(predecessors.begin(), 
predecessors.end(), block) == predecessors.end()) { predecessors.push_back(block); @@ -580,6 +588,15 @@ public: next->addPredecessor(prev); } void removeInst(iterator pos) { instructions.erase(pos); } + void removeInst(Instruction *inst) { + auto pos = std::find_if(instructions.begin(), instructions.end(), + [inst](const std::unique_ptr &i) { return i.get() == inst; }); + if (pos != instructions.end()) { + instructions.erase(pos); + } else { + assert(false && "Instruction not found in BasicBlock"); + } + } ///< 移除指定位置的指令 iterator moveInst(iterator sourcePos, iterator targetPos, BasicBlock *block); }; diff --git a/src/include/Pass.h b/src/include/Pass.h new file mode 100644 index 0000000..063462f --- /dev/null +++ b/src/include/Pass.h @@ -0,0 +1,284 @@ +#pragma once + +#include // For std::function +#include +#include +#include +#include +#include // For std::type_index (although void* ID is more common in LLVM) +#include + +namespace sysy { + +// 抽象基类:分析结果 +class AnalysisResultBase { +public: + virtual ~AnalysisResultBase() = default; +}; + +// 抽象基类:Pass +class Pass { +public: + enum class Granularity { Module, Function, BasicBlock }; + + enum class PassKind { Analysis, Optimization }; + + Pass(const std::string &name, Granularity g, PassKind k) : Name(name), G(g), K(k) {} + virtual ~Pass() = default; + + const std::string &getName() const { return Name; } + Granularity getGranularity() const { return G; } + PassKind getPassKind() const { return K; } + + virtual bool runOnModule(Module *M, AnalysisManager& AM) { return false; } + virtual bool runOnFunction(Function *F, AnalysisManager& AM) { return false; } + virtual bool runOnBasicBlock(BasicBlock *BB, AnalysisManager& AM) { return false; } + + // 所有 Pass 都必须提供一个唯一的 ID + // 这通常是一个静态成员,并在 Pass 类外部定义 + virtual void *getPassID() const = 0; + +protected: + std::string Name; + Granularity G; + PassKind K; +}; + +// 抽象基类:分析遍 +class AnalysisPass : public Pass { +public: + AnalysisPass(const std::string &name, Granularity 
g) : Pass(name, g, PassKind::Analysis) {} + + virtual std::unique_ptr getResult() = 0; +}; + +// 抽象基类:优化遍 +class OptimizationPass : public Pass { +public: + OptimizationPass(const std::string &name, Granularity g) : Pass(name, g, PassKind::Optimization) {} + + virtual void getAnalysisUsage(std::set &analysisDependencies, std::set &analysisInvalidations) const { + // 默认不依赖也不修改任何分析 + } +}; + +// ====================================================================== +// PassRegistry: 全局 Pass 注册表 (单例) +// ====================================================================== +class PassRegistry { +public: + // Pass 工厂函数类型:返回 Pass 的唯一指针 + using PassFactory = std::function()>; + + // 获取 PassRegistry 实例 (单例模式) + static PassRegistry &getPassRegistry() { + static PassRegistry instance; + return instance; + } + + // 注册一个 Pass + // passID 是 Pass 类的唯一静态 ID (例如 MyPass::ID 的地址) + // factory 是一个 lambda 或函数指针,用于创建该 Pass 的实例 + void registerPass(void *passID, PassFactory factory) { + if (factories.count(passID)) { + // Error: Pass with this ID already registered + // You might want to throw an exception or log an error + return; + } + factories[passID] = std::move(factory); + } + + // 通过 Pass ID 创建一个 Pass 实例 + std::unique_ptr createPass(void *passID) { + auto it = factories.find(passID); + if (it == factories.end()) { + // Error: Pass with this ID not registered + return nullptr; + } + return it->second(); // 调用工厂函数创建实例 + } + +private: + PassRegistry() = default; // 私有构造函数,实现单例 + ~PassRegistry() = default; + PassRegistry(const PassRegistry &) = delete; // 禁用拷贝构造 + PassRegistry &operator=(const PassRegistry &) = delete; // 禁用赋值操作 + + std::map factories; +}; + +// ====================================================================== +// AnalysisManager: 负责管理和提供分析结果 +// ====================================================================== +class AnalysisManager { +public: + AnalysisManager() = default; + ~AnalysisManager() = default; + + // 获取分析结果 + // T 是 AnalysisResult 的具体类型,E 是 
AnalysisPass 的具体类型 + // PassManager 应该在运行 Pass 之前调用 registerAnalysisPass + template T *getAnalysisResult(Function *F) { // 针对函数级别的分析,需要传入 Function* + void *analysisID = E::ID; // 获取分析遍的唯一 ID + + // 检查是否已存在有效结果 + auto it = cachedResults.find({F, analysisID}); + if (it != cachedResults.end()) { + return static_cast(it->second.get()); // 返回缓存结果 + } + + // 如果没有缓存结果,通过 PassRegistry 创建分析遍并运行它 + // 注意:这里需要 PassRegistry 实例。如果 AnalysisManager 独立于 PassManager, + // 则需要传入 PassRegistry 引用或指针。 + // 为了简化,假设 AnalysisManager 能够访问到 PassRegistry + std::unique_ptr basePass = PassRegistry::getPassRegistry().createPass(analysisID); + if (!basePass) { + // Error: Analysis pass not registered + return nullptr; + } + + AnalysisPass *analysisPass = static_cast(basePass.get()); + + // 确保分析遍的粒度与请求的上下文匹配 + if (analysisPass->getGranularity() == Pass::Granularity::Function) { + analysisPass->runOnFunction(F); // 运行分析遍 + // 获取结果并缓存 + std::unique_ptr result = analysisPass->getResult(); + T *specificResult = static_cast(result.get()); + cachedResults[{F, analysisID}] = std::move(result); // 缓存结果 + return specificResult; + } + // TODO: 处理 Module 或 BasicBlock 粒度的分析 + + return nullptr; + } + + // 使所有或特定分析结果失效 (当 IR 被修改时调用) + void invalidateAllAnalyses() { cachedResults.clear(); } + + // 使特定分析结果失效 + void invalidateAnalysis(void *analysisID, Function *F = nullptr) { + if (F) { + // 使特定函数的特定分析结果失效 + cachedResults.erase({F, analysisID}); + } else { + // 使所有函数的特定分析结果失效 + std::map, std::unique_ptr> newCachedResults; + for (auto &pair : cachedResults) { + if (pair.first.second != analysisID) { + newCachedResults.insert(std::move(pair)); + } + } + cachedResults = std::move(newCachedResults); + } + } + +private: + std::map, std::unique_ptr> cachedResults; +}; + +// ====================================================================== +// PassManager:遍管理器 +// ====================================================================== +class PassManager { + + Module *pmodule; + AnalysisManager &AM; // 引用 
AnalysisManager,用于获取分析结果 + +public: + PassManager() = default; + ~PassManager() = default; + + // 添加遍:现在接受 Pass 的 ID,而不是直接的 unique_ptr + void addPass(void *passID) { + PassRegistry ®istry = PassRegistry::getPassRegistry(); + std::unique_ptr P = registry.createPass(passID); + if (!P) { + // Error: Pass not found or failed to create + return; + } + + passes.push_back(std::move(P)); + } + + // 运行所有注册的遍 + bool run(Module *M) { + bool changed = false; + for (const auto &p : passes) { + bool passChanged = false; // 记录当前遍是否修改了 IR + + // 处理优化遍的分析依赖和失效 + if (p->getPassKind() == Pass::PassKind::Optimization) { + OptimizationPass *optPass = static_cast(p.get()); + std::set analysisDependencies; + std::set analysisInvalidations; + optPass->getAnalysisUsage(analysisDependencies, analysisInvalidations); + + // PassManager 不显式运行分析依赖。 + // 而是优化遍在 runOnFunction 内部通过 AnalysisManager.getAnalysisResult 按需请求。 + } + + if (p->getGranularity() == Pass::Granularity::Module) { + passChanged = p->runOnModule(M, AM); + } else if (p->getGranularity() == Pass::Granularity::Function) { + for (auto &funcPair : M->getFunctions()) { + Function *F = funcPair.second.get(); + passChanged = p->runOnFunction(F, AM) || passChanged; + + if (passChanged && p->getPassKind() == Pass::PassKind::Optimization) { + OptimizationPass *optPass = static_cast(p.get()); + std::set analysisDependencies; + std::set analysisInvalidations; + optPass->getAnalysisUsage(analysisDependencies, analysisInvalidations); + for (void *invalidationID : analysisInvalidations) { + analysisManager.invalidateAnalysis(invalidationID, F); + } + } + } + } else if (p->getGranularity() == Pass::Granularity::BasicBlock) { + for (auto &funcPair : M->getFunctions()) { + Function *F = funcPair.second.get(); + for (auto &bbPtr : funcPair.second->getBasicBlocks()) { + passChanged = p->runOnBasicBlock(bbPtr.get(), AM) || passChanged; + + if (passChanged && p->getPassKind() == Pass::PassKind::Optimization) { + OptimizationPass *optPass = 
static_cast(p.get()); + std::set analysisDependencies; + std::set analysisInvalidations; + optPass->getAnalysisUsage(analysisDependencies, analysisInvalidations); + for (void *invalidationID : analysisInvalidations) { + analysisManager.invalidateAnalysis(invalidationID, F); + } + } + } + } + } + changed = changed || passChanged; + } + return changed; + } + + AnalysisManager &getAnalysisManager() { return analysisManager; } + +private: + std::vector> passes; + AnalysisManager analysisManager; +}; + +// ====================================================================== +// 辅助宏或函数,用于简化 Pass 的注册 +// ====================================================================== + +// 用于分析遍的注册 +template void registerAnalysisPass() { + PassRegistry::getPassRegistry().registerPass(&AnalysisPassType::ID, + []() { return std::make_unique(); }); +} + +// 用于优化遍的注册 +template void registerOptimizationPass() { + PassRegistry::getPassRegistry().registerPass(&OptimizationPassType::ID, + []() { return std::make_unique(); }); +} + +} // namespace sysy \ No newline at end of file diff --git a/src/include/SysYIROptUtils.h b/src/include/SysYIROptUtils.h index d2d2e55..66929d1 100644 --- a/src/include/SysYIROptUtils.h +++ b/src/include/SysYIROptUtils.h @@ -11,11 +11,14 @@ class SysYIROptUtils{ public: // 删除use关系 + // 根据指令的使用情况删除其所有的use关系 + // 找到指令的所有使用者,并从它们的使用列表中删除该指令 static void usedelete(Instruction *instr) { for (auto &use : instr->getOperands()) { Value* val = use->getValue(); val->removeUse(use); } + instr->getParent()->removeInst(instr); // 从基本块中删除指令 } // 判断是否是全局变量 From a72fc541fbee91f8fc64930322434280434efc56 Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Mon, 21 Jul 2025 15:20:46 +0800 Subject: [PATCH 17/35] =?UTF-8?q?[midend]=E6=B4=BB=E8=B7=83=E5=8F=98?= =?UTF-8?q?=E9=87=8F=E5=88=86=E6=9E=90=EF=BC=8CCFG=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E9=81=8D=E9=87=8D=E5=86=99=E3=80=82=E8=BF=98=E6=9C=AA=E8=B7=91?= =?UTF-8?q?=E9=80=9A=EF=BC=8C=E6=9A=82=E5=AD=98?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/CFGOptPass.cpp | 706 +++++++++++++++++++++++++++++++++++++++ src/Liveness.cpp | 145 ++++++++ src/include/CFGOptPass.h | 40 +++ src/include/Liveness.h | 72 ++++ 4 files changed, 963 insertions(+) create mode 100644 src/CFGOptPass.cpp create mode 100644 src/Liveness.cpp create mode 100644 src/include/CFGOptPass.h create mode 100644 src/include/Liveness.h diff --git a/src/CFGOptPass.cpp b/src/CFGOptPass.cpp new file mode 100644 index 0000000..224a553 --- /dev/null +++ b/src/CFGOptPass.cpp @@ -0,0 +1,706 @@ +#include "CFGOptPass.h" // 包含新的 CFG 优化遍的头文件 +#include "Dom.h" // CFG修改会使支配树失效,包含头文件 +#include "IR.h" +#include "IRBuilder.h" +#include "Liveness.h" // CFG修改会使活跃变量分析失效,包含头文件 +#include "SysYIROptUtils.h" // 包含您提供的 SysYIROptUtils +#include +#include +#include +#include +#include +#include // For SysYDelNoPreBLock +#include + +namespace sysy { + +char CFGOptimizationPass::ID = 0; // 初始化静态 ID + +// 声明分析依赖和失效 +void CFGOptimizationPass::getAnalysisUsage(std::set &analysisDependencies, + std::set &analysisInvalidations) const { + // CFG 优化会改变控制流图,因此会使大部分数据流分析失效。 + // 特别是支配树和活跃变量分析。 + analysisInvalidations.insert(&DominatorTreeAnalysisPass::ID); + analysisInvalidations.insert(&LivenessAnalysisPass::ID); + // TODO: 如果有其他分析(如数据流分析)也可能失效,需要在此处添加 +} + +// ====================================================================== +// 静态 CFG 优化辅助函数的实现 +// 大部分代码直接从您提供的 SysYIRCFGOpt.cpp 复制过来 +// 并根据新的 PhiInst 定义调整了 Phi 节点处理逻辑 +// ====================================================================== + +bool CFGOptimizationPass::SysYDelInstAfterBr(Function *func) { + bool changed = false; + // 使用迭代器安全的遍历,因为可能会删除指令 + for (auto &basicBlock : func->getBasicBlocks()) { + if (!basicBlock) + continue; // 确保基本块有效 + + bool terminatorFound = false; + auto terminatorIter = basicBlock->getInstructions().end(); // 迭代器指向终止指令 + + // 查找终止指令并标记其后的指令进行删除 + for (auto iter = basicBlock->getInstructions().begin(); iter != 
basicBlock->getInstructions().end(); ++iter) { + if (terminatorFound) { + // 如果已经找到终止指令,则当前指令是无用指令删除指令 + SysYIROptUtils::usedelete(iter->get()); + } else if ((*iter)->isTerminator()) { + terminatorFound = true; + terminatorIter = iter; + } + } + + // 删除终止指令后的所有指令 + if (terminatorFound) { + auto currentIter = std::next(terminatorIter); // 从终止指令的下一个开始删除 + while (currentIter != basicBlock->getInstructions().end()) { + changed = true; + currentIter = basicBlock->getInstructions().erase(currentIter); + } + } + + // 更新前驱后继关系:由于可能删除了旧的终止指令并改变了控制流 + // 最好是先清除旧的关系,然后根据最新的终止指令重新建立关系 + if (terminatorFound) { + Instruction *currentTerminator = + basicBlock->getInstructions().empty() ? nullptr : basicBlock->getInstructions().back().get(); + if (!currentTerminator || !currentTerminator->isTerminator()) { + // 这是一种错误情况,块应该总是以终止指令结束 + // 或者说,如果删除了唯一的终止指令,那么块就没有后继了,需要后续优化来修复 + // 暂时跳过更新,让其他优化(如 SysYAddReturn)来处理 + continue; + } + + // 清除旧的后继关系 + // 注意:这里需要复制一份后继列表,因为在循环中修改原列表会使迭代器失效 + std::vector oldSuccessors(basicBlock->getSuccessors().begin(), basicBlock->getSuccessors().end()); + for (BasicBlock *succ : oldSuccessors) { + if (succ) { + succ->removePredecessor(basicBlock.get()); + basicBlock->removeSuccessor(succ); + } + } + + // 根据最新的终止指令重新建立新的后继关系 + if (currentTerminator->isUnconditional()) { + BasicBlock *branchBlock = dynamic_cast(currentTerminator->getOperand(0)); + if (branchBlock) { + basicBlock->addSuccessor(branchBlock); + branchBlock->addPredecessor(basicBlock.get()); + } + } else if (currentTerminator->isConditional()) { + BasicBlock *thenBlock = dynamic_cast(currentTerminator->getOperand(1)); + BasicBlock *elseBlock = dynamic_cast(currentTerminator->getOperand(2)); + if (thenBlock) { + basicBlock->addSuccessor(thenBlock); + thenBlock->addPredecessor(basicBlock.get()); + } + if (elseBlock && thenBlock != elseBlock) { // 避免重复添加相同后继 + basicBlock->addSuccessor(elseBlock); + elseBlock->addPredecessor(basicBlock.get()); + } + } + } + } + return changed; +} + +bool 
CFGOptimizationPass::SysYBlockMerge(Function *func) { + bool changed = false; + + // 使用迭代器安全的循环来遍历和删除 + for (auto blockiter = func->getBasicBlocks().begin(); blockiter != func->getBasicBlocks().end();) { + BasicBlock *currentBlock = blockiter->get(); + if (!currentBlock) { // 防止空指针 + ++blockiter; + continue; + } + + // 入口块不能被合并到前一个块(它没有前一个块),但可以作为目标块被合并 + if (currentBlock == func->getEntryBlock() && currentBlock->getNumPredecessors() == 0) { + ++blockiter; + continue; + } + + // 如果当前块只有一个后继块 + if (currentBlock->getNumSuccessors() == 1) { + BasicBlock *nextBlock = currentBlock->getSuccessors()[0]; + if (!nextBlock) { // 后继块无效 + ++blockiter; + continue; + } + + // 且后继块只有一个前驱块(这是合并的条件之一) + if (nextBlock->getNumPredecessors() == 1 && nextBlock->getPredecessors()[0] == currentBlock) { + // std::cout << "merge block: " << currentBlock->getName() << " with " << nextBlock->getName() << std::endl; + + // 删除 currentBlock 最后的 br 指令 + if (!currentBlock->getInstructions().empty()) { + Instruction *lastInst = currentBlock->getInstructions().back().get(); + if (lastInst->isTerminator()) { + SysYIROptUtils::usedelete(lastInst); + // 从指令列表中移除 + currentBlock->getInstructions().pop_back(); + } + } + + // 处理 Phi 指令: + // 如果 nextBlock 包含 Phi 指令,需要将这些 Phi 指令的操作数进行处理 + // 因为 nextBlock 的唯一前驱是 currentBlock,这些 Phi 指令在合并后变得多余。 + // 它们的值可以直接替换为来自 currentBlock 的值。 + // 然后删除这些 Phi 指令。 + auto nextBlockInstIter = nextBlock->getInstructions().begin(); + while (nextBlockInstIter != nextBlock->getInstructions().end()) { + if ((*nextBlockInstIter)->isPhi()) { + PhiInst *phi = dynamic_cast(nextBlockInstIter->get()); + if (phi) { + // 找到 Phi 对应 currentBlock 的传入值 + Value *incomingVal = phi->getvalfromBlk(currentBlock); + if (incomingVal) { + phi->replaceAllUsesWith(incomingVal); // 替换所有使用 + SysYIROptUtils::usedelete(phi); // 删除 phi 指令 + nextBlockInstIter = nextBlock->getInstructions().erase(nextBlockInstIter); + changed = true; + continue; // 继续检查下一个指令 + } + } + } else { + break; // Phi 指令总是在基本块的开头 + } 
+ ++nextBlockInstIter; + } + + // 将 nextBlock 的指令移动到 currentBlock + for (auto institer = nextBlock->begin(); institer != nextBlock->end();) { + institer->get()->setParent(currentBlock); + currentBlock->getInstructions().emplace_back(institer->release()); // 移动 unique_ptr + institer = nextBlock->getInstructions().erase(institer); + } + + // 合并参数 (如果 nextBlock 有 Arguments) + for (auto &argm : nextBlock->getArguments()) { + argm->setParent(currentBlock); // 更新父指针 + currentBlock->insertArgument(argm); // 将参数插入到 currentBlock + } + nextBlock->getArguments().clear(); // 清空 nextBlock 的参数列表 + + // 更新前驱后继关系 + // 清理 nextBlock 与 currentBlock 之间的关系 + currentBlock->removeSuccessor(nextBlock); + nextBlock->removePredecessor(currentBlock); + + // 将 nextBlock 的所有后继转移到 currentBlock + std::vector nextBlockSuccessors(nextBlock->getSuccessors().begin(), + nextBlock->getSuccessors().end()); + for (BasicBlock *succ : nextBlockSuccessors) { + if (succ) { + currentBlock->addSuccessor(succ); + succ->replacePredecessor(nextBlock, currentBlock); // 更新后继块的前驱 + nextBlock->removeSuccessor(succ); // 从 nextBlock 移除,避免重复处理 + } + } + + // 从函数中删除 nextBlock + func->removeBasicBlock(nextBlock); + changed = true; + // 保持 blockiter 不变,以便在下一次循环中重新检查当前的 currentBlock + // 因为它的新后继可能现在又满足合并条件了 + } else { + ++blockiter; // 不满足合并条件,移动到下一个块 + } + } else { + ++blockiter; // 不满足合并条件,移动到下一个块 + } + } + return changed; +} + +bool CFGOptimizationPass::SysYDelNoPreBLock(Function *func) { + bool changed = false; + + // 标记所有块为不可达 + for (auto &block_ptr : func->getBasicBlocks()) { + if (block_ptr) + block_ptr->setreachableFalse(); + } + + // 从入口块开始进行可达性分析 (BFS) + BasicBlock *entryBlock = func->getEntryBlock(); + if (!entryBlock) + return false; // 没有入口块,则无需处理 + + entryBlock->setreachableTrue(); + std::queue blockqueue; + blockqueue.push(entryBlock); + while (!blockqueue.empty()) { + BasicBlock *block = blockqueue.front(); + blockqueue.pop(); + if (block) { + for (auto &succ : block->getSuccessors()) { + if (succ && 
!succ->getreachable()) { + succ->setreachableTrue(); + blockqueue.push(succ); + } + } + } + } + + // 遍历所有块,删除不可达块 + + for (auto blockIter = func->getBasicBlocks_NoRange().begin(); blockIter != func->getBasicBlocks_NoRange().end();) { + BasicBlock *currentBlock = blockIter->get(); + if (!currentBlock) { + // 如果当前块是空指针,直接跳过 + blockIter = func->getBasicBlocks_NoRange().erase(blockIter); + changed = true; + continue; + } + + if (!currentBlock->getreachable()) { + // 入口块不可删除 + if (currentBlock == func->getEntryBlock()) { + ++blockIter; + continue; + } + + // 删除不可达基本块内的所有指令 + // 由于 usedelete 会从父块中移除指令,这里直接遍历并调用即可 + auto instsToProcess = currentBlock->getInstructions(); // 复制一份,避免迭代器失效 + for (auto &iterInst_ptr : instsToProcess) { + if (iterInst_ptr) + SysYIROptUtils::usedelete(iterInst_ptr.get()); + } + + // 处理 Phi 指令:移除指向该不可达块的 Phi 操作数 + // 遍历所有后继块的 Phi 指令,移除与 currentBlock 相关的传入值 + std::vector successorsCopy(currentBlock->getSuccessors().begin(), + currentBlock->getSuccessors().end()); + for (BasicBlock *succblock : successorsCopy) { + if (!succblock) + continue; + // 遍历后继块的指令,只处理 Phi 指令(它们在块的开头) + for (auto &phiinst_ptr : succblock->getInstructions()) { + if (phiinst_ptr->getKind() != Instruction::kPhi) { + break; // Phi 指令都在块的开头 + } + PhiInst *phi = dynamic_cast(phiinst_ptr.get()); + if (phi) { + // 使用 PhiInst 的 delBlk 方法来移除与当前被删除块相关的传入值 + phi->delBlk(currentBlock); + } + } + // 更新后继块的前驱列表 (非常重要,因为 currentBlock 要被删除了) + succblock->removePredecessor(currentBlock); + } + // 清空 currentBlock 的后继,因为它将不复存在 + currentBlock->clearPredecessors(); // 清空前驱列表 + currentBlock->clearSuccessors(); // 清空后继列表 + + // 从函数中删除基本块 + blockIter = func->getBasicBlocks_NoRange().erase(blockIter); + changed = true; + } else { + ++blockIter; + } + } + return changed; +} + +bool CFGOptimizationPass::SysYDelEmptyBlock(Function *func, IRBuilder *pBuilder) { + bool changed = false; + + // 收集所有“空”基本块(没有实际指令,或只有Phi和UncondBr)及其目标 + // map: 空块 -> 其唯一后继 (如果存在) + std::map EmptyBlocksMap; + + // 
第一次遍历:识别空块及其跳转目标 + for (auto &basicBlock_ptr : func->getBasicBlocks()) { + BasicBlock *basicBlock = basicBlock_ptr.get(); + if (!basicBlock) + continue; + + // 判断是否是空块:没有指令或者只有 Phi 和一个终止指令 + bool isEmptyCandidate = true; + Instruction *terminatorInst = nullptr; + + if (basicBlock->getNumInstructions() == 0) { + isEmptyCandidate = true; // 完全空块 + } else { + // 检查除了最后一个指令之外是不是只有phi指令 + for(auto &inst_ptr : basicBlock->getInstructions()) { + Instruction *inst = inst_ptr.get(); + if (!inst->isPhi() && !inst->isTerminator()) { + isEmptyCandidate = false; // 有其他类型的指令 + break; + } + } + // for (size_t i = 0; i < basicBlock->getNumInstructions(); ++i) { + // Instruction *inst = basicBlock->getInstructions()[i].get(); + // if (inst->isTerminator()) { + // terminatorInst = inst; + // // 如果终止指令不是最后一个,那这个块有问题 + // if (i != basicBlock->getNumInstructions() - 1) { + // isEmptyCandidate = false; + // break; + // } + // } else if (!inst->isPhi()) { // 除了 phi 和终止指令,还有其他指令 + // isEmptyCandidate = false; + // break; + // } + // } + } + + if (isEmptyCandidate) { + if (terminatorInst && terminatorInst->isUnconditional()) { + if (basicBlock->getNumSuccessors() == 1) { // 只有一条无条件跳转 + EmptyBlocksMap[basicBlock] = dynamic_cast(terminatorInst->getOperand(0)); + } + } else if (!terminatorInst && basicBlock->getNumSuccessors() == 1) { + // 可能是完全空块,但没有终止指令,只有一个后继(需要IRBuilder补全) + // 这种情况下,它也构成空块链的一部分 + EmptyBlocksMap[basicBlock] = basicBlock->getSuccessors().front(); + } + // 如果是条件分支,不认为是“空块链”的中间节点 + } + } + + // 第二次遍历:更新前驱的跳转目标,跳过空块链 + for (auto &basicBlock_ptr : func->getBasicBlocks()) { + BasicBlock *basicBlock = basicBlock_ptr.get(); + if (!basicBlock) + continue; + + // EntryBlock 不参与空块链的删除,但可以重定向其内部跳转 + if (basicBlock == func->getEntryBlock() && EmptyBlocksMap.count(basicBlock)) { + // 如果入口块本身是空块,处理其跳转目标 + Instruction *lastInst = + basicBlock->getInstructions().empty() ? 
nullptr : basicBlock->getInstructions().back().get(); + if (lastInst && lastInst->isUnconditional()) { + BasicBlock *oldTargetBlock = dynamic_cast(lastInst->getOperand(0)); + BasicBlock *currentTargetBlock = oldTargetBlock; + while (EmptyBlocksMap.count(currentTargetBlock)) { + currentTargetBlock = EmptyBlocksMap[currentTargetBlock]; + } + if (currentTargetBlock != oldTargetBlock) { + changed = true; + // 更新前驱后继关系 + basicBlock->removeSuccessor(oldTargetBlock); + oldTargetBlock->removePredecessor(basicBlock); + + lastInst->replaceOperand(0, currentTargetBlock); + basicBlock->addSuccessor(currentTargetBlock); + currentTargetBlock->addPredecessor(basicBlock); + + // 处理 Phi 指令:将被跳过的空块替换为 currentBlock + for (auto &InstInNew_ptr : currentTargetBlock->getInstructions()) { + if (InstInNew_ptr->isPhi()) { + PhiInst *phi = dynamic_cast(InstInNew_ptr.get()); + if (phi) { + // 使用 replaceold2new 替换 phi 传入的基本块 + phi->replaceold2new(oldTargetBlock, basicBlock); + } + } else { + break; + } + } + } + } + continue; + } + + // 确保块有终止指令,如果没有,添加一个(防止后续处理崩溃) + // 这种情况通常发生在IR生成时没有为完全空的块插入跳转,或者前面优化删除了终止指令 + if (basicBlock->getNumInstructions() == 0 || !basicBlock->getInstructions().back()->isTerminator()) { + if (basicBlock->getNumSuccessors() == 1) { + pBuilder->setPosition(basicBlock, basicBlock->end()); + pBuilder->createUncondBrInst(basicBlock->getSuccessors()[0], {}); + changed = true; // 添加了指令,所以有变化 + } + } + + auto lastInst = basicBlock->getInstructions().end(); + if (lastInst == basicBlock->getInstructions().begin()) { // 块是空的 + continue; + } + --lastInst; // 指向最后一个指令 + + if ((*lastInst)->isUnconditional()) { + BasicBlock *oldTargetBlock = dynamic_cast((*lastInst)->getOperand(0)); + BasicBlock *currentTargetBlock = oldTargetBlock; + + // 沿空块链查找最终目标 + while (EmptyBlocksMap.count(currentTargetBlock) && currentTargetBlock != func->getEntryBlock()) { + // 防止无限循环或将EntryBlock也视为空块 + currentTargetBlock = EmptyBlocksMap[currentTargetBlock]; + } + + if (currentTargetBlock != 
oldTargetBlock) { // 如果目标改变了 + changed = true; + // 更新前驱后继关系 + basicBlock->removeSuccessor(oldTargetBlock); + oldTargetBlock->removePredecessor(basicBlock); + + (*lastInst)->replaceOperand(0, currentTargetBlock); + basicBlock->addSuccessor(currentTargetBlock); + currentTargetBlock->addPredecessor(basicBlock); + + // 更新 Phi 指令:将被跳过的空块替换为 currentBlock + for (auto &InstInNew_ptr : currentTargetBlock->getInstructions()) { + if (InstInNew_ptr->isPhi()) { + PhiInst *phi = dynamic_cast(InstInNew_ptr.get()); + if (phi) { + // 使用 replaceold2new 替换 phi 传入的基本块 + phi->replaceold2new(oldTargetBlock, basicBlock); + } + } else { + break; + } + } + } + } else if ((*lastInst)->isConditional()) { + BasicBlock *oldThenBlock = dynamic_cast((*lastInst)->getOperand(1)); + BasicBlock *oldElseBlock = dynamic_cast((*lastInst)->getOperand(2)); + + BasicBlock *currentThenBlock = oldThenBlock; + BasicBlock *currentElseBlock = oldElseBlock; + + // 沿空块链查找最终目标 + while (EmptyBlocksMap.count(currentThenBlock) && currentThenBlock != func->getEntryBlock()) { + currentThenBlock = EmptyBlocksMap[currentThenBlock]; + } + while (EmptyBlocksMap.count(currentElseBlock) && currentElseBlock != func->getEntryBlock()) { + currentElseBlock = EmptyBlocksMap[currentElseBlock]; + } + + bool thenChanged = (currentThenBlock != oldThenBlock); + bool elseChanged = (currentElseBlock != oldElseBlock); + + if (thenChanged || elseChanged) { + changed = true; + // 更新前驱后继关系和 Phi 指令 + if (thenChanged) { + basicBlock->removeSuccessor(oldThenBlock); + oldThenBlock->removePredecessor(basicBlock); + (*lastInst)->replaceOperand(1, currentThenBlock); + basicBlock->addSuccessor(currentThenBlock); + currentThenBlock->addPredecessor(basicBlock); + + for (auto &InstInNew_ptr : currentThenBlock->getInstructions()) { + if (InstInNew_ptr->isPhi()) { + PhiInst *phi = dynamic_cast(InstInNew_ptr.get()); + if (phi) + phi->replaceold2new(oldThenBlock, basicBlock); + } else { + break; + } + } + } + if (elseChanged) { + 
basicBlock->removeSuccessor(oldElseBlock); + oldElseBlock->removePredecessor(basicBlock); + (*lastInst)->replaceOperand(2, currentElseBlock); + basicBlock->addSuccessor(currentElseBlock); + currentElseBlock->addPredecessor(basicBlock); + + for (auto &InstInNew_ptr : currentElseBlock->getInstructions()) { + if (InstInNew_ptr->isPhi()) { + PhiInst *phi = dynamic_cast(InstInNew_ptr.get()); + if (phi) + phi->replaceold2new(oldElseBlock, basicBlock); + } else { + break; + } + } + } + + // 处理 then 和 else 分支合并的情况 + if (currentThenBlock == currentElseBlock) { + SysYIROptUtils::usedelete(lastInst->get()); + basicBlock->getInstructions().erase(lastInst); + pBuilder->setPosition(basicBlock, basicBlock->end()); + pBuilder->createUncondBrInst(currentThenBlock, {}); + changed = true; + } + } + } + } + + // 第三次遍历:删除所有识别出来的空块 + for (auto iter = func->getBasicBlocks_NoRange().begin(); iter != func->getBasicBlocks_NoRange().end();) { + BasicBlock *currentBlock = iter->get(); + if (!currentBlock) { + iter = func->getBasicBlocks_NoRange().erase(iter); + changed = true; + continue; + } + + if (EmptyBlocksMap.count(currentBlock)) { + // EntryBlock 不能被删除 + if (currentBlock == func->getEntryBlock()) { + ++iter; + continue; + } + + // 删除空块内的所有指令 + auto instsToProcess = currentBlock->getInstructions(); // 复制一份 + for (auto &iterInst_ptr : instsToProcess) { + if (iterInst_ptr) + SysYIROptUtils::usedelete(iterInst_ptr.get()); + } + + // 更新其后继的前驱关系(如果之前没有完全清除,但由于 replaceold2new 已经处理了大部分) + // 这里主要为了确保被删除块的所有后继都移除了它作为前驱 + std::vector succsCopy(currentBlock->getSuccessors().begin(), currentBlock->getSuccessors().end()); + for (BasicBlock *succ : succsCopy) { + if (succ) + succ->removePredecessor(currentBlock); + } + // 清空其自身的前驱和后继 + currentBlock->getPredecessors().clear(); + currentBlock->getSuccessors().clear(); + + // 从函数中删除基本块 + iter = func->getBasicBlocks_NoRange().erase(iter); // erase 会返回下一个有效迭代器 + changed = true; + } else { + ++iter; + } + } + return changed; +} + +bool 
CFGOptimizationPass::SysYAddReturn(Function *func, IRBuilder *pBuilder) { + bool changed = false; + // 使用新的迭代器方式遍历 + for (auto &block_ptr : func->getBasicBlocks()) { + BasicBlock *block = block_ptr.get(); + if (!block) + continue; // 确保基本块有效 + + // 如果基本块没有后继(即是出口块) + if (block->getNumSuccessors() == 0) { + // 检查最后一个指令是否是返回指令 + if (block->getNumInstructions() == 0 || !block->getInstructions().back()->isReturn()) { + changed = true; + pBuilder->setPosition(block, block->end()); + if (func->getReturnType()->isInt()) { + pBuilder->createReturnInst(ConstantInteger::get(0)); + } else if (func->getReturnType()->isFloat()) { + pBuilder->createReturnInst(ConstantFloating::get(0.0F)); + } else { // Void 类型 + pBuilder->createReturnInst(); + } + } + } + } + return changed; +} + +bool CFGOptimizationPass::SysYCondBr2Br(Function *func, IRBuilder *pBuilder) { + bool changed = false; + + for (auto &basicblock_ptr : func->getBasicBlocks()) { + BasicBlock *basicblock = basicblock_ptr.get(); + if (!basicblock || basicblock->getNumInstructions() == 0) + continue; + + auto lastInstIter = basicblock->getInstructions().end(); + --lastInstIter; // 指向最后一个指令 + + if ((*lastInstIter)->isConditional()) { + Value *condOperand = (*lastInstIter)->getOperand(0); + ConstantValue *constOperand = dynamic_cast(condOperand); + + if (constOperand != nullptr) { // 条件操作数是常量 + changed = true; + + BasicBlock *thenBlock = dynamic_cast((*lastInstIter)->getOperand(1)); + BasicBlock *elseBlock = dynamic_cast((*lastInstIter)->getOperand(2)); + + // 删除旧的条件分支指令 + SysYIROptUtils::usedelete(lastInstIter->get()); + basicblock->getInstructions().erase(lastInstIter); + + BasicBlock *targetBlock = nullptr; + BasicBlock *prunedBlock = nullptr; // 被剪枝的路径的块 + + bool isTrue = false; + if (constOperand->isFloat()) { + isTrue = (constOperand->getFloat() != 0.0F); + } else { // 整数 + isTrue = (constOperand->getInt() != 0); + } + + if (isTrue) { + targetBlock = thenBlock; + prunedBlock = elseBlock; + } else { + targetBlock = 
elseBlock; + prunedBlock = thenBlock; + } + + // 创建无条件跳转指令 + pBuilder->setPosition(basicblock, basicblock->end()); + pBuilder->createUncondBrInst(targetBlock, {}); + + // 更新前驱后继关系 + // 移除被剪枝的路径 + if (prunedBlock && basicblock->hasSuccessor(prunedBlock)) { + basicblock->removeSuccessor(prunedBlock); + prunedBlock->removePredecessor(basicblock); + + // 移除被剪枝路径上的 Phi 指令操作数 + for (auto &phiinst_ptr : prunedBlock->getInstructions()) { + if (phiinst_ptr->getKind() != Instruction::kPhi) { + break; + } + PhiInst *phi = dynamic_cast(phiinst_ptr.get()); + if (phi) { + // 使用 PhiInst 的 delBlk 方法来移除与当前 basicblock 相关的传入值 + phi->delBlk(basicblock); + } + } + } + } + } + } + return changed; +} + +// ====================================================================== +// CFGOptimizationPass::runOnFunction 实现 +// ====================================================================== + +bool CFGOptimizationPass::runOnFunction(Function *F, AnalysisManager &AM) { + bool changed = false; + if (!F) + return false; + + // 创建一个临时的 IRBuilder 实例,用于在当前函数内创建指令 + IRBuilder builder(nullptr); + // 迭代进行 CFG 优化,直到不再发生变化 + bool funcChangedThisIteration = true; + while (funcChangedThisIteration) { + funcChangedThisIteration = false; // 每次循环开始时重置为 false + + // 这里的顺序很重要,某些优化依赖于其他优化(例如删除无前驱块) + // 并且某些优化可能会为其他优化创造机会,所以需要循环直到稳定 + funcChangedThisIteration |= SysYCondBr2Br(F, &builder); // 条件分支转换为无条件分支 + funcChangedThisIteration |= SysYDelInstAfterBr(F); // 删除 br 后的无用指令 + funcChangedThisIteration |= SysYDelEmptyBlock(F, &builder); // 删除空块(可能涉及跳转目标更新) + funcChangedThisIteration |= SysYDelNoPreBLock(F); // 删除无前驱块(不可达块) + funcChangedThisIteration |= SysYBlockMerge(F); // 合并基本块 + funcChangedThisIteration |= SysYAddReturn(F, &builder); // 添加返回指令 + + // 如果本轮有任何变化,则继续下一次循环 + changed = changed || funcChangedThisIteration; + } + + // 如果函数有任何变化,返回 true + return changed; +} + +} // namespace sysy \ No newline at end of file diff --git a/src/Liveness.cpp b/src/Liveness.cpp new file mode 100644 index 
0000000..8b92b3d --- /dev/null +++ b/src/Liveness.cpp @@ -0,0 +1,145 @@ +#include "Liveness.h" +#include // For std::set_union, std::set_difference +#include +#include // Potentially for worklist, though not strictly needed for the iterative approach below + +namespace sysy { + +// 初始化静态 ID +char LivenessAnalysisPass::ID = 0; // 任何唯一的地址都可以,这里用 0 + +// ============================================================== +// LivenessAnalysisResult 结果类的实现 +// ============================================================== + +LivenessAnalysisResult::LivenessAnalysisResult(Function *F) : AssociatedFunction(F) { + // 构造时可以不计算,在分析遍运行里计算并填充 +} + +const std::set *LivenessAnalysisResult::getLiveIn(BasicBlock *BB) const { + auto it = liveInSets.find(BB); + if (it != liveInSets.end()) { + return &(it->second); + } + // 返回一个空集合,表示未找到或不存在 + static const std::set emptySet; + return &emptySet; +} + +const std::set *LivenessAnalysisResult::getLiveOut(BasicBlock *BB) const { + auto it = liveOutSets.find(BB); + if (it != liveOutSets.end()) { + return &(it->second); + } + static const std::set emptySet; + return &emptySet; +} + +void LivenessAnalysisResult::computeDefUse(BasicBlock *BB, std::set &def, std::set &use) { + def.clear(); + use.clear(); + + // 按照指令在块中的顺序遍历 + for (const auto &inst_ptr : BB->getInstructions()) { + Instruction *inst = inst_ptr.get(); + + // 检查指令是否产生值 (Def) + if (inst->hasValue()) { // 假设 Instruction 有 hasValue() 方法判断是否生成结果值 + // 如果这个值在此指令之前在块中被使用过,则它是一个 Use + // 否则,它是 Def + if (use.find(inst) == use.end()) { // 如果当前指令本身的值未被当前块内之前的指令使用 + def.insert(inst); + } + } + + // 检查指令的操作数 (Use) + for (Value *operand : inst->getOperands()) { // 假设 Instruction 有 getOperands() 返回 Value* + // 只有当操作数是一个Instruction或Argument且未在当前块中被定义时,才算作 Use + if (auto opInst = dynamic_cast(operand)) { + if (def.find(opInst) == def.end()) { // 如果操作数不是由当前块中之前的指令定义 + use.insert(opInst); + } + } else if (auto arg = dynamic_cast(operand)) { + use.insert(arg); + } + // 常量和全局变量不计入 Def/Use 集合,因为它们不随控制流变化 + 
} + } +} + +void LivenessAnalysisResult::computeLiveness(Function *F) { + // 每次计算前清空旧结果 + liveInSets[F].clear(); + liveOutSets[F].clear(); + + // 初始化所有基本块的 LiveIn 和 LiveOut 集合为空 + for (const auto &bb_ptr : F->getBasicBlocks()) { + BasicBlock *bb = bb_ptr.get(); + liveInSets[F][bb] = {}; + liveOutSets[F][bb] = {}; + } + + bool changed = true; + while (changed) { + changed = false; + + // 迭代所有基本块,通常逆序遍历(reverse post-order)可以加快收敛, + // 但为了简化,这里直接遍历所有块。 + for (const auto &bb_ptr : F->getBasicBlocks()) { + BasicBlock *bb = bb_ptr.get(); + + std::set oldLiveIn = liveInSets[F][bb]; + std::set oldLiveOut = liveOutSets[F][bb]; + + // 1. 计算 LiveOut(BB) = Union(LiveIn(Succ) for Succ in Successors(BB)) + std::set newLiveOut; + for (BasicBlock *succ : bb->getSuccessors()) { + const std::set *succLiveIn = getLiveIn(succ); // 递归获取后继的 LiveIn + if (succLiveIn) { + newLiveOut.insert(succLiveIn->begin(), succLiveIn->end()); + } + } + liveOutSets[F][bb] = newLiveOut; + + // 2. 计算 LiveIn(BB) = Use(BB) Union (LiveOut(BB) - Def(BB)) + std::set defSet, useSet; + computeDefUse(bb, defSet, useSet); // 计算当前块的 Def 和 Use + + std::set liveOutMinusDef; + std::set_difference(newLiveOut.begin(), newLiveOut.end(), defSet.begin(), defSet.end(), + std::inserter(liveOutMinusDef, liveOutMinusDef.begin())); + + std::set newLiveIn = useSet; + newLiveIn.insert(liveOutMinusDef.begin(), liveOutMinusDef.end()); + liveInSets[F][bb] = newLiveIn; + + // 检查是否发生变化 + if (oldLiveIn != newLiveIn || oldLiveOut != newLiveOut) { + changed = true; + } + } + } +} + +// ============================================================== +// LivenessAnalysisPass 的实现 +// ============================================================== + +bool LivenessAnalysisPass::runOnFunction(Function *F, AnalysisManager &AM) { + // 每次运行创建一个新的 LivenessAnalysisResult 对象来存储结果 + CurrentLivenessResult = std::make_unique(F); + + // 调用 LivenessAnalysisResult 内部的方法来计算分析结果 + // 这里的 computeLiveness 不需要 AM 参数,因为它自身不依赖其他分析。 + 
CurrentLivenessResult->computeLiveness(F); + + // 分析遍通常不修改 IR,所以返回 false + return false; +} + +std::unique_ptr LivenessAnalysisPass::getResult() { + // 返回计算好的 LivenessAnalysisResult 实例,所有权转移给 AnalysisManager + return std::move(CurrentLivenessResult); +} + +} // namespace sysy \ No newline at end of file diff --git a/src/include/CFGOptPass.h b/src/include/CFGOptPass.h new file mode 100644 index 0000000..d4c4f8e --- /dev/null +++ b/src/include/CFGOptPass.h @@ -0,0 +1,40 @@ +#pragma once + +#include "Pass.h" // 包含 Pass 框架 +#include "IR.h" // 包含 IR 定义 +#include "IRBuilder.h" // 包含 IRBuilder + +namespace sysy { + +// 前向声明 IRBuilder (如果在其他地方定义,确保路径正确) +// class IRBuilder; // 如果IRBuilder不在IRBuilder.h中定义,需要前向声明 + +// CFG 优化遍 +class CFGOptimizationPass : public OptimizationPass { +public: + // 唯一的 Pass ID + static char ID; + + CFGOptimizationPass() : OptimizationPass("CFGOptimization", Pass::Granularity::Function) {} + + // 实现 getPassID + void* getPassID() const override { return &ID; } + + // 声明分析依赖和失效 + void getAnalysisUsage(std::set& analysisDependencies, std::set& analysisInvalidations) const override; + + // 运行优化,现在接受 AnalysisManager& AM 参数 + bool runOnFunction(Function* F, AnalysisManager& AM) override; + +private: + // 将原 SysYCFGOpt 中的静态方法移入或直接使用 + // 这些方法可以直接声明为静态成员函数,并在 runOnFunction 中调用 + static bool SysYDelInstAfterBr(Function *func); + static bool SysYDelEmptyBlock(Function *func, IRBuilder* pBuilder); + static bool SysYDelNoPreBLock(Function *func); + static bool SysYBlockMerge(Function *func); + static bool SysYAddReturn(Function *func, IRBuilder* pBuilder); + static bool SysYCondBr2Br(Function *func, IRBuilder* pBuilder); +}; + +} // namespace sysy \ No newline at end of file diff --git a/src/include/Liveness.h b/src/include/Liveness.h new file mode 100644 index 0000000..d804d33 --- /dev/null +++ b/src/include/Liveness.h @@ -0,0 +1,72 @@ +#pragma once + +#include "IR.h" // 包含 IR 定义 +#include "Pass.h" // 包含 Pass 框架 +#include // for std::set_union, 
std::set_difference +#include +#include +#include + +namespace sysy { + +// 前向声明 +class Function; +class BasicBlock; +class Value; +class Instruction; + +// 活跃变量分析结果类 +// 它将包含 LiveIn 和 LiveOut 集合 +class LivenessAnalysisResult : public AnalysisResultBase { +public: + LivenessAnalysisResult(Function *F); // 构造函数,需要一个函数来关联结果 + + // 获取给定基本块的 LiveIn 集合 + const std::set *getLiveIn(BasicBlock *BB) const; + + // 获取给定基本块的 LiveOut 集合 + const std::set *getLiveOut(BasicBlock *BB) const; + + // 暴露内部数据结构,如果需要更直接的访问 + const std::map> &getLiveInSets() const { return liveInSets; } + const std::map> &getLiveOutSets() const { return liveOutSets; } + + // 核心计算方法,由 LivenessAnalysisPass 调用 + void computeLiveness(Function *F); + +private: + Function *AssociatedFunction; // 这个活跃变量分析是为哪个函数计算的 + std::map> liveInSets; + std::map> liveOutSets; + + // 辅助函数:计算基本块的 Def 和 Use 集合 + // Def: 块内定义,且定义在所有使用之前的值 + // Use: 块内使用,且使用在所有定义之前的值 + void computeDefUse(BasicBlock *BB, std::set &def, std::set &use); +}; + +// 活跃变量分析遍 +class LivenessAnalysisPass : public AnalysisPass { +public: + // 唯一的 Pass ID + static char ID; // LLVM 风格的唯一 ID + + LivenessAnalysisPass() : AnalysisPass("LivenessAnalysis", Pass::Granularity::Function) {} + + // 实现 getPassID + void *getPassID() const override { return &ID; } + + // 运行分析并返回结果。现在接受 AnalysisManager& AM 参数 + bool runOnFunction(Function *F, AnalysisManager &AM) override; + + // 获取分析结果的指针。 + // 注意:AnalysisManager 将会调用此方法来获取结果并进行缓存。 + std::unique_ptr getResult() override; + +private: + // 存储当前分析计算出的 LivenessAnalysisResult 实例 + // runOnFunction 每次调用都会创建新的 LivenessAnalysisResult 对象 + std::unique_ptr CurrentLivenessResult; +}; + +} // namespace sysy \ No newline at end of file From f61b51b2fadb9644110baab39f235e6de16f1961 Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Tue, 22 Jul 2025 21:25:07 +0800 Subject: [PATCH 18/35] =?UTF-8?q?[midend]=E4=BC=98=E5=8C=96=E4=B8=AD?= =?UTF-8?q?=E7=AB=AF=E6=A1=86=E6=9E=B6=EF=BC=8C=E7=A7=BB=E9=99=A4=E6=97=A0?= 
=?UTF-8?q?=E7=94=A8=E6=97=A7=E4=BB=A3=E7=A0=81=EF=BC=8Cignore=E6=A0=BC?= =?UTF-8?q?=E5=BC=8F=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 3 +- src/DeadCodeElimination.cpp | 259 --------------- src/Liveness.cpp | 83 ++--- src/Mem2Reg.cpp | 515 ------------------------------ src/Pass.cpp | 132 ++++++++ src/Reg2Mem.cpp | 122 ------- src/include/DeadCodeElimination.h | 36 --- src/include/Dom.h | 2 +- src/include/Liveness.h | 5 +- src/include/Mem2Reg.h | 79 ----- src/include/Pass.h | 90 +----- src/include/Reg2Mem.h | 22 -- src/include/SysYIRPass.h | 59 ---- src/sysyc.cpp | 21 +- 14 files changed, 208 insertions(+), 1220 deletions(-) delete mode 100644 src/DeadCodeElimination.cpp delete mode 100644 src/Mem2Reg.cpp create mode 100644 src/Pass.cpp delete mode 100644 src/Reg2Mem.cpp delete mode 100644 src/include/DeadCodeElimination.h delete mode 100644 src/include/Mem2Reg.h delete mode 100644 src/include/Reg2Mem.h delete mode 100644 src/include/SysYIRPass.h diff --git a/.gitignore b/.gitignore index ce3b37a..7a9b7e6 100644 --- a/.gitignore +++ b/.gitignore @@ -53,4 +53,5 @@ __init__.py .DS_* -antlr/ \ No newline at end of file +antlr/ +.clang-format diff --git a/src/DeadCodeElimination.cpp b/src/DeadCodeElimination.cpp deleted file mode 100644 index a986b18..0000000 --- a/src/DeadCodeElimination.cpp +++ /dev/null @@ -1,259 +0,0 @@ -#include "DeadCodeElimination.h" -#include - -extern int DEBUG; -namespace sysy { - -void DeadCodeElimination::runDCEPipeline() { - const auto& functions = pModule->getFunctions(); - for (const auto& function : functions) { - const auto& func = function.second; - bool changed = true; - while (changed) { - changed = false; - eliminateDeadStores(func.get(), changed); - eliminateDeadLoads(func.get(), changed); - eliminateDeadAllocas(func.get(), changed); - eliminateDeadRedundantLoadStore(func.get(), changed); - eliminateDeadGlobals(changed); - } - } -} - -// 消除无用存储 
消除条件: -// 存储的目标指针(pointer)不是全局变量(!isGlobal(pointer))。 -// 存储的目标指针不是数组参数(!isArr(pointer) 或不在函数参数列表里)。 -// 该指针的所有使用者(uses)仅限 alloca 或 store(即没有 load 或其他指令使用它)。 -void DeadCodeElimination::eliminateDeadStores(Function* func, bool& changed) { - for (const auto& block : func->getBasicBlocks()) { - auto& instrs = block->getInstructions(); - for (auto iter = instrs.begin(); iter != instrs.end();) { - auto inst = iter->get(); - if (!inst->isStore()) { - ++iter; - continue; - } - - auto storeInst = dynamic_cast(inst); - auto pointer = storeInst->getPointer(); - // 如果是全局变量或者是函数的数组参数 - if (SysYIROptUtils::isGlobal(pointer) || (SysYIROptUtils::isArr(pointer) && - std::find(func->getEntryBlock()->getArguments().begin(), - func->getEntryBlock()->getArguments().end(), - pointer) != func->getEntryBlock()->getArguments().end())) { - ++iter; - continue; - } - - bool changetag = true; - for (auto& use : pointer->getUses()) { - // 依次判断store的指针是否被其他指令使用 - auto user = use->getUser(); - auto userInst = dynamic_cast(user); - // 如果使用store的指针的指令不是Alloca或Store,则不删除 - if (userInst != nullptr && !userInst->isAlloca() && !userInst->isStore()) { - changetag = false; - break; - } - } - - if (changetag) { - changed = true; - if(DEBUG){ - std::cout << "=== Dead Store Found ===\n"; - SysYPrinter::printInst(storeInst); - } - SysYIROptUtils::usedelete(storeInst); - iter = instrs.erase(iter); - } else { - ++iter; - } - } - } -} -// 消除无用加载 消除条件: -// 该指令的结果未被使用(inst->getUses().empty())。 -void DeadCodeElimination::eliminateDeadLoads(Function* func, bool& changed) { - for (const auto& block : func->getBasicBlocks()) { - auto& instrs = block->getInstructions(); - for (auto iter = instrs.begin(); iter != instrs.end();) { - auto inst = iter->get(); - if (inst->isBinary() || inst->isUnary() || inst->isLoad()) { - if (inst->getUses().empty()) { - changed = true; - if(DEBUG){ - std::cout << "=== Dead Load Binary Unary Found ===\n"; - SysYPrinter::printInst(inst); - } - SysYIROptUtils::usedelete(inst); - iter = 
instrs.erase(iter); - continue; - } - } - ++iter; - } - } -} - -// 消除无用加载 消除条件: -// 该 alloca 未被任何指令使用(allocaInst->getUses().empty())。 -// 该 alloca 不是函数的参数(不在 entry 块的参数列表里)。 -void DeadCodeElimination::eliminateDeadAllocas(Function* func, bool& changed) { - for (const auto& block : func->getBasicBlocks()) { - auto& instrs = block->getInstructions(); - for (auto iter = instrs.begin(); iter != instrs.end();) { - auto inst = iter->get(); - if (inst->isAlloca()) { - auto allocaInst = dynamic_cast(inst); - if (allocaInst->getUses().empty() && - std::find(func->getEntryBlock()->getArguments().begin(), - func->getEntryBlock()->getArguments().end(), - allocaInst) == func->getEntryBlock()->getArguments().end()) { - changed = true; - if(DEBUG){ - std::cout << "=== Dead Alloca Found ===\n"; - SysYPrinter::printInst(inst); - } - SysYIROptUtils::usedelete(inst); - iter = instrs.erase(iter); - continue; - } - } - ++iter; - } - } -} - -void DeadCodeElimination::eliminateDeadIndirectiveAllocas(Function* func, bool& changed) { - // 删除mem2reg时引入的且现在已经没有value使用了的隐式alloca - FunctionAnalysisInfo* funcInfo = pCFA->getFunctionAnalysisInfo(func); - for (auto it = funcInfo->getIndirectAllocas().begin(); it != funcInfo->getIndirectAllocas().end();) { - auto &allocaInst = *it; - if (allocaInst->getUses().empty()) { - changed = true; - if(DEBUG){ - std::cout << "=== Dead Indirect Alloca Found ===\n"; - SysYPrinter::printInst(allocaInst.get()); - } - it = funcInfo->getIndirectAllocas().erase(it); - } else { - ++it; - } - } -} - -// 该全局变量未被任何指令使用(global->getUses().empty())。 -void DeadCodeElimination::eliminateDeadGlobals(bool& changed) { - auto& globals = pModule->getGlobals(); - for (auto it = globals.begin(); it != globals.end();) { - auto& global = *it; - if (global->getUses().empty()) { - changed = true; - if(DEBUG){ - std::cout << "=== Dead Global Found ===\n"; - SysYPrinter::printValue(global.get()); - } - it = globals.erase(it); - } else { - ++it; - } - } -} - -// 消除冗余加载和存储 消除条件: -// phi 
指令的目标指针仅被该 phi 使用(无其他 store/load 使用)。 -// memset 指令的目标指针未被使用(pointer->getUses().empty()) -// store -> load -> store 模式 -void DeadCodeElimination::eliminateDeadRedundantLoadStore(Function* func, bool& changed) { - for (const auto& block : func->getBasicBlocks()) { - auto& instrs = block->getInstructions(); - for (auto iter = instrs.begin(); iter != instrs.end();) { - auto inst = iter->get(); - if (inst->isPhi()) { - auto phiInst = dynamic_cast(inst); - auto pointer = phiInst->getPointer(); - bool tag = true; - for (const auto& use : pointer->getUses()) { - auto user = use->getUser(); - if (user != inst) { - tag = false; - break; - } - } - /// 如果 pointer 仅被该 phi 使用,可以删除 ph - if (tag) { - changed = true; - SysYIROptUtils::usedelete(inst); - iter = instrs.erase(iter); - continue; - } - // 数组指令还不完善,不保证memset优化效果 - } else if (inst->isMemset()) { - auto memsetInst = dynamic_cast(inst); - auto pointer = memsetInst->getPointer(); - if (pointer->getUses().empty()) { - changed = true; - SysYIROptUtils::usedelete(inst); - iter = instrs.erase(iter); - continue; - } - }else if(inst->isLoad()) { - if (iter != instrs.begin()) { - auto loadInst = dynamic_cast(inst); - auto loadPointer = loadInst->getPointer(); - // TODO:store -> load -> store 模式 - auto prevIter = std::prev(iter); - auto prevInst = prevIter->get(); - if (prevInst->isStore()) { - auto prevStore = dynamic_cast(prevInst); - auto prevStorePointer = prevStore->getPointer(); - auto prevStoreValue = prevStore->getOperand(0); - // 确保前一个 store 不是数组操作 - if (prevStore->getIndices().empty()) { - // 检查后一条指令是否是 store 同一个值 - auto nextIter = std::next(iter); - if (nextIter != instrs.end()) { - auto nextInst = nextIter->get(); - if (nextInst->isStore()) { - auto nextStore = dynamic_cast(nextInst); - auto nextStorePointer = nextStore->getPointer(); - auto nextStoreValue = nextStore->getOperand(0); - // 确保后一个 store 不是数组操作 - if (nextStore->getIndices().empty()) { - // 判断优化条件: - // 1. prevStore 的指针操作数 == load 的指针操作数 - // 2. 
nextStore 的值操作数 == load 指令本身 - if (prevStorePointer == loadPointer && - nextStoreValue == loadInst) { - // 可以优化直接把prevStorePointer的值存到nextStorePointer - changed = true; - nextStore->setOperand(0, prevStoreValue); - if(DEBUG){ - std::cout << "=== Dead Store Load Store Found(now only del Load) ===\n"; - SysYPrinter::printInst(prevStore); - SysYPrinter::printInst(loadInst); - SysYPrinter::printInst(nextStore); - } - SysYIROptUtils::usedelete(loadInst); - iter = instrs.erase(iter); - // 删除 prevStore 这里是不是可以留给删除无用store处理? - // if (prevStore->getUses().empty()) { - // usedelete(prevStore); - // instrs.erase(prevIter); // 删除 prevStore - // } - continue; // 跳过 ++iter,因为已经移动迭代器 - } - } - } - } - } - } - } - } - ++iter; - } - } -} - - -} // namespace sysy \ No newline at end of file diff --git a/src/Liveness.cpp b/src/Liveness.cpp index 8b92b3d..e1231f1 100644 --- a/src/Liveness.cpp +++ b/src/Liveness.cpp @@ -2,6 +2,7 @@ #include // For std::set_union, std::set_difference #include #include // Potentially for worklist, though not strictly needed for the iterative approach below +#include // For std::set namespace sysy { @@ -12,10 +13,6 @@ char LivenessAnalysisPass::ID = 0; // 任何唯一的地址都可以,这里用 // LivenessAnalysisResult 结果类的实现 // ============================================================== -LivenessAnalysisResult::LivenessAnalysisResult(Function *F) : AssociatedFunction(F) { - // 构造时可以不计算,在分析遍运行里计算并填充 -} - const std::set *LivenessAnalysisResult::getLiveIn(BasicBlock *BB) const { auto it = liveInSets.find(BB); if (it != liveInSets.end()) { @@ -36,70 +33,83 @@ const std::set *LivenessAnalysisResult::getLiveOut(BasicBlock *BB) cons } void LivenessAnalysisResult::computeDefUse(BasicBlock *BB, std::set &def, std::set &use) { - def.clear(); - use.clear(); + def.clear(); // 将持有在 BB 中定义的值 + use.clear(); // 将持有在 BB 中使用但在其定义之前的值 + + // 临时集合,用于跟踪当前基本块中已经定义过的变量 + std::set defined_in_block_so_far; // 按照指令在块中的顺序遍历 for (const auto &inst_ptr : BB->getInstructions()) { Instruction *inst = 
inst_ptr.get(); - // 检查指令是否产生值 (Def) - if (inst->hasValue()) { // 假设 Instruction 有 hasValue() 方法判断是否生成结果值 - // 如果这个值在此指令之前在块中被使用过,则它是一个 Use - // 否则,它是 Def - if (use.find(inst) == use.end()) { // 如果当前指令本身的值未被当前块内之前的指令使用 - def.insert(inst); + // 1. 处理指令的操作数 (Use) - 在定义之前的使用 + for (const auto &use_ptr : inst->getOperands()) { // 修正迭代器类型 + Value *operand = use_ptr->getValue(); // 从 shared_ptr 获取 Value* + + // 过滤掉常量和全局变量,因为它们通常不被视为活跃变量 + ConstantValue *constValue = dynamic_cast(operand); + GlobalValue *globalValue = dynamic_cast(operand); + if (constValue || globalValue) { + continue; // 跳过常量和全局变量 + } + + // 如果操作数是一个变量(Instruction 或 Argument),并且它在此基本块的当前点之前尚未被定义 + if (defined_in_block_so_far.find(operand) == defined_in_block_so_far.end()) { + use.insert(operand); } } - // 检查指令的操作数 (Use) - for (Value *operand : inst->getOperands()) { // 假设 Instruction 有 getOperands() 返回 Value* - // 只有当操作数是一个Instruction或Argument且未在当前块中被定义时,才算作 Use - if (auto opInst = dynamic_cast(operand)) { - if (def.find(opInst) == def.end()) { // 如果操作数不是由当前块中之前的指令定义 - use.insert(opInst); - } - } else if (auto arg = dynamic_cast(operand)) { - use.insert(arg); - } - // 常量和全局变量不计入 Def/Use 集合,因为它们不随控制流变化 + // 2. 
处理指令自身产生的定义 (Def) + if (inst->isDefine()) { // 使用 isDefine() 方法 + // 指令自身定义了一个值。将其添加到块的 def 集合, + // 并添加到当前块中已定义的值的临时集合。 + def.insert(inst); // inst 本身就是被定义的值(例如,虚拟寄存器) + defined_in_block_so_far.insert(inst); } } } void LivenessAnalysisResult::computeLiveness(Function *F) { // 每次计算前清空旧结果 - liveInSets[F].clear(); - liveOutSets[F].clear(); + liveInSets.clear(); // 直接清空 map,不再使用 F 作为键 + liveOutSets.clear(); // 直接清空 map // 初始化所有基本块的 LiveIn 和 LiveOut 集合为空 for (const auto &bb_ptr : F->getBasicBlocks()) { BasicBlock *bb = bb_ptr.get(); - liveInSets[F][bb] = {}; - liveOutSets[F][bb] = {}; + liveInSets[bb] = {}; // 直接以 bb 为键 + liveOutSets[bb] = {}; // 直接以 bb 为键 } bool changed = true; while (changed) { changed = false; - // 迭代所有基本块,通常逆序遍历(reverse post-order)可以加快收敛, - // 但为了简化,这里直接遍历所有块。 - for (const auto &bb_ptr : F->getBasicBlocks()) { - BasicBlock *bb = bb_ptr.get(); + // TODO : 目前为逆序遍历基本块,考虑反向拓扑序遍历基本块 - std::set oldLiveIn = liveInSets[F][bb]; - std::set oldLiveOut = liveOutSets[F][bb]; + // 逆序遍历基本块 + std::list> basicBlocks(F->getBasicBlocks().begin(), F->getBasicBlocks().end()); + std::reverse(basicBlocks.begin(), basicBlocks.end()); + // 然后遍历 basicBlocks + + for (auto bb_iter = basicBlocks.begin(); bb_iter != basicBlocks.end(); ++bb_iter) { + BasicBlock *bb = bb_iter->get(); + if (!bb) + continue; // 避免空指针 + + std::set oldLiveIn = liveInSets[bb]; + std::set oldLiveOut = liveOutSets[bb]; // 1. 计算 LiveOut(BB) = Union(LiveIn(Succ) for Succ in Successors(BB)) std::set newLiveOut; for (BasicBlock *succ : bb->getSuccessors()) { - const std::set *succLiveIn = getLiveIn(succ); // 递归获取后继的 LiveIn + const std::set *succLiveIn = getLiveIn(succ); // 获取后继的 LiveIn if (succLiveIn) { newLiveOut.insert(succLiveIn->begin(), succLiveIn->end()); } } - liveOutSets[F][bb] = newLiveOut; + liveOutSets[bb] = newLiveOut; // 2. 
计算 LiveIn(BB) = Use(BB) Union (LiveOut(BB) - Def(BB)) std::set defSet, useSet; @@ -111,7 +121,7 @@ void LivenessAnalysisResult::computeLiveness(Function *F) { std::set newLiveIn = useSet; newLiveIn.insert(liveOutMinusDef.begin(), liveOutMinusDef.end()); - liveInSets[F][bb] = newLiveIn; + liveInSets[bb] = newLiveIn; // 检查是否发生变化 if (oldLiveIn != newLiveIn || oldLiveOut != newLiveOut) { @@ -130,7 +140,6 @@ bool LivenessAnalysisPass::runOnFunction(Function *F, AnalysisManager &AM) { CurrentLivenessResult = std::make_unique(F); // 调用 LivenessAnalysisResult 内部的方法来计算分析结果 - // 这里的 computeLiveness 不需要 AM 参数,因为它自身不依赖其他分析。 CurrentLivenessResult->computeLiveness(F); // 分析遍通常不修改 IR,所以返回 false diff --git a/src/Mem2Reg.cpp b/src/Mem2Reg.cpp deleted file mode 100644 index 2daef27..0000000 --- a/src/Mem2Reg.cpp +++ /dev/null @@ -1,515 +0,0 @@ -#include "Mem2Reg.h" -#include "SysYIRPrinter.h" -#include -#include -#include -#include -#include -#include -#include -#include - -namespace sysy { - -// --- 私有成员函数实现 --- - -// 计算给定定义块集合的迭代支配边界 -std::unordered_set Mem2Reg::computeIteratedDomFrontiers(const std::unordered_set& blocks) { - std::unordered_set result; - std::queue worklist; // 使用队列进行 BFS-like 遍历 - - for (auto* block : blocks) - worklist.push(block); - - while (!worklist.empty()) { - auto* block = worklist.front(); - worklist.pop(); - - auto* blockInfo = controlFlowAnalysis->getBlockAnalysisInfo(block); - if (!blockInfo) continue; - - for (auto* df : blockInfo->getDomFrontiers()) { - if (result.find(df) == result.end()) { // If not already in result - result.insert(df); - worklist.push(df); - } - } - } - return result; -} - -// 分析一个 alloca 的所有 uses,填充 allocaDefsBlock 和 allocaUsesBlock -void Mem2Reg::allocaAnalysis(AllocaInst* alloca) { - allocaDefsBlock[alloca].clear(); - allocaUsesBlock[alloca].clear(); - - for (auto use : alloca->getUses()) { - Instruction* userInst = dynamic_cast(use->getUser()); - if (!userInst) continue; - - if (StoreInst* store = dynamic_cast(userInst)) { - 
if (store->getOperand(1) == alloca) { // Store's second operand is the pointer - allocaDefsBlock[alloca].insert(store->getParent()); // Store's parent is the defining block - } - } else if (LoadInst* load = dynamic_cast(userInst)) { - if (load->getOperand(0) == alloca) { // Load's first operand is the pointer - allocaUsesBlock[alloca].insert(load->getParent()); // Load's parent is the using block - } - } - } -} - -// 判断一个 alloca 是否可以被提升为寄存器 (无地址逃逸,标量类型) -bool Mem2Reg::is_promoted(AllocaInst* alloca) { - // 检查是否是标量类型 (非数组、非全局变量等) - if(!(SysYIROptUtils::isArr(alloca) || SysYIROptUtils::isGlobal(alloca))){ - return false; // 只有标量类型的 alloca 才能被提升 - } - - // 获取 alloca 指向的基类型 - PointerType* ptrType = dynamic_cast(alloca->getType()); - if (!ptrType) return false; // Should always be a pointer type - Type* allocabaseType = ptrType->getBaseType(); - - for (const auto& use : alloca->getUses()) { - Instruction* userInst = dynamic_cast(use->getUser()); - if (!userInst) { - // 如果不是指令的 use,比如作为全局变量的初始值等,通常认为逃逸 - return false; - } - - if (LoadInst* load = dynamic_cast(userInst)) { - // Load 指令结果的类型必须与 alloca 的基类型一致 - if (load->getType() != allocabaseType) { - return false; - } - } else if (StoreInst* store = dynamic_cast(userInst)) { - // Store 指令的值操作数类型必须与 alloca 的基类型一致 - // 且 store 的指针操作数必须是当前 alloca - if (store->getOperand(1) != alloca || store->getOperand(0)->getType() != allocabaseType) { - return false; - } - } else if (userInst->isGetSubArray()) { - // GSA 指令表示对数组的访问 - // 这意味着地址逃逸,不能简单提升为单个寄存器 - return false; - } else if (userInst->isCall()) { - // 如果 alloca 作为函数参数传递,通常认为地址逃逸 - return false; - } - // 如果有其他类型的指令使用 alloca 的地址,也需要判断是否是逃逸 - // 例如:BitCastInst, PtrToIntInst, 如果这些操作将地址暴露,则不能提升 - } - return true; -} - -// 在迭代支配边界处插入 Phi 指令 -void Mem2Reg::insertPhiNodes(Function* func) { - // 清空上次 Phi 插入的结果 - phiMap.clear(); - allPhiInstructions.clear(); - - std::unordered_set phiPlacementBlocks; // 存放需要插入 Phi 的块 - std::queue workQueue; // BFS 队列,用于迭代支配边界计算 - - // 遍历所有可提升的 alloca - 
for (AllocaInst* alloca : currentFunctionAllocas) { - phiPlacementBlocks.clear(); // 为每个 alloca 重新计算 Phi 放置位置 - - // 初始化工作队列,放入所有定义该 alloca 的基本块 - for (BasicBlock* defBB : allocaDefsBlock[alloca]) { - workQueue.push(defBB); - } - - while (!workQueue.empty()) { - BasicBlock* currentBB = workQueue.front(); - workQueue.pop(); - - auto* blockInfo = controlFlowAnalysis->getBlockAnalysisInfo(currentBB); - if (!blockInfo) continue; - - // 遍历当前块的支配边界 - for (BasicBlock* domFrontierBB : blockInfo->getDomFrontiers()) { - // 如果这个支配边界块还没有为当前 alloca 插入 Phi 指令 - if (phiPlacementBlocks.find(domFrontierBB) == phiPlacementBlocks.end()) { - // 获取 alloca 的基类型,作为 Phi 指令的结果类型 - Type* phiType = dynamic_cast(alloca->getType())->getBaseType(); - - // 在支配边界块的开头插入 Phi 指令 - pBuilder->setPosition(domFrontierBB->begin()); - PhiInst* newPhi = pBuilder->createPhiInst(phiType, {}, {}); // 初始入边为空 - - allPhiInstructions.push_back(newPhi); // 记录所有 Phi - phiPlacementBlocks.insert(domFrontierBB); // 标记已插入 - - // 将 Phi 指令映射到它所代表的原始 alloca - phiMap[domFrontierBB][newPhi] = alloca; - - // 如果支配边界块本身没有定义该 alloca,则其支配边界也可能需要 Phi - // 只有当这个块不是当前alloca的定义块时,才将其加入workQueue,以计算其DF。 - if (allocaDefsBlock[alloca].find(domFrontierBB) == allocaDefsBlock[alloca].end()) { - workQueue.push(domFrontierBB); - } - } - } - } - } -} - -// 获取前驱块在后继块前驱列表中的索引 -int Mem2Reg::getPredIndex(BasicBlock* pred, BasicBlock* succ) { - int index = 0; - for (auto* elem : succ->getPredecessors()) { - if (elem == pred) { - return index; - } - ++index; - } - // 断言通常在你的 IR 框架中应该确保前驱是存在的 - // assert(false && "Predecessor not found in successor's predecessor list"); - return -1; // 应该不会发生 -} - -// 递归地重命名基本块中的变量并填充 Phi 指令 -void Mem2Reg::renameBlock(BasicBlock* block, - std::unordered_map& currentIncomings, - std::unordered_set& visitedBlocks) { - - // 记录在此块中发生的定义,以便在退出时将它们从栈中弹出 - std::unordered_map definitionsInBlockCount; - - // 如果已经访问过这个块,直接返回(防止无限循环或重复处理,在DFS中尤其重要) - if (visitedBlocks.count(block)) { - return; - } - 
visitedBlocks.insert(block); - - // --- 1. 处理当前基本块内的指令 --- - // 使用迭代器安全地遍历和删除指令 - for (auto it = block->getInstructions().begin(); it != block->getInstructions().end(); ) { - Instruction* currentInst = it->get(); - - if (AllocaInst* alloca = dynamic_cast(currentInst)) { - // 如果是可提升的 alloca,标记为删除 - if (std::find(currentFunctionAllocas.begin(), currentFunctionAllocas.end(), alloca) != currentFunctionAllocas.end()) { - SysYIROptUtils::usedelete(currentInst); // 标记为删除(或直接删除取决于你的 IR 管理) - it = block->getInstructions().erase(it); // 从列表中移除 - continue; // 继续下一个指令 - } - } else if (LoadInst* load = dynamic_cast(currentInst)) { - AllocaInst* originalAlloca = dynamic_cast(load->getOperand(0)); // load 的第一个操作数是指针 - if (originalAlloca && std::find(currentFunctionAllocas.begin(), currentFunctionAllocas.end(), originalAlloca) != currentFunctionAllocas.end()) { - // 如果是可提升 alloca 的 load 指令 - Value* incomingVal = nullptr; - if (currentIncomings.count(originalAlloca)) { - incomingVal = currentIncomings[originalAlloca]; - } else { - // 如果在当前路径上没有找到定义,则使用 UndefinedValue - incomingVal = UndefinedValue::get(originalAlloca->getType()->isPointer() ? 
- dynamic_cast(originalAlloca->getType())->getBaseType() : - originalAlloca->getType()); - } - - load->replaceAllUsesWith(incomingVal); // 用最新值替换所有 load 的用途 - SysYIROptUtils::usedelete(currentInst); - it = block->getInstructions().erase(it); - continue; - } - } else if (StoreInst* store = dynamic_cast(currentInst)) { - AllocaInst* originalAlloca = dynamic_cast(store->getOperand(1)); // store 的第二个操作数是指针 - if (originalAlloca && std::find(currentFunctionAllocas.begin(), currentFunctionAllocas.end(), originalAlloca) != currentFunctionAllocas.end()) { - // 如果是可提升 alloca 的 store 指令,更新当前值 - currentIncomings[originalAlloca] = store->getOperand(0); // store 的第一个操作数是值 - definitionsInBlockCount[originalAlloca]++; // 记录在该块中进行的定义数量 - SysYIROptUtils::usedelete(currentInst); - it = block->getInstructions().erase(it); - continue; - } - } else if (PhiInst* phi = dynamic_cast(currentInst)) { - // 如果是 Mem2Reg 插入的 Phi 指令 (通过 phiMap 判断) - if (phiMap[block].count(phi)) { - AllocaInst* originalAlloca = phiMap[block][phi]; - currentIncomings[originalAlloca] = phi; // Phi 指令本身成为该变量的新定义 - definitionsInBlockCount[originalAlloca]++; // 记录该 Phi 的定义 - } - } - ++it; // 移动到下一个指令 - } - - // --- 2. 填充后继基本块中 Phi 指令的入边 --- - for (BasicBlock* successorBB : block->getSuccessors()) { - int predIndex = getPredIndex(block, successorBB); - if (predIndex == -1) continue; - - // Phi 指令总是在基本块的开头 - for (auto& inst_ptr : successorBB->getInstructions()) { - if (PhiInst* phi = dynamic_cast(inst_ptr.get())) { - if (phiMap[successorBB].count(phi)) { // 确保这是我们关心的 Phi 指令 - AllocaInst* originalAlloca = phiMap[successorBB][phi]; - Value* incomingValue = nullptr; - - if (currentIncomings.count(originalAlloca)) { - incomingValue = currentIncomings[originalAlloca]; - } else { - // 如果在当前块没有找到对应的定义,使用 UndefinedValue - incomingValue = UndefinedValue::get(originalAlloca->getType()->isPointer() ? 
- dynamic_cast(originalAlloca->getType())->getBaseType() : - originalAlloca->getType()); - } - - if (incomingValue) { - phi->addIncoming(incomingValue, block); // 添加 (值, 前驱块) 对 - } - } - } else { - // 遇到非 Phi 指令,说明已经处理完所有 Phi,可以跳出 - break; - } - } - } - - // --- 3. 递归调用支配树的子节点 --- - auto* blockInfo = controlFlowAnalysis->getBlockAnalysisInfo(block); - if (blockInfo) { - for (BasicBlock* dominatedChildBB : blockInfo->getSdoms()) { // getSdoms 获取直接支配的子节点 - // 递归调用,传递当前 Incomings 的副本(或通过值传递以实现回溯) - // 注意:这里是传递 `currentIncomings` 的拷贝,以便递归返回后可以恢复。 - // 但如果 `currentIncomings` 是引用传递,则这里需要回溯逻辑。 - // 鉴于它是值传递,此处的 `definitionsInBlockCount` 仅用于统计,无需实际操作 `currentIncomings`。 - renameBlock(dominatedChildBB, currentIncomings, visitedBlocks); - } - } - - // --- 4. 回溯:从栈中弹出在此块中创建的所有定义 --- - for (auto const& [alloca, count] : definitionsInBlockCount) { - // 在我们的实现中,`currentIncomings` 是通过值传递的,每次递归都收到一个新的拷贝。 - // 因此,不需要显式地 "pop" 栈。`currentIncomings` 在函数返回时会自动销毁。 - // 这种方式模拟了 "SSA 栈" 的行为,每个函数调用帧有自己的局部定义环境。 - } -} - -// 简化冗余的 Phi 指令 (当所有输入都相同时) -void Mem2Reg::simplifyphi(PhiInst* phi) { - BasicBlock* phifromblock = phi->getParent(); - if (!phifromblock) return; // 指令可能已经被删除 - - Value* commonValue = nullptr; - bool allSame = true; - - // Phi 指令的操作数是 Value, BasicBlock 交替出现,所以是 getOperandSize() / 2 个入边 - if (phi->getNumOperands() == 0) { // 空 Phi,通常是无效的,直接删除 - phi->replaceAllUsesWith(UndefinedValue::get(phi->getType())); // 用 UndefinedValue 替换所有用途 - // phi->getParent()->delete_inst(phi); - // 删除 Phi 指令后直接返回 - // phi指令在开头一个比较快 - // TODO:后续可优化查找 - auto tofind = std::find_if(phifromblock->getInstructions().begin(), phifromblock->getInstructions().end(), - [phi](const auto &instr) { return instr.get() == phi; }); - SysYIROptUtils::usedelete(phi); // 使用 SysYIROptUtils 删除指令 - phifromblock->getInstructions().erase(tofind); - // 从基本块中删除 Phi 指令 - return; - } - - for (size_t i = 0; i < phi->getNumIncomingValues(); ++i) { - Value* incomingVal = phi->getOperand(2 * i); // 值位于偶数索引 - - if (incomingVal == 
phi) { // 如果 Phi 指令引用自身 (循环变量) - // 这种情况下,Phi 暂时不能简化,除非所有入边都是它自己,这通常通过其他优化处理 - // 为避免复杂性,我们在此处不处理自引用 Phi 的简化,除非它是唯一选择。 - // 更好的做法是,如果所有入边都指向自身,则该Phi是冗余的,可以替换为undef或其第一个实际值 - // 但这需要更复杂的分析来确定循环的初始值。目前简单返回。 - // TODO:留到后续循环优化处理 - return; - } - - if (commonValue == nullptr) { - commonValue = incomingVal; - } else if (commonValue != incomingVal) { - allSame = false; - break; // 发现不同的入边值 - } - } - - if (allSame && commonValue != nullptr) { - // 所有入边值都相同,用这个值替换 Phi 指令的所有用途 - phi->replaceAllUsesWith(commonValue); - // 从基本块中删除 Phi 指令 - auto tofind = std::find_if(phifromblock->getInstructions().begin(), phifromblock->getInstructions().end(), - [phi](const auto &instr) { return instr.get() == phi; }); - SysYIROptUtils::usedelete(phi); // 使用 SysYIROptUtils 删除指令 - phifromblock->getInstructions().erase(tofind); - } -} - -// 对单个函数执行内存到寄存器的提升 -bool Mem2Reg::promoteMemoryToRegisters(Function* func) { - bool changed = false; - - // 每次开始对一个函数进行 Mem2Reg 时,清空所有上下文信息 - currentFunctionAllocas.clear(); - allocaDefsBlock.clear(); - allocaUsesBlock.clear(); - phiMap.clear(); - allPhiInstructions.clear(); - - // 1. 收集所有可提升的 AllocaInst,并进行初步分析 - BasicBlock* entryBB = func->getEntryBlock(); - if (!entryBB) return false; - - // 逆序遍历入口块的指令,安全地识别 Alloca - for (auto it = entryBB->getInstructions().rbegin(); it != entryBB->getInstructions().rend(); ++it) { - if (AllocaInst* alloca = dynamic_cast(it->get())) { - if (is_promoted(alloca)) { - currentFunctionAllocas.push_back(alloca); - } - } - } - // 收集后反转,使其按原始顺序排列 (如果需要的话,但对后续分析影响不大) - std::reverse(currentFunctionAllocas.begin(), currentFunctionAllocas.end()); - - // 对收集到的所有 alloca 进行 DefsBlock 和 UsesBlock 分析 - for (AllocaInst* alloca : currentFunctionAllocas) { - allocaAnalysis(alloca); - } - - // 2. 
预处理:删除无用的 AllocaInst (没有 Load 和 Store) - // 迭代 currentFunctionAllocas,安全删除 - for (unsigned int i = 0; i < currentFunctionAllocas.size(); ) { - AllocaInst* alloca = currentFunctionAllocas[i]; - - bool hasRelevantUse = false; - // 检查 alloca 的 uses 列表,看是否有 Load 或 Store - // 只要有 Load/Store,就认为是"相关用途",不删除 - for (auto use_ptr : alloca->getUses()) { - Instruction* user_inst = dynamic_cast(use_ptr->getUser()); - if (user_inst && (dynamic_cast(user_inst) || dynamic_cast(user_inst))) { - hasRelevantUse = true; - break; - } - } - - // 如果没有相关用途(没有 Load 和 Store),则 alloca 是死代码 - if (!hasRelevantUse && allocaDefsBlock[alloca].empty() && allocaUsesBlock[alloca].empty()) { - if (alloca->getParent()) { - // alloca->getParent()->delete_inst(alloca); // 从其所在块删除 alloca 指令 - auto tofind = std::find_if(alloca->getParent()->getInstructions().begin(), alloca->getParent()->getInstructions().end(), - [alloca](const auto &instr) { return instr.get() == alloca; }); - SysYIROptUtils::usedelete(alloca); - alloca->getParent()->getInstructions().erase(tofind); - } - currentFunctionAllocas.erase(currentFunctionAllocas.begin() + i); // 从列表中移除 - changed = true; // 发生了改变 - } else { - i++; // 否则,移动到下一个 alloca - } - } - - // 如果没有可提升的 alloca 了,直接返回 - if (currentFunctionAllocas.empty()) { - return changed; - } - - // 3. 插入 Phi 指令 - insertPhiNodes(func); - if (!allPhiInstructions.empty()) changed = true; - - // 4. 重命名变量,转换为 SSA 形式并填充 Phi 指令 - std::unordered_map initialIncomings; - std::unordered_set visitedBlocks; // 用于 DFS 遍历,防止循环 - - // 初始化 entry block 的 Incomings 状态 - for (AllocaInst* alloca : currentFunctionAllocas) { - initialIncomings[alloca] = UndefinedValue::get(dynamic_cast(alloca->getType())->getBaseType()); - } - - // 从入口块开始递归重命名 - renameBlock(entryBB, initialIncomings, visitedBlocks); - - // 5. 
简化 Phi 指令 - // 由于 renameBlock 可能会删除 Phi,这里复制一份列表以安全迭代 - std::vector phisToSimplify = allPhiInstructions; - for (PhiInst* phi : phisToSimplify) { - // 检查 phi 是否还在 IR 中 (可能已被其他优化删除) - // 一个简单检查是看它是否有父块 - if (phi->getParent()) { - simplifyphi(phi); - // simplifyphi 内部会删除 Phi,所以这里不需要再处理 allPhiInstructions - // 最终的 allPhiInstructions 清理将在 promoteMemoryToRegisters 结束后进行 - } - } - - // 清理所有 Phi 的列表和映射 - // 遍历 allPhiInstructions,删除那些在 simplifyphi 后可能仍然存在的、但已经没有 uses 的 Phi - std::vector remainingPhis; - for(PhiInst* phi : allPhiInstructions) { - if(phi->getParent() && phi->getUses().empty()){ // 如果还在IR中但没有用处 - - // phi->getParent()->delete_inst(phi); - // 找到phi节点对应的迭代器 - auto tofind = std::find_if(phi->getParent()->getInstructions().begin(), phi->getParent()->getInstructions().end(), - [phi](const auto &instr) { return instr.get() == phi; }); - SysYIROptUtils::usedelete(phi); // 使用 SysYIROptUtils 删除指令 - phi->getParent()->getInstructions().erase(tofind); - - changed = true; - } else if (phi->getParent()) { // 仍在IR中且有uses - remainingPhis.push_back(phi); - } - } - allPhiInstructions = remainingPhis; // 更新为仅包含未被删除的 Phi - - // 重新清理 phiMap 中已经删除的 Phi 指令项 - for (auto& pairBBPhiMap : phiMap) { - std::vector phisToRemoveFromMap; - for (auto& pairPhiAlloca : pairBBPhiMap.second) { - if (!pairPhiAlloca.first->getParent()) { // 如果 Phi 已经被删除 - phisToRemoveFromMap.push_back(pairPhiAlloca.first); - } - } - for (PhiInst* phi : phisToRemoveFromMap) { - pairBBPhiMap.second.erase(phi); - } - } - - - return changed; -} - -// --- run函数实现 --- -void Mem2Reg::run() { - // 每次运行整个 Mem2Reg Pass 时,重新进行分析 - controlFlowAnalysis->clear(); - controlFlowAnalysis->runControlFlowAnalysis(); - activeVarAnalysis->clear(); - // 假设 dataFlowAnalysisUtils 可以管理和运行各个分析器 - dataFlowAnalysisUtils.addBackwardAnalyzer(activeVarAnalysis); - dataFlowAnalysisUtils.backwardAnalyze(pModule); // 运行活跃变量分析 - - bool globalChanged = false; - // 循环直到没有更多的 alloca 可以被提升 - // 每次 promoteMemoryToRegisters 会尝试在一个函数内完成所有 Mem2Reg 优化 - do { 
- globalChanged = false; - for (const auto& [_, func] : pModule->getFunctions()) { - // 对每个函数执行 Mem2Reg - if (promoteMemoryToRegisters(func.get())) { - globalChanged = true; - // 如果一个函数发生改变,可能影响其他函数或需要重新分析 - // 因此需要重新运行控制流和活跃变量分析,以备下一次循环 - controlFlowAnalysis->clear(); - controlFlowAnalysis->runControlFlowAnalysis(); - activeVarAnalysis->clear(); - dataFlowAnalysisUtils.backwardAnalyze(pModule); // 重新分析活跃变量 - } - } - } while (globalChanged); // 如果全局有任何函数发生改变,则继续迭代 - - // 最终清理和重新分析 - controlFlowAnalysis->clear(); - controlFlowAnalysis->runControlFlowAnalysis(); - activeVarAnalysis->clear(); - dataFlowAnalysisUtils.backwardAnalyze(pModule); -} - -} // namespace sysy \ No newline at end of file diff --git a/src/Pass.cpp b/src/Pass.cpp new file mode 100644 index 0000000..c9ec38d --- /dev/null +++ b/src/Pass.cpp @@ -0,0 +1,132 @@ +// Pass.cpp +#include "Pass.h" +#include "SysYIRCFGOpt.h" +#include "SysYIRPrinter.h" +#include +#include +#include +#include +#include +#include +#include "Dom.h" +#include "Liveness.h" + +namespace sysy { + +extern int DEBUG; // 全局调试标志 + +// ====================================================================== +// 封装优化流程的函数:包含Pass注册和迭代运行逻辑 +// ====================================================================== + +void PassManager::runOptimizationPipeline(Module* moduleIR, int optLevel) { + if (DEBUG) std::cout << "--- Starting Middle-End Optimizations (Level -O" << optLevel << ") ---\n"; + + // 1. 注册所有可用的分析遍和优化遍 + // 这些注册只需执行一次。 + sysy::registerAnalysisPass(); + sysy::registerAnalysisPass(); + + + // 2. 创建遍管理器 + sysy::PassManager pm(moduleIR); + + // 3. 根据优化级别添加不同的优化遍 + if (optLevel >= 1) { + if (DEBUG) std::cout << "Applying -O1 optimizations.\n"; + + // 4. 
循环执行遍,直到 IR 稳定 (不再有任何遍修改 IR) + bool changed_in_iteration = true; + int iteration_count = 0; + while(changed_in_iteration) { + iteration_count++; + if (DEBUG) std::cout << "Optimization iteration: " << iteration_count << std::endl; + changed_in_iteration = pm.run(); // 运行一次所有添加到 PassManager 的遍 + if (DEBUG && changed_in_iteration) { + std::cout << "=== IR after iteration " << iteration_count << " ===\n"; + SysYPrinter printer_iter(moduleIR); + printer_iter.printIR(); + } + } + if (DEBUG) std::cout << "Optimizations stabilized after " << iteration_count << " iterations.\n"; + } + + + if (DEBUG) { + std::cout << "=== Final IR After Middle-End Optimizations (Level -O" << optLevel << ") ===\n"; + SysYPrinter printer(moduleIR); + printer.printIR(); + } +} + +void PassManager::addPass(void *passID) { + + PassRegistry ®istry = PassRegistry::getPassRegistry(); + std::unique_ptr P = registry.createPass(passID); + if (!P) { + // Error: Pass not found or failed to create + return; + } + + passes.push_back(std::move(P)); +} + +// 运行所有注册的遍 +bool PassManager::run() { + bool changed = false; + for (const auto &p : passes) { + bool passChanged = false; // 记录当前遍是否修改了 IR + + // 处理优化遍的分析依赖和失效 + if (p->getPassKind() == Pass::PassKind::Optimization) { + OptimizationPass *optPass = static_cast(p.get()); + std::set analysisDependencies; + std::set analysisInvalidations; + optPass->getAnalysisUsage(analysisDependencies, analysisInvalidations); + + // PassManager 不显式运行分析依赖。 + // 而是优化遍在 runOnFunction 内部通过 AnalysisManager.getAnalysisResult 按需请求。 + } + + if (p->getGranularity() == Pass::Granularity::Module) { + passChanged = p->runOnModule(pmodule, analysisManager); + } else if (p->getGranularity() == Pass::Granularity::Function) { + for (auto &funcPair : pmodule->getFunctions()) { + Function *F = funcPair.second.get(); + passChanged = p->runOnFunction(F, analysisManager) || passChanged; + + if (passChanged && p->getPassKind() == Pass::PassKind::Optimization) { + OptimizationPass *optPass = 
static_cast(p.get()); + std::set analysisDependencies; + std::set analysisInvalidations; + optPass->getAnalysisUsage(analysisDependencies, analysisInvalidations); + for (void *invalidationID : analysisInvalidations) { + analysisManager.invalidateAnalysis(invalidationID, F); + } + } + } + } else if (p->getGranularity() == Pass::Granularity::BasicBlock) { + for (auto &funcPair : pmodule->getFunctions()) { + Function *F = funcPair.second.get(); + for (auto &bbPtr : funcPair.second->getBasicBlocks()) { + passChanged = p->runOnBasicBlock(bbPtr.get(), analysisManager) || passChanged; + + if (passChanged && p->getPassKind() == Pass::PassKind::Optimization) { + OptimizationPass *optPass = static_cast(p.get()); + std::set analysisDependencies; + std::set analysisInvalidations; + optPass->getAnalysisUsage(analysisDependencies, analysisInvalidations); + for (void *invalidationID : analysisInvalidations) { + analysisManager.invalidateAnalysis(invalidationID, F); + } + } + } + } + } + changed = changed || passChanged; + } + return changed; + +} + +} // namespace sysy \ No newline at end of file diff --git a/src/Reg2Mem.cpp b/src/Reg2Mem.cpp deleted file mode 100644 index d90812a..0000000 --- a/src/Reg2Mem.cpp +++ /dev/null @@ -1,122 +0,0 @@ -#include "Reg2Mem.h" -#include -#include -#include -#include - -namespace sysy { - -/** - * 删除phi节点 - * 删除phi节点后可能会生成冗余存储代码 - */ -void Reg2Mem::DeletePhiInst(){ - auto &functions = pModule->getFunctions(); - for (auto &function : functions) { - auto basicBlocks = function.second->getBasicBlocks(); - for (auto &basicBlock : basicBlocks) { - - for (auto iter = basicBlock->begin(); iter != basicBlock->end();) { - auto &instruction = *iter; - if (instruction->isPhi()) { - auto predBlocks = basicBlock->getPredecessors(); - // 寻找源和目的 - // 目的就是phi指令的第一个操作数 - // 源就是phi指令的后续操作数 - auto destination = instruction->getOperand(0); - int predBlockindex = 0; - for (auto &predBlock : predBlocks) { - ++predBlockindex; - // 判断前驱块儿只有一个后继还是多个后继 - // 如果有多个 - 
auto source = instruction->getOperand(predBlockindex); - if (source == destination) { - continue; - } - // std::cout << predBlock->getNumSuccessors() << std::endl; - if (predBlock->getNumSuccessors() > 1) { - // 创建一个basicblock - auto newbasicBlock = function.second->addBasicBlock(); - std::stringstream ss; - ss << "phidel.L" << pBuilder->getLabelIndex(); - newbasicBlock->setName(ss.str()); - ss.str(""); - // // 修改前驱后继关系 - basicBlock->replacePredecessor(predBlock, newbasicBlock); - // predBlock = newbasicBlock; - newbasicBlock->addPredecessor(predBlock); - newbasicBlock->addSuccessor(basicBlock.get()); - predBlock->removeSuccessor(basicBlock.get()); - predBlock->addSuccessor(newbasicBlock); - // std::cout << "the block name is " << basicBlock->getName() << std::endl; - // for (auto pb : basicBlock->getPredecessors()) { - // // newbasicBlock->addPredecessor(pb); - // std::cout << pb->getName() << std::endl; - // } - // sysy::BasicBlock::conectBlocks(newbasicBlock, static_cast(basicBlock.get())); - // 若后为跳转指令,应该修改跳转指令所到达的位置 - auto thelastinst = predBlock->end(); - (--thelastinst); - - if (thelastinst->get()->isConditional() || thelastinst->get()->isUnconditional()) { // 如果是跳转指令 - auto opnum = thelastinst->get()->getNumOperands(); - for (size_t i = 0; i < opnum; i++) { - if (thelastinst->get()->getOperand(i) == basicBlock.get()) { - thelastinst->get()->replaceOperand(i, newbasicBlock); - } - } - } - // 在新块中插入store指令 - pBuilder->setPosition(newbasicBlock, newbasicBlock->end()); - // pBuilder->createStoreInst(source, destination); - if (source->isInt() || source->isFloat()) { - pBuilder->createStoreInst(source, destination); - } else { - auto loadInst = pBuilder->createLoadInst(source); - pBuilder->createStoreInst(loadInst, destination); - } - // pBuilder->createMoveInst(Instruction::kMove, destination->getType(), destination, source, - // newbasicBlock); - pBuilder->setPosition(newbasicBlock, newbasicBlock->end()); - pBuilder->createUncondBrInst(basicBlock.get(), {}); - 
} else { - // 如果前驱块只有一个后继 - auto thelastinst = predBlock->end(); - (--thelastinst); - // std::cout << predBlock->getName() << std::endl; - // std::cout << thelastinst->get() << std::endl; - // std::cout << "First point 11 " << std::endl; - if (thelastinst->get()->isConditional() || thelastinst->get()->isUnconditional()) { - // 在跳转语句前insert st指令 - pBuilder->setPosition(predBlock, thelastinst); - } else { - pBuilder->setPosition(predBlock, predBlock->end()); - } - - if (source->isInt() || source->isFloat()) { - pBuilder->createStoreInst(source, destination); - } else { - auto loadInst = pBuilder->createLoadInst(source); - pBuilder->createStoreInst(loadInst, destination); - } - } - } - // 删除phi指令 - auto &instructions = basicBlock->getInstructions(); - SysYIROptUtils::usedelete(iter->get()); - iter = instructions.erase(iter); - if (basicBlock->getNumInstructions() == 0) { - if (basicBlock->getNumSuccessors() == 1) { - pBuilder->setPosition(basicBlock.get(), basicBlock->end()); - pBuilder->createUncondBrInst(basicBlock->getSuccessors()[0], {}); - } - } - } else { - break; - } - } - } - } -} - -} // namespace sysy diff --git a/src/include/DeadCodeElimination.h b/src/include/DeadCodeElimination.h deleted file mode 100644 index 9864a2d..0000000 --- a/src/include/DeadCodeElimination.h +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once - -#include "IR.h" -#include "SysYIRAnalyser.h" -#include "SysYIRPrinter.h" -#include "SysYIROptUtils.h" - -namespace sysy { - -class DeadCodeElimination { - private: - Module *pModule; - ControlFlowAnalysis *pCFA; // 控制流分析指针 - ActiveVarAnalysis *pAVA; // 活跃变量分析指针 - DataFlowAnalysisUtils dataFlowAnalysisUtils; // 数据流分析工具类 - - public: - explicit DeadCodeElimination(Module *pMoudle, - ControlFlowAnalysis *pCFA = nullptr, - ActiveVarAnalysis *pAVA = nullptr) - : pModule(pMoudle), pCFA(pCFA), pAVA(pAVA), dataFlowAnalysisUtils() {} // 构造函数 - - // TODO:根据参数传入的passes来运行不同的死代码删除流程 - // void runDCEPipeline(const std::vector& passes = { - // "dead-store", 
"redundant-load-store", "dead-load", "dead-alloca", "dead-global" - // }); - void runDCEPipeline(); // 运行死代码删除 - - void eliminateDeadStores(Function* func, bool& changed); // 消除无用存储 - void eliminateDeadLoads(Function* func, bool& changed); // 消除无用加载 - void eliminateDeadAllocas(Function* func, bool& changed); // 消除无用内存分配 - void eliminateDeadGlobals(bool& changed); // 消除无用全局变量 - void eliminateDeadIndirectiveAllocas(Function* func, bool& changed); // 消除无用间接内存分配(phi节点) - void eliminateDeadRedundantLoadStore(Function* func, bool& changed); // 消除冗余加载和存储 -}; -} // namespace sysy diff --git a/src/include/Dom.h b/src/include/Dom.h index 6b38f83..1a09f34 100644 --- a/src/include/Dom.h +++ b/src/include/Dom.h @@ -41,7 +41,7 @@ public: // 实现 getPassID void* getPassID() const override { return &ID; } - bool runOnFunction(Function* F) override; + bool runOnFunction(Function* F, AnalysisManager &AM) override; std::unique_ptr getResult() override; diff --git a/src/include/Liveness.h b/src/include/Liveness.h index d804d33..713053a 100644 --- a/src/include/Liveness.h +++ b/src/include/Liveness.h @@ -20,7 +20,8 @@ class Instruction; class LivenessAnalysisResult : public AnalysisResultBase { public: LivenessAnalysisResult(Function *F); // 构造函数,需要一个函数来关联结果 - + LivenessAnalysisResult::LivenessAnalysisResult(Function *F) : AssociatedFunction(F) {} + // 获取给定基本块的 LiveIn 集合 const std::set *getLiveIn(BasicBlock *BB) const; @@ -52,7 +53,7 @@ public: static char ID; // LLVM 风格的唯一 ID LivenessAnalysisPass() : AnalysisPass("LivenessAnalysis", Pass::Granularity::Function) {} - + // 实现 getPassID void *getPassID() const override { return &ID; } diff --git a/src/include/Mem2Reg.h b/src/include/Mem2Reg.h deleted file mode 100644 index 2c65583..0000000 --- a/src/include/Mem2Reg.h +++ /dev/null @@ -1,79 +0,0 @@ -// 假设 Mem2Reg.h 看起来像这样 (你需要根据实际情况调整) -#ifndef SYSY_MEM2REG_H -#define SYSY_MEM2REG_H - -#include -#include -#include -#include -#include // For computeIteratedDomFrontiers - -// Include your IR 
and analysis headers -#include "IR.h" -#include "IRBuilder.h" -#include "SysYIRAnalyser.h" -#include "SysYIROptUtils.h" - -namespace sysy { - -class Mem2Reg { -private: - Module* pModule; - IRBuilder* pBuilder; - ControlFlowAnalysis* controlFlowAnalysis; - ActiveVarAnalysis* activeVarAnalysis; - DataFlowAnalysisUtils dataFlowAnalysisUtils; // If this is part of Mem2Reg or an external helper - -public: - Mem2Reg(Module* module, IRBuilder* builder, ControlFlowAnalysis* cfa, ActiveVarAnalysis* ava) - : pModule(module), pBuilder(builder), controlFlowAnalysis(cfa), activeVarAnalysis(ava) {} - // Constructor initializes members - void run(); - - // --- 新增的私有成员变量和方法,用于SSA转换上下文 --- - // 这是核心,用于存储 SSA 转换过程中的状态 - std::vector currentFunctionAllocas; // 当前函数中所有可提升的 alloca - // alloca -> set of BasicBlocks where it's defined (stored into) - std::unordered_map> allocaDefsBlock; - // alloca -> set of BasicBlocks where it's used (loaded from) - std::unordered_map> allocaUsesBlock; - - // BasicBlock -> Map of (PhiInst, Original AllocaInst) - // 用于在 rename 阶段通过 phi 指令找到它代表的原始 alloca - std::unordered_map> phiMap; - std::vector allPhiInstructions; // 收集所有创建的 Phi 指令以便后续简化和清理 - - // --- 核心 SSA 转换辅助函数 --- - // 计算给定定义块集合的迭代支配边界 - std::unordered_set computeIteratedDomFrontiers(const std::unordered_set& blocks); - - // 分析一个 alloca 的所有 uses,填充 allocaDefsBlock 和 allocaUsesBlock - void allocaAnalysis(AllocaInst* alloca); - - // 判断一个 alloca 是否可以被提升为寄存器 (无地址逃逸,标量类型) - bool is_promoted(AllocaInst* alloca); - - // 在迭代支配边界处插入 Phi 指令 - void insertPhiNodes(Function* func); - - // 递归地重命名基本块中的变量并填充 Phi 指令 - // 这里的 `count` 和 `stacks` 是临时的,用于 DFS 过程中传递状态 - void renameBlock(BasicBlock* block, - std::unordered_map& currentIncomings, - std::unordered_set& visitedBlocks); // 修改为传递 map 和 set - - // 简化冗余的 Phi 指令 (当所有输入都相同时) - void simplifyphi(PhiInst* phi); - - // 获取前驱块在后继块前驱列表中的索引,用于 Phi 指令入边 - int getPredIndex(BasicBlock* pred, BasicBlock* succ); - - // --- Mem2Reg 的主要工作流函数 --- - // 对单个函数执行内存到寄存器的提升 - bool 
promoteMemoryToRegisters(Function* func); - -}; - -} // namespace sysy - -#endif // SYSY_MEM2REG_H \ No newline at end of file diff --git a/src/include/Pass.h b/src/include/Pass.h index 063462f..17a61b8 100644 --- a/src/include/Pass.h +++ b/src/include/Pass.h @@ -111,6 +111,9 @@ private: // AnalysisManager: 负责管理和提供分析结果 // ====================================================================== class AnalysisManager { +private: + std::map, std::unique_ptr> cachedResults; + // cachedResults 存储分析结果,键是 (Function*, AnalysisPass ID) public: AnalysisManager() = default; ~AnalysisManager() = default; @@ -172,97 +175,38 @@ public: cachedResults = std::move(newCachedResults); } } - -private: - std::map, std::unique_ptr> cachedResults; }; // ====================================================================== // PassManager:遍管理器 // ====================================================================== class PassManager { - +private: + std::vector> passes; + AnalysisManager analysisManager; Module *pmodule; - AnalysisManager &AM; // 引用 AnalysisManager,用于获取分析结果 public: PassManager() = default; ~PassManager() = default; - // 添加遍:现在接受 Pass 的 ID,而不是直接的 unique_ptr - void addPass(void *passID) { - PassRegistry ®istry = PassRegistry::getPassRegistry(); - std::unique_ptr P = registry.createPass(passID); - if (!P) { - // Error: Pass not found or failed to create - return; - } - - passes.push_back(std::move(P)); + PassManager(Module *module) : pmodule(module) { + analysisManager = AnalysisManager(); // 初始化分析管理器 } - + // 运行所有注册的遍 - bool run(Module *M) { - bool changed = false; - for (const auto &p : passes) { - bool passChanged = false; // 记录当前遍是否修改了 IR + bool run(); + + // 运行优化管道主要负责注册和运行优化遍 + // 这里可以根据 optLevel 和 DEBUG 控制不同的优化遍 + void runOptimizationPipeline(Module* moduleIR, int optLevel); - // 处理优化遍的分析依赖和失效 - if (p->getPassKind() == Pass::PassKind::Optimization) { - OptimizationPass *optPass = static_cast(p.get()); - std::set analysisDependencies; - std::set analysisInvalidations; - 
optPass->getAnalysisUsage(analysisDependencies, analysisInvalidations); - - // PassManager 不显式运行分析依赖。 - // 而是优化遍在 runOnFunction 内部通过 AnalysisManager.getAnalysisResult 按需请求。 - } - - if (p->getGranularity() == Pass::Granularity::Module) { - passChanged = p->runOnModule(M, AM); - } else if (p->getGranularity() == Pass::Granularity::Function) { - for (auto &funcPair : M->getFunctions()) { - Function *F = funcPair.second.get(); - passChanged = p->runOnFunction(F, AM) || passChanged; - - if (passChanged && p->getPassKind() == Pass::PassKind::Optimization) { - OptimizationPass *optPass = static_cast(p.get()); - std::set analysisDependencies; - std::set analysisInvalidations; - optPass->getAnalysisUsage(analysisDependencies, analysisInvalidations); - for (void *invalidationID : analysisInvalidations) { - analysisManager.invalidateAnalysis(invalidationID, F); - } - } - } - } else if (p->getGranularity() == Pass::Granularity::BasicBlock) { - for (auto &funcPair : M->getFunctions()) { - Function *F = funcPair.second.get(); - for (auto &bbPtr : funcPair.second->getBasicBlocks()) { - passChanged = p->runOnBasicBlock(bbPtr.get(), AM) || passChanged; - - if (passChanged && p->getPassKind() == Pass::PassKind::Optimization) { - OptimizationPass *optPass = static_cast(p.get()); - std::set analysisDependencies; - std::set analysisInvalidations; - optPass->getAnalysisUsage(analysisDependencies, analysisInvalidations); - for (void *invalidationID : analysisInvalidations) { - analysisManager.invalidateAnalysis(invalidationID, F); - } - } - } - } - } - changed = changed || passChanged; - } - return changed; - } + // 添加遍:现在接受 Pass 的 ID,而不是直接的 unique_ptr + void addPass(void *passID); AnalysisManager &getAnalysisManager() { return analysisManager; } -private: - std::vector> passes; - AnalysisManager analysisManager; + }; // ====================================================================== diff --git a/src/include/Reg2Mem.h b/src/include/Reg2Mem.h deleted file mode 100644 index 
8eec1b6..0000000 --- a/src/include/Reg2Mem.h +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once - -#include "IR.h" -#include "IRBuilder.h" -#include "SysYIROptUtils.h" - -namespace sysy { -/** - * Reg2Mem(后端未做phi指令翻译) - */ -class Reg2Mem { -private: - Module *pModule; - IRBuilder *pBuilder; - -public: - Reg2Mem(Module *pMoudle, IRBuilder *pBuilder) : pModule(pMoudle), pBuilder(pBuilder) {} - - void DeletePhiInst(); -}; - -} // namespace sysy \ No newline at end of file diff --git a/src/include/SysYIRPass.h b/src/include/SysYIRPass.h deleted file mode 100644 index cb6e6a9..0000000 --- a/src/include/SysYIRPass.h +++ /dev/null @@ -1,59 +0,0 @@ -#pragma once - -#include "IR.h" - -namespace sysy { - -// 前置声明 -class FunctionPass; -class ModulePass; -class AnalysisPass; -class PassManager; - -// 抽象基类 Pass -class Pass { -public: - enum PassKind { - PK_Function, - PK_Module, - PK_Analysis - }; - - Pass(PassKind kind, const std::string& name) : Kind(kind), Name(name) {} - virtual ~Pass() = default; - - PassKind getPassKind() const { return Kind; } - const std::string& getPassName() const { return Name; } - - // 每个Pass需要实现此方法来执行其逻辑 - // 具体的run方法将根据Pass类型在FunctionPass和ModulePass中定义 -protected: - PassKind Kind; - std::string Name; -}; - -// 针对函数的优化遍 -class FunctionPass : public Pass { -public: - FunctionPass(const std::string& name) : Pass(PK_Function, name) {} - // 真正的优化逻辑将在此方法中实现 - virtual bool runOnFunction(Function& F) = 0; -}; - -// 针对模块的优化遍 -class ModulePass : public Pass { -public: - ModulePass(const std::string& name) : Pass(PK_Module, name) {} - // 真正的优化逻辑将在此方法中实现 - virtual bool runOnModule(Module& M) = 0; -}; - -// 分析遍 -class AnalysisPass : public Pass { -public: - AnalysisPass(const std::string& name) : Pass(PK_Analysis, name) {} - // 分析遍通常需要一个模块或函数作为输入,并计算出分析结果 - // 具体分析结果的存储和访问方式需要设计 -}; - -} // namespace sysy \ No newline at end of file diff --git a/src/sysyc.cpp b/src/sysyc.cpp index 1afe7c9..c22c0a8 100644 --- a/src/sysyc.cpp +++ b/src/sysyc.cpp @@ -13,13 +13,10 @@ 
using namespace antlr4; #include "SysYIRGenerator.h" #include "SysYIRPrinter.h" -#include "SysYIRCFGOpt.h" +#include "SysYIRCFGOpt.h" // 包含 CFG 优化 #include "RISCv64Backend.h" -// #include "SysYIRAnalyser.h" -// #include "DeadCodeElimination.h" +#include "Pass.h" // 包含新的 Pass 框架 #include "AddressCalculationExpansion.h" -// #include "Mem2Reg.h" -// #include "Reg2Mem.h" using namespace sysy; @@ -131,17 +128,13 @@ int main(int argc, char **argv) { if (argStopAfter == "ird") { DEBUG = 1; // 这里可能需要更精细地控制 DEBUG 的开启时机和范围 } - // 默认优化 pass (在所有优化级别都会执行) - SysYCFGOpt cfgopt(moduleIR, builder); - cfgopt.SysYOptimizateAfterIR(); - - // ControlFlowAnalysis cfa(moduleIR); - // cfa.init(); - // ActiveVarAnalysis ava; - // ava.init(moduleIR); + + // 创建 Pass 管理器并运行优化管道 + PassManager passManager(moduleIR); // 创建 Pass 管理器 + passManager.runOptimizationPipeline(moduleIR, optLevel); if (DEBUG) { - cout << "=== After CFA & AVA (Default) ===\n"; + cout << "=== Init IR ===\n"; SysYPrinter(moduleIR).printIR(); // 临时打印器用于调试 } AddressCalculationExpansion ace(moduleIR, builder); From 3df3d7a097e91f15002b97ce361cc03f54f8c8c8 Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Tue, 22 Jul 2025 22:38:47 +0800 Subject: [PATCH 19/35] =?UTF-8?q?[midend]=E9=80=9A=E8=BF=87=E7=BC=96?= =?UTF-8?q?=E8=AF=91=EF=BC=8CTODO=EF=BC=9A=E9=AA=8C=E8=AF=81=E6=AD=A3?= =?UTF-8?q?=E7=A1=AE=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/CMakeLists.txt | 4 +++- src/Dom.cpp | 2 +- src/Liveness.cpp | 15 +++++++++++---- src/Pass.cpp | 5 +++-- src/include/Liveness.h | 3 +-- src/include/Pass.h | 22 +++++++++++++--------- 6 files changed, 32 insertions(+), 19 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a052e5b..d82e1e3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,7 +22,9 @@ add_executable(sysyc SysYIRGenerator.cpp SysYIRPrinter.cpp SysYIRCFGOpt.cpp - # SysYIRAnalyser.cpp + Pass.cpp + Dom.cpp + Liveness.cpp 
# DeadCodeElimination.cpp AddressCalculationExpansion.cpp # Mem2Reg.cpp diff --git a/src/Dom.cpp b/src/Dom.cpp index beb1d65..e82dc7d 100644 --- a/src/Dom.cpp +++ b/src/Dom.cpp @@ -165,7 +165,7 @@ void DominatorTree::computeDominanceFrontiers(Function *F) { // ============================================================== -bool DominatorTreeAnalysisPass::runOnFunction(Function* F) { +bool DominatorTreeAnalysisPass::runOnFunction(Function* F, AnalysisManager &AM) { CurrentDominatorTree = std::make_unique(F); CurrentDominatorTree->computeDominators(F); CurrentDominatorTree->computeIDoms(F); diff --git a/src/Liveness.cpp b/src/Liveness.cpp index e1231f1..2fc5142 100644 --- a/src/Liveness.cpp +++ b/src/Liveness.cpp @@ -89,12 +89,19 @@ void LivenessAnalysisResult::computeLiveness(Function *F) { // TODO : 目前为逆序遍历基本块,考虑反向拓扑序遍历基本块 // 逆序遍历基本块 - std::list> basicBlocks(F->getBasicBlocks().begin(), F->getBasicBlocks().end()); - std::reverse(basicBlocks.begin(), basicBlocks.end()); + // std::list> basicBlocks(F->getBasicBlocks().begin(), F->getBasicBlocks().end()); + // std::reverse(basicBlocks.begin(), basicBlocks.end()); // 然后遍历 basicBlocks + // 创建一个 BasicBlock* 的列表来存储指针,避免拷贝 unique_ptr + // Option 1: Using std::vector (preferred for performance with reverse) + std::vector basicBlocksPointers; + for (const auto& bb_ptr : F->getBasicBlocks()) { + basicBlocksPointers.push_back(bb_ptr.get()); + } + std::reverse(basicBlocksPointers.begin(), basicBlocksPointers.end()); - for (auto bb_iter = basicBlocks.begin(); bb_iter != basicBlocks.end(); ++bb_iter) { - BasicBlock *bb = bb_iter->get(); + for (auto bb_iter = basicBlocksPointers.begin(); bb_iter != basicBlocksPointers.end(); ++bb_iter) { + BasicBlock *bb = *bb_iter; // 获取 BasicBlock 指针 if (!bb) continue; // 避免空指针 diff --git a/src/Pass.cpp b/src/Pass.cpp index c9ec38d..87e541b 100644 --- a/src/Pass.cpp +++ b/src/Pass.cpp @@ -11,9 +11,8 @@ #include "Dom.h" #include "Liveness.h" -namespace sysy { - extern int DEBUG; // 全局调试标志 
+namespace sysy { // ====================================================================== // 封装优化流程的函数:包含Pass注册和迭代运行逻辑 @@ -32,6 +31,8 @@ void PassManager::runOptimizationPipeline(Module* moduleIR, int optLevel) { sysy::PassManager pm(moduleIR); // 3. 根据优化级别添加不同的优化遍 + // TODO : 根据 optLevel 添加不同的优化遍 + // 讨论 是不动点迭代进行优化遍还是手动客制化优化遍的顺序? if (optLevel >= 1) { if (DEBUG) std::cout << "Applying -O1 optimizations.\n"; diff --git a/src/include/Liveness.h b/src/include/Liveness.h index 713053a..7cfe01c 100644 --- a/src/include/Liveness.h +++ b/src/include/Liveness.h @@ -19,8 +19,7 @@ class Instruction; // 它将包含 LiveIn 和 LiveOut 集合 class LivenessAnalysisResult : public AnalysisResultBase { public: - LivenessAnalysisResult(Function *F); // 构造函数,需要一个函数来关联结果 - LivenessAnalysisResult::LivenessAnalysisResult(Function *F) : AssociatedFunction(F) {} + LivenessAnalysisResult(Function *F) : AssociatedFunction(F) {} // 获取给定基本块的 LiveIn 集合 const std::set *getLiveIn(BasicBlock *BB) const; diff --git a/src/include/Pass.h b/src/include/Pass.h index 17a61b8..61839fa 100644 --- a/src/include/Pass.h +++ b/src/include/Pass.h @@ -7,9 +7,14 @@ #include #include // For std::type_index (although void* ID is more common in LLVM) #include +#include "IR.h" namespace sysy { +//前向声明 +class PassManager; +class AnalysisManager; + // 抽象基类:分析结果 class AnalysisResultBase { public: @@ -144,7 +149,7 @@ public: // 确保分析遍的粒度与请求的上下文匹配 if (analysisPass->getGranularity() == Pass::Granularity::Function) { - analysisPass->runOnFunction(F); // 运行分析遍 + analysisPass->runOnFunction(F, *this); // 运行分析遍 // 获取结果并缓存 std::unique_ptr result = analysisPass->getResult(); T *specificResult = static_cast(result.get()); @@ -166,13 +171,14 @@ public: cachedResults.erase({F, analysisID}); } else { // 使所有函数的特定分析结果失效 - std::map, std::unique_ptr> newCachedResults; - for (auto &pair : cachedResults) { - if (pair.first.second != analysisID) { - newCachedResults.insert(std::move(pair)); + // 遍历并删除匹配的元素,避免拷贝 unique_ptr + for (auto it = 
cachedResults.begin(); it != cachedResults.end(); ) { + if (it->first.second == analysisID) { + it = cachedResults.erase(it); // erase 返回下一个元素的迭代器 + } else { + ++it; } } - cachedResults = std::move(newCachedResults); } } }; @@ -190,9 +196,7 @@ public: PassManager() = default; ~PassManager() = default; - PassManager(Module *module) : pmodule(module) { - analysisManager = AnalysisManager(); // 初始化分析管理器 - } + PassManager(Module *module) : pmodule(module) , analysisManager() {} // 运行所有注册的遍 bool run(); From 87d38be255c8d3fc61b2d7710a836af04b84f7e0 Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Wed, 23 Jul 2025 17:19:11 +0800 Subject: [PATCH 20/35] =?UTF-8?q?[midend]=E6=9B=B4=E6=96=B0=E9=81=8D?= =?UTF-8?q?=E9=9D=99=E6=80=81ID=E5=AE=9A=E4=B9=89=E6=96=B9=E6=B3=95?= =?UTF-8?q?=EF=BC=8C=20=E6=B3=A8=E5=86=8C=E9=81=8D=E6=A8=A1=E6=9D=BF?= =?UTF-8?q?=E5=87=BD=E6=95=B0=E9=87=8D=E6=9E=84=EF=BC=88=E9=92=88=E5=AF=B9?= =?UTF-8?q?=E9=81=8D=E7=9A=84=E4=B8=8D=E5=90=8C=E6=9E=84=E9=80=A0=E6=96=B9?= =?UTF-8?q?=E6=B3=95=EF=BC=89=EF=BC=8C=20=E4=BF=AE=E5=A4=8Dphi=E6=8C=87?= =?UTF-8?q?=E4=BB=A4=E6=9B=B4=E6=96=B0=E5=BC=95=E8=B5=B7=E7=9A=84=E6=97=A7?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=E9=94=99=E8=AF=AF=EF=BC=8C=20=E5=B0=86CFG?= =?UTF-8?q?=E4=BC=98=E5=8C=96=E9=80=82=E9=85=8D=E5=88=B0=E7=8E=B0=E6=9C=89?= =?UTF-8?q?=E7=BB=88=E7=AB=AF=E6=A1=86=E6=9E=B6=E4=B8=AD=EF=BC=8C=20?= =?UTF-8?q?=E7=8B=AC=E7=AB=8BCFG=E4=BC=98=E5=8C=96=E6=96=B9=E6=B3=95?= =?UTF-8?q?=E4=BD=BF=E5=BE=97=E5=85=B6=E4=BB=96=E4=BC=98=E5=8C=96=E9=81=8D?= =?UTF-8?q?=E8=83=BD=E7=8B=AC=E7=AB=8B=E8=B0=83=E7=94=A8=EF=BC=8C=20usedel?= =?UTF-8?q?ete=E6=96=B9=E6=B3=95=E5=9B=9E=E8=B0=83=E5=8F=96=E6=B6=88?= =?UTF-8?q?=E5=88=A0=E9=99=A4=E5=8A=9F=E8=83=BD=E3=80=82=20IRGenerator?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=E9=A3=8E=E6=A0=BC=E4=BF=AE=E6=94=B9=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Pass_ID_List.md | 5 + src/Dom.cpp | 3 +- src/Liveness.cpp | 3 +- src/Pass.cpp | 70 
+++++-- src/SysYIRCFGOpt.cpp | 389 +++++++++++++++++++---------------- src/SysYIRGenerator.cpp | 24 +-- src/include/Dom.h | 2 +- src/include/Liveness.h | 2 +- src/include/Pass.h | 185 ++++++++++++----- src/include/SysYIRCFGOpt.h | 106 ++++++---- src/include/SysYIROptUtils.h | 5 +- src/sysyc.cpp | 13 +- 12 files changed, 509 insertions(+), 298 deletions(-) create mode 100644 Pass_ID_List.md diff --git a/Pass_ID_List.md b/Pass_ID_List.md new file mode 100644 index 0000000..79de64c --- /dev/null +++ b/Pass_ID_List.md @@ -0,0 +1,5 @@ +# 记录中端遍的唯一标识ID + +| 名称 | 优化级别 | 开发进度 | +| ------------ | ------------ | ---------- | +| CFG优化 | 函数级 | 已完成 | \ No newline at end of file diff --git a/src/Dom.cpp b/src/Dom.cpp index e82dc7d..d476c49 100644 --- a/src/Dom.cpp +++ b/src/Dom.cpp @@ -5,8 +5,7 @@ namespace sysy { // 初始化 支配树静态 ID -char DominatorTreeAnalysisPass::ID = 0; - +void *DominatorTreeAnalysisPass::ID = (void *)&DominatorTreeAnalysisPass::ID; // ============================================================== // DominatorTree 结果类的实现 // ============================================================== diff --git a/src/Liveness.cpp b/src/Liveness.cpp index 2fc5142..11c0f71 100644 --- a/src/Liveness.cpp +++ b/src/Liveness.cpp @@ -7,8 +7,7 @@ namespace sysy { // 初始化静态 ID -char LivenessAnalysisPass::ID = 0; // 任何唯一的地址都可以,这里用 0 - +void *LivenessAnalysisPass::ID = (void *)&LivenessAnalysisPass::ID; // ============================================================== // LivenessAnalysisResult 结果类的实现 // ============================================================== diff --git a/src/Pass.cpp b/src/Pass.cpp index 87e541b..46c0588 100644 --- a/src/Pass.cpp +++ b/src/Pass.cpp @@ -1,15 +1,14 @@ -// Pass.cpp -#include "Pass.h" +#include "Dom.h" +#include "Liveness.h" #include "SysYIRCFGOpt.h" #include "SysYIRPrinter.h" +#include "Pass.h" #include #include #include #include #include #include -#include "Dom.h" -#include "Liveness.h" extern int DEBUG; // 全局调试标志 namespace sysy { @@ -18,18 +17,45 @@ 
namespace sysy { // 封装优化流程的函数:包含Pass注册和迭代运行逻辑 // ====================================================================== -void PassManager::runOptimizationPipeline(Module* moduleIR, int optLevel) { +void PassManager::runOptimizationPipeline(Module* moduleIR, IRBuilder* builderIR, int optLevel) { if (DEBUG) std::cout << "--- Starting Middle-End Optimizations (Level -O" << optLevel << ") ---\n"; - // 1. 注册所有可用的分析遍和优化遍 - // 这些注册只需执行一次。 - sysy::registerAnalysisPass(); - sysy::registerAnalysisPass(); + /* + 中端开发框架基本流程: + 1) 分析pass + 1. 实现分析pass并引入Pass.cpp + 2. 注册分析pass + 2) 优化pass + 1. 实现优化pass并引入Pass.cpp + 2. 注册优化pass + 3. 添加优化passid + */ + // 注册分析遍 + registerAnalysisPass(); + registerAnalysisPass(); + // 注册优化遍 + registerOptimizationPass(); + registerOptimizationPass(); + registerOptimizationPass(); + + registerOptimizationPass(builderIR); + registerOptimizationPass(builderIR); + registerOptimizationPass(builderIR); + + if (optLevel >= 1) { + if (DEBUG) std::cout << "Applying -O1 optimizations.\n"; + + // 只添加优化遍的 ID + this->addPass(&SysYDelInstAfterBrPass::ID); + this->addPass(&SysYDelNoPreBLockPass::ID); + this->addPass(&SysYBlockMergePass::ID); + this->addPass(&SysYDelEmptyBlockPass::ID); + this->addPass(&SysYCondBr2BrPass::ID); + this->addPass(&SysYAddReturnPass::ID); + } // 2. 创建遍管理器 - sysy::PassManager pm(moduleIR); - // 3. 根据优化级别添加不同的优化遍 // TODO : 根据 optLevel 添加不同的优化遍 // 讨论 是不动点迭代进行优化遍还是手动客制化优化遍的顺序? 
@@ -42,7 +68,7 @@ void PassManager::runOptimizationPipeline(Module* moduleIR, int optLevel) { while(changed_in_iteration) { iteration_count++; if (DEBUG) std::cout << "Optimization iteration: " << iteration_count << std::endl; - changed_in_iteration = pm.run(); // 运行一次所有添加到 PassManager 的遍 + changed_in_iteration = run(); // 运行一次所有添加到 PassManager 的遍 if (DEBUG && changed_in_iteration) { std::cout << "=== IR after iteration " << iteration_count << " ===\n"; SysYPrinter printer_iter(moduleIR); @@ -130,4 +156,24 @@ bool PassManager::run() { } + +template void registerAnalysisPass() { + PassRegistry::getPassRegistry().registerPass(&AnalysisPassType::ID, + []() { return std::make_unique(); }); +} + +template ::value, int>::type = 0> +void registerOptimizationPass(IRBuilder* builder) { + PassRegistry::getPassRegistry().registerPass(&OptimizationPassType::ID, + [builder]() { return std::make_unique(builder); }); +} + +template ::value, int>::type = 0> +void registerOptimizationPass() { + PassRegistry::getPassRegistry().registerPass(&OptimizationPassType::ID, + []() { return std::make_unique(); }); +} + } // namespace sysy \ No newline at end of file diff --git a/src/SysYIRCFGOpt.cpp b/src/SysYIRCFGOpt.cpp index c386379..63d67d4 100644 --- a/src/SysYIRCFGOpt.cpp +++ b/src/SysYIRCFGOpt.cpp @@ -6,14 +6,25 @@ #include #include #include -#include "IR.h" -#include "IRBuilder.h" +#include // 引入队列,SysYDelNoPreBLock需要 namespace sysy { +// 定义静态ID +void *SysYDelInstAfterBrPass::ID = (void *)&SysYDelInstAfterBrPass::ID; +void *SysYDelEmptyBlockPass::ID = (void *)&SysYDelEmptyBlockPass::ID; +void *SysYDelNoPreBLockPass::ID = (void *)&SysYDelNoPreBLockPass::ID; +void *SysYBlockMergePass::ID = (void *)&SysYBlockMergePass::ID; +void *SysYAddReturnPass::ID = (void *)&SysYAddReturnPass::ID; +void *SysYCondBr2BrPass::ID = (void *)&SysYCondBr2BrPass::ID; + + +// ====================================================================== +// SysYCFGOptUtils: 辅助工具类,包含实际的CFG优化逻辑 +// 
====================================================================== // 删除br后的无用指令 -bool SysYCFGOpt::SysYDelInstAfterBr(Function *func) { +bool SysYCFGOptUtils::SysYDelInstAfterBr(Function *func) { bool changed = false; auto basicBlocks = func->getBasicBlocks(); @@ -22,11 +33,10 @@ bool SysYCFGOpt::SysYDelInstAfterBr(Function *func) { auto &instructions = basicBlock->getInstructions(); auto Branchiter = instructions.end(); for (auto iter = instructions.begin(); iter != instructions.end(); ++iter) { - if (Branch) - SysYIROptUtils::usedelete(iter->get()); - else if ((*iter)->isTerminator()){ + if ((*iter)->isTerminator()){ Branch = true; Branchiter = iter; + break; } } if (Branchiter != instructions.end()) ++Branchiter; @@ -61,8 +71,8 @@ bool SysYCFGOpt::SysYDelInstAfterBr(Function *func) { return changed; } -// 合并空基本块 -bool SysYCFGOpt::SysYBlockMerge(Function *func) { +// 合并基本块 +bool SysYCFGOptUtils::SysYBlockMerge(Function *func) { bool changed = false; for (auto blockiter = func->getBasicBlocks().begin(); @@ -82,12 +92,12 @@ bool SysYCFGOpt::SysYBlockMerge(Function *func) { (--thelastinstinst); if (thelastinstinst->get()->isUnconditional()) { SysYIROptUtils::usedelete(thelastinstinst->get()); - block->getInstructions().erase(thelastinstinst); + thelastinstinst = block->getInstructions().erase(thelastinstinst); } else if (thelastinstinst->get()->isConditional()) { // 如果是条件分支,判断条件是否相同,主要优化相同布尔表达式 if (thelastinstinst->get()->getOperand(1)->getName() == thelastinstinst->get()->getOperand(1)->getName()) { SysYIROptUtils::usedelete(thelastinstinst->get()); - block->getInstructions().erase(thelastinstinst); + thelastinstinst = block->getInstructions().erase(thelastinstinst); } } } @@ -132,7 +142,7 @@ bool SysYCFGOpt::SysYBlockMerge(Function *func) { } // 删除无前驱块,兼容SSA后的处理 -bool SysYCFGOpt::SysYDelNoPreBLock(Function *func) { +bool SysYCFGOptUtils::SysYDelNoPreBLock(Function *func) { bool changed = false; @@ -156,29 +166,26 @@ bool SysYCFGOpt::SysYDelNoPreBLock(Function 
*func) { } // 删除不可达基本块指令 - for (auto blockIter = func->getBasicBlocks().begin(); blockIter != func->getBasicBlocks().end();blockIter++) { - if (!blockIter->get()->getreachable()) - for (auto &iterInst : blockIter->get()->getInstructions()) - SysYIROptUtils::usedelete(iterInst.get()); - + for (auto blockIter = func->getBasicBlocks().begin(); blockIter != func->getBasicBlocks().end(); blockIter++) { + if (!blockIter->get()->getreachable()) { + for (auto instIter = blockIter->get()->getInstructions().begin(); + instIter != blockIter->get()->getInstructions().end();) { + SysYIROptUtils::usedelete(instIter->get()); + instIter = blockIter->get()->getInstructions().erase(instIter); + } } +} for (auto blockIter = func->getBasicBlocks().begin(); blockIter != func->getBasicBlocks().end();) { if (!blockIter->get()->getreachable()) { for (auto succblock : blockIter->get()->getSuccessors()) { - int indexphi = 1; - for (auto pred : succblock->getPredecessors()) { - if (pred == blockIter->get()) { - break; - } - indexphi++; - } for (auto &phiinst : succblock->getInstructions()) { - if (phiinst->getKind() != Instruction::kPhi) { - break; - } - phiinst->removeOperand(indexphi); + if (phiinst->getKind() != Instruction::kPhi) { + break; + } + // 使用 delBlk 方法正确地删除对应于被删除基本块的传入值 + dynamic_cast(phiinst.get())->delBlk(blockIter->get()); } } // 删除不可达基本块,注意迭代器不可达问题 @@ -193,7 +200,7 @@ bool SysYCFGOpt::SysYDelNoPreBLock(Function *func) { } // 删除空块 -bool SysYCFGOpt::SysYDelEmptyBlock(Function *func, IRBuilder* pBuilder) { +bool SysYCFGOptUtils::SysYDelEmptyBlock(Function *func, IRBuilder* pBuilder) { bool changed = false; // 收集不可达基本块 @@ -218,24 +225,29 @@ bool SysYCFGOpt::SysYDelEmptyBlock(Function *func, IRBuilder* pBuilder) { break; } } - if(onlyPhi) + if(onlyPhi && basicBlock->getNumSuccessors() == 1) // 确保有后继且只有一个 EmptyBlocks[basicBlock.get()] = basicBlock->getSuccessors().front(); } - - } // 更新基本块信息,增加必要指令 for (auto &basicBlock : basicBlocks) { - // 把空块转换成只有跳转指令的不可达块 + // 
把空块转换成只有跳转指令的不可达块 (这段逻辑在优化遍中可能需要调整,这里是原样保留) + // 通常,DelEmptyBlock 应该在BlockMerge之后运行,如果存在完全空块,它会尝试填充一个Br指令。 + // 但是,它主要目的是重定向跳转。 if (distance(basicBlock->begin(), basicBlock->end()) == 0) { if (basicBlock->getNumSuccessors() == 0) { continue; } if (basicBlock->getNumSuccessors() > 1) { - assert(""); + // 如果一个空块有多个后继,说明CFG结构有问题或者需要特殊处理,这里简单assert + assert(false && "Empty block with multiple successors found during SysYDelEmptyBlock"); } - pBuilder->setPosition(basicBlock.get(), basicBlock->end()); - pBuilder->createUncondBrInst(basicBlock->getSuccessors()[0], {}); + // 这里的逻辑有点问题,如果一个块是空的,且只有一个后继,应该直接跳转到后继。 + // 如果这个块最终被删除了,那么其前驱也需要重定向。 + // 这个循环的目的是重定向现有的跳转指令,而不是创建新的。 + // 所以下面的逻辑才是核心。 + // pBuilder->setPosition(basicBlock.get(), basicBlock->end()); + // pBuilder->createUncondBrInst(basicBlock->getSuccessors()[0], {}); continue; } @@ -247,50 +259,55 @@ bool SysYCFGOpt::SysYDelEmptyBlock(Function *func, IRBuilder* pBuilder) { BasicBlock* OldBrBlock = dynamic_cast(thelastinst->get()->getOperand(0)); BasicBlock *thelastBlockOld = nullptr; // 如果空块链表为多个块 - while (EmptyBlocks.find(dynamic_cast(thelastinst->get()->getOperand(0))) != - EmptyBlocks.end()) { + while (EmptyBlocks.count(dynamic_cast(thelastinst->get()->getOperand(0)))) { thelastBlockOld = dynamic_cast(thelastinst->get()->getOperand(0)); thelastinst->get()->replaceOperand(0, EmptyBlocks[thelastBlockOld]); } - basicBlock->removeSuccessor(OldBrBlock); - OldBrBlock->removePredecessor(basicBlock.get()); - basicBlock->addSuccessor(dynamic_cast(thelastinst->get()->getOperand(0))); - dynamic_cast(thelastinst->get()->getOperand(0))->addPredecessor(basicBlock.get()); + // 如果有重定向发生 + if (thelastBlockOld != nullptr) { + basicBlock->removeSuccessor(OldBrBlock); + OldBrBlock->removePredecessor(basicBlock.get()); + basicBlock->addSuccessor(dynamic_cast(thelastinst->get()->getOperand(0))); + dynamic_cast(thelastinst->get()->getOperand(0))->addPredecessor(basicBlock.get()); + changed = true; // 标记IR被修改 + } + if (thelastBlockOld 
!= nullptr) { - int indexphi = 0; - for (auto &pred : dynamic_cast(thelastinst->get()->getOperand(0))->getPredecessors()) { - if (pred == thelastBlockOld) { - break; - } - indexphi++; - } - - // 更新phi指令的操作数 - // 移除thelastBlockOld对应的phi操作数 for (auto &InstInNew : dynamic_cast(thelastinst->get()->getOperand(0))->getInstructions()) { - if (InstInNew->isPhi()) { - dynamic_cast(InstInNew.get())->removeOperand(indexphi + 1); - } else { - break; - } + if (InstInNew->isPhi()) { + // 使用 delBlk 方法删除 oldBlock 对应的传入值 + dynamic_cast(InstInNew.get())->delBlk(thelastBlockOld); + } else { + break; } + } } } else if (thelastinst->get()->getKind() == Instruction::kCondBr) { auto OldThenBlock = dynamic_cast(thelastinst->get()->getOperand(1)); auto OldElseBlock = dynamic_cast(thelastinst->get()->getOperand(2)); + bool thenChanged = false; + bool elseChanged = false; + BasicBlock *thelastBlockOld = nullptr; - while (EmptyBlocks.find(dynamic_cast(thelastinst->get()->getOperand(1))) != - EmptyBlocks.end()) { + while (EmptyBlocks.count(dynamic_cast(thelastinst->get()->getOperand(1)))) { thelastBlockOld = dynamic_cast(thelastinst->get()->getOperand(1)); thelastinst->get()->replaceOperand( 1, EmptyBlocks[dynamic_cast(thelastinst->get()->getOperand(1))]); + thenChanged = true; } - basicBlock->removeSuccessor(OldThenBlock); - OldThenBlock->removePredecessor(basicBlock.get()); + + if (thenChanged) { + basicBlock->removeSuccessor(OldThenBlock); + OldThenBlock->removePredecessor(basicBlock.get()); + basicBlock->addSuccessor(dynamic_cast(thelastinst->get()->getOperand(1))); + dynamic_cast(thelastinst->get()->getOperand(1))->addPredecessor(basicBlock.get()); + changed = true; // 标记IR被修改 + } + // 处理 then 和 else 分支合并的情况 if (dynamic_cast(thelastinst->get()->getOperand(1)) == dynamic_cast(thelastinst->get()->getOperand(2))) { @@ -299,39 +316,37 @@ bool SysYCFGOpt::SysYDelEmptyBlock(Function *func, IRBuilder* pBuilder) { thelastinst = basicBlock->getInstructions().erase(thelastinst); 
pBuilder->setPosition(basicBlock.get(), basicBlock->end()); pBuilder->createUncondBrInst(thebrBlock, {}); + changed = true; // 标记IR被修改 continue; } - basicBlock->addSuccessor(dynamic_cast(thelastinst->get()->getOperand(1))); - dynamic_cast(thelastinst->get()->getOperand(1))->addPredecessor(basicBlock.get()); - // auto indexInNew = dynamic_cast(thelastinst->get()->getOperand(0))->getPredecessors(). - + if (thelastBlockOld != nullptr) { - int indexphi = 0; - for (auto &pred : dynamic_cast(thelastinst->get()->getOperand(1))->getPredecessors()) { - if (pred == thelastBlockOld) { - break; - } - indexphi++; - } - for (auto &InstInNew : dynamic_cast(thelastinst->get()->getOperand(1))->getInstructions()) { if (InstInNew->isPhi()) { - dynamic_cast(InstInNew.get())->removeOperand(indexphi + 1); + // 使用 delBlk 方法删除 oldBlock 对应的传入值 + dynamic_cast(InstInNew.get())->delBlk(thelastBlockOld); } else { break; } - } + } } thelastBlockOld = nullptr; - while (EmptyBlocks.find(dynamic_cast(thelastinst->get()->getOperand(2))) != - EmptyBlocks.end()) { + while (EmptyBlocks.count(dynamic_cast(thelastinst->get()->getOperand(2)))) { thelastBlockOld = dynamic_cast(thelastinst->get()->getOperand(2)); thelastinst->get()->replaceOperand( 2, EmptyBlocks[dynamic_cast(thelastinst->get()->getOperand(2))]); + elseChanged = true; } - basicBlock->removeSuccessor(OldElseBlock); - OldElseBlock->removePredecessor(basicBlock.get()); + + if (elseChanged) { + basicBlock->removeSuccessor(OldElseBlock); + OldElseBlock->removePredecessor(basicBlock.get()); + basicBlock->addSuccessor(dynamic_cast(thelastinst->get()->getOperand(2))); + dynamic_cast(thelastinst->get()->getOperand(2))->addPredecessor(basicBlock.get()); + changed = true; // 标记IR被修改 + } + // 处理 then 和 else 分支合并的情况 if (dynamic_cast(thelastinst->get()->getOperand(1)) == dynamic_cast(thelastinst->get()->getOperand(2))) { @@ -340,93 +355,94 @@ bool SysYCFGOpt::SysYDelEmptyBlock(Function *func, IRBuilder* pBuilder) { thelastinst = 
basicBlock->getInstructions().erase(thelastinst); pBuilder->setPosition(basicBlock.get(), basicBlock->end()); pBuilder->createUncondBrInst(thebrBlock, {}); + changed = true; // 标记IR被修改 continue; } - basicBlock->addSuccessor(dynamic_cast(thelastinst->get()->getOperand(2))); - dynamic_cast(thelastinst->get()->getOperand(2))->addPredecessor(basicBlock.get()); + + // 如果有重定向发生 + // 需要更新后继块的前驱关系 if (thelastBlockOld != nullptr) { - int indexphi = 0; - for (auto &pred : dynamic_cast(thelastinst->get()->getOperand(2))->getPredecessors()) { - if (pred == thelastBlockOld) { - break; - } - indexphi++; - } for (auto &InstInNew : dynamic_cast(thelastinst->get()->getOperand(2))->getInstructions()) { if (InstInNew->isPhi()) { - dynamic_cast(InstInNew.get())->removeOperand(indexphi + 1); + // 使用 delBlk 方法删除 oldBlock 对应的传入值 + dynamic_cast(InstInNew.get())->delBlk(thelastBlockOld); } else { break; } - } + } } + } else { + // 如果不是终止指令,但有后继 (例如,末尾没有显式终止指令的块) + // 这段逻辑可能需要更严谨的CFG检查来确保正确性 if (basicBlock->getNumSuccessors() == 1) { - pBuilder->setPosition(basicBlock.get(), basicBlock->end()); - pBuilder->createUncondBrInst(basicBlock->getSuccessors()[0], {}); - auto thelastinst = basicBlock->getInstructions().end(); - (--thelastinst); - auto OldBrBlock = dynamic_cast(thelastinst->get()->getOperand(0)); - sysy::BasicBlock *thelastBlockOld = nullptr; - while (EmptyBlocks.find(dynamic_cast(thelastinst->get()->getOperand(0))) != - EmptyBlocks.end()) { - thelastBlockOld = dynamic_cast(thelastinst->get()->getOperand(0)); + // 这里的逻辑似乎是想为没有terminator的块添加一个,但通常这应该在CFG构建阶段完成。 + // 如果这里仍然执行,确保它符合预期。 + // pBuilder->setPosition(basicBlock.get(), basicBlock->end()); + // pBuilder->createUncondBrInst(basicBlock->getSuccessors()[0], {}); + // auto thelastinst = basicBlock->getInstructions().end(); + // (--thelastinst); + // auto OldBrBlock = dynamic_cast(thelastinst->get()->getOperand(0)); + // sysy::BasicBlock *thelastBlockOld = nullptr; + // while 
(EmptyBlocks.find(dynamic_cast(thelastinst->get()->getOperand(0))) != + // EmptyBlocks.end()) { + // thelastBlockOld = dynamic_cast(thelastinst->get()->getOperand(0)); - thelastinst->get()->replaceOperand( - 0, EmptyBlocks[dynamic_cast(thelastinst->get()->getOperand(0))]); - } + // thelastinst->get()->replaceOperand( + // 0, EmptyBlocks[dynamic_cast(thelastinst->get()->getOperand(0))]); + // } - basicBlock->removeSuccessor(OldBrBlock); - OldBrBlock->removePredecessor(basicBlock.get()); - basicBlock->addSuccessor(dynamic_cast(thelastinst->get()->getOperand(0))); - dynamic_cast(thelastinst->get()->getOperand(0))->addPredecessor(basicBlock.get()); - if (thelastBlockOld != nullptr) { - int indexphi = 0; - for (auto &pred : dynamic_cast(thelastinst->get()->getOperand(0))->getPredecessors()) { - if (pred == thelastBlockOld) { - break; - } - indexphi++; - } + // basicBlock->removeSuccessor(OldBrBlock); + // OldBrBlock->removePredecessor(basicBlock.get()); + // basicBlock->addSuccessor(dynamic_cast(thelastinst->get()->getOperand(0))); + // dynamic_cast(thelastinst->get()->getOperand(0))->addPredecessor(basicBlock.get()); + // changed = true; // 标记IR被修改 + // if (thelastBlockOld != nullptr) { + // int indexphi = 0; + // for (auto &pred : dynamic_cast(thelastinst->get()->getOperand(0))->getPredecessors()) { + // if (pred == thelastBlockOld) { + // break; + // } + // indexphi++; + // } - for (auto &InstInNew : dynamic_cast(thelastinst->get()->getOperand(0))->getInstructions()) { - if (InstInNew->isPhi()) { - dynamic_cast(InstInNew.get())->removeOperand(indexphi + 1); - } else { - break; - } - } - } + // for (auto &InstInNew : dynamic_cast(thelastinst->get()->getOperand(0))->getInstructions()) { + // if (InstInNew->isPhi()) { + // dynamic_cast(InstInNew.get())->removeOperand(indexphi + 1); + // } else { + // break; + // } + // } + // } } } } + // 真正的删除空块 for (auto iter = func->getBasicBlocks().begin(); iter != func->getBasicBlocks().end();) { - if (EmptyBlocks.find(iter->get()) 
!= EmptyBlocks.end()) { + if (EmptyBlocks.count(iter->get())) { // EntryBlock跳过 if (iter->get() == func->getEntryBlock()) { ++iter; continue; } - for (auto &iterInst : iter->get()->getInstructions()) - SysYIROptUtils::usedelete(iterInst.get()); + for (auto instIter = iter->get()->getInstructions().begin(); + instIter != iter->get()->getInstructions().end();) { + SysYIROptUtils::usedelete(instIter->get()); // 仅删除 use 关系 + // 显式地从基本块中删除指令并更新迭代器 + instIter = iter->get()->getInstructions().erase(instIter); + } // 删除不可达基本块的phi指令的操作数 for (auto &succ : iter->get()->getSuccessors()) { - int index = 0; - for (auto &pred : succ->getPredecessors()) { - if (pred == iter->get()) { - break; - } - index++; - } - for (auto &instinsucc : succ->getInstructions()) { if (instinsucc->isPhi()) { - dynamic_cast(instinsucc.get())->removeOperand(index); + // iter->get() 就是当前被删除的空基本块,它作为前驱连接到这里的Phi指令 + dynamic_cast(instinsucc.get())->delBlk(iter->get()); } else { + // Phi 指令通常在基本块的开头,如果不是 Phi 指令就停止检查 break; } } @@ -440,34 +456,35 @@ bool SysYCFGOpt::SysYDelEmptyBlock(Function *func, IRBuilder* pBuilder) { } return changed; - } // 如果函数没有返回指令,则添加一个默认返回指令(主要解决void函数没有返回指令的问题) -bool SysYCFGOpt::SysYAddReturn(Function *func, IRBuilder* pBuilder) { +bool SysYCFGOptUtils::SysYAddReturn(Function *func, IRBuilder* pBuilder) { bool changed = false; auto basicBlocks = func->getBasicBlocks(); for (auto &block : basicBlocks) { if (block->getNumSuccessors() == 0) { - changed = true; // 如果基本块没有后继块,则添加一个返回指令 if (block->getNumInstructions() == 0) { pBuilder->setPosition(block.get(), block->end()); pBuilder->createReturnInst(); - } - auto thelastinst = block->getInstructions().end(); - --thelastinst; - if (thelastinst->get()->getKind() != Instruction::kReturn) { - // std::cout << "Warning: Function " << func->getName() << " has no return instruction, adding default return." 
<< std::endl; + changed = true; // 标记IR被修改 + } else { + auto thelastinst = block->getInstructions().end(); + --thelastinst; + if (thelastinst->get()->getKind() != Instruction::kReturn) { + // std::cout << "Warning: Function " << func->getName() << " has no return instruction, adding default return." << std::endl; - pBuilder->setPosition(block.get(), block->end()); - // TODO: 如果int float函数缺少返回值是否需要报错 - if (func->getReturnType()->isInt()) { - pBuilder->createReturnInst(ConstantInteger::get(0)); - } else if (func->getReturnType()->isFloat()) { - pBuilder->createReturnInst(ConstantFloating::get(0.0F)); - } else { - pBuilder->createReturnInst(); + pBuilder->setPosition(block.get(), block->end()); + // TODO: 如果int float函数缺少返回值是否需要报错 + if (func->getReturnType()->isInt()) { + pBuilder->createReturnInst(ConstantInteger::get(0)); + } else if (func->getReturnType()->isFloat()) { + pBuilder->createReturnInst(ConstantFloating::get(0.0F)); + } else { + pBuilder->createReturnInst(); + } + changed = true; // 标记IR被修改 } } } @@ -480,7 +497,7 @@ bool SysYCFGOpt::SysYAddReturn(Function *func, IRBuilder* pBuilder) { // 主要针对已知条件值的分支转换为无条件分支 // 例如 if (cond) { ... } else { ... 
} 中的 cond 已经 // 确定为 true 或 false 的情况 -bool SysYCFGOpt::SysYCondBr2Br(Function *func, IRBuilder* pBuilder) { +bool SysYCFGOptUtils::SysYCondBr2Br(Function *func, IRBuilder* pBuilder) { bool changed = false; for (auto &basicblock : func->getBasicBlocks()) { @@ -515,45 +532,41 @@ bool SysYCFGOpt::SysYCondBr2Br(Function *func, IRBuilder* pBuilder) { SysYIROptUtils::usedelete(thelast->get()); thelast = basicblock->getInstructions().erase(thelast); if ((constfloat_Use && constfloat == 1.0F) || (constint_Use && constint == 1)) { - + // cond为true或非0 pBuilder->setPosition(basicblock.get(), basicblock->end()); pBuilder->createUncondBrInst(thenBlock, {}); - int phiindex = 0; - for (auto pred : elseBlock->getPredecessors()) { - phiindex++; - if (pred == basicblock.get()) { - break; - } - } - + + // 更新CFG关系 + basicblock->removeSuccessor(elseBlock); + elseBlock->removePredecessor(basicblock.get()); + + // 删除elseBlock的phi指令中对应的basicblock.get()的传入值 for (auto &phiinst : elseBlock->getInstructions()) { if (phiinst->getKind() != Instruction::kPhi) { break; } - phiinst->removeOperand(phiindex); + // 使用 delBlk 方法删除 basicblock.get() 对应的传入值 + dynamic_cast(phiinst.get())->delBlk(basicblock.get()); } - basicblock->removeSuccessor(elseBlock); - elseBlock->removePredecessor(basicblock.get()); - } else { + + } else { // cond为false或0 pBuilder->setPosition(basicblock.get(), basicblock->end()); pBuilder->createUncondBrInst(elseBlock, {}); - int phiindex = 0; - for (auto pred : thenBlock->getPredecessors()) { - phiindex++; - if (pred == basicblock.get()) { - break; - } - } + // 更新CFG关系 + basicblock->removeSuccessor(thenBlock); + thenBlock->removePredecessor(basicblock.get()); + + // 删除thenBlock的phi指令中对应的basicblock.get()的传入值 for (auto &phiinst : thenBlock->getInstructions()) { if (phiinst->getKind() != Instruction::kPhi) { break; } - phiinst->removeOperand(phiindex); + // 使用 delBlk 方法删除 basicblock.get() 对应的传入值 + dynamic_cast(phiinst.get())->delBlk(basicblock.get()); } - 
basicblock->removeSuccessor(thenBlock); - thenBlock->removePredecessor(basicblock.get()); + } } } @@ -562,4 +575,32 @@ bool SysYCFGOpt::SysYCondBr2Br(Function *func, IRBuilder* pBuilder) { return changed; } -} // namespace sysy +// ====================================================================== +// 独立的CFG优化遍的实现 +// ====================================================================== + +bool SysYDelInstAfterBrPass::runOnFunction(Function *F, AnalysisManager& AM) { + return SysYCFGOptUtils::SysYDelInstAfterBr(F); +} + +bool SysYDelEmptyBlockPass::runOnFunction(Function *F, AnalysisManager& AM) { + return SysYCFGOptUtils::SysYDelEmptyBlock(F, pBuilder); +} + +bool SysYDelNoPreBLockPass::runOnFunction(Function *F, AnalysisManager& AM) { + return SysYCFGOptUtils::SysYDelNoPreBLock(F); +} + +bool SysYBlockMergePass::runOnFunction(Function *F, AnalysisManager& AM) { + return SysYCFGOptUtils::SysYBlockMerge(F); +} + +bool SysYAddReturnPass::runOnFunction(Function *F, AnalysisManager& AM) { + return SysYCFGOptUtils::SysYAddReturn(F, pBuilder); +} + +bool SysYCondBr2BrPass::runOnFunction(Function *F, AnalysisManager& AM) { + return SysYCFGOptUtils::SysYCondBr2Br(F, pBuilder); +} + +} // namespace sysy \ No newline at end of file diff --git a/src/SysYIRGenerator.cpp b/src/SysYIRGenerator.cpp index 1122b3e..50c24ea 100644 --- a/src/SysYIRGenerator.cpp +++ b/src/SysYIRGenerator.cpp @@ -64,7 +64,7 @@ Value* SysYIRGenerator::getGEPAddressInst(Value* basePointer, const std::vector< // 遍历用户提供的索引(不包括我们添加的第一个0),逐步确定 GEP 的最终结果类型 // 每个索引都“深入”一个维度 - for (size_t i = 0; i < indices.size(); ++i) { // 这里遍历的是用户提供的索引 + for (int i = 0; i < indices.size(); ++i) { // 这里遍历的是用户提供的索引 if (finalTargetType && finalTargetType->isArray()) { finalTargetType = finalTargetType->as()->getElementType(); } else { @@ -247,7 +247,7 @@ std::any SysYIRGenerator::visitVarDecl(SysYParser::VarDeclContext *ctx) { ConstantInteger::get(0)); } else { - for (size_t k = 0; k < counterValues.size(); ++k) { + for 
(int k = 0; k < counterValues.size(); ++k) { std::vector currentIndices; int tempLinearIndex = k; @@ -368,7 +368,7 @@ std::any SysYIRGenerator::visitFuncDef(SysYParser::FuncDefContext *ctx){ BasicBlock* entry = function->getEntryBlock(); builder.setPosition(entry, entry->end()); - for (size_t i = 0; i < paramTypes.size(); ++i) { + for (int i = 0; i < paramTypes.size(); ++i) { AllocaInst* alloca = builder.createAllocaInst(Type::getPointerType(paramTypes[i]), paramDims[i], paramNames[i]); entry->insertArgument(alloca); @@ -440,7 +440,7 @@ std::any SysYIRGenerator::visitAssignStmt(SysYParser::AssignStmtContext *ctx) { Type* targetElementType = variable->getType()->as()->getBaseType(); // 从基指针指向的类型开始 // 模拟 GEP 路径,根据 dims 确定最终元素的类型 - for (size_t i = 0; i < dims.size(); ++i) { + for (int i = 0; i < dims.size(); ++i) { if (targetElementType && targetElementType->isArray()) { targetElementType = targetElementType->as()->getElementType(); } else { @@ -805,7 +805,7 @@ std::any SysYIRGenerator::visitCall(SysYParser::CallContext *ctx) { } auto params = function->getEntryBlock()->getArguments(); - for (size_t i = 0; i < args.size(); i++) { + for (int i = 0; i < args.size(); i++) { // 参数类型转换 if (params[i]->getType() != args[i]->getType() && (params[i]->getNumDims() != 0 || @@ -891,7 +891,7 @@ std::any SysYIRGenerator::visitFuncRParams(SysYParser::FuncRParamsContext *ctx) std::any SysYIRGenerator::visitMulExp(SysYParser::MulExpContext *ctx) { Value * result = std::any_cast(visitUnaryExp(ctx->unaryExp(0))); - for (size_t i = 1; i < ctx->unaryExp().size(); i++) { + for (int i = 1; i < ctx->unaryExp().size(); i++) { auto opNode = dynamic_cast(ctx->children[2*i-1]); int opType = opNode->getSymbol()->getType(); @@ -967,7 +967,7 @@ std::any SysYIRGenerator::visitMulExp(SysYParser::MulExpContext *ctx) { std::any SysYIRGenerator::visitAddExp(SysYParser::AddExpContext *ctx) { Value* result = std::any_cast(visitMulExp(ctx->mulExp(0))); - for (size_t i = 1; i < ctx->mulExp().size(); i++) { 
+ for (int i = 1; i < ctx->mulExp().size(); i++) { auto opNode = dynamic_cast(ctx->children[2*i-1]); int opType = opNode->getSymbol()->getType(); @@ -1028,7 +1028,7 @@ std::any SysYIRGenerator::visitAddExp(SysYParser::AddExpContext *ctx) { std::any SysYIRGenerator::visitRelExp(SysYParser::RelExpContext *ctx) { Value* result = std::any_cast(visitAddExp(ctx->addExp(0))); - for (size_t i = 1; i < ctx->addExp().size(); i++) { + for (int i = 1; i < ctx->addExp().size(); i++) { auto opNode = dynamic_cast(ctx->children[2*i-1]); int opType = opNode->getSymbol()->getType(); @@ -1100,7 +1100,7 @@ std::any SysYIRGenerator::visitRelExp(SysYParser::RelExpContext *ctx) { std::any SysYIRGenerator::visitEqExp(SysYParser::EqExpContext *ctx) { Value * result = std::any_cast(visitRelExp(ctx->relExp(0))); - for (size_t i = 1; i < ctx->relExp().size(); i++) { + for (int i = 1; i < ctx->relExp().size(); i++) { auto opNode = dynamic_cast(ctx->children[2*i-1]); int opType = opNode->getSymbol()->getType(); @@ -1174,7 +1174,7 @@ std::any SysYIRGenerator::visitLAndExp(SysYParser::LAndExpContext *ctx){ BasicBlock *falseBlock = builder.getFalseBlock(); auto conds = ctx->eqExp(); - for (size_t i = 0; i < conds.size() - 1; i++) { + for (int i = 0; i < conds.size() - 1; i++) { labelstring << "AND.L" << builder.getLabelIndex(); BasicBlock *newtrueBlock = function->addBasicBlock(labelstring.str()); @@ -1205,7 +1205,7 @@ auto SysYIRGenerator::visitLOrExp(SysYParser::LOrExpContext *ctx) -> std::any { Function *function = curBlock->getParent(); auto conds = ctx->lAndExp(); - for (size_t i = 0; i < conds.size() - 1; i++) { + for (int i = 0; i < conds.size() - 1; i++) { labelstring << "OR.L" << builder.getLabelIndex(); BasicBlock *newFalseBlock = function->addBasicBlock(labelstring.str()); labelstring.str(""); @@ -1292,7 +1292,7 @@ void Utils::createExternalFunction( auto entry = function->getEntryBlock(); pBuilder->setPosition(entry, entry->end()); - for (size_t i = 0; i < paramTypes.size(); ++i) { + 
for (int i = 0; i < paramTypes.size(); ++i) { auto alloca = pBuilder->createAllocaInst( Type::getPointerType(paramTypes[i]), paramDims[i], paramNames[i]); entry->insertArgument(alloca); diff --git a/src/include/Dom.h b/src/include/Dom.h index 1a09f34..f69dcb9 100644 --- a/src/include/Dom.h +++ b/src/include/Dom.h @@ -34,7 +34,7 @@ private: class DominatorTreeAnalysisPass : public AnalysisPass { public: // 唯一的 Pass ID - static char ID; // LLVM 风格的唯一 ID + static void *ID; DominatorTreeAnalysisPass() : AnalysisPass("DominatorTreeAnalysis", Pass::Granularity::Function) {} diff --git a/src/include/Liveness.h b/src/include/Liveness.h index 7cfe01c..f101b7c 100644 --- a/src/include/Liveness.h +++ b/src/include/Liveness.h @@ -49,7 +49,7 @@ private: class LivenessAnalysisPass : public AnalysisPass { public: // 唯一的 Pass ID - static char ID; // LLVM 风格的唯一 ID + static void *ID; // LLVM 风格的唯一 ID LivenessAnalysisPass() : AnalysisPass("LivenessAnalysis", Pass::Granularity::Function) {} diff --git a/src/include/Pass.h b/src/include/Pass.h index 61839fa..b928b91 100644 --- a/src/include/Pass.h +++ b/src/include/Pass.h @@ -7,7 +7,9 @@ #include #include // For std::type_index (although void* ID is more common in LLVM) #include +#include #include "IR.h" +#include "IRBuilder.h" namespace sysy { @@ -117,64 +119,142 @@ private: // ====================================================================== class AnalysisManager { private: - std::map, std::unique_ptr> cachedResults; - // cachedResults 存储分析结果,键是 (Function*, AnalysisPass ID) + Module *pModuleRef; // 指向被分析的Module + + // 缓存不同粒度的分析结果 + std::map> moduleCachedResults; + std::map, std::unique_ptr> functionCachedResults; + std::map, std::unique_ptr> basicBlockCachedResults; + + public: - AnalysisManager() = default; + // 构造函数接收 Module 指针 + AnalysisManager(Module *M) : pModuleRef(M) {} + AnalysisManager() = delete; // 禁止无参构造 + ~AnalysisManager() = default; - // 获取分析结果 + // 获取分析结果的通用模板函数 // T 是 AnalysisResult 的具体类型,E 是 AnalysisPass 的具体类型 
- // PassManager 应该在运行 Pass 之前调用 registerAnalysisPass - template T *getAnalysisResult(Function *F) { // 针对函数级别的分析,需要传入 Function* - void *analysisID = E::ID; // 获取分析遍的唯一 ID + // F 和 BB 参数用于提供上下文,根据分析遍的粒度来使用 + template T *getAnalysisResult(Function *F = nullptr, BasicBlock *BB = nullptr) { + void *analysisID = E::ID; // 获取分析遍的唯一 ID - // 检查是否已存在有效结果 - auto it = cachedResults.find({F, analysisID}); - if (it != cachedResults.end()) { - return static_cast(it->second.get()); // 返回缓存结果 - } - - // 如果没有缓存结果,通过 PassRegistry 创建分析遍并运行它 - // 注意:这里需要 PassRegistry 实例。如果 AnalysisManager 独立于 PassManager, - // 则需要传入 PassRegistry 引用或指针。 - // 为了简化,假设 AnalysisManager 能够访问到 PassRegistry + // 尝试从注册表创建分析遍实例 std::unique_ptr basePass = PassRegistry::getPassRegistry().createPass(analysisID); if (!basePass) { // Error: Analysis pass not registered + std::cerr << "Error: Analysis pass with ID " << analysisID << " not registered.\n"; return nullptr; } - AnalysisPass *analysisPass = static_cast(basePass.get()); - // 确保分析遍的粒度与请求的上下文匹配 - if (analysisPass->getGranularity() == Pass::Granularity::Function) { - analysisPass->runOnFunction(F, *this); // 运行分析遍 - // 获取结果并缓存 - std::unique_ptr result = analysisPass->getResult(); - T *specificResult = static_cast(result.get()); - cachedResults[{F, analysisID}] = std::move(result); // 缓存结果 - return specificResult; + // 根据分析遍的粒度处理 + switch (analysisPass->getGranularity()) { + case Pass::Granularity::Module: { + // 检查是否已存在有效结果 + auto it = moduleCachedResults.find(analysisID); + if (it != moduleCachedResults.end()) { + return static_cast(it->second.get()); // 返回缓存结果 + } + // 运行模块级分析遍 + if (!pModuleRef) { + std::cerr << "Error: Module reference not set for AnalysisManager to run Module Pass.\n"; + return nullptr; + } + analysisPass->runOnModule(pModuleRef, *this); + // 获取结果并缓存 + std::unique_ptr result = analysisPass->getResult(); + T *specificResult = static_cast(result.get()); + moduleCachedResults[analysisID] = std::move(result); // 缓存结果 + return 
specificResult; + } + case Pass::Granularity::Function: { + // 检查请求的上下文是否正确 + if (!F) { + std::cerr << "Error: Function context required for Function-level Analysis Pass.\n"; + return nullptr; + } + // 检查是否已存在有效结果 + auto it = functionCachedResults.find({F, analysisID}); + if (it != functionCachedResults.end()) { + return static_cast(it->second.get()); // 返回缓存结果 + } + // 运行函数级分析遍 + analysisPass->runOnFunction(F, *this); + // 获取结果并缓存 + std::unique_ptr result = analysisPass->getResult(); + T *specificResult = static_cast(result.get()); + functionCachedResults[{F, analysisID}] = std::move(result); // 缓存结果 + return specificResult; + } + case Pass::Granularity::BasicBlock: { + // 检查请求的上下文是否正确 + if (!BB) { + std::cerr << "Error: BasicBlock context required for BasicBlock-level Analysis Pass.\n"; + return nullptr; + } + // 检查是否已存在有效结果 + auto it = basicBlockCachedResults.find({BB, analysisID}); + if (it != basicBlockCachedResults.end()) { + return static_cast(it->second.get()); // 返回缓存结果 + } + // 运行基本块级分析遍 + analysisPass->runOnBasicBlock(BB, *this); + // 获取结果并缓存 + std::unique_ptr result = analysisPass->getResult(); + T *specificResult = static_cast(result.get()); + basicBlockCachedResults[{BB, analysisID}] = std::move(result); // 缓存结果 + return specificResult; + } } - // TODO: 处理 Module 或 BasicBlock 粒度的分析 - - return nullptr; + return nullptr; // 不会到达这里 } - // 使所有或特定分析结果失效 (当 IR 被修改时调用) - void invalidateAllAnalyses() { cachedResults.clear(); } + // 使所有分析结果失效 (当 IR 被修改时调用) + void invalidateAllAnalyses() { + moduleCachedResults.clear(); + functionCachedResults.clear(); + basicBlockCachedResults.clear(); + } // 使特定分析结果失效 - void invalidateAnalysis(void *analysisID, Function *F = nullptr) { - if (F) { - // 使特定函数的特定分析结果失效 - cachedResults.erase({F, analysisID}); + // void *analysisID: 要失效的分析的ID + // Function *F: 如果是函数级分析,指定函数;如果是模块级或基本块级,则为nullptr (取决于调用方式) + // BasicBlock *BB: 如果是基本块级分析,指定基本块;否则为nullptr + void invalidateAnalysis(void *analysisID, Function *F = nullptr, BasicBlock 
*BB = nullptr) { + if (BB) { + // 使特定基本块的特定分析结果失效 + basicBlockCachedResults.erase({BB, analysisID}); + } else if (F) { + // 使特定函数的特定分析结果失效 (也可能包含聚合的BasicBlock结果) + functionCachedResults.erase({F, analysisID}); + // 遍历所有属于F的基本块,使其BasicBlockCache失效 (如果该分析是BasicBlock粒度的) + // 这需要遍历F的所有基本块,效率较低,更推荐在BasicBlockPass的invalidateAnalysisUsage中精确指定 + // 或者在Function级别的invalidate时,清空该Function的所有BasicBlock分析 + // 这里的实现简单地清空该Function下所有该ID的BasicBlock缓存 + for (auto it = basicBlockCachedResults.begin(); it != basicBlockCachedResults.end(); ) { + // 假设BasicBlock::getParent()方法存在,可以获取所属Function + if (it->first.second == analysisID /* && it->first.first->getParent() == F */) { // 需要BasicBlock能获取其父函数 + it = basicBlockCachedResults.erase(it); + } else { + ++it; + } + } + } else { - // 使所有函数的特定分析结果失效 - // 遍历并删除匹配的元素,避免拷贝 unique_ptr - for (auto it = cachedResults.begin(); it != cachedResults.end(); ) { + // 使所有函数的特定分析结果失效 (Module级和所有Function/BasicBlock级) + moduleCachedResults.erase(analysisID); + for (auto it = functionCachedResults.begin(); it != functionCachedResults.end(); ) { if (it->first.second == analysisID) { - it = cachedResults.erase(it); // erase 返回下一个元素的迭代器 + it = functionCachedResults.erase(it); + } else { + ++it; + } + } + for (auto it = basicBlockCachedResults.begin(); it != basicBlockCachedResults.end(); ) { + if (it->first.second == analysisID) { + it = basicBlockCachedResults.erase(it); } else { ++it; } @@ -191,26 +271,26 @@ private: std::vector> passes; AnalysisManager analysisManager; Module *pmodule; + IRBuilder *pBuilder; public: PassManager() = default; ~PassManager() = default; - PassManager(Module *module) : pmodule(module) , analysisManager() {} + PassManager(Module *module, IRBuilder *builder) : pmodule(module) ,pBuilder(builder), analysisManager(module) {} // 运行所有注册的遍 bool run(); // 运行优化管道主要负责注册和运行优化遍 // 这里可以根据 optLevel 和 DEBUG 控制不同的优化遍 - void runOptimizationPipeline(Module* moduleIR, int optLevel); + void runOptimizationPipeline(Module* moduleIR, IRBuilder* 
builder, int optLevel); // 添加遍:现在接受 Pass 的 ID,而不是直接的 unique_ptr void addPass(void *passID); AnalysisManager &getAnalysisManager() { return analysisManager; } - }; // ====================================================================== @@ -218,15 +298,18 @@ public: // ====================================================================== // 用于分析遍的注册 -template void registerAnalysisPass() { - PassRegistry::getPassRegistry().registerPass(&AnalysisPassType::ID, - []() { return std::make_unique(); }); -} +template void registerAnalysisPass(); -// 用于优化遍的注册 -template void registerOptimizationPass() { - PassRegistry::getPassRegistry().registerPass(&OptimizationPassType::ID, - []() { return std::make_unique(); }); -} +// (1) 针对需要 IRBuilder 参数的优化遍的重载 +// 这个模板只在 OptimizationPassType 可以通过 IRBuilder* 构造时才有效 +template ::value, int>::type = 0> +void registerOptimizationPass(IRBuilder* builder); + +// (2) 针对不需要 IRBuilder 参数的所有其他优化遍的重载 +// 这个模板只在 OptimizationPassType 不能通过 IRBuilder* 构造时才有效 +template ::value, int>::type = 0> +void registerOptimizationPass(); } // namespace sysy \ No newline at end of file diff --git a/src/include/SysYIRCFGOpt.h b/src/include/SysYIRCFGOpt.h index 6b13a3f..4d025c8 100644 --- a/src/include/SysYIRCFGOpt.h +++ b/src/include/SysYIRCFGOpt.h @@ -2,6 +2,7 @@ #include "IR.h" #include "IRBuilder.h" +#include "Pass.h" namespace sysy { @@ -17,44 +18,79 @@ namespace sysy { // - 合并连续的跳转指令(Jump Threading)在合并不可达块中似乎已经实现了 // - 基本块重排序(Block Reordering),提升局部性 -class SysYCFGOpt { - private: - Module *pModule; - IRBuilder *pBuilder; - - public: - SysYCFGOpt(Module *pMoudle, IRBuilder *pBuilder) : pModule(pMoudle), pBuilder(pBuilder) {} - - void SysYOptimizateAfterIR(){ - - auto &functions = pModule->getFunctions(); - for (auto &function : functions) { - bool changed = false; - while(changed){ - changed = false; - changed |= SysYCondBr2Br(function.second.get(), pBuilder); - // 删除br后面的无用指令 - changed |= SysYDelInstAfterBr(function.second.get()); - // 合并空基本块 - changed |= 
SysYBlockMerge(function.second.get()); - // 删除无前驱块 - changed |= SysYDelNoPreBLock(function.second.get()); - // 删除空块 - changed |= SysYDelEmptyBlock(function.second.get(), pBuilder); - // 添加return指令 - changed |= SysYAddReturn(function.second.get(), pBuilder); - } - } - } - +// 辅助工具类,包含实际的CFG优化逻辑 +// 这些方法可以被独立的Pass调用 +class SysYCFGOptUtils { public: static bool SysYDelInstAfterBr(Function *func); // 删除br后面的指令 static bool SysYDelEmptyBlock(Function *func, IRBuilder* pBuilder); // 空块删除 static bool SysYDelNoPreBLock(Function *func); // 删除无前驱块(不可达块) - static bool SysYBlockMerge(Function *func); // 合并基本块(主要针对嵌套if while的exit块, - // 也可以修改IR生成实现回填机制 - static bool SysYAddReturn(Function *func, IRBuilder* pBuilder); // 添加return指令(主要针对Void函数) - static bool SysYCondBr2Br(Function *func, IRBuilder* pBuilder); // 条件分支(已知cond的值)转换为无条件分支 + static bool SysYBlockMerge(Function *func); // 合并基本块 + static bool SysYAddReturn(Function *func, IRBuilder* pBuilder); // 添加return指令 + static bool SysYCondBr2Br(Function *func, IRBuilder* pBuilder); // 条件分支转换为无条件分支 }; -} // namespace sysy +// ====================================================================== +// 独立的CFG优化遍 +// ====================================================================== + +class SysYDelInstAfterBrPass : public OptimizationPass { +public: + static void *ID; // 唯一ID + SysYDelInstAfterBrPass() : OptimizationPass("SysYDelInstAfterBrPass", Granularity::Function) {} + bool runOnFunction(Function *F, AnalysisManager& AM) override; + void getAnalysisUsage(std::set &analysisDependencies, std::set &analysisInvalidations) const override { + // 这个优化可能改变CFG结构,使一些CFG相关的分析失效 + // 可以在这里指定哪些分析会失效,例如支配树、活跃变量等 + // analysisInvalidations.insert(DominatorTreeAnalysisPass::ID); // 示例 + } + void *getPassID() const override { return &ID; } +}; + +class SysYDelEmptyBlockPass : public OptimizationPass { +private: + IRBuilder *pBuilder; +public: + static void *ID; + SysYDelEmptyBlockPass(IRBuilder *builder) : 
OptimizationPass("SysYDelEmptyBlockPass", Granularity::Function), pBuilder(builder) {} + bool runOnFunction(Function *F, AnalysisManager& AM) override; + void *getPassID() const override { return &ID; } +}; + +class SysYDelNoPreBLockPass : public OptimizationPass { +public: + static void *ID; + SysYDelNoPreBLockPass() : OptimizationPass("SysYDelNoPreBLockPass", Granularity::Function) {} + bool runOnFunction(Function *F, AnalysisManager& AM) override; + void *getPassID() const override { return &ID; } +}; + +class SysYBlockMergePass : public OptimizationPass { +public: + static void *ID; + SysYBlockMergePass() : OptimizationPass("SysYBlockMergePass", Granularity::Function) {} + bool runOnFunction(Function *F, AnalysisManager& AM) override; + void *getPassID() const override { return &ID; } +}; + +class SysYAddReturnPass : public OptimizationPass { +private: + IRBuilder *pBuilder; +public: + static void *ID; + SysYAddReturnPass(IRBuilder *builder) : OptimizationPass("SysYAddReturnPass", Granularity::Function), pBuilder(builder) {} + bool runOnFunction(Function *F, AnalysisManager& AM) override; + void *getPassID() const override { return &ID; } +}; + +class SysYCondBr2BrPass : public OptimizationPass { +private: + IRBuilder *pBuilder; +public: + static void *ID; + SysYCondBr2BrPass(IRBuilder *builder) : OptimizationPass("SysYCondBr2BrPass", Granularity::Function), pBuilder(builder) {} + bool runOnFunction(Function *F, AnalysisManager& AM) override; + void *getPassID() const override { return &ID; } +}; + +} // namespace sysy \ No newline at end of file diff --git a/src/include/SysYIROptUtils.h b/src/include/SysYIROptUtils.h index 66929d1..1b764ec 100644 --- a/src/include/SysYIROptUtils.h +++ b/src/include/SysYIROptUtils.h @@ -10,15 +10,12 @@ namespace sysy { class SysYIROptUtils{ public: - // 删除use关系 - // 根据指令的使用情况删除其所有的use关系 - // 找到指令的所有使用者,并从它们的使用列表中删除该指令 + // 仅仅删除use关系 static void usedelete(Instruction *instr) { for (auto &use : instr->getOperands()) { Value* val 
= use->getValue(); val->removeUse(use); } - instr->getParent()->removeInst(instr); // 从基本块中删除指令 } // 判断是否是全局变量 diff --git a/src/sysyc.cpp b/src/sysyc.cpp index c22c0a8..1cf6fa7 100644 --- a/src/sysyc.cpp +++ b/src/sysyc.cpp @@ -129,14 +129,19 @@ int main(int argc, char **argv) { DEBUG = 1; // 这里可能需要更精细地控制 DEBUG 的开启时机和范围 } - // 创建 Pass 管理器并运行优化管道 - PassManager passManager(moduleIR); // 创建 Pass 管理器 - passManager.runOptimizationPipeline(moduleIR, optLevel); - if (DEBUG) { cout << "=== Init IR ===\n"; SysYPrinter(moduleIR).printIR(); // 临时打印器用于调试 } + + // 创建 Pass 管理器并运行优化管道 + PassManager passManager(moduleIR, builder); // 创建 Pass 管理器 + // 好像都不用传递module和builder了,因为 PassManager 初始化了 + passManager.runOptimizationPipeline(moduleIR, builder, optLevel); + + + + AddressCalculationExpansion ace(moduleIR, builder); if (ace.run()) { if (DEBUG) cout << "AddressCalculationExpansion made changes.\n"; From 2157cf6aa61619e1bc6b6f445682d745e79e9dba Mon Sep 17 00:00:00 2001 From: Lixuanwang Date: Wed, 23 Jul 2025 19:12:04 +0800 Subject: [PATCH 21/35] =?UTF-8?q?[midend]=E4=BF=AE=E5=A4=8D=E4=BA=86?= =?UTF-8?q?=E4=B8=80=E4=B8=AAPass=E7=9B=B8=E5=85=B3=E7=9A=84=E7=BC=96?= =?UTF-8?q?=E8=AF=91=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Pass.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Pass.cpp b/src/Pass.cpp index 46c0588..6a6f85b 100644 --- a/src/Pass.cpp +++ b/src/Pass.cpp @@ -163,14 +163,14 @@ template void registerAnalysisPass() { } template ::value, int>::type = 0> + std::is_constructible::value, int>::type> void registerOptimizationPass(IRBuilder* builder) { PassRegistry::getPassRegistry().registerPass(&OptimizationPassType::ID, [builder]() { return std::make_unique(builder); }); } template ::value, int>::type = 0> + !std::is_constructible::value, int>::type> void registerOptimizationPass() { PassRegistry::getPassRegistry().registerPass(&OptimizationPassType::ID, []() { return 
std::make_unique(); }); From f4d231b989c6bb94f669ef29aadd9f949a431b52 Mon Sep 17 00:00:00 2001 From: Lixuanwang Date: Thu, 24 Jul 2025 00:39:11 +0800 Subject: [PATCH 22/35] =?UTF-8?q?[midend]=E6=B7=BB=E5=8A=A0=E4=BA=86?= =?UTF-8?q?=E5=90=8E=E7=AB=AF=E4=B8=AD=E5=AF=B9GEP=E6=8C=87=E4=BB=A4?= =?UTF-8?q?=E7=9A=84=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/RISCv64ISel.cpp | 160 +++++++++++++++++++++++++++++++++++++- src/include/RISCv64ISel.h | 2 + 2 files changed, 161 insertions(+), 1 deletion(-) diff --git a/src/RISCv64ISel.cpp b/src/RISCv64ISel.cpp index 2ab47b5..9bd104f 100644 --- a/src/RISCv64ISel.cpp +++ b/src/RISCv64ISel.cpp @@ -10,7 +10,7 @@ namespace sysy { // DAG节点定义 (内部实现) struct RISCv64ISel::DAGNode { - enum NodeKind { CONSTANT, LOAD, STORE, BINARY, CALL, RETURN, BRANCH, ALLOCA_ADDR, UNARY, MEMSET }; + enum NodeKind { CONSTANT, LOAD, STORE, BINARY, CALL, RETURN, BRANCH, ALLOCA_ADDR, UNARY, MEMSET, GET_ELEMENT_PTR}; NodeKind kind; Value* value = nullptr; std::vector operands; @@ -792,6 +792,112 @@ void RISCv64ISel::selectNode(DAGNode* node) { break; } + case DAGNode::GET_ELEMENT_PTR: { + auto gep = dynamic_cast(node->value); + // 获取GEP指令最终要写入的目标虚拟寄存器 + auto result_vreg = getVReg(gep); + + // --- Step 1: 获取基地址 --- + auto base_ptr_node = node->operands[0]; + // 创建一个新的vreg作为地址累加器 + auto current_addr_vreg = getNewVReg(); + + // 判断基指针是局部数组还是全局数组,并生成获取其基地址的指令 + if (auto alloca_base = dynamic_cast(base_ptr_node->value)) { + // 基指针是局部数组(在栈上),使用FRAME_ADDR伪指令获取其地址 + auto frame_addr_instr = std::make_unique(RVOpcodes::FRAME_ADDR); + frame_addr_instr->addOperand(std::make_unique(current_addr_vreg)); + frame_addr_instr->addOperand(std::make_unique(getVReg(alloca_base))); + CurMBB->addInstruction(std::move(frame_addr_instr)); + } else if (auto global_base = dynamic_cast(base_ptr_node->value)) { + // 基指针是全局数组,使用LA伪指令加载其地址 + auto la_instr = std::make_unique(RVOpcodes::LA); + 
la_instr->addOperand(std::make_unique(current_addr_vreg)); + la_instr->addOperand(std::make_unique(global_base->getName())); + CurMBB->addInstruction(std::move(la_instr)); + } else { + // 如果基指针是另一个计算结果(例如函数参数传递来的数组地址), + // 直接用MV指令将其值赋给地址累加器 + auto base_vreg = getVReg(base_ptr_node->value); + auto mv = std::make_unique(RVOpcodes::MV); + mv->addOperand(std::make_unique(current_addr_vreg)); + mv->addOperand(std::make_unique(base_vreg)); + CurMBB->addInstruction(std::move(mv)); + } + + // --- Step 2: 迭代处理每个索引,累加偏移量 --- + // 获取数组的类型,并剥掉最外层的指针 + Type* current_type = gep->getBasePointer()->getType()->as()->getBaseType(); + + // 遍历所有索引 (DAGNode的操作数从第1个开始是索引) + for (size_t i = 1; i < node->operands.size(); ++i) { + auto index_node = node->operands[i]; + + // [修复] 核心修复逻辑在这里 + // GEP可以索引数组,也可以索引指针(数组退化的结果) + unsigned element_size = 0; + Type* element_type = nullptr; + + if (auto array_type = current_type->as()) { + // 情况一:当前正在索引一个数组类型,例如 a[i] 中的 a + element_type = array_type->getElementType(); + element_size = getTypeSizeInBytes(element_type); + } else if (auto ptr_type = current_type->as()) { + // 情况二:当前正在索引一个指针类型,例如函数参数 p[] 经过退化后的 p + element_type = ptr_type->getBaseType(); + element_size = getTypeSizeInBytes(element_type); + } else { + // 如果既不是数组也不是指针,那么无法进行索引 + assert(false && "GEP can only index into an array or pointer type."); + } + + // 更新current_type,为下一次迭代做准备(处理多维数组) + current_type = element_type; + + // --- 计算偏移量: offset = index * element_size --- + auto offset_vreg = getNewVReg(); + auto index_vreg = getVReg(index_node->value); + + // 如果索引本身是个常量,需要先用LI指令加载到虚拟寄存器中 + if (auto const_index = dynamic_cast(index_node->value)) { + auto li = std::make_unique(RVOpcodes::LI); + li->addOperand(std::make_unique(index_vreg)); + li->addOperand(std::make_unique(const_index->getInt())); + CurMBB->addInstruction(std::move(li)); + } + + // 将元素大小加载到临时寄存器 + auto size_vreg = getNewVReg(); + auto li_size = std::make_unique(RVOpcodes::LI); + 
li_size->addOperand(std::make_unique(size_vreg)); + li_size->addOperand(std::make_unique(element_size)); + CurMBB->addInstruction(std::move(li_size)); + + // 执行乘法: offset_vreg = index_vreg * size_vreg + // SysY中数组索引计算用32位乘法足够 + auto mul = std::make_unique(RVOpcodes::MULW); + mul->addOperand(std::make_unique(offset_vreg)); + mul->addOperand(std::make_unique(index_vreg)); + mul->addOperand(std::make_unique(size_vreg)); + CurMBB->addInstruction(std::move(mul)); + + // 累加地址: current_addr_vreg = current_addr_vreg + offset_vreg + // 指针地址是64位的,用ADD指令 + auto add = std::make_unique(RVOpcodes::ADD); + add->addOperand(std::make_unique(current_addr_vreg)); + add->addOperand(std::make_unique(current_addr_vreg)); + add->addOperand(std::make_unique(offset_vreg)); + CurMBB->addInstruction(std::move(add)); + } + + // --- Step 3: 将最终地址存入GEP的目标vreg --- + auto final_mv = std::make_unique(RVOpcodes::MV); + final_mv->addOperand(std::make_unique(result_vreg)); + final_mv->addOperand(std::make_unique(current_addr_vreg)); + CurMBB->addInstruction(std::move(final_mv)); + break; + } + default: throw std::runtime_error("Unsupported DAGNode kind in ISel"); } @@ -850,6 +956,21 @@ std::vector> RISCv64ISel::build_dag(BasicB std::cout << " -> Operand " << i << " has kind: " << memset_node->operands[i]->kind << std::endl; } } + } else if (auto gep = dynamic_cast(inst)) { + // 如果这个GEP指令已经创建过节点,则跳过 + if(value_to_node.count(gep)) continue; + + // 创建一个新的 GET_ELEMENT_PTR 类型的节点 + auto gep_node = create_node(DAGNode::GET_ELEMENT_PTR, gep, value_to_node, nodes_storage); + + // 第一个操作数是基指针(即数组本身) + gep_node->operands.push_back(get_operand_node(gep->getBasePointer(), value_to_node, nodes_storage)); + + // 依次添加所有索引作为后续的操作数 + for (auto index : gep->getIndices()) { + // [修复] 从 Use 对象中获取真正的 Value* + gep_node->operands.push_back(get_operand_node(index->getValue(), value_to_node, nodes_storage)); + } } else if (auto load = dynamic_cast(inst)) { auto load_node = create_node(DAGNode::LOAD, load, value_to_node, 
nodes_storage); load_node->operands.push_back(get_operand_node(load->getPointer(), value_to_node, nodes_storage)); @@ -892,6 +1013,43 @@ std::vector> RISCv64ISel::build_dag(BasicB return nodes_storage; } +/** + * @brief 计算一个类型在内存中占用的字节数。 + * @param type 需要计算大小的IR类型。 + * @return 该类型占用的字节数。 + */ +unsigned RISCv64ISel::getTypeSizeInBytes(Type* type) { + if (!type) { + assert(false && "Cannot get size of a null type."); + return 0; + } + + switch (type->getKind()) { + // 对于SysY语言,基本类型int和float都占用4字节 + case Type::kInt: + case Type::kFloat: + return 4; + + // 指针类型在RISC-V 64位架构下占用8字节 + // 虽然SysY没有'int*'语法,但数组变量在IR层面本身就是指针类型 + case Type::kPointer: + return 8; + + // 数组类型的总大小 = 元素数量 * 单个元素的大小 + case Type::kArray: { + auto arrayType = type->as(); + // 递归调用以计算元素大小 + return arrayType->getNumElements() * getTypeSizeInBytes(arrayType->getElementType()); + } + + // 其他类型,如Void, Label等不占用栈空间,或者不应该出现在这里 + default: + // 如果遇到未处理的类型,触发断言,方便调试 + assert(false && "Unsupported type for size calculation."); + return 0; + } +} + // [新] 打印DAG图以供调试的辅助函数 void RISCv64ISel::print_dag(const std::vector>& dag, const std::string& bb_name) { // 检查是否有DEBUG宏或者全局变量,避免在非调试模式下打印 diff --git a/src/include/RISCv64ISel.h b/src/include/RISCv64ISel.h index 0bb977a..472edfe 100644 --- a/src/include/RISCv64ISel.h +++ b/src/include/RISCv64ISel.h @@ -33,6 +33,8 @@ private: std::vector> build_dag(BasicBlock* bb); DAGNode* get_operand_node(Value* val_ir, std::map&, std::vector>&); DAGNode* create_node(int kind, Value* val, std::map&, std::vector>&); + // 用于计算类型大小的辅助函数 + unsigned getTypeSizeInBytes(Type* type); void print_dag(const std::vector>& dag, const std::string& bb_name); From 4b9d067c12fabded580c2be7d9767c561a347c9c Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Thu, 24 Jul 2025 14:36:33 +0800 Subject: [PATCH 23/35] =?UTF-8?q?[midend]=E9=83=A8=E5=88=86DCE=E9=81=8D?= =?UTF-8?q?=E5=AE=9E=E7=8E=B0=EF=BC=88=E4=BC=A0=E6=92=AD=E6=B4=BB=E8=B7=83?= 
=?UTF-8?q?=E4=BB=A3=E7=A0=81=EF=BC=89=EF=BC=8C=E4=BF=AE=E6=94=B9=E4=BC=98?= =?UTF-8?q?=E5=8C=96=E9=81=8D=E8=BF=90=E8=A1=8C=E9=80=BB=E8=BE=91=EF=BC=8C?= =?UTF-8?q?TODO=EF=BC=9A=E5=AE=8C=E5=96=84=E4=BC=98=E5=8C=96=E9=81=8D?= =?UTF-8?q?=E7=9A=84getAnalysisUsage=EF=BC=8C=E5=88=A0=E9=99=A4=E6=97=A0?= =?UTF-8?q?=E7=94=A8=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Pass_ID_List.md | 5 +- src/CFGOptPass.cpp | 706 ------------------------------------- src/CMakeLists.txt | 2 +- src/DCE.cpp | 91 +++++ src/Pass.cpp | 48 ++- src/SysYIRCFGOpt.cpp | 12 +- src/include/CFGOptPass.h | 40 --- src/include/DCE.h | 46 +++ src/include/Pass.h | 1 + src/include/SysYIRCFGOpt.h | 5 + 10 files changed, 175 insertions(+), 781 deletions(-) delete mode 100644 src/CFGOptPass.cpp create mode 100644 src/DCE.cpp delete mode 100644 src/include/CFGOptPass.h create mode 100644 src/include/DCE.h diff --git a/Pass_ID_List.md b/Pass_ID_List.md index 79de64c..14f3379 100644 --- a/Pass_ID_List.md +++ b/Pass_ID_List.md @@ -1,5 +1,6 @@ -# 记录中端遍的唯一标识ID +# 记录中端遍的开发进度 | 名称 | 优化级别 | 开发进度 | | ------------ | ------------ | ---------- | -| CFG优化 | 函数级 | 已完成 | \ No newline at end of file +| CFG优化 | 函数级 | 已完成 | +| DCE | 函数级 | 待测试 | \ No newline at end of file diff --git a/src/CFGOptPass.cpp b/src/CFGOptPass.cpp deleted file mode 100644 index 224a553..0000000 --- a/src/CFGOptPass.cpp +++ /dev/null @@ -1,706 +0,0 @@ -#include "CFGOptPass.h" // 包含新的 CFG 优化遍的头文件 -#include "Dom.h" // CFG修改会使支配树失效,包含头文件 -#include "IR.h" -#include "IRBuilder.h" -#include "Liveness.h" // CFG修改会使活跃变量分析失效,包含头文件 -#include "SysYIROptUtils.h" // 包含您提供的 SysYIROptUtils -#include -#include -#include -#include -#include -#include // For SysYDelNoPreBLock -#include - -namespace sysy { - -char CFGOptimizationPass::ID = 0; // 初始化静态 ID - -// 声明分析依赖和失效 -void CFGOptimizationPass::getAnalysisUsage(std::set &analysisDependencies, - std::set &analysisInvalidations) const { - // CFG 
优化会改变控制流图,因此会使大部分数据流分析失效。 - // 特别是支配树和活跃变量分析。 - analysisInvalidations.insert(&DominatorTreeAnalysisPass::ID); - analysisInvalidations.insert(&LivenessAnalysisPass::ID); - // TODO: 如果有其他分析(如数据流分析)也可能失效,需要在此处添加 -} - -// ====================================================================== -// 静态 CFG 优化辅助函数的实现 -// 大部分代码直接从您提供的 SysYIRCFGOpt.cpp 复制过来 -// 并根据新的 PhiInst 定义调整了 Phi 节点处理逻辑 -// ====================================================================== - -bool CFGOptimizationPass::SysYDelInstAfterBr(Function *func) { - bool changed = false; - // 使用迭代器安全的遍历,因为可能会删除指令 - for (auto &basicBlock : func->getBasicBlocks()) { - if (!basicBlock) - continue; // 确保基本块有效 - - bool terminatorFound = false; - auto terminatorIter = basicBlock->getInstructions().end(); // 迭代器指向终止指令 - - // 查找终止指令并标记其后的指令进行删除 - for (auto iter = basicBlock->getInstructions().begin(); iter != basicBlock->getInstructions().end(); ++iter) { - if (terminatorFound) { - // 如果已经找到终止指令,则当前指令是无用指令删除指令 - SysYIROptUtils::usedelete(iter->get()); - } else if ((*iter)->isTerminator()) { - terminatorFound = true; - terminatorIter = iter; - } - } - - // 删除终止指令后的所有指令 - if (terminatorFound) { - auto currentIter = std::next(terminatorIter); // 从终止指令的下一个开始删除 - while (currentIter != basicBlock->getInstructions().end()) { - changed = true; - currentIter = basicBlock->getInstructions().erase(currentIter); - } - } - - // 更新前驱后继关系:由于可能删除了旧的终止指令并改变了控制流 - // 最好是先清除旧的关系,然后根据最新的终止指令重新建立关系 - if (terminatorFound) { - Instruction *currentTerminator = - basicBlock->getInstructions().empty() ? 
nullptr : basicBlock->getInstructions().back().get(); - if (!currentTerminator || !currentTerminator->isTerminator()) { - // 这是一种错误情况,块应该总是以终止指令结束 - // 或者说,如果删除了唯一的终止指令,那么块就没有后继了,需要后续优化来修复 - // 暂时跳过更新,让其他优化(如 SysYAddReturn)来处理 - continue; - } - - // 清除旧的后继关系 - // 注意:这里需要复制一份后继列表,因为在循环中修改原列表会使迭代器失效 - std::vector oldSuccessors(basicBlock->getSuccessors().begin(), basicBlock->getSuccessors().end()); - for (BasicBlock *succ : oldSuccessors) { - if (succ) { - succ->removePredecessor(basicBlock.get()); - basicBlock->removeSuccessor(succ); - } - } - - // 根据最新的终止指令重新建立新的后继关系 - if (currentTerminator->isUnconditional()) { - BasicBlock *branchBlock = dynamic_cast(currentTerminator->getOperand(0)); - if (branchBlock) { - basicBlock->addSuccessor(branchBlock); - branchBlock->addPredecessor(basicBlock.get()); - } - } else if (currentTerminator->isConditional()) { - BasicBlock *thenBlock = dynamic_cast(currentTerminator->getOperand(1)); - BasicBlock *elseBlock = dynamic_cast(currentTerminator->getOperand(2)); - if (thenBlock) { - basicBlock->addSuccessor(thenBlock); - thenBlock->addPredecessor(basicBlock.get()); - } - if (elseBlock && thenBlock != elseBlock) { // 避免重复添加相同后继 - basicBlock->addSuccessor(elseBlock); - elseBlock->addPredecessor(basicBlock.get()); - } - } - } - } - return changed; -} - -bool CFGOptimizationPass::SysYBlockMerge(Function *func) { - bool changed = false; - - // 使用迭代器安全的循环来遍历和删除 - for (auto blockiter = func->getBasicBlocks().begin(); blockiter != func->getBasicBlocks().end();) { - BasicBlock *currentBlock = blockiter->get(); - if (!currentBlock) { // 防止空指针 - ++blockiter; - continue; - } - - // 入口块不能被合并到前一个块(它没有前一个块),但可以作为目标块被合并 - if (currentBlock == func->getEntryBlock() && currentBlock->getNumPredecessors() == 0) { - ++blockiter; - continue; - } - - // 如果当前块只有一个后继块 - if (currentBlock->getNumSuccessors() == 1) { - BasicBlock *nextBlock = currentBlock->getSuccessors()[0]; - if (!nextBlock) { // 后继块无效 - ++blockiter; - continue; - } - - // 
且后继块只有一个前驱块(这是合并的条件之一) - if (nextBlock->getNumPredecessors() == 1 && nextBlock->getPredecessors()[0] == currentBlock) { - // std::cout << "merge block: " << currentBlock->getName() << " with " << nextBlock->getName() << std::endl; - - // 删除 currentBlock 最后的 br 指令 - if (!currentBlock->getInstructions().empty()) { - Instruction *lastInst = currentBlock->getInstructions().back().get(); - if (lastInst->isTerminator()) { - SysYIROptUtils::usedelete(lastInst); - // 从指令列表中移除 - currentBlock->getInstructions().pop_back(); - } - } - - // 处理 Phi 指令: - // 如果 nextBlock 包含 Phi 指令,需要将这些 Phi 指令的操作数进行处理 - // 因为 nextBlock 的唯一前驱是 currentBlock,这些 Phi 指令在合并后变得多余。 - // 它们的值可以直接替换为来自 currentBlock 的值。 - // 然后删除这些 Phi 指令。 - auto nextBlockInstIter = nextBlock->getInstructions().begin(); - while (nextBlockInstIter != nextBlock->getInstructions().end()) { - if ((*nextBlockInstIter)->isPhi()) { - PhiInst *phi = dynamic_cast(nextBlockInstIter->get()); - if (phi) { - // 找到 Phi 对应 currentBlock 的传入值 - Value *incomingVal = phi->getvalfromBlk(currentBlock); - if (incomingVal) { - phi->replaceAllUsesWith(incomingVal); // 替换所有使用 - SysYIROptUtils::usedelete(phi); // 删除 phi 指令 - nextBlockInstIter = nextBlock->getInstructions().erase(nextBlockInstIter); - changed = true; - continue; // 继续检查下一个指令 - } - } - } else { - break; // Phi 指令总是在基本块的开头 - } - ++nextBlockInstIter; - } - - // 将 nextBlock 的指令移动到 currentBlock - for (auto institer = nextBlock->begin(); institer != nextBlock->end();) { - institer->get()->setParent(currentBlock); - currentBlock->getInstructions().emplace_back(institer->release()); // 移动 unique_ptr - institer = nextBlock->getInstructions().erase(institer); - } - - // 合并参数 (如果 nextBlock 有 Arguments) - for (auto &argm : nextBlock->getArguments()) { - argm->setParent(currentBlock); // 更新父指针 - currentBlock->insertArgument(argm); // 将参数插入到 currentBlock - } - nextBlock->getArguments().clear(); // 清空 nextBlock 的参数列表 - - // 更新前驱后继关系 - // 清理 nextBlock 与 currentBlock 之间的关系 - 
currentBlock->removeSuccessor(nextBlock); - nextBlock->removePredecessor(currentBlock); - - // 将 nextBlock 的所有后继转移到 currentBlock - std::vector nextBlockSuccessors(nextBlock->getSuccessors().begin(), - nextBlock->getSuccessors().end()); - for (BasicBlock *succ : nextBlockSuccessors) { - if (succ) { - currentBlock->addSuccessor(succ); - succ->replacePredecessor(nextBlock, currentBlock); // 更新后继块的前驱 - nextBlock->removeSuccessor(succ); // 从 nextBlock 移除,避免重复处理 - } - } - - // 从函数中删除 nextBlock - func->removeBasicBlock(nextBlock); - changed = true; - // 保持 blockiter 不变,以便在下一次循环中重新检查当前的 currentBlock - // 因为它的新后继可能现在又满足合并条件了 - } else { - ++blockiter; // 不满足合并条件,移动到下一个块 - } - } else { - ++blockiter; // 不满足合并条件,移动到下一个块 - } - } - return changed; -} - -bool CFGOptimizationPass::SysYDelNoPreBLock(Function *func) { - bool changed = false; - - // 标记所有块为不可达 - for (auto &block_ptr : func->getBasicBlocks()) { - if (block_ptr) - block_ptr->setreachableFalse(); - } - - // 从入口块开始进行可达性分析 (BFS) - BasicBlock *entryBlock = func->getEntryBlock(); - if (!entryBlock) - return false; // 没有入口块,则无需处理 - - entryBlock->setreachableTrue(); - std::queue blockqueue; - blockqueue.push(entryBlock); - while (!blockqueue.empty()) { - BasicBlock *block = blockqueue.front(); - blockqueue.pop(); - if (block) { - for (auto &succ : block->getSuccessors()) { - if (succ && !succ->getreachable()) { - succ->setreachableTrue(); - blockqueue.push(succ); - } - } - } - } - - // 遍历所有块,删除不可达块 - - for (auto blockIter = func->getBasicBlocks_NoRange().begin(); blockIter != func->getBasicBlocks_NoRange().end();) { - BasicBlock *currentBlock = blockIter->get(); - if (!currentBlock) { - // 如果当前块是空指针,直接跳过 - blockIter = func->getBasicBlocks_NoRange().erase(blockIter); - changed = true; - continue; - } - - if (!currentBlock->getreachable()) { - // 入口块不可删除 - if (currentBlock == func->getEntryBlock()) { - ++blockIter; - continue; - } - - // 删除不可达基本块内的所有指令 - // 由于 usedelete 会从父块中移除指令,这里直接遍历并调用即可 - auto instsToProcess = 
currentBlock->getInstructions(); // 复制一份,避免迭代器失效 - for (auto &iterInst_ptr : instsToProcess) { - if (iterInst_ptr) - SysYIROptUtils::usedelete(iterInst_ptr.get()); - } - - // 处理 Phi 指令:移除指向该不可达块的 Phi 操作数 - // 遍历所有后继块的 Phi 指令,移除与 currentBlock 相关的传入值 - std::vector successorsCopy(currentBlock->getSuccessors().begin(), - currentBlock->getSuccessors().end()); - for (BasicBlock *succblock : successorsCopy) { - if (!succblock) - continue; - // 遍历后继块的指令,只处理 Phi 指令(它们在块的开头) - for (auto &phiinst_ptr : succblock->getInstructions()) { - if (phiinst_ptr->getKind() != Instruction::kPhi) { - break; // Phi 指令都在块的开头 - } - PhiInst *phi = dynamic_cast(phiinst_ptr.get()); - if (phi) { - // 使用 PhiInst 的 delBlk 方法来移除与当前被删除块相关的传入值 - phi->delBlk(currentBlock); - } - } - // 更新后继块的前驱列表 (非常重要,因为 currentBlock 要被删除了) - succblock->removePredecessor(currentBlock); - } - // 清空 currentBlock 的后继,因为它将不复存在 - currentBlock->clearPredecessors(); // 清空前驱列表 - currentBlock->clearSuccessors(); // 清空后继列表 - - // 从函数中删除基本块 - blockIter = func->getBasicBlocks_NoRange().erase(blockIter); - changed = true; - } else { - ++blockIter; - } - } - return changed; -} - -bool CFGOptimizationPass::SysYDelEmptyBlock(Function *func, IRBuilder *pBuilder) { - bool changed = false; - - // 收集所有“空”基本块(没有实际指令,或只有Phi和UncondBr)及其目标 - // map: 空块 -> 其唯一后继 (如果存在) - std::map EmptyBlocksMap; - - // 第一次遍历:识别空块及其跳转目标 - for (auto &basicBlock_ptr : func->getBasicBlocks()) { - BasicBlock *basicBlock = basicBlock_ptr.get(); - if (!basicBlock) - continue; - - // 判断是否是空块:没有指令或者只有 Phi 和一个终止指令 - bool isEmptyCandidate = true; - Instruction *terminatorInst = nullptr; - - if (basicBlock->getNumInstructions() == 0) { - isEmptyCandidate = true; // 完全空块 - } else { - // 检查除了最后一个指令之外是不是只有phi指令 - for(auto &inst_ptr : basicBlock->getInstructions()) { - Instruction *inst = inst_ptr.get(); - if (!inst->isPhi() && !inst->isTerminator()) { - isEmptyCandidate = false; // 有其他类型的指令 - break; - } - } - // for (size_t i = 0; i < basicBlock->getNumInstructions(); ++i) 
{ - // Instruction *inst = basicBlock->getInstructions()[i].get(); - // if (inst->isTerminator()) { - // terminatorInst = inst; - // // 如果终止指令不是最后一个,那这个块有问题 - // if (i != basicBlock->getNumInstructions() - 1) { - // isEmptyCandidate = false; - // break; - // } - // } else if (!inst->isPhi()) { // 除了 phi 和终止指令,还有其他指令 - // isEmptyCandidate = false; - // break; - // } - // } - } - - if (isEmptyCandidate) { - if (terminatorInst && terminatorInst->isUnconditional()) { - if (basicBlock->getNumSuccessors() == 1) { // 只有一条无条件跳转 - EmptyBlocksMap[basicBlock] = dynamic_cast(terminatorInst->getOperand(0)); - } - } else if (!terminatorInst && basicBlock->getNumSuccessors() == 1) { - // 可能是完全空块,但没有终止指令,只有一个后继(需要IRBuilder补全) - // 这种情况下,它也构成空块链的一部分 - EmptyBlocksMap[basicBlock] = basicBlock->getSuccessors().front(); - } - // 如果是条件分支,不认为是“空块链”的中间节点 - } - } - - // 第二次遍历:更新前驱的跳转目标,跳过空块链 - for (auto &basicBlock_ptr : func->getBasicBlocks()) { - BasicBlock *basicBlock = basicBlock_ptr.get(); - if (!basicBlock) - continue; - - // EntryBlock 不参与空块链的删除,但可以重定向其内部跳转 - if (basicBlock == func->getEntryBlock() && EmptyBlocksMap.count(basicBlock)) { - // 如果入口块本身是空块,处理其跳转目标 - Instruction *lastInst = - basicBlock->getInstructions().empty() ? 
nullptr : basicBlock->getInstructions().back().get(); - if (lastInst && lastInst->isUnconditional()) { - BasicBlock *oldTargetBlock = dynamic_cast(lastInst->getOperand(0)); - BasicBlock *currentTargetBlock = oldTargetBlock; - while (EmptyBlocksMap.count(currentTargetBlock)) { - currentTargetBlock = EmptyBlocksMap[currentTargetBlock]; - } - if (currentTargetBlock != oldTargetBlock) { - changed = true; - // 更新前驱后继关系 - basicBlock->removeSuccessor(oldTargetBlock); - oldTargetBlock->removePredecessor(basicBlock); - - lastInst->replaceOperand(0, currentTargetBlock); - basicBlock->addSuccessor(currentTargetBlock); - currentTargetBlock->addPredecessor(basicBlock); - - // 处理 Phi 指令:将被跳过的空块替换为 currentBlock - for (auto &InstInNew_ptr : currentTargetBlock->getInstructions()) { - if (InstInNew_ptr->isPhi()) { - PhiInst *phi = dynamic_cast(InstInNew_ptr.get()); - if (phi) { - // 使用 replaceold2new 替换 phi 传入的基本块 - phi->replaceold2new(oldTargetBlock, basicBlock); - } - } else { - break; - } - } - } - } - continue; - } - - // 确保块有终止指令,如果没有,添加一个(防止后续处理崩溃) - // 这种情况通常发生在IR生成时没有为完全空的块插入跳转,或者前面优化删除了终止指令 - if (basicBlock->getNumInstructions() == 0 || !basicBlock->getInstructions().back()->isTerminator()) { - if (basicBlock->getNumSuccessors() == 1) { - pBuilder->setPosition(basicBlock, basicBlock->end()); - pBuilder->createUncondBrInst(basicBlock->getSuccessors()[0], {}); - changed = true; // 添加了指令,所以有变化 - } - } - - auto lastInst = basicBlock->getInstructions().end(); - if (lastInst == basicBlock->getInstructions().begin()) { // 块是空的 - continue; - } - --lastInst; // 指向最后一个指令 - - if ((*lastInst)->isUnconditional()) { - BasicBlock *oldTargetBlock = dynamic_cast((*lastInst)->getOperand(0)); - BasicBlock *currentTargetBlock = oldTargetBlock; - - // 沿空块链查找最终目标 - while (EmptyBlocksMap.count(currentTargetBlock) && currentTargetBlock != func->getEntryBlock()) { - // 防止无限循环或将EntryBlock也视为空块 - currentTargetBlock = EmptyBlocksMap[currentTargetBlock]; - } - - if (currentTargetBlock != 
oldTargetBlock) { // 如果目标改变了 - changed = true; - // 更新前驱后继关系 - basicBlock->removeSuccessor(oldTargetBlock); - oldTargetBlock->removePredecessor(basicBlock); - - (*lastInst)->replaceOperand(0, currentTargetBlock); - basicBlock->addSuccessor(currentTargetBlock); - currentTargetBlock->addPredecessor(basicBlock); - - // 更新 Phi 指令:将被跳过的空块替换为 currentBlock - for (auto &InstInNew_ptr : currentTargetBlock->getInstructions()) { - if (InstInNew_ptr->isPhi()) { - PhiInst *phi = dynamic_cast(InstInNew_ptr.get()); - if (phi) { - // 使用 replaceold2new 替换 phi 传入的基本块 - phi->replaceold2new(oldTargetBlock, basicBlock); - } - } else { - break; - } - } - } - } else if ((*lastInst)->isConditional()) { - BasicBlock *oldThenBlock = dynamic_cast((*lastInst)->getOperand(1)); - BasicBlock *oldElseBlock = dynamic_cast((*lastInst)->getOperand(2)); - - BasicBlock *currentThenBlock = oldThenBlock; - BasicBlock *currentElseBlock = oldElseBlock; - - // 沿空块链查找最终目标 - while (EmptyBlocksMap.count(currentThenBlock) && currentThenBlock != func->getEntryBlock()) { - currentThenBlock = EmptyBlocksMap[currentThenBlock]; - } - while (EmptyBlocksMap.count(currentElseBlock) && currentElseBlock != func->getEntryBlock()) { - currentElseBlock = EmptyBlocksMap[currentElseBlock]; - } - - bool thenChanged = (currentThenBlock != oldThenBlock); - bool elseChanged = (currentElseBlock != oldElseBlock); - - if (thenChanged || elseChanged) { - changed = true; - // 更新前驱后继关系和 Phi 指令 - if (thenChanged) { - basicBlock->removeSuccessor(oldThenBlock); - oldThenBlock->removePredecessor(basicBlock); - (*lastInst)->replaceOperand(1, currentThenBlock); - basicBlock->addSuccessor(currentThenBlock); - currentThenBlock->addPredecessor(basicBlock); - - for (auto &InstInNew_ptr : currentThenBlock->getInstructions()) { - if (InstInNew_ptr->isPhi()) { - PhiInst *phi = dynamic_cast(InstInNew_ptr.get()); - if (phi) - phi->replaceold2new(oldThenBlock, basicBlock); - } else { - break; - } - } - } - if (elseChanged) { - 
basicBlock->removeSuccessor(oldElseBlock); - oldElseBlock->removePredecessor(basicBlock); - (*lastInst)->replaceOperand(2, currentElseBlock); - basicBlock->addSuccessor(currentElseBlock); - currentElseBlock->addPredecessor(basicBlock); - - for (auto &InstInNew_ptr : currentElseBlock->getInstructions()) { - if (InstInNew_ptr->isPhi()) { - PhiInst *phi = dynamic_cast(InstInNew_ptr.get()); - if (phi) - phi->replaceold2new(oldElseBlock, basicBlock); - } else { - break; - } - } - } - - // 处理 then 和 else 分支合并的情况 - if (currentThenBlock == currentElseBlock) { - SysYIROptUtils::usedelete(lastInst->get()); - basicBlock->getInstructions().erase(lastInst); - pBuilder->setPosition(basicBlock, basicBlock->end()); - pBuilder->createUncondBrInst(currentThenBlock, {}); - changed = true; - } - } - } - } - - // 第三次遍历:删除所有识别出来的空块 - for (auto iter = func->getBasicBlocks_NoRange().begin(); iter != func->getBasicBlocks_NoRange().end();) { - BasicBlock *currentBlock = iter->get(); - if (!currentBlock) { - iter = func->getBasicBlocks_NoRange().erase(iter); - changed = true; - continue; - } - - if (EmptyBlocksMap.count(currentBlock)) { - // EntryBlock 不能被删除 - if (currentBlock == func->getEntryBlock()) { - ++iter; - continue; - } - - // 删除空块内的所有指令 - auto instsToProcess = currentBlock->getInstructions(); // 复制一份 - for (auto &iterInst_ptr : instsToProcess) { - if (iterInst_ptr) - SysYIROptUtils::usedelete(iterInst_ptr.get()); - } - - // 更新其后继的前驱关系(如果之前没有完全清除,但由于 replaceold2new 已经处理了大部分) - // 这里主要为了确保被删除块的所有后继都移除了它作为前驱 - std::vector succsCopy(currentBlock->getSuccessors().begin(), currentBlock->getSuccessors().end()); - for (BasicBlock *succ : succsCopy) { - if (succ) - succ->removePredecessor(currentBlock); - } - // 清空其自身的前驱和后继 - currentBlock->getPredecessors().clear(); - currentBlock->getSuccessors().clear(); - - // 从函数中删除基本块 - iter = func->getBasicBlocks_NoRange().erase(iter); // erase 会返回下一个有效迭代器 - changed = true; - } else { - ++iter; - } - } - return changed; -} - -bool 
CFGOptimizationPass::SysYAddReturn(Function *func, IRBuilder *pBuilder) { - bool changed = false; - // 使用新的迭代器方式遍历 - for (auto &block_ptr : func->getBasicBlocks()) { - BasicBlock *block = block_ptr.get(); - if (!block) - continue; // 确保基本块有效 - - // 如果基本块没有后继(即是出口块) - if (block->getNumSuccessors() == 0) { - // 检查最后一个指令是否是返回指令 - if (block->getNumInstructions() == 0 || !block->getInstructions().back()->isReturn()) { - changed = true; - pBuilder->setPosition(block, block->end()); - if (func->getReturnType()->isInt()) { - pBuilder->createReturnInst(ConstantInteger::get(0)); - } else if (func->getReturnType()->isFloat()) { - pBuilder->createReturnInst(ConstantFloating::get(0.0F)); - } else { // Void 类型 - pBuilder->createReturnInst(); - } - } - } - } - return changed; -} - -bool CFGOptimizationPass::SysYCondBr2Br(Function *func, IRBuilder *pBuilder) { - bool changed = false; - - for (auto &basicblock_ptr : func->getBasicBlocks()) { - BasicBlock *basicblock = basicblock_ptr.get(); - if (!basicblock || basicblock->getNumInstructions() == 0) - continue; - - auto lastInstIter = basicblock->getInstructions().end(); - --lastInstIter; // 指向最后一个指令 - - if ((*lastInstIter)->isConditional()) { - Value *condOperand = (*lastInstIter)->getOperand(0); - ConstantValue *constOperand = dynamic_cast(condOperand); - - if (constOperand != nullptr) { // 条件操作数是常量 - changed = true; - - BasicBlock *thenBlock = dynamic_cast((*lastInstIter)->getOperand(1)); - BasicBlock *elseBlock = dynamic_cast((*lastInstIter)->getOperand(2)); - - // 删除旧的条件分支指令 - SysYIROptUtils::usedelete(lastInstIter->get()); - basicblock->getInstructions().erase(lastInstIter); - - BasicBlock *targetBlock = nullptr; - BasicBlock *prunedBlock = nullptr; // 被剪枝的路径的块 - - bool isTrue = false; - if (constOperand->isFloat()) { - isTrue = (constOperand->getFloat() != 0.0F); - } else { // 整数 - isTrue = (constOperand->getInt() != 0); - } - - if (isTrue) { - targetBlock = thenBlock; - prunedBlock = elseBlock; - } else { - targetBlock = 
elseBlock; - prunedBlock = thenBlock; - } - - // 创建无条件跳转指令 - pBuilder->setPosition(basicblock, basicblock->end()); - pBuilder->createUncondBrInst(targetBlock, {}); - - // 更新前驱后继关系 - // 移除被剪枝的路径 - if (prunedBlock && basicblock->hasSuccessor(prunedBlock)) { - basicblock->removeSuccessor(prunedBlock); - prunedBlock->removePredecessor(basicblock); - - // 移除被剪枝路径上的 Phi 指令操作数 - for (auto &phiinst_ptr : prunedBlock->getInstructions()) { - if (phiinst_ptr->getKind() != Instruction::kPhi) { - break; - } - PhiInst *phi = dynamic_cast(phiinst_ptr.get()); - if (phi) { - // 使用 PhiInst 的 delBlk 方法来移除与当前 basicblock 相关的传入值 - phi->delBlk(basicblock); - } - } - } - } - } - } - return changed; -} - -// ====================================================================== -// CFGOptimizationPass::runOnFunction 实现 -// ====================================================================== - -bool CFGOptimizationPass::runOnFunction(Function *F, AnalysisManager &AM) { - bool changed = false; - if (!F) - return false; - - // 创建一个临时的 IRBuilder 实例,用于在当前函数内创建指令 - IRBuilder builder(nullptr); - // 迭代进行 CFG 优化,直到不再发生变化 - bool funcChangedThisIteration = true; - while (funcChangedThisIteration) { - funcChangedThisIteration = false; // 每次循环开始时重置为 false - - // 这里的顺序很重要,某些优化依赖于其他优化(例如删除无前驱块) - // 并且某些优化可能会为其他优化创造机会,所以需要循环直到稳定 - funcChangedThisIteration |= SysYCondBr2Br(F, &builder); // 条件分支转换为无条件分支 - funcChangedThisIteration |= SysYDelInstAfterBr(F); // 删除 br 后的无用指令 - funcChangedThisIteration |= SysYDelEmptyBlock(F, &builder); // 删除空块(可能涉及跳转目标更新) - funcChangedThisIteration |= SysYDelNoPreBLock(F); // 删除无前驱块(不可达块) - funcChangedThisIteration |= SysYBlockMerge(F); // 合并基本块 - funcChangedThisIteration |= SysYAddReturn(F, &builder); // 添加返回指令 - - // 如果本轮有任何变化,则继续下一次循环 - changed = changed || funcChangedThisIteration; - } - - // 如果函数有任何变化,返回 true - return changed; -} - -} // namespace sysy \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d82e1e3..01f54d8 100644 --- 
a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -25,7 +25,7 @@ add_executable(sysyc Pass.cpp Dom.cpp Liveness.cpp - # DeadCodeElimination.cpp + DCE.cpp AddressCalculationExpansion.cpp # Mem2Reg.cpp # Reg2Mem.cpp diff --git a/src/DCE.cpp b/src/DCE.cpp new file mode 100644 index 0000000..e0e9519 --- /dev/null +++ b/src/DCE.cpp @@ -0,0 +1,91 @@ +#include "DCE.h" +#include "IR.h" +#include "SysYIROptUtils.h" +#include +#include + +namespace sysy { + +// DCE 遍的静态 ID +void *DCE::ID = (void *)&DCE::ID; + +// DCE 遍的 runOnFunction 方法实现 +bool DCE::runOnFunction(Function *func, AnalysisManager &AM) { + alive_insts.clear(); + bool changed = false; + + auto basicBlocks = func->getBasicBlocks(); + + for (auto &basicBlock : basicBlocks) { + // 确保基本块有效 + if (!basicBlock) + continue; + for (auto &inst : basicBlock->getInstructions()) { + // 确保指令有效 + if (!inst) + continue; + if (isAlive(inst.get())) { + addAlive(inst.get()); + } + } + } + + // 第二遍:删除所有未被标记为活跃的指令。 + for (auto &basicBlock : basicBlocks) { + if (!basicBlock) + continue; + // 使用传统的迭代器循环,并手动管理迭代器, + // 以便在删除元素后正确前进。 + for (auto instIter = basicBlock->getInstructions().begin(); instIter != basicBlock->getInstructions().end();) { + auto &inst = *instIter; + Instruction *currentInst = inst.get(); + // 如果指令不在活跃集合中,则删除它。 + // 分支和返回指令由 isAlive 处理,并会被保留。 + if (alive_insts.count(currentInst) == 0) { + // 删除指令 + changed = true; // 标记 IR 已被修改 + SysYIROptUtils::usedelete(currentInst); + instIter = basicBlock->getInstructions().erase(instIter); // 删除后返回下一个迭代器 + } else { + ++instIter; // 指令活跃,移动到下一个 + } + } + } + + return changed; +} + +// 判断指令是否是“天然活跃”的实现 +// 只有具有副作用的指令(如存储、函数调用、原子操作) +// 和控制流指令(如分支、返回)是天然活跃的。 +bool DCE::isAlive(Instruction *inst) { + // TODO: 后续程序并发考虑原子操作 + // 其结果不被其他指令使用的指令(例如 StoreInst, BranchInst, ReturnInst)。 + // dynamic_cast(inst) 检查是否是函数调用指令, + // 函数调用通常有副作用。 + // 终止指令 (BranchInst, ReturnInst) 必须是活跃的,因为它控制了程序的执行流程。 + bool isBranchOrReturn = inst->isBranch() || inst->isReturn(); + bool isCall = 
inst->isCall(); + bool isStoreOrMemset = inst->isStore() && inst->isMemset(); + return isBranchOrReturn || isCall || isStoreOrMemset; +} + +// 递归地将活跃指令及其依赖加入到 alive_insts 集合中 +void DCE::addAlive(Instruction *inst) { + // 如果指令已经存在于活跃集合中,则无需重复处理 + if (alive_insts.count(inst) > 0) { + return; + } + // 将当前指令标记为活跃 + alive_insts.insert(inst); + // 遍历当前指令的所有操作数 + for (auto operand : inst->getOperands()) { + // 如果操作数是一个指令(即它是一个值的定义), + // 并且它还没有被标记为活跃 + if (auto opInst = dynamic_cast(operand->getValue())) { + addAlive(opInst); // 递归地将操作数指令标记为活跃 + } + } +} + +} // namespace sysy diff --git a/src/Pass.cpp b/src/Pass.cpp index 6a6f85b..bba8e36 100644 --- a/src/Pass.cpp +++ b/src/Pass.cpp @@ -2,6 +2,7 @@ #include "Liveness.h" #include "SysYIRCFGOpt.h" #include "SysYIRPrinter.h" +#include "DCE.h" #include "Pass.h" #include #include @@ -44,39 +45,30 @@ void PassManager::runOptimizationPipeline(Module* moduleIR, IRBuilder* builderIR registerOptimizationPass(builderIR); if (optLevel >= 1) { - if (DEBUG) std::cout << "Applying -O1 optimizations.\n"; + //经过设计安排优化遍的执行顺序以及执行逻辑 + if (DEBUG) std::cout << "Applying -O1 optimizations.\n"; + if (DEBUG) std::cout << "--- Running custom optimization sequence ---\n"; - // 只添加优化遍的 ID - this->addPass(&SysYDelInstAfterBrPass::ID); - this->addPass(&SysYDelNoPreBLockPass::ID); - this->addPass(&SysYBlockMergePass::ID); - this->addPass(&SysYDelEmptyBlockPass::ID); - this->addPass(&SysYCondBr2BrPass::ID); - this->addPass(&SysYAddReturnPass::ID); + this->clearPasses(); + this->addPass(&SysYDelInstAfterBrPass::ID); + this->addPass(&SysYDelNoPreBLockPass::ID); + this->addPass(&SysYBlockMergePass::ID); + this->addPass(&SysYDelEmptyBlockPass::ID); + this->addPass(&SysYCondBr2BrPass::ID); + this->addPass(&SysYAddReturnPass::ID); + this->run(); + + this->clearPasses(); + this->addPass(&DCE::ID); + this->run(); + + if (DEBUG) std::cout << "--- Custom optimization sequence finished ---\n"; } // 2. 创建遍管理器 // 3. 
根据优化级别添加不同的优化遍 // TODO : 根据 optLevel 添加不同的优化遍 // 讨论 是不动点迭代进行优化遍还是手动客制化优化遍的顺序? - if (optLevel >= 1) { - if (DEBUG) std::cout << "Applying -O1 optimizations.\n"; - - // 4. 循环执行遍,直到 IR 稳定 (不再有任何遍修改 IR) - bool changed_in_iteration = true; - int iteration_count = 0; - while(changed_in_iteration) { - iteration_count++; - if (DEBUG) std::cout << "Optimization iteration: " << iteration_count << std::endl; - changed_in_iteration = run(); // 运行一次所有添加到 PassManager 的遍 - if (DEBUG && changed_in_iteration) { - std::cout << "=== IR after iteration " << iteration_count << " ===\n"; - SysYPrinter printer_iter(moduleIR); - printer_iter.printIR(); - } - } - if (DEBUG) std::cout << "Optimizations stabilized after " << iteration_count << " iterations.\n"; - } if (DEBUG) { @@ -86,6 +78,10 @@ void PassManager::runOptimizationPipeline(Module* moduleIR, IRBuilder* builderIR } } +void PassManager::clearPasses() { + passes.clear(); +} + void PassManager::addPass(void *passID) { PassRegistry ®istry = PassRegistry::getPassRegistry(); diff --git a/src/SysYIRCFGOpt.cpp b/src/SysYIRCFGOpt.cpp index 63d67d4..1a6c3a1 100644 --- a/src/SysYIRCFGOpt.cpp +++ b/src/SysYIRCFGOpt.cpp @@ -167,14 +167,14 @@ bool SysYCFGOptUtils::SysYDelNoPreBLock(Function *func) { // 删除不可达基本块指令 for (auto blockIter = func->getBasicBlocks().begin(); blockIter != func->getBasicBlocks().end(); blockIter++) { - if (!blockIter->get()->getreachable()) { - for (auto instIter = blockIter->get()->getInstructions().begin(); - instIter != blockIter->get()->getInstructions().end();) { - SysYIROptUtils::usedelete(instIter->get()); - instIter = blockIter->get()->getInstructions().erase(instIter); + if (!blockIter->get()->getreachable()) { + for (auto instIter = blockIter->get()->getInstructions().begin(); + instIter != blockIter->get()->getInstructions().end();) { + SysYIROptUtils::usedelete(instIter->get()); + instIter = blockIter->get()->getInstructions().erase(instIter); + } } } -} for (auto blockIter = func->getBasicBlocks().begin(); 
blockIter != func->getBasicBlocks().end();) { diff --git a/src/include/CFGOptPass.h b/src/include/CFGOptPass.h deleted file mode 100644 index d4c4f8e..0000000 --- a/src/include/CFGOptPass.h +++ /dev/null @@ -1,40 +0,0 @@ -#pragma once - -#include "Pass.h" // 包含 Pass 框架 -#include "IR.h" // 包含 IR 定义 -#include "IRBuilder.h" // 包含 IRBuilder - -namespace sysy { - -// 前向声明 IRBuilder (如果在其他地方定义,确保路径正确) -// class IRBuilder; // 如果IRBuilder不在IRBuilder.h中定义,需要前向声明 - -// CFG 优化遍 -class CFGOptimizationPass : public OptimizationPass { -public: - // 唯一的 Pass ID - static char ID; - - CFGOptimizationPass() : OptimizationPass("CFGOptimization", Pass::Granularity::Function) {} - - // 实现 getPassID - void* getPassID() const override { return &ID; } - - // 声明分析依赖和失效 - void getAnalysisUsage(std::set& analysisDependencies, std::set& analysisInvalidations) const override; - - // 运行优化,现在接受 AnalysisManager& AM 参数 - bool runOnFunction(Function* F, AnalysisManager& AM) override; - -private: - // 将原 SysYCFGOpt 中的静态方法移入或直接使用 - // 这些方法可以直接声明为静态成员函数,并在 runOnFunction 中调用 - static bool SysYDelInstAfterBr(Function *func); - static bool SysYDelEmptyBlock(Function *func, IRBuilder* pBuilder); - static bool SysYDelNoPreBLock(Function *func); - static bool SysYBlockMerge(Function *func); - static bool SysYAddReturn(Function *func, IRBuilder* pBuilder); - static bool SysYCondBr2Br(Function *func, IRBuilder* pBuilder); -}; - -} // namespace sysy \ No newline at end of file diff --git a/src/include/DCE.h b/src/include/DCE.h new file mode 100644 index 0000000..8be40bd --- /dev/null +++ b/src/include/DCE.h @@ -0,0 +1,46 @@ +#pragma once + +#include "IR.h" // 包含IR相关的定义,如Instruction, Function, BasicBlock等 +#include "IRBuilder.h" // 包含IR构建器的定义 +#include "SysYIROptUtils.h" // 包含SysY IR优化工具类的 +#include "Liveness.h" +#include "Dom.h" // 包含支配树的定义 +#include "Pass.h" // 包含Pass的基类定义 +#include // 用于存储活跃指令 + +namespace sysy { + +// DCE 优化遍类,继承自 OptimizationPass +class DCE : public OptimizationPass { + private: + 
std::unordered_set alive_insts; + // 判断指令是否是“天然活跃”的(即总是保留的) + // inst: 要检查的指令 + // 返回值: 如果指令是天然活跃的,则为true,否则为false + bool isAlive(Instruction *inst); + // 递归地将活跃指令及其依赖加入到 alive_insts 集合中 + // inst: 要标记为活跃的指令 + void addAlive(Instruction *inst); +public: + static void *ID; + DCE() : OptimizationPass("DCE", Granularity::Function) {} + bool runOnFunction(Function *func, AnalysisManager &AM) override; + void getAnalysisUsage(std::set &analysisDependencies, std::set &analysisInvalidations) const override{ + // DCE不依赖特定的分析结果,它通过遍历和副作用判断来工作。 + + // DCE会删除指令,这会影响许多分析结果。 + // 至少,它会影响活跃性分析、支配树、控制流图(如果删除导致基本块为空并被合并)。 + // 假设存在LivenessAnalysisPass和DominatorTreeAnalysisPass + // analysisInvalidations.insert(&LivenessAnalysisPass::ID); + // analysisInvalidations.insert(&DominatorTreeAnalysisPass::ID); + // 任何改变IR结构的优化,都可能导致通用分析(如活跃性、支配树、循环信息)失效。 + // 最保守的做法是使所有函数粒度的分析失效,或者只声明你明确知道会受影响的分析。 + // 考虑到这个DCE仅删除指令,如果它不删除基本块,CFG可能不变,但数据流分析会失效。 + // 对于更激进的DCE(如ADCE),CFG也会改变。 + // 这里我们假设它主要影响数据流分析,并且可能间接影响CFG相关分析。 + // 如果有SideEffectInfo,它也可能被修改,但通常SideEffectInfo是静态的,不因DCE而变。 + } + void *getPassID() const override { return &ID; } +}; + +} // namespace sysy \ No newline at end of file diff --git a/src/include/Pass.h b/src/include/Pass.h index b928b91..d387e9e 100644 --- a/src/include/Pass.h +++ b/src/include/Pass.h @@ -291,6 +291,7 @@ public: AnalysisManager &getAnalysisManager() { return analysisManager; } + void clearPasses(); }; // ====================================================================== diff --git a/src/include/SysYIRCFGOpt.h b/src/include/SysYIRCFGOpt.h index 4d025c8..f388138 100644 --- a/src/include/SysYIRCFGOpt.h +++ b/src/include/SysYIRCFGOpt.h @@ -54,6 +54,7 @@ public: static void *ID; SysYDelEmptyBlockPass(IRBuilder *builder) : OptimizationPass("SysYDelEmptyBlockPass", Granularity::Function), pBuilder(builder) {} bool runOnFunction(Function *F, AnalysisManager& AM) override; + void getAnalysisUsage(std::set &analysisDependencies, std::set &analysisInvalidations) const 
override; void *getPassID() const override { return &ID; } }; @@ -62,6 +63,7 @@ public: static void *ID; SysYDelNoPreBLockPass() : OptimizationPass("SysYDelNoPreBLockPass", Granularity::Function) {} bool runOnFunction(Function *F, AnalysisManager& AM) override; + void getAnalysisUsage(std::set &analysisDependencies, std::set &analysisInvalidations) const override; void *getPassID() const override { return &ID; } }; @@ -70,6 +72,7 @@ public: static void *ID; SysYBlockMergePass() : OptimizationPass("SysYBlockMergePass", Granularity::Function) {} bool runOnFunction(Function *F, AnalysisManager& AM) override; + void getAnalysisUsage(std::set &analysisDependencies, std::set &analysisInvalidations) const override; void *getPassID() const override { return &ID; } }; @@ -80,6 +83,7 @@ public: static void *ID; SysYAddReturnPass(IRBuilder *builder) : OptimizationPass("SysYAddReturnPass", Granularity::Function), pBuilder(builder) {} bool runOnFunction(Function *F, AnalysisManager& AM) override; + void getAnalysisUsage(std::set &analysisDependencies, std::set &analysisInvalidations) const override; void *getPassID() const override { return &ID; } }; @@ -90,6 +94,7 @@ public: static void *ID; SysYCondBr2BrPass(IRBuilder *builder) : OptimizationPass("SysYCondBr2BrPass", Granularity::Function), pBuilder(builder) {} bool runOnFunction(Function *F, AnalysisManager& AM) override; + void getAnalysisUsage(std::set &analysisDependencies, std::set &analysisInvalidations) const override; void *getPassID() const override { return &ID; } }; From 2556ab7315691492f70ae509f0aba0a39f041e64 Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Thu, 24 Jul 2025 15:04:29 +0800 Subject: [PATCH 24/35] =?UTF-8?q?[midend]=E4=BF=AE=E5=A4=8Dgetanalysisusag?= =?UTF-8?q?ee=E7=BC=BA=E5=A4=B1=E5=AE=9A=E4=B9=89=EF=BC=8C=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D=E6=95=B0=E7=BB=84=E5=88=9D=E5=A7=8B=E5=8C=96=E9=94=99?= =?UTF-8?q?=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- src/SysYIRGenerator.cpp | 38 ++++++++++++++++++++++++-------------- src/include/SysYIRCFGOpt.h | 10 +++++----- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/src/SysYIRGenerator.cpp b/src/SysYIRGenerator.cpp index 50c24ea..ba25144 100644 --- a/src/SysYIRGenerator.cpp +++ b/src/SysYIRGenerator.cpp @@ -205,7 +205,7 @@ std::any SysYIRGenerator::visitVarDecl(SysYParser::VarDeclContext *ctx) { builder.createStoreInst(values.getValue(0), alloca); } else { // 数组变量初始化 const std::vector &counterValues = values.getValues(); - + const std::vector &counterNumbers = values.getNumbers(); int numElements = 1; std::vector dimSizes; for (Value *dimVal : dims) { @@ -247,23 +247,33 @@ std::any SysYIRGenerator::visitVarDecl(SysYParser::VarDeclContext *ctx) { ConstantInteger::get(0)); } else { + + int linearIndexOffset = 0; // 用于追踪当前处理的线性索引的偏移量 for (int k = 0; k < counterValues.size(); ++k) { - std::vector currentIndices; - int tempLinearIndex = k; + // 当前 Value 的值和重复次数 + Value* currentValue = counterValues[k]; + unsigned currentRepeatNum = counterNumbers[k]; - // 将线性索引转换为多维索引 - for (int dimIdx = dimSizes.size() - 1; dimIdx >= 0; --dimIdx) - { - currentIndices.insert(currentIndices.begin(), - ConstantInteger::get(static_cast(tempLinearIndex % dimSizes[dimIdx]))); - tempLinearIndex /= dimSizes[dimIdx]; - } + for (unsigned i = 0; i < currentRepeatNum; ++i) { + std::vector currentIndices; + int tempLinearIndex = linearIndexOffset + i; // 使用偏移量和当前重复次数内的索引 - // 计算元素的地址 - Value* elementAddress = getGEPAddressInst(alloca, currentIndices); - // 生成 store 指令 (假设 createStoreInst 接受 Value* value, Value* pointer) - builder.createStoreInst(counterValues[k], elementAddress); + // 将线性索引转换为多维索引 + for (int dimIdx = dimSizes.size() - 1; dimIdx >= 0; --dimIdx) { + currentIndices.insert(currentIndices.begin(), + ConstantInteger::get(static_cast(tempLinearIndex % dimSizes[dimIdx]))); + tempLinearIndex /= dimSizes[dimIdx]; + } + + // 计算元素的地址 + Value* 
elementAddress = getGEPAddressInst(alloca, currentIndices); + // 生成 store 指令 + builder.createStoreInst(currentValue, elementAddress); + } + // 更新线性索引偏移量,以便下一次迭代从正确的位置开始 + linearIndexOffset += currentRepeatNum; } + } } } diff --git a/src/include/SysYIRCFGOpt.h b/src/include/SysYIRCFGOpt.h index f388138..a7ba08b 100644 --- a/src/include/SysYIRCFGOpt.h +++ b/src/include/SysYIRCFGOpt.h @@ -54,7 +54,7 @@ public: static void *ID; SysYDelEmptyBlockPass(IRBuilder *builder) : OptimizationPass("SysYDelEmptyBlockPass", Granularity::Function), pBuilder(builder) {} bool runOnFunction(Function *F, AnalysisManager& AM) override; - void getAnalysisUsage(std::set &analysisDependencies, std::set &analysisInvalidations) const override; + void getAnalysisUsage(std::set &analysisDependencies, std::set &analysisInvalidations) const override {}; void *getPassID() const override { return &ID; } }; @@ -63,7 +63,7 @@ public: static void *ID; SysYDelNoPreBLockPass() : OptimizationPass("SysYDelNoPreBLockPass", Granularity::Function) {} bool runOnFunction(Function *F, AnalysisManager& AM) override; - void getAnalysisUsage(std::set &analysisDependencies, std::set &analysisInvalidations) const override; + void getAnalysisUsage(std::set &analysisDependencies, std::set &analysisInvalidations) const override {}; void *getPassID() const override { return &ID; } }; @@ -72,7 +72,7 @@ public: static void *ID; SysYBlockMergePass() : OptimizationPass("SysYBlockMergePass", Granularity::Function) {} bool runOnFunction(Function *F, AnalysisManager& AM) override; - void getAnalysisUsage(std::set &analysisDependencies, std::set &analysisInvalidations) const override; + void getAnalysisUsage(std::set &analysisDependencies, std::set &analysisInvalidations) const override {}; void *getPassID() const override { return &ID; } }; @@ -83,7 +83,7 @@ public: static void *ID; SysYAddReturnPass(IRBuilder *builder) : OptimizationPass("SysYAddReturnPass", Granularity::Function), pBuilder(builder) {} bool 
runOnFunction(Function *F, AnalysisManager& AM) override; - void getAnalysisUsage(std::set &analysisDependencies, std::set &analysisInvalidations) const override; + void getAnalysisUsage(std::set &analysisDependencies, std::set &analysisInvalidations) const override {}; void *getPassID() const override { return &ID; } }; @@ -94,7 +94,7 @@ public: static void *ID; SysYCondBr2BrPass(IRBuilder *builder) : OptimizationPass("SysYCondBr2BrPass", Granularity::Function), pBuilder(builder) {} bool runOnFunction(Function *F, AnalysisManager& AM) override; - void getAnalysisUsage(std::set &analysisDependencies, std::set &analysisInvalidations) const override; + void getAnalysisUsage(std::set &analysisDependencies, std::set &analysisInvalidations) const override {}; void *getPassID() const override { return &ID; } }; From c68b031c01d4b08ed5e962ea1b04a23fc730dd5f Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Thu, 24 Jul 2025 15:22:38 +0800 Subject: [PATCH 25/35] =?UTF-8?q?[midend]=E4=BF=AE=E5=A4=8D=E5=85=A8?= =?UTF-8?q?=E5=B1=80=E6=95=B0=E7=BB=84=E7=B1=BB=E5=9E=8B=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/SysYIRGenerator.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/SysYIRGenerator.cpp b/src/SysYIRGenerator.cpp index ba25144..eb21a1a 100644 --- a/src/SysYIRGenerator.cpp +++ b/src/SysYIRGenerator.cpp @@ -146,7 +146,11 @@ std::any SysYIRGenerator::visitGlobalVarDecl(SysYParser::GlobalVarDeclContext *c delete root; } // 创建全局变量,并更新符号表 - module->createGlobalValue(name, Type::getPointerType(type), dims, values); + Type* variableType = type; + if (!dims.empty()) { // 如果有维度,说明是数组 + variableType = buildArrayType(type, dims); // 构建完整的 ArrayType + } + module->createGlobalValue(name, Type::getPointerType(variableType), dims, values); } return std::any(); } @@ -193,7 +197,7 @@ std::any SysYIRGenerator::visitVarDecl(SysYParser::VarDeclContext *ctx) { // 
对于数组,alloca 的类型将是指针指向数组类型,例如 `int[2][3]*` // 对于标量,alloca 的类型将是指针指向标量类型,例如 `int*` AllocaInst* alloca = - builder.createAllocaInst(Type::getPointerType(type), dims, name); + builder.createAllocaInst(Type::getPointerType(variableType), dims, name); if (varDef->initVal() != nullptr) { ValueCounter values; @@ -1232,6 +1236,7 @@ auto SysYIRGenerator::visitLOrExp(SysYParser::LOrExpContext *ctx) -> std::any { return std::any(); } +// attention : 这里的type是数组元素的type void Utils::tree2Array(Type *type, ArrayValueTree *root, const std::vector &dims, unsigned numDims, ValueCounter &result, IRBuilder *builder) { From 9c56bc131065f2eb73d7121d44b755ebd752a030 Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Thu, 24 Jul 2025 17:02:29 +0800 Subject: [PATCH 26/35] =?UTF-8?q?[midend]=E4=BF=AE=E6=94=B9GEP=E6=8C=87?= =?UTF-8?q?=E4=BB=A4=E5=AE=9A=E4=B9=89=EF=BC=8C=E6=9B=B4=E9=9D=A0=E8=BF=91?= =?UTF-8?q?llvm=20ir=E8=AE=BE=E8=AE=A1=EF=BC=8C=E5=A2=9E=E5=8A=A0=E8=87=AA?= =?UTF-8?q?=E5=8A=A8=E6=8E=A8=E6=96=AD=E7=B1=BB=E5=9E=8B=E5=87=BD=E6=95=B0?= =?UTF-8?q?=EF=BC=8C=E4=BF=AE=E5=A4=8Dgenerator=E4=B8=AD=E9=94=99=E8=AF=AF?= =?UTF-8?q?=E7=94=9F=E6=88=90ir=E7=9A=84=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/SysYIRGenerator.cpp | 69 +++++++---------------------------------- src/include/IR.h | 14 ++++----- src/include/IRBuilder.h | 56 ++++++++++++++++++++++++++++++--- 3 files changed, 70 insertions(+), 69 deletions(-) diff --git a/src/SysYIRGenerator.cpp b/src/SysYIRGenerator.cpp index eb21a1a..eaa7ad0 100644 --- a/src/SysYIRGenerator.cpp +++ b/src/SysYIRGenerator.cpp @@ -42,44 +42,15 @@ Type* SysYIRGenerator::buildArrayType(Type* baseType, const std::vector& Value* SysYIRGenerator::getGEPAddressInst(Value* basePointer, const std::vector& indices) { // 检查 basePointer 是否为指针类型 - if (!basePointer->getType()->isPointer()) { - assert(false && "GEP base pointer must be a pointer type!"); - } - - // 获取基指针所指向的实际类型 (例如 
int* 指向 int, int[2][3]* 指向 int[2][3]) - Type* currentElementType = basePointer->getType()->as()->getBaseType(); + assert(basePointer->getType()->isPointer()); + // GEP 的第一个索引通常是0,用于“穿过”指针本身,访问其指向的对象。 + // 例如,对于全局数组 @arr,其类型为 [6 x i32]*,第一个0索引是必需的。 std::vector actualGEPIndices; - // GEP 指令的第一个索引通常是0,用于“跳过”基指针指向的聚合类型本身,直接指向其第一个元素。 - // 例如,对于 AllocaInst 返回的 `int[2][3]*`,第一个 `0` 索引表示从数组的开始而不是指针本身开始索引。 - actualGEPIndices.push_back(ConstantInteger::get(0)); - - // 将用户提供的索引添加到 GEP 操作数中 - for (Value* index : indices) { - actualGEPIndices.push_back(index); - } - - // 根据索引链计算最终的元素类型 - Type* finalTargetType = currentElementType; - - // 遍历用户提供的索引(不包括我们添加的第一个0),逐步确定 GEP 的最终结果类型 - // 每个索引都“深入”一个维度 - for (int i = 0; i < indices.size(); ++i) { // 这里遍历的是用户提供的索引 - if (finalTargetType && finalTargetType->isArray()) { - finalTargetType = finalTargetType->as()->getElementType(); - } else { - // 如果索引链还在继续,但当前类型已经不是数组或聚合类型,这通常是一个错误 - // 或者表示访问的是标量,后续索引无效。此时,finalTargetType 已经是最终的标量类型,不能再深入。 - // 例如,对 int arr[5]; 访问 arr[i][j] (j 是多余的),这里会停止类型推断。 - break; - } - } - - // GEP 的结果总是指针类型,指向最终计算出的元素 - Type* gepResultType = Type::getPointerType(finalTargetType); - - // 创建 GEP 指令。假设 builder.createGetElementPtrInst 的签名为 - // (Type* resultType, Value* basePointer, const std::vector& indices) + actualGEPIndices.push_back(ConstantInteger::get(0)); // 模拟 ConstantInteger::get(0) + actualGEPIndices.insert(actualGEPIndices.end(), indices.begin(), indices.end()); + + // 直接调用 builder 的方法,无需再关心类型推断的细节 return builder.createGetElementPtrInst(basePointer, actualGEPIndices); } /* @@ -441,35 +412,19 @@ std::any SysYIRGenerator::visitAssignStmt(SysYParser::AssignStmtContext *ctx) { auto variable = module->getVariable(name); // 获取 AllocaInst 或 GlobalValue Value* value = std::any_cast(visitExp(ctx->exp())); // 右值 - if (variable == nullptr) { - throw std::runtime_error("Variable " + name + " not found in assignment."); - } + Type* targetElementType = variable->getType(); // 从基指针指向的类型开始 - // 计算最终赋值目标元素的类型 - // 
variable 本身应该是一个指针类型 (例如 int* 或 int[2][3]*) - if (!variable->getType()->isPointer()) { - assert(false && "Variable to be assigned must be a pointer type!"); - return std::any(); - } - Type* targetElementType = variable->getType()->as()->getBaseType(); // 从基指针指向的类型开始 - - // 模拟 GEP 路径,根据 dims 确定最终元素的类型 - for (int i = 0; i < dims.size(); ++i) { - if (targetElementType && targetElementType->isArray()) { - targetElementType = targetElementType->as()->getElementType(); - } else { - break; // 如果不是数组类型但还有索引,或者索引超出维度,则停止推断 - } - } + //根据 dims 确定最终元素的类型 + targetElementType = builder.getIndexedType(targetElementType, dims); // 左值右值类型不同处理:根据最终元素类型进行转换 if (targetElementType != value->getType()) { ConstantValue * constValue = dynamic_cast(value); if (constValue != nullptr) { if (targetElementType == Type::getFloatType()) { - value = ConstantFloating::get(static_cast(constValue->getInt())); + value = ConstantFloating::get(static_cast(constValue->getFloat())); } else { // 假设如果不是浮点型,就是整型 - value = ConstantInteger::get(static_cast(constValue->getFloat())); + value = ConstantInteger::get(static_cast(constValue->getInt())); } } else { if (targetElementType == Type::getFloatType()) { diff --git a/src/include/IR.h b/src/include/IR.h index 2abe3e1..1e64582 100644 --- a/src/include/IR.h +++ b/src/include/IR.h @@ -1124,24 +1124,24 @@ public: class GetElementPtrInst : public Instruction { - friend class IRBuilder; // 如果您有IRBuilder来创建指令,需要friend + friend class IRBuilder; protected: // GEP的构造函数: // resultType: GEP计算出的地址的类型 (通常是指向目标元素类型的指针) // basePointer: 基指针 (第一个操作数) // indices: 索引列表 (后续操作数) - GetElementPtrInst(Value *basePointer, - const std::vector &indices = {}, - BasicBlock *parent = nullptr, const std::string &name = "") - : Instruction(Kind::kGetElementPtr, basePointer->getType(), parent, name) { + GetElementPtrInst(Type *resultType, + Value *basePointer, + const std::vector &indices = {}, + BasicBlock *parent = nullptr, const std::string &name = "") + : 
Instruction(Kind::kGetElementPtr, resultType, parent, name) { assert(basePointer && "GEP base pointer cannot be null!"); // TODO : 安全检查 assert(basePointer->getType()->isPointer() ); addOperand(basePointer); // 第一个操作数是基指针 addOperands(indices); // 随后的操作数是索引 } - public: Value* getBasePointer() const { return getOperand(0); } unsigned getNumIndices() const { return getNumOperands() - 1; } @@ -1155,7 +1155,7 @@ public: static GetElementPtrInst* create(Type *resultType, Value *basePointer, const std::vector &indices = {}, BasicBlock *parent = nullptr, const std::string &name = "") { - return new GetElementPtrInst(basePointer, indices, parent, name); + return new GetElementPtrInst(resultType, basePointer, indices, parent, name); } }; diff --git a/src/include/IRBuilder.h b/src/include/IRBuilder.h index 03df66a..c97dec5 100644 --- a/src/include/IRBuilder.h +++ b/src/include/IRBuilder.h @@ -294,15 +294,41 @@ class IRBuilder { return inst; } ///< 创建store指令 PhiInst * createPhiInst(Type *type, const std::vector &vals = {}, const std::vector &blks = {}, const std::string &name = "") { - auto predNum = block->getNumPredecessors(); auto inst = new PhiInst(type, vals, blks, block, name); assert(inst); block->getInstructions().emplace(block->begin(), inst); return inst; } ///< 创建Phi指令 - GetElementPtrInst* createGetElementPtrInst(Value *basePointer, - const std::vector &indices = {}, - const std::string &name = "") { + // GetElementPtrInst* createGetElementPtrInst(Value *basePointer, + // const std::vector &indices = {}, + // const std::string &name = "") { + // std::string newName; + // if (name.empty()) { + // std::stringstream ss; + // ss << tmpIndex; + // newName = ss.str(); + // tmpIndex++; + // } else { + // newName = name; + // } + + // auto inst = new GetElementPtrInst(basePointer, indices, block, newName); + // assert(inst); + // block->getInstructions().emplace(position, inst); + // return inst; + // } + /** + * @brief 根据 LLVM 设计模式创建 GEP 指令。 + * 它会自动推断返回类型,无需手动指定。 + */ + 
GetElementPtrInst *createGetElementPtrInst(Value *basePointer, const std::vector &indices, + const std::string &name = "") { + Type *ResultElementType = getIndexedType(basePointer->getType(), indices); + if (!ResultElementType) { + assert(false && "Invalid GEP indexing!"); + return nullptr; + } + Type *ResultType = PointerType::get(ResultElementType); std::string newName; if (name.empty()) { std::stringstream ss; @@ -313,11 +339,31 @@ class IRBuilder { newName = name; } - auto inst = new GetElementPtrInst(basePointer, indices, block, newName); + auto inst = new GetElementPtrInst(ResultType, basePointer, indices, block, newName); assert(inst); block->getInstructions().emplace(position, inst); return inst; } + + static Type *getIndexedType(Type *pointerType, const std::vector &indices) { + assert(pointerType->isPointer() && "base must be a pointer type!"); + Type *CurrentType = pointerType; + // 遍历所有索引来深入类型层次结构Sysy只支持数组 + for (int i = 0; i < indices.size(); ++i) { + if(i == 0) { + // 第一个索引是指针类型的元素类型 + CurrentType = pointerType->as()->getBaseType(); + } else + if (CurrentType->isArray()) { + CurrentType = CurrentType->as()->getElementType(); + } + else { + // 如果类型不是聚合类型但仍有索引,说明索引过多,这是错误的 + CurrentType = nullptr; + } + } + return CurrentType; + } }; } // namespace sysy From 18dc8dbfee211e89246f5ae324e09917436878ac Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Thu, 24 Jul 2025 17:05:56 +0800 Subject: [PATCH 27/35] =?UTF-8?q?[midend]=E4=BF=AE=E6=94=B9=E6=B3=A8?= =?UTF-8?q?=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/SysYIRGenerator.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/SysYIRGenerator.cpp b/src/SysYIRGenerator.cpp index eaa7ad0..32a07aa 100644 --- a/src/SysYIRGenerator.cpp +++ b/src/SysYIRGenerator.cpp @@ -44,10 +44,10 @@ Value* SysYIRGenerator::getGEPAddressInst(Value* basePointer, const std::vector< // 检查 basePointer 是否为指针类型 
assert(basePointer->getType()->isPointer()); - // GEP 的第一个索引通常是0,用于“穿过”指针本身,访问其指向的对象。 - // 例如,对于全局数组 @arr,其类型为 [6 x i32]*,第一个0索引是必需的。 + // GEP 的第一个索引通常是0,用于“步过”指针本身,访问其指向的对象。 + // 例如,对于全局数组 @arr,其类型为 [6 x i32]*,第一个0索引是必需的步过偏移。 std::vector actualGEPIndices; - actualGEPIndices.push_back(ConstantInteger::get(0)); // 模拟 ConstantInteger::get(0) + actualGEPIndices.push_back(ConstantInteger::get(0)); actualGEPIndices.insert(actualGEPIndices.end(), indices.begin(), indices.end()); // 直接调用 builder 的方法,无需再关心类型推断的细节 From 5f8bf15d4d0ac395c5297895d5580d0b41bc572c Mon Sep 17 00:00:00 2001 From: Lixuanwang Date: Thu, 24 Jul 2025 21:02:28 +0800 Subject: [PATCH 28/35] =?UTF-8?q?[midend]=E6=B7=BB=E5=8A=A0=E4=BA=86?= =?UTF-8?q?=E5=BA=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 - lib/libsysy_arm.a | Bin 0 -> 6996 bytes lib/libsysy_riscv.a | Bin 0 -> 19660 bytes 3 files changed, 1 deletion(-) create mode 100644 lib/libsysy_arm.a create mode 100644 lib/libsysy_riscv.a diff --git a/.gitignore b/.gitignore index 7a9b7e6..774f5a6 100644 --- a/.gitignore +++ b/.gitignore @@ -23,7 +23,6 @@ # Compiled Static libraries *.lai *.la -*.a *.lib # Executables diff --git a/lib/libsysy_arm.a b/lib/libsysy_arm.a new file mode 100644 index 0000000000000000000000000000000000000000..7abfa5170d177babb5794434462d3ac0c12997d4 GIT binary patch literal 6996 zcmd6r4Qx}_701uBlR!N_0?i;eE^|nNQXs}8AwY;wAS`56=(GS4qinGgJ3O}`&TNO} zV`3>FMiHt~iqMo!)uy3ot*SQaVCz~Ks6`XqP{p9yBtTWhv<@LakTI6w3ikiMcb~r? 
zquo|*S9H3pcSU)nYTq5H!R+VKsZe(w;O)Yp^{W3ASwYhdRaI9t%dyYRUv+p{+qHu~@jvz-Vtr zsMQlrY{Pi#UsY+Yd(~o9ar+$K{c>ZyrfJv)vuIK(NfODOY1p$X4JnmenJfcGb_~lK zwWDG`fvZ68_bsoVRSy=h-2408d&~8Fvf=O33-qB?+8Np>E;&{|aP0p6aaVf(c+Y3+ zfB*Ost`AlZU*F;wKMOfNaAY^;(c`n)U?%dZDLoZ36{^;s--|~=F zvD_cmt3yMpu3d-T za*{c(W0wbNvxd9Y3>@Bxh*H$=s~oY~_UTesH#R3X$?MYSKk@ZrrTcyT`p^D05dQTA zYXohM(fXxJU5*6hK61HT_Uf?covM`s+1iB(nH)ZPwo2 z*>-ZYpkhbdUiqWEb2ESITs>M_cw%&_rp4`lJ?BJR8^*Ui`kCSu=nc1@vz@xV$F?(W zo95Mvsv=KNe@nrGG;qd>od-_X6=C5dNjeq!xrC2FojExBq1 zy7r>%FRFJW_P{*Bc{mj})N?oayi4}$)p;JJ5E(q+IWc&I=ex(3?zghdjB>v{nRDtX zO3tZ3KfxU=!VcQ)ak*hdcuLPtoRO#W_nfm5l=|yQMo(sflbtx9X;ydY**S!>^C#!K z`PO)KV})+3p3H9i{{QOitr~NC9}=t9@~4g9O+STA(=N{0j6J8fim8zVQ(la39QAn z4p(;+Z=LJEx_)Kj13l_Rh_a@xuG(F?p|!g+*1fPX)aj3e-HSby9&cGyw_#kgeNmOi zTj^Q$xM_EndlyxCtGvr(WjXr$gZ|bKo`a}nFDY9oIgSY{Cu)z_99h$|^0Y5%Gwu1> zEX`?iX|r?Yq`q}=aJ&(lV%)Rj=DidYdzx3k_Ea}0D}%G;ku*!TOv>PCX{<>eI2k2d zW`QRr+GNXY)RQr?r3jpx;^({>!z|fW#ZbHCWaN`0W$@r*K$UFa64qEKnf)hb^iO8S zE_V4sqNmCH)JPUPGG7AZ{Md7vjNYru;I)c5Df*i;xFv(PWNqGoStxCtiZ4KyVL*%j41NX7eP#b* z)UCqtuHt$hwi!_RM=d9m{yVt-2!9ilemm-mZ6fm&PXhbFn-tFg9|NyfTmZHTdPWlOg|f{)miP&C}OY*|aR z&EMG$Og3ouhdTa^57ai@A9?tPYBUxIcgJK~dshVC!R_OHxT0Aw5=mVV=biQ^l)ahl z@bKfg^FqJLcIe+uJK8->J9HcE=)YUp_bU5AvO{Eu41F2O{;$!Fe#5jwkJ66*XO#Uz zWxqs5-XD{pSD@^FSJ`cvY0rVu?t(I|n|9~|DC5>bX0Lr*;LK)}hv_s#f{c(}A)cN?`(~rmL4f>(Cl;2^49e)6(K)ulE zQ1+WkMZYKL$M{du4{cKZ4%!i?i*{&C+4s{v+5G)^p+n04Iu-Fw(O)QX8p{07lf5Dz zk)c`YSU2sJ=^vo+jgvpCLmpk`ZTw4820exYx>b0p* z-bO~;J~H%&iVrD%m5lsfCqoY_eoOI5#ita1K*qRd$4a~~PHpNw@l zNQNFF<7MloWaw*9?#l_k)gAde~$8(Dqf(toQ!;{$j}vJ#9u{*Hjt4=BN^&b{($m#D1Vpo z_mL52I~ls0jJ)=ep@WJKC>|nX{az+Rf1>=aD*rGUaSoHAXUJ&xAsKp+jJP9Y=*Nn$ zD84~PzguLe3(r2X8%i!Equ&BDv|MqO;u)^6zgR4>mWm8ihC5}$t-ird52cu=85)hjrm)l zv52Xr4Q0k_l6Y1&t@Ze0u}G-3I~I%@iG0c%rXw0I937+Ju^@XeFRS2@WE3o;@tsm@ zq8ylF?gPZ=&vxuqq8N{Iqa00%MFYoT!Jk2+{lnVf&!N!=e*%rdcpFrmV&zwZI&-n& z<>CFv@};(03V=&$X+-(X3^Nb*lTzoIB^k 
z;zB3pY!&)=`ITDsjSQYr*oNg>xR6_y0l2XpkDgVmc6cl&=0M**goYfXUS!uw6P6$ETm{|T}3u$_B3Pr#%@mRMNCOg|(H?}!zSvNXy%sB=k{%EgvS6W?vaCe*C zoYKkK&N-1#q;u4PgXlt(AR%_TCqAY4xOq;zRZJr?4+(5drooo98v6$@Rd{@}KBuB#^}vt*|{ zc5LZQDfMHIBBJ9;ze*6u`CPmt)yr8D88c>%eu z?lcO^_4eggv)i|Hwa0o3hr!qW%NoLVuU`?bTb@4xZ8Ps!yZ*ZJvNCrV!l^9yYfxMR z4T2dh;c}&$HRTtZf~w&BpzMO_r{}#IRE!a?cf6F_GCr}Zwv=5edDtc2IQUu=+1qcQK0fbEaB0n|htG{fc9+PnSvYNC z@q<#)-blqYyWemDRd-Tl3nvtJW6xj@CnswF@7fkfZpU;mGRW3`S0H-+62GzU#15AxBv19sX`4 z@a22!ZYo{(Crl5U^~b7LFFJkcnYrUN%coV;th%UTBr}C+S|pu+U(7olR7q>!{@^8Uo9aj;|IXPPd3vEHniJbY+AAWc8?5L}3#I0Ah?0>c_XrJ_s9&C}` z`RPZ*kypGY$@Sdb%;erFWZx80@zB&)UvYRUbNzD9f#KNjs5}3R3>MEnQjgR;wZ+Do zfBb;Fx`Od+VQEV1`rWbS#^I-0&JXS%j?DkbZLgKTJ&7ZcqZR+{wBm6tzHVXeef#{$ z6T-sdh-dF^#?rloCD*FjzGKbY^2K*A-tXZVthmJ`Zd>W;(#tE{p7%eyYpwqrFgaza z`LA+IKR4EFkJi1(f|dN?@Y$yOhay4Y$PS+^m-T{i+pgLlJ;3BAZmV6jlY7-zb6G9k zt9v8zJ}qPv`ntUSVw;Xl;*hgW{ZK|x=_51(Fss?LqPm;4EK97T7K`MW-6f8*UJfZW{%KSo9h3JCtiOuf%F4MH2 zt|7Hp9#*>N*DYRE25<%-!&n?Jhw+V$RNLub-sOLOze=#^{I?OlnUXni~u zkJT-%ue(YYgKIZAJJXd-ZVSrRxy5N!m6>hJ8oJ_r^{w6UzPi?gEOo^D;;kupNDAB0 zmCR!ySuk;sqd?swD&11a*a7H`13!eJmEk^$tzId6{U1Xr+K(stxlfK z%deKY!j*jYv07rP2xgR^;=f}G|45!p-S5UcFAHYV7B{KSdw?JIlHx|Us$W$^`6c+B z9)7^Xk9hcWY9iz>^6(t-)_iWIe7T!6A?Fw5hi{Y0AUeqo0{DL+AN>0A$s2(0B;J+R zDuXL0>xKMB$Vbjx>T=*4Jv`;%+dO=ahad3p?|JwU4?pGMv*|5^*LA6fFY@qJ9=^fD z+dX_6@g6;1b)MsO7x}3Fpobs!@S`67a}WQmhoAQFvmSmSogz^GTn}I9;f)@?&coY0 z{IedO^YDEhexHXw>fz6L_=_I?vWK7Y@No~HAfDC!c8jOK&7hA49IrYLU+Li+JiOn- z_jvd{9{#Y0KkDH}J^be$e%iw)Jp5vJm2W4s+dL1i^YE1(zRtrpd3eUd2R!^P58v!+Sh@r-u)D_#qE}%)^g( z_;ViqqKBXM@Yg+jW_Tf5cx&|Mm&$_U1rjdoRQ?$80p)Axx@b`OT;h9`&!X#`uPZ-6 z{=LeNk^fEQ_Ygm*{3W{Hct|;4FCA9S=l}03=j#)}6@h;F$vee|ieOf8MVB6T^9#)P zf-2>5y(=8+m_MgHsvd-2gG1E$OW@c*`3dkkw;+@9Q*KfA8*LKkCgp!|i+R2=PYe9- z;B6ttWkjVrP8XW*M_21+vmDn*CE@Hc_2I?z*(;QX-zS0qT(0jWe~)@wbiPFSIPniE zZ*y~*kh9aRm#RnltK{FM{=#sx;Che0N&P3(|C0KjR36uU7+0QBo>D%fyhr&d<#^p$ z$4k|@!^7{?oHg3dpVs`ZE1yt4U-xsslk)`+|F-7L)6diEH2()4|A_jX>OZOb4Z@p3 
z$zXo}fBHQ>w!XP8h^?=Sx`*a^d5Fuy5_xElho$ncOdhUr56j)(3inqpR=rsDg6f6E zg~f%%#gB_0mnW9g6>e(1{dehFTf4Ho$rUSFv&lqH$6L5*m+VYr{{79nY%bNiH5Y8_ z=*zf!gB`w`c}`1bGWO->Z{oR|h%)c(NV)rmxZ$Ta_*@^Dt^ME96vHI0+9)2c-0G$1m>PtQL>QuYA?N{LJpHN@gpkJ3|hTo&OPH@hd>CTHXNuB8PvJTGv zZ1uTLaQ5e@FYUQqf_0M5Pno1n+YiAxCuZuDePaS=Kklg$oc#^zOMC7>xSQkmI&M#J z&gnOGUSx8>*_Y3MnX292H}Xs2133GKj6Z68aQ5;0q168_cx$t-kb!FPeP(&Mb9aR)TZRqo&Te#s_EraZjD#?7yJC^oK8*{NpCa_6Kmz!SCd9 zZWuE;QN13PI8TmEfGS+0=Qt@xj^0@BEw( z&VHxKf3L|0XCL$_%2WNkW$&VTzoPFGP;CyiQ_nG`FOg=dKxPKw};cpM&M&Rrp zH2L#PJ~;ch55oE2>_2AmYfL^k`?#MX`Cl@q%nZ;|)!gN9$IzPx`Imf)O^`$tm$u<^m!pKJ1KjStQ~ z?n6mFexs1hz}b(P{3}g9IQzJt#m60-{U(!tmB|NZe}npxZ{HW->~Avpi%dQ^`?$X* z`S!d4&VGl` z=i9~yXCLEUADsQ8>T^Cg`^QXvy~zh>ANNTm z-}X0f_MbQT_{~ms1I|9~w{ku>`^QbbeGc3**)Q(fay~fwXH0&B1n30L{#o_8{lVFP z-Q+JtxSNBskNe1+56=F@df!vd=gUk!IQzICE&29*1a*!P&=sZO#X0f04;w zZt}s|$Nh262WNk^$zNge!P&=scFDK>ADsR5CV!>L2WP)cea;7GKV|YiVDiD)$9;dv zx7Rt~?Dv@bRVE*teT*OAd~o*rP5uW>J~;arhrs#Z?B8wj8%;hq`xwt4`L_Rqvwy(k zf5_y6vyX8RoDa_a<0ik!3@OO+GmL z7{?;{_IeDQ{c^oeBtJJS{uDu=uXCLEjq#oO! 
z!P##x^?caW1I|9i?MS};{sPW^qshPCx~c2{(z?*aP~3YiH|Qh`}<5iH<)_B*~hpk$+zDh!P&phO|D&OXMK zNxtp>;Oswd@;8}$aQ08D&-H_|f5znBX!60?$2d2xADsR2NEj;T+s!5)oPCU!lYD!A z180AZ$-l|ugR_rucam@W4>5oUA2<2n>|-3D>);R+A6TKE^k4{ow3xGx?u1`QYqh zoFunDIQzRz{-;bnIQtlH$@PP?e~-!kw8;l&ALBN;{lVEEGx?t}`QYqh{3q8B&i*4N zKm0cn;ZDHW$2d~1ADsPTCcn+p56(WuqjLS=?7v|06DA*=eT-|Be0#kO&i?OBe!IyB zXa5!Txqfi=PnrCr$p>d2<7_40o`1mEA2<0alMl{5#_MwZ;Otk;2t(!bC2jJ-*~hqF zZhvt0FE{xeCLf%Aj34Iu!P&psl*}v7~f7axKv!7C*>j!7Q)8v27dG2WZ& z2WNl4t}HGUo!bwlMl}RX_IgJ z4>*>Hb_0OC@rNAl9DE2C+I6x@7 z$IsAy@q}d9>1wwknY?|QZa=@x+Qt2Y%DqC#wmw;H^v`;W?snQ;6tmupKb8!ebI)2 Date: Fri, 25 Jul 2025 01:53:49 +0800 Subject: [PATCH 29/35] =?UTF-8?q?[midend]=E4=BF=AE=E5=A4=8D=E5=87=BD?= =?UTF-8?q?=E6=95=B0=E5=8F=82=E6=95=B0=E4=B8=BA=E6=95=B0=E7=BB=84=E6=8C=87?= =?UTF-8?q?=E9=92=88=E7=9A=84=E9=80=80=E5=8C=96=E9=97=AE=E9=A2=98=EF=BC=8C?= =?UTF-8?q?=E8=83=BD=E5=A4=9F=E6=AD=A3=E7=A1=AE=E5=8C=BA=E5=88=86=E5=B1=80?= =?UTF-8?q?=E9=83=A8=E5=8F=98=E9=87=8F=E5=92=8C=E5=87=BD=E6=95=B0=E5=8F=82?= =?UTF-8?q?=E6=95=B0=E5=B9=B6=E7=94=9F=E6=88=90=E6=AD=A3=E7=A1=AE=E7=9A=84?= =?UTF-8?q?GEP=E6=8C=87=E4=BB=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/SysYIRGenerator.cpp | 183 +++++++++++++++++++++++++--------- src/include/IRBuilder.h | 75 +++++++++++--- src/include/SysYIRGenerator.h | 2 + 3 files changed, 200 insertions(+), 60 deletions(-) diff --git a/src/SysYIRGenerator.cpp b/src/SysYIRGenerator.cpp index 32a07aa..7618d18 100644 --- a/src/SysYIRGenerator.cpp +++ b/src/SysYIRGenerator.cpp @@ -40,19 +40,21 @@ Type* SysYIRGenerator::buildArrayType(Type* baseType, const std::vector& return currentType; } +// @brief: 获取 GEP 指令的地址 +// @param basePointer: GEP 的基指针,已经过适当的加载/处理,类型为 LLVM IR 中的指针类型。 +// 例如,对于局部数组,它是 AllocaInst;对于参数数组,它是 LoadInst 的结果。 +// @param indices: 已经包含了所有必要的偏移索引 (包括可能的初始 0 索引,由 visitLValue 准备)。 +// @return: 计算得到的地址值 (也是一个指针类型) Value* SysYIRGenerator::getGEPAddressInst(Value* basePointer, const 
std::vector& indices) { // 检查 basePointer 是否为指针类型 - assert(basePointer->getType()->isPointer()); + assert(basePointer->getType()->isPointer() && "Base pointer must be a pointer type!"); - // GEP 的第一个索引通常是0,用于“步过”指针本身,访问其指向的对象。 - // 例如,对于全局数组 @arr,其类型为 [6 x i32]*,第一个0索引是必需的步过偏移。 - std::vector actualGEPIndices; - actualGEPIndices.push_back(ConstantInteger::get(0)); - actualGEPIndices.insert(actualGEPIndices.end(), indices.begin(), indices.end()); - - // 直接调用 builder 的方法,无需再关心类型推断的细节 - return builder.createGetElementPtrInst(basePointer, actualGEPIndices); + // `indices` 向量现在由调用方(如 visitLValue, visitVarDecl, visitAssignStmt)负责完整准备, + // 包括是否需要添加初始的 `0` 索引。 + // 所以这里直接将其传递给 `builder.createGetElementPtrInst`。 + return builder.createGetElementPtrInst(basePointer, indices); } + /* * @brief: visit compUnit * @details: @@ -168,7 +170,7 @@ std::any SysYIRGenerator::visitVarDecl(SysYParser::VarDeclContext *ctx) { // 对于数组,alloca 的类型将是指针指向数组类型,例如 `int[2][3]*` // 对于标量,alloca 的类型将是指针指向标量类型,例如 `int*` AllocaInst* alloca = - builder.createAllocaInst(Type::getPointerType(variableType), dims, name); + builder.createAllocaInst(Type::getPointerType(variableType), {}, name); if (varDef->initVal() != nullptr) { ValueCounter values; @@ -239,9 +241,15 @@ std::any SysYIRGenerator::visitVarDecl(SysYParser::VarDeclContext *ctx) { ConstantInteger::get(static_cast(tempLinearIndex % dimSizes[dimIdx]))); tempLinearIndex /= dimSizes[dimIdx]; } - + + // 对于局部数组,alloca 本身就是 GEP 的基指针。 + // GEP 的第一个索引必须是 0,用于“步过”整个数组。 + std::vector gepIndicesForInit; + gepIndicesForInit.push_back(ConstantInteger::get(0)); + gepIndicesForInit.insert(gepIndicesForInit.end(), currentIndices.begin(), currentIndices.end()); + // 计算元素的地址 - Value* elementAddress = getGEPAddressInst(alloca, currentIndices); + Value* elementAddress = getGEPAddressInst(alloca, gepIndicesForInit); // 生成 store 指令 builder.createStoreInst(currentValue, elementAddress); } @@ -328,34 +336,72 @@ std::any 
SysYIRGenerator::visitFuncDef(SysYParser::FuncDefContext *ctx){ auto name = ctx->Ident()->getText(); std::vector paramTypes; + std::vector paramActualTypes; std::vector paramNames; std::vector> paramDims; if (ctx->funcFParams() != nullptr) { auto params = ctx->funcFParams()->funcFParam(); for (const auto ¶m : params) { - paramTypes.push_back(std::any_cast(visitBType(param->bType()))); - paramNames.push_back(param->Ident()->getText()); - std::vector dims = {}; - if (!param->LBRACK().empty()) { - dims.push_back(ConstantInteger::get(-1)); // 第一个维度不确定 + Type* baseBType = std::any_cast(visitBType(param->bType())); + std::string paramName = param->Ident()->getText(); + + // 用于收集当前参数的维度信息(如果它是数组) + std::vector currentParamDims; + if (!param->LBRACK().empty()) { // 如果参数声明中有方括号,说明是数组 + // SysY 数组参数的第一个维度可以是未知的(例如 int arr[] 或 int arr[][10]) + // 这里的 ConstantInteger::get(-1) 表示未知维度,但对于 LLVM 类型构建,我们主要关注已知维度 + currentParamDims.push_back(ConstantInteger::get(-1)); // 标记第一个维度为未知 for (const auto &exp : param->exp()) { - dims.push_back(std::any_cast(visitExp(exp))); + // 访问表达式以获取维度大小,这些维度必须是常量 + Value* dimVal = std::any_cast(visitExp(exp)); + // 确保维度是常量整数,否则 buildArrayType 会断言失败 + assert(dynamic_cast(dimVal) && "Array dimension in parameter must be a constant integer!"); + currentParamDims.push_back(dimVal); } } - paramDims.emplace_back(dims); + + // 根据解析出的信息,确定参数在 LLVM IR 中的实际类型 + Type* actualParamType; + if (currentParamDims.empty()) { // 情况1:标量参数 (e.g., int x) + actualParamType = baseBType; // 实际类型就是基本类型 + } else { // 情况2&3:数组参数 (e.g., int arr[] 或 int arr[][10]) + // 数组参数在函数传递时会退化为指针。 + // 这个指针指向的类型是除第一维外,由后续维度构成的数组类型。 + + // 从 currentParamDims 中移除第一个标记未知维度的 -1 + std::vector fixedDimsForTypeBuilding; + if (currentParamDims.size() > 1) { // 如果有固定维度 (e.g., int arr[][10]) + // 复制除第一个 -1 之外的所有维度 + fixedDimsForTypeBuilding.assign(currentParamDims.begin() + 1, currentParamDims.end()); + } + + Type* pointedToArrayType = baseBType; // 从基本类型开始构建 + // 从最内层维度向外层构建数组类型 + // buildArrayType 
期望 dims 是从最外层到最内层,但它内部反向迭代,所以这里直接传入 + // 例如,对于 int arr[][10],fixedDimsForTypeBuilding 包含 [10],构建出 [10 x i32] + if (!fixedDimsForTypeBuilding.empty()) { + pointedToArrayType = buildArrayType(baseBType, fixedDimsForTypeBuilding); + } + + // 实际参数类型是指向这个构建好的数组类型的指针 + actualParamType = Type::getPointerType(pointedToArrayType); // e.g., i32* 或 [10 x i32]* + } + + paramActualTypes.push_back(actualParamType); // 存储参数的实际 LLVM IR 类型 + paramNames.push_back(paramName); // 存储参数名称 + } } Type* returnType = std::any_cast(visitFuncType(ctx->funcType())); - Type* funcType = Type::getFunctionType(returnType, paramTypes); + Type* funcType = Type::getFunctionType(returnType, paramActualTypes); Function* function = module->createFunction(name, funcType); BasicBlock* entry = function->getEntryBlock(); builder.setPosition(entry, entry->end()); - for (int i = 0; i < paramTypes.size(); ++i) { - AllocaInst* alloca = builder.createAllocaInst(Type::getPointerType(paramTypes[i]), - paramDims[i], paramNames[i]); + for (int i = 0; i < paramActualTypes.size(); ++i) { + AllocaInst* alloca = builder.createAllocaInst(Type::getPointerType(paramActualTypes[i]), {},paramNames[i]); entry->insertArgument(alloca); module->addVariable(paramNames[i], alloca); } @@ -641,30 +687,41 @@ std::any SysYIRGenerator::visitReturnStmt(SysYParser::ReturnStmtContext *ctx) { } -// SysYIRGenerator.cpp (修改部分) +// 辅助函数:计算给定类型中嵌套的数组维度数量 +// 例如: +// - 对于 i32* 类型,它指向 i32,维度为 0。 +// - 对于 [10 x i32]* 类型,它指向 [10 x i32],维度为 1。 +// - 对于 [20 x [10 x i32]]* 类型,它指向 [20 x [10 x i32]],维度为 2。 +unsigned SysYIRGenerator::countArrayDimensions(Type* type) { + unsigned dims = 0; + Type* currentType = type; + + // 如果是指针类型,先获取它指向的基础类型 + if (currentType->isPointer()) { + currentType = currentType->as()->getBaseType(); + } + + // 递归地计算数组的维度层数 + while (currentType && currentType->isArray()) { + dims++; + currentType = currentType->as()->getElementType(); + } + return dims; +} std::any SysYIRGenerator::visitLValue(SysYParser::LValueContext *ctx) { 
std::string name = ctx->Ident()->getText(); User* variable = module->getVariable(name); Value* value = nullptr; - if (variable == nullptr) { - throw std::runtime_error("Variable " + name + " not found."); - } + std::vector dims; for (const auto &exp : ctx->exp()) { dims.push_back(std::any_cast(visitExp(exp))); } // 1. 获取变量的声明维度数量 - unsigned declaredNumDims = 0; - if (AllocaInst* alloc = dynamic_cast(variable)) { - declaredNumDims = alloc->getNumDims(); - } else if (GlobalValue* glob = dynamic_cast(variable)) { - declaredNumDims = glob->getNumDims(); - } else if (ConstantVariable* constV = dynamic_cast(variable)) { - declaredNumDims = constV->getNumDims(); - } + unsigned declaredNumDims = countArrayDimensions(variable->getType()); // 2. 处理常量变量 (ConstantVariable) 且所有索引都是常量的情况 ConstantVariable* constVar = dynamic_cast(variable); @@ -700,20 +757,54 @@ std::any SysYIRGenerator::visitLValue(SysYParser::LValueContext *ctx) { } } else { // 访问数组元素或子数组(有索引,或变量本身是数组/多维指针) - Value* targetAddress = nullptr; - + Value* gepBasePointer = nullptr; + std::vector gepIndices; // 准备传递给 getGEPAddressInst 的索引列表 // GEP 的基指针就是变量本身(它是一个指向内存的指针) - if (dynamic_cast(variable) || dynamic_cast(variable) || (constVar != nullptr)) { - // 允许对 ConstantVariable (如果它代表全局数组常量) 进行 GEP - targetAddress = getGEPAddressInst(variable, dims); + if (AllocaInst *alloc = dynamic_cast(variable)) { + // 情况 A: 局部变量 (AllocaInst) + // 获取 AllocaInst 分配的内存的实际类型。 + // 例如:对于 `int b[10][20];`,`allocatedType` 是 `[10 x [20 x i32]]`。 + // 对于 `int b[][20]` 的函数参数,其 AllocaInst 存储的是一个指针, + // 此时 `allocatedType` 是 `[20 x i32]*`。 + Type* allocatedType = alloc->getType()->as()->getBaseType(); + + if (allocatedType->isPointer()) { + // 如果 AllocaInst 分配的是一个指针类型 (例如,用于存储函数参数的指针,如 int b[][20] 中的 b) + // 那么 GEP 的基指针是加载这个指针变量的值。 + gepBasePointer = builder.createLoadInst(alloc); // 加载出实际的指针值 (e.g., [20 x i32]*) + // 对于这种参数指针,用户提供的索引直接作用于它。不需要额外的 0。 + gepIndices = dims; + } else { + // 如果 AllocaInst 分配的是实际的数组数据 (例如,int b[10][20] 中的 b) + // 
那么 AllocaInst 本身就是 GEP 的基指针。 + gepBasePointer = alloc; // 类型是 [10 x [20 x i32]]* + // 对于这种完整的数组分配,GEP 的第一个索引必须是 0,用于“步过”整个数组。 + gepIndices.push_back(ConstantInteger::get(0)); + gepIndices.insert(gepIndices.end(), dims.begin(), dims.end()); + } + } else if (GlobalValue *glob = dynamic_cast(variable)) { + // 情况 B: 全局变量 (GlobalValue) + // GlobalValue 总是指向全局数据的指针。 + gepBasePointer = glob; // 类型是 [61 x [67 x i32]]* + // 对于全局数组,GEP 的第一个索引必须是 0,用于“步过”整个数组。 + gepIndices.push_back(ConstantInteger::get(0)); + gepIndices.insert(gepIndices.end(), dims.begin(), dims.end()); + } else if (ConstantVariable *constV = dynamic_cast(variable)) { + // 情况 C: 常量变量 (ConstantVariable),如果它代表全局数组常量 + // 假设 ConstantVariable 可以直接作为 GEP 的基指针。 + gepBasePointer = constV; + // 对于常量数组,也需要 0 索引来“步过”整个数组。 + // 这里可以进一步检查 constV->getType()->as()->getBaseType()->isArray() + // 但为了简洁,假设所有 ConstantVariable 作为 GEP 基指针时都需要此 0。 + gepIndices.push_back(ConstantInteger::get(0)); + gepIndices.insert(gepIndices.end(), dims.begin(), dims.end()); } else { - // 其他情况(例如尝试对非指针类型或不支持的 LValue 进行 GEP)应报错 - assert(false && "LValue variable type not supported for GEP or dynamic load."); - return static_cast(nullptr); + assert(false && "LValue variable type not supported for GEP base pointer."); + return static_cast(nullptr); } - // 现在 targetAddress 持有元素或子数组的地址。 - // 需要判断是加载值,还是返回子数组的地址。 + // 现在调用 getGEPAddressInst,传入正确准备的基指针和索引列表 + Value *targetAddress = getGEPAddressInst(gepBasePointer, gepIndices); // 如果提供的索引数量少于声明的维度数量,则表示访问的是子数组,返回其地址 if (dims.size() < declaredNumDims) { @@ -1264,7 +1355,7 @@ void Utils::createExternalFunction( for (int i = 0; i < paramTypes.size(); ++i) { auto alloca = pBuilder->createAllocaInst( - Type::getPointerType(paramTypes[i]), paramDims[i], paramNames[i]); + Type::getPointerType(paramTypes[i]), {}, paramNames[i]); entry->insertArgument(alloca); // pModule->addVariable(paramNames[i], alloca); } diff --git a/src/include/IRBuilder.h b/src/include/IRBuilder.h index c97dec5..49941fd 100644 --- 
a/src/include/IRBuilder.h +++ b/src/include/IRBuilder.h @@ -347,22 +347,69 @@ class IRBuilder { static Type *getIndexedType(Type *pointerType, const std::vector &indices) { assert(pointerType->isPointer() && "base must be a pointer type!"); - Type *CurrentType = pointerType; - // 遍历所有索引来深入类型层次结构Sysy只支持数组 + // Type *CurrentType = pointerType->as()->getBaseType(); + // GEP 的类型推断从基指针所指向的类型开始。 + // 例如: + // - 如果 pointerType 是 `[20 x [10 x i32]]*` (指向一个二维数组的指针), + // 那么 `currentWalkType` 将从 `[20 x [10 x i32]]` (二维数组类型) 开始。 + // - 如果 pointerType 是 `i32*` (指向一个整数的指针), + // 那么 `currentWalkType` 将从 `i32` (整数类型) 开始。 + Type *currentWalkType = pointerType->as()->getBaseType(); + + // 遍历所有索引来深入类型层次结构。 + // 注意:这里的 `indices` 向量通常已经包含了 `getGEPAddressInst` 添加的第一个“步过”索引(通常是0)。 + // 因此,`indices[0]` 对应的是 GEP 操作的第一个逻辑步骤。 for (int i = 0; i < indices.size(); ++i) { - if(i == 0) { - // 第一个索引是指针类型的元素类型 - CurrentType = pointerType->as()->getBaseType(); - } else - if (CurrentType->isArray()) { - CurrentType = CurrentType->as()->getElementType(); - } - else { - // 如果类型不是聚合类型但仍有索引,说明索引过多,这是错误的 - CurrentType = nullptr; - } + if (currentWalkType->isArray()) { + // 情况1:当前类型是数组类型 (例如 `[20 x [10 x i32]]` 或 `[10 x i32]`)。 + // 此时,当前的索引用于选择数组中的一个元素(或子数组)。 + // 新的 `currentWalkType` 变为该数组的元素类型。 + // 例如:`[20 x [10 x i32]]` 经过一个索引后,变为 `[10 x i32]`。 + currentWalkType = currentWalkType->as()->getElementType(); + // } else if (currentWalkType->isStruct()) { + // // 情况2:当前类型是结构体类型。 + // // 此时,索引必须是一个常量整数,用于选择结构体中的特定成员。 + // // SysY 语言通常只支持数组,但如果你的 IR 支持结构体,这里需要实现。 + // ConstantInteger* structIdx = dynamic_cast(indices[i]); + // assert(structIdx && "Struct index must be a constant integer!"); + // assert(structIdx->getInt() >= 0 && "Struct index cannot be negative!"); + // // 确保 `StructType` 类有 `getNumMembers()` 和 `getMemberType()` 方法。 + // // 如果你的 Type 系统没有这些方法,需要根据实际情况调整。 + // assert(structIdx->getInt() < currentWalkType->as()->getNumMembers() && "Struct index out of bounds!"); + // currentWalkType = 
currentWalkType->as()->getMemberType(structIdx->getInt()); + } else { + // 情况3:当前类型既不是数组也不是结构体(即它是一个标量类型,如 `i32` 或 `float`)。 + // + // 如果 `currentWalkType` 是一个标量类型,并且**后面还有未处理的索引** (`i < indices.size() - 1`), + // 这意味着我们试图对一个标量类型进行进一步的结构性索引,这是**无效的**。 + // 例如:`int* ptr; ptr[0][0];` + // - `ptr` 的类型是 `int*`。 + // - `currentWalkType` 初始化为 `int`。 + // - `i = 0` 时,`currentWalkType` 是 `int`。它不是数组/结构体,类型不变。 + // - `i = 1` 时,`currentWalkType` 仍然是 `int`。此时 `i < indices.size() - 1` (即 `1 < 2 - 1 = 1`) 为假。 + // 因为 `indices.size()` 是 2 (`[0, 0]`),`i` 是 1。`i < indices.size() - 1` 是 `1 < 1`,为假。 + // 所以不会触发断言,`currentWalkType` 保持 `int`。这是正确的。 + // + // 让我重新检查一下 `if (i > 0)` 和 `if (i < indices.size() - 1)` 的区别。 + // 原始的 `if (i > 0)` 导致 `arr[1]` 失败是因为 `currentWalkType` 变成 `int` 后, + // `i=1` 触发了断言。 + // + // LLVM GEP 的行为是:如果当前类型是标量,并且这是 GEP 的**最后一个索引**,那么 GEP 是合法的, + // 最终的类型就是这个标量类型。如果不是最后一个索引,则报错。 + + // 修正后的判断: + // 如果当前类型是标量,并且当前索引 `i` 不是 `indices` 向量中的**最后一个索引**, + // 那么就意味着尝试对标量进行额外的结构性索引,这是错误的。 + if (i < indices.size() - 1) { + assert(false && "Invalid GEP indexing: attempting to index into a non-aggregate type with further indices."); + return nullptr; // 返回空指针表示类型推断失败 + } + // 如果 `currentWalkType` 是标量,并且这是最后一个索引,则类型保持不变。 + // 这是合法的 GEP 操作,例如 `getelementptr i32, i32* %ptr, i64 5`。 + // `currentWalkType` 将是 `i32`,并且循环会在此结束。 + } } - return CurrentType; + return currentWalkType; } }; diff --git a/src/include/SysYIRGenerator.h b/src/include/SysYIRGenerator.h index 66ce11c..aac6ec9 100644 --- a/src/include/SysYIRGenerator.h +++ b/src/include/SysYIRGenerator.h @@ -139,6 +139,8 @@ public: // 构建数组类型 Type* buildArrayType(Type* baseType, const std::vector& dims); + unsigned countArrayDimensions(Type* type); + }; // class SysYIRGenerator } // namespace sysy \ No newline at end of file From 1e6f6ed711d9c14d7f5c718048d6aa945bd49734 Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Fri, 25 Jul 2025 03:26:10 +0800 Subject: [PATCH 30/35] 
=?UTF-8?q?[midend]=20GEP=E7=B1=BB=E5=9E=8B=E6=8E=A8?= =?UTF-8?q?=E6=96=AD=E5=87=BD=E6=95=B0getIndexedType=E9=80=BB=E8=BE=91?= =?UTF-8?q?=E4=BF=AE=E5=A4=8D=EF=BC=8C=E5=A2=9E=E5=8A=A0=E6=95=B0=E7=BB=84?= =?UTF-8?q?type=E7=BC=93=E5=AD=98=E6=B1=A0=E9=81=BF=E5=85=8D=E7=9B=B8?= =?UTF-8?q?=E5=90=8Ctype=20=3D=3D=E6=93=8D=E4=BD=9C=E8=BF=94=E5=9B=9E?= =?UTF-8?q?=E5=81=87=EF=BC=8C=E4=BF=AE=E5=A4=8D=E5=AE=9E=E5=8F=82=E5=BD=A2?= =?UTF-8?q?=E5=8F=82=E7=B1=BB=E5=9E=8B=E8=BD=AC=E6=8D=A2=E5=88=A4=E6=96=AD?= =?UTF-8?q?=E9=80=BB=E8=BE=91=EF=BC=8Cstarttime=20stoptime=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=94=AF=E6=8C=81=EF=BC=88=E5=BE=85=E5=90=8E=E7=AB=AF?= =?UTF-8?q?=E6=B5=8B=E8=AF=95=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/IR.cpp | 13 ++++++-- src/SysYIRGenerator.cpp | 65 +++++++++++++++++++++++++++---------- src/include/IRBuilder.h | 71 +++++++++++++++-------------------------- 3 files changed, 85 insertions(+), 64 deletions(-) diff --git a/src/IR.cpp b/src/IR.cpp index c694839..da4292b 100644 --- a/src/IR.cpp +++ b/src/IR.cpp @@ -105,8 +105,17 @@ FunctionType*FunctionType::get(Type *returnType, const std::vector ¶ } ArrayType *ArrayType::get(Type *elementType, unsigned numElements) { - // TODO:可以考虑在这里添加缓存,避免重复创建相同的数组类型 - return new ArrayType(elementType, numElements); + static std::set> arrayTypes; + auto iter = std::find_if(arrayTypes.begin(), arrayTypes.end(), [&](const std::unique_ptr &type) -> bool { + return elementType == type->getElementType() && numElements == type->getNumElements(); + }); + if (iter != arrayTypes.end()) { + return iter->get(); + } + auto type = new ArrayType(elementType, numElements); + assert(type); + auto result = arrayTypes.emplace(type); + return result.first->get(); } void Value::replaceAllUsesWith(Value *value) { diff --git a/src/SysYIRGenerator.cpp b/src/SysYIRGenerator.cpp index 7618d18..09b1214 100644 --- a/src/SysYIRGenerator.cpp +++ b/src/SysYIRGenerator.cpp @@ -770,6 +770,7 @@ 
std::any SysYIRGenerator::visitLValue(SysYParser::LValueContext *ctx) { if (allocatedType->isPointer()) { // 如果 AllocaInst 分配的是一个指针类型 (例如,用于存储函数参数的指针,如 int b[][20] 中的 b) + // 即 `allocatedType` 是一个指向数组指针的指针 (e.g., [20 x i32]**) // 那么 GEP 的基指针是加载这个指针变量的值。 gepBasePointer = builder.createLoadInst(alloc); // 加载出实际的指针值 (e.g., [20 x i32]*) // 对于这种参数指针,用户提供的索引直接作用于它。不需要额外的 0。 @@ -777,6 +778,7 @@ std::any SysYIRGenerator::visitLValue(SysYParser::LValueContext *ctx) { } else { // 如果 AllocaInst 分配的是实际的数组数据 (例如,int b[10][20] 中的 b) // 那么 AllocaInst 本身就是 GEP 的基指针。 + // 这里的 `alloc` 是指向数组的指针 (e.g., [10 x [20 x i32]]*) gepBasePointer = alloc; // 类型是 [10 x [20 x i32]]* // 对于这种完整的数组分配,GEP 的第一个索引必须是 0,用于“步过”整个数组。 gepIndices.push_back(ConstantInteger::get(0)); @@ -856,32 +858,63 @@ std::any SysYIRGenerator::visitCall(SysYParser::CallContext *ctx) { std::vector args = {}; if (funcName == "starttime" || funcName == "stoptime") { - // 如果是starttime或stoptime函数 - // TODO: 这里需要处理starttime和stoptime函数的参数 - // args.emplace_back() + args.emplace_back( + ConstantInteger::get(static_cast(ctx->getStart()->getLine()))); } else { if (ctx->funcRParams() != nullptr) { args = std::any_cast>(visitFuncRParams(ctx->funcRParams())); } - auto params = function->getEntryBlock()->getArguments(); + // 获取形参列表。`getArguments()` 返回的是 `Argument*` 的集合, + // 每个 `Argument` 代表一个函数形参,其 `getType()` 就是指向形参的类型的指针类型。 + auto formalParamsAlloca = function->getEntryBlock()->getArguments(); + + // 检查实参和形参数量是否匹配。 + if (args.size() != formalParamsAlloca.size()) { + std::cerr << "Error: Function call argument count mismatch for function '" << funcName << "'." 
<< std::endl; + assert(false && "Function call argument count mismatch!"); + } + for (int i = 0; i < args.size(); i++) { - // 参数类型转换 - if (params[i]->getType() != args[i]->getType() && - (params[i]->getNumDims() != 0 || - params[i]->getType()->as()->getBaseType() != args[i]->getType())) { - ConstantValue * constValue = dynamic_cast(args[i]); + // 形参的类型 (e.g., i32, float, i32*, [10 x i32]*) + Type* formalParamExpectedValueType = formalParamsAlloca[i]->getType()->as()->getBaseType(); + // 实参的实际类型 (e.g., i32, float, i32*, [67 x i32]*) + Type* actualArgType = args[i]->getType(); + // 如果实参类型与形参类型不匹配,则尝试进行类型转换 + if (formalParamExpectedValueType != actualArgType) { + ConstantValue *constValue = dynamic_cast(args[i]); if (constValue != nullptr) { - if (params[i]->getType() == Type::getPointerType(Type::getFloatType())) { - args[i] = ConstantInteger::get(static_cast(constValue->getInt())); + if (formalParamExpectedValueType->isInt() && actualArgType->isFloat()) { + args[i] = ConstantInteger::get(static_cast(constValue->getFloat())); + } else if (formalParamExpectedValueType->isFloat() && actualArgType->isInt()) { + args[i] = ConstantFloating::get(static_cast(constValue->getInt())); } else { - args[i] = ConstantFloating::get(static_cast(constValue->getFloat())); + // 如果是常量但不是简单的 int/float 标量转换, + // 或者是指针常量需要 bitcast,则让它进入非常量转换逻辑。 + // 例如,一个常量数组的地址,需要 bitcast 成另一种指针类型。 + // 目前不知道样例有没有这种情况,所以这里不做处理。 } - } else { - if (params[i]->getType() == Type::getPointerType(Type::getFloatType())) { - args[i] = builder.createIToFInst(args[i]); - } else { + } + else { + // 1. 标量值类型转换 (例如:int_reg 到 float_reg,float_reg 到 int_reg) + if (formalParamExpectedValueType->isInt() && actualArgType->isFloat()) { args[i] = builder.createFtoIInst(args[i]); + } else if (formalParamExpectedValueType->isFloat() && actualArgType->isInt()) { + args[i] = builder.createIToFInst(args[i]); + } + // 2. 
指针类型转换 (例如数组退化:`[N x T]*` 到 `T*`,或兼容指针类型之间) TODO:不清楚有没有这种样例 + // 这种情况常见于数组参数,实参可能是一个更具体的数组指针类型, + // 而形参是其退化后的基础指针类型。LLVM 的 `bitcast` 指令可以用于 + // 在相同大小的指针类型之间进行转换,这对于数组退化至关重要。 + // else if (formalParamType->isPointer() && actualArgType->isPointer()) { + // 检查指针基类型是否兼容,或者是否是数组退化导致的类型不同。 + // 使用 bitcast, + // args[i] = builder.createBitCastInst(args[i], formalParamType); + // } + // 3. 其他未预期的类型不匹配 + // 如果代码执行到这里,说明存在编译器前端未处理的类型不兼容或错误。 + else { + // assert(false && "Unhandled type mismatch for function call argument."); } } } diff --git a/src/include/IRBuilder.h b/src/include/IRBuilder.h index 49941fd..d9e92ef 100644 --- a/src/include/IRBuilder.h +++ b/src/include/IRBuilder.h @@ -347,68 +347,47 @@ class IRBuilder { static Type *getIndexedType(Type *pointerType, const std::vector &indices) { assert(pointerType->isPointer() && "base must be a pointer type!"); - // Type *CurrentType = pointerType->as()->getBaseType(); // GEP 的类型推断从基指针所指向的类型开始。 // 例如: - // - 如果 pointerType 是 `[20 x [10 x i32]]*` (指向一个二维数组的指针), - // 那么 `currentWalkType` 将从 `[20 x [10 x i32]]` (二维数组类型) 开始。 - // - 如果 pointerType 是 `i32*` (指向一个整数的指针), - // 那么 `currentWalkType` 将从 `i32` (整数类型) 开始。 + // - 如果 pointerType 是 `[20 x [10 x i32]]*`,`currentWalkType` 初始为 `[20 x [10 x i32]]`。 + // - 如果 pointerType 是 `i32*`,`currentWalkType` 初始为 `i32`。 + // - 如果 pointerType 是 `i32**`,`currentWalkType` 初始为 `i32*`。 Type *currentWalkType = pointerType->as()->getBaseType(); // 遍历所有索引来深入类型层次结构。 - // 注意:这里的 `indices` 向量通常已经包含了 `getGEPAddressInst` 添加的第一个“步过”索引(通常是0)。 - // 因此,`indices[0]` 对应的是 GEP 操作的第一个逻辑步骤。 + // `indices` 向量包含了所有 GEP 索引,包括由 `visitLValue` 等函数添加的初始 `0` 索引。 for (int i = 0; i < indices.size(); ++i) { if (currentWalkType->isArray()) { - // 情况1:当前类型是数组类型 (例如 `[20 x [10 x i32]]` 或 `[10 x i32]`)。 - // 此时,当前的索引用于选择数组中的一个元素(或子数组)。 - // 新的 `currentWalkType` 变为该数组的元素类型。 - // 例如:`[20 x [10 x i32]]` 经过一个索引后,变为 `[10 x i32]`。 + // 情况一:当前遍历类型是 `ArrayType`。 + // 索引用于选择数组元素,`currentWalkType` 更新为数组的元素类型。 currentWalkType = 
currentWalkType->as()->getElementType(); - // } else if (currentWalkType->isStruct()) { - // // 情况2:当前类型是结构体类型。 - // // 此时,索引必须是一个常量整数,用于选择结构体中的特定成员。 - // // SysY 语言通常只支持数组,但如果你的 IR 支持结构体,这里需要实现。 - // ConstantInteger* structIdx = dynamic_cast(indices[i]); - // assert(structIdx && "Struct index must be a constant integer!"); - // assert(structIdx->getInt() >= 0 && "Struct index cannot be negative!"); - // // 确保 `StructType` 类有 `getNumMembers()` 和 `getMemberType()` 方法。 - // // 如果你的 Type 系统没有这些方法,需要根据实际情况调整。 - // assert(structIdx->getInt() < currentWalkType->as()->getNumMembers() && "Struct index out of bounds!"); - // currentWalkType = currentWalkType->as()->getMemberType(structIdx->getInt()); + } else if (currentWalkType->isPointer()) { + // 情况二:当前遍历类型是 `PointerType`。 + // 这意味着我们正在通过一个指针来访问其指向的内存。 + // 索引用于选择该指针所指向的“数组”的元素。 + // `currentWalkType` 更新为该指针所指向的基础类型。 + // 例如:如果 `currentWalkType` 是 `i32*`,它将变为 `i32`。 + // 如果 `currentWalkType` 是 `[10 x i32]*`,它将变为 `[10 x i32]`。 + currentWalkType = currentWalkType->as()->getBaseType(); } else { - // 情况3:当前类型既不是数组也不是结构体(即它是一个标量类型,如 `i32` 或 `float`)。 + // 情况三:当前遍历类型是标量类型 (例如 `i32`, `float` 等非聚合、非指针类型)。 // - // 如果 `currentWalkType` 是一个标量类型,并且**后面还有未处理的索引** (`i < indices.size() - 1`), - // 这意味着我们试图对一个标量类型进行进一步的结构性索引,这是**无效的**。 - // 例如:`int* ptr; ptr[0][0];` - // - `ptr` 的类型是 `int*`。 - // - `currentWalkType` 初始化为 `int`。 - // - `i = 0` 时,`currentWalkType` 是 `int`。它不是数组/结构体,类型不变。 - // - `i = 1` 时,`currentWalkType` 仍然是 `int`。此时 `i < indices.size() - 1` (即 `1 < 2 - 1 = 1`) 为假。 - // 因为 `indices.size()` 是 2 (`[0, 0]`),`i` 是 1。`i < indices.size() - 1` 是 `1 < 1`,为假。 - // 所以不会触发断言,`currentWalkType` 保持 `int`。这是正确的。 + // 如果 `currentWalkType` 是标量,并且当前索引 `i` **不是** `indices` 向量中的最后一个索引, + // 这意味着尝试对一个标量类型进行进一步的结构性索引,这是**无效的**。 + // 例如:`int x; x[0];` 对应的 GEP 链中,`x` 的类型是 `i32`,再加 `[0]` 索引就是错误。 // - // 让我重新检查一下 `if (i > 0)` 和 `if (i < indices.size() - 1)` 的区别。 - // 原始的 `if (i > 0)` 导致 `arr[1]` 失败是因为 `currentWalkType` 变成 `int` 后, - // `i=1` 
触发了断言。 - // - // LLVM GEP 的行为是:如果当前类型是标量,并且这是 GEP 的**最后一个索引**,那么 GEP 是合法的, - // 最终的类型就是这个标量类型。如果不是最后一个索引,则报错。 - - // 修正后的判断: - // 如果当前类型是标量,并且当前索引 `i` 不是 `indices` 向量中的**最后一个索引**, - // 那么就意味着尝试对标量进行额外的结构性索引,这是错误的。 + // 如果 `currentWalkType` 是标量,且这是**最后一个索引** (`i == indices.size() - 1`), + // 那么 GEP 是合法的,它只是计算一个偏移地址,最终的类型就是这个标量类型。 + // 此时 `currentWalkType` 保持不变,循环结束。 if (i < indices.size() - 1) { - assert(false && "Invalid GEP indexing: attempting to index into a non-aggregate type with further indices."); + assert(false && "Invalid GEP indexing: attempting to index into a non-aggregate/non-pointer type with further indices."); return nullptr; // 返回空指针表示类型推断失败 } - // 如果 `currentWalkType` 是标量,并且这是最后一个索引,则类型保持不变。 - // 这是合法的 GEP 操作,例如 `getelementptr i32, i32* %ptr, i64 5`。 - // `currentWalkType` 将是 `i32`,并且循环会在此结束。 + // 如果是最后一个索引,且当前类型是标量,则类型保持不变,这是合法的。 + // 循环会自然结束,返回正确的 `currentWalkType`。 } } + // 所有索引处理完毕后,`currentWalkType` 就是 GEP 指令最终计算出的地址所指向的元素的类型。 return currentWalkType; } }; From d50912ee4c1ecd42fbfc3d538e27091e04f5022d Mon Sep 17 00:00:00 2001 From: Lixuanwang Date: Fri, 25 Jul 2025 12:00:47 +0800 Subject: [PATCH 31/35] =?UTF-8?q?[midend]=E5=90=8E=E7=AB=AF=E9=80=82?= =?UTF-8?q?=E9=85=8DGEP?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/RISCv64ISel.cpp | 128 +++++++++++++++++------------------- src/include/RISCv64Passes.h | 12 ++-- 2 files changed, 68 insertions(+), 72 deletions(-) diff --git a/src/RISCv64ISel.cpp b/src/RISCv64ISel.cpp index 9bd104f..b6ed7df 100644 --- a/src/RISCv64ISel.cpp +++ b/src/RISCv64ISel.cpp @@ -794,30 +794,23 @@ void RISCv64ISel::selectNode(DAGNode* node) { case DAGNode::GET_ELEMENT_PTR: { auto gep = dynamic_cast(node->value); - // 获取GEP指令最终要写入的目标虚拟寄存器 auto result_vreg = getVReg(gep); - // --- Step 1: 获取基地址 --- + // --- Step 1: 获取基地址 (此部分逻辑正确,保持不变) --- auto base_ptr_node = node->operands[0]; - // 创建一个新的vreg作为地址累加器 auto current_addr_vreg = getNewVReg(); - // 
判断基指针是局部数组还是全局数组,并生成获取其基地址的指令 if (auto alloca_base = dynamic_cast(base_ptr_node->value)) { - // 基指针是局部数组(在栈上),使用FRAME_ADDR伪指令获取其地址 auto frame_addr_instr = std::make_unique(RVOpcodes::FRAME_ADDR); frame_addr_instr->addOperand(std::make_unique(current_addr_vreg)); frame_addr_instr->addOperand(std::make_unique(getVReg(alloca_base))); CurMBB->addInstruction(std::move(frame_addr_instr)); } else if (auto global_base = dynamic_cast(base_ptr_node->value)) { - // 基指针是全局数组,使用LA伪指令加载其地址 auto la_instr = std::make_unique(RVOpcodes::LA); la_instr->addOperand(std::make_unique(current_addr_vreg)); la_instr->addOperand(std::make_unique(global_base->getName())); CurMBB->addInstruction(std::move(la_instr)); } else { - // 如果基指针是另一个计算结果(例如函数参数传递来的数组地址), - // 直接用MV指令将其值赋给地址累加器 auto base_vreg = getVReg(base_ptr_node->value); auto mv = std::make_unique(RVOpcodes::MV); mv->addOperand(std::make_unique(current_addr_vreg)); @@ -825,72 +818,75 @@ void RISCv64ISel::selectNode(DAGNode* node) { CurMBB->addInstruction(std::move(mv)); } - // --- Step 2: 迭代处理每个索引,累加偏移量 --- - // 获取数组的类型,并剥掉最外层的指针 + // --- Step 2: [最终权威版] 遵循LLVM GEP语义迭代计算地址 --- + + // 初始被索引的类型,是基指针指向的那个类型 (例如, [2 x i32]) Type* current_type = gep->getBasePointer()->getType()->as()->getBaseType(); - // 遍历所有索引 (DAGNode的操作数从第1个开始是索引) - for (size_t i = 1; i < node->operands.size(); ++i) { - auto index_node = node->operands[i]; - - // [修复] 核心修复逻辑在这里 - // GEP可以索引数组,也可以索引指针(数组退化的结果) - unsigned element_size = 0; - Type* element_type = nullptr; + // 迭代处理 GEP 的每一个索引 + for (size_t i = 0; i < gep->getNumIndices(); ++i) { + Value* indexValue = gep->getIndex(i); + // GEP的第一个索引以整个 `current_type` 的大小为步长。 + // 后续的索引则以 `current_type` 的元素大小为步长。 + // 这一步是计算地址偏移的关键。 + unsigned stride = getTypeSizeInBytes(current_type); + + // 如果步长为0(例如对一个void类型或空结构体索引),则不产生任何偏移 + if (stride != 0) { + // --- 为当前索引和步长生成偏移计算指令 --- + auto offset_vreg = getNewVReg(); + auto index_vreg = getVReg(indexValue); + + // 如果索引是常量,先用 LI 指令加载到虚拟寄存器 + if (auto const_index = 
dynamic_cast(indexValue)) { + auto li = std::make_unique(RVOpcodes::LI); + li->addOperand(std::make_unique(index_vreg)); + li->addOperand(std::make_unique(const_index->getInt())); + CurMBB->addInstruction(std::move(li)); + } + + // 优化:如果步长是1,可以直接移动(MV)作为偏移量,无需乘法 + if (stride == 1) { + auto mv = std::make_unique(RVOpcodes::MV); + mv->addOperand(std::make_unique(offset_vreg)); + mv->addOperand(std::make_unique(index_vreg)); + CurMBB->addInstruction(std::move(mv)); + } else { + // 步长不为1,需要生成乘法指令 + auto size_vreg = getNewVReg(); + auto li_size = std::make_unique(RVOpcodes::LI); + li_size->addOperand(std::make_unique(size_vreg)); + li_size->addOperand(std::make_unique(stride)); + CurMBB->addInstruction(std::move(li_size)); + + auto mul = std::make_unique(RVOpcodes::MULW); + mul->addOperand(std::make_unique(offset_vreg)); + mul->addOperand(std::make_unique(index_vreg)); + mul->addOperand(std::make_unique(size_vreg)); + CurMBB->addInstruction(std::move(mul)); + } + + // 将计算出的偏移量累加到当前地址上 + auto add = std::make_unique(RVOpcodes::ADD); + add->addOperand(std::make_unique(current_addr_vreg)); + add->addOperand(std::make_unique(current_addr_vreg)); + add->addOperand(std::make_unique(offset_vreg)); + CurMBB->addInstruction(std::move(add)); + } + + // --- 为下一次迭代更新类型:深入一层 --- if (auto array_type = current_type->as()) { - // 情况一:当前正在索引一个数组类型,例如 a[i] 中的 a - element_type = array_type->getElementType(); - element_size = getTypeSizeInBytes(element_type); + current_type = array_type->getElementType(); } else if (auto ptr_type = current_type->as()) { - // 情况二:当前正在索引一个指针类型,例如函数参数 p[] 经过退化后的 p - element_type = ptr_type->getBaseType(); - element_size = getTypeSizeInBytes(element_type); - } else { - // 如果既不是数组也不是指针,那么无法进行索引 - assert(false && "GEP can only index into an array or pointer type."); + // 这种情况不应该在第二次迭代后发生,但为了逻辑健壮性保留 + current_type = ptr_type->getBaseType(); } - - // 更新current_type,为下一次迭代做准备(处理多维数组) - current_type = element_type; - - // --- 计算偏移量: offset = index * element_size --- 
- auto offset_vreg = getNewVReg(); - auto index_vreg = getVReg(index_node->value); - - // 如果索引本身是个常量,需要先用LI指令加载到虚拟寄存器中 - if (auto const_index = dynamic_cast(index_node->value)) { - auto li = std::make_unique(RVOpcodes::LI); - li->addOperand(std::make_unique(index_vreg)); - li->addOperand(std::make_unique(const_index->getInt())); - CurMBB->addInstruction(std::move(li)); - } - - // 将元素大小加载到临时寄存器 - auto size_vreg = getNewVReg(); - auto li_size = std::make_unique(RVOpcodes::LI); - li_size->addOperand(std::make_unique(size_vreg)); - li_size->addOperand(std::make_unique(element_size)); - CurMBB->addInstruction(std::move(li_size)); - - // 执行乘法: offset_vreg = index_vreg * size_vreg - // SysY中数组索引计算用32位乘法足够 - auto mul = std::make_unique(RVOpcodes::MULW); - mul->addOperand(std::make_unique(offset_vreg)); - mul->addOperand(std::make_unique(index_vreg)); - mul->addOperand(std::make_unique(size_vreg)); - CurMBB->addInstruction(std::move(mul)); - - // 累加地址: current_addr_vreg = current_addr_vreg + offset_vreg - // 指针地址是64位的,用ADD指令 - auto add = std::make_unique(RVOpcodes::ADD); - add->addOperand(std::make_unique(current_addr_vreg)); - add->addOperand(std::make_unique(current_addr_vreg)); - add->addOperand(std::make_unique(offset_vreg)); - CurMBB->addInstruction(std::move(add)); + // 如果`current_type`已经是i32等基本类型,它会保持不变, + // 但下一次循环如果还有索引,`getTypeSizeInBytes(i32)`仍然能正确计算步长。 } - // --- Step 3: 将最终地址存入GEP的目标vreg --- + // --- Step 3: 将最终计算出的地址存入GEP的目标虚拟寄存器 (保持不变) --- auto final_mv = std::make_unique(RVOpcodes::MV); final_mv->addOperand(std::make_unique(result_vreg)); final_mv->addOperand(std::make_unique(current_addr_vreg)); diff --git a/src/include/RISCv64Passes.h b/src/include/RISCv64Passes.h index 7205b10..d2da152 100644 --- a/src/include/RISCv64Passes.h +++ b/src/include/RISCv64Passes.h @@ -6,13 +6,13 @@ namespace sysy { /** - * @class Pass + * @class BackendPass * @brief 所有优化Pass的抽象基类 (可选,但推荐) * * 定义一个通用的接口,所有优化都应该实现它。 */ -class Pass { +class BackendPass { public: - virtual ~Pass() 
= default; + virtual ~BackendPass() = default; virtual void runOnMachineFunction(MachineFunction* mfunc) = 0; }; @@ -25,7 +25,7 @@ public: * * 在虚拟寄存器上进行操作,此时调度自由度最大, * 主要目标是隐藏指令延迟,提高流水线效率。 */ -class PreRA_Scheduler : public Pass { +class PreRA_Scheduler : public BackendPass { public: void runOnMachineFunction(MachineFunction* mfunc) override; }; @@ -39,7 +39,7 @@ public: * * 在已分配物理寄存器的指令流上,通过一个小的滑动窗口来查找 * 并替换掉一些冗余或低效的指令模式。 */ -class PeepholeOptimizer : public Pass { +class PeepholeOptimizer : public BackendPass { public: void runOnMachineFunction(MachineFunction* mfunc) override; }; @@ -50,7 +50,7 @@ public: * * 主要目标是优化寄存器分配器插入的spill/fill代码(lw/sw), * 尝试将加载指令提前,以隐藏其访存延迟。 */ -class PostRA_Scheduler : public Pass { +class PostRA_Scheduler : public BackendPass { public: void runOnMachineFunction(MachineFunction* mfunc) override; }; From 12f63a0bf59a4bf964f475f77bea5009251ec13b Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Fri, 25 Jul 2025 12:35:35 +0800 Subject: [PATCH 32/35] =?UTF-8?q?[midend]=E8=A7=A3=E5=86=B3=E6=A0=87?= =?UTF-8?q?=E7=AD=BE=E9=87=8D=E5=90=8D=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/SysYIRGenerator.cpp | 2 +- src/SysYIRPrinter.cpp | 4 +--- src/include/IR.h | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/SysYIRGenerator.cpp b/src/SysYIRGenerator.cpp index 09b1214..9f0f012 100644 --- a/src/SysYIRGenerator.cpp +++ b/src/SysYIRGenerator.cpp @@ -408,7 +408,7 @@ std::any SysYIRGenerator::visitFuncDef(SysYParser::FuncDefContext *ctx){ // 在处理函数体之前,创建一个新的基本块作为函数体的实际入口 // 这样 entryBB 就可以在完成初始化后跳转到这里 - BasicBlock* funcBodyEntry = function->addBasicBlock("funcBodyEntry"); + BasicBlock* funcBodyEntry = function->addBasicBlock("funcBodyEntry_" + name); // 从 entryBB 无条件跳转到 funcBodyEntry builder.createUncondBrInst(funcBodyEntry, {}); diff --git a/src/SysYIRPrinter.cpp b/src/SysYIRPrinter.cpp index 952d500..689fd50 100644 --- a/src/SysYIRPrinter.cpp 
+++ b/src/SysYIRPrinter.cpp @@ -150,9 +150,7 @@ void SysYPrinter::printFunction(Function *function) { for (const auto &blockIter : function->getBasicBlocks()) { // Basic block label BasicBlock* blockPtr = blockIter.get(); - if (blockPtr == function->getEntryBlock()) { - std::cout << "entry:" << std::endl; - } else if (!blockPtr->getName().empty()) { + if (!blockPtr->getName().empty()) { std::cout << blockPtr->getName() << ":" << std::endl; } diff --git a/src/include/IR.h b/src/include/IR.h index 1e64582..99bf003 100644 --- a/src/include/IR.h +++ b/src/include/IR.h @@ -1248,7 +1248,7 @@ class Function : public Value { friend class Module; protected: Function(Module *parent, Type *type, const std::string &name) : Value(type, name), parent(parent) { - blocks.emplace_back(new BasicBlock(this)); + blocks.emplace_back(new BasicBlock(this, "entry_" + name)); ///< 创建一个入口基本块 } public: From e2c97fd1718e72213ec67e7961738749b2979b1f Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Fri, 25 Jul 2025 16:33:18 +0800 Subject: [PATCH 33/35] =?UTF-8?q?[miden]DCE=E5=BC=95=E5=85=A5ctx=E9=81=BF?= =?UTF-8?q?=E5=85=8D=E9=87=8D=E5=A4=8D=E8=BF=90=E8=A1=8C=E9=81=8D=E5=AF=BC?= =?UTF-8?q?=E8=87=B4=E7=9A=84=E7=8A=B6=E6=80=81=E6=B1=A1=E6=9F=93=E3=80=82?= =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=A4=A9=E7=84=B6=E6=B4=BB=E8=B7=83=E5=88=A4?= =?UTF-8?q?=E6=96=AD=E6=9D=A1=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/DCE.cpp | 83 +++++++++++++++++++++++++++++++++++---------- src/include/DCE.h | 85 ++++++++++++++++++++++++++++------------------- 2 files changed, 117 insertions(+), 51 deletions(-) diff --git a/src/DCE.cpp b/src/DCE.cpp index e0e9519..db5d966 100644 --- a/src/DCE.cpp +++ b/src/DCE.cpp @@ -1,48 +1,57 @@ -#include "DCE.h" -#include "IR.h" -#include "SysYIROptUtils.h" -#include -#include +#include "DCE.h" // 包含DCE遍的头文件 +#include "IR.h" // 包含IR相关的定义 +#include "SysYIROptUtils.h" // 包含SysY IR优化工具类的定义 +#include // 用于断言 
+#include // 用于调试输出 +#include // 包含set,虽然DCEContext内部用unordered_set,但这里保留 namespace sysy { // DCE 遍的静态 ID void *DCE::ID = (void *)&DCE::ID; -// DCE 遍的 runOnFunction 方法实现 -bool DCE::runOnFunction(Function *func, AnalysisManager &AM) { +// ====================================================================== +// DCEContext 类的实现 +// 封装了 DCE 遍的核心逻辑和状态,确保每次函数优化运行时状态独立 +// ====================================================================== + +// DCEContext 的 run 方法实现 +void DCEContext::run(Function *func, AnalysisManager *AM, bool &changed) { + // 清空活跃指令集合,确保每次运行都是新的状态 alive_insts.clear(); - bool changed = false; + // 第一次遍历:扫描所有指令,识别“天然活跃”的指令并将其及其依赖标记为活跃 + // 使用 func->getBasicBlocks() 获取基本块列表,保留用户风格 auto basicBlocks = func->getBasicBlocks(); - for (auto &basicBlock : basicBlocks) { // 确保基本块有效 if (!basicBlock) continue; + // 使用 basicBlock->getInstructions() 获取指令列表,保留用户风格 for (auto &inst : basicBlock->getInstructions()) { // 确保指令有效 if (!inst) continue; + // 调用 DCEContext 自身的 isAlive 和 addAlive 方法 if (isAlive(inst.get())) { addAlive(inst.get()); } } } - // 第二遍:删除所有未被标记为活跃的指令。 + // 第二次遍历:删除所有未被标记为活跃的指令。 for (auto &basicBlock : basicBlocks) { if (!basicBlock) continue; // 使用传统的迭代器循环,并手动管理迭代器, - // 以便在删除元素后正确前进。 + // 以便在删除元素后正确前进。保留用户风格 for (auto instIter = basicBlock->getInstructions().begin(); instIter != basicBlock->getInstructions().end();) { auto &inst = *instIter; Instruction *currentInst = inst.get(); // 如果指令不在活跃集合中,则删除它。 // 分支和返回指令由 isAlive 处理,并会被保留。 if (alive_insts.count(currentInst) == 0) { - // 删除指令 + // 删除指令,保留用户风格的 SysYIROptUtils::usedelete 和 erase changed = true; // 标记 IR 已被修改 SysYIROptUtils::usedelete(currentInst); instIter = basicBlock->getInstructions().erase(instIter); // 删除后返回下一个迭代器 @@ -51,27 +60,26 @@ bool DCE::runOnFunction(Function *func, AnalysisManager &AM) { } } } - - return changed; } // 判断指令是否是“天然活跃”的实现 // 只有具有副作用的指令(如存储、函数调用、原子操作) // 和控制流指令(如分支、返回)是天然活跃的。 -bool DCE::isAlive(Instruction *inst) { +bool DCEContext::isAlive(Instruction *inst) { // 
TODO: 后续程序并发考虑原子操作 // 其结果不被其他指令使用的指令(例如 StoreInst, BranchInst, ReturnInst)。 // dynamic_cast(inst) 检查是否是函数调用指令, // 函数调用通常有副作用。 // 终止指令 (BranchInst, ReturnInst) 必须是活跃的,因为它控制了程序的执行流程。 + // 保留用户提供的 isAlive 逻辑 bool isBranchOrReturn = inst->isBranch() || inst->isReturn(); bool isCall = inst->isCall(); - bool isStoreOrMemset = inst->isStore() && inst->isMemset(); + bool isStoreOrMemset = inst->isStore() || inst->isMemset(); return isBranchOrReturn || isCall || isStoreOrMemset; } // 递归地将活跃指令及其依赖加入到 alive_insts 集合中 -void DCE::addAlive(Instruction *inst) { +void DCEContext::addAlive(Instruction *inst) { // 如果指令已经存在于活跃集合中,则无需重复处理 if (alive_insts.count(inst) > 0) { return; @@ -79,6 +87,7 @@ void DCE::addAlive(Instruction *inst) { // 将当前指令标记为活跃 alive_insts.insert(inst); // 遍历当前指令的所有操作数 + // 保留用户提供的 getOperands() 和 getValue() for (auto operand : inst->getOperands()) { // 如果操作数是一个指令(即它是一个值的定义), // 并且它还没有被标记为活跃 @@ -88,4 +97,44 @@ void DCE::addAlive(Instruction *inst) { } } +// ====================================================================== +// DCE Pass 类的实现 +// 主要负责与 PassManager 交互,创建 DCEContext 实例并运行优化 +// ====================================================================== + +// DCE 遍的 runOnFunction 方法实现 +bool DCE::runOnFunction(Function *func, AnalysisManager &AM) { + + DCEContext ctx; + bool changed = false; + ctx.run(func, &AM, changed); // 运行 DCE 优化 + + // 如果 IR 被修改,则使相关的分析结果失效 + if (changed) { + // DCE 会删除指令,这会影响数据流分析,尤其是活跃性分析。 + // 如果删除导致基本块变空,也可能间接影响 CFG 和支配树。 + // AM.invalidateAnalysis(&LivenessAnalysisPass::ID, func); // 活跃性分析失效 + // AM.invalidateAnalysis(&DominatorTreeAnalysisPass::ID, func); // 支配树分析可能失效 + // 其他所有依赖于数据流或 IR 结构的分析都可能失效。 + } + return changed; +} + +// 声明DCE遍的分析依赖和失效信息 +void DCE::getAnalysisUsage(std::set &analysisDependencies, std::set &analysisInvalidations) const { + // DCE不依赖特定的分析结果,它通过遍历和副作用判断来工作。 + + // DCE会删除指令,这会影响许多分析结果。 + // 至少,它会影响活跃性分析、支配树、控制流图(如果删除导致基本块为空并被合并)。 + // 假设存在LivenessAnalysisPass和DominatorTreeAnalysisPass + // 
analysisInvalidations.insert(&LivenessAnalysisPass::ID); + // analysisInvalidations.insert(&DominatorTreeAnalysisPass::ID); + // 任何改变IR结构的优化,都可能导致通用分析(如活跃性、支配树、循环信息)失效。 + // 最保守的做法是使所有函数粒度的分析失效,或者只声明你明确知道会受影响的分析。 + // 考虑到这个DCE仅删除指令,如果它不删除基本块,CFG可能不变,但数据流分析会失效。 + // 对于更激进的DCE(如ADCE),CFG也会改变。 + // 这里我们假设它主要影响数据流分析,并且可能间接影响CFG相关分析。 + // 如果有SideEffectInfo,它也可能被修改,但通常SideEffectInfo是静态的,不因DCE而变。 +} + } // namespace sysy diff --git a/src/include/DCE.h b/src/include/DCE.h index 8be40bd..41bc223 100644 --- a/src/include/DCE.h +++ b/src/include/DCE.h @@ -1,46 +1,63 @@ #pragma once -#include "IR.h" // 包含IR相关的定义,如Instruction, Function, BasicBlock等 -#include "IRBuilder.h" // 包含IR构建器的定义 -#include "SysYIROptUtils.h" // 包含SysY IR优化工具类的 -#include "Liveness.h" -#include "Dom.h" // 包含支配树的定义 -#include "Pass.h" // 包含Pass的基类定义 -#include // 用于存储活跃指令 +#include "Pass.h" +#include "IR.h" +#include "SysYIROptUtils.h" +#include "Dom.h" +#include +#include namespace sysy { +// 前向声明分析结果类,确保在需要时可以引用 +// class DominatorTreeAnalysisResult; // Pass.h 中已包含,这里不再需要 +class SideEffectInfoAnalysisResult; // 假设有副作用分析结果类 + +// DCEContext 类,用于封装DCE的内部逻辑和状态 +// 这样可以避免静态变量在多线程或多次运行时的冲突,并保持代码的模块化 +class DCEContext { +public: + // 运行DCE的主要方法 + // func: 当前要优化的函数 + // tp: 分析管理器,用于获取其他分析结果(如果需要) + void run(Function* func, AnalysisManager* AM, bool &changed); + +private: + // 存储活跃指令的集合 + std::unordered_set alive_insts; + + // 判断指令是否是“天然活跃”的(即总是保留的) + // inst: 要检查的指令 + // 返回值: 如果指令是天然活跃的,则为true,否则为false + bool isAlive(Instruction* inst); + + // 递归地将活跃指令及其依赖加入到 alive_insts 集合中 + // inst: 要标记为活跃的指令 + void addAlive(Instruction* inst); +}; + // DCE 优化遍类,继承自 OptimizationPass class DCE : public OptimizationPass { - private: - std::unordered_set alive_insts; - // 判断指令是否是“天然活跃”的(即总是保留的) - // inst: 要检查的指令 - // 返回值: 如果指令是天然活跃的,则为true,否则为false - bool isAlive(Instruction *inst); - // 递归地将活跃指令及其依赖加入到 alive_insts 集合中 - // inst: 要标记为活跃的指令 - void addAlive(Instruction *inst); public: - static void *ID; - DCE() : 
OptimizationPass("DCE", Granularity::Function) {} - bool runOnFunction(Function *func, AnalysisManager &AM) override; - void getAnalysisUsage(std::set &analysisDependencies, std::set &analysisInvalidations) const override{ - // DCE不依赖特定的分析结果,它通过遍历和副作用判断来工作。 + // 构造函数 + DCE() : OptimizationPass("DCE", Granularity::Function) {} - // DCE会删除指令,这会影响许多分析结果。 - // 至少,它会影响活跃性分析、支配树、控制流图(如果删除导致基本块为空并被合并)。 - // 假设存在LivenessAnalysisPass和DominatorTreeAnalysisPass - // analysisInvalidations.insert(&LivenessAnalysisPass::ID); - // analysisInvalidations.insert(&DominatorTreeAnalysisPass::ID); - // 任何改变IR结构的优化,都可能导致通用分析(如活跃性、支配树、循环信息)失效。 - // 最保守的做法是使所有函数粒度的分析失效,或者只声明你明确知道会受影响的分析。 - // 考虑到这个DCE仅删除指令,如果它不删除基本块,CFG可能不变,但数据流分析会失效。 - // 对于更激进的DCE(如ADCE),CFG也会改变。 - // 这里我们假设它主要影响数据流分析,并且可能间接影响CFG相关分析。 - // 如果有SideEffectInfo,它也可能被修改,但通常SideEffectInfo是静态的,不因DCE而变。 - } - void *getPassID() const override { return &ID; } + // 静态成员,作为该遍的唯一ID + static void *ID; + + // 运行在函数上的优化逻辑 + // F: 当前要优化的函数 + // AM: 分析管理器,用于获取或使分析结果失效 + // 返回值: 如果IR被修改,则为true,否则为false + bool runOnFunction(Function *F, AnalysisManager& AM) override; + + // 声明该遍的分析依赖和失效信息 + // analysisDependencies: 该遍运行前需要哪些分析结果 + // analysisInvalidations: 该遍运行后会使哪些分析结果失效 + void getAnalysisUsage(std::set &analysisDependencies, std::set &analysisInvalidations) const override; + + // Pass 基类中的纯虚函数,必须实现 + void *getPassID() const override { return &ID; } }; } // namespace sysy \ No newline at end of file From 04c5c6b44d813c8b3d5fe19d2abd06da4a8f4996 Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Fri, 25 Jul 2025 20:00:41 +0800 Subject: [PATCH 34/35] =?UTF-8?q?[midend-mem2reg]=E4=BF=AE=E5=A4=8Dassigns?= =?UTF-8?q?tmt=E5=AF=B9lvalue=E7=9A=84=E9=94=99=E8=AF=AF=E8=A7=A3=E6=9E=90?= =?UTF-8?q?=EF=BC=88lvaue=E4=BC=9A=E8=A2=ABexp=E8=A7=A3=E9=87=8A=E4=B8=BA?= =?UTF-8?q?=E5=80=BC=EF=BC=8C=E8=80=8C=E8=A2=ABassign=E8=A7=A3=E9=87=8A?= =?UTF-8?q?=E4=B8=BA=E5=9C=B0=E5=9D=80=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/IR.cpp | 6 +-- src/SysYIRGenerator.cpp | 87 +++++++++++++++++++++++++++++------------ src/include/IR.h | 9 +++-- 3 files changed, 69 insertions(+), 33 deletions(-) diff --git a/src/IR.cpp b/src/IR.cpp index da4292b..faa9aed 100644 --- a/src/IR.cpp +++ b/src/IR.cpp @@ -647,7 +647,7 @@ Function * CallInst::getCallee() const { return dynamic_cast(getOper /** * 获取变量指针 */ -auto SymbolTable::getVariable(const std::string &name) const -> User * { +auto SymbolTable::getVariable(const std::string &name) const -> Value * { auto node = curNode; while (node != nullptr) { auto iter = node->varList.find(name); @@ -662,8 +662,8 @@ auto SymbolTable::getVariable(const std::string &name) const -> User * { /** * 添加变量到符号表 */ -auto SymbolTable::addVariable(const std::string &name, User *variable) -> User * { - User *result = nullptr; +auto SymbolTable::addVariable(const std::string &name, Value *variable) -> Value * { + Value *result = nullptr; if (curNode != nullptr) { std::stringstream ss; auto iter = variableIndex.find(name); diff --git a/src/SysYIRGenerator.cpp b/src/SysYIRGenerator.cpp index 9f0f012..08332ec 100644 --- a/src/SysYIRGenerator.cpp +++ b/src/SysYIRGenerator.cpp @@ -450,44 +450,79 @@ std::any SysYIRGenerator::visitBlockStmt(SysYParser::BlockStmtContext *ctx) { std::any SysYIRGenerator::visitAssignStmt(SysYParser::AssignStmtContext *ctx) { auto lVal = ctx->lValue(); std::string name = lVal->Ident()->getText(); - std::vector dims; - for (const auto &exp : lVal->exp()) { - dims.push_back(std::any_cast(visitExp(exp))); + Value* LValue = nullptr; + Value* variable = module->getVariable(name); // 左值 + + vector indices; + if (lVal->exp().size() > 0) { + // 如果有下标,访问表达式获取下标值 + for (const auto &exp : lVal->exp()) { + Value* indexValue = std::any_cast(visitExp(exp)); + indices.push_back(indexValue); + } + } + if (indices.empty()) { + // variable 本身就是指向标量的指针 (e.g., int* %a) + if (dynamic_cast(variable) || 
dynamic_cast(variable)) { + LValue = variable; + } + } + else { + // 对于数组或多维数组的左值处理 + // 需要获取 GEP 地址 + Value* gepBasePointer = nullptr; + std::vector gepIndices; + if (AllocaInst *alloc = dynamic_cast(variable)) { + Type* allocatedType = alloc->getType()->as()->getBaseType(); + if (allocatedType->isPointer()) { + gepBasePointer = builder.createLoadInst(alloc); + gepIndices = indices; + } else { + gepBasePointer = alloc; + gepIndices.push_back(ConstantInteger::get(0)); + gepIndices.insert(gepIndices.end(), indices.begin(), indices.end()); + } + } else if (GlobalValue *glob = dynamic_cast(variable)) { + // 情况 B: 全局变量 (GlobalValue) + gepBasePointer = glob; + gepIndices.push_back(ConstantInteger::get(0)); + gepIndices.insert(gepIndices.end(), indices.begin(), indices.end()); + } else if (ConstantVariable *constV = dynamic_cast(variable)) { + gepBasePointer = constV; + gepIndices.push_back(ConstantInteger::get(0)); + gepIndices.insert(gepIndices.end(), indices.begin(), indices.end()); + } + // 左值为地址 + LValue = getGEPAddressInst(gepBasePointer, gepIndices); } - - auto variable = module->getVariable(name); // 获取 AllocaInst 或 GlobalValue - Value* value = std::any_cast(visitExp(ctx->exp())); // 右值 - Type* targetElementType = variable->getType(); // 从基指针指向的类型开始 + Value* RValue = std::any_cast(visitExp(ctx->exp())); // 右值 - //根据 dims 确定最终元素的类型 - targetElementType = builder.getIndexedType(targetElementType, dims); + // 先推断 LValue 的类型 + // 如果 LValue 是指向数组的指针,则需要根据 indices 获取正确的类型 + // 如果 LValue 是标量,则直接使用其类型 + // 注意:LValue 的类型可能是指向数组的指针 (e.g., int(*)[3]) 或者指向标量的指针 (e.g., int*) 也能推断 + Type* LType = builder.getIndexedType(variable->getType(), indices); + Type* RType = RValue->getType(); - // 左值右值类型不同处理:根据最终元素类型进行转换 - if (targetElementType != value->getType()) { - ConstantValue * constValue = dynamic_cast(value); + if (LType != RType) { + ConstantValue * constValue = dynamic_cast(RValue); if (constValue != nullptr) { - if (targetElementType == Type::getFloatType()) { - value = 
ConstantFloating::get(static_cast(constValue->getFloat())); + if (LType == Type::getFloatType()) { + RValue = ConstantFloating::get(static_cast(constValue->getFloat())); } else { // 假设如果不是浮点型,就是整型 - value = ConstantInteger::get(static_cast(constValue->getInt())); + RValue = ConstantInteger::get(static_cast(constValue->getInt())); } } else { - if (targetElementType == Type::getFloatType()) { - value = builder.createIToFInst(value); + if (LType == Type::getFloatType()) { + RValue = builder.createIToFInst(RValue); } else { // 假设如果不是浮点型,就是整型 - value = builder.createFtoIInst(value); + RValue = builder.createFtoIInst(RValue); } } } - // 计算目标地址:如果 dims 为空,就是变量本身地址;否则通过 GEP 计算 - Value* targetAddress = variable; - if (!dims.empty()) { - targetAddress = getGEPAddressInst(variable, dims); - } - - builder.createStoreInst(value, targetAddress); + builder.createStoreInst(RValue, LValue); return std::any(); } @@ -711,7 +746,7 @@ unsigned SysYIRGenerator::countArrayDimensions(Type* type) { std::any SysYIRGenerator::visitLValue(SysYParser::LValueContext *ctx) { std::string name = ctx->Ident()->getText(); - User* variable = module->getVariable(name); + Value* variable = module->getVariable(name); Value* value = nullptr; diff --git a/src/include/IR.h b/src/include/IR.h index 99bf003..8f0103d 100644 --- a/src/include/IR.h +++ b/src/include/IR.h @@ -521,6 +521,7 @@ public: Function* getParent() const { return parent; } void setParent(Function *func) { parent = func; } inst_list& getInstructions() { return instructions; } + auto getInstructions_Range() const { return make_range(instructions); } arg_list& getArguments() { return arguments; } block_list& getPredecessors() { return predecessors; } void clearPredecessors() { predecessors.clear(); } @@ -1404,7 +1405,7 @@ class ConstantVariable : public User { using SymbolTableNode = struct SymbolTableNode { SymbolTableNode *pNode; ///< 父节点 std::vector children; ///< 子节点列表 - std::map varList; ///< 变量列表 + std::map varList; ///< 变量列表 }; @@ 
-1419,8 +1420,8 @@ class SymbolTable { public: SymbolTable() = default; - User* getVariable(const std::string &name) const; ///< 根据名字name以及当前作用域获取变量 - User* addVariable(const std::string &name, User *variable); ///< 添加变量 + Value* getVariable(const std::string &name) const; ///< 根据名字name以及当前作用域获取变量 + Value* addVariable(const std::string &name, Value *variable); ///< 添加变量 std::vector>& getGlobals(); ///< 获取全局变量列表 const std::vector>& getConsts() const; ///< 获取常量列表 void enterNewScope(); ///< 进入新的作用域 @@ -1482,7 +1483,7 @@ class Module { void addVariable(const std::string &name, AllocaInst *variable) { variableTable.addVariable(name, variable); } ///< 添加变量 - User* getVariable(const std::string &name) { + Value* getVariable(const std::string &name) { return variableTable.getVariable(name); } ///< 根据名字name和当前作用域获取变量 Function* getFunction(const std::string &name) const { From 14fb3dbe48f50287366742b6c7ffd9825e8736c7 Mon Sep 17 00:00:00 2001 From: Lixuanwang Date: Fri, 25 Jul 2025 22:23:26 +0800 Subject: [PATCH 35/35] =?UTF-8?q?[midend][backend-GEP]=E8=A7=A3=E5=86=B3?= =?UTF-8?q?=E4=BA=86=E4=B8=80=E4=B8=AA32/64=E4=BD=8D=E5=AE=BD=E7=9A=84?= =?UTF-8?q?=E9=94=99=E8=AF=AF=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/RISCv64AsmPrinter.cpp | 48 ++++++-- src/RISCv64Backend.cpp | 2 +- src/RISCv64ISel.cpp | 83 ++++++++------ src/RISCv64RegAlloc.cpp | 201 ++++++++++++++++++++++++++++------ src/include/RISCv64LLIR.h | 8 +- src/include/RISCv64RegAlloc.h | 7 ++ 6 files changed, 272 insertions(+), 77 deletions(-) diff --git a/src/RISCv64AsmPrinter.cpp b/src/RISCv64AsmPrinter.cpp index 1995024..65dbe5d 100644 --- a/src/RISCv64AsmPrinter.cpp +++ b/src/RISCv64AsmPrinter.cpp @@ -31,6 +31,8 @@ void RISCv64AsmPrinter::run(std::ostream& os, bool debug) { } } +// 在 RISCv64AsmPrinter.cpp 文件中 + void RISCv64AsmPrinter::printPrologue() { StackFrameInfo& frame_info = MFunc->getFrameInfo(); // 序言需要为保存ra和s0预留16字节 @@ -44,12 +46,16 
@@ void RISCv64AsmPrinter::printPrologue() { *OS << " sd s0, " << (aligned_stack_size - 16) << "(sp)\n"; *OS << " addi s0, sp, " << aligned_stack_size << "\n"; } - - // 忠实还原保存函数入口参数的逻辑 + + // 为函数参数分配寄存器 Function* F = MFunc->getFunc(); if (F && F->getEntryBlock()) { int arg_idx = 0; RISCv64ISel* isel = MFunc->getISel(); + + // 获取函数所有参数的类型列表 + auto param_types = F->getParamTypes(); + for (AllocaInst* alloca_for_param : F->getEntryBlock()->getArguments()) { if (arg_idx >= 8) break; @@ -57,7 +63,25 @@ void RISCv64AsmPrinter::printPrologue() { if (frame_info.alloca_offsets.count(vreg)) { int offset = frame_info.alloca_offsets.at(vreg); auto arg_reg = static_cast(static_cast(PhysicalReg::A0) + arg_idx); - *OS << " sw " << regToString(arg_reg) << ", " << offset << "(s0)\n"; + + // 1. 获取当前参数的真实类型 + // 注意:F->getParamTypes() 返回的是一个 range-based view,需要转换为vector或直接使用 + Type* current_param_type = nullptr; + int temp_idx = 0; + for(auto p_type : param_types) { + if (temp_idx == arg_idx) { + current_param_type = p_type; + break; + } + temp_idx++; + } + assert(current_param_type && "Could not find parameter type."); + + // 2. 根据类型决定使用 "sw" 还是 "sd" + const char* store_op = current_param_type->isPointer() ? "sd" : "sw"; + + // 3. 
打印正确的存储指令 + *OS << " " << store_op << " " << regToString(arg_reg) << ", " << offset << "(s0)\n"; } arg_idx++; } @@ -133,17 +157,23 @@ void RISCv64AsmPrinter::printInstruction(MachineInstr* instr, bool debug) { case RVOpcodes::SNEZ: *OS << "snez "; break; case RVOpcodes::CALL: *OS << "call "; break; case RVOpcodes::LABEL: - // printOperand(instr->getOperands()[0].get()); - // *OS << ":"; break; - case RVOpcodes::FRAME_LOAD: + case RVOpcodes::FRAME_LOAD_W: // It should have been eliminated by RegAlloc if (!debug) throw std::runtime_error("FRAME pseudo-instruction not eliminated before AsmPrinter"); - *OS << "frame_load "; break; - case RVOpcodes::FRAME_STORE: + *OS << "frame_load_w "; break; + case RVOpcodes::FRAME_LOAD_D: // It should have been eliminated by RegAlloc if (!debug) throw std::runtime_error("FRAME pseudo-instruction not eliminated before AsmPrinter"); - *OS << "frame_store "; break; + *OS << "frame_load_d "; break; + case RVOpcodes::FRAME_STORE_W: + // It should have been eliminated by RegAlloc + if (!debug) throw std::runtime_error("FRAME pseudo-instruction not eliminated before AsmPrinter"); + *OS << "frame_store_w "; break; + case RVOpcodes::FRAME_STORE_D: + // It should have been eliminated by RegAlloc + if (!debug) throw std::runtime_error("FRAME pseudo-instruction not eliminated before AsmPrinter"); + *OS << "frame_store_d "; break; case RVOpcodes::FRAME_ADDR: // It should have been eliminated by RegAlloc if (!debug) throw std::runtime_error("FRAME pseudo-instruction not eliminated before AsmPrinter"); diff --git a/src/RISCv64Backend.cpp b/src/RISCv64Backend.cpp index 4f45fde..c429a63 100644 --- a/src/RISCv64Backend.cpp +++ b/src/RISCv64Backend.cpp @@ -85,7 +85,7 @@ std::string RISCv64CodeGen::function_gen(Function* func) { std::stringstream ss; RISCv64AsmPrinter printer(mfunc.get()); printer.run(ss); - if (DEBUG) ss << ss1.str(); // 将指令选择阶段的结果也包含在最终输出中 + if (DEBUG) ss << "\n" << ss1.str(); // 将指令选择阶段的结果也包含在最终输出中 return ss.str(); } diff --git 
a/src/RISCv64ISel.cpp b/src/RISCv64ISel.cpp index b6ed7df..db07908 100644 --- a/src/RISCv64ISel.cpp +++ b/src/RISCv64ISel.cpp @@ -149,38 +149,51 @@ void RISCv64ISel::selectNode(DAGNode* node) { auto dest_vreg = getVReg(node->value); Value* ptr_val = node->operands[0]->value; - // [V1设计保留] 对于从栈变量加载,继续使用伪指令 FRAME_LOAD。 - // 这种设计将栈帧布局的具体计算推迟到后续的 `eliminateFrameIndices` 阶段,保持了模块化。 + // --- 修改点 --- + // 1. 获取加载结果的类型 (即这个LOAD指令自身的类型) + Type* loaded_type = node->value->getType(); + + // 2. 根据类型选择正确的伪指令或真实指令操作码 + RVOpcodes frame_opcode = loaded_type->isPointer() ? RVOpcodes::FRAME_LOAD_D : RVOpcodes::FRAME_LOAD_W; + RVOpcodes real_opcode = loaded_type->isPointer() ? RVOpcodes::LD : RVOpcodes::LW; + + if (auto alloca = dynamic_cast(ptr_val)) { - auto instr = std::make_unique(RVOpcodes::FRAME_LOAD); + // 3. 创建使用新的、区分宽度的伪指令 + auto instr = std::make_unique(frame_opcode); instr->addOperand(std::make_unique(dest_vreg)); instr->addOperand(std::make_unique(getVReg(alloca))); CurMBB->addInstruction(std::move(instr)); + } else if (auto global = dynamic_cast(ptr_val)) { - // 对于全局变量,先用 la 加载其地址,再用 lw 加载其值。 + // 对于全局变量,先用 la 加载其地址 auto addr_vreg = getNewVReg(); auto la = std::make_unique(RVOpcodes::LA); la->addOperand(std::make_unique(addr_vreg)); la->addOperand(std::make_unique(global->getName())); CurMBB->addInstruction(std::move(la)); - auto lw = std::make_unique(RVOpcodes::LW); - lw->addOperand(std::make_unique(dest_vreg)); - lw->addOperand(std::make_unique( + // 然后根据类型使用 ld 或 lw 加载其值 + auto load_instr = std::make_unique(real_opcode); + load_instr->addOperand(std::make_unique(dest_vreg)); + load_instr->addOperand(std::make_unique( std::make_unique(addr_vreg), std::make_unique(0) )); - CurMBB->addInstruction(std::move(lw)); + CurMBB->addInstruction(std::move(load_instr)); + } else { - // 对于已经在虚拟寄存器中的指针地址,直接通过该地址加载。 + // 对于已经在虚拟寄存器中的指针地址,直接通过该地址加载 auto ptr_vreg = getVReg(ptr_val); - auto lw = std::make_unique(RVOpcodes::LW); - lw->addOperand(std::make_unique(dest_vreg)); - 
lw->addOperand(std::make_unique( + + // 根据类型使用 ld 或 lw + auto load_instr = std::make_unique(real_opcode); + load_instr->addOperand(std::make_unique(dest_vreg)); + load_instr->addOperand(std::make_unique( std::make_unique(ptr_vreg), std::make_unique(0) )); - CurMBB->addInstruction(std::move(lw)); + CurMBB->addInstruction(std::move(load_instr)); } break; } @@ -189,13 +202,8 @@ void RISCv64ISel::selectNode(DAGNode* node) { Value* val_to_store = node->operands[0]->value; Value* ptr_val = node->operands[1]->value; - // [V2优点] 在STORE节点内部负责加载作为源的常量。 - // 如果要存储的值是一个常量,就在这里生成 `li` 指令加载它。 + // 如果要存储的值是一个常量,就在这里生成 `li` 指令加载它 if (auto val_const = dynamic_cast(val_to_store)) { - if (DEBUG) { - std::cout << "[DEBUG] selectNode-BINARY: Found constant operand with value " << val_const->getInt() - << ". Generating LI instruction." << std::endl; - } auto li = std::make_unique(RVOpcodes::LI); li->addOperand(std::make_unique(getVReg(val_const))); li->addOperand(std::make_unique(val_const->getInt())); @@ -203,37 +211,50 @@ void RISCv64ISel::selectNode(DAGNode* node) { } auto val_vreg = getVReg(val_to_store); - // [V1设计保留] 同样,对于向栈变量的存储,使用 FRAME_STORE 伪指令。 + // --- 修改点 --- + // 1. 获取被存储的值的类型 + Type* stored_type = val_to_store->getType(); + + // 2. 根据类型选择正确的伪指令或真实指令操作码 + RVOpcodes frame_opcode = stored_type->isPointer() ? RVOpcodes::FRAME_STORE_D : RVOpcodes::FRAME_STORE_W; + RVOpcodes real_opcode = stored_type->isPointer() ? RVOpcodes::SD : RVOpcodes::SW; + if (auto alloca = dynamic_cast(ptr_val)) { - auto instr = std::make_unique(RVOpcodes::FRAME_STORE); + // 3. 
创建使用新的、区分宽度的伪指令 + auto instr = std::make_unique(frame_opcode); instr->addOperand(std::make_unique(val_vreg)); instr->addOperand(std::make_unique(getVReg(alloca))); CurMBB->addInstruction(std::move(instr)); + } else if (auto global = dynamic_cast(ptr_val)) { - // 向全局变量存储。 + // 向全局变量存储 auto addr_vreg = getNewVReg(); auto la = std::make_unique(RVOpcodes::LA); la->addOperand(std::make_unique(addr_vreg)); la->addOperand(std::make_unique(global->getName())); CurMBB->addInstruction(std::move(la)); - auto sw = std::make_unique(RVOpcodes::SW); - sw->addOperand(std::make_unique(val_vreg)); - sw->addOperand(std::make_unique( + // 根据类型使用 sd 或 sw + auto store_instr = std::make_unique(real_opcode); + store_instr->addOperand(std::make_unique(val_vreg)); + store_instr->addOperand(std::make_unique( std::make_unique(addr_vreg), std::make_unique(0) )); - CurMBB->addInstruction(std::move(sw)); + CurMBB->addInstruction(std::move(store_instr)); + } else { - // 向一个指针(存储在虚拟寄存器中)指向的地址存储。 + // 向一个指针(存储在虚拟寄存器中)指向的地址存储 auto ptr_vreg = getVReg(ptr_val); - auto sw = std::make_unique(RVOpcodes::SW); - sw->addOperand(std::make_unique(val_vreg)); - sw->addOperand(std::make_unique( + + // 根据类型使用 sd 或 sw + auto store_instr = std::make_unique(real_opcode); + store_instr->addOperand(std::make_unique(val_vreg)); + store_instr->addOperand(std::make_unique( std::make_unique(ptr_vreg), std::make_unique(0) )); - CurMBB->addInstruction(std::move(sw)); + CurMBB->addInstruction(std::move(store_instr)); } break; } diff --git a/src/RISCv64RegAlloc.cpp b/src/RISCv64RegAlloc.cpp index 0c0a4a3..5edcf98 100644 --- a/src/RISCv64RegAlloc.cpp +++ b/src/RISCv64RegAlloc.cpp @@ -27,24 +27,26 @@ void RISCv64RegAlloc::run() { void RISCv64RegAlloc::eliminateFrameIndices() { StackFrameInfo& frame_info = MFunc->getFrameInfo(); - int current_offset = 20; // 这里写20是为了在$s0和第一个变量之间留出20字节的安全区, - // 以防止一些函数调用方面的恶性bug。 + // 初始偏移量,为保存ra和s0留出空间。可以根据你的函数序言调整。 + // 假设序言是 addi sp, sp, -stack_size; sd ra, stack_size-8(sp); sd s0, 
stack_size-16(sp); + int current_offset = 16; + Function* F = MFunc->getFunc(); RISCv64ISel* isel = MFunc->getISel(); + // --- MODIFICATION START: 动态计算栈帧大小 --- + // 遍历AllocaInst来计算局部变量所需的总空间 for (auto& bb : F->getBasicBlocks()) { for (auto& inst : bb->getInstructions()) { if (auto alloca = dynamic_cast(inst.get())) { - int size = 4; - if (!alloca->getDims().empty()) { - int num_elements = 1; - for (const auto& dim_use : alloca->getDims()) { - if (auto const_dim = dynamic_cast(dim_use->getValue())) { - num_elements *= const_dim->getInt(); - } - } - size *= num_elements; - } + // 获取Alloca指令指向的类型 (例如 alloca i32* 中,获取 i32) + Type* allocated_type = alloca->getType()->as()->getBaseType(); + int size = getTypeSizeInBytes(allocated_type); + + // RISC-V要求栈地址8字节对齐 + size = (size + 7) & ~7; + if (size == 0) size = 8; // 至少分配8字节 + current_offset += size; unsigned alloca_vreg = isel->getVReg(alloca); frame_info.alloca_offsets[alloca_vreg] = -current_offset; @@ -52,50 +54,66 @@ void RISCv64RegAlloc::eliminateFrameIndices() { } } frame_info.locals_size = current_offset; + // --- MODIFICATION END --- + // 遍历所有机器指令,将伪指令展开为真实指令 for (auto& mbb : MFunc->getBlocks()) { std::vector> new_instructions; for (auto& instr_ptr : mbb->getInstructions()) { - if (instr_ptr->getOpcode() == RVOpcodes::FRAME_LOAD) { + RVOpcodes opcode = instr_ptr->getOpcode(); + + // --- MODIFICATION START: 处理区分宽度的伪指令 --- + if (opcode == RVOpcodes::FRAME_LOAD_W || opcode == RVOpcodes::FRAME_LOAD_D) { + // 确定要生成的真实加载指令是 lw 还是 ld + RVOpcodes real_load_op = (opcode == RVOpcodes::FRAME_LOAD_W) ? 
RVOpcodes::LW : RVOpcodes::LD; + auto& operands = instr_ptr->getOperands(); unsigned dest_vreg = static_cast(operands[0].get())->getVRegNum(); unsigned alloca_vreg = static_cast(operands[1].get())->getVRegNum(); int offset = frame_info.alloca_offsets.at(alloca_vreg); auto addr_vreg = isel->getNewVReg(); + // 展开为: addi addr_vreg, s0, offset auto addi = std::make_unique(RVOpcodes::ADDI); addi->addOperand(std::make_unique(addr_vreg)); addi->addOperand(std::make_unique(PhysicalReg::S0)); addi->addOperand(std::make_unique(offset)); new_instructions.push_back(std::move(addi)); - auto lw = std::make_unique(RVOpcodes::LW); - lw->addOperand(std::make_unique(dest_vreg)); - lw->addOperand(std::make_unique( + // 展开为: lw/ld dest_vreg, 0(addr_vreg) + auto load_instr = std::make_unique(real_load_op); + load_instr->addOperand(std::make_unique(dest_vreg)); + load_instr->addOperand(std::make_unique( std::make_unique(addr_vreg), std::make_unique(0))); - new_instructions.push_back(std::move(lw)); + new_instructions.push_back(std::move(load_instr)); + + } else if (opcode == RVOpcodes::FRAME_STORE_W || opcode == RVOpcodes::FRAME_STORE_D) { + // 确定要生成的真实存储指令是 sw 还是 sd + RVOpcodes real_store_op = (opcode == RVOpcodes::FRAME_STORE_W) ? 
RVOpcodes::SW : RVOpcodes::SD; - } else if (instr_ptr->getOpcode() == RVOpcodes::FRAME_STORE) { auto& operands = instr_ptr->getOperands(); unsigned src_vreg = static_cast(operands[0].get())->getVRegNum(); unsigned alloca_vreg = static_cast(operands[1].get())->getVRegNum(); int offset = frame_info.alloca_offsets.at(alloca_vreg); auto addr_vreg = isel->getNewVReg(); + // 展开为: addi addr_vreg, s0, offset auto addi = std::make_unique(RVOpcodes::ADDI); addi->addOperand(std::make_unique(addr_vreg)); addi->addOperand(std::make_unique(PhysicalReg::S0)); addi->addOperand(std::make_unique(offset)); new_instructions.push_back(std::move(addi)); - auto sw = std::make_unique(RVOpcodes::SW); - sw->addOperand(std::make_unique(src_vreg)); - sw->addOperand(std::make_unique( + // 展开为: sw/sd src_vreg, 0(addr_vreg) + auto store_instr = std::make_unique(real_store_op); + store_instr->addOperand(std::make_unique(src_vreg)); + store_instr->addOperand(std::make_unique( std::make_unique(addr_vreg), std::make_unique(0))); - new_instructions.push_back(std::move(sw)); - } else if (instr_ptr->getOpcode() == RVOpcodes::FRAME_ADDR) { // [新] 处理FRAME_ADDR + new_instructions.push_back(std::move(store_instr)); + + } else if (instr_ptr->getOpcode() == RVOpcodes::FRAME_ADDR) { auto& operands = instr_ptr->getOperands(); unsigned dest_vreg = static_cast(operands[0].get())->getVRegNum(); unsigned alloca_vreg = static_cast(operands[1].get())->getVRegNum(); @@ -104,12 +122,13 @@ void RISCv64RegAlloc::eliminateFrameIndices() { // 将 `frame_addr rd, rs` 展开为 `addi rd, s0, offset` auto addi = std::make_unique(RVOpcodes::ADDI); addi->addOperand(std::make_unique(dest_vreg)); - addi->addOperand(std::make_unique(PhysicalReg::S0)); // 基地址是帧指针 s0 + addi->addOperand(std::make_unique(PhysicalReg::S0)); addi->addOperand(std::make_unique(offset)); new_instructions.push_back(std::move(addi)); } else { new_instructions.push_back(std::move(instr_ptr)); } + // --- MODIFICATION END --- } mbb->getInstructions() = 
std::move(new_instructions); } @@ -119,30 +138,72 @@ void RISCv64RegAlloc::getInstrUseDef(MachineInstr* instr, LiveSet& use, LiveSet& bool is_def = true; auto opcode = instr->getOpcode(); - // 预定义def和use规则 + // --- MODIFICATION START: 细化对指令的 use/def 定义 --- + + // 对于没有定义目标寄存器的指令,预先设置 is_def = false if (opcode == RVOpcodes::SW || opcode == RVOpcodes::SD || opcode == RVOpcodes::BEQ || opcode == RVOpcodes::BNE || opcode == RVOpcodes::BLT || opcode == RVOpcodes::BGE || + opcode == RVOpcodes::BLTU || opcode == RVOpcodes::BGEU || opcode == RVOpcodes::RET || opcode == RVOpcodes::J) { is_def = false; } + + // 对 CALL 指令进行特殊处理 if (opcode == RVOpcodes::CALL) { - // CALL会杀死所有调用者保存寄存器,这是一个简化处理 - // 同时也使用了传入a0-a7的参数 + // CALL 指令的第一个操作数通常是目标函数标签,不是寄存器。 + // 它可能会有一个可选的返回值(def),以及一系列参数(use)。 + // 这里的处理假定 CALL 的机器指令操作数布局是: + // [可选: dest_vreg (def)], [函数标签], [可选: arg1_vreg (use)], [可选: arg2_vreg (use)], ... + + // 我们需要一种方法来识别哪些操作数是def,哪些是use。 + // 一个简单的约定:如果第一个操作数是寄存器,则它是def(返回值)。 + if (!instr->getOperands().empty() && instr->getOperands().front()->getKind() == MachineOperand::KIND_REG) { + auto reg_op = static_cast(instr->getOperands().front().get()); + if (reg_op->isVirtual()) { + def.insert(reg_op->getVRegNum()); + } + } + + // 遍历所有操作数,非第一个寄存器操作数均视为use + bool first_reg_skipped = false; + for (const auto& op : instr->getOperands()) { + if (op->getKind() == MachineOperand::KIND_REG) { + if (!first_reg_skipped) { + first_reg_skipped = true; + continue; // 跳过我们已经作为def处理的返回值 + } + auto reg_op = static_cast(op.get()); + if (reg_op->isVirtual()) { + use.insert(reg_op->getVRegNum()); + } + } + } + + // **重要**: CALL指令还隐式定义(杀死)了所有调用者保存的寄存器。 + // 一个完整的实现会在这里将所有caller-saved寄存器标记为def, + // 以确保任何跨调用存活的变量都不会被分配到这些寄存器中。 + // 这个简化的实现暂不处理隐式def,但这是未来优化的关键点。 + + return; // CALL 指令处理完毕,直接返回 } + // --- MODIFICATION END --- + + // 对其他所有指令的通用处理逻辑 for (const auto& op : instr->getOperands()) { if (op->getKind() == MachineOperand::KIND_REG) { auto reg_op = static_cast(op.get()); if (reg_op->isVirtual()) { 
if (is_def) { def.insert(reg_op->getVRegNum()); - is_def = false; + is_def = false; // 一条指令通常只有一个目标寄存器 } else { use.insert(reg_op->getVRegNum()); } } } else if (op->getKind() == MachineOperand::KIND_MEM) { + // 内存操作数 `offset(base)` 中的 base 寄存器是 use auto mem_op = static_cast(op.get()); if (mem_op->getBase()->isVirtual()) { use.insert(mem_op->getBase()->getVRegNum()); @@ -151,6 +212,43 @@ void RISCv64RegAlloc::getInstrUseDef(MachineInstr* instr, LiveSet& use, LiveSet& } } +/** + * @brief 计算一个类型在内存中占用的字节数。 + * @param type 需要计算大小的IR类型。 + * @return 该类型占用的字节数。 + */ +unsigned RISCv64RegAlloc::getTypeSizeInBytes(Type* type) { + if (!type) { + assert(false && "Cannot get size of a null type."); + return 0; + } + + switch (type->getKind()) { + // 对于SysY语言,基本类型int和float都占用4字节 + case Type::kInt: + case Type::kFloat: + return 4; + + // 指针类型在RISC-V 64位架构下占用8字节 + // 虽然SysY没有'int*'语法,但数组变量在IR层面本身就是指针类型 + case Type::kPointer: + return 8; + + // 数组类型的总大小 = 元素数量 * 单个元素的大小 + case Type::kArray: { + auto arrayType = type->as(); + // 递归调用以计算元素大小 + return arrayType->getNumElements() * getTypeSizeInBytes(arrayType->getElementType()); + } + + // 其他类型,如Void, Label等不占用栈空间,或者不应该出现在这里 + default: + // 如果遇到未处理的类型,触发断言,方便调试 + assert(false && "Unsupported type for size calculation."); + return 0; + } +} + void RISCv64RegAlloc::analyzeLiveness() { bool changed = true; while (changed) { @@ -259,8 +357,21 @@ void RISCv64RegAlloc::colorGraph() { void RISCv64RegAlloc::rewriteFunction() { StackFrameInfo& frame_info = MFunc->getFrameInfo(); int current_offset = frame_info.locals_size; + + // --- FIX 1: 动态计算溢出槽大小 --- + // 根据溢出虚拟寄存器的真实类型,为其在栈上分配正确大小的空间。 for (unsigned vreg : spilled_vregs) { - current_offset += 4; + // 从反向映射中查找 vreg 对应的 IR Value + assert(vreg_to_value_map.count(vreg) && "Spilled vreg not found in map!"); + Value* val = vreg_to_value_map.at(vreg); + + // 使用辅助函数获取类型大小 + int size = getTypeSizeInBytes(val->getType()); + + // 保持栈8字节对齐 + current_offset += size; + current_offset =
(current_offset + 7) & ~7; + frame_info.spill_offsets[vreg] = -current_offset; } frame_info.spill_size = current_offset - frame_info.locals_size; @@ -271,10 +382,16 @@ void RISCv64RegAlloc::rewriteFunction() { LiveSet use, def; getInstrUseDef(instr_ptr.get(), use, def); + // --- FIX 2: 为溢出的 'use' 操作数插入正确的加载指令 --- for (unsigned vreg : use) { if (spilled_vregs.count(vreg)) { + // 同样地,根据 vreg 的类型决定使用 lw 还是 ld + assert(vreg_to_value_map.count(vreg)); + Value* val = vreg_to_value_map.at(vreg); + RVOpcodes load_op = val->getType()->isPointer() ? RVOpcodes::LD : RVOpcodes::LW; + int offset = frame_info.spill_offsets.at(vreg); - auto load = std::make_unique(RVOpcodes::LW); + auto load = std::make_unique(load_op); load->addOperand(std::make_unique(vreg)); load->addOperand(std::make_unique( std::make_unique(PhysicalReg::S0), @@ -286,10 +403,16 @@ void RISCv64RegAlloc::rewriteFunction() { new_instructions.push_back(std::move(instr_ptr)); + // --- FIX 3: 为溢出的 'def' 操作数插入正确的存储指令 --- for (unsigned vreg : def) { if (spilled_vregs.count(vreg)) { + // 根据 vreg 的类型决定使用 sw 还是 sd + assert(vreg_to_value_map.count(vreg)); + Value* val = vreg_to_value_map.at(vreg); + RVOpcodes store_op = val->getType()->isPointer() ? 
RVOpcodes::SD : RVOpcodes::SW; + int offset = frame_info.spill_offsets.at(vreg); - auto store = std::make_unique(RVOpcodes::SW); + auto store = std::make_unique(store_op); store->addOperand(std::make_unique(vreg)); store->addOperand(std::make_unique( std::make_unique(PhysicalReg::S0), @@ -302,27 +425,39 @@ void RISCv64RegAlloc::rewriteFunction() { mbb->getInstructions() = std::move(new_instructions); } + // 最后的虚拟寄存器到物理寄存器的替换过程保持不变 for (auto& mbb : MFunc->getBlocks()) { for (auto& instr_ptr : mbb->getInstructions()) { for (auto& op_ptr : instr_ptr->getOperands()) { + + // 情况一:操作数本身就是一个寄存器 (例如 add rd, rs1, rs2 中的所有操作数) if(op_ptr->getKind() == MachineOperand::KIND_REG) { auto reg_op = static_cast(op_ptr.get()); if (reg_op->isVirtual()) { unsigned vreg = reg_op->getVRegNum(); if (color_map.count(vreg)) { + // 如果vreg被成功着色,替换为物理寄存器 reg_op->setPReg(color_map.at(vreg)); } else if (spilled_vregs.count(vreg)) { - reg_op->setPReg(PhysicalReg::T6); // 溢出统一用t6 + // 如果vreg被溢出,替换为专用的溢出物理寄存器t6 + reg_op->setPReg(PhysicalReg::T6); } } - } else if (op_ptr->getKind() == MachineOperand::KIND_MEM) { + } + // 情况二:操作数是一个内存地址 (例如 lw rd, offset(rs1) 中的 offset(rs1)) + else if (op_ptr->getKind() == MachineOperand::KIND_MEM) { auto mem_op = static_cast(op_ptr.get()); + // 获取内存操作数内部的“基址寄存器” auto base_reg_op = mem_op->getBase(); + + // 对这个基址寄存器,执行与情况一完全相同的替换逻辑 if(base_reg_op->isVirtual()){ unsigned vreg = base_reg_op->getVRegNum(); if(color_map.count(vreg)) { + // 如果基址vreg被成功着色,替换 base_reg_op->setPReg(color_map.at(vreg)); } else if (spilled_vregs.count(vreg)) { + // 如果基址vreg被溢出,替换为t6 base_reg_op->setPReg(PhysicalReg::T6); } } diff --git a/src/include/RISCv64LLIR.h b/src/include/RISCv64LLIR.h index 86de7d4..d8797bc 100644 --- a/src/include/RISCv64LLIR.h +++ b/src/include/RISCv64LLIR.h @@ -44,9 +44,11 @@ enum class RVOpcodes { // 特殊标记,非指令 LABEL, // 新增伪指令,用于解耦栈帧处理 - FRAME_LOAD, // 从栈帧加载 (AllocaInst) - FRAME_STORE, // 保存到栈帧 (AllocaInst) - FRAME_ADDR, // [新] 获取栈帧变量的地址 + FRAME_LOAD_W, // 从栈帧加载 32位 
Word (对应 lw) + FRAME_LOAD_D, // 从栈帧加载 64位 Doubleword (对应 ld) + FRAME_STORE_W, // 保存 32位 Word 到栈帧 (对应 sw) + FRAME_STORE_D, // 保存 64位 Doubleword 到栈帧 (对应 sd) + FRAME_ADDR, // 获取栈帧变量的地址 }; class MachineOperand; diff --git a/src/include/RISCv64RegAlloc.h b/src/include/RISCv64RegAlloc.h index c786bde..724ad1c 100644 --- a/src/include/RISCv64RegAlloc.h +++ b/src/include/RISCv64RegAlloc.h @@ -49,6 +49,13 @@ private: // 可用的物理寄存器池 std::vector allocable_int_regs; + + // 存储vreg到IR Value*的反向映射 + // 这个map将在run()函数开始时被填充,并在rewriteFunction()中使用。 + std::map vreg_to_value_map; + + // 用于计算类型大小的辅助函数 + unsigned getTypeSizeInBytes(Type* type); }; } // namespace sysy