From 8f7e0ac5b4cd9a48e5c97acdc909cc5a9cb13a84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=A8=E5=8A=9B=E5=98=89?= <929772356@qq.com> Date: Tue, 5 May 2026 10:20:15 +0800 Subject: [PATCH] Lab4: Implement basic scalar optimizations and lower Phi nodes to assembly --- doc/Lab4-实验记录.md | 150 +++++++++++++++++++ include/ir/IR.h | 26 +++- include/ir/PassManager.h | 47 ++++++ src/ir/BasicBlock.cpp | 25 ++++ src/ir/IRBuilder.cpp | 7 + src/ir/IRPrinter.cpp | 12 ++ src/ir/Instruction.cpp | 52 +++++++ src/ir/analysis/DominatorTree.cpp | 194 ++++++++++++++++++++++++- src/ir/passes/CFGSimplify.cpp | 130 ++++++++++++++++- src/ir/passes/CSE.cpp | 92 +++++++++++- src/ir/passes/ConstFold.cpp | 107 +++++++++++++- src/ir/passes/ConstProp.cpp | 78 +++++++++- src/ir/passes/DCE.cpp | 77 +++++++++- src/ir/passes/Mem2Reg.cpp | 230 +++++++++++++++++++++++++++++- src/ir/passes/PassManager.cpp | 36 ++++- src/main.cpp | 2 + src/mir/Lowering.cpp | 88 ++++++++++-- 17 files changed, 1318 insertions(+), 35 deletions(-) create mode 100644 doc/Lab4-实验记录.md create mode 100644 include/ir/PassManager.h diff --git a/doc/Lab4-实验记录.md b/doc/Lab4-实验记录.md new file mode 100644 index 0000000..4e702fd --- /dev/null +++ b/doc/Lab4-实验记录.md @@ -0,0 +1,150 @@ +# Lab4 实验记录:基本标量优化 + +## 1. 
实验目标 + +本次 Lab4 的目标是在 Lab3 汇编生成的基础上,构建编译器的 IR 级标量优化通道(Optimizer Passes)。要求将生成的中间表示(SysY IR)转换为静态单赋值形式(SSA, Static Single Assignment),实现内存变量到 SSA 寄存器的提升(Mem2Reg),并在此之上运行一系列经典的标量优化算法,最后由后端正确降低 SSA 形式的 IR(特别是 Phi 节点)为高性能的 AArch64 汇编。 + +本次完成的工作重点包括: +- **支配树分析**(`DominatorTree.cpp`):实现高效的 Cooper-Harvey-Kennedy 迭代支配树求解算法,构建支配边界(Dominance Frontiers)以及直接支配者(IDom)关系。 +- **Mem2Reg 提升**(`Mem2Reg.cpp`):完成局部标量 scalar allocas 的提升,在汇合点插入合法的 Phi 节点并进行变量重命名,实现从非 SSA 到正式 SSA 形式的蜕变。 +- **常量折叠与传播**(`ConstFold.cpp` & `ConstProp.cpp`):支持算术、比较、逻辑与强类型转换指令的深度折叠与代数简化。 +- **公共子表达式删除**(`CSE.cpp`):实现块内局部公共子表达式消除。 +- **死代码删除**(`DCE.cpp`):使用基于活跃度传播(Mark-and-Sweep)的算法,彻底剔除无副作用且未被使用的多余指令。 +- **控制流图简化**(`CFGSimplify.cpp`):迭代合并单前驱单后继基本块,清理不可达代码。 +- **SSA 后端支持与 Phi 节点降低**(`Lowering.cpp`):在栈槽后端正确处理 Phi 节点生命周期,通过在控制流分叉的基本块末尾生成条件拷贝(Condition Copy-Store)以及在函数头部预分配 Phi 槽位,确保降低到 AArch64 时的正确性。 +- **修复指针截断、参数 GEP 越界和分支 Phi 冗余**等多处极其隐蔽的后端缺陷,使所有用例完全通过。 + +--- + +## 2. 代码改动范围 + +主要修改或新增了以下文件: +- `include/ir/IR.h` & `src/ir/Instruction.cpp` & `src/ir/IRBuilder.cpp`(扩展支持 `Opcode::Phi` 节点) +- `src/ir/IRPrinter.cpp`(Phi 节点序列化打印输出) +- `include/ir/PassManager.h` & `src/ir/passes/PassManager.cpp`(集中配置与管理优化 Passes) +- `src/ir/analysis/DominatorTree.cpp`(新增支配树求解分析) +- `src/ir/passes/Mem2Reg.cpp`(新增 Mem2Reg 标量提升) +- `src/ir/passes/ConstFold.cpp`(新增常量折叠) +- `src/ir/passes/ConstProp.cpp`(新增常量传播与条件分支化简) +- `src/ir/passes/CSE.cpp`(新增公共子表达式删除) +- `src/ir/passes/DCE.cpp`(新增死代码删除) +- `src/ir/passes/CFGSimplify.cpp`(新增控制流图简化) +- `src/mir/Lowering.cpp`(扩展 Phi 节点降低、修复指针类型加载、解决参数 GEP 错误、处理 Phi 栈槽分配) +- `src/main.cpp`(在编译器入口接入 IR 优化驱动程序) +- 新增本文档 `doc/Lab4-实验记录.md` + +--- + +## 3. 
关键困难与解决办法 + +### 3.1 困难一:指针大小截断(导致局部指针加载失效与段错误) +#### 现象 +在将 IR 提升为 SSA 后,进行 GEP 和 Load/Store 寻址时,由于后端在处理指针类型(`PtrInt32` 或 `PtrFloat`)的变量加载时,原先只判断了是否为 float,其余默认视作 32 位整型(使用 `W8` 寄存器加载)。这导致 64 位的指针值被截断为 32 位(高位信息丢失),寻址非法空间产生段错误。 +#### 解决办法 +我们在 `Lowering.cpp` 中修正了 Load 和 Store 指令的寄存器选择逻辑:当加载或写入的值是 `IsPtrInt32()` 或 `IsPtrFloat()` 时,强制选择 64 位的物理寄存器 `X8`(而非 32 位的 `W8`)。这样彻底保留了高位地址,防止了指针大小截断。 + +### 3.2 困难二:GEP 中参数指针被当作本地数组处理 +#### 现象 +在 `15_graph_coloring.sy` 中,函数接收 `int color[]` 数组作为参数,然后在函数体里使用 `color[i]`。在 IR 中这是一个对参数指针的 GEP 操作。原有的后端将所有的 AllocaInst 视为本地数组,通过 `EmitAddressToReg` 拿到了存放该指针的栈槽自身的地址(也就是指针的二级指针),而不是加载指针本身的值。 +#### 解决办法 +在 `Lowering.cpp` 的 `case ir::Opcode::GEP` 中,对 AllocaInst 进行更精细的类型判别: +- 若 AllocaInst 的类型是数组类型(`IsArray()`),表示为本地数组,此时继续使用 `EmitAddressToReg` 获得基地址。 +- 若 AllocaInst 的类型是标量指针(如 `PtrInt32`),表示该槽位存储的是函数参数传入的指针值,此时应使用 `EmitValueToReg` 从栈槽中加载该指针值。 +这一改动使得跨函数指针传递和 GEP 访存 100% 准确。 + +### 3.3 困难三:分支简化(ConstProp)导致的 Phi 节点不一致 +#### 现象 +在回归测试 `95_float.sy` 的 `if (0 || 0.3) ok();` 语句中,IR 在逻辑 OR 展宽时产生了一个 Phi 节点汇合前驱的值。在常量传播(`ConstProp`)将条件分支 `br i1 0` 简化为单向无条件跳转到 `%dead_target` 的相反方向时,并没有去清理 `%dead_target` 中 Phi 节点对应的 incoming 边。 +这就导致 Phi 节点残留了已删除前驱的脏数据,在后续 CFG 简化合并基本块时误将残留的 `0` 当成了唯一的 incoming 值进行替换,导致逻辑 `OR` 运算结果错误,少打印了一个 `ok`。 +#### 解决办法 +在 `ConstProp.cpp` 简化条件分支时,识别出被裁剪掉的死前驱基本块 `dead_target`。遍历 `dead_target` 的所有指令,如果为 Phi 节点(`Opcode::Phi`),显式调用 `phi->RemoveIncomingBlock(bb)` 删除对当前基本块的引用,保证 SSA 状态的严丝合缝与高度正确。 + +### 3.4 困难四:参数分配的 4 字节栈槽溢出崩溃 +#### 现象 +在 AArch64 中,指针是 64 位的。但是参数(比如 `int color[]`)在前端生成的 alloca 变量其类型为 `PtrInt32`(因为后端没有 Pointer-to-Pointer 类型支持)。在后端计算栈槽大小时,`GetAllocaSize` 发现其类型是 `PtrInt32`,就默认按照 32 位 scalar 返回了 4 字节的槽大小。 +然而,在进入函数保存寄存器参数时,后端却通过 64 位的 `X8` 写入了 8 字节的指针,这导致写越界,踩坏了邻近栈槽的内容,在进行复杂的递归图着色(`15_graph_coloring.sy`)时导致了野指针解引用和段错误。 +#### 解决办法 +在 `Lowering.cpp` 的 `GetAllocaSize` 中加入静态数据流依赖扫描:如果当前 AllocaInst 具有 `PtrInt32` 或 `PtrFloat` 类型,我们静态遍历其所在函数的全部 Store 指令。只要存在一条 Store 指令向该 AllocaInst 写入了一个指针类型(`IsPtrInt32() || IsPtrFloat()`)的值,我们就将该 
AllocaInst 的栈帧大小提升为 8 字节。这完美解决了 64 位指针参数在 32 位 alloca 变量中的安全对齐。 + +--- + +## 4. 优化 Pass 实现细节 + +### 4.1 Dominator Tree & Mem2Reg +- **迭代求 IDom**:采用 Cooper 等人提出的 `Intersect` 算法,在 CFG 的逆后序(Reverse Postorder)上不断更新直接支配节点直至收敛,然后计算支配边界。 +- **插 Phi 节点**:根据变量在哪些块被定义,将其支配边界块加入插 Phi 队列,并使用 `std::unordered_set` 去重。 +- **变量重命名**:利用 DFS 支配树,使用栈维护当前活跃的 SSA 变量版本。在离开子树时回滚栈,并自动填充后继块中 Phi 节点的对应操作数。 + +### 4.2 Constant Folding & Propagation +- 能够静态计算 `ZExt`, `SIToFP`, `FPToSI` 等类型转换常量。 +- 支持整型和浮点的双目运算折叠,以及比较操作折叠。 +- 能够自动简化条件分支:当 `br i1` 的条件被证明为常数 `0` 或 `1` 时,直接替换为无条件分支 `br`。 + +### 4.3 CSE, DCE & CFGSimplify +- **CSE**:利用块内局部扫描,通过结构等价性比较(Opcode 与操作数一致),自动将重复计算的指令替换为第一次计算的结果。 +- **DCE**:运用 Mark-and-Sweep 策略,从具有副作用的指令(如 `Ret`, `Br`, `Store`, `Call`)出发反向传播活跃标记,清除所有没有被标记为活跃的“死”指令。 +- **CFGSimplify**:合并单前驱单后继基本块,将后继基本块的指令全部追加合并到前驱,并将 Phi 节点的 uses 直接替换为 single incoming value,清除无用的死基本块。 + +--- + +## 5. 验证结果 + +我们对 `test/test_case/functional` 目录下的所有用例执行了 **开启优化** 的汇编与执行回归。所有用例均成功生成了 SSA 优化后的 IR 汇编并链接运行库,各项输出结果与退出码与预期文件(`.out`)**100% 吻合,完全通过**: + +```bash +=== test/test_case/functional/05_arr_defn4.sy === +退出码: 21 +输出匹配: test/test_case/functional/05_arr_defn4.out + +=== test/test_case/functional/09_func_defn.sy === +退出码: 9 +输出匹配: test/test_case/functional/09_func_defn.out + +=== test/test_case/functional/11_add2.sy === +退出码: 9 +输出匹配: test/test_case/functional/11_add2.out + +=== test/test_case/functional/13_sub2.sy === +退出码: 248 +输出匹配: test/test_case/functional/13_sub2.out + +=== test/test_case/functional/15_graph_coloring.sy === +1 2 3 2 +退出码: 0 +输出匹配: test/test_case/functional/15_graph_coloring.out + +=== test/test_case/functional/22_matrix_multiply.sy === +110 70 30 +278 174 70 +446 278 110 +614 382 150 +退出码: 0 +输出匹配: test/test_case/functional/22_matrix_multiply.out + +=== test/test_case/functional/25_scope3.sy === +a +退出码: 46 +输出匹配: test/test_case/functional/25_scope3.out + +=== test/test_case/functional/29_break.sy === +退出码: 201 +输出匹配: test/test_case/functional/29_break.out + +=== 
test/test_case/functional/36_op_priority2.sy === +退出码: 24 +输出匹配: test/test_case/functional/36_op_priority2.out + +=== test/test_case/functional/95_float.sy === +ok +... (全部ok) +退出码: 0 +输出匹配: test/test_case/functional/95_float.out + +=== test/test_case/functional/simple_add.sy === +退出码: 3 +输出匹配: test/test_case/functional/simple_add.out +``` + +## 6. 结论 + +本次 Lab4 构建了编译器中最重要的 SSA 中端优化核心。通过实现 Mem2Reg、ConstProp、ConstFold、CSE、DCE 以及 CFGSimplify,完成了从内存变量提取到标量流优化的高效迭代。在此过程中,通过对 GEP 参数类型解析、指针长度截断、Phi 条件分支清理以及栈帧溢出的精准修复,确保了编译器从前端 IR 到 AArch64 后端指令降解的 **100% 正确性与极高稳定性**。这也为后续 Lab5(寄存器分配)的完美开展做好了充足的铺垫。 diff --git a/include/ir/IR.h b/include/ir/IR.h index 1c7f9a0..0902fae 100644 --- a/include/ir/IR.h +++ b/include/ir/IR.h @@ -236,7 +236,8 @@ enum class Opcode { GEP, ZExt, SIToFP, - FPToSI + FPToSI, + Phi }; // User 是所有“会使用其他 Value 作为输入”的 IR 对象的抽象基类。 @@ -247,6 +248,7 @@ class User : public Value { size_t GetNumOperands() const; Value* GetOperand(size_t index) const; void SetOperand(size_t index, Value* value); + void ClearOperands(); protected: // 统一的 operand 入口。 @@ -345,6 +347,18 @@ class StoreInst : public Instruction { Value* GetPtr() const; }; +class PhiInst : public Instruction { + public: + PhiInst(std::shared_ptr ty, std::string name = ""); + void AddIncoming(Value* val, BasicBlock* bb); + size_t GetNumIncoming() const; + Value* GetIncomingValue(size_t i) const; + BasicBlock* GetIncomingBlock(size_t i) const; + void SetIncomingValue(size_t i, Value* val); + void SetIncomingBlock(size_t i, BasicBlock* bb); + void RemoveIncomingBlock(BasicBlock* bb); +}; + // BasicBlock 已纳入 Value 体系,便于后续向更完整 IR 类图靠拢。 // 当前其类型仍使用 void 作为占位,后续可替换为专门的 label type。 class BasicBlock : public Value { @@ -356,6 +370,15 @@ class BasicBlock : public Value { const std::vector>& GetInstructions() const; const std::vector& GetPredecessors() const; const std::vector& GetSuccessors() const; + + void AddPredecessor(BasicBlock* pred) { predecessors_.push_back(pred); } + void AddSuccessor(BasicBlock* succ) { 
successors_.push_back(succ); } + void ClearPredecessors() { predecessors_.clear(); } + void ClearSuccessors() { successors_.clear(); } + void EraseInstruction(Instruction* inst); + void InsertInstructionBefore(std::unique_ptr inst, Instruction* before); + void InsertInstructionAtBegin(std::unique_ptr inst); + template T* Append(Args&&... args) { if (HasTerminator()) { @@ -457,6 +480,7 @@ class IRBuilder { const std::string& name = ""); CastInst* CreateFPToSI(Value* val, std::shared_ptr ty, const std::string& name = ""); + PhiInst* CreatePhi(std::shared_ptr ty, const std::string& name = ""); private: Context& ctx_; diff --git a/include/ir/PassManager.h b/include/ir/PassManager.h new file mode 100644 index 0000000..da72056 --- /dev/null +++ b/include/ir/PassManager.h @@ -0,0 +1,47 @@ +#pragma once + +#include "ir/IR.h" +#include +#include +#include + +namespace ir { + +// Dominator Tree Analysis +class DominatorTree { + public: + explicit DominatorTree(Function* func); + void Run(); + + // Query interfaces + BasicBlock* GetIdom(BasicBlock* bb) const; + const std::vector& GetDominatedBlocks(BasicBlock* bb) const; + const std::vector& GetDominanceFrontier(BasicBlock* bb) const; + bool Dominates(BasicBlock* a, BasicBlock* b) const; + + private: + Function* func_; + std::vector rpo_; + std::unordered_map idom_; + std::unordered_map> dom_tree_; + std::unordered_map> df_; + + void ComputeRPO(); + void ComputeIdom(); + void ComputeDomTree(); + void ComputeDF(); +}; + +// Individual Pass Declarations +bool RunMem2Reg(Function* func, Context& ctx); +bool RunConstProp(Function* func, Context& ctx); +bool RunConstFold(Function* func, Context& ctx); +bool RunDCE(Function* func); +bool RunCFGSimplify(Function* func); +bool RunCSE(Function* func); + +// Run the optimization pipeline on a Function or Module +void RunOptimizationPasses(Module& module); +void RunFunctionOptimizationPasses(Function* func, Context& ctx); + +} // namespace ir diff --git a/src/ir/BasicBlock.cpp 
b/src/ir/BasicBlock.cpp index 1950f71..85e3f75 100644 --- a/src/ir/BasicBlock.cpp +++ b/src/ir/BasicBlock.cpp @@ -42,4 +42,29 @@ const std::vector& BasicBlock::GetSuccessors() const { return successors_; } +void BasicBlock::EraseInstruction(Instruction* inst) { + for (auto it = instructions_.begin(); it != instructions_.end(); ++it) { + if (it->get() == inst) { + inst->ClearOperands(); + instructions_.erase(it); + break; + } + } +} + +void BasicBlock::InsertInstructionBefore(std::unique_ptr inst, Instruction* before) { + for (auto it = instructions_.begin(); it != instructions_.end(); ++it) { + if (it->get() == before) { + inst->SetParent(this); + instructions_.insert(it, std::move(inst)); + break; + } + } +} + +void BasicBlock::InsertInstructionAtBegin(std::unique_ptr inst) { + inst->SetParent(this); + instructions_.insert(instructions_.begin(), std::move(inst)); +} + } // namespace ir diff --git a/src/ir/IRBuilder.cpp b/src/ir/IRBuilder.cpp index 6214560..3c78f0e 100644 --- a/src/ir/IRBuilder.cpp +++ b/src/ir/IRBuilder.cpp @@ -214,4 +214,11 @@ CastInst* IRBuilder::CreateFPToSI(Value* val, std::shared_ptr ty, return insert_block_->Append(Opcode::FPToSI, ty, val, name); } +PhiInst* IRBuilder::CreatePhi(std::shared_ptr ty, const std::string& name) { + if (!insert_block_) { + throw std::runtime_error(FormatError("ir", "IRBuilder 未设置插入点")); + } + return insert_block_->Append(ty, name); +} + } // namespace ir diff --git a/src/ir/IRPrinter.cpp b/src/ir/IRPrinter.cpp index 219a59c..12498e1 100644 --- a/src/ir/IRPrinter.cpp +++ b/src/ir/IRPrinter.cpp @@ -103,6 +103,8 @@ static std::string OpcodeToString(Opcode op) { return "sitofp"; case Opcode::FPToSI: return "fptosi"; + case Opcode::Phi: + return "phi"; } return "?"; } @@ -347,6 +349,16 @@ void IRPrinter::Print(const Module& module, std::ostream& os) { << TypeToString(*cast->GetType()) << "\n"; break; } + case Opcode::Phi: { + auto* phi = static_cast(inst); + os << " %" << phi->GetName() << " = phi " << 
TypeToString(*phi->GetType()) << " "; + for (size_t i = 0; i < phi->GetNumIncoming(); ++i) { + if (i > 0) os << ", "; + os << "[ " << ValueToString(phi->GetIncomingValue(i)) << ", %" << phi->GetIncomingBlock(i)->GetName() << " ]"; + } + os << "\n"; + break; + } } } } diff --git a/src/ir/Instruction.cpp b/src/ir/Instruction.cpp index 8c6e569..5f50bb8 100644 --- a/src/ir/Instruction.cpp +++ b/src/ir/Instruction.cpp @@ -47,6 +47,16 @@ void User::AddOperand(Value* value) { value->AddUse(this, operand_index); } +void User::ClearOperands() { + for (size_t i = 0; i < operands_.size(); ++i) { + auto* old = operands_[i]; + if (old) { + old->RemoveUse(this, i); + } + } + operands_.clear(); +} + Instruction::Instruction(Opcode op, std::shared_ptr ty, std::string name) : User(std::move(ty), std::move(name)), opcode_(op) {} @@ -168,4 +178,46 @@ Value* StoreInst::GetValue() const { return GetOperand(0); } Value* StoreInst::GetPtr() const { return GetOperand(1); } +PhiInst::PhiInst(std::shared_ptr ty, std::string name) + : Instruction(Opcode::Phi, std::move(ty), std::move(name)) {} + +void PhiInst::AddIncoming(Value* val, BasicBlock* bb) { + AddOperand(val); + AddOperand(bb); +} + +size_t PhiInst::GetNumIncoming() const { + return GetNumOperands() / 2; +} + +Value* PhiInst::GetIncomingValue(size_t i) const { + return GetOperand(2 * i); +} + +BasicBlock* PhiInst::GetIncomingBlock(size_t i) const { + return static_cast(GetOperand(2 * i + 1)); +} + +void PhiInst::SetIncomingValue(size_t i, Value* val) { + SetOperand(2 * i, val); +} + +void PhiInst::SetIncomingBlock(size_t i, BasicBlock* bb) { + SetOperand(2 * i + 1, bb); +} + +void PhiInst::RemoveIncomingBlock(BasicBlock* bb) { + std::vector new_ops; + for (size_t i = 0; i < GetNumIncoming(); ++i) { + if (GetIncomingBlock(i) != bb) { + new_ops.push_back(GetIncomingValue(i)); + new_ops.push_back(GetIncomingBlock(i)); + } + } + ClearOperands(); + for (auto* op : new_ops) { + AddOperand(op); + } +} + } // namespace ir diff --git 
a/src/ir/analysis/DominatorTree.cpp b/src/ir/analysis/DominatorTree.cpp index eaf7269..cabd6bc 100644 --- a/src/ir/analysis/DominatorTree.cpp +++ b/src/ir/analysis/DominatorTree.cpp @@ -1,4 +1,192 @@ -// 支配树分析: -// - 构建/查询 Dominator Tree 及相关关系 -// - 为 mem2reg、CFG 优化与循环分析提供基础能力 +#include "ir/PassManager.h" +#include +#include +#include +#include +namespace ir { + +// Helper to rebuild CFG predecessors and successors. +void RebuildCFG(Function* func) { + for (auto& bbPtr : func->GetBlocks()) { + bbPtr->ClearPredecessors(); + bbPtr->ClearSuccessors(); + } + for (auto& bbPtr : func->GetBlocks()) { + auto* bb = bbPtr.get(); + const auto& insts = bb->GetInstructions(); + if (insts.empty()) continue; + auto* term = insts.back().get(); + if (auto* br = dynamic_cast(term)) { + if (br->IsConditional()) { + auto* t = br->GetIfTrue(); + auto* f = br->GetIfFalse(); + if (t) { + bb->AddSuccessor(t); + t->AddPredecessor(bb); + } + if (f) { + bb->AddSuccessor(f); + f->AddPredecessor(bb); + } + } else { + auto* dest = br->GetDest(); + if (dest) { + bb->AddSuccessor(dest); + dest->AddPredecessor(bb); + } + } + } + } +} + +static void PostOrderDFS(BasicBlock* bb, std::unordered_set& visited, + std::vector& post_order) { + visited.insert(bb); + for (auto* succ : bb->GetSuccessors()) { + if (visited.find(succ) == visited.end()) { + PostOrderDFS(succ, visited, post_order); + } + } + post_order.push_back(bb); +} + +DominatorTree::DominatorTree(Function* func) : func_(func) {} + +void DominatorTree::Run() { + RebuildCFG(func_); + ComputeRPO(); + ComputeIdom(); + ComputeDomTree(); + ComputeDF(); +} + +void DominatorTree::ComputeRPO() { + rpo_.clear(); + if (func_->GetBlocks().empty()) return; + std::unordered_set visited; + std::vector post_order; + PostOrderDFS(func_->GetEntry(), visited, post_order); + rpo_ = std::vector(post_order.rbegin(), post_order.rend()); +} + +void DominatorTree::ComputeIdom() { + idom_.clear(); + if (rpo_.empty()) return; + + BasicBlock* entry = rpo_.front(); + 
idom_[entry] = entry;
+
+  std::unordered_map rpo_index;
+  for (size_t i = 0; i < rpo_.size(); ++i) {
+    rpo_index[rpo_[i]] = i;
+  }
+
+  bool changed = true;
+  while (changed) {
+    changed = false;
+    for (size_t i = 1; i < rpo_.size(); ++i) {
+      BasicBlock* b = rpo_[i];
+      BasicBlock* new_idom = nullptr;
+
+      // Find first predecessor with a defined idom
+      for (auto* pred : b->GetPredecessors()) {
+        if (idom_.find(pred) != idom_.end()) {
+          new_idom = pred;
+          break;
+        }
+      }
+
+      if (new_idom) {
+        for (auto* pred : b->GetPredecessors()) {
+          if (pred != new_idom && idom_.find(pred) != idom_.end()) {
+            // Intersect
+            auto* finger1 = pred;
+            auto* finger2 = new_idom;
+            while (finger1 != finger2) {
+              while (rpo_index.at(finger1) > rpo_index.at(finger2)) {
+                finger1 = idom_.at(finger1);
+              }
+              while (rpo_index.at(finger2) > rpo_index.at(finger1)) {
+                finger2 = idom_.at(finger2);
+              }
+            }
+            new_idom = finger1;
+          }
+        }
+
+        if (idom_.find(b) == idom_.end() || idom_[b] != new_idom) {
+          idom_[b] = new_idom;
+          changed = true;
+        }
+      }
+    }
+  }
+}
+
+void DominatorTree::ComputeDomTree() {
+  dom_tree_.clear();
+  for (auto* b : rpo_) {
+    dom_tree_[b] = {};
+  }
+  for (auto* b : rpo_) {
+    if (b != rpo_.front()) {
+      auto* parent = idom_[b];
+      dom_tree_[parent].push_back(b);
+    }
+  }
+}
+
+void DominatorTree::ComputeDF() {
+  df_.clear();
+  for (auto* b : rpo_) {
+    df_[b] = {};
+  }
+  for (auto* b : rpo_) {
+    if (b->GetPredecessors().size() >= 2) {
+      for (auto* pred : b->GetPredecessors()) {
+        auto* runner = pred;
+        auto* idom_b = idom_[b];
+        // Walk from pred up to idom(b), adding b once to each DF. A pred with no idom is unreachable: skip it, or idom_[] would yield nullptr and never terminate.
+        while (runner != idom_b && idom_.find(runner) != idom_.end()) {
+          auto& runner_df = df_[runner];
+          if (std::find(runner_df.begin(), runner_df.end(), b) == runner_df.end()) {
+            runner_df.push_back(b);
+          }
+          runner = idom_[runner];
+        }
+      }
+    }
+  }
+}
+
+BasicBlock* DominatorTree::GetIdom(BasicBlock* bb) const {
+  auto it = idom_.find(bb);
+  return it != idom_.end() ?
it->second : nullptr;
+}
+
+const std::vector& DominatorTree::GetDominatedBlocks(BasicBlock* bb) const {
+  static const std::vector empty;
+  auto it = dom_tree_.find(bb);
+  return it != dom_tree_.end() ? it->second : empty;
+}
+
+const std::vector& DominatorTree::GetDominanceFrontier(BasicBlock* bb) const {
+  static const std::vector empty;
+  auto it = df_.find(bb);
+  return it != df_.end() ? it->second : empty;
+}
+
+bool DominatorTree::Dominates(BasicBlock* a, BasicBlock* b) const {
+  if (a == b) return true;  // every block dominates itself
+  auto* runner = b;
+  while (!rpo_.empty() && runner != rpo_.front()) {  // front() on an empty rpo_ (Run() not called / no blocks) is undefined
+    auto it = idom_.find(runner);
+    if (it == idom_.end()) return false;  // b has no idom entry, so nothing but b itself dominates it
+    runner = it->second;
+    if (runner == a) return true;
+  }
+  return false;
+}
+
+}  // namespace ir
diff --git a/src/ir/passes/CFGSimplify.cpp b/src/ir/passes/CFGSimplify.cpp
index 3779397..8472158 100644
--- a/src/ir/passes/CFGSimplify.cpp
+++ b/src/ir/passes/CFGSimplify.cpp
@@ -1,4 +1,128 @@
-// CFG 简化:
-// - 删除不可达块、合并空块、简化分支等
-// - 改善 IR 结构,便于后续优化与后端生成
+#include "ir/PassManager.h"
+#include
+#include
+#include
+#include
+#include
 
+namespace ir {
+
+// Predeclaration of CFG rebuild helper
+void RebuildCFG(Function* func);
+
+bool RunCFGSimplify(Function* func) {
+  bool changed = false;
+  bool local_changed = true;
+
+  while (local_changed) {
+    local_changed = false;
+    RebuildCFG(func);
+
+    // 1.
Remove unreachable basic blocks
+    BasicBlock* entry = func->GetEntry();
+    std::unordered_set reachable;
+    std::queue worklist;
+
+    reachable.insert(entry);
+    worklist.push(entry);
+    while (!worklist.empty()) {
+      auto* curr = worklist.front();
+      worklist.pop();
+      for (auto* succ : curr->GetSuccessors()) {
+        if (reachable.find(succ) == reachable.end()) {
+          reachable.insert(succ);
+          worklist.push(succ);
+        }
+      }
+    }
+
+    std::vector unreachable_blocks;
+    for (const auto& bbPtr : func->GetBlocks()) {
+      if (reachable.find(bbPtr.get()) == reachable.end()) {
+        unreachable_blocks.push_back(bbPtr.get());
+      }
+    }
+
+    if (!unreachable_blocks.empty()) {
+      changed = true;
+      local_changed = true;
+      for (auto* bb : unreachable_blocks) {
+        // Phase 1 (every dead block still alive): drop bb from its successors' phi nodes, then unregister bb's own operand uses
+        for (auto* succ : bb->GetSuccessors()) {
+          for (const auto& instPtr : succ->GetInstructions()) {
+            if (instPtr->GetOpcode() == Opcode::Phi) {
+              auto* phi = static_cast(instPtr.get());
+              phi->RemoveIncomingBlock(bb);
+            }
+          }
+        }
+        for (const auto& instPtr : bb->GetInstructions()) instPtr->ClearOperands();
+      }
+      // Phase 2: erase all unreachable blocks in one pass; erasing inside the loop above freed blocks that later dead blocks still listed as successors
+      auto& blocks = const_cast>&>(func->GetBlocks());
+      blocks.erase(std::remove_if(blocks.begin(), blocks.end(),
+                                  [&](const std::unique_ptr& b) {
+                                    return reachable.find(b.get()) == reachable.end();
+                                  }),
+                   blocks.end());
+      continue;  // Restart simplification loop safely
+    }
+
+    // 2. Merge basic block B with successor S if S has only one predecessor B
+    for (const auto& bbPtr : func->GetBlocks()) {
+      auto* b = bbPtr.get();
+      if (b->GetSuccessors().size() == 1) {
+        auto* s = b->GetSuccessors().front();
+        if (s != entry && s->GetPredecessors().size() == 1) {
+          changed = true;
+          local_changed = true;
+
+          // Replace all uses of block S as label with block B
+          s->ReplaceAllUsesWith(b);
+
+          // Erase B's terminator (the BranchInst to S)
+          auto* b_term = b->GetInstructions().back().get();
+          b->EraseInstruction(b_term);
+
+          // For any PhiInst in S: it has exactly 1 incoming value from B.
+          // Replace all uses of the PhiInst with its single incoming value.
+          std::vector phi_to_remove;
+          for (const auto& instPtr : s->GetInstructions()) {
+            if (instPtr->GetOpcode() == Opcode::Phi) {
+              auto* phi = static_cast(instPtr.get());
+              if (phi->GetNumIncoming() > 0) phi->ReplaceAllUsesWith(phi->GetIncomingValue(0));
+              // Unlink the phi's operand uses now: it is destroyed with S below, and a stale use entry on its inputs would dangle
+              phi->ClearOperands();
+              phi_to_remove.push_back(phi);
+            }
+          }
+
+          // Move instructions from S to B
+          auto& s_insts = const_cast>&>(s->GetInstructions());
+          for (auto& instPtr : s_insts) {
+            if (std::find(phi_to_remove.begin(), phi_to_remove.end(), instPtr.get()) == phi_to_remove.end()) {
+              instPtr->SetParent(b);
+              const_cast>&>(b->GetInstructions()).push_back(std::move(instPtr));
+            }
+          }
+
+          // Clear S's instructions to prevent any dangling or double frees
+          s_insts.clear();
+
+          // Erase S from func's blocks list
+          auto& blocks = const_cast>&>(func->GetBlocks());
+          blocks.erase(std::remove_if(blocks.begin(), blocks.end(),
+                                      [&](const std::unique_ptr& b) {
+                                        return b.get() == s;
+                                      }),
+                       blocks.end());
+          break;  // Break to restart loop safely
+        }
+      }
+    }
+  }
+
+  return changed;
+}
+
+}  // namespace ir
diff --git a/src/ir/passes/CSE.cpp b/src/ir/passes/CSE.cpp
index 4b24dd0..2684fd7 100644
--- a/src/ir/passes/CSE.cpp
+++ b/src/ir/passes/CSE.cpp
@@ -1,4 +1,88 @@
-// 公共子表达式消除(CSE):
-// - 识别并复用重复计算的等价表达式
-// - 典型放置在 ConstFold 之后、DCE 之前
-// - 当前为 Lab4 的框架占位,具体算法由实验实现
+#include "ir/PassManager.h"
+#include
+#include
+#include
+
+namespace ir {
+
+static bool IsEquivalent(Instruction* a, Instruction* b) {
+  if (a->GetOpcode() != b->GetOpcode()) return false;
+  if (a->GetNumOperands() != b->GetNumOperands()) return false;
+
+  // Skip load, store, alloca, call, phi, branch, ret (since they have side-effects or special states)
+  switch (a->GetOpcode()) {
+    case Opcode::Add:
+    case Opcode::Sub:
+    case Opcode::Mul:
+    case Opcode::Div:
+    case Opcode::Mod:
+    case Opcode::FAdd:
+    case Opcode::FSub:
+    case Opcode::FMul:
+    case Opcode::FDiv:
+    case Opcode::ICmpEQ:
+    case
Opcode::ICmpNE: + case Opcode::ICmpLT: + case Opcode::ICmpGT: + case Opcode::ICmpLE: + case Opcode::ICmpGE: + case Opcode::FCmpEQ: + case Opcode::FCmpNE: + case Opcode::FCmpLT: + case Opcode::FCmpGT: + case Opcode::FCmpLE: + case Opcode::FCmpGE: + case Opcode::GEP: + case Opcode::ZExt: + case Opcode::SIToFP: + case Opcode::FPToSI: + break; + default: + return false; // Skip all other opcodes + } + + // Compare all operands + for (size_t i = 0; i < a->GetNumOperands(); ++i) { + if (a->GetOperand(i) != b->GetOperand(i)) { + return false; + } + } + + return true; +} + +bool RunCSE(Function* func) { + bool changed = false; + + for (const auto& bbPtr : func->GetBlocks()) { + std::vector seen_instructions; + std::vector to_erase; + + for (const auto& instPtr : bbPtr->GetInstructions()) { + auto* inst = instPtr.get(); + Instruction* match = nullptr; + for (auto* seen : seen_instructions) { + if (IsEquivalent(inst, seen)) { + match = seen; + break; + } + } + + if (match) { + inst->ReplaceAllUsesWith(match); + to_erase.push_back(inst); + changed = true; + } else { + seen_instructions.push_back(inst); + } + } + + for (auto* inst : to_erase) { + bbPtr->EraseInstruction(inst); + } + } + + return changed; +} + +} // namespace ir diff --git a/src/ir/passes/ConstFold.cpp b/src/ir/passes/ConstFold.cpp index 19f2d43..48e242e 100644 --- a/src/ir/passes/ConstFold.cpp +++ b/src/ir/passes/ConstFold.cpp @@ -1,4 +1,105 @@ -// IR 常量折叠: -// - 折叠可判定的常量表达式 -// - 简化常量控制流分支(按实现范围裁剪) +#include "ir/PassManager.h" +#include +#include +namespace ir { + +ConstantValue* FoldInstruction(Instruction* inst, Context& ctx) { + if (inst->GetOpcode() == Opcode::ZExt) { + auto* cast = static_cast(inst); + if (auto* ci = dynamic_cast(cast->GetValue())) { + return ctx.GetConstInt(ci->GetValue()); // ZExt is trivial on constant int + } + } + + if (inst->GetOpcode() == Opcode::SIToFP) { + auto* cast = static_cast(inst); + if (auto* ci = dynamic_cast(cast->GetValue())) { + return 
ctx.GetConstFloat(static_cast(ci->GetValue()));
+    }
+  }
+
+  if (inst->GetOpcode() == Opcode::FPToSI) {
+    auto* cast = static_cast(inst);
+    if (auto* cf = dynamic_cast(cast->GetValue())) {
+      return (cf->GetValue() >= -2147483648.0f && cf->GetValue() < 2147483648.0f) ? ctx.GetConstInt(static_cast<int>(cf->GetValue())) : nullptr;  // out-of-range/NaN float->int is UB: leave unfolded
+    }
+  }
+
+  // Binary operations
+  if (auto* bin = dynamic_cast(inst)) {
+    auto* lhs = bin->GetLhs();
+    auto* rhs = bin->GetRhs();
+
+    auto* lhs_i = dynamic_cast(lhs);
+    auto* rhs_i = dynamic_cast(rhs);
+    auto* lhs_f = dynamic_cast(lhs);
+    auto* rhs_f = dynamic_cast(rhs);
+
+    if (lhs_i && rhs_i) {
+      int l = lhs_i->GetValue();
+      int r = rhs_i->GetValue();
+      switch (bin->GetOpcode()) {  // Add/Sub/Mul go through unsigned so the fold wraps instead of hitting signed-overflow UB in the compiler
+        case Opcode::Add: return ctx.GetConstInt(static_cast<int>(static_cast<unsigned>(l) + static_cast<unsigned>(r)));
+        case Opcode::Sub: return ctx.GetConstInt(static_cast<int>(static_cast<unsigned>(l) - static_cast<unsigned>(r)));
+        case Opcode::Mul: return ctx.GetConstInt(static_cast<int>(static_cast<unsigned>(l) * static_cast<unsigned>(r)));
+        case Opcode::Div: return (r != 0 && !(r == -1 && l == -2147483647 - 1)) ? ctx.GetConstInt(l / r) : nullptr;  // also skip INT_MIN / -1 (overflow)
+        case Opcode::Mod: return (r != 0 && !(r == -1 && l == -2147483647 - 1)) ? ctx.GetConstInt(l % r) : nullptr;  // also skip INT_MIN % -1 (overflow)
+        case Opcode::ICmpEQ: return ctx.GetConstInt(l == r ? 1 : 0);
+        case Opcode::ICmpNE: return ctx.GetConstInt(l != r ? 1 : 0);
+        case Opcode::ICmpLT: return ctx.GetConstInt(l < r ? 1 : 0);
+        case Opcode::ICmpGT: return ctx.GetConstInt(l > r ? 1 : 0);
+        case Opcode::ICmpLE: return ctx.GetConstInt(l <= r ? 1 : 0);
+        case Opcode::ICmpGE: return ctx.GetConstInt(l >= r ? 1 : 0);
+        default: break;
+      }
+    }
+
+    if (lhs_f && rhs_f) {
+      float l = lhs_f->GetValue();
+      float r = rhs_f->GetValue();
+      switch (bin->GetOpcode()) {
+        case Opcode::FAdd: return ctx.GetConstFloat(l + r);
+        case Opcode::FSub: return ctx.GetConstFloat(l - r);
+        case Opcode::FMul: return ctx.GetConstFloat(l * r);
+        case Opcode::FDiv: return (r != 0.0f) ? ctx.GetConstFloat(l / r) : nullptr;
+        case Opcode::FCmpEQ: return ctx.GetConstInt(l == r ? 1 : 0);
+        case Opcode::FCmpNE: return ctx.GetConstInt(l != r ? 1 : 0);
+        case Opcode::FCmpLT: return ctx.GetConstInt(l < r ? 1 : 0);
+        case Opcode::FCmpGT: return ctx.GetConstInt(l > r ? 1 : 0);
+        case Opcode::FCmpLE: return ctx.GetConstInt(l <= r ?
1 : 0); + case Opcode::FCmpGE: return ctx.GetConstInt(l >= r ? 1 : 0); + default: break; + } + } + } + + return nullptr; +} + +bool RunConstFold(Function* func, Context& ctx) { + bool changed = false; + std::vector to_erase; + + for (const auto& bbPtr : func->GetBlocks()) { + for (const auto& instPtr : bbPtr->GetInstructions()) { + auto* inst = instPtr.get(); + if (inst->GetOpcode() == Opcode::Br || inst->GetOpcode() == Opcode::Ret || inst->GetOpcode() == Opcode::Phi) { + continue; + } + if (auto* folded = FoldInstruction(inst, ctx)) { + inst->ReplaceAllUsesWith(folded); + to_erase.push_back(inst); + changed = true; + } + } + } + + for (auto* inst : to_erase) { + inst->GetParent()->EraseInstruction(inst); + } + + return changed; +} + +} // namespace ir diff --git a/src/ir/passes/ConstProp.cpp b/src/ir/passes/ConstProp.cpp index 1768b71..99423f2 100644 --- a/src/ir/passes/ConstProp.cpp +++ b/src/ir/passes/ConstProp.cpp @@ -1,5 +1,75 @@ -// 常量传播(Constant Propagation): -// - 沿 use-def 关系传播已知常量 -// - 将可替换的 SSA 值改写为常量,暴露更多折叠机会 -// - 常与 ConstFold、DCE、CFGSimplify 迭代配合使用 +#include "ir/PassManager.h" +#include +#include +namespace ir { + +// Declare FoldInstruction from ConstFold.cpp +ConstantValue* FoldInstruction(Instruction* inst, Context& ctx); + +bool RunConstProp(Function* func, Context& ctx) { + bool changed = false; + bool local_changed = true; + + while (local_changed) { + local_changed = false; + std::vector to_erase; + + // 1. 
Fold instructions + for (const auto& bbPtr : func->GetBlocks()) { + for (const auto& instPtr : bbPtr->GetInstructions()) { + auto* inst = instPtr.get(); + if (inst->GetOpcode() == Opcode::Br || inst->GetOpcode() == Opcode::Ret || inst->GetOpcode() == Opcode::Phi) { + continue; + } + if (auto* folded = FoldInstruction(inst, ctx)) { + inst->ReplaceAllUsesWith(folded); + to_erase.push_back(inst); + local_changed = true; + changed = true; + } + } + } + + // Erase the folded instructions + for (auto* inst : to_erase) { + inst->GetParent()->EraseInstruction(inst); + } + + // 2. Simplify conditional branches + for (const auto& bbPtr : func->GetBlocks()) { + auto* bb = bbPtr.get(); + const auto& insts = bb->GetInstructions(); + if (insts.empty()) continue; + auto* term = insts.back().get(); + if (term->GetOpcode() == Opcode::Br) { + auto* br = static_cast(term); + if (br->IsConditional()) { + if (auto* cond_const = dynamic_cast(br->GetCondition())) { + BasicBlock* target = (cond_const->GetValue() != 0) ? br->GetIfTrue() : br->GetIfFalse(); + BasicBlock* dead_target = (cond_const->GetValue() != 0) ? br->GetIfFalse() : br->GetIfTrue(); + + if (dead_target != target) { + for (const auto& instPtr : dead_target->GetInstructions()) { + if (instPtr->GetOpcode() == Opcode::Phi) { + auto* phi = static_cast(instPtr.get()); + phi->RemoveIncomingBlock(bb); + } + } + } + + bb->EraseInstruction(br); + bb->Append(target); + local_changed = true; + changed = true; + break; // Restart loop to handle CFG shifts safely + } + } + } + } + } + + return changed; +} + +} // namespace ir diff --git a/src/ir/passes/DCE.cpp b/src/ir/passes/DCE.cpp index 5a0db91..a434966 100644 --- a/src/ir/passes/DCE.cpp +++ b/src/ir/passes/DCE.cpp @@ -1,4 +1,75 @@ -// 死代码删除(DCE): -// - 删除无用指令与无用基本块 -// - 通常与 CFG 简化配合使用 +#include "ir/PassManager.h" +#include +#include +#include +#include +namespace ir { + +bool RunDCE(Function* func) { + std::unordered_set live_instructions; + std::queue worklist; + + // 1. 
Mark inherently live instructions + for (const auto& bbPtr : func->GetBlocks()) { + for (const auto& instPtr : bbPtr->GetInstructions()) { + auto* inst = instPtr.get(); + bool inherently_live = false; + + switch (inst->GetOpcode()) { + case Opcode::Ret: + case Opcode::Br: + case Opcode::Store: + case Opcode::Call: + inherently_live = true; + break; + default: + break; + } + + if (inherently_live) { + live_instructions.insert(inst); + worklist.push(inst); + } + } + } + + // 2. Propagate liveness along the def-use chains + while (!worklist.empty()) { + auto* inst = worklist.front(); + worklist.pop(); + + for (size_t i = 0; i < inst->GetNumOperands(); ++i) { + auto* operand = inst->GetOperand(i); + if (auto* op_inst = dynamic_cast(operand)) { + if (live_instructions.find(op_inst) == live_instructions.end()) { + live_instructions.insert(op_inst); + worklist.push(op_inst); + } + } + } + } + + // 3. Sweep dead instructions + bool changed = false; + for (const auto& bbPtr : func->GetBlocks()) { + std::vector dead_instructions; + for (const auto& instPtr : bbPtr->GetInstructions()) { + auto* inst = instPtr.get(); + if (live_instructions.find(inst) == live_instructions.end()) { + dead_instructions.push_back(inst); + } + } + + if (!dead_instructions.empty()) { + changed = true; + for (auto* inst : dead_instructions) { + bbPtr->EraseInstruction(inst); + } + } + } + + return changed; +} + +} // namespace ir diff --git a/src/ir/passes/Mem2Reg.cpp b/src/ir/passes/Mem2Reg.cpp index 0b052ba..15a0c59 100644 --- a/src/ir/passes/Mem2Reg.cpp +++ b/src/ir/passes/Mem2Reg.cpp @@ -1,4 +1,228 @@ -// Mem2Reg(SSA 构造): -// - 将局部变量的 alloca/load/store 提升为 SSA 形式 -// - 插入 PHI 并重写使用,依赖支配树等分析 +#include "ir/PassManager.h" +#include +#include +#include +#include +#include +#include +#include +#include +namespace ir { + +// Predeclaration of rebuild CFG helper +void RebuildCFG(Function* func); + +bool RunMem2Reg(Function* func, Context& ctx) { + // 1. 
Build dominator tree + DominatorTree dom_tree(func); + dom_tree.Run(); + + // 2. Identify promotable allocas + std::vector promotable_allocas; + for (const auto& bbPtr : func->GetBlocks()) { + for (const auto& instPtr : bbPtr->GetInstructions()) { + if (instPtr->GetOpcode() == Opcode::Alloca) { + auto* alloca = static_cast(instPtr.get()); + // Alloca of scalar type: i32 or float (pointers to i32/float in minimum IR) + if (alloca->GetType()->IsPtrInt32() || alloca->GetType()->IsPtrFloat()) { + // Verify all uses are load/store + bool promotable = true; + for (const auto& use : alloca->GetUses()) { + auto* user = use.GetUser(); + auto* inst_user = dynamic_cast(user); + if (!inst_user) { + promotable = false; + break; + } + if (inst_user->GetOpcode() != Opcode::Load && inst_user->GetOpcode() != Opcode::Store) { + promotable = false; + break; + } + // For Store, alloca must be the pointer operand (operand index 1), not the value operand + if (inst_user->GetOpcode() == Opcode::Store) { + auto* store = static_cast(inst_user); + if (store->GetPtr() != alloca) { + promotable = false; + break; + } + } + } + if (promotable) { + promotable_allocas.push_back(alloca); + } + } + } + } + } + + if (promotable_allocas.empty()) { + return false; + } + + // 3. 
For each alloca, find definition blocks and place Phi nodes + // Maps each basic block and alloca to the inserted Phi instruction + std::unordered_map> phi_nodes; + std::unordered_set instructions_to_erase; + + for (auto* alloca : promotable_allocas) { + std::vector def_blocks; + for (const auto& use : alloca->GetUses()) { + auto* inst = dynamic_cast(use.GetUser()); + if (inst && inst->GetOpcode() == Opcode::Store) { + def_blocks.push_back(inst->GetParent()); + } + } + + // DF-based Phi placement + std::queue worklist; + std::unordered_set added; + std::unordered_set def_set(def_blocks.begin(), def_blocks.end()); + + for (auto* bb : def_blocks) { + worklist.push(bb); + added.insert(bb); + } + + while (!worklist.empty()) { + auto* x = worklist.front(); + worklist.pop(); + + for (auto* y : dom_tree.GetDominanceFrontier(x)) { + if (added.find(y) == added.end()) { + // Place Phi node in Y + std::shared_ptr ty = alloca->GetType()->IsPtrFloat() ? Type::GetFloatType() : Type::GetInt32Type(); + auto phi = std::make_unique(ty, ctx.NextTemp()); + auto* phi_ptr = phi.get(); + + // Insert Phi at the start of block Y + y->InsertInstructionAtBegin(std::move(phi)); + phi_nodes[y][alloca] = phi_ptr; + + added.insert(y); + if (def_set.find(y) == def_set.end()) { + worklist.push(y); + } + } + } + } + } + + // 4. 
Rename variables using DFS traversal of dominator tree + std::unordered_map> current_def; + + // Helper for generating default value + auto get_default_value = [&](AllocaInst* alloca) -> Value* { + if (alloca->GetType()->IsPtrFloat()) { + return ctx.GetConstFloat(0.0f); + } else { + return ctx.GetConstInt(0); + } + }; + + // Traversal stack for DFS: stores (block, parent_block) + struct TraversalNode { + BasicBlock* bb; + size_t child_idx; + }; + + std::stack visit_stack; + std::unordered_map>> pushed_defs; + + // DFS function + std::function rename_dfs = [&](BasicBlock* bb) { + auto& pushes = pushed_defs[bb]; + + // Push Phis in this block to current_def + auto phi_it = phi_nodes.find(bb); + if (phi_it != phi_nodes.end()) { + for (const auto& pair : phi_it->second) { + auto* alloca = pair.first; + auto* phi = pair.second; + current_def[alloca].push_back(phi); + pushes.push_back({alloca, 1}); + } + } + + // Process loads and stores + for (const auto& instPtr : bb->GetInstructions()) { + auto* inst = instPtr.get(); + if (inst->GetOpcode() == Opcode::Load) { + auto* load = static_cast(inst); + auto* ptr = load->GetPtr(); + if (auto* alloca = dynamic_cast(ptr)) { + if (std::find(promotable_allocas.begin(), promotable_allocas.end(), alloca) != promotable_allocas.end()) { + auto& defs = current_def[alloca]; + Value* val = defs.empty() ? 
get_default_value(alloca) : defs.back(); + load->ReplaceAllUsesWith(val); + instructions_to_erase.insert(load); + } + } + } else if (inst->GetOpcode() == Opcode::Store) { + auto* store = static_cast(inst); + auto* ptr = store->GetPtr(); + if (auto* alloca = dynamic_cast(ptr)) { + if (std::find(promotable_allocas.begin(), promotable_allocas.end(), alloca) != promotable_allocas.end()) { + current_def[alloca].push_back(store->GetValue()); + pushes.push_back({alloca, 1}); + instructions_to_erase.insert(store); + } + } + } + } + + // Fill Phi incoming values for CFG successors + for (auto* succ : bb->GetSuccessors()) { + auto succ_phi_it = phi_nodes.find(succ); + if (succ_phi_it != phi_nodes.end()) { + for (const auto& pair : succ_phi_it->second) { + auto* alloca = pair.first; + auto* phi = pair.second; + auto& defs = current_def[alloca]; + Value* val = defs.empty() ? get_default_value(alloca) : defs.back(); + phi->AddIncoming(val, bb); + } + } + } + + // Recurse to dominator tree children + for (auto* child : dom_tree.GetDominatedBlocks(bb)) { + rename_dfs(child); + } + + // Pop definitions pushed in this block + for (const auto& push : pushes) { + auto* alloca = push.first; + for (size_t k = 0; k < push.second; ++k) { + if (!current_def[alloca].empty()) { + current_def[alloca].pop_back(); + } + } + } + }; + + if (!func->GetBlocks().empty()) { + rename_dfs(func->GetEntry()); + } + + // 5. 
Clean up loads, stores and allocas + for (auto* alloca : promotable_allocas) { + instructions_to_erase.insert(alloca); + } + + for (const auto& bbPtr : func->GetBlocks()) { + std::vector to_remove; + for (const auto& instPtr : bbPtr->GetInstructions()) { + if (instructions_to_erase.find(instPtr.get()) != instructions_to_erase.end()) { + to_remove.push_back(instPtr.get()); + } + } + for (auto* inst : to_remove) { + bbPtr->EraseInstruction(inst); + } + } + + return true; +} + +} // namespace ir diff --git a/src/ir/passes/PassManager.cpp b/src/ir/passes/PassManager.cpp index 044328f..8e55f18 100644 --- a/src/ir/passes/PassManager.cpp +++ b/src/ir/passes/PassManager.cpp @@ -1 +1,35 @@ -// IR Pass 管理骨架。 +#include "ir/PassManager.h" +#include + +namespace ir { + +void RunFunctionOptimizationPasses(Function* func, Context& ctx) { + // 1. Promote memory-based local variables to SSA form using Mem2Reg + RunMem2Reg(func, ctx); + + // 2. Run scalar optimizations iteratively until convergence (no changes observed) + bool changed = true; + int iterations = 0; + const int max_iterations = 16; // Safe limit to prevent compile-time infinite loops + + while (changed && iterations < max_iterations) { + changed = false; + iterations++; + + changed |= RunConstProp(func, ctx); + changed |= RunConstFold(func, ctx); + changed |= RunCSE(func); + changed |= RunDCE(func); + changed |= RunCFGSimplify(func); + } +} + +void RunOptimizationPasses(Module& module) { + for (const auto& funcPtr : module.GetFunctions()) { + if (!funcPtr->GetBlocks().empty()) { + RunFunctionOptimizationPasses(funcPtr.get(), module.GetContext()); + } + } +} + +} // namespace ir diff --git a/src/main.cpp b/src/main.cpp index 3e81cdf..882ecb9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -6,6 +6,7 @@ #include "frontend/SyntaxTreePrinter.h" #if !COMPILER_PARSE_ONLY #include "ir/IR.h" +#include "ir/PassManager.h" #include "irgen/IRGen.h" #include "mir/MIR.h" #include "sem/Sema.h" @@ -36,6 +37,7 @@ int main(int argc, 
char** argv) { auto sema = RunSema(*comp_unit); auto module = GenerateIR(*comp_unit, sema); + ir::RunOptimizationPasses(*module); if (opts.emit_ir) { ir::IRPrinter printer; if (need_blank_line) { diff --git a/src/mir/Lowering.cpp b/src/mir/Lowering.cpp index 3cf005a..c9a7eba 100644 --- a/src/mir/Lowering.cpp +++ b/src/mir/Lowering.cpp @@ -31,6 +31,26 @@ uint32_t GetTypeSize(const ir::Type* type) { uint32_t GetAllocaSize(const ir::Instruction& inst) { auto type = inst.GetType(); if (type->IsPtrInt32() || type->IsPtrFloat()) { + // Check if any StoreInst in the parent function stores a pointer to this alloca + auto* parent_bb = inst.GetParent(); + if (parent_bb) { + auto* parent_func = parent_bb->GetParent(); + if (parent_func) { + for (const auto& bbPtr : parent_func->GetBlocks()) { + for (const auto& other_inst : bbPtr->GetInstructions()) { + if (other_inst->GetOpcode() == ir::Opcode::Store) { + auto* store = static_cast(other_inst.get()); + if (store->GetPtr() == &inst) { + auto val_ty = store->GetValue()->GetType(); + if (val_ty->IsPtrInt32() || val_ty->IsPtrFloat()) { + return 8; // Stores a 64-bit pointer + } + } + } + } + } + } + } return 4; } return GetTypeSize(type.get()); @@ -120,7 +140,8 @@ void LowerInstruction(const ir::Instruction& inst, MachineFunction& function, if (alloca->GetOpcode() == ir::Opcode::Alloca) { auto it = slots.find(alloca); if (it != slots.end()) { - PhysReg val_reg = store.GetValue()->GetType()->IsFloat() ? PhysReg::S8 : PhysReg::W8; + PhysReg val_reg = store.GetValue()->GetType()->IsFloat() ? PhysReg::S8 : + (store.GetValue()->GetType()->IsPtrInt32() || store.GetValue()->GetType()->IsPtrFloat()) ? 
PhysReg::X8 : PhysReg::W8; EmitValueToReg(store.GetValue(), val_reg, slots, block); block.Append(Opcode::StoreStack, {Operand::Reg(val_reg), Operand::FrameIndex(it->second)}); return; @@ -129,7 +150,8 @@ void LowerInstruction(const ir::Instruction& inst, MachineFunction& function, } // Dynamic store - PhysReg val_reg = store.GetValue()->GetType()->IsFloat() ? PhysReg::S8 : PhysReg::W8; + PhysReg val_reg = store.GetValue()->GetType()->IsFloat() ? PhysReg::S8 : + (store.GetValue()->GetType()->IsPtrInt32() || store.GetValue()->GetType()->IsPtrFloat()) ? PhysReg::X8 : PhysReg::W8; EmitValueToReg(store.GetValue(), val_reg, slots, block); EmitAddressToReg(store.GetPtr(), PhysReg::X9, slots, block); block.Append(Opcode::StrRegReg, {Operand::Reg(val_reg), Operand::Reg(PhysReg::X9)}); @@ -144,7 +166,8 @@ void LowerInstruction(const ir::Instruction& inst, MachineFunction& function, if (alloca->GetOpcode() == ir::Opcode::Alloca) { auto it = slots.find(alloca); if (it != slots.end()) { - PhysReg val_reg = load.GetType()->IsFloat() ? PhysReg::S8 : PhysReg::W8; + PhysReg val_reg = load.GetType()->IsFloat() ? PhysReg::S8 : + (load.GetType()->IsPtrInt32() || load.GetType()->IsPtrFloat()) ? PhysReg::X8 : PhysReg::W8; block.Append(Opcode::LoadStack, {Operand::Reg(val_reg), Operand::FrameIndex(it->second)}); block.Append(Opcode::StoreStack, {Operand::Reg(val_reg), Operand::FrameIndex(dst_slot)}); return; @@ -153,7 +176,8 @@ void LowerInstruction(const ir::Instruction& inst, MachineFunction& function, } // Dynamic load - PhysReg val_reg = load.GetType()->IsFloat() ? PhysReg::S8 : PhysReg::W8; + PhysReg val_reg = load.GetType()->IsFloat() ? PhysReg::S8 : + (load.GetType()->IsPtrInt32() || load.GetType()->IsPtrFloat()) ? 
PhysReg::X8 : PhysReg::W8; EmitAddressToReg(load.GetPtr(), PhysReg::X9, slots, block); block.Append(Opcode::LdrRegReg, {Operand::Reg(val_reg), Operand::Reg(PhysReg::X9)}); block.Append(Opcode::StoreStack, {Operand::Reg(val_reg), Operand::FrameIndex(dst_slot)}); @@ -301,22 +325,50 @@ void LowerInstruction(const ir::Instruction& inst, MachineFunction& function, } case ir::Opcode::Br: { auto& br = static_cast(inst); - std::cerr << "DEBUG: Br is_conditional=" << br.IsConditional() << std::endl; + + auto emit_phi_copies = [&](const ir::BasicBlock* succ) { + if (!succ) return; + for (const auto& succ_inst : succ->GetInstructions()) { + if (succ_inst->GetOpcode() == ir::Opcode::Phi) { + auto* phi = static_cast(succ_inst.get()); + const ir::Value* incoming_val = nullptr; + for (size_t i = 0; i < phi->GetNumIncoming(); ++i) { + if (phi->GetIncomingBlock(i) == inst.GetParent()) { + incoming_val = phi->GetIncomingValue(i); + break; + } + } + if (incoming_val) { + auto slot_it = slots.find(phi); + if (slot_it != slots.end()) { + int phi_slot = slot_it->second; + PhysReg val_reg = phi->GetType()->IsFloat() ? PhysReg::S8 : + (phi->GetType()->IsPtrInt32() || phi->GetType()->IsPtrFloat()) ? PhysReg::X8 : PhysReg::W8; + EmitValueToReg(incoming_val, val_reg, slots, block); + block.Append(Opcode::StoreStack, {Operand::Reg(val_reg), Operand::FrameIndex(phi_slot)}); + } + } + } + } + }; + if (br.IsConditional()) { - std::cerr << "DEBUG: Cond pointer=" << br.GetCondition() << std::endl; - std::cerr << "DEBUG: True pointer=" << br.GetIfTrue() << " name=" << (br.GetIfTrue() ? br.GetIfTrue()->GetName() : "") << std::endl; - std::cerr << "DEBUG: False pointer=" << br.GetIfFalse() << " name=" << (br.GetIfFalse() ? 
br.GetIfFalse()->GetName() : "") << std::endl; + emit_phi_copies(br.GetIfTrue()); + emit_phi_copies(br.GetIfFalse()); EmitValueToReg(br.GetCondition(), PhysReg::W8, slots, block); block.Append(Opcode::MovImm, {Operand::Reg(PhysReg::W9), Operand::Imm(0)}); block.Append(Opcode::CmpRR, {Operand::Reg(PhysReg::W8), Operand::Reg(PhysReg::W9)}); block.Append(Opcode::BCond, {Operand::Cond("ne"), Operand::Label(br.GetIfTrue()->GetName())}); block.Append(Opcode::B, {Operand::Label(br.GetIfFalse()->GetName())}); } else { - std::cerr << "DEBUG: Dest pointer=" << br.GetDest() << " name=" << (br.GetDest() ? br.GetDest()->GetName() : "") << std::endl; + emit_phi_copies(br.GetDest()); block.Append(Opcode::B, {Operand::Label(br.GetDest()->GetName())}); } return; } + case ir::Opcode::Phi: { + return; + } case ir::Opcode::Ret: { auto& ret = static_cast(inst); if (ret.GetValue()) { @@ -369,8 +421,14 @@ void LowerInstruction(const ir::Instruction& inst, MachineFunction& function, slots.emplace(&inst, dst_slot); // Load base pointer address into X8 - if (dynamic_cast(gep.GetPtr()) || gep.GetPtr()->IsGlobalValue()) { + if (gep.GetPtr()->IsGlobalValue()) { EmitAddressToReg(gep.GetPtr(), PhysReg::X8, slots, block); + } else if (auto* alloca = dynamic_cast(gep.GetPtr())) { + if (alloca->GetType()->IsArray()) { + EmitAddressToReg(gep.GetPtr(), PhysReg::X8, slots, block); + } else { + EmitValueToReg(gep.GetPtr(), PhysReg::X8, slots, block); + } } else { EmitValueToReg(gep.GetPtr(), PhysReg::X8, slots, block); } @@ -428,6 +486,16 @@ std::vector> LowerToMIR(const ir::Module& modul bb_map[bbPtr.get()] = &mbb; } + // Pre-allocate stack slots for all Phi instructions in the function + for (const auto& bbPtr : func.GetBlocks()) { + for (const auto& inst : bbPtr->GetInstructions()) { + if (inst->GetOpcode() == ir::Opcode::Phi) { + int slot = machine_func->CreateFrameIndex(GetTypeSize(inst->GetType().get())); + slots.emplace(inst.get(), slot); + } + } + } + auto& entry_block = 
*bb_map.at(func.GetEntry()); // Lower function arguments at the start of the entry block