[midend]后端适配GEP

2025-07-25 12:00:47 +08:00
parent 259d71cde5
commit d50912ee4c
2 changed files with 68 additions and 72 deletions
--- a/src/RISCv64ISel.cpp
+++ b/src/RISCv64ISel.cpp
@@ -794,30 +794,23 @@ void RISCv64ISel::selectNode(DAGNode* node) {

        case DAGNode::GET_ELEMENT_PTR: {
            auto gep = dynamic_cast<GetElementPtrInst*>(node->value);
-            // 获取GEP指令最终要写入的目标虚拟寄存器
            auto result_vreg = getVReg(gep);

-            // --- Step 1: 获取基地址 ---
+            // --- Step 1: 获取基地址 (此部分逻辑正确，保持不变) ---
            auto base_ptr_node = node->operands[0];
-            // 创建一个新的vreg作为地址累加器
            auto current_addr_vreg = getNewVReg();

-            // 判断基指针是局部数组还是全局数组，并生成获取其基地址的指令
            if (auto alloca_base = dynamic_cast<AllocaInst*>(base_ptr_node->value)) {
-                // 基指针是局部数组(在栈上)，使用FRAME_ADDR伪指令获取其地址
                auto frame_addr_instr = std::make_unique<MachineInstr>(RVOpcodes::FRAME_ADDR);
                frame_addr_instr->addOperand(std::make_unique<RegOperand>(current_addr_vreg));
                frame_addr_instr->addOperand(std::make_unique<RegOperand>(getVReg(alloca_base)));
                CurMBB->addInstruction(std::move(frame_addr_instr));
            } else if (auto global_base = dynamic_cast<GlobalValue*>(base_ptr_node->value)) {
-                // 基指针是全局数组，使用LA伪指令加载其地址
                auto la_instr = std::make_unique<MachineInstr>(RVOpcodes::LA);
                la_instr->addOperand(std::make_unique<RegOperand>(current_addr_vreg));
                la_instr->addOperand(std::make_unique<LabelOperand>(global_base->getName()));
                CurMBB->addInstruction(std::move(la_instr));
            } else {
-                // 如果基指针是另一个计算结果（例如函数参数传递来的数组地址），
-                // 直接用MV指令将其值赋给地址累加器
                auto base_vreg = getVReg(base_ptr_node->value);
                auto mv = std::make_unique<MachineInstr>(RVOpcodes::MV);
                mv->addOperand(std::make_unique<RegOperand>(current_addr_vreg));
@@ -825,72 +818,75 @@ void RISCv64ISel::selectNode(DAGNode* node) {
                CurMBB->addInstruction(std::move(mv));
            }

-            // --- Step 2: 迭代处理每个索引，累加偏移量 ---
-            // 获取数组的类型，并剥掉最外层的指针
+            // --- Step 2: [最终权威版] 遵循LLVM GEP语义迭代计算地址 ---
+            
+            // 初始被索引的类型，是基指针指向的那个类型 (例如, [2 x i32])
            Type* current_type = gep->getBasePointer()->getType()->as<PointerType>()->getBaseType();

-            // 遍历所有索引 (DAGNode的操作数从第1个开始是索引)
-            for (size_t i = 1; i < node->operands.size(); ++i) {
-                auto index_node = node->operands[i];
+            // 迭代处理 GEP 的每一个索引
+            for (size_t i = 0; i < gep->getNumIndices(); ++i) {
+                Value* indexValue = gep->getIndex(i);

-                // [修复] 核心修复逻辑在这里
-                // GEP可以索引数组，也可以索引指针（数组退化的结果）
-                unsigned element_size = 0;
-                Type* element_type = nullptr;
+                // 每个索引的步长都是当前 `current_type` 的完整大小：
+                // 第一个索引跨过基指针所指向的整个类型；每轮结束时 `current_type`
+                // 会下降到其元素类型，因此后续索引的步长即为对应层级元素的大小。
+                unsigned stride = getTypeSizeInBytes(current_type);
                
+                // 如果步长为0（例如对一个void类型或空结构体索引），则不产生任何偏移
+                if (stride != 0) {
+                    // --- 为当前索引和步长生成偏移计算指令 ---
+                    auto offset_vreg = getNewVReg();
+                    auto index_vreg = getVReg(indexValue);
+
+                    // 如果索引是常量，先用 LI 指令加载到虚拟寄存器
+                    if (auto const_index = dynamic_cast<ConstantValue*>(indexValue)) {
+                        auto li = std::make_unique<MachineInstr>(RVOpcodes::LI);
+                        li->addOperand(std::make_unique<RegOperand>(index_vreg));
+                        li->addOperand(std::make_unique<ImmOperand>(const_index->getInt()));
+                        CurMBB->addInstruction(std::move(li));
+                    }
+                    
+                    // 优化：如果步长是1，可以直接移动(MV)作为偏移量，无需乘法
+                    if (stride == 1) {
+                        auto mv = std::make_unique<MachineInstr>(RVOpcodes::MV);
+                        mv->addOperand(std::make_unique<RegOperand>(offset_vreg));
+                        mv->addOperand(std::make_unique<RegOperand>(index_vreg));
+                        CurMBB->addInstruction(std::move(mv));
+                    } else {
+                        // 步长不为1，需要生成乘法指令
+                        auto size_vreg = getNewVReg();
+                        auto li_size = std::make_unique<MachineInstr>(RVOpcodes::LI);
+                        li_size->addOperand(std::make_unique<RegOperand>(size_vreg));
+                        li_size->addOperand(std::make_unique<ImmOperand>(stride));
+                        CurMBB->addInstruction(std::move(li_size));
+                        
+                        auto mul = std::make_unique<MachineInstr>(RVOpcodes::MULW);
+                        mul->addOperand(std::make_unique<RegOperand>(offset_vreg));
+                        mul->addOperand(std::make_unique<RegOperand>(index_vreg));
+                        mul->addOperand(std::make_unique<RegOperand>(size_vreg));
+                        CurMBB->addInstruction(std::move(mul));
+                    }
+
+                    // 将计算出的偏移量累加到当前地址上
+                    auto add = std::make_unique<MachineInstr>(RVOpcodes::ADD);
+                    add->addOperand(std::make_unique<RegOperand>(current_addr_vreg));
+                    add->addOperand(std::make_unique<RegOperand>(current_addr_vreg));
+                    add->addOperand(std::make_unique<RegOperand>(offset_vreg));
+                    CurMBB->addInstruction(std::move(add));
+                }
+
+                // --- 为下一次迭代更新类型：深入一层 ---
                if (auto array_type = current_type->as<ArrayType>()) {
-                    // 情况一：当前正在索引一个数组类型，例如 a[i] 中的 a
-                    element_type = array_type->getElementType();
-                    element_size = getTypeSizeInBytes(element_type);
+                    current_type = array_type->getElementType();
                } else if (auto ptr_type = current_type->as<PointerType>()) {
-                    // 情况二：当前正在索引一个指针类型，例如函数参数 p[] 经过退化后的 p
-                    element_type = ptr_type->getBaseType();
-                    element_size = getTypeSizeInBytes(element_type);
-                } else {
-                    // 如果既不是数组也不是指针，那么无法进行索引
-                    assert(false && "GEP can only index into an array or pointer type.");
+                    // 这种情况不应该在第二次迭代后发生，但为了逻辑健壮性保留
+                    current_type = ptr_type->getBaseType();
                }
-
-                // 更新current_type，为下一次迭代做准备（处理多维数组）
-                current_type = element_type;
-
-                // --- 计算偏移量: offset = index * element_size ---
-                auto offset_vreg = getNewVReg();
-                auto index_vreg = getVReg(index_node->value);
-                
-                // 如果索引本身是个常量，需要先用LI指令加载到虚拟寄存器中
-                if (auto const_index = dynamic_cast<ConstantValue*>(index_node->value)) {
-                    auto li = std::make_unique<MachineInstr>(RVOpcodes::LI);
-                    li->addOperand(std::make_unique<RegOperand>(index_vreg));
-                    li->addOperand(std::make_unique<ImmOperand>(const_index->getInt()));
-                    CurMBB->addInstruction(std::move(li));
-                }
-
-                // 将元素大小加载到临时寄存器
-                auto size_vreg = getNewVReg();
-                auto li_size = std::make_unique<MachineInstr>(RVOpcodes::LI);
-                li_size->addOperand(std::make_unique<RegOperand>(size_vreg));
-                li_size->addOperand(std::make_unique<ImmOperand>(element_size));
-                CurMBB->addInstruction(std::move(li_size));
-
-                // 执行乘法: offset_vreg = index_vreg * size_vreg
-                // SysY中数组索引计算用32位乘法足够
-                auto mul = std::make_unique<MachineInstr>(RVOpcodes::MULW);
-                mul->addOperand(std::make_unique<RegOperand>(offset_vreg));
-                mul->addOperand(std::make_unique<RegOperand>(index_vreg));
-                mul->addOperand(std::make_unique<RegOperand>(size_vreg));
-                CurMBB->addInstruction(std::move(mul));
-
-                // 累加地址: current_addr_vreg = current_addr_vreg + offset_vreg
-                // 指针地址是64位的，用ADD指令
-                auto add = std::make_unique<MachineInstr>(RVOpcodes::ADD);
-                add->addOperand(std::make_unique<RegOperand>(current_addr_vreg));
-                add->addOperand(std::make_unique<RegOperand>(current_addr_vreg));
-                add->addOperand(std::make_unique<RegOperand>(offset_vreg));
-                CurMBB->addInstruction(std::move(add));
+                // 如果`current_type`既不是数组也不是指针（例如i32等基本类型），它会保持不变；
+                // 此后若仍有索引，下一次循环将继续以该类型的大小（`getTypeSizeInBytes`）作为步长。
            }
            
-            // --- Step 3: 将最终地址存入GEP的目标vreg ---
+            // --- Step 3: 将最终计算出的地址存入GEP的目标虚拟寄存器 (保持不变) ---
            auto final_mv = std::make_unique<MachineInstr>(RVOpcodes::MV);
            final_mv->addOperand(std::make_unique<RegOperand>(result_vreg));
            final_mv->addOperand(std::make_unique<RegOperand>(current_addr_vreg));
--- a/src/include/RISCv64Passes.h
+++ b/src/include/RISCv64Passes.h
@@ -6,13 +6,13 @@
 namespace sysy {

 /**
- * @class Pass
+ * @class BackendPass
 * @brief 所有优化Pass的抽象基类 (可选，但推荐)
 * * 定义一个通用的接口，所有优化都应该实现它。
 */
-class Pass {
+class BackendPass {
 public:
-    virtual ~Pass() = default;
+    virtual ~BackendPass() = default;
    virtual void runOnMachineFunction(MachineFunction* mfunc) = 0;
 };

@@ -25,7 +25,7 @@ public:
 * * 在虚拟寄存器上进行操作，此时调度自由度最大，
 * 主要目标是隐藏指令延迟，提高流水线效率。
 */
-class PreRA_Scheduler : public Pass {
+class PreRA_Scheduler : public BackendPass {
 public:
    void runOnMachineFunction(MachineFunction* mfunc) override;
 };
@@ -39,7 +39,7 @@ public:
 * * 在已分配物理寄存器的指令流上，通过一个小的滑动窗口来查找
 * 并替换掉一些冗余或低效的指令模式。
 */
-class PeepholeOptimizer : public Pass {
+class PeepholeOptimizer : public BackendPass {
 public:
    void runOnMachineFunction(MachineFunction* mfunc) override;
 };
@@ -50,7 +50,7 @@ public:
 * * 主要目标是优化寄存器分配器插入的spill/fill代码(lw/sw)，
 * 尝试将加载指令提前，以隐藏其访存延迟。
 */
-class PostRA_Scheduler : public Pass {
+class PostRA_Scheduler : public BackendPass {
 public:
    void runOnMachineFunction(MachineFunction* mfunc) override;
 };