[midend]添加了后端中对GEP指令的支持

2025-07-24 00:39:11 +08:00
parent 56b376914b
commit f4d231b989
2 changed files with 161 additions and 1 deletions
--- a/src/RISCv64ISel.cpp
+++ b/src/RISCv64ISel.cpp
@@ -10,7 +10,7 @@ namespace sysy {

 // DAG节点定义 (内部实现)
 struct RISCv64ISel::DAGNode {
-    enum NodeKind { CONSTANT, LOAD, STORE, BINARY, CALL, RETURN, BRANCH, ALLOCA_ADDR, UNARY, MEMSET };
+    enum NodeKind { CONSTANT, LOAD, STORE, BINARY, CALL, RETURN, BRANCH, ALLOCA_ADDR, UNARY, MEMSET, GET_ELEMENT_PTR};
    NodeKind kind;
    Value* value = nullptr;
    std::vector<DAGNode*> operands;
@@ -792,6 +792,112 @@ void RISCv64ISel::selectNode(DAGNode* node) {
            break;
        }

+        case DAGNode::GET_ELEMENT_PTR: { // Lower a GEP: result = base address + sum(index_i * size of element_i)
+            auto gep = dynamic_cast<GetElementPtrInst*>(node->value);
+            // Destination virtual register that will receive the GEP's final address
+            auto result_vreg = getVReg(gep);
+
+            // --- Step 1: obtain the base address ---
+            auto base_ptr_node = node->operands[0];
+            // Fresh vreg used as the running address accumulator
+            auto current_addr_vreg = getNewVReg();
+
+            // Emit the instruction that materializes the base address, depending on whether the base is a local or a global array
+            if (auto alloca_base = dynamic_cast<AllocaInst*>(base_ptr_node->value)) {
+                // Base is a local array (on the stack): use the FRAME_ADDR pseudo-instruction to get its address
+                auto frame_addr_instr = std::make_unique<MachineInstr>(RVOpcodes::FRAME_ADDR);
+                frame_addr_instr->addOperand(std::make_unique<RegOperand>(current_addr_vreg));
+                frame_addr_instr->addOperand(std::make_unique<RegOperand>(getVReg(alloca_base)));
+                CurMBB->addInstruction(std::move(frame_addr_instr));
+            } else if (auto global_base = dynamic_cast<GlobalValue*>(base_ptr_node->value)) {
+                // Base is a global array: use the LA pseudo-instruction to load its address
+                auto la_instr = std::make_unique<MachineInstr>(RVOpcodes::LA);
+                la_instr->addOperand(std::make_unique<RegOperand>(current_addr_vreg));
+                la_instr->addOperand(std::make_unique<LabelOperand>(global_base->getName()));
+                CurMBB->addInstruction(std::move(la_instr));
+            } else {
+                // Base is some other computed value (e.g. an array address passed in as a function argument):
+                // copy its value into the address accumulator with MV
+                auto base_vreg = getVReg(base_ptr_node->value);
+                auto mv = std::make_unique<MachineInstr>(RVOpcodes::MV);
+                mv->addOperand(std::make_unique<RegOperand>(current_addr_vreg));
+                mv->addOperand(std::make_unique<RegOperand>(base_vreg));
+                CurMBB->addInstruction(std::move(mv));
+            }
+
+            // --- Step 2: walk the indices, accumulating the byte offset ---
+            // Take the base pointer's type and strip the outermost pointer level
+            Type* current_type = gep->getBasePointer()->getType()->as<PointerType>()->getBaseType();
+
+            // Iterate over all indices (DAGNode operands from position 1 onward are the indices)
+            for (size_t i = 1; i < node->operands.size(); ++i) {
+                auto index_node = node->operands[i];
+                
+                // [fix] Core of the fix:
+                // a GEP may index into an array, or through a pointer (the result of array decay)
+                unsigned element_size = 0;
+                Type* element_type = nullptr;
+
+                if (auto array_type = current_type->as<ArrayType>()) {
+                    // Case 1: currently indexing an array type, e.g. the a in a[i]
+                    element_type = array_type->getElementType();
+                    element_size = getTypeSizeInBytes(element_type);
+                } else if (auto ptr_type = current_type->as<PointerType>()) {
+                    // Case 2: currently indexing a pointer type, e.g. a function parameter p[] after decay to p
+                    element_type = ptr_type->getBaseType();
+                    element_size = getTypeSizeInBytes(element_type);
+                } else {
+                    // Neither array nor pointer: throw rather than assert, so NDEBUG builds cannot continue with a null element_type
+                    throw std::runtime_error("GEP can only index into an array or pointer type.");
+                }
+
+                // Advance current_type for the next iteration (handles multi-dimensional arrays)
+                current_type = element_type;
+
+                // --- Compute the offset: offset = index * element_size ---
+                auto offset_vreg = getNewVReg();
+                auto index_vreg = getVReg(index_node->value);
+                
+                // A constant index must first be loaded into its virtual register with LI
+                if (auto const_index = dynamic_cast<ConstantValue*>(index_node->value)) {
+                    auto li = std::make_unique<MachineInstr>(RVOpcodes::LI);
+                    li->addOperand(std::make_unique<RegOperand>(index_vreg));
+                    li->addOperand(std::make_unique<ImmOperand>(const_index->getInt()));
+                    CurMBB->addInstruction(std::move(li));
+                }
+
+                // Load the element size into a temporary register
+                auto size_vreg = getNewVReg();
+                auto li_size = std::make_unique<MachineInstr>(RVOpcodes::LI);
+                li_size->addOperand(std::make_unique<RegOperand>(size_vreg));
+                li_size->addOperand(std::make_unique<ImmOperand>(element_size));
+                CurMBB->addInstruction(std::move(li_size));
+
+                // Multiply: offset_vreg = index_vreg * size_vreg
+                // A 32-bit multiply is deemed sufficient for SysY array index arithmetic
+                auto mul = std::make_unique<MachineInstr>(RVOpcodes::MULW);
+                mul->addOperand(std::make_unique<RegOperand>(offset_vreg));
+                mul->addOperand(std::make_unique<RegOperand>(index_vreg));
+                mul->addOperand(std::make_unique<RegOperand>(size_vreg));
+                CurMBB->addInstruction(std::move(mul));
+
+                // Accumulate: current_addr_vreg = current_addr_vreg + offset_vreg
+                // Pointer addresses are 64-bit, so use ADD
+                auto add = std::make_unique<MachineInstr>(RVOpcodes::ADD);
+                add->addOperand(std::make_unique<RegOperand>(current_addr_vreg));
+                add->addOperand(std::make_unique<RegOperand>(current_addr_vreg));
+                add->addOperand(std::make_unique<RegOperand>(offset_vreg));
+                CurMBB->addInstruction(std::move(add));
+            }
+            
+            // --- Step 3: move the final address into the GEP's destination vreg ---
+            auto final_mv = std::make_unique<MachineInstr>(RVOpcodes::MV);
+            final_mv->addOperand(std::make_unique<RegOperand>(result_vreg));
+            final_mv->addOperand(std::make_unique<RegOperand>(current_addr_vreg));
+            CurMBB->addInstruction(std::move(final_mv));
+            break;
+        }
+
        default:
            throw std::runtime_error("Unsupported DAGNode kind in ISel");
    }
@@ -850,6 +956,21 @@ std::vector<std::unique_ptr<RISCv64ISel::DAGNode>> RISCv64ISel::build_dag(BasicB
                    std::cout << "  -> Operand " << i << " has kind: " << memset_node->operands[i]->kind << std::endl;
                }
            }
+        } else if (auto gep = dynamic_cast<GetElementPtrInst*>(inst)) {
+            // Skip this GEP if a node has already been created for it
+            if(value_to_node.count(gep)) continue;
+
+            // Create a new node of kind GET_ELEMENT_PTR
+            auto gep_node = create_node(DAGNode::GET_ELEMENT_PTR, gep, value_to_node, nodes_storage);
+            
+            // Operand 0 is the base pointer (i.e. the array itself)
+            gep_node->operands.push_back(get_operand_node(gep->getBasePointer(), value_to_node, nodes_storage));
+            
+            // Append every index, in order, as the subsequent operands
+            for (auto index : gep->getIndices()) {
+                // [fix] Extract the actual Value* from the Use object
+                gep_node->operands.push_back(get_operand_node(index->getValue(), value_to_node, nodes_storage));
+            }
        } else if (auto load = dynamic_cast<LoadInst*>(inst)) {
            auto load_node = create_node(DAGNode::LOAD, load, value_to_node, nodes_storage);
            load_node->operands.push_back(get_operand_node(load->getPointer(), value_to_node, nodes_storage));
@@ -892,6 +1013,43 @@ std::vector<std::unique_ptr<RISCv64ISel::DAGNode>> RISCv64ISel::build_dag(BasicB
    return nodes_storage;
 }

+/**
+ * @brief Compute the number of bytes a type occupies in memory.
+ * @param type IR type whose size is requested.
+ * @return Size of the type in bytes; 0 (after a failed assert) for a null or unsupported type.
+ */
+unsigned RISCv64ISel::getTypeSizeInBytes(Type* type) {
+    if (!type) {
+        assert(false && "Cannot get size of a null type.");
+        return 0;
+    }
+
+    switch (type->getKind()) {
+        // In SysY the basic types int and float both occupy 4 bytes
+        case Type::kInt:
+        case Type::kFloat:
+            return 4;
+
+        // Pointer types occupy 8 bytes on the 64-bit RISC-V architecture
+        // SysY has no 'int*' syntax, but array variables are themselves pointer-typed at the IR level
+        case Type::kPointer:
+            return 8;
+
+        // Total size of an array = number of elements * size of a single element
+        case Type::kArray: {
+            auto arrayType = type->as<ArrayType>();
+            // Recurse to compute the element size
+            return arrayType->getNumElements() * getTypeSizeInBytes(arrayType->getElementType());
+        }
+
+        // Other types such as Void or Label take no stack space, or should not appear here
+        default:
+            // Trip an assertion on an unhandled type to ease debugging
+            assert(false && "Unsupported type for size calculation.");
+            return 0;
+    }
+}
+
 // [新] 打印DAG图以供调试的辅助函数
 void RISCv64ISel::print_dag(const std::vector<std::unique_ptr<DAGNode>>& dag, const std::string& bb_name) {
    // 检查是否有DEBUG宏或者全局变量，避免在非调试模式下打印
--- a/src/include/RISCv64ISel.h
+++ b/src/include/RISCv64ISel.h
@@ -33,6 +33,8 @@ private:
    std::vector<std::unique_ptr<DAGNode>> build_dag(BasicBlock* bb);
    DAGNode* get_operand_node(Value* val_ir, std::map<Value*, DAGNode*>&, std::vector<std::unique_ptr<DAGNode>>&);
    DAGNode* create_node(int kind, Value* val, std::map<Value*, DAGNode*>&, std::vector<std::unique_ptr<DAGNode>>&);
+    // Helper that computes the size of an IR type in bytes
+    unsigned getTypeSizeInBytes(Type* type);
    
    void print_dag(const std::vector<std::unique_ptr<DAGNode>>& dag, const std::string& bb_name);