[backend]引入浮点数支持，但目前寄存器分配存在问题

2025-07-30 15:07:29 +08:00
parent 860ebcd447
commit dd38bdc133
10 changed files with 748 additions and 199 deletions
--- a/src/backend/RISCv64/Handler/CalleeSavedHandler.cpp
+++ b/src/backend/RISCv64/Handler/CalleeSavedHandler.cpp
@@ -14,23 +14,34 @@ bool CalleeSavedHandler::runOnFunction(Function *F, AnalysisManager& AM) {
 void CalleeSavedHandler::runOnMachineFunction(MachineFunction* mfunc) {
    // 此 Pass 负责分析、分配栈空间并插入 callee-saved 寄存器的保存/恢复指令。
    // 它通过与 FrameInfo 协作，确保为 callee-saved 寄存器分配的空间与局部变量/溢出槽的空间不冲突。
-    // 这样做可以使生成的 sd/ld 指令能被后续的优化 Pass (如 PostRA-Scheduler) 处理。
-
+    
    StackFrameInfo& frame_info = mfunc->getFrameInfo();
-    std::set<PhysicalReg> used_callee_saved;
+    
+    // [修改] 分别记录被使用的整数和浮点被调用者保存寄存器
+    std::set<PhysicalReg> used_int_callee_saved;
+    std::set<PhysicalReg> used_fp_callee_saved;

-    // 1. 扫描所有指令，找出被使用的s寄存器 (s1-s11)
+    // 1. 扫描所有指令，找出被使用的s寄存器 (s1-s11) 和 fs寄存器 (fs0-fs11)
    for (auto& mbb : mfunc->getBlocks()) {
        for (auto& instr : mbb->getInstructions()) {
            for (auto& op : instr->getOperands()) {
+                
                auto check_and_insert_reg = [&](RegOperand* reg_op) {
                    if (!reg_op->isVirtual()) {
                        PhysicalReg preg = reg_op->getPReg();
+                        
+                        // [修改] 区分整数和浮点被调用者保存寄存器
+                        // s0 由序言/尾声处理器专门处理，这里不计入
                        if (preg >= PhysicalReg::S1 && preg <= PhysicalReg::S11) {
-                            used_callee_saved.insert(preg);
+                            used_int_callee_saved.insert(preg);
+                        } 
+                        // fs0-fs11 在我们的枚举中对应 f8,f9,f18-f27
+                        else if ((preg >= PhysicalReg::F8 && preg <= PhysicalReg::F9) || (preg >= PhysicalReg::F18 && preg <= PhysicalReg::F27)) {
+                            used_fp_callee_saved.insert(preg);
                        }
                    }
                };
+
                if (op->getKind() == MachineOperand::KIND_REG) {
                    check_and_insert_reg(static_cast<RegOperand*>(op.get()));
                } else if (op->getKind() == MachineOperand::KIND_MEM) {
@@ -40,83 +51,93 @@ void CalleeSavedHandler::runOnMachineFunction(MachineFunction* mfunc) {
        }
    }

-    if (used_callee_saved.empty()) {
+    // 如果没有使用任何需要处理的 callee-saved 寄存器，则直接返回
+    if (used_int_callee_saved.empty() && used_fp_callee_saved.empty()) {
        frame_info.callee_saved_size = 0; // 确保大小被初始化
-        return; // 无需操作
+        return;
    }

-    // 2. 计算为 callee-saved 寄存器分配的栈空间
-    //    这里的关键是，偏移的基准点要在局部变量和溢出槽之下。
-    int callee_saved_size = used_callee_saved.size() * 8;
-    frame_info.callee_saved_size = callee_saved_size; // 将大小存入 FrameInfo
+    // 2. 计算为 callee-saved 寄存器分配的栈空间大小
+    //    每个寄存器在RV64中都占用8字节
+    int callee_saved_size = (used_int_callee_saved.size() + used_fp_callee_saved.size()) * 8;
+    frame_info.callee_saved_size = callee_saved_size;

-    // 3. 计算无冲突的栈偏移
-    //    栈向下增长，所以偏移是负数。
-    //    ra/s0 占用 -8 和 -16。局部变量和溢出区在它们之下。callee-saved 区在更下方。
-    //    我们使用相对于 s0 的偏移。s0 将指向栈顶 (sp + total_size)。
-    int base_offset = -16 - frame_info.locals_size - frame_info.spill_size;
-
-    // 为了栈帧布局确定性，对寄存器进行排序
-    std::vector<PhysicalReg> sorted_regs(used_callee_saved.begin(), used_callee_saved.end());
-    std::sort(sorted_regs.begin(), sorted_regs.end());
-    
-    // 4. 在函数序言插入保存指令
+    // 3. 在函数序言中插入保存指令
    MachineBasicBlock* entry_block = mfunc->getBlocks().front().get();
    auto& entry_instrs = entry_block->getInstructions();
-    auto prologue_end = entry_instrs.begin();
+    // 插入点通常在函数入口标签之后
+    auto insert_pos = entry_instrs.begin();
+    if (!entry_instrs.empty() && entry_instrs.front()->getOpcode() == RVOpcodes::LABEL) {
+        insert_pos = std::next(insert_pos);
+    }
    
-    // 找到序言结束的位置（通常是addi s0, sp, size之后，但为了让优化器看到，我们插在更前面）
-    // 合理的位置是在 IR 指令开始之前，即在任何非序言指令（如第一个标签）之前。
-    // 为简单起见，我们直接插入到块的开头，后续重排 pass 会处理。
-    // (更优的实现会寻找一个特定的插入点)
+    // 为了布局确定性，对寄存器进行排序并按序保存
+    std::vector<PhysicalReg> sorted_int_regs(used_int_callee_saved.begin(), used_int_callee_saved.end());
+    std::vector<PhysicalReg> sorted_fp_regs(used_fp_callee_saved.begin(), used_fp_callee_saved.end());
+    std::sort(sorted_int_regs.begin(), sorted_int_regs.end());
+    std::sort(sorted_fp_regs.begin(), sorted_fp_regs.end());
+    
+    int current_offset = -16; // ra和s0已占用-8和-16，从-24开始分配

-    int current_offset = base_offset;
-    for (PhysicalReg reg : sorted_regs) {
+    // 插入整数保存指令 (sd)
+    for (PhysicalReg reg : sorted_int_regs) {
+        current_offset -= 8;
        auto sd = std::make_unique<MachineInstr>(RVOpcodes::SD);
        sd->addOperand(std::make_unique<RegOperand>(reg));
        sd->addOperand(std::make_unique<MemOperand>(
            std::make_unique<RegOperand>(PhysicalReg::S0), // 基址为帧指针 s0
            std::make_unique<ImmOperand>(current_offset)
        ));
-        // 从头部插入，但要放在函数标签之后
-        entry_instrs.insert(entry_instrs.begin() + 1, std::move(sd)); 
+        entry_instrs.insert(insert_pos, std::move(sd));
+    }
+    
+    // 插入浮点保存指令 (fsd)
+    for (PhysicalReg reg : sorted_fp_regs) {
        current_offset -= 8;
+        auto fsd = std::make_unique<MachineInstr>(RVOpcodes::FSD); // 使用浮点保存指令
+        fsd->addOperand(std::make_unique<RegOperand>(reg));
+        fsd->addOperand(std::make_unique<MemOperand>(
+            std::make_unique<RegOperand>(PhysicalReg::S0),
+            std::make_unique<ImmOperand>(current_offset)
+        ));
+        entry_instrs.insert(insert_pos, std::move(fsd));
    }

-    // 5. 在函数结尾（ret之前）插入恢复指令，使用反向遍历来避免迭代器失效
+    // 4. 在函数结尾（ret之前）插入恢复指令
    for (auto& mbb : mfunc->getBlocks()) {
-        // 使用手动控制的反向循环
        for (auto it = mbb->getInstructions().begin(); it != mbb->getInstructions().end(); ++it) {
            if ((*it)->getOpcode() == RVOpcodes::RET) {
-                // 1. 创建一个临时vector来存储所有需要插入的恢复指令
-                std::vector<std::unique_ptr<MachineInstr>> restore_instrs;
+                current_offset = -16; // 重置偏移量用于恢复
                
-                int current_offset_load = base_offset;
-                // 以相同的顺序（例如 s1, s2, ...）创建恢复指令
-                for (PhysicalReg reg : sorted_regs) {
+                // 恢复整数寄存器 (ld) - 以与保存时相同的顺序
+                for (PhysicalReg reg : sorted_int_regs) {
+                    current_offset -= 8;
                    auto ld = std::make_unique<MachineInstr>(RVOpcodes::LD);
                    ld->addOperand(std::make_unique<RegOperand>(reg));
                    ld->addOperand(std::make_unique<MemOperand>(
                        std::make_unique<RegOperand>(PhysicalReg::S0),
-                        std::make_unique<ImmOperand>(current_offset_load)
+                        std::make_unique<ImmOperand>(current_offset)
                    ));
-                    restore_instrs.push_back(std::move(ld));
-                    current_offset_load -= 8;
+                    mbb->getInstructions().insert(it, std::move(ld));
                }

-                // 2. 使用 make_move_iterator 一次性将所有恢复指令插入到 RET 指令之前
-                //    这可以高效地转移指令的所有权，并且只让迭代器失效一次。
-                if (!restore_instrs.empty()) {
-                    mbb->getInstructions().insert(it, 
-                        std::make_move_iterator(restore_instrs.begin()),
-                        std::make_move_iterator(restore_instrs.end())
-                    );
+                // 恢复浮点寄存器 (fld)
+                for (PhysicalReg reg : sorted_fp_regs) {
+                    current_offset -= 8;
+                    auto fld = std::make_unique<MachineInstr>(RVOpcodes::FLD); // 使用浮点加载指令
+                    fld->addOperand(std::make_unique<RegOperand>(reg));
+                    fld->addOperand(std::make_unique<MemOperand>(
+                        std::make_unique<RegOperand>(PhysicalReg::S0),
+                        std::make_unique<ImmOperand>(current_offset)
+                    ));
+                    mbb->getInstructions().insert(it, std::move(fld));
                }
-
-                // 找到了RET并处理完毕后，就可以跳出内层循环，继续寻找下一个基本块
-                break; 
+                
+                // 处理完一个基本块的RET后，迭代器已失效，需跳出当前块的循环
+                goto next_block_label;
            }
        }
+        next_block_label:;
    }
 }