// mysysy/src/RISCv64Backend.cpp

#include "RISCv64Backend.h"

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <functional>
#include <iomanip>
#include <regex>
#include <sstream>
#include <stdexcept>

namespace sysy {

// 可用于分配的寄存器
const std::vector<RISCv64CodeGen::PhysicalReg> RISCv64CodeGen::allocable_regs = {
    // 整数寄存器
    PhysicalReg::T0, PhysicalReg::T1, PhysicalReg::T2, PhysicalReg::T3,
    PhysicalReg::T4, PhysicalReg::T5, PhysicalReg::T6,
    PhysicalReg::A0, PhysicalReg::A1, PhysicalReg::A2, PhysicalReg::A3,
    PhysicalReg::A4, PhysicalReg::A5, PhysicalReg::A6, PhysicalReg::A7,
    PhysicalReg::S0, PhysicalReg::S1, PhysicalReg::S2, PhysicalReg::S3,
    PhysicalReg::S4, PhysicalReg::S5, PhysicalReg::S6, PhysicalReg::S7,
    PhysicalReg::S8, PhysicalReg::S9, PhysicalReg::S10, PhysicalReg::S11,
    // 浮点寄存器
    PhysicalReg::F0, PhysicalReg::F1, PhysicalReg::F2, PhysicalReg::F3,
    PhysicalReg::F4, PhysicalReg::F5, PhysicalReg::F6, PhysicalReg::F7,
    PhysicalReg::F8, PhysicalReg::F9, PhysicalReg::F10, PhysicalReg::F11,
    PhysicalReg::F12, PhysicalReg::F13, PhysicalReg::F14, PhysicalReg::F15,
    PhysicalReg::F16, PhysicalReg::F17, PhysicalReg::F18, PhysicalReg::F19,
    PhysicalReg::F20, PhysicalReg::F21, PhysicalReg::F22, PhysicalReg::F23,
    PhysicalReg::F24, PhysicalReg::F25, PhysicalReg::F26, PhysicalReg::F27,
    PhysicalReg::F28, PhysicalReg::F29, PhysicalReg::F30, PhysicalReg::F31
};

// Converts a PhysicalReg enumerator into the register name written to the
// assembly output. Enumerators without a table entry yield "UNKNOWN_REG".
std::string RISCv64CodeGen::reg_to_string(PhysicalReg reg) {
    struct RegName {
        PhysicalReg reg;
        const char* name;
    };

    static const RegName kRegNames[] = {
        // Integer registers
        {PhysicalReg::ZERO, "x0"}, {PhysicalReg::RA, "ra"},
        {PhysicalReg::SP, "sp"},   {PhysicalReg::GP, "gp"},
        {PhysicalReg::TP, "tp"},
        {PhysicalReg::T0, "t0"},   {PhysicalReg::T1, "t1"},
        {PhysicalReg::T2, "t2"},
        {PhysicalReg::S0, "s0"},   {PhysicalReg::S1, "s1"},
        {PhysicalReg::A0, "a0"},   {PhysicalReg::A1, "a1"},
        {PhysicalReg::A2, "a2"},   {PhysicalReg::A3, "a3"},
        {PhysicalReg::A4, "a4"},   {PhysicalReg::A5, "a5"},
        {PhysicalReg::A6, "a6"},   {PhysicalReg::A7, "a7"},
        {PhysicalReg::S2, "s2"},   {PhysicalReg::S3, "s3"},
        {PhysicalReg::S4, "s4"},   {PhysicalReg::S5, "s5"},
        {PhysicalReg::S6, "s6"},   {PhysicalReg::S7, "s7"},
        {PhysicalReg::S8, "s8"},   {PhysicalReg::S9, "s9"},
        {PhysicalReg::S10, "s10"}, {PhysicalReg::S11, "s11"},
        {PhysicalReg::T3, "t3"},   {PhysicalReg::T4, "t4"},
        {PhysicalReg::T5, "t5"},   {PhysicalReg::T6, "t6"},
        // Floating-point registers
        {PhysicalReg::F0, "f0"},   {PhysicalReg::F1, "f1"},
        {PhysicalReg::F2, "f2"},   {PhysicalReg::F3, "f3"},
        {PhysicalReg::F4, "f4"},   {PhysicalReg::F5, "f5"},
        {PhysicalReg::F6, "f6"},   {PhysicalReg::F7, "f7"},
        {PhysicalReg::F8, "f8"},   {PhysicalReg::F9, "f9"},
        {PhysicalReg::F10, "f10"}, {PhysicalReg::F11, "f11"},
        {PhysicalReg::F12, "f12"}, {PhysicalReg::F13, "f13"},
        {PhysicalReg::F14, "f14"}, {PhysicalReg::F15, "f15"},
        {PhysicalReg::F16, "f16"}, {PhysicalReg::F17, "f17"},
        {PhysicalReg::F18, "f18"}, {PhysicalReg::F19, "f19"},
        {PhysicalReg::F20, "f20"}, {PhysicalReg::F21, "f21"},
        {PhysicalReg::F22, "f22"}, {PhysicalReg::F23, "f23"},
        {PhysicalReg::F24, "f24"}, {PhysicalReg::F25, "f25"},
        {PhysicalReg::F26, "f26"}, {PhysicalReg::F27, "f27"},
        {PhysicalReg::F28, "f28"}, {PhysicalReg::F29, "f29"},
        {PhysicalReg::F30, "f30"}, {PhysicalReg::F31, "f31"},
    };

    for (const RegName& entry : kRegNames) {
        if (entry.reg == reg) {
            return entry.name;
        }
    }
    return "UNKNOWN_REG";
}

// Top-level code generation entry point: returns the assembly text produced
// by module_gen() for the whole module.
std::string RISCv64CodeGen::code_gen() {
    return module_gen();
}

// 模块级代码生成 (处理全局变量和函数)
std::string RISCv64CodeGen::module_gen() {
    std::stringstream ss;
    bool has_globals = !module->getGlobals().empty();
    if (has_globals) {
        ss << ".data\n"; // 数据段
        for (const auto& global : module->getGlobals()) {
            ss << ".globl " << global->getName() << "\n"; // 声明全局符号
            ss << global->getName() << ":\n"; // 标签
            const auto& init_values = global->getInitValues();
            for (size_t i = 0; i < init_values.getValues().size(); ++i) {
                auto val = init_values.getValues()[i];
                auto count = init_values.getNumbers()[i];
                if (auto constant = dynamic_cast<ConstantValue*>(val)) {
                    for (unsigned j = 0; j < count; ++j) {
                        if (constant->isInt()) {
                            ss << "    .word " << constant->getInt() << "\n"; // 整数常量 (32位)
                        } else {
                            float f = constant->getFloat();
                            uint32_t float_bits = *(uint32_t*)&f;
                            ss << "    .word " << float_bits << "\n"; // 浮点常量 (32位)
                        }
                    }
                }
            }
        }
    }
    if (!module->getFunctions().empty()) {
        ss << ".text\n"; // 代码段
        for (const auto& func : module->getFunctions()) {
            ss << function_gen(func.second.get());
        }
    }
    return ss.str();
}

// Function-level code generation.
//
// Emits, in order:
//   1. the `.globl` directive and the function's entry label;
//   2. a prologue -- only when the 16-byte-aligned frame size is non-zero --
//      that lowers sp, stores ra and s0 as 8-byte values at the top of the
//      frame, and sets s0 = sp;
//   3. stores that copy up to eight incoming register arguments into the stack
//      slots of the entry block's argument allocas (sw from a<i> for int,
//      fsw from f<10+i> for float, where i is the argument position);
//   4. the code of every basic block.
// No epilogue is emitted here.
//
// Throws std::runtime_error when an argument slot is neither int nor float.
//
// NOTE(review): the frame size is written directly as an `addi`/`sd`
// immediate -- confirm frames never exceed the instruction's immediate range.
// NOTE(review): int and float arguments share a single position counter --
// confirm this matches how call sites place arguments.
std::string RISCv64CodeGen::function_gen(Function* func) {
    std::stringstream out;
    const auto& fn_name = func->getName();
    out << ".globl " << fn_name << "\n"
        << fn_name << ":\n";

    const RegAllocResult alloc_result = register_allocation(func);

    // Round the requested frame size up to a multiple of 16 bytes.
    const int frame_size = (alloc_result.stack_size + 15) & ~15;
    if (frame_size > 0) {
        out << "    addi sp, sp, -" << frame_size << "\n"
            << "    sd ra, " << (frame_size - 8) << "(sp)\n"
            << "    sd s0, " << (frame_size - 16) << "(sp)\n"
            << "    mv s0, sp\n";
    }

    BasicBlock* entry_bb = func->getEntryBlock();
    if (!entry_bb) {
        std::cerr << "错误: 函数 '" << func->getName() << "' 没有入口基本块。\n";
    } else {
        int arg_idx = 0;
        for (AllocaInst* param_slot : entry_bb->getArguments()) {
            if (arg_idx >= 8) {
                // Only the first eight arguments are handled; the rest are reported.
                std::cerr << "警告: 函数 '" << func->getName() << "' 的参数 (索引 " << arg_idx << ") 数量超过了 RISC-V 寄存器传递限制 (8个参数)。\n"
                          << "       这些参数目前未通过栈正确处理，可能导致错误。\n";
                break;
            }

            const auto slot_it = alloc_result.stack_map.find(param_slot);
            if (slot_it == alloc_result.stack_map.end()) {
                std::cerr << "警告: 函数参数对应的 AllocaInst '"
                          << (param_slot->getName().empty() ? "anonymous" : param_slot->getName())
                          << "' 没有在栈映射中找到。这可能导致后续代码生成错误。\n";
            } else {
                const int offset = slot_it->second;
                Type* slot_type = param_slot->getType()->as<PointerType>()->getBaseType();

                const bool is_int = slot_type->isInt();
                if (!is_int && !slot_type->isFloat()) {
                    throw std::runtime_error("Unsupported function argument type encountered during parameter saving to stack.");
                }

                // 32-bit stores: sw for int arguments, fsw for float arguments.
                const PhysicalReg first_arg_reg = is_int ? PhysicalReg::A0 : PhysicalReg::F10;
                const PhysicalReg src_reg =
                    static_cast<PhysicalReg>(static_cast<int>(first_arg_reg) + arg_idx);
                out << "    " << (is_int ? "sw" : "fsw") << " " << reg_to_string(src_reg)
                    << ", " << offset << "(s0)\n";
            }
            ++arg_idx;
        }
    }

    // Emit every basic block, numbering them in iteration order.
    int block_idx = 0;
    for (const auto& bb : func->getBasicBlocks()) {
        out << basicBlock_gen(bb.get(), alloc_result, block_idx);
        ++block_idx;
    }

    return out.str();
}


// Basic-block code generation.
//
// Steps:
//   1. Emit the block label when the block has a name. Unnamed blocks get no
//      label; a display name ("entry" for index 0, otherwise
//      ENTRY_BLOCK_PSEUDO_NAME + index) is used for debug output only.
//   2. Build the block's DAG and run instruction selection over every node,
//      walking the node list from last to first.
//   3. Walk the IR instructions in program order and emit the DAG node whose
//      `value` is that instruction. When several nodes share an instruction,
//      the one latest in the node list is used.
std::string RISCv64CodeGen::basicBlock_gen(BasicBlock* bb, const RegAllocResult& alloc, int block_idx) {
    std::stringstream out;

    std::string label = bb->getName();
    if (!label.empty()) {
        out << label << ":\n";
    } else if (block_idx == 0) {
        label = "entry";
    } else {
        label = ENTRY_BLOCK_PSEUDO_NAME + std::to_string(block_idx);
    }
    if (DEBUG) std::cerr << "=== 生成基本块: " << label << " ===\n";

    auto dag = build_dag(bb);
    if (DEBUG) {
        print_dag(dag, label);
    }

    // Instruction selection, last node first. select_instructions() recurses
    // into operands and skips nodes that already have an instruction.
    for (auto i = dag.size(); i-- > 0;) {
        select_instructions(dag[i].get(), alloc);
    }

    // Index DAG nodes by the IR instruction they represent.
    std::map<Instruction*, DAGNode*> node_for_inst;
    for (const auto& owned : dag) {
        if (auto* as_inst = dynamic_cast<Instruction*>(owned->value)) {
            node_for_inst[as_inst] = owned.get();
        }
    }

    // Emit in IR order; `emitted_nodes` is shared across the emit calls of
    // this block.
    std::set<DAGNode*> emitted_nodes;
    for (const auto& inst_ptr : bb->getInstructions()) {
        const auto found = node_for_inst.find(inst_ptr.get());
        if (found == node_for_inst.end()) {
            continue; // this instruction has no DAG node of its own
        }
        emit_instructions(found->second, out, alloc, emitted_nodes);
    }

    return out.str();
}

// Helper that creates a DAGNode, stores it in `nodes_storage` (which owns it)
// and returns a non-owning pointer to it.
//
// Reuse: when `val` already has an entry in `value_to_node` and `kind` is not
// STORE, RETURN, BRANCH, ALLOCA_ADDR or UNARY, the existing node is returned
// and nothing is created.
//
// For a newly created node:
//   - `result_vreg` is copied from `value_vreg_map` when `val` has an entry
//     there and is not an AllocaInst;
//   - under the same condition, and unless `kind` is STORE, RETURN or BRANCH,
//     the node is recorded in `value_to_node` so later lookups reuse it.
sysy::RISCv64CodeGen::DAGNode* sysy::RISCv64CodeGen::create_node(
    DAGNode::NodeKind kind,
    Value* val,
    std::map<Value*, DAGNode*>& value_to_node, // supplied by the caller
    std::vector<std::unique_ptr<DAGNode>>& nodes_storage // supplied by the caller
) {
    const bool is_side_effect_kind =
        kind == DAGNode::STORE || kind == DAGNode::RETURN || kind == DAGNode::BRANCH;

    if (val && !is_side_effect_kind && kind != DAGNode::ALLOCA_ADDR && kind != DAGNode::UNARY) {
        const auto cached = value_to_node.find(val);
        if (cached != value_to_node.end()) {
            return cached->second;
        }
    }

    nodes_storage.push_back(std::make_unique<DAGNode>(kind));
    DAGNode* const created = nodes_storage.back().get();
    created->value = val;

    // AllocaInst values never receive a virtual register or a map entry.
    if (val && !dynamic_cast<AllocaInst*>(val)) {
        const auto vreg = value_vreg_map.find(val);
        if (vreg != value_vreg_map.end()) {
            created->result_vreg = vreg->second;
            if (!is_side_effect_kind) {
                value_to_node[val] = created;
            }
        }
    }
    return created;
}


// Helper that returns the DAG node to use as an operand for `val_ir`.
//
// Resolution order:
//   1. a node already recorded in `value_to_node`;
//   2. ConstantValue -> CONSTANT node;
//   3. AllocaInst    -> ALLOCA_ADDR node;
//   4. GlobalValue   -> CONSTANT node (no dedicated global-address kind is
//      used here);
//   5. anything else -> LOAD node, i.e. the value is treated as not yet
//      computed in this block.
sysy::RISCv64CodeGen::DAGNode* sysy::RISCv64CodeGen::get_operand_node(
    Value* val_ir,
    std::map<Value*, DAGNode*>& value_to_node,
    std::vector<std::unique_ptr<DAGNode>>& nodes_storage
) {
    const auto known = value_to_node.find(val_ir);
    if (known != value_to_node.end()) {
        return known->second;
    }

    if (dynamic_cast<ConstantValue*>(val_ir)) {
        return create_node(DAGNode::CONSTANT, val_ir, value_to_node, nodes_storage);
    }
    if (dynamic_cast<AllocaInst*>(val_ir)) {
        return create_node(DAGNode::ALLOCA_ADDR, val_ir, value_to_node, nodes_storage);
    }
    if (dynamic_cast<GlobalValue*>(val_ir)) {
        return create_node(DAGNode::CONSTANT, val_ir, value_to_node, nodes_storage);
    }

    return create_node(DAGNode::LOAD, val_ir, value_to_node, nodes_storage);
}

// Builds the instruction-selection DAG for one basic block.
//
// Walks the block's IR instructions in order and creates one DAG node per
// instruction (plus nodes for operands that have none yet), wiring the
// `operands` / `users` edges. Returns the vector that owns every node, in
// creation order.
//
// Handled instructions: alloca, store, load, binary, unary, call, return,
// conditional branch and unconditional branch; anything else is ignored.
//
// Indexed load/store: only the first index is used and the element size is
// fixed at 4 bytes. The address is modelled as `ptr + index * 4` using two
// synthetic BinaryInst objects kept alive in `temp_instructions_storage`.
// NOTE(review): whether those synthetic instructions get an entry in
// `value_vreg_map` is not visible here -- confirm they receive a register.
std::vector<std::unique_ptr<RISCv64CodeGen::DAGNode>> RISCv64CodeGen::build_dag(BasicBlock* bb) {
    std::vector<std::unique_ptr<DAGNode>> nodes_storage; // owns every node
    std::map<Value*, DAGNode*> value_to_node;            // IR value -> node lookup

    // Records that `user` consumes `operand`, in both directions.
    auto link = [](DAGNode* user, DAGNode* operand) {
        user->operands.push_back(operand);
        operand->users.push_back(user);
    };

    // Builds the nodes computing `ptr + index * 4` and returns the node that
    // holds the final address. Shared by the indexed store and load paths.
    auto build_indexed_address = [&](Value* ptr_ir, DAGNode* ptr_node, Value* index_ir) -> DAGNode* {
        DAGNode* index_node = get_operand_node(index_ir, value_to_node, nodes_storage);

        // Element size constant (4 bytes).
        Value* elem_size_ir = ConstantValue::get(4);
        DAGNode* size_node = create_node(DAGNode::CONSTANT, elem_size_ir, value_to_node, nodes_storage);

        // byte_offset = index * element_size
        Instruction* mul_inst = BinaryInst::create(BinaryInst::kMul, Type::getIntType(), index_ir, elem_size_ir, bb);
        temp_instructions_storage.push_back(std::unique_ptr<Instruction>(mul_inst));
        DAGNode* byte_offset_node = create_node(DAGNode::BINARY, mul_inst, value_to_node, nodes_storage);
        link(byte_offset_node, index_node);
        link(byte_offset_node, size_node);

        // final_address = base_address + byte_offset
        Instruction* add_inst = BinaryInst::create(BinaryInst::kAdd, Type::getIntType(), ptr_ir, mul_inst, bb);
        temp_instructions_storage.push_back(std::unique_ptr<Instruction>(add_inst));
        DAGNode* final_addr_node = create_node(DAGNode::BINARY, add_inst, value_to_node, nodes_storage);
        link(final_addr_node, ptr_node);
        link(final_addr_node, byte_offset_node);

        return final_addr_node;
    };

    for (const auto& inst_ptr : bb->getInstructions()) {
        auto inst = inst_ptr.get();

        if (auto alloca = dynamic_cast<AllocaInst*>(inst)) {
            // An alloca yields no register value; the node only represents
            // the address of the allocated slot.
            create_node(DAGNode::ALLOCA_ADDR, alloca, value_to_node, nodes_storage);

        } else if (auto store = dynamic_cast<StoreInst*>(inst)) {
            auto store_node = create_node(DAGNode::STORE, store, value_to_node, nodes_storage);

            // Value to store, then the base pointer of the destination.
            DAGNode* val_node = get_operand_node(store->getValue(), value_to_node, nodes_storage);
            Value* ptr_ir = store->getPointer();
            DAGNode* ptr_node = get_operand_node(ptr_ir, value_to_node, nodes_storage);

            // Operand 0 is the stored value.
            // NOTE(review): unlike every other edge, the value node's `users`
            // list is not updated here -- confirm whether that is intended.
            store_node->operands.push_back(val_node);

            // Operand 1 is the address: the base pointer, or the computed
            // element address when the store carries indices.
            DAGNode* addr_node = ptr_node;
            if (store->getNumIndices() > 0) {
                if (DEBUG) std::cerr << "处理带索引的 StoreInst: " << store->getNumIndices() << " 个索引\n";
                addr_node = build_indexed_address(ptr_ir, ptr_node, store->getIndex(0));
            }
            link(store_node, addr_node);

        } else if (auto load = dynamic_cast<LoadInst*>(inst)) {
            auto load_node = create_node(DAGNode::LOAD, load, value_to_node, nodes_storage);

            Value* ptr_ir = load->getPointer();
            DAGNode* ptr_node = get_operand_node(ptr_ir, value_to_node, nodes_storage);

            // The single operand is the address, computed as for stores.
            DAGNode* addr_node = ptr_node;
            if (load->getNumIndices() > 0) {
                addr_node = build_indexed_address(ptr_ir, ptr_node, load->getIndex(0));
            }
            link(load_node, addr_node);

        } else if (auto bin = dynamic_cast<BinaryInst*>(inst)) {
            if (value_to_node.count(bin)) continue; // already has a node

            // `0 - x` is modelled as a UNARY node on x.
            if (bin->getKind() == BinaryInst::kSub || bin->getKind() == BinaryInst::kFSub) {
                if (auto const_lhs = dynamic_cast<ConstantValue*>(bin->getLhs())) {
                    bool is_neg = false;
                    if (const_lhs->getType()->isInt()) {
                        is_neg = (const_lhs->getInt() == 0);
                    } else if (const_lhs->getType()->isFloat()) {
                        // Require an exact zero: a tiny non-zero constant
                        // (e.g. 1e-8f) must stay a real subtraction.
                        is_neg = (const_lhs->getFloat() == 0.0f);
                    }

                    if (is_neg) {
                        auto unary_node = create_node(DAGNode::UNARY, bin, value_to_node, nodes_storage);
                        DAGNode* operand_node = get_operand_node(bin->getRhs(), value_to_node, nodes_storage);
                        link(unary_node, operand_node);
                        continue;
                    }
                }
            }

            // Regular binary operation.
            auto bin_node = create_node(DAGNode::BINARY, bin, value_to_node, nodes_storage);
            DAGNode* lhs_node = get_operand_node(bin->getLhs(), value_to_node, nodes_storage);
            DAGNode* rhs_node = get_operand_node(bin->getRhs(), value_to_node, nodes_storage);
            link(bin_node, lhs_node);
            link(bin_node, rhs_node);

        } else if (auto un_inst = dynamic_cast<UnaryInst*>(inst)) {
            if (value_to_node.count(un_inst)) continue;

            auto unary_node = create_node(DAGNode::UNARY, un_inst, value_to_node, nodes_storage);
            DAGNode* operand_node = get_operand_node(un_inst->getOperand(), value_to_node, nodes_storage);
            link(unary_node, operand_node);

        } else if (auto call = dynamic_cast<CallInst*>(inst)) {
            if (value_to_node.count(call)) continue;

            // One operand per call argument, in argument order.
            auto call_node = create_node(DAGNode::CALL, call, value_to_node, nodes_storage);
            for (auto arg : call->getArguments()) {
                DAGNode* arg_node = get_operand_node(arg->getValue(), value_to_node, nodes_storage);
                link(call_node, arg_node);
            }

        } else if (auto ret = dynamic_cast<ReturnInst*>(inst)) {
            if (DEBUG) std::cerr << "处理 RETURN 指令: " << ret->getName() << "\n";
            auto ret_node = create_node(DAGNode::RETURN, ret, value_to_node, nodes_storage);
            if (ret->hasReturnValue()) {
                DAGNode* val_node = get_operand_node(ret->getReturnValue(), value_to_node, nodes_storage);
                link(ret_node, val_node);
            }

        } else if (auto cond_br = dynamic_cast<CondBrInst*>(inst)) {
            auto br_node = create_node(DAGNode::BRANCH, cond_br, value_to_node, nodes_storage);
            auto cond_ir = cond_br->getCondition();

            if (auto constant_cond = dynamic_cast<ConstantValue*>(cond_ir)) {
                // Constant condition: the branch becomes a plain jump, so its
                // instruction text is fixed right here.
                br_node->inst = "j " + (constant_cond->getInt() ? cond_br->getThenBlock()->getName() : cond_br->getElseBlock()->getName());
            } else {
                DAGNode* cond_node = get_operand_node(cond_ir, value_to_node, nodes_storage);
                link(br_node, cond_node);
            }

        } else if (auto uncond_br = dynamic_cast<UncondBrInst*>(inst)) {
            auto br_node = create_node(DAGNode::BRANCH, uncond_br, value_to_node, nodes_storage);
            br_node->inst = "j " + uncond_br->getBlock()->getName();
        }
    }
    return nodes_storage;
}

// 打印 DAG
void RISCv64CodeGen::print_dag(const std::vector<std::unique_ptr<DAGNode>>& dag, const std::string& bb_name) {
    std::cerr << "=== DAG for Basic Block: " << bb_name << " ===\n";
    std::set<DAGNode*> visited;

    // 辅助映射，用于在打印输出中为节点分配顺序 ID
    std::map<DAGNode*, int> node_to_id;
    int current_id = 0;
    for (const auto& node_ptr : dag) {
        node_to_id[node_ptr.get()] = current_id++;
    }

    std::function<void(DAGNode*, int)> print_node = [&](DAGNode* node, int indent) {
        if (!node) return;

        std::string current_indent(indent, ' ');
        int node_id = node_to_id.count(node) ? node_to_id[node] : -1; // 获取分配的 ID

        std::cerr << current_indent << "Node#" << node_id << ": " << node->getNodeKindString();
        if (!node->result_vreg.empty()) {
            std::cerr << " (vreg: " << node->result_vreg << ")";
        }

        if (node->value) {
            std::cerr << " [";
            if (auto inst = dynamic_cast<Instruction*>(node->value)) {
                std::cerr << inst->getKindString();
                if (!inst->getName().empty()) {
                    std::cerr << "(" << inst->getName() << ")";
                }
            } else if (auto constant = dynamic_cast<ConstantValue*>(node->value)) {
                if (constant->isInt()) {
                    std::cerr << "ConstInt(" << constant->getInt() << ")";
                } else {
                    std::cerr << "ConstFloat(" << constant->getFloat() << ")";
                }
            } else if (auto global = dynamic_cast<GlobalValue*>(node->value)) {
                std::cerr << "Global(" << global->getName() << ")";
            } else if (auto alloca = dynamic_cast<AllocaInst*>(node->value)) {
                std::cerr << "Alloca(" << (alloca->getName().empty() ? ("%" + std::to_string(reinterpret_cast<uintptr_t>(alloca) % 1000)) : alloca->getName()) << ")";
            }
            std::cerr << "]";
        }
        std::cerr << " -> Inst: \"" << node->inst << "\""; // 打印选定的指令
        std::cerr << "\n";

        if (visited.find(node) != visited.end()) {
            std::cerr << current_indent << " (已打印后代)\n";
            return; // 避免循环的无限递归
        }
        visited.insert(node);

        if (!node->operands.empty()) {
            std::cerr << current_indent << "  操作数:\n";
            for (auto operand : node->operands) {
                print_node(operand, indent + 4);
            }
        }
        // 移除了 users 打印，以简化输出并避免 DAG 中的冗余递归。
        // Users 更适用于向上遍历，而不是向下遍历。
    };

    // 遍历 DAG，以尊重依赖的方式打印。
    // 当前实现：遍历所有节点，从作为“根”的节点开始打印（没有用户或副作用节点）。
    // 每次打印新的根时，重置 visited 集合，以允许共享子图被重新打印（尽管这不是最高效的方式）。
    for (const auto& node_ptr : dag) {
        // 只有那些没有用户或者表示副作用（如 store/branch/return）的节点才被视为“根”
        // 这样可以确保所有指令（包括那些没有明确结果的）都被打印
        if (node_ptr->users.empty() || node_ptr->kind == DAGNode::STORE || node_ptr->kind == DAGNode::RETURN || node_ptr->kind == DAGNode::BRANCH) {
            visited.clear(); // 为每个根重置 visited，允许重新打印共享子图
            print_node(node_ptr.get(), 0);
        }
    }
    std::cerr << "=== DAG 结束 ===\n\n";
}

// Instruction selection.
//
// Fills `node->inst` with the RISC-V assembly text for `node`. Operands are
// processed first (depth-first), and a node whose `inst` is already non-empty
// is skipped, so shared sub-DAGs are selected only once.
//
// @param node   DAG node to select for; a null pointer is ignored.
// @param alloc  Register-allocation result: `vreg_to_preg` supplies the
//               physical register of each virtual register and `stack_map`
//               the s0-relative offset of each alloca.
// @throws std::runtime_error for binary/unary instruction kinds and DAG node
//         kinds this backend does not handle.
// NOTE(review): the LOAD/STORE paths call `alloc.stack_map.at(...)`, which
// presumably throws std::out_of_range for an alloca that has no stack slot,
// whereas the address-computation path silently falls back to offset 0.
// Confirm which of the two behaviours is intended.
void RISCv64CodeGen::select_instructions(DAGNode* node, const RegAllocResult& alloc) {
    if (!node) return;
    if (!node->inst.empty()) return; // Already selected; do not process twice.

    // Select the operands first so that dependencies are handled before their user.
    for (auto operand : node->operands) {
        if (operand) {
            select_instructions(operand, alloc);
        }
    }

    std::stringstream ss_inst; // Accumulates the (possibly multi-line) instruction text.

    // Name of the physical register assigned to `vreg`, or "t0" when the
    // virtual register is empty or was left uncoloured.
    auto get_preg_or_temp = [&](const std::string& vreg) {
        if (vreg.empty()) {
             if (DEBUG) std::cerr << "警告: 虚拟寄存器 (空字符串) 没有分配物理寄存器，使用临时寄存器 t0 代替。\n";
             return reg_to_string(PhysicalReg::T0);
        }
        if (alloc.vreg_to_preg.count(vreg)) {
            return reg_to_string(alloc.vreg_to_preg.at(vreg));
        }
        if (DEBUG) std::cerr << "警告: 虚拟寄存器 " << vreg << " 没有分配物理寄存器，使用临时寄存器 t0 代替。\n";
        return reg_to_string(PhysicalReg::T0); // Fall back to the temporary t0.
    };

    // Stack offset of `val` as decimal text, or "0" when `val` has no slot.
    auto get_stack_offset = [&](Value* val) -> std::string {
        if (alloc.stack_map.count(val)) {
            if (DEBUG) {
                // Debug traces go to stderr like every other diagnostic in this
                // function, so they cannot interleave with output on stdout.
                std::cerr << "获取栈变量的内存偏移量，变量名： " << (val ? val->getName() : "unknown") << std::endl;
            }
            return std::to_string(alloc.stack_map.at(val));
        }
        if (DEBUG) std::cerr << "警告: 栈变量 " << (val ? val->getName() : "unknown") << " 没有在栈映射中找到，使用默认偏移 0。\n";
        return std::string("0"); // Default / error case.
    };

    switch (node->kind) {
        case DAGNode::CONSTANT: {
            // Materialise a constant, or the address of a global.
            if (auto constant = dynamic_cast<ConstantValue*>(node->value)) {
                std::string dest_reg = get_preg_or_temp(node->result_vreg);
                if (constant->isInt()) {
                    ss_inst << "li " << dest_reg << ", " << constant->getInt();
                } else {
                    // Load the float's raw bit pattern, then move it with fmv.w.x.
                    // NOTE(review): both instructions name the same register, so
                    // this only assembles if dest_reg is valid for `li` AND as an
                    // fmv.w.x destination; the pointer cast below is also a
                    // strict-aliasing violation. Left as-is pending a decision on
                    // how float constants should be materialised.
                    float f = constant->getFloat();
                    uint32_t float_bits = *(uint32_t*)&f;
                    ss_inst << "li " << dest_reg << ", " << float_bits << "\n";
                    ss_inst << "fmv.w.x " << dest_reg << ", " << dest_reg;
                }
            } else if (auto global = dynamic_cast<GlobalValue*>(node->value)) {
                std::string dest_reg = get_preg_or_temp(node->result_vreg);
                ss_inst << "la " << dest_reg << ", " << global->getName();
            }
            break;
        }
        case DAGNode::ALLOCA_ADDR: {
            // No instruction of its own; LOAD/STORE/BINARY consume the slot directly.
            break;
        }
        case DAGNode::LOAD: {
            if (node->operands.empty() || !node->operands[0]) break;
            std::string dest_reg = get_preg_or_temp(node->result_vreg);
            DAGNode* ptr_node = node->operands[0];

            if (ptr_node->kind == DAGNode::ALLOCA_ADDR) {
                // Stack variable: address it relative to s0.
                if (auto alloca_inst = dynamic_cast<AllocaInst*>(ptr_node->value)) {
                    int offset = alloc.stack_map.at(alloca_inst);
                    ss_inst << "lw " << dest_reg << ", " << offset << "(s0)";
                }
            } else {
                // Computed pointer held in a register.
                std::string ptr_reg = get_preg_or_temp(ptr_node->result_vreg);
                ss_inst << "lw " << dest_reg << ", 0(" << ptr_reg << ")";
            }
            break;
        }
        case DAGNode::STORE: {
            // operands[0] is the value, operands[1] the destination pointer.
            if (node->operands.size() < 2 || !node->operands[0] || !node->operands[1]) break;
            DAGNode* val_node = node->operands[0];
            DAGNode* ptr_node = node->operands[1];

            // Constants and computed values are both read from their register
            // (the original had two identical branches here).
            std::string src_reg = get_preg_or_temp(val_node->result_vreg);

            if (ptr_node->kind == DAGNode::ALLOCA_ADDR) {
                if (auto alloca_inst = dynamic_cast<AllocaInst*>(ptr_node->value)) {
                    int offset = alloc.stack_map.at(alloca_inst);
                    ss_inst << "sw " << src_reg << ", " << offset << "(s0)";
                }
            } else {
                std::string ptr_reg = get_preg_or_temp(ptr_node->result_vreg);
                ss_inst << "sw " << src_reg << ", 0(" << ptr_reg << ")";
            }
            break;
        }
        case DAGNode::BINARY: {
            if (node->operands.size() < 2 || !node->operands[0] || !node->operands[1]) break;
            auto bin = dynamic_cast<BinaryInst*>(node->value);
            if (!bin) break;

            std::string dest_reg = get_preg_or_temp(node->result_vreg);

            // Recognise `alloca base + byte offset` address arithmetic.
            if (bin->getKind() == BinaryInst::kAdd) {
                DAGNode* op0 = node->operands[0];
                DAGNode* op1 = node->operands[1];

                DAGNode* base_node = nullptr;
                DAGNode* offset_node = nullptr;
                bool is_alloca_base = false;

                if (op0->kind == DAGNode::ALLOCA_ADDR) {
                    base_node = op0;
                    offset_node = op1;
                    is_alloca_base = true;
                } else if (op1->kind == DAGNode::ALLOCA_ADDR) {
                    base_node = op1;
                    offset_node = op0;
                    is_alloca_base = true;
                }

                if (is_alloca_base) {
                    if (auto alloca_inst = dynamic_cast<AllocaInst*>(base_node->value)) {
                        std::string offset_str = get_stack_offset(alloca_inst);
                        std::string offset_reg = get_preg_or_temp(offset_node->result_vreg); // register holding the byte offset

                        // Final address = s0 + byte offset + slot offset.
                        //  * The result is a 64-bit pointer, so the full-width
                        //    `add` is required; `addw` would truncate the address
                        //    to 32 bits and sign-extend it.
                        //  * offset_reg is read before dest_reg is written, so the
                        //    sequence stays correct when the allocator assigns
                        //    both to the same physical register.
                        ss_inst << "add " << dest_reg << ", s0, " << offset_reg << "\n";
                        ss_inst << "    addi " << dest_reg << ", " << dest_reg << ", " << offset_str;
                        node->inst = ss_inst.str();
                        break;
                    }
                }
            }

            std::string lhs_reg = get_preg_or_temp(node->operands[0]->result_vreg);
            std::string rhs_reg = get_preg_or_temp(node->operands[1]->result_vreg);

            std::string opcode;
            switch (bin->getKind()) {
                // RV64: use the 'w'-suffixed 32-bit forms so results are sign-extended.
                case BinaryInst::kAdd: opcode = "addw"; break;
                case BinaryInst::kSub: opcode = "subw"; break;
                case BinaryInst::kMul: opcode = "mulw"; break;
                case Instruction::kDiv: opcode = "divw"; break;
                case Instruction::kRem: opcode = "remw"; break;
                case BinaryInst::kICmpEQ:
                    // lhs == rhs  <=>  (lhs - rhs) == 0
                    ss_inst << "subw " << dest_reg << ", " << lhs_reg << ", " << rhs_reg << "\n";
                    ss_inst << "    seqz " << dest_reg << ", " << dest_reg;
                    node->inst = ss_inst.str();
                    return;
                case BinaryInst::kICmpGE:
                    // lhs >= rhs  <=>  !(lhs < rhs). slt compares full 64-bit
                    // registers, which is valid because lw and the 'w'
                    // instructions sign-extend their results.
                    ss_inst << "slt " << dest_reg << ", " << lhs_reg << ", " << rhs_reg << "\n";
                    ss_inst << "    xori " << dest_reg << ", " << dest_reg << ", 1";
                    node->inst = ss_inst.str();
                    return;
                case BinaryInst::kICmpGT:
                    // lhs > rhs  <=>  rhs < lhs
                    opcode = "slt";
                    ss_inst << opcode << " " << dest_reg << ", " << rhs_reg << ", " << lhs_reg;
                    node->inst = ss_inst.str();
                    return;
                case BinaryInst::kICmpLE:
                    // lhs <= rhs  <=>  !(rhs < lhs)
                    ss_inst << "slt " << dest_reg << ", " << rhs_reg << ", " << lhs_reg << "\n";
                    ss_inst << "    xori " << dest_reg << ", " << dest_reg << ", 1";
                    node->inst = ss_inst.str();
                    return;
                case BinaryInst::kICmpLT:
                    opcode = "slt";
                    ss_inst << opcode << " " << dest_reg << ", " << lhs_reg << ", " << rhs_reg;
                    node->inst = ss_inst.str();
                    return;
                case BinaryInst::kICmpNE:
                    // lhs != rhs  <=>  (lhs - rhs) != 0
                    ss_inst << "subw " << dest_reg << ", " << lhs_reg << ", " << rhs_reg << "\n";
                    ss_inst << "    snez " << dest_reg << ", " << dest_reg;
                    node->inst = ss_inst.str();
                    return;
                default:
                    throw std::runtime_error("不支持的二元指令类型: " + bin->getKindString());
            }
            if (!opcode.empty()) {
                ss_inst << opcode << " " << dest_reg << ", " << lhs_reg << ", " << rhs_reg;
            }
            break;
        }
        case DAGNode::UNARY: {
            if (node->operands.empty() || !node->operands[0]) break;
            auto unary = dynamic_cast<UnaryInst*>(node->value);
            if (!unary) break;

            std::string dest_reg = get_preg_or_temp(node->result_vreg);
            std::string src_reg = get_preg_or_temp(node->operands[0]->result_vreg);

            switch (unary->getKind()) {
                case UnaryInst::kNeg:
                    // 32-bit negate: 0 - src (what the negw pseudo-instruction expands to).
                    ss_inst << "subw " << dest_reg << ", x0, " << src_reg;
                    break;
                case UnaryInst::kNot:
                    // Logical not: dest = (src == 0) ? 1 : 0.
                    ss_inst << "seqz " << dest_reg << ", " << src_reg;
                    break;
                case UnaryInst::kFNeg:
                case UnaryInst::kFNot:
                case UnaryInst::kFtoI:
                case UnaryInst::kItoF:
                case UnaryInst::kBitFtoI:
                case UnaryInst::kBitItoF:
                    // Floating-point unary operations are not supported yet.
                    throw std::runtime_error("不支持的浮点一元指令类型: " + unary->getKindString());
                default:
                    throw std::runtime_error("不支持的一元指令类型: " + unary->getKindString());
            }
            break;
        }
        case DAGNode::CALL: {
            if (!node->value) break;
            auto call = dynamic_cast<CallInst*>(node->value);
            if (!call) break;

            // Move up to eight arguments into a0..a7; further operands are ignored here.
            // NOTE(review): the moves are emitted sequentially, so an argument
            // that lives in an a-register written by an earlier move would be
            // clobbered — confirm the allocator prevents that.
            for (size_t i = 0; i < node->operands.size() && i < 8; ++i) {
                if (node->operands[i] && !node->operands[i]->result_vreg.empty()) {
                    ss_inst << "mv " << reg_to_string(static_cast<PhysicalReg>(static_cast<int>(PhysicalReg::A0) + i))
                            << ", " << get_preg_or_temp(node->operands[i]->result_vreg) << "\n";
                } else if (node->operands[i] && node->operands[i]->kind == DAGNode::CONSTANT) {
                    // Constant without a virtual register: materialise it directly.
                    if (auto const_val = dynamic_cast<ConstantValue*>(node->operands[i]->value)) {
                        ss_inst << "li " << reg_to_string(static_cast<PhysicalReg>(static_cast<int>(PhysicalReg::A0) + i))
                                << ", " << const_val->getInt() << "\n";
                    } else if (auto global_val = dynamic_cast<GlobalValue*>(node->operands[i]->value)) {
                         ss_inst << "la " << reg_to_string(static_cast<PhysicalReg>(static_cast<int>(PhysicalReg::A0) + i))
                                << ", " << global_val->getName() << "\n";
                    }
                }
            }
            ss_inst << "call " << call->getCallee()->getName();

            // Copy the return value out of a0 when the call produces one.
            if ((call->getType()->isInt() || call->getType()->isFloat()) && !node->result_vreg.empty()) {
                ss_inst << "\nmv " << get_preg_or_temp(node->result_vreg) << ", a0";
            }
            break;
        }
        case DAGNode::RETURN: {
            // Place the return value (if any) in a0.
            if (!node->operands.empty() && node->operands[0]) {
                std::string return_val_reg = get_preg_or_temp(node->operands[0]->result_vreg);
                ss_inst << "mv a0, " << return_val_reg << "\n";
            }

            // Epilogue: restore the 8-byte ra and s0 with ld, then release the
            // 16-byte-aligned frame.
            if (alloc.stack_size > 0) {
                int aligned_stack_size = (alloc.stack_size + 15) & ~15;
                ss_inst << "    ld ra, " << (aligned_stack_size - 8) << "(sp)\n";
                ss_inst << "    ld s0, " << (aligned_stack_size - 16) << "(sp)\n";
                ss_inst << "    addi sp, sp, " << aligned_stack_size << "\n";
            }
            ss_inst << "    ret";
            break;
        }
        case DAGNode::BRANCH: {
            auto br = dynamic_cast<CondBrInst*>(node->value);
            auto uncond_br = dynamic_cast<UncondBrInst*>(node->value);

            // node->inst is always empty at this point because of the early
            // return at the top; the else branch is kept only as a safeguard.
            if (node->inst.empty()) {
                if (br) {
                    if (node->operands.empty() || !node->operands[0]) break;
                    std::string cond_reg = get_preg_or_temp(node->operands[0]->result_vreg);
                    std::string then_block = br->getThenBlock()->getName();
                    std::string else_block = br->getElseBlock()->getName();

                    // Unnamed blocks get a pseudo label.
                    if (then_block.empty()) {
                        then_block = ENTRY_BLOCK_PSEUDO_NAME + "then";
                    }
                    if (else_block.empty()) {
                        else_block = ENTRY_BLOCK_PSEUDO_NAME + "else";
                    }

                    ss_inst << "bnez " << cond_reg << ", " << then_block << "\n";
                    ss_inst << "    j " << else_block;
                } else if (uncond_br) {
                    std::string target_block = uncond_br->getBlock()->getName();
                    if (target_block.empty()) {
                        target_block = ENTRY_BLOCK_PSEUDO_NAME + "target";
                    }
                    ss_inst << "j " << target_block;
                }
            } else {
                ss_inst << node->inst;
            }
            break;
        }
        default:
            throw std::runtime_error("不支持的节点类型: " + node->getNodeKindString());
    }
    node->inst = ss_inst.str(); // Store the generated instruction text.
}

// Instruction emission.
//
// Writes the already-selected assembly text of `node` to `ss`, after first
// emitting all of its operands so that dependencies come earlier in the
// output. Every node is emitted at most once per `emitted_nodes` set.
//
// Each line of `node->inst` has a leading `label:` prefix and surrounding
// whitespace stripped; any remaining virtual-register names (the node's own
// result, then its operands' results, in that order) are replaced by the
// physical register recorded in `alloc.vreg_to_preg`; the line is then
// written with a four-space indent.
//
// @param node           DAG node to emit; null or already-emitted nodes are ignored.
// @param ss             Output stream receiving the assembly lines.
// @param alloc          Register-allocation result used for vreg substitution.
// @param emitted_nodes  Nodes emitted so far; updated by this call.
void RISCv64CodeGen::emit_instructions(DAGNode* node, std::stringstream& ss, const RegAllocResult& alloc, std::set<DAGNode*>& emitted_nodes) {
    if (!node || emitted_nodes.count(node)) {
        return; // Null, or already emitted.
    }

    // Emit operands first so that dependencies are satisfied.
    for (auto operand : node->operands) {
        if (operand) {
            emit_instructions(operand, ss, alloc, emitted_nodes);
        }
    }

    emitted_nodes.insert(node);

    if (node->inst.empty()) {
        return; // Nothing to print for this node (e.g. ALLOCA_ADDR).
    }

    // std::regex construction is expensive, so the two fixed patterns are
    // compiled once for the whole program ...
    static const std::regex label_prefix("^\\s*[^\\s:]*:\\s*"); // leading `label:`
    static const std::regex outer_space("^\\s+|\\s+$");         // leading/trailing blanks

    // ... and the per-vreg patterns once per node instead of once per line.
    // Order matters and matches the original: result vreg first, then operands.
    std::vector<std::pair<std::regex, std::string>> substitutions;
    auto add_substitution = [&](const std::string& vreg) {
        if (vreg.empty()) return;
        auto found = alloc.vreg_to_preg.find(vreg);
        if (found == alloc.vreg_to_preg.end()) return;
        substitutions.emplace_back(std::regex("\\b" + vreg + "\\b"), reg_to_string(found->second));
    };
    add_substitution(node->result_vreg);
    for (auto operand : node->operands) {
        if (operand) {
            add_substitution(operand->result_vreg);
        }
    }

    // Split the (possibly multi-line) instruction text and process each line.
    std::stringstream node_inst_ss(node->inst);
    std::string line;

    while (std::getline(node_inst_ss, line, '\n')) {
        line = std::regex_replace(line, label_prefix, "");
        line = std::regex_replace(line, outer_space, "");

        if (line.empty()) continue;

        std::string processed_line = line;
        for (const auto& substitution : substitutions) {
            processed_line = std::regex_replace(processed_line, substitution.first, substitution.second);
        }

        ss << "    " << processed_line << "\n";
    }
}

// Helper: renders a set of strings as "{a, b, c}" ("{}" when empty).
// Elements appear in the set's own iteration order.
std::string print_set(const std::set<std::string>& s) {
    std::stringstream out;
    out << "{";
    for (auto it = s.begin(); it != s.end(); ++it) {
        if (it != s.begin()) {
            out << ", "; // separator goes before every element but the first
        }
        out << *it;
    }
    out << "}";
    return out.str();
}

// Liveness analysis over virtual registers.
//
// Iterates the backward data-flow equations
//     live_out(i) = live_in(next instruction), or the union of the live_in of
//                   the first instruction of every successor block when i is
//                   the last instruction of its block
//     live_in(i)  = use(i) ∪ (live_out(i) − def(i))
// until neither set changes for any instruction. Virtual registers are looked
// up in `value_vreg_map`; allocas never count as uses or definitions.
//
// @param func  Function whose instructions are analysed.
// @return      The live-in set of every instruction in `func`.
std::map<Instruction*, std::set<std::string>> RISCv64CodeGen::liveness_analysis(Function* func) {
    std::map<Instruction*, std::set<std::string>> live_in, live_out;

    // Every instruction starts with empty live-in / live-out sets.
    for (const auto& block : func->getBasicBlocks()) {
        for (const auto& owned_inst : block->getInstructions()) {
            live_in[owned_inst.get()] = std::set<std::string>();
            live_out[owned_inst.get()] = std::set<std::string>();
        }
    }

    int iteration_count = 0;
    bool changed = true;
    while (changed) {
        changed = false;
        ++iteration_count;
        if (DEEPDEBUG) std::cerr << "\n--- 活跃性分析迭代: " << iteration_count << " ---" << std::endl;

        // Blocks are visited in reverse order, instructions bottom-up.
        for (auto it = func->getBasicBlocks_NoRange().rbegin(); it != func->getBasicBlocks_NoRange().rend(); ++it) {
            auto bb = it->get();
            if (DEEPDEBUG) std::cerr << "  基本块: " << bb->getName() << std::endl;

            // Live-out of the block: union over the successors' entry live-in sets.
            std::set<std::string> block_live_out;
            for (const auto& succ_bb : bb->getSuccessors()) {
                if (succ_bb->getInstructions().empty()) continue;
                Instruction* succ_entry = succ_bb->getInstructions().front().get();
                const std::set<std::string>& succ_live_in = live_in[succ_entry];
                block_live_out.insert(succ_live_in.begin(), succ_live_in.end());
            }

            for (auto inst_it = bb->getInstructions().rbegin(); inst_it != bb->getInstructions().rend(); ++inst_it) {
                auto inst = inst_it->get();
                if (DEEPDEBUG) std::cerr << "    指令 (BB: " << bb->getName() << ", 地址: " << static_cast<void*>(inst) << ")" << std::endl;

                // Snapshot of the sets from the previous iteration.
                const std::set<std::string> previous_in = live_in[inst];
                const std::set<std::string> previous_out = live_out[inst];

                std::set<std::string> out_set;
                if (inst_it == bb->getInstructions().rbegin()) {
                    out_set = block_live_out;
                    if (DEEPDEBUG) std::cerr << "      指令是基本块的最后一条指令，live_out 取自后继基本块 live_in 的并集: " << print_set(out_set) << std::endl;
                } else {
                    // In a reverse traversal std::prev yields the following instruction.
                    auto following = std::prev(inst_it);
                    out_set = live_in[following->get()];
                    if (DEEPDEBUG) std::cerr << "      指令不是基本块的最后一条，其 live_out 是其后继指令 live_in: " << print_set(out_set) << std::endl;
                }

                std::set<std::string> use_set, def_set;

                // Def: a non-void, value-producing instruction that owns a vreg.
                const bool defines_vreg =
                    !inst->getType()->isVoid() &&
                    !dynamic_cast<AllocaInst*>(inst) &&
                    !dynamic_cast<StoreInst*>(inst) &&
                    !dynamic_cast<ReturnInst*>(inst) &&
                    !dynamic_cast<CondBrInst*>(inst) &&
                    !dynamic_cast<UncondBrInst*>(inst) &&
                    value_vreg_map.count(inst);
                if (defines_vreg) {
                    def_set.insert(value_vreg_map.at(inst));
                    if (DEEPDEBUG) std::cerr << "        指令 (地址: " << static_cast<void*>(inst) << ") 定义了虚拟寄存器: " << value_vreg_map.at(inst) << std::endl;
                }

                // Heuristic: a store that is the one and only user of the value
                // it stores is treated as "killing" that value.
                if (auto store = dynamic_cast<StoreInst*>(inst)) {
                    Value* stored_value = store->getValue();
                    if (value_vreg_map.count(stored_value) && !dynamic_cast<AllocaInst*>(stored_value)) {
                        const bool sole_user =
                            !stored_value->getUses().empty() &&
                            stored_value->getUses().size() == 1 &&
                            stored_value->getUses().front()->getUser() == inst;
                        if (sole_user) {
                            def_set.insert(value_vreg_map.at(stored_value));
                            if (DEEPDEBUG) std::cerr << "        Store 指令 (地址: " << static_cast<void*>(inst) << ") 将被存储的值 '" << value_vreg_map.at(stored_value) << "' 添加到 def_set (启发式)." << std::endl;
                        }
                    }
                }

                // Use: every operand that owns a vreg, allocas excluded.
                for (const auto& operand_use : inst->getOperands()) {
                    Value* operand = operand_use->getValue();
                    if (!value_vreg_map.count(operand) || dynamic_cast<AllocaInst*>(operand)) continue;
                    use_set.insert(value_vreg_map.at(operand));
                    if (DEEPDEBUG) std::cerr << "          指令 (地址: " << static_cast<void*>(inst) << ") 使用了虚拟寄存器: " << value_vreg_map.at(operand) << std::endl;
                }
                if (DEEPDEBUG) std::cerr << "      指令 (地址: " << static_cast<void*>(inst) << ") 的 use_set: " << print_set(use_set) << std::endl;
                if (DEEPDEBUG) std::cerr << "      指令 (地址: " << static_cast<void*>(inst) << ") 的 def_set: " << print_set(def_set) << std::endl;

                // live_in = use ∪ (live_out − def)
                std::set<std::string> in_set = use_set;
                for (const auto& vreg : out_set) {
                    if (def_set.count(vreg) == 0) {
                        in_set.insert(vreg);
                    }
                }

                if (DEEPDEBUG) std::cerr << "      指令 (地址: " << static_cast<void*>(inst) << ") 计算出的 new_live_in: " << print_set(in_set) << std::endl;
                if (DEEPDEBUG) std::cerr << "      指令 (地址: " << static_cast<void*>(inst) << ") 当前 live_in: " << print_set(previous_in) << ", 当前 live_out: " << print_set(previous_out) << std::endl;

                const bool unchanged = (in_set == previous_in) && (out_set == previous_out);
                if (!unchanged) {
                    live_in[inst] = in_set;
                    live_out[inst] = out_set;
                    changed = true;
                    if (DEEPDEBUG) std::cerr << "      指令 (地址: " << static_cast<void*>(inst) << ") 活跃性集合发生变化，更新并继续迭代." << std::endl;
                }
            }
        }
    }
    return live_in;
}

// Interference-graph construction.
//
// Builds an undirected graph (adjacency sets, kept symmetric) over virtual
// registers. Edges are added
//   * between the vreg an instruction defines and every other vreg in that
//     instruction's live set,
//   * between the value and pointer operands of a store, and
//   * between the two operands of a binary instruction,
// because each of those pairs must be held in registers at the same time.
//
// @param live_sets  Per-instruction live set. The caller in this file passes
//                   the result of liveness_analysis(), i.e. the live-IN set of
//                   each instruction. Since live_in = use ∪ (live_out − def),
//                   that is a superset of what is live after the instruction,
//                   so the resulting graph is conservative.
// @return           Map from each vreg to the set of vregs it interferes with.
std::map<std::string, std::set<std::string>> RISCv64CodeGen::build_interference_graph(
    const std::map<Instruction*, std::set<std::string>>& live_sets) {
    std::map<std::string, std::set<std::string>> graph;

    // Make sure every vreg that appears in a live set has a node, even if it
    // ends up with no edges.
    for (const auto& pair : live_sets) {
        for (const auto& vreg : pair.second) {
            graph[vreg] = {};
        }
    }

    // Adds the undirected edge a—b (no self-loops).
    auto add_edge = [&graph](const std::string& a, const std::string& b) {
        if (a == b) return;
        graph[a].insert(b);
        graph[b].insert(a);
    };

    for (const auto& pair : live_sets) {
        auto inst = pair.first;
        const auto& live_at_inst = pair.second;

        // Only instructions whose result needs a physical register define a
        // vreg; allocas live on the stack and are excluded.
        std::string defined_vreg;
        if (value_vreg_map.count(inst) && !dynamic_cast<AllocaInst*>(inst)) {
            defined_vreg = value_vreg_map.at(inst);
        }

        // The defined vreg interferes with everything else in the live set.
        if (!defined_vreg.empty()) {
            for (const auto& live_vreg : live_at_inst) {
                add_edge(defined_vreg, live_vreg);
            }
        }

        if (auto store = dynamic_cast<StoreInst*>(inst)) {
            // The stored value and the destination pointer are read by the same
            // instruction, so they must not share a register.
            Value* val_operand = store->getValue();
            Value* ptr_operand = store->getPointer();

            if (value_vreg_map.count(val_operand) && value_vreg_map.count(ptr_operand)) {
                add_edge(value_vreg_map.at(val_operand), value_vreg_map.at(ptr_operand));
            }
        } else if (auto bin = dynamic_cast<BinaryInst*>(inst)) {
            // Likewise for the two source operands of a binary instruction.
            // (The original inserted rhs into its own neighbour set here, so the
            // lhs/rhs edge was one-directional and rhs could be given lhs's
            // register when coloured after it.)
            Value* lhs_operand = bin->getLhs();
            Value* rhs_operand = bin->getRhs();
            if (value_vreg_map.count(lhs_operand) && value_vreg_map.count(rhs_operand)) {
                add_edge(value_vreg_map.at(lhs_operand), value_vreg_map.at(rhs_operand));
            }
        }
    }
    return graph;
}

// 图着色（支持浮点寄存器）
void RISCv64CodeGen::color_graph(std::map<std::string, PhysicalReg>& vreg_to_preg,
                                 const std::map<std::string, std::set<std::string>>& interference_graph) {
    vreg_to_preg.clear();

    // 分离整数和浮点寄存器池
    std::vector<PhysicalReg> int_regs = {
        PhysicalReg::T0, PhysicalReg::T1, PhysicalReg::T2, PhysicalReg::T3,
        PhysicalReg::T4, PhysicalReg::T5, PhysicalReg::T6,
        PhysicalReg::A0, PhysicalReg::A1, PhysicalReg::A2, PhysicalReg::A3,
        PhysicalReg::A4, PhysicalReg::A5, PhysicalReg::A6, PhysicalReg::A7,
        PhysicalReg::S0, PhysicalReg::S1, PhysicalReg::S2, PhysicalReg::S3,
        PhysicalReg::S4, PhysicalReg::S5, PhysicalReg::S6, PhysicalReg::S7,
        PhysicalReg::S8, PhysicalReg::S9, PhysicalReg::S10, PhysicalReg::S11
    };
    std::vector<PhysicalReg> float_regs = {
        PhysicalReg::F0, PhysicalReg::F1, PhysicalReg::F2, PhysicalReg::F3,
        PhysicalReg::F4, PhysicalReg::F5, PhysicalReg::F6, PhysicalReg::F7,
        PhysicalReg::F8, PhysicalReg::F9, PhysicalReg::F10, PhysicalReg::F11,
        PhysicalReg::F12, PhysicalReg::F13, PhysicalReg::F14, PhysicalReg::F15,
        PhysicalReg::F16, PhysicalReg::F17, PhysicalReg::F18, PhysicalReg::F19,
        PhysicalReg::F20, PhysicalReg::F21, PhysicalReg::F22, PhysicalReg::F23,
        PhysicalReg::F24, PhysicalReg::F25, PhysicalReg::F26, PhysicalReg::F27,
        PhysicalReg::F28, PhysicalReg::F29, PhysicalReg::F30, PhysicalReg::F31
    };

    // 确定虚拟寄存器类型（整数或浮点）
    auto is_float_vreg = [&](const std::string& vreg) -> bool {
        for (const auto& pair : value_vreg_map) {
            if (pair.second == vreg) {
                if (auto inst = dynamic_cast<Instruction*>(pair.first)) {
                    if (inst->isUnary()) {
                        switch (inst->getKind()) {
                            case Instruction::kFNeg:
                            case Instruction::kFNot:
                            case Instruction::kFtoI:
                            case Instruction::kItoF:
                            case Instruction::kBitFtoI:
                            case Instruction::kBitItoF:
                                return true; // 浮点相关指令
                            default:
                                return inst->getType()->isFloat();
                        }
                    }
                    return inst->getType()->isFloat();
                } else if (auto constant = dynamic_cast<ConstantValue*>(pair.first)) {
                    return constant->isFloat();
                }
            }
        }
        return false; // 默认整数
    };

    // 按度数排序虚拟寄存器
    std::vector<std::pair<std::string, int>> vreg_degrees;
    for (const auto& entry : interference_graph) {
        vreg_degrees.push_back({entry.first, (int)entry.second.size()});
    }
    std::sort(vreg_degrees.begin(), vreg_degrees.end(),
              [](const auto& a, const auto& b) { return a.second > b.second; });

    for (const auto& vreg_deg_pair : vreg_degrees) {
        const std::string& vreg = vreg_deg_pair.first;
        std::set<PhysicalReg> used_colors;
        bool is_float = is_float_vreg(vreg);

        // 收集邻居使用的颜色
        if (interference_graph.count(vreg)) {
            for (const auto& neighbor_vreg : interference_graph.at(vreg)) {
                if (vreg_to_preg.count(neighbor_vreg)) {
                    used_colors.insert(vreg_to_preg.at(neighbor_vreg));
                }
            }
        }

        // 选择合适的寄存器池
        const auto& available_regs = is_float ? float_regs : int_regs;

        // 查找第一个可用的寄存器
        bool colored = false;
        for (PhysicalReg preg : available_regs) {
            if (used_colors.find(preg) == used_colors.end()) {
                vreg_to_preg[vreg] = preg;
                colored = true;
                break;
            }
        }

        if (!colored) {
            std::cerr << "警告: 无法为 " << vreg << " 分配" << (is_float ? "浮点" : "整数") << "寄存器，将溢出到栈。\n";
            // 溢出处理：在 stack_map 中分配栈空间
            // 这里假设每个溢出变量占用 4 字节
            // 注意：实际中需要区分整数和浮点溢出的存储指令（如 sw vs fsw）
        }
    }
}

// Register allocation for one function.
//
// Steps:
//   1. run eliminate_phi(),
//   2. give a fresh virtual register ("v<N>") to every non-void, non-alloca
//      instruction and to every constant / global / instruction operand,
//   3. assign each alloca a 4-byte stack slot and compute the frame size,
//   4. run liveness analysis, build the interference graph and colour it.
//
// Resets `vreg_counter` and `value_vreg_map` as a side effect.
//
// @param func  Function to allocate registers for.
// @return      The vreg -> physical register map, the alloca -> stack offset
//              map and the frame size (alloca bytes + 16).
RISCv64CodeGen::RegAllocResult RISCv64CodeGen::register_allocation(Function* func) {
    eliminate_phi(func);
    vreg_counter = 0;
    value_vreg_map.clear();

    // Assign virtual registers to value-producing instructions and to operands.
    for (const auto& bb_ptr : func->getBasicBlocks()) {
        for (const auto& inst_ptr : bb_ptr->getInstructions()) {
            Instruction* inst = inst_ptr.get();
            if (!inst->getType()->isVoid() && !dynamic_cast<AllocaInst*>(inst)) {
                if (value_vreg_map.find(inst) == value_vreg_map.end()) {
                    value_vreg_map[inst] = "v" + std::to_string(vreg_counter++);
                }
            }
            for (const auto& operand_use : inst->getOperands()) {
                Value* operand = operand_use->getValue();
                if (dynamic_cast<ConstantValue*>(operand) || dynamic_cast<GlobalValue*>(operand)) {
                    if (value_vreg_map.find(operand) == value_vreg_map.end()) {
                        value_vreg_map[operand] = "v" + std::to_string(vreg_counter++);
                    }
                } else if (auto op_inst = dynamic_cast<Instruction*>(operand)) {
                    if (!op_inst->getType()->isVoid() && !dynamic_cast<AllocaInst*>(operand)) {
                        if (value_vreg_map.find(operand) == value_vreg_map.end()) {
                            value_vreg_map[operand] = "v" + std::to_string(vreg_counter++);
                        }
                    }
                }
            }
        }
    }

    RegAllocResult alloc_result;
    int current_stack_offset = 0;

    // Give every alloca a stack slot, in program order. (Iterating a
    // std::set<AllocaInst*> instead would order the slots by pointer value,
    // making the generated offsets depend on heap addresses and therefore
    // vary from run to run.)
    for (const auto& bb_ptr : func->getBasicBlocks()) {
        for (const auto& inst_ptr : bb_ptr->getInstructions()) {
            auto alloca = dynamic_cast<AllocaInst*>(inst_ptr.get());
            if (!alloca || alloc_result.stack_map.count(alloca)) {
                continue;
            }
            // NOTE(review): every alloca is assumed to be a 4-byte i32/float;
            // array allocas presumably need their full size here — confirm.
            const int size = 4;
            alloc_result.stack_map[alloca] = current_stack_offset;
            current_stack_offset += size;
        }
    }
    // RV64: reserve 16 more bytes for the saved ra and s0 (8 bytes each).
    alloc_result.stack_size = current_stack_offset + 16;

    // Liveness analysis.
    std::map<Instruction*, std::set<std::string>> live_sets = liveness_analysis(func);

    // Interference graph.
    std::map<std::string, std::set<std::string>> interference_graph = build_interference_graph(live_sets);

    // Graph colouring.
    color_graph(alloc_result.vreg_to_preg, interference_graph);

    if (DEBUG) {
        // Dump the colouring, the live-in sets and the interference graph.
        std::cerr << "=== 寄存器分配结果 (vreg_to_preg) ===\n";
        for (const auto& pair : alloc_result.vreg_to_preg) {
            std::cerr << "  " << pair.first << " -> " << reg_to_string(pair.second) << "\n";
        }
        std::cerr << "=== 寄存器分配结果结束 ===\n\n";

        std::cerr << "=== 活跃性分析结果 (live_in sets) ===\n";
        for (const auto& bb_ptr : func->getBasicBlocks()) {
            std::cerr << "Basic Block: " << bb_ptr->getName() << "\n";
            for (const auto& inst_ptr : bb_ptr->getInstructions()) {
                std::cerr << "  Inst: " << inst_ptr->getKindString();
                if (!inst_ptr->getName().empty()) {
                    std::cerr << "(" << inst_ptr->getName() << ")";
                }
                if (value_vreg_map.count(inst_ptr.get())) {
                    std::cerr << " (Def vreg: " << value_vreg_map.at(inst_ptr.get()) << ")";
                }
                std::cerr << " (Live In: {";
                bool first = true;
                if (live_sets.count(inst_ptr.get())) {
                    for (const auto& vreg : live_sets.at(inst_ptr.get())) {
                        if (!first) std::cerr << ", ";
                        std::cerr << vreg;
                        first = false;
                    }
                }
                std::cerr << "})\n";
            }
        }
        std::cerr << "=== 活跃性分析结果结束 ===\n\n";

        std::cerr << "=== 干扰图 ===\n";
        for (const auto& pair : interference_graph) {
            std::cerr << "  " << pair.first << ": {";
            bool first = true;
            for (const auto& neighbor : pair.second) {
                if (!first) std::cerr << ", ";
                std::cerr << neighbor;
                first = false;
            }
            std::cerr << "}\n";
        }
        std::cerr << "=== 干扰图结束 ===\n\n";
    }

    return alloc_result;
}

// Phi elimination — currently a no-op placeholder.
void RISCv64CodeGen::eliminate_phi(Function* func) {
    // Intentionally empty. A proper phi elimination would insert a `mov` at
    // the end of each predecessor basic block for every phi operand.
    // The IR examples this backend was written against contain no phi nodes,
    // so the pass is not strictly needed yet; it is kept as a hook in case the
    // front end starts generating them.
    // For now we assume phi nodes are either never produced or already
    // handled by the front end.
}

} // namespace sysy