Merge branch 'backend-llir' into backend

This commit is contained in:
Lixuanwang
2025-07-19 18:00:42 +08:00
12 changed files with 1589 additions and 1571 deletions

View File

@@ -0,0 +1,32 @@
#ifndef RISCV64_ASMPRINTER_H
#define RISCV64_ASMPRINTER_H
#include "RISCv64LLIR.h"
#include <iostream>
namespace sysy {
class RISCv64AsmPrinter {
public:
RISCv64AsmPrinter(MachineFunction* mfunc);
// 主入口
void run(std::ostream& os);
private:
// 打印各个部分
void printPrologue();
void printEpilogue();
void printBasicBlock(MachineBasicBlock* mbb);
void printInstruction(MachineInstr* instr);
// 辅助函数
std::string regToString(PhysicalReg reg);
void printOperand(MachineOperand* op);
MachineFunction* MFunc;
std::ostream* OS;
};
} // namespace sysy
#endif // RISCV64_ASMPRINTER_H

View File

@@ -3,128 +3,23 @@
#include "IR.h"
#include <string>
#include <vector>
#include <map>
#include <set>
#include <memory>
#include <iostream>
#include <functional> // For std::function
extern int DEBUG;
extern int DEEPDEBUG;
namespace sysy {
// 为活跃性分析的结果定义一个结构体,以同时持有 live_in 和 live_out 集合
struct LivenessResult {
std::map<Instruction*, std::set<std::string>> live_in;
std::map<Instruction*, std::set<std::string>> live_out;
};
// RISCv64CodeGen 现在是一个高层驱动器
class RISCv64CodeGen {
public:
enum class PhysicalReg {
ZERO, RA, SP, GP, TP, T0, T1, T2, S0, S1, A0, A1, A2, A3, A4, A5, A6, A7, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, T3, T4, T5, T6,
F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15,F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, F31
};
// Move DAGNode and RegAllocResult to public section
struct DAGNode {
enum NodeKind { CONSTANT, LOAD, STORE, BINARY, CALL, RETURN, BRANCH, ALLOCA_ADDR, UNARY, MEMSET };
NodeKind kind;
Value* value = nullptr; // For IR Value
std::string inst; // Generated RISC-V instruction(s) for this node
std::string result_vreg; // Virtual register assigned to this node's result
std::vector<DAGNode*> operands;
std::vector<DAGNode*> users; // For debugging and potentially optimizations
DAGNode(NodeKind k) : kind(k) {}
// Debugging / helper
std::string getNodeKindString() const {
switch (kind) {
case CONSTANT: return "CONSTANT";
case LOAD: return "LOAD";
case STORE: return "STORE";
case BINARY: return "BINARY";
case CALL: return "CALL";
case RETURN: return "RETURN";
case BRANCH: return "BRANCH";
case ALLOCA_ADDR: return "ALLOCA_ADDR";
case UNARY: return "UNARY";
case MEMSET: return "MEMSET";
default: return "UNKNOWN";
}
}
};
struct RegAllocResult {
std::map<std::string, PhysicalReg> vreg_to_preg; // Virtual register to Physical Register mapping
std::map<Value*, int> stack_map; // Value (AllocaInst) to stack offset
int stack_size = 0; // Total stack frame size for locals and spills
};
RISCv64CodeGen(Module* mod) : module(mod) {}
// 唯一的公共入口点
std::string code_gen();
std::string module_gen();
std::string function_gen(Function* func);
// 修改 basicBlock_gen 的声明,添加 int block_idx 参数
std::string basicBlock_gen(BasicBlock* bb, const RegAllocResult& alloc, int block_idx);
// DAG related
std::vector<std::unique_ptr<DAGNode>> build_dag(BasicBlock* bb);
void select_instructions(DAGNode* node, const RegAllocResult& alloc);
// 改变 emit_instructions 的参数,使其可以直接添加汇编指令到 main ss
void emit_instructions(DAGNode* node, std::stringstream& ss, const RegAllocResult& alloc, std::set<DAGNode*>& emitted_nodes);
// Register Allocation related
LivenessResult liveness_analysis(Function* func);
std::map<std::string, std::set<std::string>> build_interference_graph(const LivenessResult& liveness);
void color_graph(std::map<std::string, PhysicalReg>& vreg_to_preg,
const std::map<std::string, std::set<std::string>>& interference_graph);
RegAllocResult register_allocation(Function* func);
void eliminate_phi(Function* func); // Phi elimination is typically done before DAG building
// Utility
std::string reg_to_string(PhysicalReg reg);
void print_dag(const std::vector<std::unique_ptr<DAGNode>>& dag, const std::string& bb_name);
private:
static const std::vector<PhysicalReg> allocable_regs;
std::map<Value*, std::string> value_vreg_map; // Maps IR Value* to its virtual register name
// 模块级代码生成
std::string module_gen();
// 函数级代码生成 (实现新的流水线)
std::string function_gen(Function* func);
Module* module;
int vreg_counter = 0; // Counter for unique virtual register names
int alloca_offset_counter = 0; // Counter for alloca offsets
// 新增一个成员变量来存储当前函数的所有 DAGNode以确保其生命周期贯穿整个函数代码生成
// 这样可以在多个 BasicBlock_gen 调用中访问到完整的 DAG 节点
std::vector<std::unique_ptr<DAGNode>> current_function_dag_nodes;
// 为空标签定义一个伪名称前缀,加上块索引以确保唯一性
const std::string ENTRY_BLOCK_PSEUDO_NAME = "entry_block_";
int local_label_counter = 0; // 用于生成唯一的本地标签 (如 memset 循环, 匿名块跳转等)
// !!! 修改get_operand_node 辅助函数现在需要传入 value_to_node 和 nodes_storage 的引用
// 因为它们是 build_dag 局部管理的
DAGNode* get_operand_node(
Value* val_ir,
std::map<Value*, DAGNode*>& value_to_node,
std::vector<std::unique_ptr<DAGNode>>& nodes_storage
);
// !!! 新增create_node 辅助函数也需要传入 value_to_node 和 nodes_storage 的引用
// 并且它应该不再是 lambda而是一个真正的成员函数
DAGNode* create_node(
DAGNode::NodeKind kind,
Value* val,
std::map<Value*, DAGNode*>& value_to_node,
std::vector<std::unique_ptr<DAGNode>>& nodes_storage
);
std::vector<std::unique_ptr<Instruction>> temp_instructions_storage; // 用于存储 build_dag 中创建的临时 BinaryInst
};
} // namespace sysy

49
src/include/RISCv64ISel.h Normal file
View File

@@ -0,0 +1,49 @@
#ifndef RISCV64_ISEL_H
#define RISCV64_ISEL_H
#include "RISCv64LLIR.h"
namespace sysy {
class RISCv64ISel {
public:
RISCv64ISel();
// 模块主入口将一个高层IR函数转换为底层LLIR函数
std::unique_ptr<MachineFunction> runOnFunction(Function* func);
// 公开接口以便后续模块如RegAlloc可以查询或创建vreg
unsigned getVReg(Value* val);
unsigned getNewVReg() { return vreg_counter++; }
private:
// DAG节点定义作为ISel的内部实现细节
struct DAGNode;
// 指令选择主流程
void select();
// 为单个基本块生成指令
void selectBasicBlock(BasicBlock* bb);
// 核心函数为DAG节点选择并生成MachineInstr
void selectNode(DAGNode* node);
// DAG 构建相关函数 (从原RISCv64Backend迁移)
std::vector<std::unique_ptr<DAGNode>> build_dag(BasicBlock* bb);
DAGNode* get_operand_node(Value* val_ir, std::map<Value*, DAGNode*>&, std::vector<std::unique_ptr<DAGNode>>&);
DAGNode* create_node(int kind, Value* val, std::map<Value*, DAGNode*>&, std::vector<std::unique_ptr<DAGNode>>&);
// 状态
Function* F; // 当前处理的高层IR函数
std::unique_ptr<MachineFunction> MFunc; // 正在构建的底层LLIR函数
MachineBasicBlock* CurMBB; // 当前正在处理的机器基本块
// 映射关系
std::map<Value*, unsigned> vreg_map;
std::map<const BasicBlock*, MachineBasicBlock*> bb_map;
unsigned vreg_counter;
int local_label_counter;
};
} // namespace sysy
#endif // RISCV64_ISEL_H

200
src/include/RISCv64LLIR.h Normal file
View File

@@ -0,0 +1,200 @@
#ifndef RISCV64_LLIR_H
#define RISCV64_LLIR_H
#include "IR.h" // 确保包含了您自己的IR头文件
#include <string>
#include <vector>
#include <memory>
#include <cstdint>
#include <map>
// 前向声明,避免循环引用
namespace sysy {
class Function;
class RISCv64ISel;
}
namespace sysy {
// 物理寄存器定义
enum class PhysicalReg {
ZERO, RA, SP, GP, TP, T0, T1, T2, S0, S1, A0, A1, A2, A3, A4, A5, A6, A7, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, T3, T4, T5, T6,
F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15,F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, F31
};
// RISC-V 指令操作码枚举
enum class RVOpcodes {
// 算术指令
ADD, ADDI, ADDW, ADDIW, SUB, SUBW, MUL, MULW, DIV, DIVW, REM, REMW,
// 逻辑指令
XOR, XORI, OR, ORI, AND, ANDI,
// 移位指令
SLL, SLLI, SLLW, SLLIW, SRL, SRLI, SRLW, SRLIW, SRA, SRAI, SRAW, SRAIW,
// 比较指令
SLT, SLTI, SLTU, SLTIU,
// 内存访问指令
LW, LH, LB, LWU, LHU, LBU, SW, SH, SB, LD, SD,
// 控制流指令
J, JAL, JALR, RET,
BEQ, BNE, BLT, BGE, BLTU, BGEU,
// 伪指令
LI, LA, MV, NEG, NEGW, SEQZ, SNEZ,
// 函数调用
CALL,
// 特殊标记,非指令
LABEL,
// 新增伪指令,用于解耦栈帧处理
FRAME_LOAD, // 从栈帧加载 (AllocaInst)
FRAME_STORE, // 保存到栈帧 (AllocaInst)
};
class MachineOperand;
class RegOperand;
class ImmOperand;
class LabelOperand;
class MemOperand;
class MachineInstr;
class MachineBasicBlock;
class MachineFunction;
// 操作数基类
class MachineOperand {
public:
enum OperandKind { KIND_REG, KIND_IMM, KIND_LABEL, KIND_MEM };
MachineOperand(OperandKind kind) : kind(kind) {}
virtual ~MachineOperand() = default;
OperandKind getKind() const { return kind; }
private:
OperandKind kind;
};
// 寄存器操作数
class RegOperand : public MachineOperand {
public:
// 构造虚拟寄存器
RegOperand(unsigned vreg_num)
: MachineOperand(KIND_REG), vreg_num(vreg_num), is_virtual(true) {}
// 构造物理寄存器
RegOperand(PhysicalReg preg)
: MachineOperand(KIND_REG), preg(preg), is_virtual(false) {}
bool isVirtual() const { return is_virtual; }
unsigned getVRegNum() const { return vreg_num; }
PhysicalReg getPReg() const { return preg; }
void setPReg(PhysicalReg new_preg) {
preg = new_preg;
is_virtual = false;
}
private:
unsigned vreg_num = 0;
PhysicalReg preg = PhysicalReg::ZERO;
bool is_virtual;
};
// 立即数操作数
class ImmOperand : public MachineOperand {
public:
ImmOperand(int64_t value) : MachineOperand(KIND_IMM), value(value) {}
int64_t getValue() const { return value; }
private:
int64_t value;
};
// 标签操作数
class LabelOperand : public MachineOperand {
public:
LabelOperand(const std::string& name) : MachineOperand(KIND_LABEL), name(name) {}
const std::string& getName() const { return name; }
private:
std::string name;
};
// 内存操作数, 表示 offset(base_reg)
class MemOperand : public MachineOperand {
public:
MemOperand(std::unique_ptr<RegOperand> base, std::unique_ptr<ImmOperand> offset)
: MachineOperand(KIND_MEM), base(std::move(base)), offset(std::move(offset)) {}
RegOperand* getBase() const { return base.get(); }
ImmOperand* getOffset() const { return offset.get(); }
private:
std::unique_ptr<RegOperand> base;
std::unique_ptr<ImmOperand> offset;
};
// 机器指令
class MachineInstr {
public:
MachineInstr(RVOpcodes opcode) : opcode(opcode) {}
RVOpcodes getOpcode() const { return opcode; }
const std::vector<std::unique_ptr<MachineOperand>>& getOperands() const { return operands; }
std::vector<std::unique_ptr<MachineOperand>>& getOperands() { return operands; }
void addOperand(std::unique_ptr<MachineOperand> operand) {
operands.push_back(std::move(operand));
}
private:
RVOpcodes opcode;
std::vector<std::unique_ptr<MachineOperand>> operands;
};
// 机器基本块
class MachineBasicBlock {
public:
MachineBasicBlock(const std::string& name, MachineFunction* parent)
: name(name), parent(parent) {}
const std::string& getName() const { return name; }
MachineFunction* getParent() const { return parent; }
const std::vector<std::unique_ptr<MachineInstr>>& getInstructions() const { return instructions; }
std::vector<std::unique_ptr<MachineInstr>>& getInstructions() { return instructions; }
void addInstruction(std::unique_ptr<MachineInstr> instr) {
instructions.push_back(std::move(instr));
}
std::vector<MachineBasicBlock*> successors;
std::vector<MachineBasicBlock*> predecessors;
private:
std::string name;
std::vector<std::unique_ptr<MachineInstr>> instructions;
MachineFunction* parent;
};
// 栈帧信息
struct StackFrameInfo {
int locals_size = 0; // 仅为AllocaInst分配的大小
int spill_size = 0; // 仅为溢出分配的大小
int total_size = 0; // 总大小
std::map<unsigned, int> alloca_offsets; // <AllocaInst的vreg, 栈偏移>
std::map<unsigned, int> spill_offsets; // <溢出vreg, 栈偏移>
};
// 机器函数
class MachineFunction {
public:
MachineFunction(Function* func, RISCv64ISel* isel) : F(func), name(func->getName()), isel(isel) {}
Function* getFunc() const { return F; }
RISCv64ISel* getISel() const { return isel; }
const std::string& getName() const { return name; }
StackFrameInfo& getFrameInfo() { return frame_info; }
const std::vector<std::unique_ptr<MachineBasicBlock>>& getBlocks() const { return blocks; }
std::vector<std::unique_ptr<MachineBasicBlock>>& getBlocks() { return blocks; }
void addBlock(std::unique_ptr<MachineBasicBlock> block) {
blocks.push_back(std::move(block));
}
private:
Function* F;
RISCv64ISel* isel; // 指向创建它的ISel用于获取vreg映射等信息
std::string name;
std::vector<std::unique_ptr<MachineBasicBlock>> blocks;
StackFrameInfo frame_info;
};
} // namespace sysy
#endif // RISCV64_LLIR_H

View File

@@ -0,0 +1,18 @@
// RISCv64Passes.h
#ifndef RISCV64_PASSES_H
#define RISCV64_PASSES_H
#include "RISCv64LLIR.h"
namespace sysy {
// 此处为未来优化Pass的基类或独立类定义
// 例如:
// class PeepholeOptimizer {
// public:
// void runOnMachineFunction(MachineFunction* mfunc);
// };
} // namespace sysy
#endif // RISCV64_PASSES_H

View File

@@ -0,0 +1,56 @@
#ifndef RISCV64_REGALLOC_H
#define RISCV64_REGALLOC_H
#include "RISCv64LLIR.h"
namespace sysy {
class RISCv64RegAlloc {
public:
RISCv64RegAlloc(MachineFunction* mfunc);
// 模块主入口
void run();
private:
using LiveSet = std::set<unsigned>; // 活跃虚拟寄存器集合
using InterferenceGraph = std::map<unsigned, std::set<unsigned>>;
// 栈帧管理
void eliminateFrameIndices();
// 活跃性分析
void analyzeLiveness();
// 构建干扰图
void buildInterferenceGraph();
// 图着色分配寄存器
void colorGraph();
// 重写函数替换vreg并插入溢出代码
void rewriteFunction();
// 辅助函数获取指令的Use/Def集合
void getInstrUseDef(MachineInstr* instr, LiveSet& use, LiveSet& def);
MachineFunction* MFunc;
// 活跃性分析结果
std::map<const MachineInstr*, LiveSet> live_in_map;
std::map<const MachineInstr*, LiveSet> live_out_map;
// 干扰图
InterferenceGraph interference_graph;
// 图着色结果
std::map<unsigned, PhysicalReg> color_map; // vreg -> preg
std::set<unsigned> spilled_vregs; // 被溢出的vreg集合
// 可用的物理寄存器池
std::vector<PhysicalReg> allocable_int_regs;
};
} // namespace sysy
#endif // RISCV64_REGALLOC_H