Compare commits

...

26 Commits

Author SHA1 Message Date
Lixuanwang
b2b88ee511 [backend-beta] saving for simpler implementation for register allocation 2025-06-24 05:02:11 +08:00
Lixuanwang
395e6e4003 [backend] fixed many bugs 2025-06-24 03:23:45 +08:00
Lixuanwang
20cc08708a [backend] introduced debug option 2025-06-24 02:56:17 +08:00
Lixuanwang
942cb32976 [backend] fixed bugs 2025-06-24 00:42:14 +08:00
Lixuanwang
ac7569d890 Merge branch 'IROptPre' into backend 2025-06-24 00:40:36 +08:00
Lixuanwang
11cd32e6df [backend] fixed some bugs 2025-06-24 00:35:38 +08:00
Lixuanwang
617244fae7 [backend] switch to simpler implementation for inst selection 2025-06-24 00:30:33 +08:00
Lixuanwang
3c3f48ee87 [backend] fixed 1 segmentation fault 2025-06-23 22:38:29 +08:00
rain2133
10b43fc90d 修复若干bug 2025-06-23 17:04:45 +08:00
Lixuanwang
ab3eb253f9 [backend] debugging segmentation fault caused by branch instr 2025-06-23 17:02:29 +08:00
rain2133
3d233ff199 基本完成CFG优化(IR修复) 2025-06-23 16:25:52 +08:00
Lixuanwang
7d37bd7528 [backend] introduced DAG, GraphAlloc 2025-06-23 15:38:01 +08:00
rain2133
568e9af626 IRoptpre 初步构建 2025-06-23 13:17:15 +08:00
rain2133
63fc92dcbd 数组命名修复 2025-06-23 11:35:44 +08:00
Lixuanwang
af00612376 [backend] supported if 2025-06-23 06:16:19 +08:00
rain2133
29f75e60a5 Merge remote-tracking branch 'origin/IRPrinter' into IRPrinter 2025-06-23 00:24:19 +08:00
rain2133
9d8930f5df fix % repeat in IR print 2025-06-23 00:22:15 +08:00
ladev789
10e1476ba1 [backend] test01 passed 2025-06-22 20:05:34 +08:00
ladev789
b94e87637a Merge remote-tracking branch 'origin/IRPrinter' into backend 2025-06-22 20:00:29 +08:00
ladev789
88a561177d [backend] incorrect asm output 2025-06-22 20:00:03 +08:00
rain2133
3da2f3ec80 修复函数类型判断,终端跑通所有测试代码。Printer格式需要修复 2025-06-22 18:40:33 +08:00
rain2133
496e2abfb6 构建IR打印器,llvm风格,跑通大部分样例(9/10),待修复 2025-06-22 17:59:19 +08:00
lixuanwang
4711fb603b fixed bugs brought out by merging 2025-06-22 14:39:38 +08:00
lixuanwang
dda8bbe444 Merge branch 'array_add' 2025-06-22 14:24:00 +08:00
ladev789
25a8c72a9b [backend] it works 1.0 2025-06-22 14:06:14 +08:00
lixuanwang
232ed6d023 [backend] introduced rv32 backend 2025-06-21 17:26:50 +08:00
20 changed files with 1821 additions and 155 deletions

2
.gitignore vendored
View File

@@ -50,3 +50,5 @@ GTAGS
__init__.py __init__.py
*.pyc *.pyc
.DS_*

17
TODO.md
View File

@@ -3,20 +3,27 @@
### 1. **前端必须模块** ### 1. **前端必须模块**
- **词法/语法分析**(已完成): - **词法/语法分析**(已完成):
- `SysYLexer`/`SysYParser`ANTLR生成的解析器 - `SysYLexer`/`SysYParser`ANTLR生成的解析器
- **IR生成核心** - **IR生成核心**(已完成)
- `SysYIRGenerator`将AST转换为中间表示IR - `SysYIRGenerator`将AST转换为中间表示IR
- `IRBuilder`:构建指令和基本块的工具类(你们正在实现的部分) - `IRBuilder`:构建指令和基本块的工具类(你们正在实现的部分)
- **IR打印器**(基本完成)
- `SysYIRPrinter`: 打印llvm ir格式的指令优化遍后查看优化效果la指令,subarray数组翻译范式需要改进
### 2. **中端必要优化(最小集合)** ### 2. **中端必要优化(最小集合)**
- **CFG优化**(待测试)
- `SysYIROptPre`CFG优化顺便解决IR生成的缺陷void自动添加ret指令合并嵌套if/while语句生成的多个exit后续可以实现回填机制
常量传播 常量传播
| 优化阶段 | 关键作用 | 是否必须 | | 优化阶段 | 关键作用 | 是否必须 |
|-------------------|----------------------------------|----------| |-------------------|----------------------------------|----------|
| `Mem2Reg` | 消除冗余内存访问转换为SSA形式 | ✅ 核心 | | `Mem2Reg` | 消除冗余内存访问转换为SSA形式 | ✅ 核心 |(必须)
| `DCE` (死代码消除) | 移除无用指令 | ✅ 必要 | | `DCE` (死代码消除) | 移除无用指令 | ✅ 必要 |(必须)
| `DFE` (死函数消除) | 移除未使用的函数 | ✅ 必要 | | `DFE` (死函数消除) | 移除未使用的函数 | ✅ 必要 |(必须)
| `FuncAnalysis` | 函数调用关系分析 | ✅ 基础 |
| `Global2Local` | 全局变量降级为局部变量 | ✅ 重要 | | `Global2Local` | 全局变量降级为局部变量 | ✅ 重要 |
还需要做 Reg2Mem
### 3. **后端核心流程(必须实现)** ### 3. **后端核心流程(必须实现)**
```mermaid ```mermaid
graph LR graph LR

View File

@@ -16,8 +16,9 @@ add_executable(sysyc
IR.cpp IR.cpp
SysYIRGenerator.cpp SysYIRGenerator.cpp
# Backend.cpp # Backend.cpp
# LLVMIRGenerator.cpp SysYIRPrinter.cpp
# LLVMIRGenerator_1.cpp SysYIROptPre.cpp
RISCv32Backend.cpp
) )
target_include_directories(sysyc PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/include) target_include_directories(sysyc PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_compile_options(sysyc PRIVATE -frtti) target_compile_options(sysyc PRIVATE -frtti)

View File

@@ -91,7 +91,7 @@ std::any LLVMIRGenerator::visitVarDecl(SysYParser::VarDeclContext* ctx) {
if (varDef->ASSIGN()) { if (varDef->ASSIGN()) {
value = std::any_cast<std::string>(varDef->initVal()->accept(this)); value = std::any_cast<std::string>(varDef->initVal()->accept(this));
if (irTmpTable.find(value) != irTmpTable.end() && sysy::isa<sysy::ConstantValue>(irTmpTable[value])) { if (irTmpTable.find(value) != irTmpTable.end() && isa<sysy::ConstantValue>(irTmpTable[value])) {
initValue = irTmpTable[value]; initValue = irTmpTable[value];
} }
} }
@@ -134,7 +134,7 @@ std::any LLVMIRGenerator::visitConstDecl(SysYParser::ConstDeclContext* ctx) {
try { try {
value = std::any_cast<std::string>(constDef->constInitVal()->accept(this)); value = std::any_cast<std::string>(constDef->constInitVal()->accept(this));
if (sysy::isa<sysy::ConstantValue>(irTmpTable[value])) { if (isa<sysy::ConstantValue>(irTmpTable[value])) {
initValue = irTmpTable[value]; initValue = irTmpTable[value];
} }
} catch (...) { } catch (...) {
@@ -310,7 +310,7 @@ std::any LLVMIRGenerator::visitFuncDef(SysYParser::FuncDefContext* ctx) {
} else { } else {
irStream << " ret " << currentReturnType << " 0\n"; irStream << " ret " << currentReturnType << " 0\n";
sysy::IRBuilder builder(currentIRBlock); sysy::IRBuilder builder(currentIRBlock);
builder.createReturnInst(sysy::ConstantValue::get(getIRType("int"),0)); builder.createReturnInst(sysy::ConstantValue::get(0));
} }
} }
irStream << "}\n"; irStream << "}\n";
@@ -524,10 +524,10 @@ std::any LLVMIRGenerator::visitNumber(SysYParser::NumberContext* ctx) {
sysy::Value* irValue = nullptr; sysy::Value* irValue = nullptr;
if (ctx->ILITERAL()) { if (ctx->ILITERAL()) {
value = ctx->ILITERAL()->getText(); value = ctx->ILITERAL()->getText();
irValue = sysy::ConstantValue::get(getIRType("int"), std::stoi(value)); irValue = sysy::ConstantValue::get(std::stoi(value));
} else if (ctx->FLITERAL()) { } else if (ctx->FLITERAL()) {
value = ctx->FLITERAL()->getText(); value = ctx->FLITERAL()->getText();
irValue = sysy::ConstantValue::get(getIRType("float"), std::stof(value)); irValue = sysy::ConstantValue::get(std::stof(value));
} else { } else {
value = ""; value = "";
} }

0
src/Mem2Reg.cpp Normal file
View File

618
src/RISCv32Backend.cpp Normal file
View File

@@ -0,0 +1,618 @@
#include "RISCv32Backend.h"
#include <sstream>
#include <algorithm>
#include <stack>
namespace sysy {
/// Produces the complete assembly text for the module: a leading `.text`
/// directive followed by whatever module_gen() emits.
std::string RISCv32CodeGen::code_gen() {
    std::string assembly = ".text\n";
    assembly += module_gen();
    return assembly;
}
/// Emits the data section for every global of the module, then the text
/// section containing every function.
///
/// For each global this writes `.global`, `.section .data`, `.align 2`, the
/// label, and one `.word` / `.float` directive per value returned by
/// getInitValues().getValues().
///
/// NOTE(review): a global whose initializer list is empty gets a label but no
/// storage directive — confirm whether getValues() already expands
/// zero-initialized elements.
std::string RISCv32CodeGen::module_gen() {
    std::stringstream ss;
    for (auto& global : module->getGlobals()) {
        ss << ".global " << global->getName() << "\n";
        ss << ".section .data\n";
        ss << ".align 2\n";
        ss << global->getName() << ":\n";
        for (auto value : global->getInitValues().getValues()) {
            auto const_val = dynamic_cast<ConstantValue*>(value);
            if (const_val == nullptr) {
                // A non-constant initializer cannot be emitted as data. Flag it
                // in the output and keep the slot so later elements stay at
                // their expected offsets, instead of dereferencing null.
                ss << "# Error: non-constant initializer for global " << global->getName() << "\n";
                ss << ".word 0\n";
                continue;
            }
            if (const_val->isInt()) {
                ss << ".word " << const_val->getInt() << "\n";
            } else {
                ss << ".float " << const_val->getFloat() << "\n";
            }
        }
    }
    ss << ".section .text\n";
    for (auto& func : module->getFunctions()) {
        ss << function_gen(func.second.get());
    }
    return ss.str();
}
/// Emits one function: symbol directives, prologue, every basic block in
/// order, and the epilogue ending in `ret`.
///
/// Frame layout (offsets from sp after the prologue):
///   [0, alloc.stack_size - 8)   locals and spill slots laid out by color_graph
///   frame_size - 4              saved ra
///   frame_size - 8 - 4*i        i-th callee-saved register in use
///
/// color_graph reserves 8 bytes above the locals (ra plus one register). The
/// frame is grown here by 4 bytes for each additional callee-saved register,
/// so that every saved register has its own slot, and rounded up to a
/// multiple of 16 as the standard RISC-V calling convention requires for sp.
///
/// NOTE(review): a return in a block other than the last one only emits
/// `mv a0, ...` (emit_instructions suppresses "ret"), so control falls through
/// into the next block instead of reaching this epilogue. Block labels `.L<n>`
/// also restart at 0 in every function. Both need a coordinated change with
/// the label-producing code.
/// NOTE(review): frames larger than 2047 bytes do not fit the 12-bit
/// immediate of `addi`/`sw`/`lw`.
std::string RISCv32CodeGen::function_gen(Function* func) {
    std::stringstream ss;
    ss << ".global " << func->getName() << "\n";
    ss << ".type " << func->getName() << ", @function\n";
    ss << func->getName() << ":\n";

    // Perform register allocation
    auto live_sets = liveness_analysis(func);
    auto interference_graph = build_interference_graph(live_sets);
    auto alloc = color_graph(func, interference_graph);

    // Callee-saved registers the allocator actually handed out; computed once
    // and shared by prologue and epilogue so both agree on the slots.
    std::vector<PhysicalReg> used_callee_saved;
    for (auto preg : callee_saved) {
        const bool in_use = std::any_of(alloc.vreg_to_preg.begin(), alloc.vreg_to_preg.end(),
                                        [preg](const auto& pair) { return pair.second == preg; });
        if (in_use) {
            used_callee_saved.push_back(preg);
        }
    }

    // alloc.stack_size already covers ra and one callee-saved slot.
    const int extra_slots =
        used_callee_saved.size() > 1 ? static_cast<int>(used_callee_saved.size()) - 1 : 0;
    int frame_size = alloc.stack_size + 4 * extra_slots;
    frame_size = (frame_size + 15) / 16 * 16;

    // Prologue: adjust stack, save ra and the callee-saved registers in use
    if (frame_size > 0) {
        ss << " addi sp, sp, -" << frame_size << "\n";
        ss << " sw ra, " << (frame_size - 4) << "(sp)\n";
    }
    int save_offset = frame_size - 8;
    for (auto preg : used_callee_saved) {
        ss << " sw " << get_preg_str(preg) << ", " << save_offset << "(sp)\n";
        save_offset -= 4;
    }

    int block_idx = 0;
    for (auto& bb : func->getBasicBlocks()) {
        ss << basicBlock_gen(bb.get(), alloc, block_idx++);
    }

    // Epilogue: restore from the same slots, then release the frame
    int restore_offset = frame_size - 8;
    for (auto preg : used_callee_saved) {
        ss << " lw " << get_preg_str(preg) << ", " << restore_offset << "(sp)\n";
        restore_offset -= 4;
    }
    if (frame_size > 0) {
        ss << " lw ra, " << (frame_size - 4) << "(sp)\n";
        ss << " addi sp, sp, " << frame_size << "\n";
    }
    ss << " ret\n";
    return ss.str();
}
/// Emits one basic block: its `.L<block_idx>` label followed by the
/// instructions selected for the block's DAG.
///
/// @param bb         block to translate
/// @param alloc      register-allocation result for the enclosing function
/// @param block_idx  position of the block in the function, used for the label
/// @return assembly text of the block
std::string RISCv32CodeGen::basicBlock_gen(BasicBlock* bb, const RegAllocResult& alloc, int block_idx) {
    std::stringstream out;
    out << ".L" << block_idx << ":\n";

    const auto dag = build_dag(bb);

    // Pass 1: choose instruction text for every node.
    for (std::size_t i = 0; i < dag.size(); ++i) {
        select_instructions(dag[i].get(), alloc);
    }

    // Pass 2: print the nodes; `done` keeps shared operands from being
    // printed more than once.
    std::set<DAGNode*> done;
    for (std::size_t i = 0; i < dag.size(); ++i) {
        emit_instructions(dag[i].get(), out, alloc, done);
    }
    return out.str();
}
/// Builds the selection DAG of one basic block.
///
/// One node is created per alloca, load, store, binary, return and branch
/// instruction, in program order. A constant operand gets a CONSTANT node
/// inserted just before its first user; later uses of the same constant in
/// the block reuse that node. Operands that are neither defined earlier in
/// this block nor constants are not linked, so a node can end up with fewer
/// operands than its IR instruction has.
///
/// Virtual register names: instruction results use "%" + the IR name;
/// constants use "%.c<N>". The ".c" prefix keeps constant registers out of
/// the namespace of IR names, which would otherwise make a constant and a
/// numbered IR temporary (both "%0", "%1", ...) share one name and therefore
/// one physical register.
///
/// @param bb block to translate
/// @return nodes owned by the returned vector; operand/user links are raw
///         pointers into it
std::vector<std::unique_ptr<RISCv32CodeGen::DAGNode>> RISCv32CodeGen::build_dag(BasicBlock* bb) {
    std::vector<std::unique_ptr<DAGNode>> nodes;
    std::map<Value*, DAGNode*> value_to_node;
    int const_counter = 0;

    // Records `operand` as an input of `user` and `user` as a consumer of `operand`.
    auto link = [](DAGNode* user, DAGNode* operand) {
        user->operands.push_back(operand);
        operand->users.push_back(user);
    };

    // Links `v` only if a node for it already exists in this block.
    auto link_known = [&](DAGNode* user, Value* v) -> bool {
        auto it = value_to_node.find(v);
        if (it == value_to_node.end()) {
            return false;
        }
        link(user, it->second);
        return true;
    };

    // Links `v`, materializing a CONSTANT node for it when it is a constant
    // that has not been seen in this block yet.
    auto link_value = [&](DAGNode* user, Value* v) {
        if (link_known(user, v)) {
            return;
        }
        auto const_val = dynamic_cast<ConstantValue*>(v);
        if (const_val == nullptr) {
            return;
        }
        auto const_node = std::make_unique<DAGNode>(DAGNode::CONSTANT);
        const_node->value = const_val;
        const_node->result_vreg = "%.c" + std::to_string(const_counter++);
        value_to_node[v] = const_node.get();
        link(user, const_node.get());
        nodes.push_back(std::move(const_node));
    };

    for (auto& inst : bb->getInstructions()) {
        if (auto alloca = dynamic_cast<AllocaInst*>(inst.get())) {
            auto node = std::make_unique<DAGNode>(DAGNode::ALLOCA_ADDR);
            node->value = alloca;
            node->result_vreg = "%" + inst->getName();
            value_to_node[alloca] = node.get();
            nodes.push_back(std::move(node));
        } else if (auto load = dynamic_cast<LoadInst*>(inst.get())) {
            auto node = std::make_unique<DAGNode>(DAGNode::LOAD);
            node->value = load;
            node->result_vreg = "%" + inst->getName();
            link_known(node.get(), load->getPointer());
            value_to_node[load] = node.get();
            nodes.push_back(std::move(node));
        } else if (auto store = dynamic_cast<StoreInst*>(inst.get())) {
            auto node = std::make_unique<DAGNode>(DAGNode::STORE);
            node->value = store;
            // Operand order: stored value first, then the pointer.
            link_value(node.get(), store->getValue());
            link_known(node.get(), store->getPointer());
            nodes.push_back(std::move(node));
        } else if (auto binary = dynamic_cast<BinaryInst*>(inst.get())) {
            auto node = std::make_unique<DAGNode>(DAGNode::BINARY);
            node->value = binary;
            node->result_vreg = "%" + inst->getName();
            for (auto operand : binary->getOperands()) {
                link_value(node.get(), operand->getValue());
            }
            value_to_node[binary] = node.get();
            nodes.push_back(std::move(node));
        } else if (auto ret = dynamic_cast<ReturnInst*>(inst.get())) {
            auto node = std::make_unique<DAGNode>(DAGNode::RETURN);
            node->value = ret;
            if (ret->hasReturnValue()) {
                link_value(node.get(), ret->getReturnValue());
            }
            nodes.push_back(std::move(node));
        } else if (auto cond_br = dynamic_cast<CondBrInst*>(inst.get())) {
            auto node = std::make_unique<DAGNode>(DAGNode::BRANCH);
            node->value = cond_br;
            link_value(node.get(), cond_br->getCondition());
            nodes.push_back(std::move(node));
        } else if (auto uncond_br = dynamic_cast<UncondBrInst*>(inst.get())) {
            auto node = std::make_unique<DAGNode>(DAGNode::BRANCH);
            node->value = uncond_br;
            nodes.push_back(std::move(node));
        }
    }
    return nodes;
}
/// Chooses the assembly text for one DAG node and stores it in `node->inst`.
/// Nodes that already carry text are left untouched. Register operands are
/// written with their virtual-register names; emit_instructions substitutes
/// physical registers later.
///
/// Integer comparisons are lowered to base-ISA sequences because RISC-V has
/// no three-register seq/sne/sle/sge:
///   ==  xor rd, a, b ; seqz rd, rd
///   !=  xor rd, a, b ; snez rd, rd
///   <   slt rd, a, b
///   >   slt rd, b, a
///   <=  slt rd, b, a ; xori rd, rd, 1
///   >=  slt rd, a, b ; xori rd, rd, 1
/// Two-instruction sequences are joined with "\n " like the conditional
/// branch sequence below.
///
/// A node whose operands build_dag could not resolve gets a "# Error: ..."
/// assembler comment instead of indexing past the end of `operands`.
///
/// NOTE(review): `name(gp)` addressing for globals is kept as written; confirm
/// that the assembler/linker setup accepts it.
/// NOTE(review): loads/stores through pointers that are neither an alloca
/// with a stack slot nor a global leave `inst` empty, so nothing is emitted.
///
/// @param node   node to fill in
/// @param alloc  allocation result, used for the stack offsets of allocas
void RISCv32CodeGen::select_instructions(DAGNode* node, const RegAllocResult& alloc) {
    if (!node->inst.empty()) {
        return;
    }

    // Index of `target` among the blocks of `fn`; 0 when it is not found.
    auto block_index = [](auto* fn, auto* target) {
        int idx = 0;
        for (auto& bb : fn->getBasicBlocks()) {
            if (bb.get() == target) {
                return idx;
            }
            idx++;
        }
        return 0;
    };

    switch (node->kind) {
    case DAGNode::CONSTANT: {
        auto const_val = dynamic_cast<ConstantValue*>(node->value);
        if (const_val == nullptr) {
            node->inst = "# unimplemented";
        } else if (const_val->isInt()) {
            node->inst = "li " + node->result_vreg + ", " + std::to_string(const_val->getInt());
        } else {
            node->inst = "# float constant not implemented";
        }
        break;
    }
    case DAGNode::LOAD: {
        auto load = dynamic_cast<LoadInst*>(node->value);
        if (load == nullptr) {
            break;
        }
        auto pointer = load->getPointer();
        if (auto alloca = dynamic_cast<AllocaInst*>(pointer)) {
            if (alloc.stack_map.count(alloca)) {
                node->inst = "lw " + node->result_vreg + ", " + std::to_string(alloc.stack_map.at(alloca)) + "(sp)";
            }
        } else if (auto global = dynamic_cast<GlobalValue*>(pointer)) {
            node->inst = "lw " + node->result_vreg + ", " + global->getName() + "(gp)";
        }
        break;
    }
    case DAGNode::STORE: {
        auto store = dynamic_cast<StoreInst*>(node->value);
        if (store == nullptr) {
            break;
        }
        // build_dag links the stored value first; if it could not be resolved,
        // operands[0] is missing or is the pointer node.
        if (node->operands.empty() || node->operands[0]->value != store->getValue()) {
            node->inst = "# Error: store value operand not available in this block";
            break;
        }
        auto pointer = store->getPointer();
        const std::string& value_vreg = node->operands[0]->result_vreg;
        if (auto alloca = dynamic_cast<AllocaInst*>(pointer)) {
            if (alloc.stack_map.count(alloca)) {
                node->inst = "sw " + value_vreg + ", " + std::to_string(alloc.stack_map.at(alloca)) + "(sp)";
            }
        } else if (auto global = dynamic_cast<GlobalValue*>(pointer)) {
            node->inst = "sw " + value_vreg + ", " + global->getName() + "(gp)";
        }
        break;
    }
    case DAGNode::BINARY: {
        auto binary = dynamic_cast<BinaryInst*>(node->value);
        if (binary == nullptr || node->operands.size() < 2) {
            node->inst = "# Error: binary operands not available in this block";
            break;
        }
        const std::string& rd = node->result_vreg;
        const std::string& lhs = node->operands[0]->result_vreg;
        const std::string& rhs = node->operands[1]->result_vreg;
        auto three_reg = [&rd](const std::string& op, const std::string& a, const std::string& b) {
            return op + " " + rd + ", " + a + ", " + b;
        };
        switch (binary->getKind()) {
        case Instruction::kAdd: node->inst = three_reg("add", lhs, rhs); break;
        case Instruction::kSub: node->inst = three_reg("sub", lhs, rhs); break;
        case Instruction::kMul: node->inst = three_reg("mul", lhs, rhs); break;
        case Instruction::kDiv: node->inst = three_reg("div", lhs, rhs); break;
        case Instruction::kICmpEQ:
            node->inst = three_reg("xor", lhs, rhs) + "\n seqz " + rd + ", " + rd;
            break;
        case Instruction::kICmpNE:
            node->inst = three_reg("xor", lhs, rhs) + "\n snez " + rd + ", " + rd;
            break;
        case Instruction::kICmpLT:
            node->inst = three_reg("slt", lhs, rhs);
            break;
        case Instruction::kICmpGT:
            node->inst = three_reg("slt", rhs, lhs);
            break;
        case Instruction::kICmpLE:
            node->inst = three_reg("slt", rhs, lhs) + "\n xori " + rd + ", " + rd + ", 1";
            break;
        case Instruction::kICmpGE:
            node->inst = three_reg("slt", lhs, rhs) + "\n xori " + rd + ", " + rd + ", 1";
            break;
        default:
            node->inst = three_reg("# unknown", lhs, rhs);
            break;
        }
        break;
    }
    case DAGNode::RETURN: {
        auto ret = dynamic_cast<ReturnInst*>(node->value);
        if (ret != nullptr && ret->hasReturnValue()) {
            if (node->operands.empty()) {
                node->inst = "# Error: return value not available in this block";
            } else {
                node->inst = "mv a0, " + node->operands[0]->result_vreg;
            }
        } else {
            node->inst = "ret";
        }
        break;
    }
    case DAGNode::BRANCH: {
        if (auto cond_br = dynamic_cast<CondBrInst*>(node->value)) {
            if (node->operands.empty()) {
                node->inst = "# Error: branch condition not available in this block";
                break;
            }
            const std::string& condition_vreg = node->operands[0]->result_vreg;
            const int then_idx = block_index(cond_br->getFunction(), cond_br->getThenBlock());
            const int else_idx = block_index(cond_br->getFunction(), cond_br->getElseBlock());
            node->inst = "bne " + condition_vreg + ", zero, .L" + std::to_string(then_idx) + "\n j .L" + std::to_string(else_idx);
        } else if (auto uncond_br = dynamic_cast<UncondBrInst*>(node->value)) {
            const int target_idx = block_index(uncond_br->getFunction(), uncond_br->getBlock());
            node->inst = "j .L" + std::to_string(target_idx);
        }
        break;
    }
    default:
        node->inst = "# unimplemented";
        break;
    }
}
/// Prints the instruction text of `node` (after printing any operand nodes
/// that have not been printed yet), replacing virtual-register names with
/// physical registers.
///
/// Spill handling:
///  - a spilled operand is reloaded from its slot into a scratch register
///    (t1 for the first, t2 for the second) before the instruction;
///  - a spilled result is computed into t0 and stored to its slot after the
///    instruction.
/// The instruction itself always goes through the same substitution and
/// printing path, so spilled results get their operands substituted and
/// multi-line sequences are printed in full.
///
/// Substitution only replaces whole register names: a match that is followed
/// by another name character is skipped, so replacing "%1" leaves "%10" alone.
///
/// NOTE(review): t0–t2 must not be handed out by the register allocator for
/// values that are live across a spill sequence — confirm against color_graph.
///
/// @param node           node to print
/// @param ss             output stream
/// @param alloc          register-allocation result
/// @param emitted_nodes  nodes already printed; updated by this call
void RISCv32CodeGen::emit_instructions(DAGNode* node, std::stringstream& ss, const RegAllocResult& alloc, std::set<DAGNode*>& emitted_nodes) {
    if (emitted_nodes.count(node)) return;
    for (auto operand : node->operands) {
        emit_instructions(operand, ss, alloc, emitted_nodes);
    }
    if (!node->inst.empty() && node->inst != "# unimplemented" && node->inst.find("# alloca") == std::string::npos) {
        std::string inst = node->inst;
        std::vector<std::pair<std::string, std::string>> replacements;
        bool spill_result = false;
        int result_slot = 0;

        // Result register
        if (node->result_vreg != "" && node->kind != DAGNode::ALLOCA_ADDR) {
            auto preg_it = alloc.vreg_to_preg.find(node->result_vreg);
            auto spill_it = alloc.spill_map.find(node->result_vreg);
            if (preg_it != alloc.vreg_to_preg.end()) {
                replacements.emplace_back(node->result_vreg, get_preg_str(preg_it->second));
            } else if (spill_it != alloc.spill_map.end()) {
                replacements.emplace_back(node->result_vreg, get_preg_str(PhysicalReg::T0));
                spill_result = true;
                result_slot = spill_it->second;
            } else {
                ss << "# Error: Virtual register " << node->result_vreg << " not allocated (kind: " << node->getNodeKindString() << ")\n";
            }
        }

        // Operand registers
        const PhysicalReg operand_temps[] = {PhysicalReg::T1, PhysicalReg::T2};
        const std::size_t temp_count = sizeof(operand_temps) / sizeof(operand_temps[0]);
        std::size_t temps_used = 0;
        for (auto operand : node->operands) {
            if (operand->result_vreg == "" || operand->kind == DAGNode::ALLOCA_ADDR) {
                continue;
            }
            const std::string& vreg = operand->result_vreg;
            // The same value used twice needs only one mapping / one reload.
            const bool already_mapped = std::any_of(replacements.begin(), replacements.end(),
                                                    [&vreg](const auto& r) { return r.first == vreg; });
            if (already_mapped) {
                continue;
            }
            auto preg_it = alloc.vreg_to_preg.find(vreg);
            auto spill_it = alloc.spill_map.find(vreg);
            if (preg_it != alloc.vreg_to_preg.end()) {
                replacements.emplace_back(vreg, get_preg_str(preg_it->second));
            } else if (spill_it != alloc.spill_map.end()) {
                const PhysicalReg temp_reg = operand_temps[std::min(temps_used, temp_count - 1)];
                ++temps_used;
                ss << " lw " << get_preg_str(temp_reg) << ", " << spill_it->second << "(sp)\n";
                replacements.emplace_back(vreg, get_preg_str(temp_reg));
            } else {
                ss << "# Error: Operand virtual register " << vreg << " not allocated (kind: " << operand->getNodeKindString() << ")\n";
            }
        }

        // Characters that can continue a virtual-register name.
        auto is_name_char = [](char c) {
            return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
                   c == '_' || c == '.' || c == '(';
        };
        for (const auto& [vreg, preg] : replacements) {
            size_t pos = inst.find(vreg);
            while (pos != std::string::npos) {
                const size_t end = pos + vreg.length();
                if (end < inst.size() && is_name_char(inst[end])) {
                    // Prefix of a longer register name; not a real occurrence.
                    pos = inst.find(vreg, pos + 1);
                    continue;
                }
                inst.replace(pos, vreg.length(), preg);
                pos = inst.find(vreg, pos + preg.length());
            }
        }

        // Emit the instruction
        if (node->kind == DAGNode::BRANCH || inst.find('\n') != std::string::npos) {
            ss << inst << "\n";
        } else if (inst != "ret") {
            ss << " " << inst << "\n";
        }
        if (spill_result) {
            ss << " sw " << get_preg_str(PhysicalReg::T0) << ", " << result_slot << "(sp)\n";
        }
    }
    emitted_nodes.insert(node);
}
/// Backward liveness analysis over the virtual registers of `func`, at
/// instruction granularity.
///
/// Dataflow equations, iterated to a fixpoint:
///   live_out(i) = live_in(next instruction in the block)        if i is not last
///   live_out(i) = union of live_in(first instruction of succ)   if i is last
///   live_in(i)  = use(i) U (live_out(i) - def(i))
///
/// def/use come from two sources: the IR instruction itself (its own name and
/// the names of its instruction operands, allocas excluded) and the DAG node
/// built for it (result register and operand registers, which is how the
/// registers of constants enter the use set). They do not change between
/// passes, so they are computed once up front.
///
/// NOTE(review): constant registers never appear in any def set, so they are
/// live from function entry to their last use.
/// NOTE(review): a successor block without instructions contributes nothing.
///
/// @return live_out set of every instruction of the function
std::map<Instruction*, std::set<std::string>> RISCv32CodeGen::liveness_analysis(Function* func) {
    std::map<Instruction*, std::set<std::string>> live_in, live_out;
    std::map<Instruction*, std::set<std::string>> defs, uses;

    // Initialize live sets and compute def/use once per instruction
    for (auto& bb : func->getBasicBlocks()) {
        auto dag = build_dag(bb.get());
        for (auto& inst_ptr : bb->getInstructions()) {
            auto inst = inst_ptr.get();
            live_in[inst];
            live_out[inst];
            auto& def = defs[inst];
            auto& use = uses[inst];

            // IR instruction def
            if (inst->getName() != "" && !dynamic_cast<AllocaInst*>(inst)) {
                def.insert("%" + inst->getName());
            }
            // IR instruction use
            for (auto operand : inst->getOperands()) {
                auto value = operand->getValue();
                if (auto op_inst = dynamic_cast<Instruction*>(value)) {
                    if (op_inst->getName() != "" && !dynamic_cast<AllocaInst*>(op_inst)) {
                        use.insert("%" + op_inst->getName());
                    }
                }
            }
            // DAG node def and use
            for (auto& node : dag) {
                if (node->value == inst && node->kind != DAGNode::ALLOCA_ADDR) {
                    if (node->result_vreg != "") {
                        def.insert(node->result_vreg);
                    }
                    for (auto operand : node->operands) {
                        if (operand->result_vreg != "" && operand->kind != DAGNode::ALLOCA_ADDR) {
                            use.insert(operand->result_vreg);
                        }
                    }
                }
                // Constants
                if (node->kind == DAGNode::CONSTANT) {
                    for (auto user : node->users) {
                        if (user->value == inst) {
                            use.insert(node->result_vreg);
                        }
                    }
                }
            }
        }
    }

    bool changed;
    do {
        changed = false;
        for (auto& bb : func->getBasicBlocks()) {
            // Instruction that follows `inst` inside this block; null while
            // `inst` is the last instruction of the block.
            Instruction* next_inst = nullptr;
            // Reverse iterate for backward analysis
            for (auto it = bb->getInstructions().rbegin(); it != bb->getInstructions().rend(); ++it) {
                auto inst = it->get();
                std::set<std::string> new_live_in, new_live_out;
                if (next_inst == nullptr) {
                    // Block exit: union of live_in of successors
                    for (auto succ : bb->getSuccessors()) {
                        if (!succ->getInstructions().empty()) {
                            auto succ_inst = succ->getInstructions().front().get();
                            new_live_out.insert(live_in[succ_inst].begin(), live_in[succ_inst].end());
                        }
                    }
                } else {
                    // Inside the block the only successor is the next instruction
                    new_live_out = live_in[next_inst];
                }

                const std::set<std::string>& def = defs[inst];
                const std::set<std::string>& use = uses[inst];

                // live_in = use U (live_out - def)
                std::set<std::string> live_out_minus_def;
                std::set_difference(new_live_out.begin(), new_live_out.end(),
                                    def.begin(), def.end(),
                                    std::inserter(live_out_minus_def, live_out_minus_def.begin()));
                new_live_in.insert(use.begin(), use.end());
                new_live_in.insert(live_out_minus_def.begin(), live_out_minus_def.end());

                // Debug
                std::cerr << "Instruction: " << (inst->getName() != "" ? "%" + inst->getName() : "none") << "\n";
                std::cerr << " def: "; for (const auto& d : def) std::cerr << d << " "; std::cerr << "\n";
                std::cerr << " use: "; for (const auto& u : use) std::cerr << u << " "; std::cerr << "\n";
                std::cerr << " live_in: "; for (const auto& v : new_live_in) std::cerr << v << " "; std::cerr << "\n";
                std::cerr << " live_out: "; for (const auto& v : new_live_out) std::cerr << v << " "; std::cerr << "\n";

                if (live_in[inst] != new_live_in || live_out[inst] != new_live_out) {
                    live_in[inst] = new_live_in;
                    live_out[inst] = new_live_out;
                    changed = true;
                }
                next_inst = inst;
            }
        }
    } while (changed);

    // Debug live_out
    for (const auto& [inst, live_vars] : live_out) {
        std::cerr << "Instruction: " << (inst->getName() != "" ? "%" + inst->getName() : "none") << " live_out: ";
        for (const auto& var : live_vars) {
            std::cerr << var << " ";
        }
        std::cerr << "\n";
    }
    return live_out;
}
/// Builds the register interference graph from per-instruction live sets.
///
/// For every instruction:
///  - the register it defines (its IR name, unless it is unnamed or an
///    alloca) interferes with every register in its live set;
///  - all registers in its live set interfere pairwise;
///  - every register seen gets a graph node even when it has no edges.
/// Live-set entries whose name starts with "%a(" or "%b(" are ignored.
///
/// NOTE(review): the "%a(" / "%b(" filter matches two specific name prefixes
/// only — confirm whether it was meant to cover all stack-variable names.
///
/// @param live_sets  live set per instruction (as returned by liveness_analysis)
/// @return adjacency sets keyed by virtual-register name
std::map<std::string, std::set<std::string>> RISCv32CodeGen::build_interference_graph(
    const std::map<Instruction*, std::set<std::string>>& live_sets) {
    const auto is_filtered = [](const std::string& name) {
        return name.find("%a(") == 0 || name.find("%b(") == 0;
    };

    std::map<std::string, std::set<std::string>> graph;
    for (const auto& entry : live_sets) {
        Instruction* inst = entry.first;
        const std::set<std::string>& live = entry.second;

        // Live registers that take part in the graph, in set order.
        std::vector<const std::string*> kept;
        for (const auto& name : live) {
            if (!is_filtered(name)) {
                kept.push_back(&name);
            }
        }

        std::string defined;
        if (inst->getName() != "" && !dynamic_cast<AllocaInst*>(inst)) {
            defined = "%" + inst->getName();
        }

        // Definition against everything live after it
        if (!defined.empty()) {
            graph[defined];
            for (const std::string* name : kept) {
                if (*name == defined) {
                    continue;
                }
                graph[defined].insert(*name);
                graph[*name].insert(defined);
            }
        }

        // Nodes for all kept registers, then pairwise edges among them
        for (const std::string* name : kept) {
            graph[*name];
        }
        for (std::size_t i = 0; i < kept.size(); ++i) {
            for (std::size_t j = i + 1; j < kept.size(); ++j) {
                graph[*kept[i]].insert(*kept[j]);
                graph[*kept[j]].insert(*kept[i]);
            }
        }
    }

    // Debug
    for (const auto& item : graph) {
        std::cerr << "Vreg " << item.first << " interferes with: ";
        for (const auto& neighbor : item.second) {
            std::cerr << neighbor << " ";
        }
        std::cerr << "\n";
    }
    return graph;
}
/// Graph-coloring register allocation followed by stack-frame layout.
///
/// 1. Simplify: repeatedly remove nodes whose degree is below the number of
///    allocatable registers and push them on a stack; when none qualifies,
///    the node with the highest degree is spilled and removed.
/// 2. Select: pop nodes and give each the first register not used by an
///    already-colored neighbor in the original graph.
/// 3. Layout: one 4-byte slot per AllocaInst, then one per spilled register,
///    then 8 extra bytes reserved for ra and a callee-saved register.
///
/// t0–t2 are deliberately not allocatable: emit_instructions uses
/// temporaries from that range as scratch registers for spill loads/stores,
/// and a value living in one of them would be overwritten by that code.
///
/// NOTE(review): every alloca gets 4 bytes regardless of its type — confirm
/// how array allocas are meant to be sized.
///
/// @param func                function being allocated (scanned for allocas)
/// @param interference_graph  adjacency sets keyed by virtual-register name
/// @return register assignment, alloca/spill offsets and total stack size
RISCv32CodeGen::RegAllocResult RISCv32CodeGen::color_graph(Function* func, const std::map<std::string, std::set<std::string>>& interference_graph) {
    RegAllocResult alloc;
    std::map<std::string, std::set<std::string>> ig = interference_graph;
    std::stack<std::string> stack;
    std::set<std::string> spilled;

    // Allocatable physical registers (t0-t2 reserved as spill scratch)
    std::vector<PhysicalReg> available_regs = {
        PhysicalReg::T3, PhysicalReg::T4, PhysicalReg::T5, PhysicalReg::T6,
        PhysicalReg::S0, PhysicalReg::S1, PhysicalReg::S2, PhysicalReg::S3, PhysicalReg::S4, PhysicalReg::S5,
        PhysicalReg::S6, PhysicalReg::S7, PhysicalReg::S8, PhysicalReg::S9, PhysicalReg::S10, PhysicalReg::S11
    };

    // Simplify: push nodes with degree < number of registers
    while (!ig.empty()) {
        bool simplified = false;
        for (auto it = ig.begin(); it != ig.end();) {
            if (it->second.size() < available_regs.size()) {
                stack.push(it->first);
                for (auto& [vreg, neighbors] : ig) {
                    neighbors.erase(it->first);
                }
                it = ig.erase(it);
                simplified = true;
            } else {
                ++it;
            }
        }
        if (!simplified) {
            // Spill the node with the highest degree
            auto max_it = ig.begin();
            for (auto it = ig.begin(); it != ig.end(); ++it) {
                if (it->second.size() > max_it->second.size()) {
                    max_it = it;
                }
            }
            spilled.insert(max_it->first);
            for (auto& [vreg, neighbors] : ig) {
                neighbors.erase(max_it->first);
            }
            ig.erase(max_it);
        }
    }

    // Assign colors (physical registers)
    while (!stack.empty()) {
        auto vreg = stack.top();
        stack.pop();
        std::set<PhysicalReg> used_colors;
        auto node_it = interference_graph.find(vreg);
        if (node_it != interference_graph.end()) {
            for (const auto& neighbor : node_it->second) {
                auto colored = alloc.vreg_to_preg.find(neighbor);
                if (colored != alloc.vreg_to_preg.end()) {
                    used_colors.insert(colored->second);
                }
            }
        }
        bool assigned = false;
        for (auto preg : available_regs) {
            if (!used_colors.count(preg)) {
                alloc.vreg_to_preg[vreg] = preg;
                assigned = true;
                break;
            }
        }
        if (!assigned) {
            spilled.insert(vreg);
        }
    }

    // Allocate stack space for AllocaInst and spilled virtual registers
    int stack_offset = 0;
    for (auto& bb : func->getBasicBlocks()) {
        for (auto& inst : bb->getInstructions()) {
            if (auto alloca = dynamic_cast<AllocaInst*>(inst.get())) {
                alloc.stack_map[alloca] = stack_offset;
                stack_offset += 4; // 4 bytes per variable
            }
        }
    }
    for (const auto& vreg : spilled) {
        alloc.spill_map[vreg] = stack_offset;
        stack_offset += 4;
    }
    alloc.stack_size = stack_offset + 8; // Extra space for ra and callee-saved

    // Debug output to verify register allocation
    for (const auto& [vreg, preg] : alloc.vreg_to_preg) {
        std::cerr << "Vreg " << vreg << " assigned to " << get_preg_str(preg) << "\n";
    }
    for (const auto& vreg : spilled) {
        std::cerr << "Vreg " << vreg << " spilled to stack offset " << alloc.spill_map.at(vreg) << "\n";
    }
    return alloc;
}
/// Looks up the physical register assigned to `vreg`.
///
/// @return the assigned register, or t0 when `vreg` has no entry in
///         `alloc.vreg_to_preg` (the fallback used for spilled registers)
RISCv32CodeGen::PhysicalReg RISCv32CodeGen::get_preg_or_temp(const std::string& vreg, const RegAllocResult& alloc) const {
    const auto found = alloc.vreg_to_preg.find(vreg);
    return found != alloc.vreg_to_preg.end() ? found->second : PhysicalReg::T0;
}
} // namespace sysy

103
src/RISCv32Backend.h Normal file
View File

@@ -0,0 +1,103 @@
#ifndef RISCV32_BACKEND_H
#define RISCV32_BACKEND_H
#include "IR.h"
#include <string>
#include <vector>
#include <map>
#include <set>
#include <memory>
#include <iostream>
#include <functional>
#include <stack>
namespace sysy {
/// Translates a sysy IR Module into RISC-V (RV32) assembly text.
///
/// Pipeline per function: liveness analysis -> interference graph -> graph
/// coloring -> per-block DAG construction, instruction selection and emission.
class RISCv32CodeGen {
public:
    /// Physical integer registers by ABI name. The enumerator order follows
    /// the x0..x31 register numbering.
    enum class PhysicalReg {
        ZERO, RA, SP, GP, TP, T0, T1, T2,
        S0, S1, A0, A1, A2, A3, A4, A5,
        A6, A7, S2, S3, S4, S5, S6, S7,
        S8, S9, S10, S11, T3, T4, T5, T6
    };

    /// One node of the per-block selection DAG.
    struct DAGNode {
        enum NodeKind { CONSTANT, LOAD, STORE, BINARY, CALL, RETURN, BRANCH, ALLOCA_ADDR };

        NodeKind kind;
        Value* value = nullptr;          ///< IR value this node was created for
        std::string inst;                ///< selected assembly text (empty until selected)
        std::string result_vreg;         ///< virtual register holding the result, if any
        std::vector<DAGNode*> operands;  ///< nodes this node reads
        std::vector<DAGNode*> users;     ///< nodes that read this node

        DAGNode(NodeKind k) : kind(k) {}

        /// Name of `kind` for diagnostics; "UNKNOWN" for out-of-range values.
        std::string getNodeKindString() const {
            static const char* const kNames[] = {
                "CONSTANT", "LOAD", "STORE", "BINARY", "CALL", "RETURN", "BRANCH", "ALLOCA_ADDR"
            };
            const auto index = static_cast<std::size_t>(kind);
            if (index < sizeof(kNames) / sizeof(kNames[0])) {
                return kNames[index];
            }
            return "UNKNOWN";
        }
    };

    /// Result of register allocation for one function.
    struct RegAllocResult {
        std::map<std::string, PhysicalReg> vreg_to_preg; ///< virtual register -> physical register
        std::map<Value*, int> stack_map;                 ///< AllocaInst -> stack offset
        std::map<std::string, int> spill_map;            ///< spilled virtual register -> stack offset
        int stack_size = 0;                              ///< total stack frame size
    };

    RISCv32CodeGen(Module* mod) : module(mod) {}

    /// Generates the assembly for the whole module.
    std::string code_gen();
    /// Generates globals and all functions.
    std::string module_gen();
    /// Generates one function, including prologue and epilogue.
    std::string function_gen(Function* func);
    /// Generates one basic block labelled with `block_idx`.
    std::string basicBlock_gen(BasicBlock* bb, const RegAllocResult& alloc, int block_idx);

    /// Builds the selection DAG of a basic block.
    std::vector<std::unique_ptr<DAGNode>> build_dag(BasicBlock* bb);
    /// Fills in `node->inst` for one DAG node.
    void select_instructions(DAGNode* node, const RegAllocResult& alloc);
    /// Prints a node (and its not-yet-printed operands) to `ss`.
    void emit_instructions(DAGNode* node, std::stringstream& ss, const RegAllocResult& alloc,
                           std::set<DAGNode*>& emitted_nodes);

    /// Computes the live-out set of every instruction of `func`.
    std::map<Instruction*, std::set<std::string>> liveness_analysis(Function* func);
    /// Builds the interference graph from per-instruction live sets.
    std::map<std::string, std::set<std::string>> build_interference_graph(
        const std::map<Instruction*, std::set<std::string>>& live_sets);
    /// Colors the interference graph and lays out the stack frame.
    RegAllocResult color_graph(Function* func,
                               const std::map<std::string, std::set<std::string>>& interference_graph);

private:
    Module* module;

    /// Assembly spelling of every physical register.
    std::map<PhysicalReg, std::string> preg_to_str = {
        {PhysicalReg::ZERO, "zero"},
        {PhysicalReg::RA, "ra"},
        {PhysicalReg::SP, "sp"},
        {PhysicalReg::GP, "gp"},
        {PhysicalReg::TP, "tp"},
        {PhysicalReg::T0, "t0"},
        {PhysicalReg::T1, "t1"},
        {PhysicalReg::T2, "t2"},
        {PhysicalReg::S0, "s0"},
        {PhysicalReg::S1, "s1"},
        {PhysicalReg::A0, "a0"},
        {PhysicalReg::A1, "a1"},
        {PhysicalReg::A2, "a2"},
        {PhysicalReg::A3, "a3"},
        {PhysicalReg::A4, "a4"},
        {PhysicalReg::A5, "a5"},
        {PhysicalReg::A6, "a6"},
        {PhysicalReg::A7, "a7"},
        {PhysicalReg::S2, "s2"},
        {PhysicalReg::S3, "s3"},
        {PhysicalReg::S4, "s4"},
        {PhysicalReg::S5, "s5"},
        {PhysicalReg::S6, "s6"},
        {PhysicalReg::S7, "s7"},
        {PhysicalReg::S8, "s8"},
        {PhysicalReg::S9, "s9"},
        {PhysicalReg::S10, "s10"},
        {PhysicalReg::S11, "s11"},
        {PhysicalReg::T3, "t3"},
        {PhysicalReg::T4, "t4"},
        {PhysicalReg::T5, "t5"},
        {PhysicalReg::T6, "t6"}
    };

    /// Temporaries and argument registers.
    std::vector<PhysicalReg> caller_saved = {
        PhysicalReg::T0, PhysicalReg::T1, PhysicalReg::T2, PhysicalReg::T3,
        PhysicalReg::T4, PhysicalReg::T5, PhysicalReg::T6,
        PhysicalReg::A0, PhysicalReg::A1, PhysicalReg::A2, PhysicalReg::A3,
        PhysicalReg::A4, PhysicalReg::A5, PhysicalReg::A6, PhysicalReg::A7
    };

    /// Saved registers; function_gen saves/restores the ones in use.
    std::vector<PhysicalReg> callee_saved = {
        PhysicalReg::S0, PhysicalReg::S1, PhysicalReg::S2, PhysicalReg::S3,
        PhysicalReg::S4, PhysicalReg::S5, PhysicalReg::S6, PhysicalReg::S7,
        PhysicalReg::S8, PhysicalReg::S9, PhysicalReg::S10, PhysicalReg::S11
    };

    /// Assembly name of `preg`; throws std::out_of_range for an unknown value.
    std::string get_preg_str(PhysicalReg preg) const {
        return preg_to_str.at(preg);
    }

    /// Register assigned to `vreg`, or t0 when it has none.
    PhysicalReg get_preg_or_temp(const std::string& vreg, const RegAllocResult& alloc) const;
};
} // namespace sysy
#endif // RISCV32_BACKEND_H

View File

@@ -73,10 +73,12 @@ std::any SysYIRGenerator::visitGlobalVarDecl(SysYParser::GlobalVarDeclContext *c
} }
} }
ValueCounter values = {};
if (varDef->initVal() != nullptr) {
ArrayValueTree* root = std::any_cast<ArrayValueTree *>(varDef->initVal()->accept(this)); ArrayValueTree* root = std::any_cast<ArrayValueTree *>(varDef->initVal()->accept(this));
ValueCounter values;
Utils::tree2Array(type, root, dims, dims.size(), values, &builder); Utils::tree2Array(type, root, dims, dims.size(), values, &builder);
delete root; delete root;
}
// 创建全局变量,并更新符号表 // 创建全局变量,并更新符号表
module->createGlobalValue(name, Type::getPointerType(type), dims, values); module->createGlobalValue(name, Type::getPointerType(type), dims, values);
} }
@@ -456,7 +458,7 @@ std::any SysYIRGenerator::visitReturnStmt(SysYParser::ReturnStmtContext *ctx) {
returnValue = std::any_cast<Value *>(visitExp(ctx->exp())); returnValue = std::any_cast<Value *>(visitExp(ctx->exp()));
} }
Type* funcType = builder.getBasicBlock()->getParent()->getType(); Type* funcType = builder.getBasicBlock()->getParent()->getReturnType();
if (funcType!= returnValue->getType() && returnValue != nullptr) { if (funcType!= returnValue->getType() && returnValue != nullptr) {
ConstantValue * constValue = dynamic_cast<ConstantValue *>(returnValue); ConstantValue * constValue = dynamic_cast<ConstantValue *>(returnValue);
if (constValue != nullptr) { if (constValue != nullptr) {

489
src/SysYIROptPre.cpp Normal file
View File

@@ -0,0 +1,489 @@
/**
* @file: SysYIROptPre.cpp
* @brief CFG optimization
* @Author : Ixeux email:you@domain.com
* @Version : 1.0
* @Create Date : 2024-08-10
*
*/
#include "SysYIROptPre.h"
#include <cassert>
#include <list>
#include <map>
#include <memory>
#include <string>
#include "IR.h"
#include "IRBuilder.h"
namespace sysy {
/**
 * Unregisters every operand use of an instruction from the value it refers
 * to, so that later analyses are not disturbed by the instruction once it is
 * deleted.
 * @param instr the instruction that is about to be deleted
 */
void SysYOptPre::usedelete(Instruction *instr) {
  for (auto &operandUse : instr->getOperands()) {
    // The used value keeps a list of its uses; drop this one from it.
    operandUse->getValue()->removeUse(operandUse);
  }
}
// Deletes the dead instructions that follow the first terminator of every
// basic block, then rebuilds the block's successor/predecessor edges from the
// terminator that is now the last instruction of the block.
void SysYOptPre::SysYDelInstAfterBr() {
  auto &functions = pModule->getFunctions();
  for (auto &function : functions) {
    auto basicBlocks = function.second->getBasicBlocks();
    for (auto &basicBlock : basicBlocks) {
      auto &instructions = basicBlock->getInstructions();

      // Locate the first terminator; every later instruction is dead, so its
      // operand uses are unregistered before it is erased.
      auto terminatorIter = instructions.end();
      for (auto iter = instructions.begin(); iter != instructions.end(); ++iter) {
        if (terminatorIter != instructions.end()) {
          usedelete(iter->get());
        } else if ((*iter)->isTerminator()) {
          terminatorIter = iter;
        }
      }
      if (terminatorIter == instructions.end()) {
        continue;  // No terminator in this block: nothing to clean up.
      }

      // Erase everything after the terminator.
      auto deadIter = terminatorIter;
      ++deadIter;
      while (deadIter != instructions.end()) {
        deadIter = instructions.erase(deadIter);
      }

      // Drop all existing CFG edges of this block. The successor is copied out
      // and the iterator is re-read from the container on every round, because
      // removeSuccessor() modifies the very list we are walking.
      auto &successors = basicBlock->getSuccessors();
      while (successors.begin() != successors.end()) {
        auto *successor = *successors.begin();
        successor->removePredecessor(basicBlock.get());
        basicBlock->removeSuccessor(successor);
      }

      // Re-create the edges described by the terminator.
      auto *terminator = terminatorIter->get();
      if (terminator->isUnconditional()) {
        BasicBlock *branchBlock = dynamic_cast<BasicBlock *>(terminator->getOperand(0));
        basicBlock->addSuccessor(branchBlock);
        branchBlock->addPredecessor(basicBlock.get());
      } else if (terminator->isConditional()) {
        BasicBlock *thenBlock = dynamic_cast<BasicBlock *>(terminator->getOperand(1));
        BasicBlock *elseBlock = dynamic_cast<BasicBlock *>(terminator->getOperand(2));
        basicBlock->addSuccessor(thenBlock);
        basicBlock->addSuccessor(elseBlock);
        thenBlock->addPredecessor(basicBlock.get());
        elseBlock->addPredecessor(basicBlock.get());
      }
    }
  }
}
// Merges a basic block with its successor when the block has exactly one
// successor and that successor has exactly one predecessor. After a merge the
// same block is examined again, so whole chains collapse into one block.
void SysYOptPre::SysYBlockMerge() {
  auto &functions = pModule->getFunctions();  // std::map<std::string, std::unique_ptr<Function>>
  for (auto &function : functions) {
    auto &func = function.second;
    for (auto blockiter = func->getBasicBlocks().begin(); blockiter != func->getBasicBlocks().end();) {
      BasicBlock *block = blockiter->get();
      BasicBlock *nextBlock = nullptr;
      if (block->getNumSuccessors() == 1) {
        nextBlock = block->getSuccessors()[0];
      }
      // A block whose only successor is itself cannot be merged: moving its
      // instructions into itself would never terminate.
      if (nextBlock == nullptr || nextBlock == block || nextBlock->getNumPredecessors() != 1) {
        ++blockiter;
        continue;
      }

      auto nextarguments = nextBlock->getArguments();

      // Remove the branch that ends the current block.
      if (block->getNumInstructions() != 0) {
        auto lastInst = block->end();
        --lastInst;
        bool removeBranch = false;
        if (lastInst->get()->isUnconditional()) {
          removeBranch = true;
        } else if (lastInst->get()->isConditional()) {
          // A conditional branch is only redundant when both of its targets
          // (operand 1 = then, operand 2 = else) name the same block.
          removeBranch =
              lastInst->get()->getOperand(1)->getName() == lastInst->get()->getOperand(2)->getName();
        }
        if (removeBranch) {
          usedelete(lastInst->get());
          block->getInstructions().erase(lastInst);
        }
      }

      // Move the successor's instructions to the end of the current block and
      // re-parent them.
      for (auto institer = nextBlock->begin(); institer != nextBlock->end();) {
        institer->get()->setParent(block);
        block->getInstructions().emplace_back(institer->release());
        institer = nextBlock->getInstructions().erase(institer);
      }

      // Merge the block arguments.
      // TODO: do the arguments need to be de-duplicated?
      for (auto &argm : nextarguments) {
        argm->setParent(block);
        block->insertArgument(argm);
      }

      // Rewire the CFG: the current block inherits the successor's successors.
      block->removeSuccessor(nextBlock);
      nextBlock->removePredecessor(block);
      std::list<BasicBlock *> succshoulddel;
      for (auto &succ : nextBlock->getSuccessors()) {
        block->addSuccessor(succ);
        succ->replacePredecessor(nextBlock, block);
        succshoulddel.push_back(succ);
      }
      // Removal is deferred so the successor list is not edited while iterated.
      for (auto del : succshoulddel) {
        nextBlock->removeSuccessor(del);
      }

      func->removeBasicBlock(nextBlock);
      // blockiter is intentionally not advanced: the merged block may now be
      // mergeable with its new successor.
    }
  }
}
// 删除无前驱块兼容SSA后的处理
void SysYOptPre::SysYDelNoPreBLock() {
auto &functions = pModule->getFunctions(); // std::map<std::string, std::unique_ptr<sysy::Function>>
for (auto &function : functions) {
auto &func = function.second;
for (auto &block : func->getBasicBlocks()) {
block->setreachableFalse();
}
// 对函数基本块做一个拓扑排序,排查不可达基本块
auto entryBlock = func->getEntryBlock();
entryBlock->setreachableTrue();
std::queue<BasicBlock *> blockqueue;
blockqueue.push(entryBlock);
while (!blockqueue.empty()) {
auto block = blockqueue.front();
blockqueue.pop();
for (auto &succ : block->getSuccessors()) {
if (!succ->getreachable()) {
succ->setreachableTrue();
blockqueue.push(succ);
}
}
}
// 删除不可达基本块指令
for (auto blockIter = func->getBasicBlocks().begin(); blockIter != func->getBasicBlocks().end();blockIter++) {
if (!blockIter->get()->getreachable())
for (auto &iterInst : blockIter->get()->getInstructions())
usedelete(iterInst.get());
}
for (auto blockIter = func->getBasicBlocks().begin(); blockIter != func->getBasicBlocks().end();) {
if (!blockIter->get()->getreachable()) {
for (auto succblock : blockIter->get()->getSuccessors()) {
int indexphi = 1;
for (auto pred : succblock->getPredecessors()) {
if (pred == blockIter->get()) {
break;
}
indexphi++;
}
for (auto &phiinst : succblock->getInstructions()) {
if (phiinst->getKind() != Instruction::kPhi) {
break;
}
phiinst->removeOperand(indexphi);
}
}
// 删除不可达基本块,注意迭代器不可达问题
func->removeBasicBlock((blockIter++)->get());
} else {
blockIter++;
}
}
}
}
void SysYOptPre::SysYDelEmptyBlock() {
auto &functions = pModule->getFunctions();
for (auto &function : functions) {
// 收集不可达基本块
// 这里的不可达基本块是指没有实际指令的基本块
// 当一个基本块没有实际指令例如只有phi指令和一个uncondbr指令时也会被视作不可达
auto basicBlocks = function.second->getBasicBlocks();
std::map<sysy::BasicBlock *, BasicBlock *> EmptyBlocks;
// 空块儿和后继的基本块的映射
for (auto &basicBlock : basicBlocks) {
if (basicBlock->getNumInstructions() == 0) {
if (basicBlock->getNumSuccessors() == 1) {
EmptyBlocks[basicBlock.get()] = basicBlock->getSuccessors().front();
}
}
else{
// 如果只有phi指令和一个uncondbr。(phi)*(uncondbr)?
// 判断除了最后一个指令之外是不是只有phi指令
bool onlyPhi = true;
for (auto &inst : basicBlock->getInstructions()) {
if (!inst->isPhi() && !inst->isUnconditional()) {
onlyPhi = false;
break;
}
}
if(onlyPhi)
EmptyBlocks[basicBlock.get()] = basicBlock->getSuccessors().front();
}
}
// 更新基本块信息,增加必要指令
for (auto &basicBlock : basicBlocks) {
// 把空块转换成只有跳转指令的不可达块
if (distance(basicBlock->begin(), basicBlock->end()) == 0) {
if (basicBlock->getNumSuccessors() == 0) {
continue;
}
if (basicBlock->getNumSuccessors() > 1) {
assert("");
}
pBuilder->setPosition(basicBlock.get(), basicBlock->end());
pBuilder->createUncondBrInst(basicBlock->getSuccessors()[0], {});
continue;
}
auto thelastinst = basicBlock->getInstructions().end();
--thelastinst;
// 根据br指令传递的后继块信息跳过空块链
if (thelastinst->get()->isUnconditional()) {
BasicBlock* OldBrBlock = dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(0));
BasicBlock *thelastBlockOld = nullptr;
// 如果空块链表为多个块
while (EmptyBlocks.find(dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(0))) !=
EmptyBlocks.end()) {
thelastBlockOld = dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(0));
thelastinst->get()->replaceOperand(0, EmptyBlocks[thelastBlockOld]);
}
basicBlock->removeSuccessor(OldBrBlock);
OldBrBlock->removePredecessor(basicBlock.get());
basicBlock->addSuccessor(dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(0)));
dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(0))->addPredecessor(basicBlock.get());
if (thelastBlockOld != nullptr) {
int indexphi = 0;
for (auto &pred : dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(0))->getPredecessors()) {
if (pred == thelastBlockOld) {
break;
}
indexphi++;
}
// 更新phi指令的操作数
// 移除thelastBlockOld对应的phi操作数
for (auto &InstInNew : dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(0))->getInstructions()) {
if (InstInNew->isPhi()) {
dynamic_cast<PhiInst *>(InstInNew.get())->removeOperand(indexphi + 1);
} else {
break;
}
}
}
} else if (thelastinst->get()->getKind() == Instruction::kCondBr) {
auto OldThenBlock = dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(1));
auto OldElseBlock = dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(2));
BasicBlock *thelastBlockOld = nullptr;
while (EmptyBlocks.find(dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(1))) !=
EmptyBlocks.end()) {
thelastBlockOld = dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(1));
thelastinst->get()->replaceOperand(
1, EmptyBlocks[dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(1))]);
}
basicBlock->removeSuccessor(OldThenBlock);
OldThenBlock->removePredecessor(basicBlock.get());
// 处理 then 和 else 分支合并的情况
if (dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(1)) ==
dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(2))) {
auto thebrBlock = dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(1));
usedelete(thelastinst->get());
thelastinst = basicBlock->getInstructions().erase(thelastinst);
pBuilder->setPosition(basicBlock.get(), basicBlock->end());
pBuilder->createUncondBrInst(thebrBlock, {});
continue;
}
basicBlock->addSuccessor(dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(1)));
dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(1))->addPredecessor(basicBlock.get());
// auto indexInNew = dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(0))->getPredecessors().
if (thelastBlockOld != nullptr) {
int indexphi = 0;
for (auto &pred : dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(1))->getPredecessors()) {
if (pred == thelastBlockOld) {
break;
}
indexphi++;
}
for (auto &InstInNew : dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(1))->getInstructions()) {
if (InstInNew->isPhi()) {
dynamic_cast<PhiInst *>(InstInNew.get())->removeOperand(indexphi + 1);
} else {
break;
}
}
}
thelastBlockOld = nullptr;
while (EmptyBlocks.find(dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(2))) !=
EmptyBlocks.end()) {
thelastBlockOld = dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(2));
thelastinst->get()->replaceOperand(
2, EmptyBlocks[dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(2))]);
}
basicBlock->removeSuccessor(OldElseBlock);
OldElseBlock->removePredecessor(basicBlock.get());
// 处理 then 和 else 分支合并的情况
if (dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(1)) ==
dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(2))) {
auto thebrBlock = dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(1));
usedelete(thelastinst->get());
thelastinst = basicBlock->getInstructions().erase(thelastinst);
pBuilder->setPosition(basicBlock.get(), basicBlock->end());
pBuilder->createUncondBrInst(thebrBlock, {});
continue;
}
basicBlock->addSuccessor(dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(2)));
dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(2))->addPredecessor(basicBlock.get());
if (thelastBlockOld != nullptr) {
int indexphi = 0;
for (auto &pred : dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(2))->getPredecessors()) {
if (pred == thelastBlockOld) {
break;
}
indexphi++;
}
for (auto &InstInNew : dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(2))->getInstructions()) {
if (InstInNew->isPhi()) {
dynamic_cast<PhiInst *>(InstInNew.get())->removeOperand(indexphi + 1);
} else {
break;
}
}
}
} else {
if (basicBlock->getNumSuccessors() == 1) {
pBuilder->setPosition(basicBlock.get(), basicBlock->end());
pBuilder->createUncondBrInst(basicBlock->getSuccessors()[0], {});
auto thelastinst = basicBlock->getInstructions().end();
(--thelastinst);
auto OldBrBlock = dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(0));
sysy::BasicBlock *thelastBlockOld = nullptr;
while (EmptyBlocks.find(dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(0))) !=
EmptyBlocks.end()) {
thelastBlockOld = dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(0));
thelastinst->get()->replaceOperand(
0, EmptyBlocks[dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(0))]);
}
basicBlock->removeSuccessor(OldBrBlock);
OldBrBlock->removePredecessor(basicBlock.get());
basicBlock->addSuccessor(dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(0)));
dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(0))->addPredecessor(basicBlock.get());
if (thelastBlockOld != nullptr) {
int indexphi = 0;
for (auto &pred : dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(0))->getPredecessors()) {
if (pred == thelastBlockOld) {
break;
}
indexphi++;
}
for (auto &InstInNew : dynamic_cast<BasicBlock *>(thelastinst->get()->getOperand(0))->getInstructions()) {
if (InstInNew->isPhi()) {
dynamic_cast<PhiInst *>(InstInNew.get())->removeOperand(indexphi + 1);
} else {
break;
}
}
}
}
}
}
for (auto iter = function.second->getBasicBlocks().begin(); iter != function.second->getBasicBlocks().end();) {
if (EmptyBlocks.find(iter->get()) != EmptyBlocks.end()) {
// EntryBlock跳过
if (iter->get() == function.second->getEntryBlock()) {
++iter;
continue;
}
for (auto &iterInst : iter->get()->getInstructions())
usedelete(iterInst.get());
// 删除不可达基本块的phi指令的操作数
for (auto &succ : iter->get()->getSuccessors()) {
int index = 0;
for (auto &pred : succ->getPredecessors()) {
if (pred == iter->get()) {
break;
}
index++;
}
for (auto &instinsucc : succ->getInstructions()) {
if (instinsucc->isPhi()) {
dynamic_cast<PhiInst *>(instinsucc.get())->removeOperand(index);
} else {
break;
}
}
}
function.second->removeBasicBlock((iter++)->get());
} else {
++iter;
}
}
}
}
// Appends a default return instruction to every exit block (a block without
// successors) that does not already end in a return. This mainly covers void
// functions whose source has no explicit `return`.
// The returned value follows the function's return type: 0 for int, 0.0F for
// float, and no value otherwise. Exit blocks without any instruction are
// treated the same way, so an int/float function never receives a value-less
// return.
void SysYOptPre::SysYAddReturn() {
  auto &functions = pModule->getFunctions();
  for (auto &function : functions) {
    auto &func = function.second;
    auto basicBlocks = func->getBasicBlocks();
    for (auto &block : basicBlocks) {
      if (block->getNumSuccessors() != 0) {
        continue;  // Not an exit block.
      }
      // Only inspect the last instruction when there is one.
      if (block->getNumInstructions() != 0) {
        auto thelastinst = block->getInstructions().end();
        --thelastinst;
        if (thelastinst->get()->getKind() == Instruction::kReturn) {
          continue;  // Already returns.
        }
      }
      pBuilder->setPosition(block.get(), block->end());
      // TODO: should a missing return value in an int/float function be an error?
      if (func->getReturnType()->isInt()) {
        pBuilder->createReturnInst(ConstantValue::get(0));
      } else if (func->getReturnType()->isFloat()) {
        pBuilder->createReturnInst(ConstantValue::get(0.0F));
      } else {
        pBuilder->createReturnInst({});
      }
    }
  }
}
} // namespace sysy

479
src/SysYIRPrinter.cpp Normal file
View File

@@ -0,0 +1,479 @@
#include "SysYIRPrinter.h"
#include <cassert>
#include <fstream>
#include <iostream>
#include <string>
#include "IR.h"
namespace sysy {
// Prints the whole module to standard output: global variables first, then
// the function named "main", then every other function.
void SysYPrinter::printIR() {
  const auto &functions = pModule->getFunctions();

  //TODO: Print target datalayout and triple (minimal required by LLVM)

  printGlobalVariable();

  // First pass: emit "main" only (at most once).
  for (const auto &entry : functions) {
    if (entry.second->getName() != "main") {
      continue;
    }
    printFunction(entry.second.get());
    break;
  }

  // Second pass: emit all remaining functions.
  for (const auto &entry : functions) {
    if (entry.second->getName() == "main") {
      continue;
    }
    printFunction(entry.second.get());
  }
}
// Returns the textual spelling of an IR type: "void", "i32", "float",
// "<base>*" for pointers, and the return type's spelling for function types.
// Any other type trips an assertion (and yields "" when assertions are off).
std::string SysYPrinter::getTypeString(Type *type) {
  if (type->isVoid()) {
    return "void";
  }
  if (type->isInt()) {
    return "i32";
  }
  if (type->isFloat()) {
    return "float";
  }
  if (auto pointerType = dynamic_cast<PointerType*>(type)) {
    return getTypeString(pointerType->getBaseType()) + "*";
  }
  if (auto functionType = dynamic_cast<FunctionType*>(type)) {
    return getTypeString(functionType->getReturnType());
  }
  assert(false && "Unsupported type");
  return "";
}
// Returns how a value is spelled when used as an operand:
//   global value      -> "@" + name
//   instruction       -> "%" + name
//   constant value    -> its float or int value via std::to_string
//   constant variable -> its bare name
// Any other kind of value trips an assertion (and yields "" when assertions
// are off).
std::string SysYPrinter::getValueName(Value *value) {
  if (auto globalValue = dynamic_cast<GlobalValue*>(value)) {
    return "@" + globalValue->getName();
  }
  if (auto instruction = dynamic_cast<Instruction*>(value)) {
    return "%" + instruction->getName();
  }
  if (auto constant = dynamic_cast<ConstantValue*>(value)) {
    return constant->isFloat() ? std::to_string(constant->getFloat())
                               : std::to_string(constant->getInt());
  }
  if (auto constantVariable = dynamic_cast<ConstantVariable*>(value)) {
    return constantVariable->getName();
  }
  assert(false && "Unknown value type");
  return "";
}
// Writes the spelling of `type` (see getTypeString) to standard output.
void SysYPrinter::printType(Type *type) {
  const std::string spelling = getTypeString(type);
  std::cout << spelling;
}
// Writes the operand spelling of `value` (see getValueName) to standard output.
void SysYPrinter::printValue(Value *value) {
  const std::string spelling = getValueName(value);
  std::cout << spelling;
}
// Prints one line per global variable of the module:
//   @name = global <type>[ [d0 x d1 ...]] <initializer>, align 4
// Arrays print their dimensions and a bracketed initializer list; scalars
// print the value found at index 0.
void SysYPrinter::printGlobalVariable() {
  for (const auto &global : pModule->getGlobals()) {
    std::cout << "@" << global->getName() << " = global ";

    auto elementType = dynamic_cast<PointerType *>(global->getType())->getBaseType();
    printType(elementType);
    const bool elementIsFloat = elementType->isFloat();

    if (global->getNumDims() > 0) {
      // Array type: the dimensions, separated by " x ".
      std::cout << " [";
      for (unsigned dim = 0; dim < global->getNumDims(); dim++) {
        if (dim > 0) {
          std::cout << " x ";
        }
        std::cout << getValueName(global->getDim(dim));
      }
      std::cout << "]";
      std::cout << " ";

      // Array initializer: one entry per element of the counter's number list.
      std::cout << "[";
      auto initValues = global->getInitValues();
      auto storedValues = initValues.getValues();
      auto storedNumbers = initValues.getNumbers();
      for (size_t entry = 0; entry < storedNumbers.size(); entry++) {
        if (entry > 0) {
          std::cout << ", ";
        }
        auto constant = dynamic_cast<ConstantValue*>(storedValues[entry]);
        if (elementIsFloat) {
          std::cout << "float " << constant->getFloat();
        } else {
          std::cout << "i32 " << constant->getInt();
        }
      }
      std::cout << "]";
    } else {
      std::cout << " ";

      // Scalar initializer.
      if (elementIsFloat) {
        std::cout << "float " << dynamic_cast<ConstantValue*>(global->getByIndex(0))->getFloat();
      } else {
        std::cout << "i32 " << dynamic_cast<ConstantValue*>(global->getByIndex(0))->getInt();
      }
    }

    std::cout << ", align 4" << std::endl;
  }
}
// Prints a function definition: the "define" line with return type, name and
// the entry block's arguments, then every basic block with its label and
// instructions, then the closing brace followed by an empty line.
void SysYPrinter::printFunction(Function *function) {
  // Signature.
  std::cout << "define ";
  printType(function->getReturnType());
  std::cout << " @" << function->getName() << "(";

  // The parameters are taken from the entry block's arguments.
  auto entryBlock = function->getEntryBlock();
  auto &args = entryBlock->getArguments();
  bool needSeparator = false;
  for (auto &arg : args) {
    if (needSeparator) {
      std::cout << ", ";
    }
    needSeparator = true;
    printType(arg->getType());
    std::cout << " %" << arg->getName();
  }
  std::cout << ") {" << std::endl;

  // Body.
  for (const auto &blockOwner : function->getBasicBlocks()) {
    BasicBlock* block = blockOwner.get();

    // Label: the entry block is always called "entry"; other blocks print
    // their own name, and unnamed blocks print no label at all.
    if (block == function->getEntryBlock()) {
      std::cout << "entry:" << std::endl;
    } else if (!block->getName().empty()) {
      std::cout << block->getName() << ":" << std::endl;
    }

    for (const auto &instOwner : block->getInstructions()) {
      std::cout << " ";
      printInst(instOwner.get());
    }
  }

  std::cout << "}" << std::endl << std::endl;
}
// Prints a single instruction, followed by a newline, to standard output.
// The output imitates LLVM's textual IR; the instruction kind selects one of
// the cases below. An instruction kind without a case trips an assertion.
void SysYPrinter::printInst(Instruction *pInst) {
using Kind = Instruction::Kind;
switch (pInst->getKind()) {
// ---- Binary operations: "[%name = ]<op> <type> <lhs>, <rhs>" ----
case Kind::kAdd:
case Kind::kSub:
case Kind::kMul:
case Kind::kDiv:
case Kind::kRem:
case Kind::kFAdd:
case Kind::kFSub:
case Kind::kFMul:
case Kind::kFDiv:
case Kind::kICmpEQ:
case Kind::kICmpNE:
case Kind::kICmpLT:
case Kind::kICmpGT:
case Kind::kICmpLE:
case Kind::kICmpGE:
case Kind::kFCmpEQ:
case Kind::kFCmpNE:
case Kind::kFCmpLT:
case Kind::kFCmpGT:
case Kind::kFCmpLE:
case Kind::kFCmpGE:
case Kind::kAnd:
case Kind::kOr: {
auto binInst = dynamic_cast<BinaryInst *>(pInst);
// Print result variable if exists
if (!binInst->getName().empty()) {
std::cout << "%" << binInst->getName() << " = ";
}
// Operation name
switch (pInst->getKind()) {
case Kind::kAdd: std::cout << "add"; break;
case Kind::kSub: std::cout << "sub"; break;
case Kind::kMul: std::cout << "mul"; break;
case Kind::kDiv: std::cout << "sdiv"; break;
case Kind::kRem: std::cout << "srem"; break;
case Kind::kFAdd: std::cout << "fadd"; break;
case Kind::kFSub: std::cout << "fsub"; break;
case Kind::kFMul: std::cout << "fmul"; break;
case Kind::kFDiv: std::cout << "fdiv"; break;
case Kind::kICmpEQ: std::cout << "icmp eq"; break;
case Kind::kICmpNE: std::cout << "icmp ne"; break;
case Kind::kICmpLT: std::cout << "icmp slt"; break;
case Kind::kICmpGT: std::cout << "icmp sgt"; break;
case Kind::kICmpLE: std::cout << "icmp sle"; break;
case Kind::kICmpGE: std::cout << "icmp sge"; break;
case Kind::kFCmpEQ: std::cout << "fcmp oeq"; break;
case Kind::kFCmpNE: std::cout << "fcmp one"; break;
case Kind::kFCmpLT: std::cout << "fcmp olt"; break;
case Kind::kFCmpGT: std::cout << "fcmp ogt"; break;
case Kind::kFCmpLE: std::cout << "fcmp ole"; break;
case Kind::kFCmpGE: std::cout << "fcmp oge"; break;
case Kind::kAnd: std::cout << "and"; break;
case Kind::kOr: std::cout << "or"; break;
default: break;
}
// Types and operands
// The type printed here is the instruction's own type, also for comparisons.
std::cout << " ";
printType(binInst->getType());
std::cout << " ";
printValue(binInst->getLhs());
std::cout << ", ";
printValue(binInst->getRhs());
std::cout << std::endl;
} break;
// ---- Unary operations and conversions ----
case Kind::kNeg:
case Kind::kNot:
case Kind::kFNeg:
case Kind::kFNot:
case Kind::kFtoI:
case Kind::kBitFtoI:
case Kind::kItoF:
case Kind::kBitItoF: {
auto unyInst = dynamic_cast<UnaryInst *>(pInst);
if (!unyInst->getName().empty()) {
std::cout << "%" << unyInst->getName() << " = ";
}
switch (pInst->getKind()) {
case Kind::kNeg: std::cout << "sub "; break;
case Kind::kNot: std::cout << "xor "; break;
case Kind::kFNeg: std::cout << "fneg "; break;
case Kind::kFNot: std::cout << "fneg "; break; // FNot not standard, map to fneg
case Kind::kFtoI: std::cout << "fptosi "; break;
case Kind::kBitFtoI: std::cout << "bitcast "; break;
case Kind::kItoF: std::cout << "sitofp "; break;
case Kind::kBitItoF: std::cout << "bitcast "; break;
default: break;
}
// The instruction's own (result) type is printed before the operand.
printType(unyInst->getType());
std::cout << " ";
// Special handling for negation
// kNeg and kNot are printed as a two-operand op whose first operand is "i32 0".
if (pInst->getKind() == Kind::kNeg || pInst->getKind() == Kind::kNot) {
std::cout << "i32 0, ";
}
printValue(pInst->getOperand(0));
// For bitcast, need to specify destination type
// NOTE(review): the same type is printed before the operand and after "to";
// fptosi/sitofp print no "to <type>" part - confirm this is intended.
if (pInst->getKind() == Kind::kBitFtoI || pInst->getKind() == Kind::kBitItoF) {
std::cout << " to ";
printType(unyInst->getType());
}
std::cout << std::endl;
} break;
// ---- Call: "[%name = ]call <type> @callee(<type> <arg>, ...)" ----
case Kind::kCall: {
auto callInst = dynamic_cast<CallInst *>(pInst);
auto function = callInst->getCallee();
if (!callInst->getName().empty()) {
std::cout << "%" << callInst->getName() << " = ";
}
std::cout << "call ";
printType(callInst->getType());
std::cout << " @" << function->getName() << "(";
auto params = callInst->getArguments();
bool first = true;
for (auto &param : params) {
if (!first) std::cout << ", ";
first = false;
printType(param->getValue()->getType());
std::cout << " ";
printValue(param->getValue());
}
std::cout << ")" << std::endl;
} break;
// ---- Conditional branch: "br i1 <cond>, label %<then>, label %<else>" ----
case Kind::kCondBr: {
auto condBrInst = dynamic_cast<CondBrInst *>(pInst);
std::cout << "br i1 ";
printValue(condBrInst->getCondition());
std::cout << ", label %" << condBrInst->getThenBlock()->getName();
std::cout << ", label %" << condBrInst->getElseBlock()->getName();
std::cout << std::endl;
} break;
// ---- Unconditional branch: "br label %<target>" ----
case Kind::kBr: {
auto brInst = dynamic_cast<UncondBrInst *>(pInst);
std::cout << "br label %" << brInst->getBlock()->getName();
std::cout << std::endl;
} break;
// ---- Return: "ret <type> <value>" or "ret void" when there is no operand ----
case Kind::kReturn: {
auto retInst = dynamic_cast<ReturnInst *>(pInst);
std::cout << "ret ";
if (retInst->getNumOperands() != 0) {
printType(retInst->getOperand(0)->getType());
std::cout << " ";
printValue(retInst->getOperand(0));
} else {
std::cout << "void";
}
std::cout << std::endl;
} break;
// ---- Alloca: "%name = alloca <base type>[, i32 <dim>...], align 4" ----
case Kind::kAlloca: {
auto allocaInst = dynamic_cast<AllocaInst *>(pInst);
std::cout << "%" << allocaInst->getName() << " = alloca ";
auto baseType = dynamic_cast<PointerType *>(allocaInst->getType())->getBaseType();
printType(baseType);
if (allocaInst->getNumDims() > 0) {
std::cout << ", ";
for (size_t i = 0; i < allocaInst->getNumDims(); i++) {
if (i > 0) std::cout << ", ";
printType(Type::getIntType());
std::cout << " ";
printValue(allocaInst->getDim(i));
}
}
std::cout << ", align 4" << std::endl;
} break;
// ---- Load: "%name = load <type>, <ptr type> <ptr>[, i32 <index>...], align 4" ----
case Kind::kLoad: {
auto loadInst = dynamic_cast<LoadInst *>(pInst);
std::cout << "%" << loadInst->getName() << " = load ";
printType(loadInst->getType());
std::cout << ", ";
printType(loadInst->getPointer()->getType());
std::cout << " ";
printValue(loadInst->getPointer());
if (loadInst->getNumIndices() > 0) {
std::cout << ", ";
for (size_t i = 0; i < loadInst->getNumIndices(); i++) {
if (i > 0) std::cout << ", ";
printType(Type::getIntType());
std::cout << " ";
printValue(loadInst->getIndex(i));
}
}
std::cout << ", align 4" << std::endl;
} break;
// ---- La: printed as "getelementptr inbounds <base>, <ptr type> <ptr>, i32 <index>..." ----
case Kind::kLa: {
auto laInst = dynamic_cast<LaInst *>(pInst);
std::cout << "%" << laInst->getName() << " = getelementptr inbounds ";
auto ptrType = dynamic_cast<PointerType*>(laInst->getPointer()->getType());
printType(ptrType->getBaseType());
std::cout << ", ";
printType(laInst->getPointer()->getType());
std::cout << " ";
printValue(laInst->getPointer());
std::cout << ", ";
for (size_t i = 0; i < laInst->getNumIndices(); i++) {
if (i > 0) std::cout << ", ";
printType(Type::getIntType());
std::cout << " ";
printValue(laInst->getIndex(i));
}
std::cout << std::endl;
} break;
// ---- Store: "store <type> <value>, <ptr type> <ptr>[, i32 <index>...], align 4" ----
case Kind::kStore: {
auto storeInst = dynamic_cast<StoreInst *>(pInst);
std::cout << "store ";
printType(storeInst->getValue()->getType());
std::cout << " ";
printValue(storeInst->getValue());
std::cout << ", ";
printType(storeInst->getPointer()->getType());
std::cout << " ";
printValue(storeInst->getPointer());
if (storeInst->getNumIndices() > 0) {
std::cout << ", ";
for (size_t i = 0; i < storeInst->getNumIndices(); i++) {
if (i > 0) std::cout << ", ";
printType(Type::getIntType());
std::cout << " ";
printValue(storeInst->getIndex(i));
}
}
std::cout << ", align 4" << std::endl;
} break;
// ---- Memset: printed as a call to @llvm.memset.p0.<ptr type> ----
case Kind::kMemset: {
auto memsetInst = dynamic_cast<MemsetInst *>(pInst);
std::cout << "call void @llvm.memset.p0.";
printType(memsetInst->getPointer()->getType());
std::cout << "(";
printType(memsetInst->getPointer()->getType());
std::cout << " ";
printValue(memsetInst->getPointer());
std::cout << ", i8 ";
printValue(memsetInst->getValue());
std::cout << ", i32 ";
printValue(memsetInst->getSize());
std::cout << ", i1 false)" << std::endl;
} break;
// ---- Phi: "%name = phi <type>[ <value>, %<block> ], ..." ----
// Operands are read in pairs: operand i is the value, operand i+1 the block.
case Kind::kPhi: {
auto phiInst = dynamic_cast<PhiInst *>(pInst);
std::cout << "%" << phiInst->getName() << " = phi ";
printType(phiInst->getType());
for (unsigned i = 0; i < phiInst->getNumOperands(); i += 2) {
if (i > 0) std::cout << ", ";
std::cout << "[ ";
printValue(phiInst->getOperand(i));
std::cout << ", %" << dynamic_cast<BasicBlock*>(phiInst->getOperand(i+1))->getName() << " ]";
}
std::cout << std::endl;
} break;
// ---- GetSubArray: printed as "getelementptr inbounds" on the father array ----
case Kind::kGetSubArray: {
auto getSubArrayInst = dynamic_cast<GetSubArrayInst *>(pInst);
std::cout << "%" << getSubArrayInst->getName() << " = getelementptr inbounds ";
auto ptrType = dynamic_cast<PointerType*>(getSubArrayInst->getFatherArray()->getType());
printType(ptrType->getBaseType());
std::cout << ", ";
printType(getSubArrayInst->getFatherArray()->getType());
std::cout << " ";
printValue(getSubArrayInst->getFatherArray());
std::cout << ", ";
bool firstIndex = true;
for (auto &index : getSubArrayInst->getIndices()) {
if (!firstIndex) std::cout << ", ";
firstIndex = false;
printType(Type::getIntType());
std::cout << " ";
printValue(index->getValue());
}
std::cout << std::endl;
} break;
// Any instruction kind that has no case above is reported as unsupported.
default:
assert(false && "Unsupported instruction kind");
break;
}
}
} // namespace sysy

View File

@@ -96,7 +96,7 @@ class IRBuilder {
std::string newName; std::string newName;
if (name.empty()) { if (name.empty()) {
std::stringstream ss; std::stringstream ss;
ss << "%" << tmpIndex; ss << tmpIndex;
newName = ss.str(); newName = ss.str();
tmpIndex++; tmpIndex++;
} else { } else {
@@ -136,7 +136,7 @@ class IRBuilder {
std::string newName; std::string newName;
if (name.empty()) { if (name.empty()) {
std::stringstream ss; std::stringstream ss;
ss << "%" << tmpIndex; ss << tmpIndex;
newName = ss.str(); newName = ss.str();
tmpIndex++; tmpIndex++;
} else { } else {
@@ -221,7 +221,7 @@ class IRBuilder {
std::string newName; std::string newName;
if (name.empty() && callee->getReturnType() != Type::getVoidType()) { if (name.empty() && callee->getReturnType() != Type::getVoidType()) {
std::stringstream ss; std::stringstream ss;
ss << "%" << tmpIndex; ss << tmpIndex;
newName = ss.str(); newName = ss.str();
tmpIndex++; tmpIndex++;
} else { } else {
@@ -268,7 +268,7 @@ class IRBuilder {
std::string newName; std::string newName;
if (name.empty()) { if (name.empty()) {
std::stringstream ss; std::stringstream ss;
ss << "%" << tmpIndex; ss << tmpIndex;
newName = ss.str(); newName = ss.str();
tmpIndex++; tmpIndex++;
} else { } else {
@@ -284,7 +284,7 @@ class IRBuilder {
std::string newName; std::string newName;
if (name.empty()) { if (name.empty()) {
std::stringstream ss; std::stringstream ss;
ss << "%" << tmpIndex; ss << tmpIndex;
newName = ss.str(); newName = ss.str();
tmpIndex++; tmpIndex++;
} else { } else {
@@ -315,7 +315,7 @@ class IRBuilder {
auto fatherArrayValue = dynamic_cast<Value *>(fatherArray); auto fatherArrayValue = dynamic_cast<Value *>(fatherArray);
auto childArray = new AllocaInst(fatherArrayValue->getType(), subDims, block, childArrayName); auto childArray = new AllocaInst(fatherArrayValue->getType(), subDims, block, childArrayName);
auto inst = new GetSubArrayInst(fatherArray, childArray, indices, block, name); auto inst = new GetSubArrayInst(fatherArray, childArray, indices, block, childArrayName);
assert(inst); assert(inst);
block->getInstructions().emplace(position, inst); block->getInstructions().emplace(position, inst);
return inst; return inst;

View File

@@ -1,99 +0,0 @@
#pragma once
#include "SysYBaseVisitor.h"
#include "SysYParser.h"
#include "IR.h" // 引入 SysY IR 头文件
#include "IRBuilder.h"
#include <sstream>
#include <map>
#include <vector>
#include <stack>
#include <memory>
// Parse-tree visitor (derives from SysYBaseVisitor) that keeps two parallel
// representations side by side: a textual IR accumulated in `irStream`, and
// a SysY IR `sysy::Module` held in `module`. Members tagged "(text IR)" in
// the comments below belong to the textual side; members tagged "(SysY IR)"
// belong to the data-structure side.
class LLVMIRGenerator : public SysYBaseVisitor {
public:
// Generate the IR text and the IR data structure for the given compilation unit.
std::string generateIR(SysYParser::CompUnitContext* unit);
// Get the LLVM IR in text form (a copy of whatever has been written to irStream).
std::string getIR() const { return irStream.str(); }
// Get the SysY IR data structure. The returned pointer is non-owning:
// ownership stays with the `module` unique_ptr of this generator.
sysy::Module* getModule() const { return module.get(); }
private:
// Text-output state
std::stringstream irStream;
int tempCounter = 0; // temporary-variable counter
std::string currentVarType; // current variable type (used by the text IR)
// Symbol table: maps a variable name to {allocated address/register, type} (text IR)
std::map<std::string, std::pair<std::string, std::string>> symbolTable;
// Temporary table: maps a temporary's name to its type (text IR)
std::map<std::string, std::string> tmpTable;
std::vector<std::string> globalVars; // list of global variables (text IR)
// SysY IR data structures
std::unique_ptr<sysy::Module> module; // SysY IR module
// Symbol table: maps a variable name to its SysY IR Value pointer
std::map<std::string, sysy::Value*> irSymbolTable;
// Temporary table: maps a temporary's name to its SysY IR Value pointer
std::map<std::string, sysy::Value*> irTmpTable;
// Current context
std::string currentFunction; // name of the current function (text IR)
std::string currentReturnType; // return type of the current function (text IR)
sysy::Function* currentIRFunction = nullptr; // current SysY IR function
sysy::BasicBlock* currentIRBlock = nullptr; // current SysY IR basic block
// Loop control
std::vector<std::string> breakStack; // stack of break labels (text IR)
std::vector<std::string> continueStack; // stack of continue labels (text IR)
bool hasReturn = false; // whether there is a return statement (text IR)
// Jump targets of one loop, kept for both representations.
struct LoopLabels {
std::string breakLabel; // label that break jumps to (text IR)
std::string continueLabel; // label that continue jumps to (text IR)
sysy::BasicBlock* irBreakBlock = nullptr; // block that break jumps to (SysY IR)
sysy::BasicBlock* irContinueBlock = nullptr; // block that continue jumps to (SysY IR)
};
std::stack<LoopLabels> loopStack; // manages the break and continue labels of loops
bool inFunction = false; // marks whether we are inside a function
// Helper functions (text IR)
std::string getNextTemp(); // get the next temporary-variable name
std::string getLLVMType(const std::string& type); // convert a SysY type to an LLVM type
// Helper functions (SysY IR)
sysy::Type* getIRType(const std::string& type); // convert a SysY type to a SysY IR type
std::string getIRTempName(); // get a SysY IR temporary-variable name
void setIRPosition(sysy::BasicBlock* block); // set the current IR insertion point
// Visitor methods (overrides of the SysYBaseVisitor hooks)
std::any visitCompUnit(SysYParser::CompUnitContext* ctx) override;
std::any visitConstDecl(SysYParser::ConstDeclContext* ctx) override;
std::any visitVarDecl(SysYParser::VarDeclContext* ctx) override;
std::any visitVarDef(SysYParser::VarDefContext* ctx) override;
std::any visitFuncDef(SysYParser::FuncDefContext* ctx) override;
std::any visitBlockStmt(SysYParser::BlockStmtContext* ctx) override;
std::any visitLValue(SysYParser::LValueContext* ctx) override;
// std::any visitPrimaryExp(SysYParser::PrimaryExpContext* ctx) override;
std::any visitPrimExp(SysYParser::PrimExpContext* ctx) override;
std::any visitParenExp(SysYParser::ParenExpContext* ctx) override;
std::any visitNumber(SysYParser::NumberContext* ctx) override;
std::any visitString(SysYParser::StringContext* ctx) override;
std::any visitCall(SysYParser::CallContext* ctx) override;
std::any visitUnExp(SysYParser::UnExpContext* ctx) override;
std::any visitMulExp(SysYParser::MulExpContext* ctx) override;
std::any visitAddExp(SysYParser::AddExpContext* ctx) override;
std::any visitRelExp(SysYParser::RelExpContext* ctx) override;
std::any visitEqExp(SysYParser::EqExpContext* ctx) override;
std::any visitLAndExp(SysYParser::LAndExpContext* ctx) override;
std::any visitLOrExp(SysYParser::LOrExpContext* ctx) override;
std::any visitAssignStmt(SysYParser::AssignStmtContext* ctx) override;
std::any visitIfStmt(SysYParser::IfStmtContext* ctx) override;
std::any visitWhileStmt(SysYParser::WhileStmtContext* ctx) override;
std::any visitBreakStmt(SysYParser::BreakStmtContext* ctx) override;
std::any visitContinueStmt(SysYParser::ContinueStmtContext* ctx) override;
std::any visitReturnStmt(SysYParser::ReturnStmtContext* ctx) override;
};

0
src/include/Mem2Reg.h Normal file
View File

View File

@@ -0,0 +1,37 @@
#pragma once
#include "IR.h"
#include "IRBuilder.h"
namespace sysy {
// 优化前对SysY IR的预处理也可以视作部分CFG优化
// 主要包括删除无用指令、合并基本块、删除空块等
// 这些操作可以在SysY IR生成时就完成但为了简化IR生成过程
// 这里将其放在SysY IR生成后进行预处理
// 同时兼容phi节点的处理可以再mem2reg后再次调用优化
class SysYOptPre {
private:
Module *pModule;
IRBuilder *pBuilder;
public:
SysYOptPre(Module *pMoudle, IRBuilder *pBuilder) : pModule(pMoudle), pBuilder(pBuilder) {}
void SysYOptimizateAfterIR(){
SysYDelInstAfterBr();
SysYBlockMerge();
SysYDelNoPreBLock();
SysYDelEmptyBlock();
SysYAddReturn();
}
void SysYDelInstAfterBr(); // 删除br后面的指令
void SysYDelEmptyBlock(); // 空块删除
void SysYDelNoPreBLock(); // 删除无前驱块
void SysYBlockMerge(); // 合并基本块(主要针对嵌套if while的exit块
// 也可以修改IR生成实现回填机制
void SysYAddReturn(); // 添加return指令(主要针对Void函数)
void usedelete(Instruction *instr); // use删除
};
} // namespace sysy

View File

@@ -0,0 +1,29 @@
#pragma once
#include <string>
#include "IR.h"
namespace sysy {
class SysYPrinter {
private:
Module *pModule;
public:
explicit SysYPrinter(Module *pModule) : pModule(pModule) {}
public:
void printIR();
void printGlobalVariable();
void printFunction(Function *function);
void printInst(Instruction *pInst);
void printType(Type *type);
void printValue(Value *value);
public:
static std::string getOperandName(Value *operand);
std::string getTypeString(Type *type);
std::string getValueName(Value *value);
};
} // namespace sysy

View File

@@ -8,6 +8,9 @@ using namespace std;
using namespace antlr4; using namespace antlr4;
// #include "Backend.h" // #include "Backend.h"
#include "SysYIRGenerator.h" #include "SysYIRGenerator.h"
#include "SysYIRPrinter.h"
#include "SysYIROptPre.h"
#include "RISCv32Backend.h"
// #include "LLVMIRGenerator.h" // #include "LLVMIRGenerator.h"
using namespace sysy; using namespace sysy;
@@ -71,27 +74,26 @@ int main(int argc, char **argv) {
// visit AST to generate IR // visit AST to generate IR
if (argStopAfter == "ir") {
SysYIRGenerator generator; SysYIRGenerator generator;
generator.visitCompUnit(moduleAST); generator.visitCompUnit(moduleAST);
if (argStopAfter == "ir") {
auto moduleIR = generator.get(); auto moduleIR = generator.get();
// moduleIR->print(cout); SysYPrinter printer(moduleIR);
printer.printIR();
auto builder = generator.getBuilder();
SysYOptPre optPre(moduleIR, builder);
optPre.SysYOptimizateAfterIR();
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
// else if (argStopAfter == "llvmir") {
// LLVMIRGenerator llvmirGenerator;
// llvmirGenerator.generateIR(moduleAST); // 使用公共接口生成 IR
// cout << llvmirGenerator.getIR();
// return EXIT_SUCCESS;
// }
// // generate assembly // generate assembly
// CodeGen codegen(moduleIR); auto module = generator.get();
// string asmCode = codegen.code_gen(); sysy::RISCv32CodeGen codegen(module);
// cout << asmCode << endl; string asmCode = codegen.code_gen();
// if (argStopAfter == "asm") if (argStopAfter == "asm") {
// return EXIT_SUCCESS; cout << asmCode << endl;
return EXIT_SUCCESS;
}
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }

View File

@@ -1,12 +1,8 @@
//test add //test add
int main(){ int main(){
int a, b; int a, b;
float d;
a = 10; a = 10;
b = 2; b = 2;
int c = a; return a + b;
d = 1.1 ;
return a + b + c;
} }

View File

@@ -5,10 +5,10 @@ int main() {
const int b = 2; const int b = 2;
int c; int c;
if (a == b) if (a != b)
c = a + b; c = b - a + 20; // 21 <- this
else else
c = a * b; c = a * b + b + b + 10; // 16
return c; return c;
} }

View File

@@ -7,7 +7,7 @@ int mul(int x, int y) {
int main(){ int main(){
int a, b; int a, b;
a = 10; a = 10;
b = 0; b = 3;
a = mul(a, b); a = mul(a, b); //60
return a + b; return a + b; //66
} }