Initial commit from sysy-main

This commit is contained in:
Lixuanwang
2025-02-27 23:14:53 +08:00
commit cc523fd30b
1125 changed files with 257793 additions and 0 deletions

268
src/ASTPrinter.cpp Normal file
View File

@@ -0,0 +1,268 @@
#include <iostream>
#include "ASTPrinter.h"
#include "SysYParser.h"
using namespace std;
/// Prints a numeric literal exactly as it appears in the source.
///
/// The text is taken from the context itself rather than from `ILITERAL()`:
/// if the `number` rule matched any other token (the grammar is not visible
/// here; presumably a float literal), `ILITERAL()` would be null and the old
/// code dereferenced it. For an integer literal the output is unchanged.
std::any ASTPrinter::visitNumber(SysYParser::NumberContext *ctx) {
  cout << ctx->getText();
  return nullptr;
}
/// Prints a string literal token verbatim (text of the STRING token).
std::any ASTPrinter::visitString(SysYParser::StringContext *ctx) {
  const auto literalText = ctx->STRING()->getText();
  cout << literalText;
  return nullptr;
}
/// Prints a whole module: all declarations first, then all function
/// definitions, then the optional trailing `funcRParams` child if present.
std::any ASTPrinter::visitModule(SysYParser::ModuleContext *ctx) {
  for (auto *decl : ctx->dcl())
    decl->accept(this);
  for (auto *function : ctx->funcDef())
    function->accept(this);
  if (auto *params = ctx->funcRParams())
    params->accept(this);
  return nullptr;
}
/// Prints the actual parameters of a call, separated by ", ".
std::any ASTPrinter::visitFuncRParams(SysYParser::FuncRParamsContext *ctx) {
  // Empty before the first parameter, ", " before every following one.
  const char *separator = "";
  for (auto *param : ctx->funcRParam()) {
    cout << separator;
    param->accept(this);
    separator = ", ";
  }
  return nullptr;
}
/// Prints an actual parameter of the `ExpAsRParam` alternative by visiting
/// its `number` child.
std::any ASTPrinter::visitExpAsRParam(SysYParser::ExpAsRParamContext *ctx) {
  auto *numberNode = ctx->number();
  numberNode->accept(this);
  return nullptr;
}
/// Prints an actual parameter of the `StringAsRParam` alternative by visiting
/// its `string` child.
std::any ASTPrinter::visitStringAsRParam(SysYParser::StringAsRParamContext *ctx) {
  auto *stringNode = ctx->string();
  stringNode->accept(this);
  return nullptr;
}
/// Prints every expression of an `ExpsAsRParam` node, separated by ", ".
std::any ASTPrinter::visitExpsAsRParam(SysYParser::ExpsAsRParamContext *ctx) {
  const auto expressions = ctx->exp();
  for (size_t i = 0; i < expressions.size(); ++i) {
    if (i != 0)
      cout << ", ";
    expressions[i]->accept(this);
  }
  return nullptr;
}
/// Prints a constant declaration on one indented line:
/// `const <type> <def>, <def>, ...;`
std::any ASTPrinter::visitConstDecl(SysYParser::ConstDeclContext *ctx) {
  cout << getIndent() << "const " << ctx->bType()->getText() << " ";
  const auto definitions = ctx->constDef();
  for (size_t i = 0; i < definitions.size(); ++i) {
    if (i != 0)
      cout << ", ";
    definitions[i]->accept(this);
  }
  cout << ";" << endl;
  return nullptr;
}
/// Prints a variable declaration on one indented line:
/// `<type> <def>, <def>, ...;`
std::any ASTPrinter::visitVarDecl(SysYParser::VarDeclContext *ctx) {
  cout << getIndent() << ctx->bType()->getText() << " ";
  const char *separator = "";
  for (auto *definition : ctx->varDef()) {
    cout << separator;
    definition->accept(this);
    separator = ", ";
  }
  cout << ";" << endl;
  return nullptr;
}
/// Prints one variable definition: the identifier, one `[dim]` per array
/// dimension, and ` = <init>` when an initializer is present.
std::any ASTPrinter::visitVarDef(SysYParser::VarDefContext *ctx) {
  cout << ctx->IDENT()->getText();
  for (auto *dimension : ctx->constExp()) {
    cout << "[";
    dimension->accept(this);
    cout << "]";
  }
  if (auto *initializer = ctx->initVal()) {
    cout << " = ";
    initializer->accept(this);
  }
  return nullptr;
}
/// Prints one constant definition: the identifier, one `[dim]` per array
/// dimension, then ` = ` followed by the (mandatory) initializer.
std::any ASTPrinter::visitConstDef(SysYParser::ConstDefContext *ctx) {
  cout << ctx->IDENT()->getText();
  for (auto *dimension : ctx->constExp()) {
    cout << "[";
    dimension->accept(this);
    cout << "]";
  }
  cout << " = ";
  auto *initializer = ctx->constInitVal();
  initializer->accept(this);
  return nullptr;
}
/// Prints a function definition: the header line
/// `<ret-type> <name>(<formal params>)` followed by the body block.
std::any ASTPrinter::visitFuncDef(SysYParser::FuncDefContext *ctx) {
  cout << getIndent();
  cout << ctx->funcType()->getText() << " " << ctx->IDENT()->getText() << "(";
  if (auto *formals = ctx->funcFParams())
    formals->accept(this);
  cout << ")" << endl;
  auto *body = ctx->block();
  body->accept(this);
  return nullptr;
}
/// Prints the formal parameter list of a function, separated by ", ".
std::any ASTPrinter::visitFuncFParams(SysYParser::FuncFParamsContext *ctx) {
  const auto formals = ctx->funcFParam();
  for (size_t i = 0; i < formals.size(); ++i) {
    if (i != 0)
      cout << ", ";
    formals[i]->accept(this);
  }
  return nullptr;
}
/// Prints one formal parameter: `<type> <name>`, followed by `[]` and one
/// `[dim]` per sized dimension when the parameter is an array.
///
/// The previous version emitted `[]` only when at least one sized dimension
/// was present, so a one-dimensional array parameter such as `int a[]` was
/// printed as a scalar. The parameter is now treated as an array when it has
/// sized dimensions OR a literal "[" token among its direct children, which
/// keeps the old output for every case the old code handled.
std::any ASTPrinter::visitFuncFParam(SysYParser::FuncFParamContext *ctx) {
  cout << ctx->bType()->getText() << " " << ctx->IDENT()->getText();

  const auto dimensions = ctx->exp();
  bool isArray = !dimensions.empty();
  if (!isArray) {
    for (auto *child : ctx->children) {
      auto *token = dynamic_cast<antlr4::tree::TerminalNode *>(child);
      if (token && token->getText() == "[") {
        isArray = true;
        break;
      }
    }
  }

  if (isArray) {
    cout << "[]";
    for (auto *dimension : dimensions) {
      cout << "[";
      dimension->accept(this);
      cout << "]";
    }
  }
  return nullptr;
}
/// Prints an expression by delegating to its `addExp` child.
std::any ASTPrinter::visitExp(SysYParser::ExpContext *ctx) {
  auto *additive = ctx->addExp();
  additive->accept(this);
  return nullptr;
}
/// Prints a condition by delegating to its `lorExp` child.
std::any ASTPrinter::visitCond(SysYParser::CondContext *ctx) {
  auto *logicalOr = ctx->lorExp();
  logicalOr->accept(this);
  return nullptr;
}
/// Prints an l-value: the identifier followed by one `[index]` per subscript.
std::any ASTPrinter::visitLVal(SysYParser::LValContext *ctx) {
  cout << ctx->IDENT()->getText();
  for (auto *subscript : ctx->exp()) {
    cout << "[";
    subscript->accept(this);
    cout << "]";
  }
  return nullptr;
}
/// Prints an additive expression as `<lhs> <op> <rhs>`, or just the
/// multiplicative operand when there is no left-hand side.
///
/// The operator text is read from the child between the two operands instead
/// of from `ctx->ADD()`: if the operator was matched by any token other than
/// ADD (the grammar is not visible here; presumably subtraction uses its own
/// token), `ADD()` is null and the old code dereferenced it. When the operator
/// is the ADD token the output is identical.
std::any ASTPrinter::visitAddExp(SysYParser::AddExpContext *ctx) {
  if (auto *lhs = ctx->addExp()) {
    lhs->accept(this);
    // Left-recursive alternative: children are [lhs, operator, rhs].
    if (ctx->children.size() > 1)
      cout << " " << ctx->children[1]->getText() << " ";
  }
  if (auto *rhs = ctx->mulExp())
    rhs->accept(this);
  return nullptr;
}
/// Prints a multiplicative expression: the unary operands in order, with the
/// operator found between consecutive operands printed as ` <op> `.
///
/// Fixes over the previous version:
///  - an empty operand list no longer underflows `size() - 1` or calls
///    `back()` on an empty vector;
///  - an operand is never skipped (it used to be dropped together with its
///    operator when the operator child was not a terminal node);
///  - the operator child index is bounds-checked.
std::any ASTPrinter::visitMulExp(SysYParser::MulExpContext *ctx) {
  const auto operands = ctx->unaryExp();
  for (size_t i = 0; i < operands.size(); ++i) {
    if (i > 0) {
      // Children alternate operand, operator, operand, ...; the operator
      // preceding operand i therefore sits at child index 2*i - 1.
      const size_t operatorIndex = 2 * i - 1;
      if (operatorIndex < ctx->children.size())
        cout << " " << ctx->children[operatorIndex]->getText() << " ";
    }
    operands[i]->accept(this);
  }
  return nullptr;
}
/// Prints a unary expression. Three shapes are handled, checked in this order:
///  1. a primary expression,
///  2. a call `<name>(<actual params>)`,
///  3. a prefix operator followed by its operand.
/// Anything else prints nothing.
std::any ASTPrinter::visitUnaryExp(SysYParser::UnaryExpContext *ctx) {
  if (auto *primary = ctx->primaryExp()) {
    primary->accept(this);
    return nullptr;
  }
  if (auto *callee = ctx->IDENT()) {
    cout << callee->getText() << "(";
    if (auto *actuals = ctx->funcRParams())
      actuals->accept(this);
    cout << ")";
    return nullptr;
  }
  if (auto *prefixOp = ctx->unaryOp()) {
    cout << prefixOp->getText();
    ctx->unaryExp()->accept(this);
  }
  return nullptr;
}
// std::any ASTPrinter::visitLorExp(SysYParser::LorExpContext *ctx) {
// if (ctx->lorExp()) {
// // 左递归部分
// ctx->lorExp()->accept(this);
// cout << " || ";
// ctx->landExp()->accept(this);
// } else {
// // 基础部分
// ctx->landExp()->accept(this);
// }
// return nullptr;
// }
/// Prints one statement at the current indentation.
///
/// Handled forms: assignment, nested block, `if`/`else`, `while`, `return`,
/// `break`, `continue`, and plain expression statements.
///
/// Expression statements (e.g. a bare call `foo();`) used to be dropped
/// silently: the dedicated branch had been disabled because `return <exp>;`
/// also carries an `exp` child. They are now printed, but only after the
/// keyword scan has found no keyword, so `return` is not affected.
std::any ASTPrinter::visitStmt(SysYParser::StmtContext *ctx) {
  // Assignment: <lVal> = <exp>;
  if (ctx->lVal() && ctx->exp()) {
    cout << getIndent();
    ctx->lVal()->accept(this);
    cout << " = ";
    ctx->exp()->accept(this);
    cout << ";" << endl;
    return nullptr;
  }

  // Nested block: the block prints its own braces and indentation.
  if (ctx->block()) {
    ctx->block()->accept(this);
    return nullptr;
  }

  // Keyword statements are recognised by the text of a direct terminal child.
  bool keywordHandled = false;
  for (auto *child : ctx->children) {
    auto *terminal = dynamic_cast<antlr4::tree::TerminalNode *>(child);
    if (!terminal)
      continue;
    const auto keyword = terminal->getText();
    if (keyword == "if") {
      cout << getIndent() << "if (";
      ctx->cond()->accept(this);
      cout << ")" << endl;
      ctx->stmt(0)->accept(this);
      if (ctx->stmt().size() > 1) {
        cout << getIndent() << "else" << endl;
        ctx->stmt(1)->accept(this);
      }
    } else if (keyword == "while") {
      cout << getIndent() << "while (";
      ctx->cond()->accept(this);
      cout << ")" << endl;
      ctx->stmt(0)->accept(this);
    } else if (keyword == "return") {
      cout << getIndent() << "return";
      if (ctx->exp()) {
        cout << " ";
        ctx->exp()->accept(this);
      }
      cout << ";" << endl;
    } else if (keyword == "break") {
      cout << getIndent() << "break;" << endl;
    } else if (keyword == "continue") {
      cout << getIndent() << "continue;" << endl;
    } else {
      continue; // punctuation such as "(" or ";": keep scanning
    }
    keywordHandled = true;
    break; // a statement carries at most one of these keywords
  }

  // Expression statement: <exp>;
  if (!keywordHandled && ctx->exp()) {
    cout << getIndent();
    ctx->exp()->accept(this);
    cout << ";" << endl;
  }
  return nullptr;
}
/// Prints a block: `{` and `}` on their own lines at the current indentation,
/// with every block item printed one indentation level deeper.
std::any ASTPrinter::visitBlock(SysYParser::BlockContext *ctx) {
  cout << getIndent() << "{" << endl;
  ++indentLevel;
  for (auto *blockItem : ctx->blockItem())
    blockItem->accept(this);
  --indentLevel;
  cout << getIndent() << "}" << endl;
  return nullptr;
}

46
src/ASTPrinter.h Normal file
View File

@@ -0,0 +1,46 @@
#pragma once
#include "SysYBaseVisitor.h"
#include "SysYParser.h"
#include <iostream>
#include <string>
/// Parse-tree visitor that pretty-prints a SysY program to standard output.
///
/// Each overridden `visit*` method writes the textual form of its node to
/// `std::cout` and returns an empty-valued result (`nullptr`). Node kinds
/// without an override fall back to `SysYBaseVisitor`.
class ASTPrinter : public SysYBaseVisitor {
private:
  /// Current nesting depth; changed by `visitBlock` around a block's items.
  int indentLevel = 0;

  /// Returns the whitespace prefix for the current depth (4 spaces per level).
  /// Declared const because it only reads `indentLevel`.
  std::string getIndent() const {
    return std::string(indentLevel * 4, ' ');
  }

public:
  // Top level and declarations.
  std::any visitModule(SysYParser::ModuleContext *ctx) override;
  std::any visitConstDecl(SysYParser::ConstDeclContext *ctx) override;
  std::any visitVarDecl(SysYParser::VarDeclContext *ctx) override;
  std::any visitVarDef(SysYParser::VarDefContext *ctx) override;
  std::any visitConstDef(SysYParser::ConstDefContext *ctx) override; // newly added

  // Functions and parameters.
  std::any visitFuncDef(SysYParser::FuncDefContext *ctx) override;
  std::any visitFuncFParams(SysYParser::FuncFParamsContext *ctx) override;
  std::any visitFuncFParam(SysYParser::FuncFParamContext *ctx) override;
  std::any visitFuncRParams(SysYParser::FuncRParamsContext *ctx) override;
  std::any visitExpAsRParam(SysYParser::ExpAsRParamContext *ctx) override;
  std::any visitStringAsRParam(SysYParser::StringAsRParamContext *ctx) override;
  std::any visitExpsAsRParam(SysYParser::ExpsAsRParamContext *ctx) override;

  // Expressions.
  std::any visitExp(SysYParser::ExpContext *ctx) override;
  std::any visitCond(SysYParser::CondContext *ctx) override;
  std::any visitLVal(SysYParser::LValContext *ctx) override;
  std::any visitAddExp(SysYParser::AddExpContext *ctx) override;
  std::any visitMulExp(SysYParser::MulExpContext *ctx) override;
  std::any visitUnaryExp(SysYParser::UnaryExpContext *ctx) override;
  std::any visitNumber(SysYParser::NumberContext *ctx) override;
  std::any visitString(SysYParser::StringContext *ctx) override;

  // Statements.
  std::any visitStmt(SysYParser::StmtContext *ctx) override;
  std::any visitBlock(SysYParser::BlockContext *ctx) override;
  // std::any ASTPrinter::visitLorExp(SysYParser::LorExpContext *ctx) override;
};

295
src/Backend.cpp Normal file
View File

@@ -0,0 +1,295 @@
#include "Backend.h"
using namespace sysy;
namespace sysy {
using RegId = RegManager::RegId;
/// Entry point of the backend: returns the assembly text generated for the
/// module this generator was constructed with.
string CodeGen::code_gen() {
  string assembly = module_gen(module);
  return assembly;
}
/// Generates the assembly text for a whole module.
///
/// Output layout: the `.arch` and `.text` directives, then the global-data
/// section, then one chunk per function that has at least one basic block
/// (each followed by a newline), then a final newline.
///
/// Changes from the previous version: the per-iteration copy of the function
/// name into an unused local is gone, and the loop is a range-for. The
/// emitted text is unchanged.
string CodeGen::module_gen(Module *module) {
  // Clear the label records left over from a previously generated module.
  clearModuleRecord(module);

  // Assembly for all global values.
  const string dataCode = globaldata_gen();

  string textCode;
  auto functions = module->getFunctions();
  for (const auto &entry : *functions) {
    Function *func = entry.second;
    // Functions without basic blocks produce no code.
    if (func->getBasicBlocks().empty())
      continue;
    textCode += function_gen(func) + eol;
  }

  string code;
  code += space + ".arch armv7ve " + eol;
  code += space + ".text " + eol;
  code += (dataCode + textCode + eol);
  return code;
}
/// Builds the assembler header of a function: the `.globl`, `.p2align` and
/// `.type` directives followed by the entry label `<name>:`.
string CodeGen::functionHead_gen(Function *func) {
  const string symbol = func->getName();
  string header;
  header += space + ".globl " + symbol + eol;
  header += space + ".p2align " + std::to_string(int_p2align) + eol;
  header += space + ".type " + symbol + ", %function" + eol;
  header += symbol + ":" + eol;
  return header;
}
/**
* stack structure:
*
* last function stack
* -------------------------- <-- fp point
* callee-saved regs (include fp, sp, lr)
* --------------------------
* local variables and subfunc's return value (ir inst)
* --------------------------
* arg0,arg1,arg2,arg3 (as temporary variables)
* --------------------------
* caller-saved regs
* --------------------------
* arg_x ~ arg_4
* -------------------------- <-- sp point
* next function stack
*
*/
/**
 * Prologue (planned contents):
 *   - preserve callee-saved registers (lr, fp and other callee-saved regs)
 *   - set the new fp
 *   - allocate stack space for local variables / arguments / return value
 *   - store the arguments to the stack
 *
 * Currently a stub: returns an empty string.
 */
string CodeGen::prologueCode_gen(Function *func) {
  // TODO: emit the prologue for `func` here.
  string prologue;
  return prologue;
}
/**
 * Epilogue (planned contents):
 *   - free the stack space
 *   - restore sp
 *   - restore the used callee-saved registers (lr, fp and other callee-saved
 *     regs)
 *   - bx lr
 *
 * Currently a stub: returns an empty string.
 */
string CodeGen::epilogueCode_gen(Function *func) {
  // TODO: emit the epilogue for `func` here.
  string epilogue;
  return epilogue;
}
/// Generates the assembly for one function.
///
/// The basic blocks are generated first, then the header, prologue, epilogue
/// and literal pools; the pieces are concatenated as
/// header + prologue + blocks + epilogue + literal pools.
string CodeGen::function_gen(Function *func) {
  curFunc = func;
  clearFunctionRecord(func);

  // Body first, in block order.
  string body;
  for (auto &block : func->getBasicBlocks())
    body += basicBlock_gen(block.get());

  const string head = functionHead_gen(func);
  const string prologue = prologueCode_gen(func);
  const string epilogue = epilogueCode_gen(func);
  const string literalPools = literalPoolsCode_gen(func);

  return head + prologue + body + epilogue + literalPools;
}
/// Generates the assembly for one basic block: its label line followed by the
/// code of each instruction in order. Also records `bb` as the current block.
///
/// The previous version fetched every instruction's kind into a local that
/// was never used; that dead local has been removed.
string CodeGen::basicBlock_gen(BasicBlock *bb) {
  curBB = bb;
  string code = getBBLabel(bb) + ":" + eol;
  for (auto &instr : bb->getInstructions())
    code += instruction_gen(instr.get());
  return code;
}
/**
 * Generates code for a binary instruction.
 *
 * Returns a pair:
 *   first  - the RegId used as the destination operand
 *   second - the generated assembly text
 *
 * Currently a stub: echoes `dstRegId` with empty assembly text.
 */
pair<RegId, string> CodeGen::binaryInst_gen(BinaryInst *bInst, RegId dstRegId) {
  // TODO: emit the instruction sequence for `bInst` here.
  string asmText;
  return pair<RegId, string>(dstRegId, asmText);
}
/// Generates code for a unary instruction; returns <destination reg, asm>.
/// Currently a stub: echoes `dstRegId` with empty assembly text.
pair<RegId, string> CodeGen::unaryInst_gen(UnaryInst *uInst, RegId dstRegId) {
  // TODO: emit the instruction sequence for `uInst` here.
  string asmText;
  return pair<RegId, string>(dstRegId, asmText);
}
/// Generates code for an alloca instruction; returns <destination reg, asm>.
/// Currently a stub: echoes `dstRegId` with empty assembly text.
pair<RegId, string> CodeGen::allocaInst_gen(AllocaInst *aInst,
                                            RegManager::RegId dstRegId) {
  // TODO: emit the instruction sequence for `aInst` here.
  string asmText;
  return pair<RegId, string>(dstRegId, asmText);
}
/// Generates code for a store instruction.
/// Currently a stub: returns an empty string.
string CodeGen::storeInst_gen(StoreInst *stInst) {
  // TODO: emit the instruction sequence for `stInst` here.
  string asmText;
  return asmText;
}
/// Generates code for a load instruction; returns <destination reg, asm>.
/// Currently a stub: echoes `dstRegId` with empty assembly text.
pair<RegId, string> CodeGen::loadInst_gen(LoadInst *ldInst, RegId dstRegId) {
  // TODO: emit the instruction sequence for `ldInst` here.
  string asmText;
  return pair<RegId, string>(dstRegId, asmText);
}
/// Generates code for a return instruction.
/// Currently a stub: returns an empty string.
string CodeGen::returnInst_gen(ReturnInst *retInst) {
  // TODO: emit the instruction sequence for `retInst` here.
  string asmText;
  return asmText;
}
/// Generates code for an unconditional branch.
/// Currently a stub: returns an empty string.
string CodeGen::uncondBrInst_gen(UncondBrInst *ubInst) {
  // TODO: emit the instruction sequence for `ubInst` here.
  string asmText;
  return asmText;
}
/// Generates code for a conditional branch.
/// Currently a stub: returns an empty string.
string CodeGen::condBrInst_gen(CondBrInst *cbInst) {
  // TODO: emit the instruction sequence for `cbInst` here.
  string asmText;
  return asmText;
}
/// Generates code for a call instruction; returns <destination reg, asm>.
/// Currently a stub: echoes `dstRegId` with empty assembly text.
pair<RegId, string> CodeGen::callInst_gen(CallInst *callInst, RegId dstRegId) {
  // TODO: emit the instruction sequence for `callInst` here.
  string asmText;
  return pair<RegId, string>(dstRegId, asmText);
}
/// Dispatches one IR instruction to its specific generator and returns the
/// generated assembly.
///
/// Store, return and both branch kinds return straight away. A call also
/// returns straight away when its generator reports R0 as the destination.
/// Every other path, including the unsupported-kind fallback, ends by
/// appending `storeRegToStack_gen` for instructions whose type is not void.
string CodeGen::instruction_gen(Instruction *instr) {
  string asmText;
  const string instrName = instr->getName();
  RegManager::RegId resultReg = RegManager::RNONE;
  pair<RegId, string> generated;

  switch (instr->getKind()) {
  // Binary instructions. Registers are only used for the operation itself,
  // so any free register may be picked.
  case Instruction::kAdd:
  case Instruction::kMul:
  case Instruction::kSub:
    generated =
        binaryInst_gen(dynamic_cast<BinaryInst *>(instr), RegManager::RANY);
    asmText += generated.second;
    resultReg = generated.first;
    break;

  case Instruction::kLoad:
    generated = loadInst_gen(dynamic_cast<LoadInst *>(instr), RegManager::RANY);
    asmText += M_emitComment("load inst");
    asmText += generated.second;
    resultReg = generated.first;
    break;

  case Instruction::kStore:
    asmText += M_emitComment("store inst");
    asmText += storeInst_gen(dynamic_cast<StoreInst *>(instr));
    return asmText;

  case Instruction::kAlloca:
    generated =
        allocaInst_gen(dynamic_cast<AllocaInst *>(instr), RegManager::RANY);
    asmText += M_emitComment("alloca inst");
    asmText += generated.second;
    resultReg = generated.first;
    break;

  case Instruction::kReturn:
    asmText += M_emitComment("return inst");
    asmText += returnInst_gen(dynamic_cast<ReturnInst *>(instr));
    return asmText;

  case Instruction::kCall:
    generated = callInst_gen(dynamic_cast<CallInst *>(instr), RegManager::RANY);
    asmText += generated.second;
    resultReg = generated.first;
    if (resultReg == RegManager::R0)
      return asmText;
    break;

  case Instruction::kBr:
    asmText += uncondBrInst_gen(dynamic_cast<UncondBrInst *>(instr));
    return asmText;

  case Instruction::kCondBr:
    asmText += condBrInst_gen(dynamic_cast<CondBrInst *>(instr));
    return asmText;

  default:
    asmText += "ERROR CODE : instruction " + instrName +
               " is not implementation" + eol;
    break;
  }

  if (!instr->getType()->isVoid()) {
    asmText += storeRegToStack_gen(resultReg, instr);
    // regm.freeReg(resultReg); // TODO: release the register here.
  }
  return asmText;
}
//
/// Generates the assembly for the module's global data.
/// Currently a stub: returns an empty string.
string CodeGen::globaldata_gen() {
  // TODO: emit the global data section here.
  string dataSection;
  return dataSection;
}
/// Generates the literal pools emitted after a function's code.
/// Currently a stub: returns an empty string.
string CodeGen::literalPoolsCode_gen(Function *func) {
  // TODO: emit the literal pools for `func` here.
  string literalPools;
  return literalPools;
}
} // namespace sysy

217
src/Backend.h Normal file
View File

@@ -0,0 +1,217 @@
#pragma once
/**
* 后端设计需要注意的点
*
* label分配 :
* 全局变量的label,常量的label,basicblock的label,函数入口label,函数出口label
* 全局数据处理
* 常量处理
* 临时变量的处理(包括函数参数也是临时变量)
* 函数参数的处理
* 块参数的处理 (如果SSA问题通过块传参解决则需要处理块参数)
* 函数栈空间结构的设计
* 块的遍历问题
* arm32的过程调用约定
*
* 后端测试需要注意的点
*
* 如何汇编与链接.
* qemu以及树莓派如何运行.
*/
#include <algorithm>
#include <set>
#include <string>
#include <vector>
#include "IR.h"
using std::find;
using std::map;
using std::pair;
using std::set;
using std::string;
using std::to_string;
using std::vector;
// Macro_ERROR_MSG(fmt, ...): prints "ERROR in line-<line> of <file>: ", then
// the printf-style message and a newline to stderr, flushes stderr and
// terminates the process with exit status 1.
// NOTE(review): the definition ends in `while (0);` (with a trailing
// semicolon), so `if (c) Macro_ERROR_MSG("x"); else ...` does not compile.
// The semicolon is kept because call sites outside this file may invoke the
// macro without one of their own -- confirm before removing it.
// NOTE(review): the macro uses fprintf/fflush/exit, yet this header includes
// neither <cstdio> nor <cstdlib> directly.
#define Macro_ERROR_MSG(...) \
do { \
fprintf(stderr, "ERROR in line-%d of %s: ", __LINE__, __FILE__); \
fprintf(stderr, __VA_ARGS__); \
fprintf(stderr, "\n"); \
fflush(stderr); \
exit(1); \
} while (0);
// M_emitInst(STR): one indented assembly line, `space + STR + eol`.
#define M_emitInst(STR) (space + STR + eol)
// M_emitComment(STR): one indented assembly comment line, "//" followed by STR.
#define M_emitComment(STR) (space + "//" + STR + eol)
// M_emitLabel(STR): an unindented label line, STR followed by ":".
// STR must already be a std::string (a bare literal would not concatenate).
#define M_emitLabel(STR) (STR + ":" + eol)
// #define REG_EXP
// Indentation prefix and line terminator used for emitted assembly lines.
static const string space(4, ' ');
static const string eol("\n");
// Data-layout parameters; only valid for armv7.
static constexpr int int_align = 4;
static constexpr int int_size = 4;
static constexpr int int_p2align = 2;
static constexpr int reg_size = 4;
// Default arm32 maximum immediate.
static constexpr uint32_t maxMovImm = 0xFFF;
// Name of the register used as the stack index base.
static const string stackIndexReg("fp");
// Returns the upper 16 bits of a 32-bit value.
static uint16_t getWordHigh(uint32_t cval) {
  const uint32_t upperHalf = cval >> 16;
  return static_cast<uint16_t>(upperHalf & 0xFFFF);
}
// Returns the lower 16 bits of a 32-bit value.
static uint16_t getWordLow(uint32_t cval) {
  const uint32_t lowerHalf = cval & 0xFFFF;
  return static_cast<uint16_t>(lowerHalf);
}
//
// Formats an instruction with one source register and no destination:
// "<indent><name> <srcReg>\n".
static string emitInst_1srcR_noDstR(string name, string srcReg) {
  string line = space;
  line += name;
  line += " ";
  line += srcReg;
  line += eol;
  return line;
}
// Formats an instruction with two source registers and one destination:
// "<indent><name> <dstReg>, <srcReg0>, <srcReg1>\n".
// Note the parameter order: the sources come before the destination.
static string emitInst_2srcR_1dstR(string name, string srcReg0, string srcReg1,
                                   string dstReg) {
  string line = space;
  line += name;
  line += " ";
  line += dstReg;
  line += ", ";
  line += srcReg0;
  line += ", ";
  line += srcReg1;
  line += eol;
  return line;
}
namespace sysy {
//
/// Register identifiers used by the backend, plus their textual names.
class RegManager {
public:
  /// r0..r10, and two sentinels: RNONE ("no register") and RANY ("any free
  /// register may be chosen").
  enum RegId : unsigned {
    R0 = 0,
    R1 = 1,
    R2 = 2,
    R3 = 3,
    R4 = 4,
    R5 = 5,
    R6 = 6,
    R7 = 7,
    R8 = 8,
    R9 = 9,
    R10 = 10,
    RNONE = 1024,
    RANY = 2048,
  };

  /// Returns "RNONE" / "RANY" for the sentinels, otherwise "r<number>".
  static string toString(RegId reg) {
    switch (reg) {
    case RNONE:
      return "RNONE";
    case RANY:
      return "RANY";
    default:
      return "r" + to_string(reg);
    }
  }
};
/// An instruction operand: either a 32-bit immediate or a register.
///
/// Changes from the previous version:
///  - the default constructor now produces a well-defined operand (immediate
///    0); before, `kind` and the union were left uninitialised, so reading
///    them on a default-constructed operand was undefined behaviour;
///  - the query and accessor methods are const-qualified so they can be called
///    on const operands (existing callers are unaffected).
class Operand {
public:
  using RegId = RegManager::RegId;
  enum Kind {
    kReg,
    kImm,
  };

  /// Discriminator telling which union member is active.
  Kind kind;
  union {
    uint32_t imm;
    RegId regId;
  };

  /// Default operand: the immediate 0.
  Operand() : kind(kImm), imm(0) {}
  /// Immediate operand. Left implicit so existing conversions keep working.
  Operand(uint32_t imm) : kind(kImm), imm(imm) {}
  /// Register operand. Left implicit so existing conversions keep working.
  Operand(RegId regId) : kind(kReg), regId(regId) {}

  bool isImm(void) const { return kind == kImm; }
  bool isReg(void) const { return kind == kReg; }

  /// Returns the immediate value; asserts that this is an immediate.
  uint32_t getImm(void) const {
    assert(kind == kImm);
    return imm;
  }

  /// Returns the register id; asserts that this is a register.
  RegId getRegId(void) const {
    assert(kind == kReg);
    return regId;
  }

  /// Assembly spelling: "#<value>" for immediates, the register name otherwise.
  string toString(void) const {
    if (kind == kImm)
      return "#" + to_string(imm);
    else
      return RegManager::toString(regId);
  }
};
/// Backend code generator: turns an IR `Module` into assembly text.
///
/// Changes from the previous version:
///  - `curFunc` and `curBB` are initialised to nullptr instead of being left
///    indeterminate until the first function / block is generated;
///  - parameter names in the declarations of `callInst_gen`, `unaryInst_gen`
///    and `condBrInst_gen` now match their definitions (declaration-only,
///    callers are unaffected).
class CodeGen {
public:
  using RegId = RegManager::RegId;

private:
  sysy::Module *module;
  /// Function currently being generated (set by `function_gen`).
  Function *curFunc = nullptr;
  /// Basic block currently being generated (set by `basicBlock_gen`).
  BasicBlock *curBB = nullptr;
  //
  RegManager regm;
  // globalValue
  bool loadGlobalValByMOVWT = true;
  // basicBlock
  vector<BasicBlock *> linear_bb;
  int bb_no = 0;
  // function params, return value and localVar
  map<Argument *, int> paramsStOffset;
  map<Instruction *, int> localVarStOffset;
  int retValueStOffset = 0;
  size_t stOffsetAcc = 0;
  // label manager
  map<BasicBlock *, string> bb_labels;
  uint64_t label_no = 0;

public:
  CodeGen(Module *module) : module(module) {}

  // code_gen function list
  string code_gen();
  string module_gen(Module *module);
  string function_gen(Function *func);
  string basicBlock_gen(BasicBlock *bb);
  string instruction_gen(Instruction *instr);
  string globaldata_gen();
  string prologueCode_gen(Function *func);
  string epilogueCode_gen(Function *func);
  string literalPoolsCode_gen(Function *func);
  string functionHead_gen(Function *func);

  // Module
  /// Resets the module-wide label counter.
  void clearModuleRecord(Module *module) { label_no = 0; }

  // instruction gen function list
  // Generators returning a pair yield <dstReg, code>.
  pair<RegId, string> loadInst_gen(LoadInst *ldInst, RegId dstRegId);
  string storeInst_gen(StoreInst *stInst);
  pair<RegId, string> allocaInst_gen(AllocaInst *aInst, RegId dstRegId);
  string returnInst_gen(ReturnInst *retInst);
  pair<RegId, string> callInst_gen(CallInst *callInst, RegId dstRegId);
  pair<RegId, string> binaryInst_gen(BinaryInst *bInst, RegId dstRegId);
  pair<RegId, string> unaryInst_gen(UnaryInst *uInst, RegId dstRegId);
  string uncondBrInst_gen(UncondBrInst *ubInst);
  string condBrInst_gen(CondBrInst *cbInst);

  /// Generates the code that spills `regId` (holding the result of `inst`)
  /// to the stack. Currently a stub: returns an empty string.
  string storeRegToStack_gen(RegId regId, Instruction *inst) {
    string code;
    /**
     *code in here
     */
    return code;
  }

  // function
  /// Drops all per-function bookkeeping (stack offsets and block labels).
  /// `label_no` is deliberately not reset here, so label numbers keep
  /// increasing across the functions of one module.
  void clearFunctionRecord(Function *func) {
    localVarStOffset.clear();
    paramsStOffset.clear();
    retValueStOffset = 0;
    bb_labels.clear();
    //
    stOffsetAcc = 0;
  }

  /// Returns the label of `bb`, creating `.LBB_<n>` on first request.
  string getBBLabel(BasicBlock *bb) {
    auto t = bb_labels.find(bb);
    string label;
    if (t == bb_labels.end()) {
      label = ".LBB_" + to_string(label_no++);
      bb_labels.emplace(bb, label);
    } else {
      label = t->second;
    }
    return label;
  }
};
} // namespace sysy

26
src/CMakeLists.txt Normal file
View File

@@ -0,0 +1,26 @@
# Generate lexer and parser with ANTLR
# The FindANTLR module is looked up in the `cmake` directory under
# ${ANTLR_RUNTIME}, which must be set before this file is processed.
list(APPEND CMAKE_MODULE_PATH "${ANTLR_RUNTIME}/cmake")
include(FindANTLR)
# Generate lexer, parser and visitor sources from SysY.g4 into the build
# directory.
antlr_target(SysYGen SysY.g4
LEXER PARSER
OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
VISITOR
)
# Shared library built from the generated sources, linked against the shared
# ANTLR4 runtime.
add_library(SysYParser SHARED ${ANTLR_SysYGen_CXX_OUTPUTS})
target_include_directories(SysYParser PUBLIC ${ANTLR_RUNTIME}/runtime/src)
target_link_libraries(SysYParser PUBLIC antlr4_shared)
# The compiler driver executable.
add_executable(sysyc
sysyc.cpp
ASTPrinter.cpp
IR.cpp
SysYIRGenerator.cpp
Backend.cpp
)
# The generated headers live in the build directory, so add it to the include
# path.
target_include_directories(sysyc PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
target_link_libraries(sysyc PRIVATE SysYParser)
# set(THREADS_PREFER_PTHREAD_FLAG ON)
# find_package(Threads REQUIRED)
# target_link_libraries(sysyc PRIVATE Threads::Threads)

531
src/IR.cpp Normal file
View File

@@ -0,0 +1,531 @@
#include "IR.h"
#include "range.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <ostream>
#include <set>
#include <string>
#include <utility>
#include <vector>
using namespace std;
namespace sysy {
/// Streams every element of `container` to `os`, writing `sep` between
/// consecutive elements (none before the first or after the last).
/// Returns `os` to allow chaining.
template <typename T>
ostream &interleave(ostream &os, const T &container, const string sep = ", ") {
  bool needSeparator = false;
  for (auto it = container.begin(); it != container.end(); ++it) {
    if (needSeparator)
      os << sep;
    os << *it;
    needSeparator = true;
  }
  return os;
}
/// Writes a variable reference: '@' for global values, '%' for everything
/// else, followed by the value's name.
static inline ostream &printVarName(ostream &os, const Value *var) {
  const char sigil = dyncast<GlobalValue>(var) ? '@' : '%';
  os << sigil << var->getName();
  return os;
}
/// Writes a basic-block reference: '^' followed by the block's name.
static inline ostream &printBlockName(ostream &os, const BasicBlock *block) {
  os << '^' << block->getName();
  return os;
}
/// Writes a function reference: '@' followed by the function's name.
static inline ostream &printFunctionName(ostream &os, const Function *fn) {
  os << '@' << fn->getName();
  return os;
}
/// Writes an operand: constants are printed by value, anything else as a
/// variable reference.
static inline ostream &printOperand(ostream &os, const Value *value) {
  auto literal = dyncast<ConstantValue>(value);
  if (!literal)
    return printVarName(os, value);
  literal->print(os);
  return os;
}
//===----------------------------------------------------------------------===//
// Types
//===----------------------------------------------------------------------===//
/// Returns the unique `int` type object (function-local static).
Type *Type::getIntType() {
  static Type instance{kInt};
  return &instance;
}
/// Returns the unique `float` type object (function-local static).
Type *Type::getFloatType() {
  static Type instance{kFloat};
  return &instance;
}
/// Returns the unique `void` type object (function-local static).
Type *Type::getVoidType() {
  static Type instance{kVoid};
  return &instance;
}
/// Returns the unique label type object (function-local static).
Type *Type::getLabelType() {
  static Type instance{kLabel};
  return &instance;
}
/// Returns the pointer type whose pointee is `baseType`.
/// Thin forwarder to `PointerType::get`.
Type *Type::getPointerType(Type *baseType) {
  PointerType *pointerType = PointerType::get(baseType);
  return pointerType;
}
/// Returns the function type with the given return and parameter types.
/// Thin forwarder to `FunctionType::get`.
Type *Type::getFunctionType(Type *returnType,
                            const vector<Type *> &paramTypes) {
  FunctionType *functionType = FunctionType::get(returnType, paramTypes);
  return functionType;
}
/// Returns the size of the type: 4 for int/float, 8 for label, pointer and
/// function types, 0 for void (and for any unknown kind).
// NOTE(review): pointer-like types report 8 while the backend header says it
// targets armv7 with a 4-byte register size -- confirm this is intended.
int Type::getSize() const {
  if (kind == kInt || kind == kFloat)
    return 4;
  if (kind == kLabel || kind == kPointer || kind == kFunction)
    return 8;
  return 0;
}
void Type::print(ostream &os) const {
auto kind = getKind();
switch (kind) {
case kInt:
os << "int";
break;
case kFloat:
os << "float";
break;
case kVoid:
os << "void";
break;
case kPointer:
static_cast<const PointerType *>(this)->getBaseType()->print(os);
os << "*";
break;
case kFunction:
static_cast<const FunctionType *>(this)->getReturnType()->print(os);
os << "(";
interleave(os, static_cast<const FunctionType *>(this)->getParamTypes());
os << ")";
break;
case kLabel:
default:
cerr << "Unexpected type!\n";
break;
}
}
/// Returns the unique `PointerType` for `baseType`, creating it on first
/// request. Instances are owned by a function-local static map keyed by the
/// base type.
PointerType *PointerType::get(Type *baseType) {
  static std::map<Type *, std::unique_ptr<PointerType>> pointerTypes;
  auto &slot = pointerTypes[baseType];
  if (!slot)
    slot.reset(new PointerType(baseType));
  return slot.get();
}
/// Returns the unique `FunctionType` with the given return type and parameter
/// types, creating it on first request. Existing types are found by a linear
/// scan comparing the return type, the parameter count and then each
/// parameter type by pointer identity.
FunctionType *FunctionType::get(Type *returnType,
                                const std::vector<Type *> &paramTypes) {
  static std::set<std::unique_ptr<FunctionType>> functionTypes;

  for (const auto &known : functionTypes) {
    if (known->getReturnType() != returnType)
      continue;
    if (known->getParamTypes().size() != paramTypes.size())
      continue;
    if (std::equal(paramTypes.begin(), paramTypes.end(),
                   known->getParamTypes().begin()))
      return known.get();
  }

  auto created = new FunctionType(returnType, paramTypes);
  assert(created);
  return functionTypes.emplace(created).first->get();
}
// Redirects every recorded use of this value to `value`: for each use, the
// using instruction's operand slot is overwritten via `User::setOperand`, and
// afterwards this value's use list is emptied.
// NOTE(review): whether `value` gains the corresponding uses depends on what
// `Use::setValue` does, which is not visible here -- confirm.
void Value::replaceAllUsesWith(Value *value) {
for (auto &use : uses)
use->getUser()->setOperand(use->getIndex(), value);
uses.clear();
}
/// Returns true when this value is a `ConstantValue`, a `GlobalValue` or a
/// `Function`; false otherwise.
bool Value::isConstant() const {
  const bool isLiteral = dyncast<ConstantValue>(this) != nullptr;
  if (isLiteral)
    return true;
  const bool isGlobalSymbol = dyncast<GlobalValue>(this) != nullptr ||
                              dyncast<Function>(this) != nullptr;
  // Array constants are not handled yet (disabled code kept for reference):
  // if (auto array = dyncast<const ArrayValue *>(this)) {
  //   auto elements = array->getValues();
  //   return all_of(elements.begin(), elements.end(),
  //                 [](Value *v) -> bool { return v->isConstant(); });
  // }
  return isGlobalSymbol;
}
/// Returns the unique integer constant with the given value, creating it on
/// first request. Instances are owned by a function-local static map.
ConstantValue *ConstantValue::get(int value) {
  static std::map<int, std::unique_ptr<ConstantValue>> intConstants;
  auto &slot = intConstants[value];
  if (!slot)
    slot.reset(new ConstantValue(value));
  return slot.get();
}
/// Returns the unique float constant with the given value, creating it on
/// first request. Instances are owned by a function-local static map.
///
/// The cache is keyed by the bit pattern of the float rather than by the
/// float itself, because `operator<` on floats is not a valid map ordering
/// once special values are involved:
///  - NaN compares false against everything, which breaks the strict weak
///    ordering `std::map` requires and can make a lookup return an unrelated
///    constant;
///  - `-0.0f` and `0.0f` compare equal, so whichever was requested first was
///    returned for both and the sign was lost.
ConstantValue *ConstantValue::get(float value) {
  static_assert(sizeof(std::uint32_t) == sizeof(float),
                "float is expected to be 32 bits wide");
  static std::map<std::uint32_t, std::unique_ptr<ConstantValue>> floatConstants;

  std::uint32_t bits = 0;
  std::memcpy(&bits, &value, sizeof(bits));

  auto iter = floatConstants.find(bits);
  if (iter != floatConstants.end())
    return iter->second.get();
  auto constant = new ConstantValue(value);
  assert(constant);
  auto result = floatConstants.emplace(bits, constant);
  return result.first->second.get();
}
/// Writes the constant's value: the integer for int constants, the float
/// otherwise.
void ConstantValue::print(ostream &os) const {
  if (!isInt()) {
    os << getFloat();
    return;
  }
  os << getInt();
}
/// Creates a block argument. When no name is supplied, a fresh variable id
/// from the enclosing function is used as the name.
Argument::Argument(Type *type, BasicBlock *block, int index,
                   const std::string &name)
    : Value(kArgument, type, name), block(block), index(index) {
  if (hasName())
    return;
  auto *function = block->getParent();
  setName(to_string(function->allocateVariableID()));
}
/// Writes the argument as `<name>: <type>`. The argument must have a name.
void Argument::print(std::ostream &os) const {
  assert(hasName());
  printVarName(os, this);
  os << ": " << *getType();
}
/// Creates a basic block inside `parent`. When no name is supplied, the block
/// is named "bb<N>" with a fresh block id from the parent function.
BasicBlock::BasicBlock(Function *parent, const std::string &name)
    : Value(kBasicBlock, Type::getLabelType(), name), parent(parent),
      instructions(), arguments(), successors(), predecessors() {
  if (hasName())
    return;
  const auto blockId = getParent()->allocateblockID();
  setName("bb" + to_string(blockId));
}
/// Writes the block: a header line `  ^<name>(<arg>: <type>, ...):` (the
/// parenthesised list is omitted when there are no arguments) followed by one
/// indented line per instruction.
void BasicBlock::print(std::ostream &os) const {
  assert(hasName());
  os << " ";
  printBlockName(os, this);

  auto args = getArguments();
  if (args.begin() != args.end()) {
    os << '(';
    bool first = true;
    for (auto &arg : args) {
      if (!first)
        os << ", ";
      printVarName(os, arg.get()) << ": " << *arg->getType();
      first = false;
    }
    os << ')';
  }
  os << ":\n";

  for (auto &inst : instructions)
    os << " " << *inst << '\n';
}
/// Creates an instruction. Instructions whose type is not void and that were
/// given no name are named after a fresh variable id of the enclosing
/// function.
Instruction::Instruction(Kind kind, Type *type, BasicBlock *parent,
                         const std::string &name)
    : User(kind, type, name), kind(kind), parent(parent) {
  const bool needsGeneratedName = !type->isVoid() && !hasName();
  if (needsGeneratedName)
    setName(to_string(getFunction()->allocateVariableID()));
}
/// Writes the call as
///   `%<result> = call @<callee>(<args>) : <type>`   when a value is produced
///   `call @<callee>(<args>) : <type>`               for void calls.
///
/// Fix: the `call` keyword used to be part of the `%<result> = ` prefix only,
/// so void calls were printed without any opcode. Value-producing calls print
/// exactly as before.
void CallInst::print(std::ostream &os) const {
  if (not getType()->isVoid())
    printVarName(os, this) << " = ";
  os << "call ";
  printFunctionName(os, getCallee()) << '(';
  bool first = true;
  for (auto arg : getArguments()) {
    if (!first)
      os << ", ";
    printOperand(os, arg);
    first = false;
  }
  os << ") : " << *getType();
}
/// Writes the instruction as `%<result> = <op> <operand> : <type>`.
///
/// Fix: a space is now written between the mnemonic and the operand (the old
/// output was e.g. `neg%1`), matching the layout `BinaryInst::print` uses.
void UnaryInst::print(std::ostream &os) const {
  printVarName(os, this) << " = ";
  switch (getKind()) {
  case kNeg:
    os << "neg";
    break;
  case kNot:
    os << "not";
    break;
  case kFNeg:
    os << "fneg";
    break;
  case kFtoI:
    os << "ftoi";
    break;
  case kIToF:
    os << "itof";
    break;
  default:
    assert(false);
  }
  os << ' ';
  printOperand(os, getOperand()) << " : " << *getType();
}
/// Writes the instruction as `%<result> = <op> <lhs>, <rhs> : <type>`.
/// An unknown kind trips the assertion; with assertions disabled the mnemonic
/// is simply left empty.
void BinaryInst::print(std::ostream &os) const {
  printVarName(os, this) << " = ";

  const char *mnemonic = "";
  switch (getKind()) {
  // Integer arithmetic.
  case kAdd: mnemonic = "add"; break;
  case kSub: mnemonic = "sub"; break;
  case kMul: mnemonic = "mul"; break;
  case kDiv: mnemonic = "div"; break;
  case kRem: mnemonic = "rem"; break;
  // Integer comparisons.
  case kICmpEQ: mnemonic = "icmpeq"; break;
  case kICmpNE: mnemonic = "icmpne"; break;
  case kICmpLT: mnemonic = "icmplt"; break;
  case kICmpGT: mnemonic = "icmpgt"; break;
  case kICmpLE: mnemonic = "icmple"; break;
  case kICmpGE: mnemonic = "icmpge"; break;
  // Floating-point arithmetic.
  case kFAdd: mnemonic = "fadd"; break;
  case kFSub: mnemonic = "fsub"; break;
  case kFMul: mnemonic = "fmul"; break;
  case kFDiv: mnemonic = "fdiv"; break;
  case kFRem: mnemonic = "frem"; break;
  // Floating-point comparisons.
  case kFCmpEQ: mnemonic = "fcmpeq"; break;
  case kFCmpNE: mnemonic = "fcmpne"; break;
  case kFCmpLT: mnemonic = "fcmplt"; break;
  case kFCmpGT: mnemonic = "fcmpgt"; break;
  case kFCmpLE: mnemonic = "fcmple"; break;
  case kFCmpGE: mnemonic = "fcmpge"; break;
  default:
    assert(false);
  }

  os << mnemonic << ' ';
  printOperand(os, getLhs()) << ", ";
  printOperand(os, getRhs()) << " : " << *getType();
}
/// Writes `return`, followed by ` <value> : <type>` when a value is returned.
void ReturnInst::print(std::ostream &os) const {
  os << "return";
  auto returned = getReturnValue();
  if (!returned)
    return;
  os << ' ';
  printOperand(os, returned) << " : " << *returned->getType();
}
/// Writes `br ^<block>`, followed by `(<arg>, ...)` when block arguments are
/// passed.
void UncondBrInst::print(std::ostream &os) const {
  os << "br ";
  printBlockName(os, getBlock());

  auto args = getArguments();
  if (args.begin() == args.end())
    return;

  os << '(';
  bool first = true;
  for (auto arg : args) {
    if (!first)
      os << ", ";
    printOperand(os, arg);
    first = false;
  }
  os << ')';
}
/// Writes `condbr <cond>, ^<then>(<args>), ^<else>(<args>)`; each
/// parenthesised argument list is omitted when it is empty.
void CondBrInst::print(std::ostream &os) const {
  // Prints "(a, b, ...)" for a non-empty argument range, nothing otherwise.
  auto printArgumentList = [&os](auto args) {
    if (args.begin() == args.end())
      return;
    os << '(';
    bool first = true;
    for (auto arg : args) {
      if (!first)
        os << ", ";
      printOperand(os, arg);
      first = false;
    }
    os << ')';
  };

  os << "condbr ";
  printOperand(os, getCondition()) << ", ";
  printBlockName(os, getThenBlock());
  printArgumentList(getThenArguments());
  os << ", ";
  printBlockName(os, getElseBlock());
  printArgumentList(getElseArguments());
}
void AllocaInst::print(std::ostream &os) const {
if (getNumDims())
cerr << "not implemented yet\n";
printVarName(os, this) << " = ";
os << "alloca "
<< *static_cast<const PointerType *>(getType())->getBaseType();
os << " : " << *getType();
}
/// Writes `%<result> = load <pointer> : <type>`.
/// Indices are not printed yet; a notice goes to stderr instead.
void LoadInst::print(std::ostream &os) const {
  if (getNumIndices())
    cerr << "not implemented yet\n";
  printVarName(os, this) << " = " << "load ";
  printOperand(os, getPointer());
  os << " : " << *getType();
}
/// Writes `store <value>, <pointer> : <value type>`.
/// Indices are not printed yet; a notice goes to stderr instead.
void StoreInst::print(std::ostream &os) const {
  if (getNumIndices())
    cerr << "not implemented yet\n";
  os << "store ";
  printOperand(os, getValue());
  os << ", ";
  printOperand(os, getPointer());
  os << " : " << *getValue()->getType();
}
void Function::print(std::ostream &os) const {
auto returnType = getReturnType();
auto paramTypes = getParamTypes();
os << *returnType << ' ';
printFunctionName(os, this) << '(';
auto b = paramTypes.begin(), e = paramTypes.end();
if (b != e) {
os << *(*b);
for (auto type : make_range(std::next(b), e))
os << ", " << *type;
}
os << ") {\n";
for (auto &bb : getBasicBlocks()) {
os << *bb << '\n';
}
os << "}";
}
void Module::print(std::ostream &os) const {
for (auto &value : children)
os << *value << '\n';
}
// ArrayValue *ArrayValue::get(Type *type, const vector<Value *> &values) {
// static map<pair<Type *, size_t>, unique_ptr<ArrayValue>> arrayConstants;
// hash<string> hasher;
// auto key = make_pair(
// type, hasher(string(reinterpret_cast<const char *>(values.data()),
// values.size() * sizeof(Value *))));
// auto iter = arrayConstants.find(key);
// if (iter != arrayConstants.end())
// return iter->second.get();
// auto constant = new ArrayValue(type, values);
// assert(constant);
// auto result = arrayConstants.emplace(key, constant);
// return result.first->second.get();
// }
// ArrayValue *ArrayValue::get(const std::vector<int> &values) {
// vector<Value *> vals(values.size(), nullptr);
// std::transform(values.begin(), values.end(), vals.begin(),
// [](int v) { return ConstantValue::get(v); });
// return get(Type::getIntType(), vals);
// }
// ArrayValue *ArrayValue::get(const std::vector<float> &values) {
// vector<Value *> vals(values.size(), nullptr);
// std::transform(values.begin(), values.end(), vals.begin(),
// [](float v) { return ConstantValue::get(v); });
// return get(Type::getFloatType(), vals);
// }
void User::setOperand(int index, Value *value) {
assert(index < getNumOperands());
operands[index].setValue(value);
}
void User::replaceOperand(int index, Value *value) {
assert(index < getNumOperands());
auto &use = operands[index];
use.getValue()->removeUse(&use);
use.setValue(value);
}
//! Build a call to `callee`. The instruction's type is the callee's return
//! type; operand 0 is the callee and the remaining operands are `args` in
//! order.
CallInst::CallInst(Function *callee, const std::vector<Value *> &args,
                   BasicBlock *parent, const std::string &name)
    : Instruction(kCall, callee->getReturnType(), parent, name) {
  addOperand(callee);
  addOperands(args);
}
//! The called function, i.e. operand 0 viewed as a `Function`
//! (null if operand 0 is not a `Function`).
Function *CallInst::getCallee() const {
  Value *target = getOperand(0);
  return dyncast<Function>(target);
}
} // namespace sysy

994
src/IR.h Normal file
View File

@@ -0,0 +1,994 @@
#pragma once
#include "range.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <list>
#include <map>
#include <memory>
#include <ostream>
#include <string>
#include <type_traits>
#include <vector>
namespace sysy {
/*!
* \defgroup type Types
* The SysY type system is quite simple.
* 1. The base class `Type` is used to represent all primitive scalar types,
* including `int`, `float`, `void`, and the label type representing branch
* targets.
* 2. `PointerType` and `FunctionType` derive from `Type` and represent pointer
* type and function type, respectively.
*
* NOTE `Type` and its derived classes have their ctors declared as 'protected'.
* Users must use Type::getXXXType() methods to obtain `Type` pointers.
* @{
*/
/*!
 * `Type` models every primitive scalar type (`int`, `float`, `void`) and the
 * label type used for branch targets. It is also the base class of
 * `PointerType` and `FunctionType`.
 *
 * Constructors are protected; instances are obtained through the static
 * `getXXXType()` factories.
 */
class Type {
public:
  //! Discriminator identifying the concrete type.
  enum Kind {
    kInt,
    kFloat,
    kVoid,
    kLabel,
    kPointer,
    kFunction,
  };
  Kind kind;

protected:
  Type(Kind kind) : kind(kind) {}
  virtual ~Type() = default;

public:
  // Factories (defined out of line).
  static Type *getIntType();
  static Type *getFloatType();
  static Type *getVoidType();
  static Type *getLabelType();
  static Type *getPointerType(Type *baseType);
  static Type *getFunctionType(Type *returnType,
                               const std::vector<Type *> &paramTypes = {});

  // Kind queries.
  Kind getKind() const { return kind; }
  bool isInt() const { return getKind() == kInt; }
  bool isFloat() const { return getKind() == kFloat; }
  bool isVoid() const { return getKind() == kVoid; }
  bool isLabel() const { return getKind() == kLabel; }
  bool isPointer() const { return getKind() == kPointer; }
  bool isFunction() const { return getKind() == kFunction; }
  bool isIntOrFloat() const { return isInt() || isFloat(); }

  //! Size of the type (defined out of line).
  int getSize() const;

  //! View this type as the derived type `T`; yields null when the dynamic
  //! type is not `T`. Constness is cast away on purpose so that the call
  //! works on const objects.
  template <typename T>
  std::enable_if_t<std::is_base_of_v<Type, T>, T *> as() const {
    Type *self = const_cast<Type *>(this);
    return dynamic_cast<T *>(self);
  }

  void print(std::ostream &os) const;
}; // class Type
//! Pointer type: a `Type` of kind `kPointer` that remembers the type it
//! points to. Instances are obtained through `PointerType::get`.
class PointerType : public Type {
protected:
  Type *baseType;

  PointerType(Type *baseType) : Type(kPointer), baseType(baseType) {}

public:
  static PointerType *get(Type *baseType);

  //! The pointed-to type.
  Type *getBaseType() const { return baseType; }
}; // class PointerType
//! Function type: a `Type` of kind `kFunction` holding a return type and an
//! ordered list of parameter types. Instances are obtained through
//! `FunctionType::get`.
class FunctionType : public Type {
private:
  Type *returnType;
  std::vector<Type *> paramTypes;

protected:
  FunctionType(Type *returnType, const std::vector<Type *> &paramTypes = {})
      : Type(kFunction), returnType(returnType), paramTypes(paramTypes) {}

public:
  static FunctionType *get(Type *returnType,
                           const std::vector<Type *> &paramTypes = {});

  Type *getReturnType() const { return returnType; }
  //! Iterable view over the parameter types, in declaration order.
  auto getParamTypes() const { return make_range(paramTypes); }
  int getNumParams() const { return static_cast<int>(paramTypes.size()); }
}; // class FunctionType
/*!
* @}
*/
/*!
* \defgroup ir IR
*
* The SysY IR is an instruction-level language. The IR is organized
* as a four-level tree structure, as shown below.
*
* \dotfile ir-4level.dot IR Structure
*
* - `Module` corresponds to the top level "CompUnit" syntax structure
* - `GlobalValue` corresponds to the "Decl" syntax structure
* - `Function` corresponds to the "FuncDef" syntax structure
* - `BasicBlock` is a sequence of instructions without branching. A `Function`
* is made up of one or more `BasicBlock`s.
* - `Instruction` represents a primitive operation on values, e.g., add or sub.
*
* The fundamental data concept in SysY IR is `Value`. A `Value` is like
* a register and is used by `Instruction`s as input/output operand. Each value
* has an associated `Type` indicating the data type held by the value.
*
* Most `Instruction`s have a three-address signature, i.e., there are at most 2
* input values and at most 1 output value.
*
* The SysY IR adopts a Static-Single-Assignment (SSA) design. That is, a `Value`
* is defined (as the output operand) by some instruction, and used (as the
* input operand) by other instructions. While a value can be used by multiple
* instructions, its definition occurs only once. As a result, there is a
* one-to-one relation between a value and the instruction defining it. In other
* words, any instruction that defines a value can be viewed as the defined value
* itself. So `Instruction` is also a `Value` in SysY IR. See `Value` for the
* type hierarchy.
*
* @{
*/
class User;
class Value;
//! `Use` represents the relation between a `Value` and its `User`: it records
//! that `user` holds `value` as its operand number `index`.
class Use {
private:
  //! the position of value in the user's operands, i.e.,
  //! user->getOperands[index] == value
  int index = 0;
  User *user = nullptr;
  Value *value = nullptr;

public:
  //! An empty use: index 0, no user, no value.
  Use() = default;
  Use(int index, User *user, Value *value)
      : index(index), user(user), value(value) {}

public:
  int getIndex() const { return index; }
  User *getUser() const { return user; }
  Value *getValue() const { return value; }
  //! Point this use at `newValue`. Only the stored pointer changes; no use
  //! list is updated. (The parameter used to be named `value`, which turned
  //! the assignment into a self-assignment of the parameter.)
  void setValue(Value *newValue) { value = newValue; }
}; // class Use
//! Type test: true when `T::classof` accepts `value`.
template <typename T>
inline std::enable_if_t<std::is_base_of_v<Value, T>, bool>
isa(const Value *value) {
  const bool matches = T::classof(value);
  return matches;
}
//! Checked downcast: `value` as a `T *` when `isa<T>(value)` holds,
//! otherwise null.
template <typename T>
inline std::enable_if_t<std::is_base_of_v<Value, T>, T *>
dyncast(Value *value) {
  if (not isa<T>(value))
    return nullptr;
  return static_cast<T *>(value);
}
//! Checked downcast for const values: `value` as a `const T *` when
//! `isa<T>(value)` holds, otherwise null.
template <typename T>
inline std::enable_if_t<std::is_base_of_v<Value, T>, const T *>
dyncast(const Value *value) {
  if (not isa<T>(value))
    return nullptr;
  return static_cast<const T *>(value);
}
//! The base class of all value types.
//!
//! A `Value` carries a kind tag, a `Type`, an optional name and the list of
//! `Use`s that refer to it.
class Value {
public:
  //! Kind tag. Every enumerator other than `kInvalid` is a distinct single
  //! bit so that groups of kinds can be tested with one mask.
  //!
  //! The shifts use an `unsigned long long` literal: `unsigned long` is only
  //! 32 bits wide on some platforms, where shifting it by 32 or more is not a
  //! valid constant expression.
  enum Kind : uint64_t {
    kInvalid,
    // Instructions
    // Binary
    kAdd = 0x1ULL << 0,
    kSub = 0x1ULL << 1,
    kMul = 0x1ULL << 2,
    kDiv = 0x1ULL << 3,
    kRem = 0x1ULL << 4,
    kICmpEQ = 0x1ULL << 5,
    kICmpNE = 0x1ULL << 6,
    kICmpLT = 0x1ULL << 7,
    kICmpGT = 0x1ULL << 8,
    kICmpLE = 0x1ULL << 9,
    kICmpGE = 0x1ULL << 10,
    kFAdd = 0x1ULL << 14,
    kFSub = 0x1ULL << 15,
    kFMul = 0x1ULL << 16,
    kFDiv = 0x1ULL << 17,
    kFRem = 0x1ULL << 18,
    kFCmpEQ = 0x1ULL << 19,
    kFCmpNE = 0x1ULL << 20,
    kFCmpLT = 0x1ULL << 21,
    kFCmpGT = 0x1ULL << 22,
    kFCmpLE = 0x1ULL << 23,
    kFCmpGE = 0x1ULL << 24,
    // Unary
    kNeg = 0x1ULL << 25,
    kNot = 0x1ULL << 26,
    kFNeg = 0x1ULL << 27,
    kFtoI = 0x1ULL << 28,
    kIToF = 0x1ULL << 29,
    // call
    kCall = 0x1ULL << 30,
    // terminator
    kCondBr = 0x1ULL << 31,
    kBr = 0x1ULL << 32,
    kReturn = 0x1ULL << 33,
    // mem op
    kAlloca = 0x1ULL << 34,
    kLoad = 0x1ULL << 35,
    kStore = 0x1ULL << 36,
    // Inclusive bounds of the instruction kinds.
    kFirstInst = kAdd,
    kLastInst = kStore,
    // others
    kArgument = 0x1ULL << 37,
    kBasicBlock = 0x1ULL << 38,
    kFunction = 0x1ULL << 39,
    kConstant = 0x1ULL << 40,
    kGlobal = 0x1ULL << 41,
  };

protected:
  Kind kind;
  Type *type;
  std::string name;
  //! Uses referring to this value; the `Use` objects are owned elsewhere.
  std::list<Use *> uses;

protected:
  Value(Kind kind, Type *type, const std::string &name = "")
      : kind(kind), type(type), name(name), uses() {}

public:
  virtual ~Value() = default;

public:
  Kind getKind() const { return kind; }
  //! Every value is a `Value`.
  static bool classof(const Value *) { return true; }

public:
  Type *getType() const { return type; }
  const std::string &getName() const { return name; }
  void setName(const std::string &n) { name = n; }
  bool hasName() const { return not name.empty(); }
  bool isInt() const { return type->isInt(); }
  bool isFloat() const { return type->isFloat(); }
  bool isPointer() const { return type->isPointer(); }
  //! Read-only view of the use list (now callable on const values too).
  const std::list<Use *> &getUses() const { return uses; }
  void addUse(Use *use) { uses.push_back(use); }
  void replaceAllUsesWith(Value *value);
  //! Drop every entry equal to `use`; a no-op when it is not registered.
  void removeUse(Use *use) { uses.remove(use); }
  bool isConstant() const;

public:
  //! Print a textual form of the value; the base implementation prints
  //! nothing.
  virtual void print(std::ostream &os) const {}
}; // class Value
/*!
* Static constants known at compile time.
*
* `ConstantValue`s are not defined by instructions, and do not use any other
* `Value`s. It's type is either `int` or `float`.
*/
class ConstantValue : public Value {
protected:
union {
int iScalar;
float fScalar;
};
protected:
ConstantValue(int value)
: Value(kConstant, Type::getIntType(), ""), iScalar(value) {}
ConstantValue(float value)
: Value(kConstant, Type::getFloatType(), ""), fScalar(value) {}
public:
static ConstantValue *get(int value);
static ConstantValue *get(float value);
public:
static bool classof(const Value *value) {
return value->getKind() == kConstant;
}
public:
int getInt() const {
assert(isInt());
return iScalar;
}
float getFloat() const {
assert(isFloat());
return fScalar;
}
public:
void print(std::ostream &os) const override;
}; // class ConstantValue
class BasicBlock;
/*!
* Arguments of `BasicBlock`s.
*
* SysY IR is an SSA language, however, it does not use PHI instructions as in
* LLVM IR. `Value`s from different predecessor blocks are passed explicitly as
* block arguments. This is also the approach used by MLIR.
* NOTE that `Function` does not own `Argument`s, function arguments are
* implemented as its entry block's arguments.
*/
class Argument : public Value {
protected:
BasicBlock *block;
int index;
public:
Argument(Type *type, BasicBlock *block, int index,
const std::string &name = "");
public:
static bool classof(const Value *value) {
return value->getKind() == kConstant;
}
public:
BasicBlock *getParent() const { return block; }
int getIndex() const { return index; }
public:
void print(std::ostream &os) const override;
};
class Instruction;
class Function;
/*!
* The container for `Instruction` sequence.
*
* `BasicBlock` maintains a list of `Instruction`s, with the last one being
* a terminator (branch or return). Besides, `BasicBlock` stores its arguments
* and records its predecessor and successor `BasicBlock`s.
*/
class BasicBlock : public Value {
friend class Function;
public:
using inst_list = std::list<std::unique_ptr<Instruction>>;
using iterator = inst_list::iterator;
using arg_list = std::vector<std::unique_ptr<Argument>>;
using block_list = std::vector<BasicBlock *>;
protected:
Function *parent;
inst_list instructions;
arg_list arguments;
block_list successors;
block_list predecessors;
protected:
explicit BasicBlock(Function *parent, const std::string &name = "");
public:
static bool classof(const Value *value) {
return value->getKind() == kBasicBlock;
}
public:
int getNumInstructions() const { return instructions.size(); }
int getNumArguments() const { return arguments.size(); }
int getNumPredecessors() const { return predecessors.size(); }
int getNumSuccessors() const { return successors.size(); }
Function *getParent() const { return parent; }
inst_list &getInstructions() { return instructions; }
auto getArguments() const { return make_range(arguments); }
block_list &getPredecessors() { return predecessors; }
block_list &getSuccessors() { return successors; }
iterator begin() { return instructions.begin(); }
iterator end() { return instructions.end(); }
iterator terminator() { return std::prev(end()); }
Argument *createArgument(Type *type, const std::string &name = "") {
auto arg = new Argument(type, this, arguments.size(), name);
assert(arg);
arguments.emplace_back(arg);
return arguments.back().get();
};
public:
void print(std::ostream &os) const override;
}; // class BasicBlock
//! User is the abstract base type of `Value` types which use other `Value` as
//! operands. Currently, there are two kinds of `User`s, `Instruction` and
//! `GlobalValue`.
class User : public Value {
protected:
std::vector<Use> operands;
protected:
User(Kind kind, Type *type, const std::string &name = "")
: Value(kind, type, name), operands() {}
public:
using use_iterator = std::vector<Use>::const_iterator;
struct operand_iterator : public std::vector<Use>::const_iterator {
using Base = std::vector<Use>::const_iterator;
operand_iterator(const Base &iter) : Base(iter) {}
using value_type = Value *;
value_type operator->() { return Base::operator*().getValue(); }
value_type operator*() { return Base::operator*().getValue(); }
};
// struct const_operand_iterator : std::vector<Use>::const_iterator {
// using Base = std::vector<Use>::const_iterator;
// const_operand_iterator(const Base &iter) : Base(iter) {}
// using value_type = Value *;
// value_type operator->() { return operator*().getValue(); }
// };
public:
int getNumOperands() const { return operands.size(); }
operand_iterator operand_begin() const { return operands.begin(); }
operand_iterator operand_end() const { return operands.end(); }
auto getOperands() const {
return make_range(operand_begin(), operand_end());
}
Value *getOperand(int index) const { return operands[index].getValue(); }
void addOperand(Value *value) {
operands.emplace_back(operands.size(), this, value);
value->addUse(&operands.back());
}
template <typename ContainerT> void addOperands(const ContainerT &operands) {
for (auto value : operands)
addOperand(value);
}
void replaceOperand(int index, Value *value);
void setOperand(int index, Value *value);
}; // class User
/*!
* Base of all concrete instruction types.
*/
class Instruction : public User {
public:
// enum Kind : uint64_t {
// kInvalid = 0x0UL,
// // Binary
// kAdd = 0x1UL << 0,
// kSub = 0x1UL << 1,
// kMul = 0x1UL << 2,
// kDiv = 0x1UL << 3,
// kRem = 0x1UL << 4,
// kICmpEQ = 0x1UL << 5,
// kICmpNE = 0x1UL << 6,
// kICmpLT = 0x1UL << 7,
// kICmpGT = 0x1UL << 8,
// kICmpLE = 0x1UL << 9,
// kICmpGE = 0x1UL << 10,
// kFAdd = 0x1UL << 14,
// kFSub = 0x1UL << 15,
// kFMul = 0x1UL << 16,
// kFDiv = 0x1UL << 17,
// kFRem = 0x1UL << 18,
// kFCmpEQ = 0x1UL << 19,
// kFCmpNE = 0x1UL << 20,
// kFCmpLT = 0x1UL << 21,
// kFCmpGT = 0x1UL << 22,
// kFCmpLE = 0x1UL << 23,
// kFCmpGE = 0x1UL << 24,
// // Unary
// kNeg = 0x1UL << 25,
// kNot = 0x1UL << 26,
// kFNeg = 0x1UL << 27,
// kFtoI = 0x1UL << 28,
// kIToF = 0x1UL << 29,
// // call
// kCall = 0x1UL << 30,
// // terminator
// kCondBr = 0x1UL << 31,
// kBr = 0x1UL << 32,
// kReturn = 0x1UL << 33,
// // mem op
// kAlloca = 0x1UL << 34,
// kLoad = 0x1UL << 35,
// kStore = 0x1UL << 36,
// // constant
// // kConstant = 0x1UL << 37,
// };
protected:
Kind kind;
BasicBlock *parent;
protected:
Instruction(Kind kind, Type *type, BasicBlock *parent = nullptr,
const std::string &name = "");
public:
static bool classof(const Value *value) {
return value->getKind() >= kFirstInst and value->getKind() <= kLastInst;
}
public:
Kind getKind() const { return kind; }
BasicBlock *getParent() const { return parent; }
Function *getFunction() const { return parent->getParent(); }
void setParent(BasicBlock *bb) { parent = bb; }
bool isBinary() const {
static constexpr uint64_t BinaryOpMask =
(kAdd | kSub | kMul | kDiv | kRem) |
(kICmpEQ | kICmpNE | kICmpLT | kICmpGT | kICmpLE | kICmpGE) |
(kFAdd | kFSub | kFMul | kFDiv | kFRem) |
(kFCmpEQ | kFCmpNE | kFCmpLT | kFCmpGT | kFCmpLE | kFCmpGE);
return kind & BinaryOpMask;
}
bool isUnary() const {
static constexpr uint64_t UnaryOpMask = kNeg | kNot | kFNeg | kFtoI | kIToF;
return kind & UnaryOpMask;
}
bool isMemory() const {
static constexpr uint64_t MemoryOpMask = kAlloca | kLoad | kStore;
return kind & MemoryOpMask;
}
bool isTerminator() const {
static constexpr uint64_t TerminatorOpMask = kCondBr | kBr | kReturn;
return kind & TerminatorOpMask;
}
bool isCmp() const {
static constexpr uint64_t CmpOpMask =
(kICmpEQ | kICmpNE | kICmpLT | kICmpGT | kICmpLE | kICmpGE) |
(kFCmpEQ | kFCmpNE | kFCmpLT | kFCmpGT | kFCmpLE | kFCmpGE);
return kind & CmpOpMask;
}
bool isBranch() const {
static constexpr uint64_t BranchOpMask = kBr | kCondBr;
return kind & BranchOpMask;
}
bool isCommutative() const {
static constexpr uint64_t CommutativeOpMask =
kAdd | kMul | kICmpEQ | kICmpNE | kFAdd | kFMul | kFCmpEQ | kFCmpNE;
return kind & CommutativeOpMask;
}
bool isUnconditional() const { return kind == kBr; }
bool isConditional() const { return kind == kCondBr; }
}; // class Instruction
class Function;
//! Function call.
class CallInst : public Instruction {
friend class IRBuilder;
protected:
CallInst(Function *callee, const std::vector<Value *> &args = {},
BasicBlock *parent = nullptr, const std::string &name = "");
public:
static bool classof(const Value *value) { return value->getKind() == kCall; }
public:
Function *getCallee() const;
auto getArguments() const {
return make_range(std::next(operand_begin()), operand_end());
}
public:
void print(std::ostream &os) const override;
}; // class CallInst
//! Unary instruction: '!', '-' and the int/float conversions. It has exactly
//! one operand. Instances are created through `IRBuilder`.
class UnaryInst : public Instruction {
  friend class IRBuilder;

protected:
  UnaryInst(Kind kind, Type *type, Value *operand, BasicBlock *parent = nullptr,
            const std::string &name = "")
      : Instruction(kind, type, parent, name) {
    addOperand(operand);
  }

public:
  static bool classof(const Value *value) {
    if (not Instruction::classof(value))
      return false;
    return static_cast<const Instruction *>(value)->isUnary();
  }

  //! The single operand.
  Value *getOperand() const { return User::getOperand(0); }

  void print(std::ostream &os) const override;
}; // class UnaryInst
//! Binary instruction (arithmetic and comparisons). Operand 0 is the
//! left-hand side, operand 1 the right-hand side. Instances are created
//! through `IRBuilder`.
class BinaryInst : public Instruction {
  friend class IRBuilder;

protected:
  BinaryInst(Kind kind, Type *type, Value *lhs, Value *rhs, BasicBlock *parent,
             const std::string &name = "")
      : Instruction(kind, type, parent, name) {
    addOperand(lhs);
    addOperand(rhs);
  }

public:
  static bool classof(const Value *value) {
    if (not Instruction::classof(value))
      return false;
    return static_cast<const Instruction *>(value)->isBinary();
  }

  Value *getLhs() const { return getOperand(0); }
  Value *getRhs() const { return getOperand(1); }

  void print(std::ostream &os) const override;
}; // class BinaryInst
//! The return statement
class ReturnInst : public Instruction {
friend class IRBuilder;
protected:
ReturnInst(Value *value = nullptr, BasicBlock *parent = nullptr)
: Instruction(kReturn, Type::getVoidType(), parent, "") {
if (value)
addOperand(value);
}
public:
static bool classof(const Value *value) {
return value->getKind() == kReturn;
}
public:
bool hasReturnValue() const { return not operands.empty(); }
Value *getReturnValue() const {
return hasReturnValue() ? getOperand(0) : nullptr;
}
public:
void print(std::ostream &os) const override;
}; // class ReturnInst
//! Unconditional branch. Operand 0 is the target block; the remaining
//! operands are the values passed as that block's arguments. Instances are
//! created through `IRBuilder`.
class UncondBrInst : public Instruction {
  friend class IRBuilder;

protected:
  //! \param block  branch target
  //! \param args   one value per argument of `block`
  //! \param parent block that will contain this instruction
  //!
  //! The instruction is tagged `kBr`. It used to be tagged `kCondBr`, which
  //! contradicted `classof` below and made unconditional branches
  //! indistinguishable from conditional ones.
  UncondBrInst(BasicBlock *block, std::vector<Value *> args,
               BasicBlock *parent = nullptr)
      : Instruction(kBr, Type::getVoidType(), parent, "") {
    assert(block->getNumArguments() == static_cast<int>(args.size()));
    addOperand(block);
    addOperands(args);
  }

public:
  static bool classof(const Value *value) { return value->getKind() == kBr; }

public:
  BasicBlock *getBlock() const { return dyncast<BasicBlock>(getOperand(0)); }
  //! The values passed to the target block, i.e. every operand after it.
  auto getArguments() const {
    return make_range(std::next(operand_begin()), operand_end());
  }

public:
  void print(std::ostream &os) const override;
}; // class UncondBrInst
//! Conditional branch
class CondBrInst : public Instruction {
friend class IRBuilder;
protected:
CondBrInst(Value *condition, BasicBlock *thenBlock, BasicBlock *elseBlock,
const std::vector<Value *> &thenArgs,
const std::vector<Value *> &elseArgs, BasicBlock *parent = nullptr)
: Instruction(kCondBr, Type::getVoidType(), parent, "") {
assert(thenBlock->getNumArguments() == thenArgs.size() and
elseBlock->getNumArguments() == elseArgs.size());
addOperand(condition);
addOperand(thenBlock);
addOperand(elseBlock);
addOperands(thenArgs);
addOperands(elseArgs);
}
public:
static bool classof(const Value *value) {
return value->getKind() == kCondBr;
}
public:
Value *getCondition() const { return getOperand(0); }
BasicBlock *getThenBlock() const {
return dyncast<BasicBlock>(getOperand(1));
}
BasicBlock *getElseBlock() const {
return dyncast<BasicBlock>(getOperand(2));
}
auto getThenArguments() const {
auto begin = std::next(operand_begin(), 3);
auto end = std::next(begin, getThenBlock()->getNumArguments());
return make_range(begin, end);
}
auto getElseArguments() const {
auto begin =
std::next(operand_begin(), 3 + getThenBlock()->getNumArguments());
auto end = operand_end();
return make_range(begin, end);
}
public:
void print(std::ostream &os) const override;
}; // class CondBrInst
//! Allocate memory for stack variables, used for non-global variable declartion
class AllocaInst : public Instruction {
friend class IRBuilder;
protected:
AllocaInst(Type *type, const std::vector<Value *> &dims = {},
BasicBlock *parent = nullptr, const std::string &name = "")
: Instruction(kAlloca, type, parent, name) {
addOperands(dims);
}
public:
static bool classof(const Value *value) {
return value->getKind() == kAlloca;
}
public:
int getNumDims() const { return getNumOperands(); }
auto getDims() const { return getOperands(); }
Value *getDim(int index) { return getOperand(index); }
public:
void print(std::ostream &os) const override;
}; // class AllocaInst
//! Load a value from memory address specified by a pointer value
class LoadInst : public Instruction {
friend class IRBuilder;
protected:
LoadInst(Value *pointer, const std::vector<Value *> &indices = {},
BasicBlock *parent = nullptr, const std::string &name = "")
: Instruction(kLoad, pointer->getType()->as<PointerType>()->getBaseType(),
parent, name) {
addOperand(pointer);
addOperands(indices);
}
public:
static bool classof(const Value *value) { return value->getKind() == kLoad; }
public:
int getNumIndices() const { return getNumOperands() - 1; }
Value *getPointer() const { return getOperand(0); }
auto getIndices() const {
return make_range(std::next(operand_begin()), operand_end());
}
Value *getIndex(int index) const { return getOperand(index + 1); }
public:
void print(std::ostream &os) const override;
}; // class LoadInst
//! Store a value to memory address specified by a pointer value
class StoreInst : public Instruction {
friend class IRBuilder;
protected:
StoreInst(Value *value, Value *pointer,
const std::vector<Value *> &indices = {},
BasicBlock *parent = nullptr, const std::string &name = "")
: Instruction(kStore, Type::getVoidType(), parent, name) {
addOperand(value);
addOperand(pointer);
addOperands(indices);
}
public:
static bool classof(const Value *value) { return value->getKind() == kStore; }
public:
int getNumIndices() const { return getNumOperands() - 2; }
Value *getValue() const { return getOperand(0); }
Value *getPointer() const { return getOperand(1); }
auto getIndices() const {
return make_range(std::next(operand_begin(), 2), operand_end());
}
Value *getIndex(int index) const { return getOperand(index + 2); }
public:
void print(std::ostream &os) const override;
}; // class StoreInst
class Module;
//! Function definition
class Function : public Value {
friend class Module;
protected:
Function(Module *parent, Type *type, const std::string &name)
: Value(kFunction, type, name), parent(parent), variableID(0), blocks() {
blocks.emplace_back(new BasicBlock(this, "entry"));
}
public:
static bool classof(const Value *value) {
return value->getKind() == kFunction;
}
public:
using block_list = std::list<std::unique_ptr<BasicBlock>>;
protected:
Module *parent;
int variableID;
int blockID;
block_list blocks;
public:
Type *getReturnType() const {
return getType()->as<FunctionType>()->getReturnType();
}
auto getParamTypes() const {
return getType()->as<FunctionType>()->getParamTypes();
}
auto getBasicBlocks() const { return make_range(blocks); }
BasicBlock *getEntryBlock() const { return blocks.front().get(); }
BasicBlock *addBasicBlock(const std::string &name = "") {
blocks.emplace_back(new BasicBlock(this, name));
return blocks.back().get();
}
void removeBasicBlock(BasicBlock *block) {
blocks.remove_if([&](std::unique_ptr<BasicBlock> &b) -> bool {
return block == b.get();
});
}
int allocateVariableID() { return variableID++; }
int allocateblockID() { return blockID++; }
public:
void print(std::ostream &os) const override;
}; // class Function
// class ArrayValue : public User {
// protected:
// ArrayValue(Type *type, const std::vector<Value *> &values = {})
// : User(type, "") {
// addOperands(values);
// }
// public:
// static ArrayValue *get(Type *type, const std::vector<Value *> &values);
// static ArrayValue *get(const std::vector<int> &values);
// static ArrayValue *get(const std::vector<float> &values);
// public:
// auto getValues() const { return getOperands(); }
// public:
// void print(std::ostream &os) const override{};
// }; // class ConstantArray
//! Global value declared at file scope. The operands are the dimension
//! values, followed by the initializer when there is one. Globals are
//! created through `Module`.
class GlobalValue : public User {
  friend class Module;

protected:
  Module *parent;
  //! Whether the last operand is the initializer.
  bool hasInit;
  //! Starts out false; it used to be left uninitialized.
  //! NOTE(review): nothing in this header sets it -- presumably `Module`
  //! (a friend) does; confirm.
  bool isConst;

protected:
  //! \param type must be a pointer type
  //! \param dims dimension values, stored as the leading operands
  //! \param init optional initializer, stored as the last operand
  GlobalValue(Module *parent, Type *type, const std::string &name,
              const std::vector<Value *> &dims = {}, Value *init = nullptr)
      : User(kGlobal, type, name), parent(parent), hasInit(init != nullptr),
        isConst(false) {
    assert(type->isPointer());
    addOperands(dims);
    if (init)
      addOperand(init);
  }

public:
  static bool classof(const Value *value) {
    return value->getKind() == kGlobal;
  }

public:
  //! The initializer, or null when the global has none.
  Value *init() const { return hasInit ? operands.back().getValue() : nullptr; }
  //! Number of dimension operands (the initializer is not counted).
  int getNumDims() const { return getNumOperands() - (hasInit ? 1 : 0); }
  Value *getDim(int index) { return getOperand(index); }

public:
  //! Printing of globals is a no-op.
  void print(std::ostream &os) const override {}
}; // class GlobalValue
//! IR unit for representing a SysY compile unit. The module owns all
//! functions and globals (in creation order, in `children`) and indexes them
//! by name.
class Module {
protected:
  std::vector<std::unique_ptr<Value>> children;
  std::map<std::string, Function *> functions;
  std::map<std::string, GlobalValue *> globals;

public:
  Module() = default;

  //! Create a function called `name`; yields null (and creates nothing) when
  //! a function of that name already exists.
  Function *createFunction(const std::string &name, Type *type) {
    if (functions.find(name) != functions.end())
      return nullptr;
    Function *func = new Function(this, type, name);
    assert(func);
    children.emplace_back(func);
    functions.emplace(name, func);
    return func;
  }
  //! Create a global called `name`; yields null (and creates nothing) when a
  //! global of that name already exists.
  GlobalValue *createGlobalValue(const std::string &name, Type *type,
                                 const std::vector<Value *> &dims = {},
                                 Value *init = nullptr) {
    if (globals.find(name) != globals.end())
      return nullptr;
    GlobalValue *global = new GlobalValue(this, type, name, dims, init);
    assert(global);
    children.emplace_back(global);
    globals.emplace(name, global);
    return global;
  }
  //! Look up a function by name; null when there is none.
  Function *getFunction(const std::string &name) const {
    const auto found = functions.find(name);
    return found == functions.end() ? nullptr : found->second;
  }
  //! Look up a global by name; null when there is none.
  GlobalValue *getGlobalValue(const std::string &name) const {
    const auto found = globals.find(name);
    return found == globals.end() ? nullptr : found->second;
  }
  //! Direct access to the name -> function index.
  std::map<std::string, Function *> *getFunctions() { return &functions; }
  //! Direct access to the name -> global index.
  std::map<std::string, GlobalValue *> *getGlobalValues() { return &globals; }

  void print(std::ostream &os) const;
}; // class Module
/*!
* @}
*/
//! Stream a `Type` by delegating to `Type::print`; returns `os` for chaining.
inline std::ostream &operator<<(std::ostream &os, const Type &type) {
  type.print(os);
  return os;
}
//! Stream a `Value` by delegating to its virtual `print`; returns `os` for
//! chaining.
inline std::ostream &operator<<(std::ostream &os, const Value &value) {
  value.print(os);
  return os;
}
} // namespace sysy

232
src/IRBuilder.h Normal file
View File

@@ -0,0 +1,232 @@
#pragma once
#include "IR.h"
#include <cassert>
#include <memory>
namespace sysy {
class IRBuilder {
private:
  //! Block that new instructions are inserted into; null until a position is
  //! set on a default-constructed builder.
  BasicBlock *block = nullptr;
  //! New instructions are inserted in front of this position.
  BasicBlock::iterator position{};

public:
  //! A builder without an insertion point. `block` used to be left
  //! indeterminate here; it is now null.
  IRBuilder() = default;
  //! Insert at the end of `block`.
  IRBuilder(BasicBlock *block) : block(block), position(block->end()) {}
  //! Insert in front of `position` inside `block`.
  IRBuilder(BasicBlock *block, BasicBlock::iterator position)
      : block(block), position(position) {}
public:
//! The block new instructions are inserted into.
BasicBlock *getBasicBlock() const { return block; }
//! The position in front of which new instructions are inserted.
BasicBlock::iterator getPosition() const { return position; }
//! Move the insertion point to `position` inside `block`.
void setPosition(BasicBlock *block, BasicBlock::iterator position) {
  this->block = block;
  this->position = position;
}
//! Move the insertion point within the current block.
void setPosition(BasicBlock::iterator position) { this->position = position; }
public:
//! Create a call to `callee` with `args` and insert it at the current
//! position. The current block takes ownership; the returned pointer is a
//! non-owning handle.
CallInst *createCallInst(Function *callee,
                         const std::vector<Value *> &args = {},
                         const std::string &name = "") {
  CallInst *call = new CallInst(callee, args, block, name);
  assert(call);
  auto &instructions = block->getInstructions();
  instructions.emplace(position, call);
  return call;
}
//! Create a unary instruction of the given kind and result type and insert
//! it at the current position. The current block takes ownership; the
//! returned pointer is a non-owning handle.
UnaryInst *createUnaryInst(Instruction::Kind kind, Type *type, Value *operand,
                           const std::string &name = "") {
  UnaryInst *unary = new UnaryInst(kind, type, operand, block, name);
  assert(unary);
  auto &instructions = block->getInstructions();
  instructions.emplace(position, unary);
  return unary;
}
// Convenience wrappers around createUnaryInst: each one fixes the
// instruction kind and the result type.

//! `kNeg` with an int result.
UnaryInst *createNegInst(Value *operand, const std::string &name = "") {
  return createUnaryInst(Instruction::kNeg, Type::getIntType(), operand,
                         name);
}
//! `kNot` with an int result.
UnaryInst *createNotInst(Value *operand, const std::string &name = "") {
  return createUnaryInst(Instruction::kNot, Type::getIntType(), operand,
                         name);
}
//! `kFtoI` with an int result.
UnaryInst *createFtoIInst(Value *operand, const std::string &name = "") {
  return createUnaryInst(Instruction::kFtoI, Type::getIntType(), operand,
                         name);
}
//! `kFNeg` with a float result.
UnaryInst *createFNegInst(Value *operand, const std::string &name = "") {
  return createUnaryInst(Instruction::kFNeg, Type::getFloatType(), operand,
                         name);
}
//! `kIToF` with a float result.
UnaryInst *createIToFInst(Value *operand, const std::string &name = "") {
  return createUnaryInst(Instruction::kIToF, Type::getFloatType(), operand,
                         name);
}
//! Create a binary instruction of the given kind and result type and insert
//! it at the current position. The current block takes ownership; the
//! returned pointer is a non-owning handle.
BinaryInst *createBinaryInst(Instruction::Kind kind, Type *type, Value *lhs,
                             Value *rhs, const std::string &name = "") {
  BinaryInst *binary = new BinaryInst(kind, type, lhs, rhs, block, name);
  assert(binary);
  auto &instructions = block->getInstructions();
  instructions.emplace(position, binary);
  return binary;
}
// Integer arithmetic: wrappers around createBinaryInst with an int result.

//! `kAdd`
BinaryInst *createAddInst(Value *lhs, Value *rhs,
                          const std::string &name = "") {
  return createBinaryInst(Instruction::kAdd, Type::getIntType(), lhs, rhs,
                          name);
}
//! `kSub`
BinaryInst *createSubInst(Value *lhs, Value *rhs,
                          const std::string &name = "") {
  return createBinaryInst(Instruction::kSub, Type::getIntType(), lhs, rhs,
                          name);
}
//! `kMul`
BinaryInst *createMulInst(Value *lhs, Value *rhs,
                          const std::string &name = "") {
  return createBinaryInst(Instruction::kMul, Type::getIntType(), lhs, rhs,
                          name);
}
//! `kDiv`
BinaryInst *createDivInst(Value *lhs, Value *rhs,
                          const std::string &name = "") {
  return createBinaryInst(Instruction::kDiv, Type::getIntType(), lhs, rhs,
                          name);
}
//! `kRem`
BinaryInst *createRemInst(Value *lhs, Value *rhs,
                          const std::string &name = "") {
  return createBinaryInst(Instruction::kRem, Type::getIntType(), lhs, rhs,
                          name);
}
// Integer comparisons: wrappers around createBinaryInst with an int result.

//! `kICmpEQ`
BinaryInst *createICmpEQInst(Value *lhs, Value *rhs,
                             const std::string &name = "") {
  return createBinaryInst(Instruction::kICmpEQ, Type::getIntType(), lhs, rhs,
                          name);
}
//! `kICmpNE`
BinaryInst *createICmpNEInst(Value *lhs, Value *rhs,
                             const std::string &name = "") {
  return createBinaryInst(Instruction::kICmpNE, Type::getIntType(), lhs, rhs,
                          name);
}
//! `kICmpLT`
BinaryInst *createICmpLTInst(Value *lhs, Value *rhs,
                             const std::string &name = "") {
  return createBinaryInst(Instruction::kICmpLT, Type::getIntType(), lhs, rhs,
                          name);
}
//! `kICmpLE`
BinaryInst *createICmpLEInst(Value *lhs, Value *rhs,
                             const std::string &name = "") {
  return createBinaryInst(Instruction::kICmpLE, Type::getIntType(), lhs, rhs,
                          name);
}
//! `kICmpGT`
BinaryInst *createICmpGTInst(Value *lhs, Value *rhs,
                             const std::string &name = "") {
  return createBinaryInst(Instruction::kICmpGT, Type::getIntType(), lhs, rhs,
                          name);
}
//! `kICmpGE`
BinaryInst *createICmpGEInst(Value *lhs, Value *rhs,
                             const std::string &name = "") {
  return createBinaryInst(Instruction::kICmpGE, Type::getIntType(), lhs, rhs,
                          name);
}
// Float arithmetic: wrappers around createBinaryInst with a float result.

//! `kFAdd`
BinaryInst *createFAddInst(Value *lhs, Value *rhs,
                           const std::string &name = "") {
  return createBinaryInst(Instruction::kFAdd, Type::getFloatType(), lhs, rhs,
                          name);
}
//! `kFSub`
BinaryInst *createFSubInst(Value *lhs, Value *rhs,
                           const std::string &name = "") {
  return createBinaryInst(Instruction::kFSub, Type::getFloatType(), lhs, rhs,
                          name);
}
//! `kFMul`
BinaryInst *createFMulInst(Value *lhs, Value *rhs,
                           const std::string &name = "") {
  return createBinaryInst(Instruction::kFMul, Type::getFloatType(), lhs, rhs,
                          name);
}
//! `kFDiv`
BinaryInst *createFDivInst(Value *lhs, Value *rhs,
                           const std::string &name = "") {
  return createBinaryInst(Instruction::kFDiv, Type::getFloatType(), lhs, rhs,
                          name);
}
//! `kFRem`
BinaryInst *createFRemInst(Value *lhs, Value *rhs,
                           const std::string &name = "") {
  return createBinaryInst(Instruction::kFRem, Type::getFloatType(), lhs, rhs,
                          name);
}
//! Convenience wrapper for the Instruction::kFCmpEQ opcode: forwards `lhs`,
//! `rhs` and `name` to createBinaryInst and returns the instruction that call
//! produces.
//! NOTE(review): the type argument is Type::getFloatType(), whereas the
//! createICmp*Inst helpers pass Type::getIntType(); confirm that a float-typed
//! comparison instruction is intended.
BinaryInst *createFCmpEQInst(Value *lhs, Value *rhs,
const std::string &name = "") {
return createBinaryInst(Instruction::kFCmpEQ, Type::getFloatType(), lhs,
rhs, name);
}
//! Convenience wrapper for the Instruction::kFCmpNE opcode: forwards `lhs`,
//! `rhs` and `name` to createBinaryInst and returns the instruction that call
//! produces.
//! NOTE(review): the type argument is Type::getFloatType(), whereas the
//! createICmp*Inst helpers pass Type::getIntType(); confirm that a float-typed
//! comparison instruction is intended.
BinaryInst *createFCmpNEInst(Value *lhs, Value *rhs,
const std::string &name = "") {
return createBinaryInst(Instruction::kFCmpNE, Type::getFloatType(), lhs,
rhs, name);
}
//! Convenience wrapper for the Instruction::kFCmpLT opcode: forwards `lhs`,
//! `rhs` and `name` to createBinaryInst and returns the instruction that call
//! produces.
//! NOTE(review): the type argument is Type::getFloatType(), whereas the
//! createICmp*Inst helpers pass Type::getIntType(); confirm that a float-typed
//! comparison instruction is intended.
BinaryInst *createFCmpLTInst(Value *lhs, Value *rhs,
const std::string &name = "") {
return createBinaryInst(Instruction::kFCmpLT, Type::getFloatType(), lhs,
rhs, name);
}
//! Convenience wrapper for the Instruction::kFCmpLE opcode: forwards `lhs`,
//! `rhs` and `name` to createBinaryInst and returns the instruction that call
//! produces.
//! NOTE(review): the type argument is Type::getFloatType(), whereas the
//! createICmp*Inst helpers pass Type::getIntType(); confirm that a float-typed
//! comparison instruction is intended.
BinaryInst *createFCmpLEInst(Value *lhs, Value *rhs,
const std::string &name = "") {
return createBinaryInst(Instruction::kFCmpLE, Type::getFloatType(), lhs,
rhs, name);
}
//! Convenience wrapper for the Instruction::kFCmpGT opcode: forwards `lhs`,
//! `rhs` and `name` to createBinaryInst and returns the instruction that call
//! produces.
//! NOTE(review): the type argument is Type::getFloatType(), whereas the
//! createICmp*Inst helpers pass Type::getIntType(); confirm that a float-typed
//! comparison instruction is intended.
BinaryInst *createFCmpGTInst(Value *lhs, Value *rhs,
const std::string &name = "") {
return createBinaryInst(Instruction::kFCmpGT, Type::getFloatType(), lhs,
rhs, name);
}
//! Convenience wrapper for the Instruction::kFCmpGE opcode: forwards `lhs`,
//! `rhs` and `name` to createBinaryInst and returns the instruction that call
//! produces.
//! NOTE(review): the type argument is Type::getFloatType(), whereas the
//! createICmp*Inst helpers pass Type::getIntType(); confirm that a float-typed
//! comparison instruction is intended.
BinaryInst *createFCmpGEInst(Value *lhs, Value *rhs,
const std::string &name = "") {
return createBinaryInst(Instruction::kFCmpGE, Type::getFloatType(), lhs,
rhs, name);
}
//! Allocates a ReturnInst for `value` (which may be null) and places it in the
//! current block's instruction list at the builder's insertion position.
//! NOTE(review): unlike the other create*Inst helpers here, no parent block is
//! passed to the constructor; confirm against ReturnInst's declaration.
//! \return the newly created instruction.
ReturnInst *createReturnInst(Value *value = nullptr) {
  ReturnInst *const retInst = new ReturnInst(value);
  assert(retInst != nullptr);
  auto &&instructions = block->getInstructions();
  instructions.emplace(position, retInst);
  return retInst;
}
//! Creates an unconditional branch to `block` and places it in the builder's
//! current block at the insertion position.
//! \param block branch target
//! \param args  values forwarded to the UncondBrInst constructor
//! \return the newly created instruction.
UncondBrInst *createUncondBrInst(BasicBlock *block,
                                 std::vector<Value *> args) {
  // The parameter `block` (the branch target) hides the builder's member of
  // the same name. The parent block and the instruction list that receives
  // the branch must be the builder's current block, so reach it via `this`.
  BasicBlock *const current = this->block;
  auto inst = new UncondBrInst(block, args, current);
  assert(inst);
  current->getInstructions().emplace(position, inst);
  return inst;
}
//! Allocates a CondBrInst from the condition, the two target blocks and their
//! argument lists (with the builder's current block as the last constructor
//! argument) and places it in that block at the insertion position.
//! \return the newly created instruction.
CondBrInst *createCondBrInst(Value *condition, BasicBlock *thenBlock,
                             BasicBlock *elseBlock,
                             const std::vector<Value *> &thenArgs,
                             const std::vector<Value *> &elseArgs) {
  CondBrInst *const branch = new CondBrInst(condition, thenBlock, elseBlock,
                                            thenArgs, elseArgs, block);
  assert(branch != nullptr);
  auto &&instructions = block->getInstructions();
  instructions.emplace(position, branch);
  return branch;
}
//! Allocates an AllocaInst from `type`, `dims` and `name` (with the builder's
//! current block as constructor argument) and places it in that block at the
//! insertion position.
//! \return the newly created instruction.
AllocaInst *createAllocaInst(Type *type,
                             const std::vector<Value *> &dims = {},
                             const std::string &name = "") {
  AllocaInst *const allocaInst = new AllocaInst(type, dims, block, name);
  assert(allocaInst != nullptr);
  auto &&instructions = block->getInstructions();
  instructions.emplace(position, allocaInst);
  return allocaInst;
}
//! Allocates a LoadInst from `pointer`, `indices` and `name` (with the
//! builder's current block as constructor argument) and places it in that
//! block at the insertion position.
//! \return the newly created instruction.
LoadInst *createLoadInst(Value *pointer,
                         const std::vector<Value *> &indices = {},
                         const std::string &name = "") {
  LoadInst *const load = new LoadInst(pointer, indices, block, name);
  assert(load != nullptr);
  auto &&instructions = block->getInstructions();
  instructions.emplace(position, load);
  return load;
}
//! Allocates a StoreInst from `value`, `pointer`, `indices` and `name` (with
//! the builder's current block as constructor argument) and places it in that
//! block at the insertion position.
//! \return the newly created instruction.
StoreInst *createStoreInst(Value *value, Value *pointer,
                           const std::vector<Value *> &indices = {},
                           const std::string &name = "") {
  StoreInst *const store = new StoreInst(value, pointer, indices, block, name);
  assert(store != nullptr);
  auto &&instructions = block->getInstructions();
  instructions.emplace(position, store);
  return store;
}
};
} // namespace sysy

122
src/SysY.g4 Normal file
View File

@@ -0,0 +1,122 @@
grammar SysY;
/*===-------------------------------------------===*/
/* Lexer rules */
/*===-------------------------------------------===*/
// Every helper rule that only exists to build IDENT or ILITERAL is a
// `fragment`, so it can never be emitted as a token of its own. As plain
// rules they competed under longest-match: e.g. `e+1` lexed as a single
// ExponentPart token instead of IDENT '+' ILITERAL.
// fragments
fragment DecDigit: [0-9];
fragment OctDigit: [0-7];
fragment HexDigit: [0-9a-fA-F];
fragment OctPrefix: '0';
fragment HexPrefix: '0' [xX];
fragment NonZeroDecDigit: [1-9];
fragment ESC: '\\"' | '\\\\';
// keywords
INT: 'int';
FLOAT: 'float';
CONST: 'const';
// operators
ADD: '+';
// punctuations
// identifier
IDENT: IDENTNONDIGIT (IDENTNONDIGIT | DecDigit)*;
fragment IDENTNONDIGIT: [a-zA-Z_];
// literals (integer and floating-point constants share one token type)
ILITERAL: INTCONST | FLOATCONST;
// string
STRING: '"' (ESC | .)*? '"';
// white space and comments
WS: [ \t\r\n] -> skip;
// Stops before the line terminator (which WS then skips), so a comment on the
// last line of a file without a trailing newline is still recognised.
LINECOMMENT: '//' ~[\r\n]* -> skip;
BLOCKCOMMENT: '/*' .*? '*/' -> skip;
fragment INTCONST: NonZeroDecDigit DecDigit*
| OctPrefix OctDigit*
| HexPrefix HexDigit+;
fragment DIGITSEQUENCE: DecDigit+;
// Floating point constants
fragment FLOATCONST: DecimalFloatingConstant
| HexadecimalFloatingConstant;
fragment DecimalFloatingConstant: FractionalConstant ExponentPart? // the exponent is optional
| DIGITSEQUENCE ExponentPart; // keeps the original form
fragment HexadecimalFloatingConstant: HexPrefix HexadecimalFractionalConstant BinaryExponentPart
| HexPrefix HexadecimalDigitSequence BinaryExponentPart;
fragment FractionalConstant: DIGITSEQUENCE? '.' DIGITSEQUENCE
| DIGITSEQUENCE '.';
fragment ExponentPart:
[eE] [+-]? DIGITSEQUENCE;
fragment HexadecimalFractionalConstant: HexadecimalDigitSequence? '.' HexadecimalDigitSequence
| HexadecimalDigitSequence '.';
fragment HexadecimalDigitSequence: HexDigit+;
fragment BinaryExponentPart:
[pP] [+-]? DIGITSEQUENCE;
/*===-------------------------------------------===*/
/* Syntax rules */
/*===-------------------------------------------===*/
// module: funcRParams;
// compUnit: (dcl | funcDef)+;
// Start rule: either a sequence of declarations / function definitions, or a
// bare list of call arguments.
module: (dcl | funcDef)+ | funcRParams;
funcRParams: funcRParam (',' funcRParam)*;
funcRParam: number # expAsRParam | string # stringAsRParam | exp (',' exp)* # expsAsRParam;
// funcRParam: exp (',' exp)*;
number: ILITERAL;
string: STRING;
bType: INT | FLOAT;
dcl: constDecl | varDecl;
constDecl: CONST bType constDef (',' constDef)* ';';
constDef: IDENT ( '[' constExp ']' )* '=' constInitVal;
constInitVal: constExp
| '{' (constInitVal (',' constInitVal)*)? '}';
varDecl: bType varDef (',' varDef)* ';';
// An initializer is only allowed after '='. The previous first alternative
// carried a bare `initVal?`, which also accepted `int a 5;`.
varDef: IDENT ( '[' constExp ']' )* ( '=' initVal )?;
initVal: exp | '{' ( initVal ( ',' initVal )* )? '}';
funcDef: funcType IDENT '(' funcFParams? ')' block;
funcType: bType | 'void';
funcFParams: funcFParam (',' funcFParam )*;
// Array parameters leave the first dimension empty: `int a[][3]`.
funcFParam: bType IDENT ( '[' ']' ( '[' exp ']' )* )?;
block: '{' blockItem* '}';
blockItem: dcl | stmt;
stmt: lVal '=' exp ';'
| exp? ';'
// | IDENT '(' funcRParams? ')' ';'
| block
| 'if' '(' cond ')' stmt ('else' stmt)?
| 'while' '(' cond ')' stmt
| 'break' ';'
| 'continue' ';'
| 'return' exp? ';';
exp: addExp;
cond: lorExp;
lVal: IDENT ( '[' exp ']' )*;
primaryExp: '(' exp ')' | lVal | number;
// number: INTCONST | FLOATCONST;
unaryExp: primaryExp | IDENT '(' funcRParams? ')'
| unaryOp unaryExp;
unaryOp: '+' | '-' | '!';
mulExp: unaryExp (('*' | '/' | '%') unaryExp)*;
addExp: mulExp | addExp ('+' | '-') mulExp;
relExp: addExp | relExp ('<' | '>' | '<=' | '>=') addExp;
eqExp: relExp | eqExp ('==' | '!=') relExp;
landExp: eqExp | landExp '&&' eqExp;
lorExp: landExp | lorExp '||' landExp;
constExp: addExp;

340
src/SysYFormatter.h Normal file
View File

@@ -0,0 +1,340 @@
#pragma once
#include "SysYBaseVisitor.h"
#include "SysYParser.h"
#include <ostream>
namespace sysy {
//! Parse-tree visitor that re-emits a SysY program as formatted source text
//! on an output stream.
class SysYFormatter : public SysYBaseVisitor {
protected:
  std::ostream &os; //!< stream that receives the formatted text
  int indent = 0;   //!< current indentation width, in spaces

public:
  SysYFormatter(std::ostream &os) : os(os), indent(0) {}

protected:
  //! Scope guard: deepens the indentation by TabSize while it is alive and
  //! restores it on destruction.
  struct Indentor {
    static constexpr int TabSize = 2;
    int &indent;
    Indentor(int &indent) : indent(indent) { indent += TabSize; }
    ~Indentor() { indent -= TabSize; }
  };

  //! Writes the current indentation to the stream and returns the stream.
  std::ostream &space() { return os << std::string(indent, ' '); }

  //! Visits every element of `container`, writing `sep` between consecutive
  //! elements. An empty container produces no output.
  //! \return the output stream, so callers can keep chaining `<<`.
  template <typename T>
  std::ostream &interleave(const T &container, const std::string &sep = ", ") {
    auto it = container.begin();
    const auto last = container.end();
    // Guard first: dereferencing begin() of an empty container is undefined.
    if (it == last)
      return os;
    (*it)->accept(this);
    for (++it; it != last; ++it) {
      os << sep;
      (*it)->accept(this);
    }
    return os;
  }

public:
  // virtual std::any visitModule(SysYParser::ModuleContext *ctx) override {
  //   return visitChildren(ctx);
  // }

  //! Prints the basic type exactly as written in the source.
  virtual std::any visitBtype(SysYParser::BtypeContext *ctx) override {
    os << ctx->getText();
    return 0;
  }

  //! Prints one declaration line: `[const ]type def, def, ...;`.
  virtual std::any visitDecl(SysYParser::DeclContext *ctx) override {
    space();
    if (ctx->CONST())
      os << ctx->CONST()->getText() << ' ';
    ctx->btype()->accept(this);
    os << ' ';
    interleave(ctx->varDef(), ", ") << ';' << '\n';
    return 0;
  }

  //! Prints a variable definition, followed by ` = init` when present.
  virtual std::any visitVarDef(SysYParser::VarDefContext *ctx) override {
    ctx->lValue()->accept(this);
    if (ctx->initValue()) {
      os << ' ' << '=' << ' ';
      ctx->initValue()->accept(this);
    }
    return 0;
  }

  //! Prints an initializer: either a single expression or a braced,
  //! comma-separated list of nested initializers.
  virtual std::any visitInitValue(SysYParser::InitValueContext *ctx) override {
    if (ctx->exp()) {
      // Scalar initializer; previously this case printed nothing at all.
      ctx->exp()->accept(this);
    } else {
      os << '{';
      interleave(ctx->initValue(), ", ");
      os << '}';
    }
    return 0;
  }

  //! Prints a function definition: `type name(params) { ... }`.
  virtual std::any visitFunc(SysYParser::FuncContext *ctx) override {
    ctx->funcType()->accept(this);
    os << ' ' << ctx->ID()->getText() << '(';
    if (ctx->funcFParams())
      ctx->funcFParams()->accept(this);
    os << ')' << ' ';
    ctx->blockStmt()->accept(this);
    os << '\n';
    return 0;
  }

  //! Prints the function return type exactly as written in the source.
  virtual std::any visitFuncType(SysYParser::FuncTypeContext *ctx) override {
    os << ctx->getText();
    return 0;
  }

  //! Prints the formal parameter list, comma separated.
  virtual std::any
  visitFuncFParams(SysYParser::FuncFParamsContext *ctx) override {
    interleave(ctx->funcFParam(), ", ");
    return 0;
  }

  //! Prints one formal parameter; array parameters are printed as
  //! `type name[]` followed by one `[exp]` per remaining dimension.
  virtual std::any
  visitFuncFParam(SysYParser::FuncFParamContext *ctx) override {
    ctx->btype()->accept(this);
    os << ' ' << ctx->ID()->getText();
    if (not ctx->LBRACKET().empty()) {
      // The first dimension is empty; close the bracket that was previously
      // left open.
      os << "[]";
      auto exp = ctx->exp();
      if (not exp.empty()) {
        os << '[';
        interleave(exp, "][") << ']';
      }
    }
    return 0;
  }

  //! Prints a braced block with its items indented one level deeper.
  virtual std::any visitBlockStmt(SysYParser::BlockStmtContext *ctx) override {
    os << '{' << '\n';
    {
      Indentor indentor(indent);
      interleave(ctx->blockItem(), "");
    }
    space() << ctx->RBRACE()->getText() << '\n';
    return 0;
  }

  // virtual std::any visitBlockItem(SysYParser::BlockItemContext *ctx)
  // override {
  //   return visitChildren(ctx);
  // }
  // virtual std::any visitStmt(SysYParser::StmtContext *ctx) override {
  //   return visitChildren(ctx);
  // }

  //! Prints `lvalue = exp;` on its own line.
  virtual std::any
  visitAssignStmt(SysYParser::AssignStmtContext *ctx) override {
    space();
    ctx->lValue()->accept(this);
    os << " = ";
    ctx->exp()->accept(this);
    os << ';' << '\n';
    return 0;
  }

  //! Prints `exp;` on its own line.
  virtual std::any visitExpStmt(SysYParser::ExpStmtContext *ctx) override {
    space();
    ctx->exp()->accept(this);
    os << ';' << '\n';
    return 0;
  }

  //! Prints `stmt` as a braced block: a block statement is printed as is, any
  //! other statement is wrapped in `{ ... }` and indented.
  void wrapBlock(SysYParser::StmtContext *stmt) {
    const bool isBlock = stmt->blockStmt() != nullptr;
    if (isBlock) {
      stmt->accept(this);
    } else {
      os << "{\n";
      {
        Indentor indentor(indent);
        stmt->accept(this);
      }
      space() << "}\n";
    }
  }

  //! Prints `if (cond) { ... }`, followed by `else { ... }` when the statement
  //! has an else branch.
  virtual std::any visitIfStmt(SysYParser::IfStmtContext *ctx) override {
    space();
    os << ctx->IF()->getText() << " (";
    ctx->exp()->accept(this);
    os << ") ";
    auto stmt = ctx->stmt();
    auto ifStmt = stmt[0];
    wrapBlock(ifStmt);
    if (stmt.size() == 2) {
      auto elseStmt = stmt[1];
      // The keyword itself was previously dropped, which turned the else
      // branch into an unconditional block.
      space() << "else ";
      wrapBlock(elseStmt);
    }
    return 0;
  }

  //! Prints `while (cond) { ... }`.
  virtual std::any visitWhileStmt(SysYParser::WhileStmtContext *ctx) override {
    space();
    os << ctx->WHILE()->getText() << " (";
    ctx->exp()->accept(this);
    os << ") ";
    wrapBlock(ctx->stmt());
    return 0;
  }

  //! Prints `break;` on its own line.
  virtual std::any visitBreakStmt(SysYParser::BreakStmtContext *ctx) override {
    space() << ctx->BREAK()->getText() << ';' << '\n';
    return 0;
  }

  //! Prints `continue;` on its own line.
  virtual std::any
  visitContinueStmt(SysYParser::ContinueStmtContext *ctx) override {
    space() << ctx->CONTINUE()->getText() << ';' << '\n';
    return 0;
  }

  //! Prints `return;` or `return exp;` on its own line.
  virtual std::any
  visitReturnStmt(SysYParser::ReturnStmtContext *ctx) override {
    space() << ctx->RETURN()->getText();
    if (ctx->exp()) {
      os << ' ';
      ctx->exp()->accept(this);
    }
    os << ';' << '\n';
    return 0;
  }

  // virtual std::any visitEmptyStmt(SysYParser::EmptyStmtContext *ctx)
  // override {
  //   return visitChildren(ctx);
  // }

  //! Prints `lhs op rhs` for `<`, `<=`, `>` and `>=`.
  virtual std::any
  visitRelationExp(SysYParser::RelationExpContext *ctx) override {
    auto lhs = ctx->exp(0);
    auto rhs = ctx->exp(1);
    std::string op =
        ctx->LT() ? "<" : (ctx->LE() ? "<=" : (ctx->GT() ? ">" : ">="));
    lhs->accept(this);
    os << ' ' << op << ' ';
    rhs->accept(this);
    return 0;
  }

  //! Prints `lhs op rhs` for `*`, `/` and `%`.
  virtual std::any
  visitMultiplicativeExp(SysYParser::MultiplicativeExpContext *ctx) override {
    auto lhs = ctx->exp(0);
    auto rhs = ctx->exp(1);
    std::string op = ctx->MUL() ? "*" : (ctx->DIV() ? "/" : "%");
    lhs->accept(this);
    os << ' ' << op << ' ';
    rhs->accept(this);
    return 0;
  }

  // virtual std::any visitLValueExp(SysYParser::LValueExpContext *ctx)
  // override {
  //   return visitChildren(ctx);
  // }
  // virtual std::any visitNumberExp(SysYParser::NumberExpContext *ctx)
  // override {
  //   return visitChildren(ctx);
  // }

  //! Prints `lhs && rhs`.
  virtual std::any visitAndExp(SysYParser::AndExpContext *ctx) override {
    ctx->exp(0)->accept(this);
    os << " && ";
    ctx->exp(1)->accept(this);
    return 0;
  }

  //! Prints a unary `+`, `-` or `!` directly followed by its operand.
  virtual std::any visitUnaryExp(SysYParser::UnaryExpContext *ctx) override {
    std::string op = ctx->ADD() ? "+" : (ctx->SUB() ? "-" : "!");
    os << op;
    ctx->exp()->accept(this);
    return 0;
  }

  //! Prints a parenthesised expression.
  virtual std::any visitParenExp(SysYParser::ParenExpContext *ctx) override {
    os << '(';
    ctx->exp()->accept(this);
    os << ')';
    return 0;
  }

  //! Delegates to the children of the string expression node.
  virtual std::any visitStringExp(SysYParser::StringExpContext *ctx) override {
    return visitChildren(ctx);
  }

  //! Prints `lhs || rhs`.
  virtual std::any visitOrExp(SysYParser::OrExpContext *ctx) override {
    ctx->exp(0)->accept(this);
    os << " || ";
    ctx->exp(1)->accept(this);
    return 0;
  }

  // virtual std::any visitCallExp(SysYParser::CallExpContext *ctx) override {
  //   return visitChildren(ctx);
  // }

  //! Prints `lhs op rhs` for `+` and `-`.
  virtual std::any
  visitAdditiveExp(SysYParser::AdditiveExpContext *ctx) override {
    auto lhs = ctx->exp(0);
    auto rhs = ctx->exp(1);
    std::string op = ctx->ADD() ? "+" : "-";
    lhs->accept(this);
    os << ' ' << op << ' ';
    rhs->accept(this);
    return 0;
  }

  //! Prints `lhs op rhs` for `==` and `!=`.
  virtual std::any visitEqualExp(SysYParser::EqualExpContext *ctx) override {
    auto lhs = ctx->exp(0);
    auto rhs = ctx->exp(1);
    std::string op = ctx->EQ() ? "==" : "!=";
    lhs->accept(this);
    os << ' ' << op << ' ';
    rhs->accept(this);
    return 0;
  }

  //! Prints a call: `name(args)`.
  virtual std::any visitCall(SysYParser::CallContext *ctx) override {
    os << ctx->ID()->getText() << '(';
    if (ctx->funcRParams())
      ctx->funcRParams()->accept(this);
    os << ')';
    return 0;
  }

  //! Prints an l-value: the identifier followed by one `[exp]` per index.
  virtual std::any visitLValue(SysYParser::LValueContext *ctx) override {
    os << ctx->ID()->getText();
    auto exp = ctx->exp();
    if (not exp.empty()) {
      os << '[';
      interleave(exp, "][") << ']';
    }
    return 0;
  }

  //! Prints the numeric literal exactly as written in the source.
  virtual std::any visitNumber(SysYParser::NumberContext *ctx) override {
    os << ctx->getText();
    return 0;
  }

  //! Prints the string literal exactly as written in the source.
  virtual std::any visitString(SysYParser::StringContext *ctx) override {
    os << ctx->getText();
    return 0;
  }

  //! Prints the actual argument list of a call, comma separated.
  virtual std::any
  visitFuncRParams(SysYParser::FuncRParamsContext *ctx) override {
    interleave(ctx->exp(), ", ");
    return 0;
  }
};
} // namespace sysy

30
src/SysYIRGenerator.cpp Normal file
View File

@@ -0,0 +1,30 @@
#include "IR.h"
#include <any>
#include <memory>
using namespace std;
#include "SysYIRGenerator.h"
namespace sysy {
// Entry point of IR generation: installs a fresh Module as the generator's
// current module, visits the children of the parse-tree root, and returns the
// raw (non-owning) pointer to that module wrapped in std::any.
any SysYIRGenerator::visitModule(SysYParser::ModuleContext *ctx) {
  // the generator keeps ownership of the IR module
  module.reset(new Module());
  Module *const irModule = module.get();
  assert(irModule != nullptr);
  // globals and functions are produced while walking the children
  visitChildren(ctx);
  return irModule;
}
// Visitor hook for funcRParams nodes: does no work of its own and returns the
// result of visiting the node's children.
std::any
SysYIRGenerator::visitFuncRParams(SysYParser::FuncRParamsContext *ctx) {
return visitChildren(ctx);
}
// Visitor hook for number nodes: does no work of its own and returns the
// result of visiting the node's children.
std::any SysYIRGenerator::visitNumber(SysYParser::NumberContext *ctx) {
return visitChildren(ctx);
}
// Visitor hook for string nodes: does no work of its own and returns the
// result of visiting the node's children.
std::any SysYIRGenerator::visitString(SysYParser::StringContext *ctx) {
return visitChildren(ctx);
}
} // namespace sysy

29
src/SysYIRGenerator.h Normal file
View File

@@ -0,0 +1,29 @@
#pragma once
#include "IR.h"
#include "IRBuilder.h"
#include "SysYBaseVisitor.h"
#include "SysYParser.h"
#include <memory>
namespace sysy {
//! Parse-tree visitor that produces the IR module for a SysY program.
class SysYIRGenerator : public SysYBaseVisitor {
public:
  SysYIRGenerator() = default;

  //! Non-owning pointer to the module currently held by the generator
  //! (null while no module has been stored).
  Module *get() const { return module.get(); }

  // visitor overrides implemented by the generator
  std::any visitModule(SysYParser::ModuleContext *ctx) override;
  std::any visitFuncRParams(SysYParser::FuncRParamsContext *ctx) override;
  std::any visitNumber(SysYParser::NumberContext *ctx) override;
  std::any visitString(SysYParser::StringContext *ctx) override;

private:
  std::unique_ptr<Module> module; //!< IR module owned by the generator
  IRBuilder builder;              //!< instruction builder
}; // class SysYIRGenerator
} // namespace sysy

63
src/range.h Normal file
View File

@@ -0,0 +1,63 @@
#pragma once
#include <iterator>
namespace sysy {
/*!
* \defgroup utility Utilities
* @{
*/
/*!
 * \brief `range` is a simple wrapper of an iterator pair [begin, end)
 *
 * Example usage
 *
 * ```cpp
 *    vector<int> v = {1,2,3};
 *    auto rg = make_range(v);
 *    for (auto x : rg)
 *      cout << x << '\n';
 * ```
 */
template <typename IterT> struct range {
  using iterator = IterT;
  using value_type = typename std::iterator_traits<IterT>::value_type;
  using reference = typename std::iterator_traits<IterT>::reference;

private:
  IterT first_; //!< start of the half-open interval
  IterT last_;  //!< one past the final element

public:
  explicit range(iterator b, iterator e) : first_(b), last_(e) {}

  iterator begin() { return first_; }
  iterator end() { return last_; }
  iterator begin() const { return first_; }
  iterator end() const { return last_; }

  //! Number of elements, as computed by std::distance.
  typename std::iterator_traits<IterT>::difference_type size() const {
    return std::distance(first_, last_);
  }
  //! True when the two iterators compare equal.
  bool empty() const { return first_ == last_; }
};
//! build a `range` over the iterator pair [begin, end)
template <typename IterT> range<IterT> make_range(IterT b, IterT e) {
  return range<IterT>(b, e);
}
//! build a `range` spanning a mutable container that offers `begin()` and
//! `end()`
template <typename ContainerT>
range<typename ContainerT::iterator> make_range(ContainerT &c) {
  auto first = c.begin();
  auto last = c.end();
  return make_range(first, last);
}
//! build a `range` spanning a const container that offers `begin()` and
//! `end()`
template <typename ContainerT>
range<typename ContainerT::const_iterator> make_range(const ContainerT &c) {
  auto first = c.begin();
  auto last = c.end();
  return make_range(first, last);
}
/*!
* @}
*/
} // namespace sysy

96
src/sysyc.cpp Normal file
View File

@@ -0,0 +1,96 @@
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <unistd.h>
using namespace std;
#include "SysYLexer.h"
#include "SysYParser.h"
using namespace antlr4;
#include "ASTPrinter.h"
#include "Backend.h"
#include "SysYIRGenerator.h"
using namespace sysy;
// Command-line state, filled in by parseArgs().
static string argStopAfter; // argument of -s; empty when the option is absent
static string argInputFile; // first non-option argument
static bool argFormat = false; // set by -f
// Print the command-line help text to stderr and terminate the process.
// `code` is the exit status handed to exit(); it defaults to EXIT_FAILURE.
void usage(int code = EXIT_FAILURE) {
  // Adjacent string literals are concatenated into one message, so only the
  // last line may be followed by ';'. A stray ';' after the -h line used to
  // cut the message short and silently dropped the -f and -s descriptions.
  const char *msg = "Usage: sysyc [options] inputfile\n\n"
                    "Supported options:\n"
                    " -h \tprint help message and exit\n"
                    " -f \tpretty-format the input file\n"
                    " -s {ast,ir,asm}\tstop after generating AST/IR/Assembly\n";
  std::cerr << msg;
  std::exit(code);
}
// Parse the command line with getopt ("hfs:") and record the result in the
// file-level option variables. -h prints the help text and exits with
// success; an option getopt does not accept, or a missing input file, prints
// the help text and exits with failure.
void parseArgs(int argc, char **argv) {
  const char *const accepted = "hfs:";
  for (int flag = getopt(argc, argv, accepted); flag != -1;
       flag = getopt(argc, argv, accepted)) {
    if (flag == 'h') {
      usage(EXIT_SUCCESS);
    } else if (flag == 'f') {
      argFormat = true;
    } else if (flag == 's') {
      argStopAfter = optarg;
    } else { /* '?' */
      usage();
    }
  }
  // the first non-option argument is the input file
  if (optind >= argc)
    usage();
  argInputFile = argv[optind];
}
int main(int argc, char **argv) {
parseArgs(argc, argv);
// open the input file
ifstream fin(argInputFile);
if (not fin) {
cerr << "Failed to open file " << argv[1];
return EXIT_FAILURE;
}
// parse sysy source to AST
ANTLRInputStream input(fin);
SysYLexer lexer(&input);
CommonTokenStream tokens(&lexer);
SysYParser parser(&tokens);
auto moduleAST = parser.module();
if (argStopAfter == "ast") {
cout << moduleAST->toStringTree(true) << '\n';
return EXIT_SUCCESS;
}
// pretty format the input file
if (argFormat) {
ASTPrinter printer;
printer.visitModule(moduleAST);
return EXIT_SUCCESS;
}
// visit AST to generate IR
SysYIRGenerator generator;
generator.visitModule(moduleAST);
auto moduleIR = generator.get();
if (argStopAfter == "ir") {
moduleIR->print(cout);
return EXIT_SUCCESS;
}
// generate assembly
CodeGen codegen(moduleIR);
string asmCode = codegen.code_gen();
cout << asmCode << endl;
if (argStopAfter == "asm")
return EXIT_SUCCESS;
return EXIT_SUCCESS;
}