diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..c386b0d --- /dev/null +++ b/TODO.md @@ -0,0 +1,86 @@ +要打通从SysY到RISC-V的完整编译流程,以下是必须实现的核心模块和关键步骤(按编译流程顺序)。在你们当前IR生成阶段,可以优先实现这些基础模块来快速获得可工作的RISC-V汇编输出: + +### 1. **前端必须模块** +- **词法/语法分析**(已完成): + - `SysYLexer`/`SysYParser`:ANTLR生成的解析器 +- **IR生成核心**: + - `SysYIRGenerator`:将AST转换为中间表示(IR) + - `IRBuilder`:构建指令和基本块的工具类(你们正在实现的部分) + +### 2. **中端必要优化(最小集合)** +| 优化阶段 | 关键作用 | 是否必须 | +|-------------------|----------------------------------|----------| +| `Mem2Reg` | 消除冗余内存访问,转换为SSA形式 | ✅ 核心 | +| `DCE` (死代码消除) | 移除无用指令 | ✅ 必要 | +| `DFE` (死函数消除) | 移除未使用的函数 | ✅ 必要 | +| `FuncAnalysis` | 函数调用关系分析 | ✅ 基础 | +| `Global2Local` | 全局变量降级为局部变量 | ✅ 重要 | + +### 3. **后端核心流程(必须实现)** +```mermaid +graph LR + A[IR指令选择] --> B[寄存器分配] + B --> C[指令调度] + C --> D[汇编生成] +``` + +1. **指令选择**(关键步骤): + - `DAGBuilder`:将IR转换为有向无环图(DAG) + - `DAGCoverage`:DAG到目标指令的映射 + - `Mid2End`:IR到机器指令的转换接口 + +2. **寄存器分配**: + - `RegisterAlloc`:基础寄存器分配器(可先实现简单算法如线性扫描) + +3. **汇编生成**: + - `RiscvPrinter`:将机器指令输出为RISC-V汇编 + - 实现基础指令集:`add`/`sub`/`lw`/`sw`/`beq`/`jal`等 + +### 4. **最小可工作流程** +```cpp +// 精简版编译流程(跳过复杂优化) +int main() { + // 1. 前端解析 + auto module = sysy::SysYIRGenerator().genIR(input); + + // 2. 关键中端优化 + sysy::Mem2Reg(module).run(); // 必须 + sysy::Global2Local(module).run(); // 必须 + sysy::DCE(module).run(); // 推荐 + + // 3. 后端代码生成 + auto backendModule = mid2end::CodeGenerater().run(module); + riscv::RiscvPrinter().print("output.s", backendModule); +} +``` + +### 5. **当前开发优先级建议** +1. **完成IR生成**: + - 确保能构建基本块、函数、算术/内存/控制流指令 + - 实现`createCall`/`createLoad`/`createStore`等核心方法 + +2. **实现Mem2Reg**: + - 插入Phi节点 + - 变量重命名(关键算法) + +3. **构建基础后端**: + - 指令选择:实现IR到RISC-V的简单映射(例如:`IRAdd` → `add`) + - 寄存器分配:使用无限寄存器方案(后期替换为真实分配) + - 汇编打印:支持基础指令输出 + +> **注意**:循环优化、函数内联、高级寄存器分配等可在基础流程打通后逐步添加。初期可跳过复杂优化。 + +### 6. 调试建议 +- 添加IR打印模块(`SysYPrinter`)验证前端输出 +- 使用简化测试用例: + ```c + int main() { + int a = 1; + int b = a + 2; + return b; + } + ``` +- 逐步扩展支持: + 1. 算术运算 → 2. 条件分支 → 3. 函数调用 → 4. 数组访问 + +通过聚焦这些核心模块,你们可以快速打通从SysY到RISC-V的基础编译流程,后续再逐步添加优化传递提升代码质量。 \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d130bd8..7e8572c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -20,7 +20,7 @@ add_executable(sysyc # LLVMIRGenerator.cpp LLVMIRGenerator_1.cpp ) -target_include_directories(sysyc PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +target_include_directories(sysyc PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/include) target_compile_options(sysyc PRIVATE -frtti) target_link_libraries(sysyc PRIVATE SysYParser) diff --git a/src/IR.cpp b/src/IR.cpp index f961d53..1564415 100644 --- a/src/IR.cpp +++ b/src/IR.cpp @@ -1,586 +1,310 @@ +#pragma once + #include "IR.h" -#include "range.h" -#include #include -#include -#include -#include -#include -#include #include -#include -#include +#include #include -#include #include -#include -#include -using namespace std; namespace sysy { -template -ostream &interleave(ostream &os, const T &container, const string sep = ", ") { - auto b = container.begin(), e = container.end(); - if (b == e) - return os; - os << *b; - for (b = next(b); b != e; b = next(b)) - os << sep << *b; - return os; -} -static inline ostream &printVarName(ostream &os, const Value *var) { - return os << (dyncast(var) ? '@' : '%') - << var->getName(); -} -static inline ostream &printBlockName(ostream &os, const BasicBlock *block) { - return os << '^' << block->getName(); -} -static inline ostream &printFunctionName(ostream &os, const Function *fn) { - return os << '@' << fn->getName(); -} -static inline ostream &printOperand(ostream &os, const Value *value) { - auto constant = dyncast(value); - if (constant) { - constant->print(os); - return os; - } - return printVarName(os, value); -} -//===----------------------------------------------------------------------===// -// Types -//===----------------------------------------------------------------------===// +class IRBuilder { +private: + unsigned labelIndex; ///< 基本块标签编号 + unsigned tmpIndex; ///< 临时变量编号 -Type *Type::getIntType() { - static Type intType(kInt); - return &intType; -} + BasicBlock *block; ///< 当前基本块 + BasicBlock::iterator position; ///< 当前基本块指令列表位置的迭代器 -Type *Type::getFloatType() { - static Type floatType(kFloat); - return &floatType; -} + std::vector trueBlocks; ///< true分支基本块列表 + std::vector falseBlocks; ///< false分支基本块列表 -Type *Type::getVoidType() { - static Type voidType(kVoid); - return &voidType; -} + std::vector breakBlocks; ///< break目标块列表 + std::vector continueBlocks; ///< continue目标块列表 -Type *Type::getLabelType() { - static Type labelType(kLabel); - return &labelType; -} +public: + IRBuilder() : labelIndex(0), tmpIndex(0), block(nullptr) {} + explicit IRBuilder(BasicBlock *block) : labelIndex(0), tmpIndex(0), block(block), position(block->end()) {} + IRBuilder(BasicBlock *block, BasicBlock::iterator position) + : labelIndex(0), tmpIndex(0), block(block), position(position) {} -Type *Type::getPointerType(Type *baseType) { - // forward to PointerType - return PointerType::get(baseType); -} - -Type *Type::getFunctionType(Type *returnType, - const vector ¶mTypes) { - // forward to FunctionType - return FunctionType::get(returnType, paramTypes); -} - -Type *Type::getArrayType(Type *elementType, const vector &dims) { - // forward to ArrayType - return ArrayType::get(elementType, dims); -} - -ArrayType* Type::asArrayType() const { - return isArray() ? dynamic_cast(const_cast(this)) : nullptr; -} - -int Type::getSize() const { - switch (kind) { - case kInt: - case kFloat: - return 4; - case kLabel: - case kPointer: - case kFunction: - return 8; - case kVoid: - return 0; - case kArray: - return asArrayType()->getArraySize(); - } - return 0; -} - -void Type::print(ostream &os) const { - auto kind = getKind(); - switch (kind) { - case kInt: - os << "int"; - break; - case kFloat: - os << "float"; - break; - case kVoid: - os << "void"; - break; - case kPointer: - static_cast(this)->getBaseType()->print(os); - os << "*"; - break; - case kFunction: - static_cast(this)->getReturnType()->print(os); - os << "("; - interleave(os, static_cast(this)->getParamTypes()); - os << ")"; - break; - case kLabel: - default: - cerr << "Unexpected type!\n"; - break; - } -} - -PointerType *PointerType::get(Type *baseType) { - static std::map> pointerTypes; - auto iter = pointerTypes.find(baseType); - if (iter != pointerTypes.end()) - return iter->second.get(); - auto type = new PointerType(baseType); - assert(type); - auto result = pointerTypes.emplace(baseType, type); - return result.first->second.get(); -} - -FunctionType *FunctionType::get(Type *returnType, - const std::vector ¶mTypes) { - static std::set> functionTypes; - auto iter = - std::find_if(functionTypes.begin(), functionTypes.end(), - [&](const std::unique_ptr &type) -> bool { - if (returnType != type->getReturnType() or - paramTypes.size() != type->getParamTypes().size()) - return false; - return std::equal(paramTypes.begin(), paramTypes.end(), - type->getParamTypes().begin()); - }); - if (iter != functionTypes.end()) - return iter->get(); - auto type = new FunctionType(returnType, paramTypes); - assert(type); - auto result = functionTypes.emplace(type); - return result.first->get(); -} - -void Value::replaceAllUsesWith(Value *value) { - for (auto &use : uses) - use->getUser()->setOperand(use->getIndex(), value); - uses.clear(); -} - -bool Value::isConstant() const { - if (dyncast(this)) - return true; - if (dyncast(this) or - dyncast(this)) - return true; - // if (auto array = dyncast(this)) { - // auto elements = array->getValues(); - // return all_of(elements.begin(), elements.end(), - // [](Value *v) -> bool { return v->isConstant(); }); - // } - return false; -} - - -// 定义静态常量池 -std::unordered_map ConstantValue::constantPool; - -// 常量池实现 -ConstantValue* ConstantValue::get(Type* type, int32_t value) { - ConstantValueKey key = {type, ConstantValVariant(value)}; +public: + unsigned getLabelIndex() { return labelIndex++; } + unsigned getTmpIndex() { return tmpIndex++; } - if (auto it = constantPool.find(key); it != constantPool.end()) { - return it->second; - } + BasicBlock *getBasicBlock() const { return block; } + BasicBlock::iterator getPosition() const { return position; } - ConstantValue* constant = new ConstantInt(type, value); - constantPool[key] = constant; - return constant; -} - -ConstantValue* ConstantValue::get(Type* type, float value) { - ConstantValueKey key = {type, ConstantValVariant(value)}; - - if (auto it = constantPool.find(key); it != constantPool.end()) { - return it->second; + void setPosition(BasicBlock *block, BasicBlock::iterator position) { + this->block = block; + this->position = position; } - - ConstantValue* constant = new ConstantFloat(type, value); - constantPool[key] = constant; - return constant; -} + void setPosition(BasicBlock::iterator position) { this->position = position; } -ConstantValue* ConstantValue::getInt32(int32_t value) { - return get(Type::getIntType(), value); -} - -ConstantValue* ConstantValue::getFloat32(float value) { - return get(Type::getFloatType(), value); -} - -ConstantValue* ConstantValue::getTrue() { - return get(Type::getIntType(), 1); -} - -ConstantValue* ConstantValue::getFalse() { - return get(Type::getIntType(), 0); -} - - - -void ConstantValue::print(std::ostream &os) const { - // 根据类型调用相应的打印实现 - if (auto intConst = dynamic_cast(this)) { - intConst->print(os); - } - else if (auto floatConst = dynamic_cast(this)) { - floatConst->print(os); + // 控制流管理函数 + BasicBlock *getBreakBlock() const { return breakBlocks.back(); } + BasicBlock *popBreakBlock() { + auto result = breakBlocks.back(); + breakBlocks.pop_back(); + return result; } - else { - os << "???"; // 未知常量类型 + BasicBlock *getContinueBlock() const { return continueBlocks.back(); } + BasicBlock *popContinueBlock() { + auto result = continueBlocks.back(); + continueBlocks.pop_back(); + return result; } -} - -void ConstantInt::print(std::ostream &os) const { - os << value; -} -void ConstantFloat::print(std::ostream &os) const { - if (value == static_cast(value)) { - os << value << ".0"; // 确保输出带小数点 - } else { - os << std::fixed << std::setprecision(6) << value; + BasicBlock *getTrueBlock() const { return trueBlocks.back(); } + BasicBlock *getFalseBlock() const { return falseBlocks.back(); } + BasicBlock *popTrueBlock() { + auto result = trueBlocks.back(); + trueBlocks.pop_back(); + return result; } -} + BasicBlock *popFalseBlock() { + auto result = falseBlocks.back(); + falseBlocks.pop_back(); + return result; + } + void pushBreakBlock(BasicBlock *block) { breakBlocks.push_back(block); } + void pushContinueBlock(BasicBlock *block) { continueBlocks.push_back(block); } + void pushTrueBlock(BasicBlock *block) { trueBlocks.push_back(block); } + void pushFalseBlock(BasicBlock *block) { falseBlocks.push_back(block); } -Argument::Argument(Type *type, BasicBlock *block, int index, - const std::string &name) - : Value(kArgument, type, name), block(block), index(index) { - if (not hasName()) - setName(to_string(block->getParent()->allocateVariableID())); -} +public: + // 指令创建函数 + Instruction *insertInst(Instruction *inst) { + assert(inst); + block->getInstructions().emplace(position, inst); + return inst; + } -void Argument::print(std::ostream &os) const { - assert(hasName()); - printVarName(os, this) << ": " << *getType(); -} + UnaryInst *createUnaryInst(Instruction::Kind kind, Type *type, Value *operand, + const std::string &name = "") { + auto inst = new UnaryInst(kind, type, operand, block, name); + return static_cast(insertInst(inst)); + } -BasicBlock::BasicBlock(Function *parent, const std::string &name) - : Value(kBasicBlock, Type::getLabelType(), name), parent(parent), - instructions(), arguments(), successors(), predecessors() { - if (not hasName()) - setName("bb" + to_string(getParent()->allocateblockID())); -} + UnaryInst *createNegInst(Value *operand, const std::string &name = "") { + return createUnaryInst(Instruction::kNeg, Type::getIntType(), operand, name); + } -void BasicBlock::print(std::ostream &os) const { - assert(hasName()); - os << " "; - printBlockName(os, this); - auto args = getArguments(); - auto b = args.begin(), e = args.end(); - if (b != e) { - os << '('; - printVarName(os, b->get()) << ": " << *b->get()->getType(); - for (auto &arg : make_range(std::next(b), e)) { - os << ", "; - printVarName(os, arg.get()) << ": " << *arg->getType(); + UnaryInst *createNotInst(Value *operand, const std::string &name = "") { + return createUnaryInst(Instruction::kNot, Type::getIntType(), operand, name); + } + + UnaryInst *createFtoIInst(Value *operand, const std::string &name = "") { + return createUnaryInst(Instruction::kFtoI, Type::getIntType(), operand, name); + } + + UnaryInst *createBitFtoIInst(Value *operand, const std::string &name = "") { + return createUnaryInst(Instruction::kBitFtoI, Type::getIntType(), operand, name); + } + + UnaryInst *createFNegInst(Value *operand, const std::string &name = "") { + return createUnaryInst(Instruction::kFNeg, Type::getFloatType(), operand, name); + } + + UnaryInst *createFNotInst(Value *operand, const std::string &name = "") { + return createUnaryInst(Instruction::kFNot, Type::getIntType(), operand, name); + } + + UnaryInst *createIToFInst(Value *operand, const std::string &name = "") { + return createUnaryInst(Instruction::kItoF, Type::getFloatType(), operand, name); + } + + UnaryInst *createBitItoFInst(Value *operand, const std::string &name = "") { + return createUnaryInst(Instruction::kBitItoF, Type::getFloatType(), operand, name); + } + + BinaryInst *createBinaryInst(Instruction::Kind kind, Type *type, Value *lhs, + Value *rhs, const std::string &name = "") { + auto inst = new BinaryInst(kind, type, lhs, rhs, block, name); + return static_cast(insertInst(inst)); + } + + BinaryInst *createAddInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kAdd, Type::getIntType(), lhs, rhs, name); + } + + BinaryInst *createSubInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kSub, Type::getIntType(), lhs, rhs, name); + } + + BinaryInst *createMulInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kMul, Type::getIntType(), lhs, rhs, name); + } + + BinaryInst *createDivInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kDiv, Type::getIntType(), lhs, rhs, name); + } + + BinaryInst *createRemInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kRem, Type::getIntType(), lhs, rhs, name); + } + + BinaryInst *createICmpEQInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kICmpEQ, Type::getIntType(), lhs, rhs, name); + } + + BinaryInst *createICmpNEInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kICmpNE, Type::getIntType(), lhs, rhs, name); + } + + BinaryInst *createICmpLTInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kICmpLT, Type::getIntType(), lhs, rhs, name); + } + + BinaryInst *createICmpLEInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kICmpLE, Type::getIntType(), lhs, rhs, name); + } + + BinaryInst *createICmpGTInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kICmpGT, Type::getIntType(), lhs, rhs, name); + } + + BinaryInst *createICmpGEInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kICmpGE, Type::getIntType(), lhs, rhs, name); + } + + BinaryInst *createFAddInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kFAdd, Type::getFloatType(), lhs, rhs, name); + } + + BinaryInst *createFSubInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kFSub, Type::getFloatType(), lhs, rhs, name); + } + + BinaryInst *createFMulInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kFMul, Type::getFloatType(), lhs, rhs, name); + } + + BinaryInst *createFDivInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kFDiv, Type::getFloatType(), lhs, rhs, name); + } + + BinaryInst *createFCmpEQInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kFCmpEQ, Type::getIntType(), lhs, rhs, name); + } + + BinaryInst *createFCmpNEInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kFCmpNE, Type::getIntType(), lhs, rhs, name); + } + + BinaryInst *createFCmpLTInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kFCmpLT, Type::getIntType(), lhs, rhs, name); + } + + BinaryInst *createFCmpLEInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kFCmpLE, Type::getIntType(), lhs, rhs, name); + } + + BinaryInst *createFCmpGTInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kFCmpGT, Type::getIntType(), lhs, rhs, name); + } + + BinaryInst *createFCmpGEInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kFCmpGE, Type::getIntType(), lhs, rhs, name); + } + + BinaryInst *createAndInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kAnd, Type::getIntType(), lhs, rhs, name); + } + + BinaryInst *createOrInst(Value *lhs, Value *rhs, const std::string &name = "") { + return createBinaryInst(Instruction::kOr, Type::getIntType(), lhs, rhs, name); + } + + CallInst *createCallInst(Function *callee, const std::vector &args, + const std::string &name = "") { + auto inst = new CallInst(callee, args, block, name); + return static_cast(insertInst(inst)); + } + + ReturnInst *createReturnInst(Value *value = nullptr) { + auto inst = new ReturnInst(value, block); + return static_cast(insertInst(inst)); + } + + UncondBrInst *createUncondBrInst(BasicBlock *thenBlock, + const std::vector &args) { + auto inst = new UncondBrInst(thenBlock, args, block); + return static_cast(insertInst(inst)); + } + + CondBrInst *createCondBrInst(Value *condition, BasicBlock *thenBlock, + BasicBlock *elseBlock, + const std::vector &thenArgs, + const std::vector &elseArgs) { + auto inst = new CondBrInst(condition, thenBlock, elseBlock, thenArgs, elseArgs, block); + return static_cast(insertInst(inst)); + } + + AllocaInst *createAllocaInst(Type *type, const std::vector &dims = {}, + const std::string &name = "") { + auto inst = new AllocaInst(type, dims, block, name); + return static_cast(insertInst(inst)); + } + + AllocaInst *createAllocaInstWithoutInsert(Type *type, + const std::vector &dims = {}, + BasicBlock *parent = nullptr, + const std::string &name = "") { + return new AllocaInst(type, dims, parent, name); + } + + LoadInst *createLoadInst(Value *pointer, const std::vector &indices = {}, + const std::string &name = "") { + auto inst = new LoadInst(pointer, indices, block, name); + return static_cast(insertInst(inst)); + } + + LaInst *createLaInst(Value *pointer, const std::vector &indices = {}, + const std::string &name = "") { + auto inst = new LaInst(pointer, indices, block, name); + return static_cast(insertInst(inst)); + } + + GetSubArrayInst *createGetSubArray(LVal *fatherArray, + const std::vector &indices, + const std::string &name = "") { + assert(fatherArray->getLValNumDims() > indices.size()); + std::vector subDims; + auto dims = fatherArray->getLValDims(); + auto iter = std::next(dims.begin(), indices.size()); + while (iter != dims.end()) { + subDims.emplace_back(*iter); + iter++; } - os << ')'; + + auto fatherArrayValue = dynamic_cast(fatherArray); + AllocaInst * childArray = new AllocaInst(fatherArrayValue->getType(), subDims, block); + auto inst = new GetSubArrayInst(fatherArray, childArray, indices, block ,name); + return static_cast(insertInst(inst)); } - os << ":\n"; - for (auto &inst : instructions) { - os << " " << *inst << '\n'; + + MemsetInst *createMemsetInst(Value *pointer, Value *begin, Value *size, + Value *value, const std::string &name = "") { + auto inst = new MemsetInst(pointer, begin, size, value, block, name); + return static_cast(insertInst(inst)); } -} -Instruction::Instruction(Kind kind, Type *type, BasicBlock *parent, - const std::string &name) - : User(kind, type, name), kind(kind), parent(parent) { - if (not type->isVoid() and not hasName()) - setName(to_string(getFunction()->allocateVariableID())); -} - -void CallInst::print(std::ostream &os) const { - if (not getType()->isVoid()) - printVarName(os, this) << " = call "; - printFunctionName(os, getCallee()) << '('; - auto args = getArguments(); - auto b = args.begin(), e = args.end(); - if (b != e) { - printOperand(os, *b); - for (auto arg : make_range(std::next(b), e)) { - os << ", "; - printOperand(os, arg); - } + StoreInst *createStoreInst(Value *value, Value *pointer, + const std::vector &indices = {}, + const std::string &name = "") { + auto inst = new StoreInst(value, pointer, indices, block, name); + return static_cast(insertInst(inst)); } - os << ") : " << *getType(); -} -void UnaryInst::print(std::ostream &os) const { - printVarName(os, this) << " = "; - switch (getKind()) { - case kNeg: - os << "neg"; - break; - case kNot: - os << "not"; - break; - case kFNeg: - os << "fneg"; - break; - case kFtoI: - os << "ftoi"; - break; - case kIToF: - os << "itof"; - break; - default: - assert(false); + PhiInst *createPhiInst(Type *type, Value *lhs, BasicBlock *parent, + const std::string &name = "") { + auto predNum = parent->getNumPredecessors(); + std::vector rhs(predNum, lhs); + auto inst = new PhiInst(type, lhs, rhs, lhs, parent, name); + parent->getInstructions().emplace(parent->begin(), inst); + return inst; } - printOperand(os, getOperand()) << " : " << *getType(); -} +}; -void BinaryInst::print(std::ostream &os) const { - printVarName(os, this) << " = "; - switch (getKind()) { - case kAdd: - os << "add"; - break; - case kSub: - os << "sub"; - break; - case kMul: - os << "mul"; - break; - case kDiv: - os << "div"; - break; - case kRem: - os << "rem"; - break; - case kICmpEQ: - os << "icmpeq"; - break; - case kICmpNE: - os << "icmpne"; - break; - case kICmpLT: - os << "icmplt"; - break; - case kICmpGT: - os << "icmpgt"; - break; - case kICmpLE: - os << "icmple"; - break; - case kICmpGE: - os << "icmpge"; - break; - case kFAdd: - os << "fadd"; - break; - case kFSub: - os << "fsub"; - break; - case kFMul: - os << "fmul"; - break; - case kFDiv: - os << "fdiv"; - break; - case kFRem: - os << "frem"; - break; - case kFCmpEQ: - os << "fcmpeq"; - break; - case kFCmpNE: - os << "fcmpne"; - break; - case kFCmpLT: - os << "fcmplt"; - break; - case kFCmpGT: - os << "fcmpgt"; - break; - case kFCmpLE: - os << "fcmple"; - break; - case kFCmpGE: - os << "fcmpge"; - break; - default: - assert(false); - } - os << ' '; - printOperand(os, getLhs()) << ", "; - printOperand(os, getRhs()) << " : " << *getType(); -} - -void ReturnInst::print(std::ostream &os) const { - os << "return"; - if (auto value = getReturnValue()) { - os << ' '; - printOperand(os, value) << " : " << *value->getType(); - } -} - -void UncondBrInst::print(std::ostream &os) const { - os << "br "; - printBlockName(os, getBlock()); - auto args = getArguments(); - auto b = args.begin(), e = args.end(); - if (b != e) { - os << '('; - printOperand(os, *b); - for (auto arg : make_range(std::next(b), e)) { - os << ", "; - printOperand(os, arg); - } - os << ')'; - } -} - -void CondBrInst::print(std::ostream &os) const { - os << "condbr "; - printOperand(os, getCondition()) << ", "; - printBlockName(os, getThenBlock()); - { - auto args = getThenArguments(); - auto b = args.begin(), e = args.end(); - if (b != e) { - os << '('; - printOperand(os, *b); - for (auto arg : make_range(std::next(b), e)) { - os << ", "; - printOperand(os, arg); - } - os << ')'; - } - } - os << ", "; - printBlockName(os, getElseBlock()); - { - auto args = getElseArguments(); - auto b = args.begin(), e = args.end(); - if (b != e) { - os << '('; - printOperand(os, *b); - for (auto arg : make_range(std::next(b), e)) { - os << ", "; - printOperand(os, arg); - } - os << ')'; - } - } -} - -void AllocaInst::print(std::ostream &os) const { - if (getNumDims()) - cerr << "not implemented yet\n"; - printVarName(os, this) << " = "; - os << "alloca " - << *static_cast(getType())->getBaseType(); - os << " : " << *getType(); -} - -void LoadInst::print(std::ostream &os) const { - if (getNumIndices()) - cerr << "not implemented yet\n"; - printVarName(os, this) << " = "; - os << "load "; - printOperand(os, getPointer()) << " : " << *getType(); -} - -void StoreInst::print(std::ostream &os) const { - if (getNumIndices()) - cerr << "not implemented yet\n"; - os << "store "; - printOperand(os, getValue()) << ", "; - printOperand(os, getPointer()) << " : " << *getValue()->getType(); -} - -void Function::print(std::ostream &os) const { - auto returnType = getReturnType(); - auto paramTypes = getParamTypes(); - os << *returnType << ' '; - printFunctionName(os, this) << '('; - auto b = paramTypes.begin(), e = paramTypes.end(); - if (b != e) { - os << *(*b); - for (auto type : make_range(std::next(b), e)) - os << ", " << *type; - } - os << ") {\n"; - for (auto &bb : getBasicBlocks()) { - os << *bb << '\n'; - } - os << "}"; -} - -void Module::print(std::ostream &os) const { - for (auto &value : children) - os << *value << '\n'; -} - -// ArrayValue *ArrayValue::get(Type *type, const vector &values) { -// static map, unique_ptr> arrayConstants; -// hash hasher; -// auto key = make_pair( -// type, hasher(string(reinterpret_cast(values.data()), -// values.size() * sizeof(Value *)))); - -// auto iter = arrayConstants.find(key); -// if (iter != arrayConstants.end()) -// return iter->second.get(); -// auto constant = new ArrayValue(type, values); -// assert(constant); -// auto result = arrayConstants.emplace(key, constant); -// return result.first->second.get(); -// } - -// ArrayValue *ArrayValue::get(const std::vector &values) { -// vector vals(values.size(), nullptr); -// std::transform(values.begin(), values.end(), vals.begin(), -// [](int v) { return ConstantValue::get(v); }); -// return get(Type::getIntType(), vals); -// } - -// ArrayValue *ArrayValue::get(const std::vector &values) { -// vector vals(values.size(), nullptr); -// std::transform(values.begin(), values.end(), vals.begin(), -// [](float v) { return ConstantValue::get(v); }); -// return get(Type::getFloatType(), vals); -// } - -void User::setOperand(int index, Value *value) { - assert(index < getNumOperands()); - operands[index].setValue(value); -} - -void User::replaceOperand(int index, Value *value) { - assert(index < getNumOperands()); - auto &use = operands[index]; - use.getValue()->removeUse(&use); - use.setValue(value); -} - -CallInst::CallInst(Function *callee, const std::vector &args, - BasicBlock *parent, const std::string &name) - : Instruction(kCall, callee->getReturnType(), parent, name) { - addOperand(callee); - for (auto arg : args) - addOperand(arg); -} - -Function *CallInst::getCallee() const { - return dyncast(getOperand(0)); -} - -} // namespace sysy \ No newline at end of file +} // namespace sysy diff --git a/src/IR.h b/src/IR.h deleted file mode 100644 index 7d228ad..0000000 --- a/src/IR.h +++ /dev/null @@ -1,1120 +0,0 @@ -#pragma once - -#include "range.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace sysy { - -/*! - * \defgroup type Types - * The SysY type system is quite simple. - * 1. The base class `Type` is used to represent all primitive scalar types, - * include `int`, `float`, `void`, and the label type representing branch - * targets. - * 2. `PointerType` and `FunctionType` derive from `Type` and represent pointer - * type and function type, respectively. - * - * NOTE `Type` and its derived classes have their ctors declared as 'protected'. - * Users must use Type::getXXXType() methods to obtain `Type` pointers. - * @{ - */ - -/*! - * `Type` is used to represent all primitive scalar types, - * include `int`, `float`, `void`, and the label type representing branch - * targets - */ - -class ArrayType; - -class Type { -public: - enum Kind { - kInt, - kFloat, - kVoid, - kLabel, - kPointer, - kFunction, - kArray, - }; - Kind kind; - -protected: - Type(Kind kind) : kind(kind) {} - virtual ~Type() = default; - -public: - static Type *getIntType(); - static Type *getFloatType(); - static Type *getVoidType(); - static Type *getLabelType(); - static Type *getPointerType(Type *baseType); - static Type *getFunctionType(Type *returnType, - const std::vector ¶mTypes = {}); - static Type *getArrayType(Type *elementType, const std::vector &dims = {}); - -public: - Kind getKind() const { return kind; } - bool isInt() const { return kind == kInt; } - bool isFloat() const { return kind == kFloat; } - bool isVoid() const { return kind == kVoid; } - bool isLabel() const { return kind == kLabel; } - bool isPointer() const { return kind == kPointer; } - bool isFunction() const { return kind == kFunction; } - bool isArray() const { return kind == kArray; } - bool isIntOrFloat() const { return kind == kInt or kind == kFloat; } - int getSize() const; - - ArrayType* asArrayType() const; - - template - std::enable_if_t, T *> as() const { - return dynamic_cast(const_cast(this)); - } - void print(std::ostream &os) const; -}; // class Type - -//! Pointer type -class PointerType : public Type { -protected: - Type *baseType; - -protected: - PointerType(Type *baseType) : Type(kPointer), baseType(baseType) {} - -public: - static PointerType *get(Type *baseType); - -public: - Type *getBaseType() const { return baseType; } -}; // class PointerType - -//! Function type -class FunctionType : public Type { -private: - Type *returnType; - std::vector paramTypes; - -protected: - FunctionType(Type *returnType, const std::vector ¶mTypes = {}) - : Type(kFunction), returnType(returnType), paramTypes(paramTypes) {} - -public: - static FunctionType *get(Type *returnType, - const std::vector ¶mTypes = {}); - -public: - Type *getReturnType() const { return returnType; } - auto getParamTypes() const { return make_range(paramTypes); } - int getNumParams() const { return paramTypes.size(); } -}; // class FunctionType - -class ArrayType : public Type { -private: - Type *elementType; // 数组元素类型 - std::vector dimensions; // 维度信息(空向量表示未知大小) - -protected: - ArrayType(Type *elemType, const std::vector &dims = {}) - : Type(kArray), elementType(elemType), dimensions(dims) { - // 确保元素类型有效 - assert(elemType && "Array element type cannot be null"); - assert(!elemType->isVoid() && "Cannot have array of void"); - assert(!elemType->isLabel() && "Cannot have array of labels"); - } - -public: - // 获取数组类型(带缓存机制) - static ArrayType *get(Type *elemType, const std::vector &dims = {}) { - // 实现类型缓存池(避免重复创建) - static std::map>, ArrayType*> cache; - - auto key = std::make_pair(elemType, dims); - if (cache.find(key) == cache.end()) { - cache[key] = new ArrayType(elemType, dims); - } - return cache[key]; - } - - Type *getElementType() const { return elementType; } - const std::vector& getDimensions() const { return dimensions; } - size_t getNumDimensions() const { return dimensions.size(); } - - int getArraySize() const { - int size = elementType->getSize(); - for (int dim : dimensions) { - size *= dim; - } - return size; - } - -};//class ArrayType - -/*! - * @} - */ - -/*! - * \defgroup ir IR - * - * The SysY IR is an instruction level language. The IR is orgnized - * as a four-level tree structure, as shown below - * - * \dotfile ir-4level.dot IR Structure - * - * - `Module` corresponds to the top level "CompUnit" syntax structure - * - `GlobalValue` corresponds to the "Decl" syntax structure - * - `Function` corresponds to the "FuncDef" syntax structure - * - `BasicBlock` is a sequence of instructions without branching. A `Function` - * made up by one or more `BasicBlock`s. - * - `Instruction` represents a primitive operation on values, e.g., add or sub. - * - * The fundamental data concept in SysY IR is `Value`. A `Value` is like - * a register and is used by `Instruction`s as input/output operand. Each value - * has an associated `Type` indicating the data type held by the value. - * - * Most `Instruction`s have a three-address signature, i.e., there are at most 2 - * input values and at most 1 output value. - * - * The SysY IR adots a Static-Single-Assignment (SSA) design. That is, `Value` - * is defined (as the output operand ) by some instruction, and used (as the - * input operand) by other instructions. While a value can be used by multiple - * instructions, the `definition` occurs only once. As a result, there is a - * one-to-one relation between a value and the instruction defining it. In other - * words, any instruction defines a value can be viewed as the defined value - * itself. So `Instruction` is also a `Value` in SysY IR. See `Value` for the - * type hierachy. - * - * @{ - */ - -class User; -class Value; - -//! `Use` represents the relation between a `Value` and its `User` -class Use { -private: - //! the position of value in the user's operands, i.e., - //! user->getOperands[index] == value - int index; - User *user; - Value *value; - -public: - Use() = default; - Use(int index, User *user, Value *value) - : index(index), user(user), value(value) {} - -public: - int getIndex() const { return index; } - User *getUser() const { return user; } - Value *getValue() const { return value; } - void setValue(Value *value) { value = value; } -}; // class Use - -template -inline std::enable_if_t, bool> -isa(const Value *value) { - return T::classof(value); -} - -template -inline std::enable_if_t, T *> -dyncast(Value *value) { - return isa(value) ? static_cast(value) : nullptr; -} - -template -inline std::enable_if_t, const T *> -dyncast(const Value *value) { - return isa(value) ? static_cast(value) : nullptr; -} - -//! The base class of all value types -class Value { -public: - enum Kind : uint64_t { - kInvalid, - // Instructions - // Binary - kAdd = 0x1UL << 0, - kSub = 0x1UL << 1, - kMul = 0x1UL << 2, - kDiv = 0x1UL << 3, - kRem = 0x1UL << 4, - kICmpEQ = 0x1UL << 5, - kICmpNE = 0x1UL << 6, - kICmpLT = 0x1UL << 7, - kICmpGT = 0x1UL << 8, - kICmpLE = 0x1UL << 9, - kICmpGE = 0x1UL << 10, - kFAdd = 0x1UL << 14, - kFSub = 0x1UL << 15, - kFMul = 0x1UL << 16, - kFDiv = 0x1UL << 17, - kFRem = 0x1UL << 18, - kFCmpEQ = 0x1UL << 19, - kFCmpNE = 0x1UL << 20, - kFCmpLT = 0x1UL << 21, - kFCmpGT = 0x1UL << 22, - kFCmpLE = 0x1UL << 23, - kFCmpGE = 0x1UL << 24, - // Unary - kNeg = 0x1UL << 25, - kNot = 0x1UL << 26, - kFNeg = 0x1UL << 27, - kFtoI = 0x1UL << 28, - kIToF = 0x1UL << 29, - // call - kCall = 0x1UL << 30, - // terminator - kCondBr = 0x1UL << 31, - kBr = 0x1UL << 32, - kReturn = 0x1UL << 33, - // mem op - kAlloca = 0x1UL << 34, - kLoad = 0x1UL << 35, - kStore = 0x1UL << 36, - kFirstInst = kAdd, - kLastInst = kStore, - // others - kArgument = 0x1UL << 37, - kBasicBlock = 0x1UL << 38, - kFunction = 0x1UL << 39, - kConstant = 0x1UL << 40, - kGlobal = 0x1UL << 41, - }; - -protected: - Kind kind; - Type *type; - std::string name; - std::list uses; - -protected: - Value(Kind kind, Type *type, const std::string &name = "") - : kind(kind), type(type), name(name), uses() {} - -public: - virtual ~Value() = default; - -public: - Kind getKind() const { return kind; } - static bool classof(const Value *) { return true; } - -public: - Type *getType() const { return type; } - const std::string &getName() const { return name; } - void setName(const std::string &n) { name = n; } - bool hasName() const { return not name.empty(); } - bool isInt() const { return type->isInt(); } - bool isFloat() const { return type->isFloat(); } - bool isPointer() const { return type->isPointer(); } - const std::list &getUses() { return uses; } - void addUse(Use *use) { uses.push_back(use); } - void replaceAllUsesWith(Value *value); - void removeUse(Use *use) { uses.remove(use); } - bool isConstant() const; - -public: - virtual void print(std::ostream &os) const {}; -}; // class Value - -/*! - * Static constants known at compile time. - * - * `ConstantValue`s are not defined by instructions, and do not use any other - * `Value`s. It's type is either `int` or `float`. - */ - -class ConstantInt; -class ConstantFloat; -//常量池优化 - -using ConstantValVariant = std::variant; -using ConstantValueKey = std::pair; - -class ConstantValue : public Value { -protected: - ConstantValue(Type* type) - : Value(kConstant, type, "") {} -public: - struct ConstantValueHash; - struct ConstantValueEqual; - - static std::unordered_map constantPool; - - virtual ~ConstantValue() = default; - - static ConstantValue* get(Type* type, int32_t value); - static ConstantValue* get(Type* type, float value); - - static bool classof(const Value* value) { - return value->getKind() == kConstant; - } - - virtual int32_t getInt() const = 0; - virtual float getFloat() const = 0; - virtual bool isZero() const = 0; - virtual bool isOne() const = 0; - - - static ConstantValue* getInt32(int32_t value); - static ConstantValue* getFloat32(float value); - static ConstantValue* getTrue() ; - static ConstantValue* getFalse(); - - void print(std::ostream &os) const override; -}; - -struct ConstantValue::ConstantValueHash { - std::size_t operator()(const ConstantValueKey& key) const { - std::size_t typeHash = std::hash{}(key.first); - std::size_t valHash = 0; - if (key.first->isInt()) { - valHash = std::hash{}(std::get(key.second)); - } else if (key.first->isFloat()) { - // 修复5: 确保float哈希正确 - valHash = std::hash{}(std::get(key.second)); - } - return typeHash ^ (valHash << 1); - } -}; - -struct ConstantValue::ConstantValueEqual { - bool operator()(const ConstantValueKey& lhs, const ConstantValueKey& rhs) const { - if (lhs.first != rhs.first) return false; - if (lhs.first->isInt()) { - return std::get(lhs.second) == std::get(rhs.second); - } else if (lhs.first->isFloat()) { - // 修复6: 使用浮点比较容差 - const float eps = 1e-6; - return fabs(std::get(lhs.second) - std::get(rhs.second)) < eps; - } - return false; - } -}; - -class ConstantInt : public ConstantValue { - int32_t value; - friend class ConstantValue; - -protected: - ConstantInt(Type* type, int32_t value) - : ConstantValue(type), value(value) { - assert(type->isInt() && "Invalid type for ConstantInt"); - } -public: - static ConstantInt* get(Type* type, int32_t value); - - int32_t getInt() const override { return value; } - float getFloat() const override { return static_cast(value); } - bool isZero() const override { return value == 0; } - bool isOne() const override { return value == 1; } - - void print(std::ostream& os) const override ; -}; - -class ConstantFloat : public ConstantValue { - float value; - friend class ConstantValue; - -protected: - ConstantFloat(Type* type, float value) - : ConstantValue(type), value(value) { - assert(type->isFloat() && "Invalid type for ConstantFloat"); - } -public: - static ConstantFloat* get(Type* type, float value); - - int32_t getInt() const override { return static_cast(value); } - float getFloat() const override { return value; } - bool isZero() const override { return value == 0.0f; } - bool isOne() const override { return value == 1.0f; } - - void print(std::ostream& os) const override; -}; - -class BasicBlock; -/*! - * Arguments of `BasicBlock`s. - * - * SysY IR is an SSA language, however, it does not use PHI instructions as in - * LLVM IR. `Value`s from different predecessor blocks are passed explicitly as - * block arguments. This is also the approach used by MLIR. - * NOTE that `Function` does not own `Argument`s, function arguments are - * implemented as its entry block's arguments. - */ - -class Argument : public Value { -protected: - BasicBlock *block; - int index; - -public: - Argument(Type *type, BasicBlock *block, int index, - const std::string &name = ""); - -public: - static bool classof(const Value *value) { - return value->getKind() == kConstant; - } - -public: - BasicBlock *getParent() const { return block; } - int getIndex() const { return index; } - -public: - void print(std::ostream &os) const override; -}; - -class Instruction; -class Function; -/*! - * The container for `Instruction` sequence. - * - * `BasicBlock` maintains a list of `Instruction`s, with the last one being - * a terminator (branch or return). Besides, `BasicBlock` stores its arguments - * and records its predecessor and successor `BasicBlock`s. - */ -class BasicBlock : public Value { - friend class Function; - -public: - using inst_list = std::list>; - using iterator = inst_list::iterator; - using arg_list = std::vector>; - using block_list = std::vector; - -protected: - Function *parent; - inst_list instructions; - arg_list arguments; - block_list successors; - block_list predecessors; - -protected: - explicit BasicBlock(Function *parent, const std::string &name = ""); - -public: - static bool classof(const Value *value) { - return value->getKind() == kBasicBlock; - } - -public: - int getNumInstructions() const { return instructions.size(); } - int getNumArguments() const { return arguments.size(); } - int getNumPredecessors() const { return predecessors.size(); } - int getNumSuccessors() const { return successors.size(); } - Function *getParent() const { return parent; } - inst_list &getInstructions() { return instructions; } - auto getArguments() const { return make_range(arguments); } - block_list &getPredecessors() { return predecessors; } - block_list &getSuccessors() { return successors; } - iterator begin() { return instructions.begin(); } - iterator end() { return instructions.end(); } - iterator terminator() { return std::prev(end()); } - Argument *createArgument(Type *type, const std::string &name = "") { - auto arg = new Argument(type, this, arguments.size(), name); - assert(arg); - arguments.emplace_back(arg); - return arguments.back().get(); - }; - -public: - void print(std::ostream &os) const override; -}; // class BasicBlock - -//! User is the abstract base type of `Value` types which use other `Value` as -//! operands. Currently, there are two kinds of `User`s, `Instruction` and -//! `GlobalValue`. -class User : public Value { -protected: - std::vector operands; - -protected: - User(Kind kind, Type *type, const std::string &name = "") - : Value(kind, type, name), operands() {} - -public: - using use_iterator = std::vector::const_iterator; - struct operand_iterator : public std::vector::const_iterator { - using Base = std::vector::const_iterator; - operand_iterator(const Base &iter) : Base(iter) {} - using value_type = Value *; - value_type operator->() { return Base::operator*().getValue(); } - value_type operator*() { return Base::operator*().getValue(); } - }; - // struct const_operand_iterator : std::vector::const_iterator { - // using Base = std::vector::const_iterator; - // const_operand_iterator(const Base &iter) : Base(iter) {} - // using value_type = Value *; - // value_type operator->() { return operator*().getValue(); } - // }; - -public: - int getNumOperands() const { return operands.size(); } - operand_iterator operand_begin() const { return operands.begin(); } - operand_iterator operand_end() const { return operands.end(); } - auto getOperands() const { - return make_range(operand_begin(), operand_end()); - } - Value *getOperand(int index) const { return operands[index].getValue(); } - void addOperand(Value *value) { - operands.emplace_back(operands.size(), this, value); - value->addUse(&operands.back()); - } - template void addOperands(const ContainerT &operands) { - for (auto value : operands) - addOperand(value); - } - void replaceOperand(int index, Value *value); - void setOperand(int index, Value *value); -}; // class User - -/*! - * Base of all concrete instruction types. - */ -class Instruction : public User { -public: - // enum Kind : uint64_t { - // kInvalid = 0x0UL, - // // Binary - // kAdd = 0x1UL << 0, - // kSub = 0x1UL << 1, - // kMul = 0x1UL << 2, - // kDiv = 0x1UL << 3, - // kRem = 0x1UL << 4, - // kICmpEQ = 0x1UL << 5, - // kICmpNE = 0x1UL << 6, - // kICmpLT = 0x1UL << 7, - // kICmpGT = 0x1UL << 8, - // kICmpLE = 0x1UL << 9, - // kICmpGE = 0x1UL << 10, - // kFAdd = 0x1UL << 14, - // kFSub = 0x1UL << 15, - // kFMul = 0x1UL << 16, - // kFDiv = 0x1UL << 17, - // kFRem = 0x1UL << 18, - // kFCmpEQ = 0x1UL << 19, - // kFCmpNE = 0x1UL << 20, - // kFCmpLT = 0x1UL << 21, - // kFCmpGT = 0x1UL << 22, - // kFCmpLE = 0x1UL << 23, - // kFCmpGE = 0x1UL << 24, - // // Unary - // kNeg = 0x1UL << 25, - // kNot = 0x1UL << 26, - // kFNeg = 0x1UL << 27, - // kFtoI = 0x1UL << 28, - // kIToF = 0x1UL << 29, - // // call - // kCall = 0x1UL << 30, - // // terminator - // kCondBr = 0x1UL << 31, - // kBr = 0x1UL << 32, - // kReturn = 0x1UL << 33, - // // mem op - // kAlloca = 0x1UL << 34, - // kLoad = 0x1UL << 35, - // kStore = 0x1UL << 36, - // // constant - // // kConstant = 0x1UL << 37, - // }; - -protected: - Kind kind; - BasicBlock *parent; - -protected: - Instruction(Kind kind, Type *type, BasicBlock *parent = nullptr, - const std::string &name = ""); - -public: - static bool classof(const Value *value) { - return value->getKind() >= kFirstInst and value->getKind() <= kLastInst; - } - -public: - Kind getKind() const { return kind; } - BasicBlock *getParent() const { return parent; } - Function *getFunction() const { return parent->getParent(); } - void setParent(BasicBlock *bb) { parent = bb; } - - bool isBinary() const { - static constexpr uint64_t BinaryOpMask = - (kAdd | kSub | kMul | kDiv | kRem) | - (kICmpEQ | kICmpNE | kICmpLT | kICmpGT | kICmpLE | kICmpGE) | - (kFAdd | kFSub | kFMul | kFDiv | kFRem) | - (kFCmpEQ | kFCmpNE | kFCmpLT | kFCmpGT | kFCmpLE | kFCmpGE); - return kind & BinaryOpMask; - } - bool isUnary() const { - static constexpr uint64_t UnaryOpMask = kNeg | kNot | kFNeg | kFtoI | kIToF; - return kind & UnaryOpMask; - } - bool isMemory() const { - static constexpr uint64_t MemoryOpMask = kAlloca | kLoad | kStore; - return kind & MemoryOpMask; - } - bool isTerminator() const { - static constexpr uint64_t TerminatorOpMask = kCondBr | kBr | kReturn; - return kind & TerminatorOpMask; - } - bool isCmp() const { - static constexpr uint64_t CmpOpMask = - (kICmpEQ | kICmpNE | kICmpLT | kICmpGT | kICmpLE | kICmpGE) | - (kFCmpEQ | kFCmpNE | kFCmpLT | kFCmpGT | kFCmpLE | kFCmpGE); - return kind & CmpOpMask; - } - bool isBranch() const { - static constexpr uint64_t BranchOpMask = kBr | kCondBr; - return kind & BranchOpMask; - } - bool isCommutative() const { - static constexpr uint64_t CommutativeOpMask = - kAdd | kMul | kICmpEQ | kICmpNE | kFAdd | kFMul | kFCmpEQ | kFCmpNE; - return kind & CommutativeOpMask; - } - bool isUnconditional() const { return kind == kBr; } - bool isConditional() const { return kind == kCondBr; } -}; // class Instruction - -class Function; -//! Function call. -class CallInst : public Instruction { - friend class IRBuilder; - -protected: - CallInst(Function *callee, const std::vector &args = {}, - BasicBlock *parent = nullptr, const std::string &name = ""); - -public: - static bool classof(const Value *value) { return value->getKind() == kCall; } - -public: - Function *getCallee() const; - auto getArguments() const { - return make_range(std::next(operand_begin()), operand_end()); - } - -public: - void print(std::ostream &os) const override; -}; // class CallInst - -//! Unary instruction, includes '!', '-' and type conversion. -class UnaryInst : public Instruction { - friend class IRBuilder; - -protected: - UnaryInst(Kind kind, Type *type, Value *operand, BasicBlock *parent = nullptr, - const std::string &name = "") - : Instruction(kind, type, parent, name) { - addOperand(operand); - } - -public: - static bool classof(const Value *value) { - return Instruction::classof(value) and - static_cast(value)->isUnary(); - } - -public: - Value *getOperand() const { return User::getOperand(0); } - -public: - void print(std::ostream &os) const override; -}; // class UnaryInst - -//! Binary instruction, e.g., arithmatic, relation, logic, etc. -class BinaryInst : public Instruction { - friend class IRBuilder; - -protected: - BinaryInst(Kind kind, Type *type, Value *lhs, Value *rhs, BasicBlock *parent, - const std::string &name = "") - : Instruction(kind, type, parent, name) { - addOperand(lhs); - addOperand(rhs); - } - -public: - static bool classof(const Value *value) { - return Instruction::classof(value) and - static_cast(value)->isBinary(); - } - -public: - Value *getLhs() const { return getOperand(0); } - Value *getRhs() const { return getOperand(1); } - -public: - void print(std::ostream &os) const override; -}; // class BinaryInst - -//! The return statement -class ReturnInst : public Instruction { - friend class IRBuilder; - -protected: - ReturnInst(Value *value = nullptr, BasicBlock *parent = nullptr) - : Instruction(kReturn, Type::getVoidType(), parent, "") { - if (value) - addOperand(value); - } - -public: - static bool classof(const Value *value) { - return value->getKind() == kReturn; - } - -public: - bool hasReturnValue() const { return not operands.empty(); } - Value *getReturnValue() const { - return hasReturnValue() ? getOperand(0) : nullptr; - } - -public: - void print(std::ostream &os) const override; -}; // class ReturnInst - -//! Unconditional branch -class UncondBrInst : public Instruction { - friend class IRBuilder; - -protected: - UncondBrInst(BasicBlock *block, std::vector args, - BasicBlock *parent = nullptr) - : Instruction(kCondBr, Type::getVoidType(), parent, "") { - assert(block->getNumArguments() == args.size()); - addOperand(block); - addOperands(args); - } - -public: - static bool classof(const Value *value) { return value->getKind() == kBr; } - -public: - BasicBlock *getBlock() const { return dyncast(getOperand(0)); } - auto getArguments() const { - return make_range(std::next(operand_begin()), operand_end()); - } - -public: - void print(std::ostream &os) const override; -}; // class UncondBrInst - -//! Conditional branch -class CondBrInst : public Instruction { - friend class IRBuilder; - -protected: - CondBrInst(Value *condition, BasicBlock *thenBlock, BasicBlock *elseBlock, - const std::vector &thenArgs, - const std::vector &elseArgs, BasicBlock *parent = nullptr) - : Instruction(kCondBr, Type::getVoidType(), parent, "") { - assert(thenBlock->getNumArguments() == thenArgs.size() and - elseBlock->getNumArguments() == elseArgs.size()); - addOperand(condition); - addOperand(thenBlock); - addOperand(elseBlock); - addOperands(thenArgs); - addOperands(elseArgs); - } - -public: - static bool classof(const Value *value) { - return value->getKind() == kCondBr; - } - -public: - Value *getCondition() const { return getOperand(0); } - BasicBlock *getThenBlock() const { - return dyncast(getOperand(1)); - } - BasicBlock *getElseBlock() const { - return dyncast(getOperand(2)); - } - auto getThenArguments() const { - auto begin = std::next(operand_begin(), 3); - auto end = std::next(begin, getThenBlock()->getNumArguments()); - return make_range(begin, end); - } - auto getElseArguments() const { - auto begin = - std::next(operand_begin(), 3 + getThenBlock()->getNumArguments()); - auto end = operand_end(); - return make_range(begin, end); - } - -public: - void print(std::ostream &os) const override; -}; // class CondBrInst - -//! Allocate memory for stack variables, used for non-global variable declartion -class AllocaInst : public Instruction { - friend class IRBuilder; - -protected: - AllocaInst(Type *type, const std::vector &dims = {}, - BasicBlock *parent = nullptr, const std::string &name = "") - : Instruction(kAlloca, type, parent, name) { - addOperands(dims); - } - -public: - static bool classof(const Value *value) { - return value->getKind() == kAlloca; - } - -public: - int getNumDims() const { return getNumOperands(); } - auto getDims() const { return getOperands(); } - Value *getDim(int index) { return getOperand(index); } - -public: - void print(std::ostream &os) const override; -}; // class AllocaInst - -//! Load a value from memory address specified by a pointer value -class LoadInst : public Instruction { - friend class IRBuilder; - -protected: - LoadInst(Value *pointer, const std::vector &indices = {}, - BasicBlock *parent = nullptr, const std::string &name = "") - : Instruction(kLoad, pointer->getType()->as()->getBaseType(), - parent, name) { - addOperand(pointer); - addOperands(indices); - } - -public: - static bool classof(const Value *value) { return value->getKind() == kLoad; } - -public: - int getNumIndices() const { return getNumOperands() - 1; } - Value *getPointer() const { return getOperand(0); } - auto getIndices() const { - return make_range(std::next(operand_begin()), operand_end()); - } - Value *getIndex(int index) const { return getOperand(index + 1); } - -public: - void print(std::ostream &os) const override; -}; // class LoadInst - -//! Store a value to memory address specified by a pointer value -class StoreInst : public Instruction { - friend class IRBuilder; - -protected: - StoreInst(Value *value, Value *pointer, - const std::vector &indices = {}, - BasicBlock *parent = nullptr, const std::string &name = "") - : Instruction(kStore, Type::getVoidType(), parent, name) { - addOperand(value); - addOperand(pointer); - addOperands(indices); - } - -public: - static bool classof(const Value *value) { return value->getKind() == kStore; } - -public: - int getNumIndices() const { return getNumOperands() - 2; } - Value *getValue() const { return getOperand(0); } - Value *getPointer() const { return getOperand(1); } - auto getIndices() const { - return make_range(std::next(operand_begin(), 2), operand_end()); - } - Value *getIndex(int index) const { return getOperand(index + 2); } - -public: - void print(std::ostream &os) const override; -}; // class StoreInst - -class Module; -//! Function definition -class Function : public Value { - friend class Module; - -protected: - Function(Module *parent, Type *type, const std::string &name) - : Value(kFunction, type, name), parent(parent), variableID(0), blocks() { - blocks.emplace_back(new BasicBlock(this, "entry")); - } - -public: - static bool classof(const Value *value) { - return value->getKind() == kFunction; - } - -public: - using block_list = std::list>; - -protected: - Module *parent; - int variableID; - int blockID; - block_list blocks; - -public: - Type *getReturnType() const { - return getType()->as()->getReturnType(); - } - auto getParamTypes() const { - return getType()->as()->getParamTypes(); - } - auto getBasicBlocks() const { return make_range(blocks); } - BasicBlock *getEntryBlock() const { return blocks.front().get(); } - BasicBlock *addBasicBlock(const std::string &name = "") { - blocks.emplace_back(new BasicBlock(this, name)); - return blocks.back().get(); - } - void removeBasicBlock(BasicBlock *block) { - blocks.remove_if([&](std::unique_ptr &b) -> bool { - return block == b.get(); - }); - } - int allocateVariableID() { return variableID++; } - int allocateblockID() { return blockID++; } - -public: - void print(std::ostream &os) const override; -}; // class Function - -// class ArrayValue : public User { -// protected: -// ArrayValue(Type *type, const std::vector &values = {}) -// : User(type, "") { -// addOperands(values); -// } - -// public: -// static ArrayValue *get(Type *type, const std::vector &values); -// static ArrayValue *get(const std::vector &values); -// static ArrayValue *get(const std::vector &values); - -// public: -// auto getValues() const { return getOperands(); } - -// public: -// void print(std::ostream &os) const override{}; -// }; // class ConstantArray - -//! Global value declared at file scope -class GlobalValue : public User { - friend class Module; - -protected: - Module *parent; - bool hasInit; - bool isConst; - -protected: - GlobalValue(Module *parent, Type *type, const std::string &name, - const std::vector &dims = {}, Value *init = nullptr) - : User(kGlobal, type, name), parent(parent), hasInit(init) { - assert(type->isPointer()); - addOperands(dims); - if (init) - addOperand(init); - } - -public: - static bool classof(const Value *value) { - return value->getKind() == kGlobal; - } - -public: - Value *init() const { return hasInit ? operands.back().getValue() : nullptr; } - int getNumDims() const { return getNumOperands() - (hasInit ? 1 : 0); } - Value *getDim(int index) { return getOperand(index); } - -public: - void print(std::ostream &os) const override{}; -}; // class GlobalValue - -//! IR unit for representing a SysY compile unit -class Module { -protected: - std::vector> children; - std::map functions; - std::map globals; - -public: - Module() = default; - -public: - Function *createFunction(const std::string &name, Type *type) { - if (functions.count(name)) - return nullptr; - auto func = new Function(this, type, name); - assert(func); - children.emplace_back(func); - functions.emplace(name, func); - return func; - }; - GlobalValue *createGlobalValue(const std::string &name, Type *type, - const std::vector &dims = {}, - Value *init = nullptr) { - if (globals.count(name)) - return nullptr; - auto global = new GlobalValue(this, type, name, dims, init); - assert(global); - children.emplace_back(global); - globals.emplace(name, global); - return global; - } - Function *getFunction(const std::string &name) const { - auto result = functions.find(name); - if (result == functions.end()) - return nullptr; - return result->second; - } - GlobalValue *getGlobalValue(const std::string &name) const { - auto result = globals.find(name); - if (result == globals.end()) - return nullptr; - return result->second; - } - - std::map *getFunctions(){ - return &functions; - } - std::map *getGlobalValues(){ - return &globals; - } - -public: - void print(std::ostream &os) const; -}; // class Module - -/*! - * @} - */ -inline std::ostream &operator<<(std::ostream &os, const Type &type) { - type.print(os); - return os; -} - -inline std::ostream &operator<<(std::ostream &os, const Value &value) { - value.print(os); - return os; -} - -} // namespace sysy \ No newline at end of file diff --git a/src/SysYIRAnalyser.cpp b/src/SysYIRAnalyser.cpp new file mode 100644 index 0000000..e69de29 diff --git a/src/SysYIRGenerator.cpp b/src/SysYIRGenerator.cpp index 1844a53..5a7ccc2 100644 --- a/src/SysYIRGenerator.cpp +++ b/src/SysYIRGenerator.cpp @@ -834,204 +834,4 @@ std::any SysYIRGenerator::visitConstExp(SysYParser::ConstExpContext* ctx) { return res; } -/* begin -std::any SysYIRGenerator::visitConstGlobalDecl(SysYParser::ConstDeclContext *ctx, Type* type) { - std::vector values; - for (auto constDef : ctx->constDef()) { - - auto name = constDef->Ident()->getText(); - // get its dimensions - vector dims; - for (auto dim : constDef->constExp()) - dims.push_back(any_cast(dim->accept(this))); - - if (dims.size() == 0) { - auto init = constDef->ASSIGN() ? any_cast((constDef->constInitVal()->constExp()->accept(this))) - : nullptr; - if (init && isa(init)){ - Type *btype = type->as()->getBaseType(); - if (btype->isInt() && init->getType()->isFloat()) - init = ConstantValue::get((int)dynamic_cast(init)->getFloat()); - else if (btype->isFloat() && init->getType()->isInt()) - init = ConstantValue::get((float)dynamic_cast(init)->getInt()); - } - - auto global_value = module->createGlobalValue(name, type, dims, init); - - symbols_table.insert(name, global_value); - values.push_back(global_value); - } - else{ - auto init = constDef->ASSIGN() ? any_cast(dims[0]) - : nullptr; - auto global_value = module->createGlobalValue(name, type, dims, init); - if (constDef->ASSIGN()) { - d = 0; - n = 0; - path.clear(); - path = vector(dims.size(), 0); - isalloca = false; - current_type = global_value->getType()->as()->getBaseType(); - current_global = global_value; - numdims = global_value->getNumDims(); - for (auto init : constDef->constInitVal()->constInitVal()) - init->accept(this); - // visitConstInitValue(init); - } - symbols_table.insert(name, global_value); - values.push_back(global_value); - } - } - return values; -} - -std::any SysYIRGenerator::visitVarGlobalDecl(SysYParser::VarDeclContext *ctx, Type* type){ - std::vector values; - for (auto varDef : ctx->varDef()) { - - auto name = varDef->Ident()->getText(); - // get its dimensions - vector dims; - for (auto dim : varDef->constExp()) - dims.push_back(any_cast(dim->accept(this))); - - if (dims.size() == 0) { - auto init = varDef->ASSIGN() ? any_cast((varDef->initVal()->exp()->accept(this))) - : nullptr; - if (init && isa(init)){ - Type *btype = type->as()->getBaseType(); - if (btype->isInt() && init->getType()->isFloat()) - init = ConstantValue::get((int)dynamic_cast(init)->getFloat()); - else if (btype->isFloat() && init->getType()->isInt()) - init = ConstantValue::get((float)dynamic_cast(init)->getInt()); - } - - auto global_value = module->createGlobalValue(name, type, dims, init); - - symbols_table.insert(name, global_value); - values.push_back(global_value); - } - else{ - auto init = varDef->ASSIGN() ? any_cast(dims[0]) - : nullptr; - auto global_value = module->createGlobalValue(name, type, dims, init); - if (varDef->ASSIGN()) { - d = 0; - n = 0; - path.clear(); - path = vector(dims.size(), 0); - isalloca = false; - current_type = global_value->getType()->as()->getBaseType(); - current_global = global_value; - numdims = global_value->getNumDims(); - for (auto init : varDef->initVal()->initVal()) - init->accept(this); - // visitInitValue(init); - } - symbols_table.insert(name, global_value); - values.push_back(global_value); - } - } - return values; -} - -std::any SysYIRGenerator::visitConstLocalDecl(SysYParser::ConstDeclContext *ctx, Type* type){ - std::vector values; - // handle variables - for (auto constDef : ctx->constDef()) { - - auto name = constDef->Ident()->getText(); - vector dims; - for (auto dim : constDef->constExp()) - dims.push_back(any_cast(dim->accept(this))); - auto alloca = builder.createAllocaInst(type, dims, name); - symbols_table.insert(name, alloca); - - if (constDef->ASSIGN()) { - if (alloca->getNumDims() == 0) { - - auto value = any_cast(constDef->constInitVal()->constExp()->accept(this)); - - if (isa(value)) { - if (ctx->bType()->INT() && dynamic_cast(value)->isFloat()) - value = ConstantValue::get((int)dynamic_cast(value)->getFloat()); - else if (ctx->bType()->FLOAT() && dynamic_cast(value)->isInt()) - value = ConstantValue::get((float)dynamic_cast(value)->getInt()); - } - else if (alloca->getType()->as()->getBaseType()->isInt() && value->getType()->isFloat()) - value = builder.createFtoIInst(value); - else if (alloca->getType()->as()->getBaseType()->isFloat() && value->getType()->isInt()) - value = builder.createIToFInst(value); - - auto store = builder.createStoreInst(value, alloca); - } - else{ - d = 0; - n = 0; - path.clear(); - path = vector(alloca->getNumDims(), 0); - isalloca = true; - current_alloca = alloca; - current_type = alloca->getType()->as()->getBaseType(); - numdims = alloca->getNumDims(); - for (auto init : constDef->constInitVal()->constInitVal()) - init->accept(this); - } - } - - values.push_back(alloca); - } - return values; -} - -std::any SysYIRGenerator::visitVarLocalDecl(SysYParser::VarDeclContext *ctx, Type* type){ - std::vector values; - for (auto varDef : ctx->varDef()) { - - auto name = varDef->Ident()->getText(); - vector dims; - for (auto dim : varDef->constExp()) - dims.push_back(any_cast(dim->accept(this))); - auto alloca = builder.createAllocaInst(type, dims, name); - symbols_table.insert(name, alloca); - - if (varDef->ASSIGN()) { - if (alloca->getNumDims() == 0) { - - auto value = any_cast(varDef->initVal()->exp()->accept(this)); - - if (isa(value)) { - if (ctx->bType()->INT() && dynamic_cast(value)->isFloat()) - value = ConstantValue::get((int)dynamic_cast(value)->getFloat()); - else if (ctx->bType()->FLOAT() && dynamic_cast(value)->isInt()) - value = ConstantValue::get((float)dynamic_cast(value)->getInt()); - } - else if (alloca->getType()->as()->getBaseType()->isInt() && value->getType()->isFloat()) - value = builder.createFtoIInst(value); - else if (alloca->getType()->as()->getBaseType()->isFloat() && value->getType()->isInt()) - value = builder.createIToFInst(value); - - auto store = builder.createStoreInst(value, alloca); - } - else{ - d = 0; - n = 0; - path.clear(); - path = vector(alloca->getNumDims(), 0); - isalloca = true; - current_alloca = alloca; - current_type = alloca->getType()->as()->getBaseType(); - numdims = alloca->getNumDims(); - for (auto init : varDef->initVal()->initVal()) - init->accept(this); - } - } - - values.push_back(alloca); - } - return values; -} - end -*/ - } // namespace sysy \ No newline at end of file diff --git a/src/ASTPrinter.h b/src/include/ASTPrinter.h similarity index 100% rename from src/ASTPrinter.h rename to src/include/ASTPrinter.h diff --git a/src/Backend.h b/src/include/Backend.h similarity index 100% rename from src/Backend.h rename to src/include/Backend.h diff --git a/src/include/IR.h b/src/include/IR.h new file mode 100644 index 0000000..1f3885b --- /dev/null +++ b/src/include/IR.h @@ -0,0 +1,1319 @@ +#pragma once + +#include "range.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace sysy { +/** + * \defgroup type Types + * @brief Sysy的类型系统 + * + * 1. 基类`Type` 用来表示所有的原始标量类型, + * 包括 `int`, `float`, `void`, 和表示跳转目标的标签类型。 + * 2. `PointerType` 和 `FunctionType` 派生自`Type` 并且分别表示指针和函数类型。 + * + * \note `Type`和它的派生类的构造函数声明为'protected'. + * 用户必须使用Type::getXXXType()获得`Type` 指针。 + * @{ + */ + +/** + * + * `Type`用来表示所有的原始标量类型, + * 包括`int`, `float`, `void`, 和表示跳转目标的标签类型。 + */ + +class Type { + public: + /// 定义了原始标量类型种类的枚举类型 + enum Kind { + kInt, + kFloat, + kVoid, + kLabel, + kPointer, + kFunction, + }; + + Kind kind; ///< 表示具体类型的变量 + + protected: + explicit Type(Kind kind) : kind(kind) {} + virtual ~Type() = default; + + public: + static auto getIntType() -> Type *; ///< 返回表示Int类型的Type指针 + static auto getFloatType() -> Type *; ///< 返回表示Float类型的Type指针 + static auto getVoidType() -> Type *; ///< 返回表示Void类型的Type指针 + static auto getLabelType() -> Type *; ///< 返回表示Label类型的Type指针 + static auto getPointerType(Type *baseType) -> Type *; ///< 返回表示指向baseType类型的Pointer类型的Type指针 + static auto getFunctionType(Type *returnType, const std::vector ¶mTypes = {}) -> Type *; + ///< 返回表示返回类型为returnType,形参类型列表为paramTypes的函数类型的Type指针 + + public: + auto getKind() const -> Kind { return kind; } ///< 返回Type对象代表原始标量类型 + auto isInt() const -> bool { return kind == kInt; } ///< 判定是否为Int类型 + auto isFloat() const -> bool { return kind == kFloat; } ///< 判定是否为Float类型 + auto isVoid() const -> bool { return kind == kVoid; } ///< 判定是否为Void类型 + auto isLabel() const -> bool { return kind == kLabel; } ///< 判定是否为Label类型 + auto isPointer() const -> bool { return kind == kPointer; } ///< 判定是否为Pointer类型 + auto isFunction() const -> bool { return kind == kFunction; } ///< 判定是否为Function类型 + auto getSize() const -> unsigned; ///< 返回类型所占的空间大小(字节) + /// 尝试将一个变量转换为给定的Type及其派生类类型的变量 + template + auto as() const -> std::enable_if_t, T *> { + return dynamic_cast(const_cast(this)); + } +}; + +class PointerType : public Type { + protected: + Type *baseType; ///< 所指向的类型 + + protected: + explicit PointerType(Type *baseType) : Type(kPointer), baseType(baseType) {} + + public: + static auto get(Type *baseType) -> PointerType *; ///< 获取指向baseType的Pointer类型 + + public: + auto getBaseType() const -> Type * { return baseType; } ///< 获取指向的类型 +}; + +class FunctionType : public Type { + private: + Type *returnType; ///< 返回值类型 + std::vector paramTypes; ///< 形参类型列表 + + protected: + explicit FunctionType(Type *returnType, std::vector paramTypes = {}) + : Type(kFunction), returnType(returnType), paramTypes(std::move(paramTypes)) {} + + public: + /// 获取返回值类型为returnType, 形参类型列表为paramTypes的Function类型 + static auto get(Type *returnType, const std::vector ¶mTypes = {}) -> FunctionType *; + + public: + auto getReturnType() const -> Type * { return returnType; } ///< 获取返回值类信息 + auto getParamTypes() const { return make_range(paramTypes); } ///< 获取形参类型列表 + auto getNumParams() const -> unsigned { return paramTypes.size(); } ///< 获取形参数量 +}; + +/*! + * @} + */ + +/** + * \defgroup ir IR + * + * TSysY IR 是一种指令级别的语言. 它被组织为四层树型结构,如下所示: + * + * \dot IR Structure + * digraph IRStructure{ + * node [shape="box"] + * a [label="Module"] + * b [label="GlobalValue"] + * c [label="Function"] + * d [label="BasicBlock"] + * e [label="Instruction"] + * a->{b,c} + * c->d->e + * } + * + * \enddot + * + * - `Module` 对应顶层"CompUnit"语法结构 + * - `GlobalValue`对应"globalDecl"语法结构 + * - `Function`对应"FuncDef"语法结构 + * - `BasicBlock` 是一连串没有分支指令的指令。一个 `Function` + * 由一个或多个`BasicBlock`组成 + * - `Instruction` 表示一个原始指令,例如, add 或 sub + * + * SysY IR中基础的数据概念是`Value`。一个 `Value` 像 + * 一个寄存器。它充当`Instruction`的输入输出操作数。每个value + * 都有一个与之相联系的`Type`,以此说明Value所拥有值的类型。 + * + * 大多数`Instruction`具有三地址代码的结构, 例如, 最多拥有两个输入操作数和一个输出操作数。 + * + * SysY IR采用了Static-Single-Assignment (SSA)设计。`Value`作为一个输出操作数 + * 被一些指令所定义, 并被另一些指令当作输入操作数使用。尽管一个Value可以被多个指令使用,其 + * 定义只能发生一次。这导致一个value个定义它的指令存在一一对应关系。换句话说,任何定义一个Value的指令 + * 都可以被看作被定义的指令本身。故在SysY IR中,`Instruction` 也是一个`Value`。查看 `Value` 以获取其继承 + * 关系。 + * + * @{ + */ + + +class User; +class Value; +class AllocaInst; + +//! `Use` 表示`Value`和它的`User`之间的使用关系。 + +class Use { + private: + /** + * value在User操作数中的位置,例如, + * user->getOperands[index] == value + */ + unsigned index; + User *user; ///< 使用者 + Value *value; ///< 被使用的值 + + public: + Use() = default; + Use(unsigned index, User *user, Value *value) : index(index), user(user), value(value) {} + + public: + auto getIndex() const -> unsigned { return index; } ///< 返回value在User操作数中的位置 + auto getUser() const -> User * { return user; } ///< 返回使用者 + auto getValue() const -> Value * { return value; } ///< 返回被使用的值 + void setValue(Value *newValue) { value = newValue; } ///< 将被使用的值设置为newValue +}; + +template +inline std::enable_if_t, bool> +isa(const Value *value) { + return T::classof(value); +} + +template +inline std::enable_if_t, T *> +dyncast(Value *value) { + return isa(value) ? static_cast(value) : nullptr; +} + +template +inline std::enable_if_t, const T *> +dyncast(const Value *value) { + return isa(value) ? static_cast(value) : nullptr; +} + +//! The base class of all value types + +class Value { + protected: + Type *type; ///< 值的类型 + std::string name; ///< 值的名字 + std::list> uses; ///< 值的使用关系列表 + + protected: + explicit Value(Type *type, std::string name = "") : type(type), name(std::move(name)) {} + virtual ~Value() = default; + + public: + void setName(const std::string &newName) { name = newName; } ///< 设置名字 + auto getName() const -> const std::string & { return name; } ///< 获取名字 + auto getType() const -> Type * { return type; } ///< 返回值的类型 + auto isInt() const -> bool { return type->isInt(); } ///< 判定是否为Int类型 + auto isFloat() const -> bool { return type->isFloat(); } ///< 判定是否为Float类型 + auto isPointer() const -> bool { return type->isPointer(); } ///< 判定是否为Pointer类型 + auto getUses() -> std::list> & { return uses; } ///< 获取使用关系列表 + void addUse(const std::shared_ptr &use) { uses.push_back(use); } ///< 添加使用关系 + void replaceAllUsesWith(Value *value); ///< 将原来使用该value的使用者全变为使用给定参数value并修改相应use关系 + void removeUse(const std::shared_ptr &use) { uses.remove(use); } ///< 删除使用关系use +}; + + + +/** + * ValueCounter 需要理解为一个Value *的计数器。 + * 它的主要目的是为了节省存储空间和方便Memset指令的创建。 + * ValueCounter记录了一列Value *的互异元素和每个元素的重复数量。 + * 例如,假设有一列Value *为{v1, v1, v2, v3, v3, v3, v4}, + * 那么ValueCounter将记录为: + * - __counterValues: {v1, v2, v3, v4} + * - __counterNumbers: {2, 1, 3, 1} + * - __size: 7 + * 使得存储空间得到节省,方便Memset指令的创建。 + */ +class ValueCounter { + private: + unsigned __size{}; ///< 总的Value数量 + std::vector __counterValues; ///< 记录的Value *列表(无重复元素) + std::vector __counterNumbers; ///< 记录的Value *重复数量列表 + + public: + ValueCounter() = default; + + public: + auto size() const -> unsigned { return __size; } ///< 返回总的Value数量 + auto getValue(unsigned index) const -> Value * { + if (index >= __size) { + return nullptr; + } + + unsigned num = 0; + for (size_t i = 0; i < __counterNumbers.size(); i++) { + if (num <= index && index < num + __counterNumbers[i]) { + return __counterValues[i]; + } + num += __counterNumbers[i]; + } + + return nullptr; + } ///< 根据位置index获取Value * + auto getValues() const -> const std::vector & { return __counterValues; } ///< 获取互异Value *列表 + auto getNumbers() const -> const std::vector & { return __counterNumbers; } ///< 获取Value *重复数量列表 + void push_back(Value *value, unsigned num = 1) { + if (__size != 0 && __counterValues.back() == value) { + *(__counterNumbers.end() - 1) += num; + } else { + __counterValues.push_back(value); + __counterNumbers.push_back(num); + } + __size += num; + } ///< 向后插入num个value + void clear() { + __size = 0; + __counterValues.clear(); + __counterNumbers.clear(); + } ///< 清空ValueCounter +}; + +/*! + * Static constants known at compile time. + * + * `ConstantValue`s are not defined by instructions, and do not use any other + * `Value`s. It's type is either `int` or `float`. + * `ConstantValue`并不由指令定义, 也不使用任何Value。它的类型为int/float。 + */ + + +class ConstantValue : public Value { + protected: + /// 定义字面量类型的聚合类型 + union { + int iScalar; + float fScalar; + }; + + protected: + explicit ConstantValue(int value, const std::string &name = "") : Value(Type::getIntType(), name), iScalar(value) {} + explicit ConstantValue(float value, const std::string &name = "") + : Value(Type::getFloatType(), name), fScalar(value) {} + + public: + static auto get(int value) -> ConstantValue *; ///< 获取一个int类型的ConstValue *,其值为value + static auto get(float value) -> ConstantValue *; ///< 获取一个float类型的ConstValue *,其值为value + + public: + auto getInt() const -> int { + assert(isInt()); + return iScalar; + } ///< 返回int类型的值 + auto getFloat() const -> float { + assert(isFloat()); + return fScalar; + } ///< 返回float类型的值 + template + auto getValue() const -> T { + if (std::is_same::value && isInt()) { + return getInt(); + } + if (std::is_same::value && isFloat()) { + return getFloat(); + } + throw std::bad_cast(); // 或者其他适当的异常处理 + } ///< 返回值,getInt和getFloat统一化,整数返回整形,浮点返回浮点型 +}; + +class Instruction; +class Function; +class Loop; +class BasicBlock; + +/*! + * Arguments of `BasicBlock`s. + * + * SysY IR is an SSA language, however, it does not use PHI instructions as in + * LLVM IR. `Value`s from different predecessor blocks are passed explicitly as + * block arguments. This is also the approach used by MLIR. + * NOTE that `Function` does not own `Argument`s, function arguments are + * implemented as its entry block's arguments. + */ + +class Argument : public Value { +protected: + BasicBlock *block; + int index; + +public: + Argument(Type *type, BasicBlock *block, int index, + const std::string &name = ""); + +public: + static bool classof(const Value *value) { + return value->getKind() == kArgument; + } + +public: + BasicBlock *getParent() const { return block; } + int getIndex() const { return index; } + +public: + void print(std::ostream &os) const override; +}; + +class Instruction; +class Function; +class Loop; +/*! + * The container for `Instruction` sequence. + * + * `BasicBlock` maintains a list of `Instruction`s, with the last one being + * a terminator (branch or return). Besides, `BasicBlock` stores its arguments + * and records its predecessor and successor `BasicBlock`s. + */ +class BasicBlock : public Value { + friend class Function; + +public: + using inst_list = std::list>; + using iterator = inst_list::iterator; + using arg_list = std::vector>; + using block_list = std::vector; + using block_set = std::unordered_set; + +protected: + Function *parent; + inst_list instructions; + arg_list arguments; + block_list successors; + block_list predecessors; + BasicBlock *idom = nullptr; // 直接支配节点 + block_list sdoms; // 支配树后继 + block_set dominants; // 必经节点集合 + block_set dominant_frontiers; // 支配边界 + bool reachable = false; // 是否可达 + Loop *loopbelong = nullptr; // 所属循环 + int loopdepth = 0; // 循环深度 + +protected: + explicit BasicBlock(Function *parent, const std::string &name = ""); + +public: + static bool classof(const Value *value) { + return value->getKind() == kBasicBlock; + } + +public: + int getNumInstructions() const { return instructions.size(); } + int getNumArguments() const { return arguments.size(); } + int getNumPredecessors() const { return predecessors.size(); } + int getNumSuccessors() const { return successors.size(); } + Function *getParent() const { return parent; } + inst_list &getInstructions() { return instructions; } + auto getArguments() const { return make_range(arguments); } + const block_list &getPredecessors() const { return predecessors; } + block_list &getPredecessors() { return predecessors; } + const block_list &getSuccessors() const { return successors; } + block_list &getSuccessors() { return successors; } + iterator begin() { return instructions.begin(); } + iterator end() { return instructions.end(); } + iterator terminator() { return std::prev(end()); } + Argument *createArgument(Type *type, const std::string &name = "") { + auto arg = new Argument(type, this, arguments.size(), name); + assert(arg); + arguments.emplace_back(arg); + return arguments.back().get(); + }; + + // 控制流分析相关 + BasicBlock *getIdom() const { return idom; } + void setIdom(BasicBlock *dom) { idom = dom; } + const block_list &getSdoms() const { return sdoms; } + void addSdom(BasicBlock *bb) { sdoms.push_back(bb); } + void clearSdoms() { sdoms.clear(); } + const block_set &getDominants() const { return dominants; } + void addDominant(BasicBlock *bb) { dominants.insert(bb); } + void setDominants(const block_set &doms) { dominants = doms; } + const block_set &getDominantFrontiers() const { return dominant_frontiers; } + void setDominantFrontiers(const block_set &df) { dominant_frontiers = df; } + bool isReachable() const { return reachable; } + void setReachable(bool r) { reachable = r; } + + // 循环分析相关 + Loop *getLoop() const { return loopbelong; } + void setLoop(Loop *loop) { loopbelong = loop; } + int getLoopDepth() const { return loopdepth; } + void setLoopDepth(int depth) { loopdepth = depth; } + + void addPredecessor(BasicBlock *bb) { + if (std::find(predecessors.begin(), predecessors.end(), bb) == predecessors.end()) + predecessors.push_back(bb); + } + + void addSuccessor(BasicBlock *bb) { + if (std::find(successors.begin(), successors.end(), bb) == successors.end()) + successors.push_back(bb); + } + + void removePredecessor(BasicBlock *bb) { + auto it = std::find(predecessors.begin(), predecessors.end(), bb); + if (it != predecessors.end()) + predecessors.erase(it); + } + + void removeSuccessor(BasicBlock *bb) { + auto it = std::find(successors.begin(), successors.end(), bb); + if (it != successors.end()) + successors.erase(it); + } + + // 获取支配树中所有子节点 + block_list getChildren() { + std::queue q; + block_list children; + for (auto sdom : sdoms) { + q.push(sdom); + children.push_back(sdom); + } + while (!q.empty()) { + auto block = q.front(); + q.pop(); + for (auto sdom : block->sdoms) { + q.push(sdom); + children.push_back(sdom); + } + } + return children; + } + +public: + void print(std::ostream &os) const override; +}; // class BasicBlock + +//! User is the abstract base type of `Value` types which use other `Value` as +//! operands. Currently, there are two kinds of `User`s, `Instruction` and +//! `GlobalValue`. +class User : public Value { +protected: + std::vector operands; + +protected: + User(Kind kind, Type *type, const std::string &name = "") + : Value(kind, type, name), operands() {} + +public: + using use_iterator = std::vector::const_iterator; + struct operand_iterator : public std::vector::const_iterator { + using Base = std::vector::const_iterator; + operand_iterator(const Base &iter) : Base(iter) {} + using value_type = Value *; + value_type operator->() { return Base::operator*().getValue(); } + value_type operator*() { return Base::operator*().getValue(); } + }; + +public: + int getNumOperands() const { return operands.size(); } + operand_iterator operand_begin() const { return operands.begin(); } + operand_iterator operand_end() const { return operands.end(); } + auto getOperands() const { + return make_range(operand_begin(), operand_end()); + } + Value *getOperand(int index) const { return operands[index].getValue(); } + void addOperand(Value *value) { + operands.emplace_back(operands.size(), this, value); + value->addUse(&operands.back()); + } + template void addOperands(const ContainerT &operands) { + for (auto value : operands) + addOperand(value); + } + void replaceOperand(int index, Value *value); + void setOperand(int index, Value *value); +}; // class User + + +class GetSubArrayInst; +/** + * 左值 具有地址的对象 + */ +class LVal : public User { + friend class GetSubArrayInst; + + protected: + LVal *fatherLVal{}; ///< 父左值 + std::list> childrenLVals; ///< 子左值 + GetSubArrayInst *defineInst{}; /// 定义该左值的GetSubArray指令 + + protected: + LVal() = default; + + public: + virtual ~LVal() = default; + virtual auto getLValDims() const -> std::vector = 0; ///< 获取左值的维度 + virtual auto getLValNumDims() const -> unsigned = 0; ///< 获取左值的维度数量 + + public: + auto getFatherLVal() const -> LVal * { return fatherLVal; } ///< 获取父左值 + auto getChildrenLVals() const -> const std::list> & { + return childrenLVals; + } ///< 获取子左值列表 + auto getAncestorLVal() const -> LVal * { + auto curLVal = const_cast(this); + while (curLVal->getFatherLVal() != nullptr) { + curLVal = curLVal->getFatherLVal(); + } + return curLVal; + } ///< 获取祖先左值 + void setFatherLVal(LVal *father) { fatherLVal = father; } ///< 设置父左值 + void setDefineInst(GetSubArrayInst *inst) { defineInst = inst; } ///< 设置定义指令 + void addChild(LVal *child) { childrenLVals.emplace_back(child); } ///< 添加子左值 + void removeChild(LVal *child) { + auto iter = std::find_if(childrenLVals.begin(), childrenLVals.end(), + [child](const std::unique_ptr &ptr) { return ptr.get() == child; }); + childrenLVals.erase(iter); + } ///< 移除子左值 + auto getDefineInst() const -> GetSubArrayInst * { return defineInst; } ///< 获取定义指令 +}; + +/*! + * Base of all concrete instruction types. + */ +class Instruction : public User { +public: + // 指令种类定义已移至Value::Kind + +protected: + BasicBlock *parent; + +protected: + Instruction(Kind kind, Type *type, BasicBlock *parent = nullptr, + const std::string &name = ""); + +public: + static bool classof(const Value *value) { + return value->getKind() >= kFirstInst and value->getKind() <= kLastInst; + } + +public: + Kind getKind() const { return kind; } + BasicBlock *getParent() const { return parent; } + Function *getFunction() const { return parent->getParent(); } + void setParent(BasicBlock *bb) { parent = bb; } + + bool isBinary() const { + static constexpr uint64_t BinaryOpMask = + (kAdd | kSub | kMul | kDiv | kRem | kAnd | kOr) | + (kICmpEQ | kICmpNE | kICmpLT | kICmpGT | kICmpLE | kICmpGE) | + (kFAdd | kFSub | kFMul | kFDiv | kFRem) | + (kFCmpEQ | kFCmpNE | kFCmpLT | kFCmpGT | kFCmpLE | kFCmpGE); + return kind & BinaryOpMask; + } + bool isUnary() const { + static constexpr uint64_t UnaryOpMask = + kNeg | kNot | kFNeg | kFNot | kFtoI | kItoF | kBitFtoI | kBitItoF; + return kind & UnaryOpMask; + } + bool isMemory() const { + static constexpr uint64_t MemoryOpMask = + kAlloca | kLoad | kStore | kLa | kMemset | kGetSubArray; + return kind & MemoryOpMask; + } + bool isTerminator() const { + static constexpr uint64_t TerminatorOpMask = kCondBr | kBr | kReturn; + return kind & TerminatorOpMask; + } + bool isCmp() const { + static constexpr uint64_t CmpOpMask = + (kICmpEQ | kICmpNE | kICmpLT | kICmpGT | kICmpLE | kICmpGE) | + (kFCmpEQ | kFCmpNE | kFCmpLT | kFCmpGT | kFCmpLE | kFCmpGE); + return kind & CmpOpMask; + } + bool isBranch() const { + static constexpr uint64_t BranchOpMask = kBr | kCondBr; + return kind & BranchOpMask; + } + bool isCommutative() const { + static constexpr uint64_t CommutativeOpMask = + kAdd | kMul | kICmpEQ | kICmpNE | kFAdd | kFMul | kFCmpEQ | kFCmpNE | kAnd | kOr; + return kind & CommutativeOpMask; + } + bool isUnconditional() const { return kind == kBr; } + bool isConditional() const { return kind == kCondBr; } + bool isPhi() const { return kind == kPhi; } + bool isAlloca() const { return kind == kAlloca; } + bool isLoad() const { return kind == kLoad; } + bool isStore() const { return kind == kStore; } + bool isLa() const { return kind == kLa; } + bool isMemset() const { return kind == kMemset; } + bool isGetSubArray() const { return kind == kGetSubArray; } + bool isCall() const { return kind == kCall; } + bool isReturn() const { return kind == kReturn; } +}; // class Instruction + +class Function; +//! Function call. +class CallInst : public Instruction { + friend class IRBuilder; + +protected: + CallInst(Function *callee, const std::vector &args = {}, + BasicBlock *parent = nullptr, const std::string &name = ""); + +public: + static bool classof(const Value *value) { return value->getKind() == kCall; } + +public: + Function *getCallee() const; + auto getArguments() const { + return make_range(std::next(operand_begin()), operand_end()); + } + +public: + void print(std::ostream &os) const override; +}; // class CallInst + +//! Unary instruction, includes '!', '-' and type conversion. +class UnaryInst : public Instruction { + friend class IRBuilder; + +protected: + UnaryInst(Kind kind, Type *type, Value *operand, BasicBlock *parent = nullptr, + const std::string &name = "") + : Instruction(kind, type, parent, name) { + addOperand(operand); + } + +public: + static bool classof(const Value *value) { + return Instruction::classof(value) and + static_cast(value)->isUnary(); + } + +public: + Value *getOperand() const { return User::getOperand(0); } + +public: + void print(std::ostream &os) const override; +}; // class UnaryInst + +//! Binary instruction, e.g., arithmatic, relation, logic, etc. +class BinaryInst : public Instruction { + friend class IRBuilder; + +protected: + BinaryInst(Kind kind, Type *type, Value *lhs, Value *rhs, BasicBlock *parent, + const std::string &name = "") + : Instruction(kind, type, parent, name) { + addOperand(lhs); + addOperand(rhs); + } + +public: + static bool classof(const Value *value) { + return Instruction::classof(value) and + static_cast(value)->isBinary(); + } + +public: + Value *getLhs() const { return getOperand(0); } + Value *getRhs() const { return getOperand(1); } + +public: + void print(std::ostream &os) const override; +}; // class BinaryInst + +//! The return statement +class ReturnInst : public Instruction { + friend class IRBuilder; + +protected: + ReturnInst(Value *value = nullptr, BasicBlock *parent = nullptr) + : Instruction(kReturn, Type::getVoidType(), parent, "") { + if (value) + addOperand(value); + } + +public: + static bool classof(const Value *value) { + return value->getKind() == kReturn; + } + +public: + bool hasReturnValue() const { return not operands.empty(); } + Value *getReturnValue() const { + return hasReturnValue() ? getOperand(0) : nullptr; + } + +public: + void print(std::ostream &os) const override; +}; // class ReturnInst + +//! Unconditional branch +class UncondBrInst : public Instruction { + friend class IRBuilder; + +protected: + UncondBrInst(BasicBlock *block, std::vector args, + BasicBlock *parent = nullptr) + : Instruction(kBr, Type::getVoidType(), parent, "") { + // assert(block->getNumArguments() == args.size()); + addOperand(block); + addOperands(args); + } + +public: + static bool classof(const Value *value) { return value->getKind() == kBr; } + +public: + BasicBlock *getBlock() const { return dyncast(getOperand(0)); } + auto getArguments() const { + return make_range(std::next(operand_begin()), operand_end()); + } + +public: + void print(std::ostream &os) const override; +}; // class UncondBrInst + +//! Conditional branch +class CondBrInst : public Instruction { + friend class IRBuilder; + +protected: + CondBrInst(Value *condition, BasicBlock *thenBlock, BasicBlock *elseBlock, + const std::vector &thenArgs, + const std::vector &elseArgs, BasicBlock *parent = nullptr) + : Instruction(kCondBr, Type::getVoidType(), parent, "") { + // assert(thenBlock->getNumArguments() == thenArgs.size() and + // elseBlock->getNumArguments() == elseArgs.size()); + addOperand(condition); + addOperand(thenBlock); + addOperand(elseBlock); + addOperands(thenArgs); + addOperands(elseArgs); + } + +public: + static bool classof(const Value *value) { + return value->getKind() == kCondBr; + } + +public: + Value *getCondition() const { return getOperand(0); } + BasicBlock *getThenBlock() const { + return dyncast(getOperand(1)); + } + BasicBlock *getElseBlock() const { + return dyncast(getOperand(2)); + } + auto getThenArguments() const { + auto begin = std::next(operand_begin(), 3); + auto end = std::next(begin, getThenBlock()->getNumArguments()); + return make_range(begin, end); + } + auto getElseArguments() const { + auto begin = + std::next(operand_begin(), 3 + getThenBlock()->getNumArguments()); + auto end = operand_end(); + return make_range(begin, end); + } + +public: + void print(std::ostream &os) const override; +}; // class CondBrInst + +//! Allocate memory for stack variables, used for non-global variable declartion +class AllocaInst : public Instruction { + friend class IRBuilder; + +protected: + AllocaInst(Type *type, const std::vector &dims = {}, + BasicBlock *parent = nullptr, const std::string &name = "") + : Instruction(kAlloca, type, parent, name) { + addOperands(dims); + } + +public: + static bool classof(const Value *value) { + return value->getKind() == kAlloca; + } + +public: + int getNumDims() const { return getNumOperands(); } + auto getDims() const { return getOperands(); } + Value *getDim(int index) { return getOperand(index); } + +public: + void print(std::ostream &os) const override; +}; // class AllocaInst + +//! Load a value from memory address specified by a pointer value +class LoadInst : public Instruction { + friend class IRBuilder; + +protected: + LoadInst(Value *pointer, const std::vector &indices = {}, + BasicBlock *parent = nullptr, const std::string &name = "") + : Instruction(kLoad, pointer->getType()->as()->getBaseType(), + parent, name) { + addOperand(pointer); + addOperands(indices); + } + +public: + static bool classof(const Value *value) { return value->getKind() == kLoad; } + +public: + int getNumIndices() const { return getNumOperands() - 1; } + Value *getPointer() const { return getOperand(0); } + auto getIndices() const { + return make_range(std::next(operand_begin()), operand_end()); + } + Value *getIndex(int index) const { return getOperand(index + 1); } + +public: + void print(std::ostream &os) const override; +}; // class LoadInst + +//! Store a value to memory address specified by a pointer value +class StoreInst : public Instruction { + friend class IRBuilder; + +protected: + StoreInst(Value *value, Value *pointer, + const std::vector &indices = {}, + BasicBlock *parent = nullptr, const std::string &name = "") + : Instruction(kStore, Type::getVoidType(), parent, name) { + addOperand(value); + addOperand(pointer); + addOperands(indices); + } + +public: + static bool classof(const Value *value) { return value->getKind() == kStore; } + +public: + int getNumIndices() const { return getNumOperands() - 2; } + Value *getValue() const { return getOperand(0); } + Value *getPointer() const { return getOperand(1); } + auto getIndices() const { + return make_range(std::next(operand_begin(), 2), operand_end()); + } + Value *getIndex(int index) const { return getOperand(index + 2); } + +public: + void print(std::ostream &os) const override; +}; // class StoreInst + +//! Get address instruction +class LaInst : public Instruction { + friend class IRBuilder; + +protected: + LaInst(Value *pointer, const std::vector &indices = {}, + BasicBlock *parent = nullptr, const std::string &name = "") + : Instruction(kLa, pointer->getType(), parent, name) { + assert(pointer); + addOperand(pointer); + addOperands(indices); + } + +public: + static bool classof(const Value *value) { return value->getKind() == kLa; } + +public: + int getNumIndices() const { return getNumOperands() - 1; } + Value *getPointer() const { return getOperand(0); } + auto getIndices() const { + return make_range(std::next(operand_begin()), operand_end()); + } + Value *getIndex(int index) const { return getOperand(index + 1); } + +public: + void print(std::ostream &os) const override; +}; + +//! Memset instruction +class MemsetInst : public Instruction { + friend class IRBuilder; + +protected: + MemsetInst(Value *pointer, Value *begin, Value *size, Value *value, + BasicBlock *parent = nullptr, const std::string &name = "") + : Instruction(kMemset, Type::getVoidType(), parent, name) { + addOperand(pointer); + addOperand(begin); + addOperand(size); + addOperand(value); + } + +public: + static bool classof(const Value *value) { return value->getKind() == kMemset; } + +public: + Value *getPointer() const { return getOperand(0); } + Value *getBegin() const { return getOperand(1); } + Value *getSize() const { return getOperand(2); } + Value *getValue() const { return getOperand(3); } + +public: + void print(std::ostream &os) const override; +}; + +//! Get subarray instruction +class GetSubArrayInst : public Instruction { + friend class IRBuilder; + +protected: + GetSubArrayInst(Value *fatherArray, Value *childArray, + const std::vector &indices, + BasicBlock *parent = nullptr, const std::string &name = "") + : Instruction(kGetSubArray, Type::getVoidType(), parent, name) { + addOperand(fatherArray); + addOperand(childArray); + addOperands(indices); + } + +public: + static bool classof(const Value *value) { + return value->getKind() == kGetSubArray; + } + +public: + Value *getFatherArray() const { return getOperand(0); } + Value *getChildArray() const { return getOperand(1); } + int getNumIndices() const { return getNumOperands() - 2; } + auto getIndices() const { + return make_range(std::next(operand_begin(), 2), operand_end()); + } + Value *getIndex(int index) const { return getOperand(index + 2); } + +public: + void print(std::ostream &os) const override; +}; + +//! Phi instruction for SSA form +class PhiInst : public Instruction { + friend class IRBuilder; + +protected: + Value *map_val; // 旧的映射关系 + + PhiInst(Type *type, Value *lhs, const std::vector &rhs, + Value *mval, BasicBlock *parent, const std::string &name = "") + : Instruction(kPhi, type, parent, name), map_val(mval) { + addOperand(lhs); + addOperands(rhs); + } + +public: + static bool classof(const Value *value) { return value->getKind() == kPhi; } + +public: + Value *getMapVal() const { return map_val; } + Value *getPointer() const { return getOperand(0); } + auto getValues() const { + return make_range(std::next(operand_begin()), operand_end()); + } + Value *getValue(unsigned index) const { return getOperand(index + 1); } + +public: + void print(std::ostream &os) const override; +}; + +class Loop { +public: + using block_list = std::vector; + using block_set = std::unordered_set; + using Loop_list = std::vector; + +protected: + Function *parent; // 所属函数 + block_list blocksInLoop; // 循环内的基本块 + BasicBlock *preheaderBlock = nullptr; // 前驱块 + BasicBlock *headerBlock = nullptr; // 循环头 + block_list latchBlock; // 回边块 + block_set exitingBlocks; // 退出块 + block_set exitBlocks; // 退出目标块 + Loop *parentloop = nullptr; // 父循环 + Loop_list subLoops; // 子循环 + size_t loopID; // 循环ID + unsigned loopDepth; // 循环深度 + + Instruction *indCondVar = nullptr; // 循环条件变量 + Instruction::Kind IcmpKind; // 比较类型 + Value *indEnd = nullptr; // 循环结束值 + AllocaInst *IndPhi = nullptr; // 循环变量 + + ConstantValue *indBegin = nullptr; // 循环起始值 + ConstantValue *indStep = nullptr; // 循环步长 + + int StepType = 0; // 循环步长类型 + bool parallelable = false; // 是否可并行 + +public: + explicit Loop(BasicBlock *header, const std::string &name = "") + : headerBlock(header) { + blocksInLoop.push_back(header); + } + + void setloopID() { + static unsigned loopCount = 0; + loopCount = loopCount + 1; + loopID = loopCount; + } + + BasicBlock *getHeader() const { return headerBlock; } + BasicBlock *getPreheaderBlock() const { return preheaderBlock; } + block_list &getLatchBlocks() { return latchBlock; } + block_set &getExitingBlocks() { return exitingBlocks; } + block_set &getExitBlocks() { return exitBlocks; } + Loop *getParentLoop() const { return parentloop; } + void setParentLoop(Loop *parent) { parentloop = parent; } + void addBasicBlock(BasicBlock *bb) { blocksInLoop.push_back(bb); } + void addSubLoop(Loop *loop) { subLoops.push_back(loop); } + void setLoopDepth(unsigned depth) { loopDepth = depth; } + block_list &getBasicBlocks() { return blocksInLoop; } + Loop_list &getSubLoops() { return subLoops; } + unsigned getLoopDepth() const { return loopDepth; } + + bool contains(BasicBlock *bb) const { + return std::find(blocksInLoop.begin(), blocksInLoop.end(), bb) != blocksInLoop.end(); + } + + void addExitingBlock(BasicBlock *bb) { exitingBlocks.insert(bb); } + void addExitBlock(BasicBlock *bb) { exitBlocks.insert(bb); } + void addLatchBlock(BasicBlock *bb) { latchBlock.push_back(bb); } + void setPreheaderBlock(BasicBlock *bb) { preheaderBlock = bb; } + + void setIndexCondInstr(Instruction *instr) { indCondVar = instr; } + void setIcmpKind(Instruction::Kind kind) { IcmpKind = kind; } + Instruction::Kind getIcmpKind() const { return IcmpKind; } + + void setIndEnd(Value *value) { indEnd = value; } + void setIndPhi(AllocaInst *phi) { IndPhi = phi; } + Value *getIndEnd() const { return indEnd; } + AllocaInst *getIndPhi() const { return IndPhi; } + Instruction *getIndCondVar() const { return indCondVar; } + + void setParallelable(bool flag) { parallelable = flag; } + bool isParallelable() const { return parallelable; } +}; + +class Module; +//! Function definition +class Function : public Value { + friend class Module; + +protected: + Function(Module *parent, Type *type, const std::string &name) + : Value(kFunction, type, name), parent(parent), variableID(0), blockID(0), blocks() { + blocks.emplace_back(new BasicBlock(this, "entry")); + } + +public: + static bool classof(const Value *value) { + return value->getKind() == kFunction; + } + +public: + using block_list = std::list>; + using Loop_list = std::list>; + +protected: + Module *parent; + int variableID; + int blockID; + block_list blocks; + /*是放在module中还是新建分析器呢?*/ + Loop_list loops; // 循环列表 + Loop_list topLoops; // 顶层循环 + std::unordered_map basicblock2Loop; // 基本块到循环的映射 + + // 数据流分析相关 + std::unordered_map value2AllocBlocks; + std::unordered_map> value2DefBlocks; + std::unordered_map> value2UseBlocks; + +public: + Type *getReturnType() const { + return getType()->as()->getReturnType(); + } + auto getParamTypes() const { + return getType()->as()->getParamTypes(); + } + auto getBasicBlocks() const { return make_range(blocks); } + BasicBlock *getEntryBlock() const { return blocks.front().get(); } + BasicBlock *addBasicBlock(const std::string &name = "") { + blocks.emplace_back(new BasicBlock(this, name)); + return blocks.back().get(); + } + void removeBasicBlock(BasicBlock *block) { + blocks.remove_if([&](std::unique_ptr &b) -> bool { + return block == b.get(); + }); + } + int allocateVariableID() { return variableID++; } + int allocateblockID() { return blockID++; } + + // 循环分析 + void addLoop(Loop *loop) { loops.emplace_back(loop); } + void addTopLoop(Loop *loop) { topLoops.emplace_back(loop); } + Loop_list &getLoops() { return loops; } + Loop_list &getTopLoops() { return topLoops; } + Loop *getLoopOfBasicBlock(BasicBlock *bb) { + return basicblock2Loop.count(bb) ? basicblock2Loop[bb] : nullptr; + } + void addBBToLoop(BasicBlock *bb, Loop *loop) { basicblock2Loop[bb] = loop; } + + // 数据流分析 + void addValue2AllocBlocks(Value *value, BasicBlock *block) { + value2AllocBlocks[value] = block; + } + BasicBlock *getAllocBlockByValue(Value *value) { + return value2AllocBlocks.count(value) ? value2AllocBlocks[value] : nullptr; + } + void addValue2DefBlocks(Value *value, BasicBlock *block) { + ++value2DefBlocks[value][block]; + } + void addValue2UseBlocks(Value *value, BasicBlock *block) { + ++value2UseBlocks[value][block]; + } + +public: + void print(std::ostream &os) const override; +}; // class Function + +//! Global value declared at file scope +class GlobalValue : public User { + friend class Module; + +protected: + Module *parent; + std::vector initValues; // 初始值列表 + bool isConst; + +protected: + GlobalValue(Module *parent, Type *type, const std::string &name, + const std::vector &dims = {}, + const std::vector &initValues = {}, + bool isConst = false) + : User(kGlobal, type, name), parent(parent), + initValues(initValues), isConst(isConst) { + assert(type->isPointer()); + addOperands(dims); + } + +public: + static bool classof(const Value *value) { + return value->getKind() == kGlobal; + } + +public: + const std::vector& getInitValues() const { return initValues; } + int getNumDims() const { return getNumOperands(); } + Value *getDim(int index) { return getOperand(index); } + bool isConstant() const { return isConst; } + +public: + void print(std::ostream &os) const override{}; +}; // class GlobalValue + +//! IR unit for representing a SysY compile unit +class Module { +protected: + std::vector> children; + std::map functions; + std::map globals; + std::map externalFunctions; // 外部函数声明 + +public: + Module() = default; + +public: + Function *createFunction(const std::string &name, Type *type) { + if (functions.count(name)) + return nullptr; + auto func = new Function(this, type, name); + assert(func); + children.emplace_back(func); + functions.emplace(name, func); + return func; + }; + + Function *createExternalFunction(const std::string &name, Type *type) { + if (externalFunctions.count(name)) + return nullptr; + auto func = new Function(this, type, name); + assert(func); + children.emplace_back(func); + externalFunctions.emplace(name, func); + return func; + } + + GlobalValue *createGlobalValue(const std::string &name, Type *type, + const std::vector &dims = {}, + const std::vector &initValues = {}, + bool isConst = false) { + if (globals.count(name)) + return nullptr; + auto global = new GlobalValue(this, type, name, dims, initValues, isConst); + assert(global); + children.emplace_back(global); + globals.emplace(name, global); + return global; + } + + Function *getFunction(const std::string &name) const { + auto result = functions.find(name); + if (result == functions.end()) + return nullptr; + return result->second; + } + + Function *getExternalFunction(const std::string &name) const { + auto result = externalFunctions.find(name); + if (result == externalFunctions.end()) + return nullptr; + return result->second; + } + + GlobalValue *getGlobalValue(const std::string &name) const { + auto result = globals.find(name); + if (result == globals.end()) + return nullptr; + return result->second; + } + + std::map *getFunctions() { return &functions; } + std::map *getGlobalValues() { return &globals; } + std::map *getExternalFunctions() { return &externalFunctions; } + +public: + void print(std::ostream &os) const; +}; // class Module + +/*! + * @} + */ +inline std::ostream &operator<<(std::ostream &os, const Type &type) { + type.print(os); + return os; +} + +inline std::ostream &operator<<(std::ostream &os, const Value &value) { + value.print(os); + return os; +} + +} // namespace sysy diff --git a/src/IRBuilder.h b/src/include/IRBuilder.h similarity index 91% rename from src/IRBuilder.h rename to src/include/IRBuilder.h index 60cb092..c6d12da 100644 --- a/src/IRBuilder.h +++ b/src/include/IRBuilder.h @@ -8,14 +8,23 @@ namespace sysy { class IRBuilder { private: - BasicBlock *block; - BasicBlock::iterator position; + unsigned labelIndex; ///< 基本块标签编号 + unsigned tmpIndex; ///< 临时变量编号 + + BasicBlock *block; ///< 当前基本块 + BasicBlock::iterator position; ///< 当前基本块指令列表位置的迭代器 + + std::vector trueBlocks; ///< true分支基本块列表 + std::vector falseBlocks; ///< false分支基本块列表 + + std::vector breakBlocks; ///< break目标块列表 + std::vector continueBlocks; ///< continue目标块列表 public: - IRBuilder() = default; - IRBuilder(BasicBlock *block) : block(block), position(block->end()) {} + IRBuilder() : labelIndex(0), tmpIndex(0), block(nullptr) {} + explicit IRBuilder(BasicBlock *block) : labelIndex(0), tmpIndex(0), block(block), position(block->end()) {} IRBuilder(BasicBlock *block, BasicBlock::iterator position) - : block(block), position(position) {} + : labelIndex(0), tmpIndex(0), block(block), position(position) {} public: BasicBlock *getBasicBlock() const { return block; } @@ -60,7 +69,7 @@ public: name); } UnaryInst *createIToFInst(Value *operand, const std::string &name = "") { - return createUnaryInst(Instruction::kIToF, Type::getFloatType(), operand, + return createUnaryInst(Instruction::kItoF, Type::getFloatType(), operand, name); } BinaryInst *createBinaryInst(Instruction::Kind kind, Type *type, Value *lhs, diff --git a/src/LLVMIRGenerator.h b/src/include/LLVMIRGenerator.h similarity index 100% rename from src/LLVMIRGenerator.h rename to src/include/LLVMIRGenerator.h diff --git a/src/LLVMIRGenerator_1.h b/src/include/LLVMIRGenerator_1.h similarity index 100% rename from src/LLVMIRGenerator_1.h rename to src/include/LLVMIRGenerator_1.h diff --git a/src/SysYFormatter.h b/src/include/SysYFormatter.h similarity index 100% rename from src/SysYFormatter.h rename to src/include/SysYFormatter.h diff --git a/src/include/SysYIRAnalyser.h b/src/include/SysYIRAnalyser.h new file mode 100644 index 0000000..e69de29 diff --git a/src/SysYIRGenerator.h b/src/include/SysYIRGenerator.h similarity index 90% rename from src/SysYIRGenerator.h rename to src/include/SysYIRGenerator.h index 3c89ce0..638986a 100644 --- a/src/SysYIRGenerator.h +++ b/src/include/SysYIRGenerator.h @@ -134,15 +134,6 @@ public: std::any visitLOrExp(SysYParser::LOrExpContext *ctx) override; std::any visitConstExp(SysYParser::ConstExpContext *ctx) override; -private: - std::any visitConstGlobalDecl(SysYParser::ConstDeclContext *ctx, Type* type); - std::any visitVarGlobalDecl(SysYParser::VarDeclContext *ctx, Type* type); - std::any visitConstLocalDecl(SysYParser::ConstDeclContext *ctx, Type* type); - std::any visitVarLocalDecl(SysYParser::VarDeclContext *ctx, Type* type); - Type *getArithmeticResultType(Type *lhs, Type *rhs) { - assert(lhs->isIntOrFloat() and rhs->isIntOrFloat()); - return lhs == rhs ? lhs : Type::getFloatType(); - } }; // class SysYIRGenerator diff --git a/src/range.h b/src/include/range.h similarity index 100% rename from src/range.h rename to src/include/range.h