fix(frontend): 修复部分实现
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
add_subdirectory(utils)
|
||||
add_subdirectory(ir)
|
||||
add_subdirectory(frontend)
|
||||
add_subdirectory(sem)
|
||||
add_subdirectory(irgen)
|
||||
add_subdirectory(mir)
|
||||
|
||||
@@ -11,6 +12,7 @@ add_executable(compiler
|
||||
)
|
||||
target_link_libraries(compiler PRIVATE
|
||||
frontend
|
||||
sem
|
||||
irgen
|
||||
mir
|
||||
utils
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// 调用 ANTLR 生成的 Lexer/Parser,返回 parse tree。
|
||||
// 调用前端解析流程,返回语法树。
|
||||
#include "frontend/AntlrDriver.h"
|
||||
|
||||
#include <fstream>
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
add_library(frontend STATIC
|
||||
AntlrDriver.cpp
|
||||
SyntaxTreePrinter.cpp
|
||||
)
|
||||
|
||||
target_link_libraries(frontend PUBLIC
|
||||
|
||||
183
src/frontend/SyntaxTreePrinter.cpp
Normal file
183
src/frontend/SyntaxTreePrinter.cpp
Normal file
@@ -0,0 +1,183 @@
|
||||
#include "frontend/SyntaxTreePrinter.h"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "SysYParser.h"
|
||||
|
||||
namespace {
|
||||
|
||||
// Resolves a printable name for `tok` through the parser's vocabulary.
//
// Lookup order: the symbolic name, then the literal name, and finally the
// numeric token type rendered as decimal text when both names are empty.
// Returns "UNKNOWN" if either `tok` or `parser` is null.
std::string GetTokenName(const antlr4::Token* tok, antlr4::Parser* parser) {
  if (tok == nullptr || parser == nullptr) {
    return "UNKNOWN";
  }

  const int type = tok->getType();
  const auto& vocabulary = parser->getVocabulary();

  std::string symbolic(vocabulary.getSymbolicName(type));
  if (!symbolic.empty()) {
    return symbolic;
  }

  std::string literal(vocabulary.getLiteralName(type));
  if (!literal.empty()) {
    return literal;
  }

  // Neither name is available: fall back to the raw token type.
  return std::to_string(type);
}
|
||||
|
||||
// Returns true when `token_name` is one of the token names the printer keeps:
// "Ident", "Number", "Assign" or "AddOp". Every other name yields false.
bool KeepImportantToken(const std::string& token_name) {
  static const char* const kKeptNames[] = {"Ident", "Number", "Assign",
                                           "AddOp"};
  for (const char* kept : kKeptNames) {
    if (token_name == kept) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
// Renders a primary expression as compact text.
//
// A Number or Ident child is returned as its own text; a nested expression is
// returned as its raw text wrapped in parentheses; anything else falls back to
// the raw text of the whole node. A null `primary` yields an empty string.
std::string PrettyPrimary(SysYParser::PrimaryContext* primary) {
  if (primary == nullptr) {
    return std::string();
  }
  if (auto* number = primary->Number()) {
    return number->getText();
  }
  if (auto* ident = primary->Ident()) {
    return ident->getText();
  }
  if (auto* nested = primary->exp()) {
    return "(" + nested->getText() + ")";
  }
  return primary->getText();
}
|
||||
|
||||
// Renders an additive expression by joining the pretty form of each primary
// operand with " + ". Returns an empty string for a null context or when the
// context has no primary operands.
std::string PrettyAddExp(SysYParser::AddExpContext* add_exp) {
  std::string joined;
  if (add_exp == nullptr) {
    return joined;
  }

  const auto operands = add_exp->primary();
  bool is_first = true;
  for (auto* operand : operands) {
    if (!is_first) {
      joined += " + ";
    }
    joined += PrettyPrimary(operand);
    is_first = false;
  }
  return joined;
}
|
||||
|
||||
// Renders an expression by delegating to PrettyAddExp on its additive child.
// Returns an empty string when `exp` is null or has no additive child.
std::string PrettyExp(SysYParser::ExpContext* exp) {
  auto* additive = (exp != nullptr) ? exp->addExp() : nullptr;
  return (additive != nullptr) ? PrettyAddExp(additive) : std::string();
}
|
||||
|
||||
// Produces a short, source-like summary for selected rule contexts.
//
// Supported contexts and their output:
//   - VarDecl:    "int <name>;" or "int <name> = <exp>;"
//   - ReturnStmt: "return <exp>;"
//   - FuncDef:    the fixed text "int main()"
//   - Stmt:       the summary of its varDecl / returnStmt child, if any
//   - Exp / AddExp / Primary: the corresponding Pretty* rendering
// Any other context, and a null `rule`, yields an empty string.
std::string PrettyRuleText(antlr4::ParserRuleContext* rule) {
  if (!rule) {
    return "";
  }

  if (auto* var_decl = dynamic_cast<SysYParser::VarDeclContext*>(rule)) {
    // The accessor returns a pointer; guard it so an incomplete declaration
    // cannot crash the printer. Without a name there is nothing useful to
    // summarize, so report no summary at all.
    auto* ident = var_decl->Ident();
    if (!ident) {
      return "";
    }
    std::string out = "int " + ident->getText();
    if (var_decl->exp()) {
      out += " = " + PrettyExp(var_decl->exp());
    }
    out += ";";
    return out;
  }

  if (auto* ret = dynamic_cast<SysYParser::ReturnStmtContext*>(rule)) {
    // PrettyExp tolerates a null expression and returns "" in that case.
    return "return " + PrettyExp(ret->exp()) + ";";
  }

  if (dynamic_cast<SysYParser::FuncDefContext*>(rule) != nullptr) {
    return "int main()";
  }

  if (auto* stmt = dynamic_cast<SysYParser::StmtContext*>(rule)) {
    if (stmt->varDecl()) {
      return PrettyRuleText(stmt->varDecl());
    }
    if (stmt->returnStmt()) {
      return PrettyRuleText(stmt->returnStmt());
    }
    // A statement with neither child falls through to the checks below.
  }

  if (auto* exp = dynamic_cast<SysYParser::ExpContext*>(rule)) {
    return PrettyExp(exp);
  }
  if (auto* add_exp = dynamic_cast<SysYParser::AddExpContext*>(rule)) {
    return PrettyAddExp(add_exp);
  }
  if (auto* primary = dynamic_cast<SysYParser::PrimaryContext*>(rule)) {
    return PrettyPrimary(primary);
  }
  return "";
}
|
||||
|
||||
// Reports whether the subtree rooted at `node` contains at least one terminal
// whose token name passes KeepImportantToken.
//
// A terminal node is visible exactly when its own token name is kept. A
// non-terminal is visible when any of its children is visible. A null `node`
// is treated as not visible, so callers may pass an empty tree safely.
bool HasVisibleNode(antlr4::tree::ParseTree* node, antlr4::Parser* parser) {
  if (!node) {
    return false;
  }

  if (auto* terminal = dynamic_cast<antlr4::tree::TerminalNode*>(node)) {
    const std::string token_name = GetTokenName(terminal->getSymbol(), parser);
    return KeepImportantToken(token_name);
  }

  for (auto* child : node->children) {
    if (HasVisibleNode(child, parser)) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
// Looks up the grammar rule name for `node` in the parser's rule-name table.
// Returns "unknown" when `parser` is null, when `node` is not a
// ParserRuleContext, or when the rule index is outside the table.
std::string RuleName(antlr4::tree::ParseTree* node, antlr4::Parser* parser) {
  auto* context = dynamic_cast<antlr4::ParserRuleContext*>(node);
  if (parser != nullptr && context != nullptr) {
    const int rule_index = context->getRuleIndex();
    const auto& rule_names = parser->getRuleNames();
    const bool in_range =
        rule_index >= 0 && rule_index < static_cast<int>(rule_names.size());
    if (in_range) {
      return rule_names[static_cast<size_t>(rule_index)];
    }
  }
  return "unknown";
}
|
||||
|
||||
// Builds the one-line label printed for `node`.
//
// Terminals are labelled "<token name>: <text>". Rule nodes are labelled with
// the rule name, followed by " (<summary>)" when PrettyRuleText produces a
// non-empty summary for them.
std::string NodeLabel(antlr4::tree::ParseTree* node, antlr4::Parser* parser) {
  if (auto* leaf = dynamic_cast<antlr4::tree::TerminalNode*>(node)) {
    std::string label = GetTokenName(leaf->getSymbol(), parser);
    label += ": ";
    label += node->getText();
    return label;
  }

  std::string label = RuleName(node, parser);
  const std::string summary =
      PrettyRuleText(dynamic_cast<antlr4::ParserRuleContext*>(node));
  if (!summary.empty()) {
    label += " (" + summary + ")";
  }
  return label;
}
|
||||
|
||||
// Recursively prints the visible part of the subtree rooted at `node`.
//
// Parameters:
//   node     subtree to print; skipped entirely if it has no visible node.
//   parser   parser used to resolve token and rule names.
//   os       destination stream.
//   prefix   text emitted before this node's branch connector.
//   is_last  whether this node is the last visible child of its parent;
//            selects "└── " versus "├── ".
//   is_root  whether this is the top-level call; the root is printed without
//            prefix or connector, and its children start with an empty prefix.
void PrintSyntaxTreeImpl(antlr4::tree::ParseTree* node, antlr4::Parser* parser,
                         std::ostream& os, const std::string& prefix,
                         bool is_last, bool is_root) {
  if (!HasVisibleNode(node, parser)) {
    return;
  }

  if (is_root) {
    os << NodeLabel(node, parser) << "\n";
  } else {
    os << prefix << (is_last ? "└── " : "├── ") << NodeLabel(node, parser)
       << "\n";
  }

  // Filter first so that "last child" refers to the last *printed* child.
  std::vector<antlr4::tree::ParseTree*> children;
  for (auto* child : node->children) {
    if (HasVisibleNode(child, parser)) {
      children.push_back(child);
    }
  }

  // The continuation prefix must be as wide as the branch connectors above
  // (four columns), otherwise nested levels do not line up under their parent.
  const std::string child_prefix =
      is_root ? "" : prefix + (is_last ? "    " : "│   ");
  for (size_t i = 0; i < children.size(); ++i) {
    PrintSyntaxTreeImpl(children[i], parser, os, child_prefix,
                        i + 1 == children.size(), false);
  }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Public entry point: prints `tree` to `os` as an indented tree, starting the
// recursion at the root with an empty prefix. `parser` supplies token and rule
// names.
void PrintSyntaxTree(antlr4::tree::ParseTree* tree, antlr4::Parser* parser,
                     std::ostream& os) {
  const std::string root_prefix;
  const bool is_last = true;
  const bool is_root = true;
  PrintSyntaxTreeImpl(tree, parser, os, root_prefix, is_last, is_root);
}
|
||||
9
src/frontend/SyntaxTreePrinter.h
Normal file
9
src/frontend/SyntaxTreePrinter.h
Normal file
@@ -0,0 +1,9 @@
|
||||
#pragma once
|
||||
|
||||
#include <iosfwd>
|
||||
|
||||
#include "antlr4-runtime.h"
|
||||
|
||||
// 以树状缩进形式打印语法树(仅保留关键节点/记号)。
|
||||
void PrintSyntaxTree(antlr4::tree::ParseTree* tree, antlr4::Parser* parser,
|
||||
std::ostream& os);
|
||||
@@ -162,7 +162,7 @@ class Function : public Value {
|
||||
|
||||
class Module {
|
||||
public:
|
||||
// 创建函数时显式传入返回类型,便于在 IRGen 中根据 AST 选择类型。
|
||||
// 创建函数时显式传入返回类型,便于在 IRGen 中根据语法树信息选择类型。
|
||||
Function* CreateFunction(const std::string& name,
|
||||
std::shared_ptr<Type> ret_type);
|
||||
const std::vector<std::unique_ptr<Function>>& functions() const {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// 将 ANTLR parse tree 翻译为极简 IR。
|
||||
// 将语法树翻译为极简 IR。
|
||||
// 实现拆分在 IRGenFunc/IRGenStmt/IRGenExp/IRGenDecl。
|
||||
|
||||
#pragma once
|
||||
@@ -32,7 +32,7 @@ class IRGenImpl {
|
||||
private:
|
||||
void GenFuncDef(SysYParser::FuncDefContext& func);
|
||||
void GenBlock(SysYParser::BlockContext& block);
|
||||
void GenStmt(SysYParser::StmtContext& stmt);
|
||||
bool GenStmt(SysYParser::StmtContext& stmt);
|
||||
void GenVarDecl(SysYParser::VarDeclContext& decl);
|
||||
void GenReturnStmt(SysYParser::ReturnStmtContext& ret);
|
||||
|
||||
|
||||
@@ -6,27 +6,22 @@
|
||||
#include "ir/IR.h"
|
||||
|
||||
void IRGenImpl::GenBlock(SysYParser::BlockContext& block) {
|
||||
for (auto* stmt : block.stmt()) {
|
||||
if (stmt && stmt->varDecl()) {
|
||||
const std::string name = stmt->varDecl()->Ident()->getText();
|
||||
auto* slot = builder_.CreateAllocaI32(ir::DefaultContext().NextTemp());
|
||||
locals_[name] = slot;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto* stmt : block.stmt()) {
|
||||
if (stmt) {
|
||||
GenStmt(*stmt);
|
||||
if (GenStmt(*stmt)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void IRGenImpl::GenVarDecl(SysYParser::VarDeclContext& decl) {
|
||||
const std::string name = decl.Ident()->getText();
|
||||
auto it = locals_.find(name);
|
||||
if (it == locals_.end()) {
|
||||
throw std::runtime_error("[irgen] 变量栈槽未创建: " + name);
|
||||
if (locals_.find(name) != locals_.end()) {
|
||||
throw std::runtime_error("[irgen] 重复定义变量: " + name);
|
||||
}
|
||||
auto* slot = builder_.CreateAllocaI32(ir::DefaultContext().NextTemp());
|
||||
locals_[name] = slot;
|
||||
|
||||
ir::Value* init = nullptr;
|
||||
if (decl.exp()) {
|
||||
@@ -34,5 +29,5 @@ void IRGenImpl::GenVarDecl(SysYParser::VarDeclContext& decl) {
|
||||
} else {
|
||||
init = ir::DefaultContext().GetConstInt(0);
|
||||
}
|
||||
builder_.CreateStore(init, it->second);
|
||||
builder_.CreateStore(init, slot);
|
||||
}
|
||||
|
||||
@@ -9,12 +9,12 @@
|
||||
|
||||
std::unique_ptr<ir::Module> GenerateIR(antlr4::tree::ParseTree* tree) {
|
||||
if (!tree) {
|
||||
throw std::runtime_error("[irgen] parse tree 为空");
|
||||
throw std::runtime_error("[irgen] 语法树为空");
|
||||
}
|
||||
|
||||
auto* cu = dynamic_cast<SysYParser::CompUnitContext*>(tree);
|
||||
if (!cu) {
|
||||
throw std::runtime_error("[irgen] parse tree 根节点不是 compUnit");
|
||||
throw std::runtime_error("[irgen] 语法树根节点不是 compUnit");
|
||||
}
|
||||
|
||||
auto module = std::make_unique<ir::Module>();
|
||||
|
||||
@@ -5,14 +5,14 @@
|
||||
#include "SysYParser.h"
|
||||
#include "ir/IR.h"
|
||||
|
||||
void IRGenImpl::GenStmt(SysYParser::StmtContext& stmt) {
|
||||
bool IRGenImpl::GenStmt(SysYParser::StmtContext& stmt) {
|
||||
if (stmt.varDecl()) {
|
||||
GenVarDecl(*stmt.varDecl());
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
if (stmt.returnStmt()) {
|
||||
GenReturnStmt(*stmt.returnStmt());
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
throw std::runtime_error("[irgen] 暂不支持的语句类型");
|
||||
}
|
||||
|
||||
11
src/main.cpp
11
src/main.cpp
@@ -1,10 +1,13 @@
|
||||
#include <exception>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "frontend/AntlrDriver.h"
|
||||
#include "frontend/SyntaxTreePrinter.h"
|
||||
#include "ir/IR.h"
|
||||
#include "irgen/IRGen.h"
|
||||
#include "mir/MIR.h"
|
||||
#include "sem/Sema.h"
|
||||
#include "utils/CLI.h"
|
||||
#include "utils/Log.h"
|
||||
|
||||
@@ -19,10 +22,16 @@ int main(int argc, char** argv) {
|
||||
auto antlr = ParseFileWithAntlr(opts.input);
|
||||
bool need_blank_line = false;
|
||||
if (opts.emit_parse_tree) {
|
||||
std::cout << antlr.tree->toStringTree(antlr.parser.get()) << "\n";
|
||||
PrintSyntaxTree(antlr.tree, antlr.parser.get(), std::cout);
|
||||
need_blank_line = true;
|
||||
}
|
||||
|
||||
auto* comp_unit = dynamic_cast<SysYParser::CompUnitContext*>(antlr.tree);
|
||||
if (!comp_unit) {
|
||||
throw std::runtime_error("[main] 语法树根节点不是 compUnit");
|
||||
}
|
||||
RunSema(*comp_unit);
|
||||
|
||||
auto module = GenerateIR(antlr.tree);
|
||||
if (opts.emit_ir) {
|
||||
ir::IRPrinter printer;
|
||||
|
||||
@@ -6,5 +6,5 @@ add_library(sem STATIC
|
||||
|
||||
target_link_libraries(sem PUBLIC
|
||||
build_options
|
||||
ast
|
||||
${ANTLR4_RUNTIME_TARGET}
|
||||
)
|
||||
|
||||
@@ -1,58 +1,75 @@
|
||||
// 极简语义分析:只检查变量是否先声明再使用。
|
||||
// 如需扩展,可在此基础上加入:
|
||||
// - 常量折叠/类型检查
|
||||
// - 函数签名/参数数量校验
|
||||
// - 控制流相关检查(return 覆盖、break/continue 合法性等)
|
||||
#include "sem/Sema.h"
|
||||
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
|
||||
#include "ast/AstNodes.h"
|
||||
#include "sem/SymbolTable.h"
|
||||
|
||||
namespace {
|
||||
|
||||
class SemaVisitor {
|
||||
public:
|
||||
explicit SemaVisitor(SymbolTable& table) : table_(table) {}
|
||||
void CheckExpr(SysYParser::ExpContext& exp, const SymbolTable& table);
|
||||
|
||||
void CheckBlock(const ast::Block& block) {
|
||||
for (const auto& item : block.items) {
|
||||
if (auto decl = dynamic_cast<ast::VarDecl*>(item.get())) {
|
||||
table_.Add(decl->name);
|
||||
if (decl->init) CheckExpr(*decl->init);
|
||||
continue;
|
||||
}
|
||||
if (auto ret = dynamic_cast<ast::ReturnStmt*>(item.get())) {
|
||||
CheckExpr(*ret->value);
|
||||
}
|
||||
}
|
||||
void CheckPrimary(SysYParser::PrimaryContext& primary,
|
||||
const SymbolTable& table) {
|
||||
if (primary.Number()) {
|
||||
return;
|
||||
}
|
||||
|
||||
void CheckExpr(const ast::Expr& expr) {
|
||||
if (auto var = dynamic_cast<const ast::VarExpr*>(&expr)) {
|
||||
if (!table_.Contains(var->name)) {
|
||||
throw std::runtime_error("[sema] 使用了未定义的变量: " + var->name);
|
||||
}
|
||||
} else if (auto bin = dynamic_cast<const ast::BinaryExpr*>(&expr)) {
|
||||
CheckExpr(*bin->lhs);
|
||||
CheckExpr(*bin->rhs);
|
||||
|
||||
if (primary.Ident()) {
|
||||
const std::string name = primary.Ident()->getText();
|
||||
if (!table.Contains(name)) {
|
||||
throw std::runtime_error("[sema] 使用了未定义的变量: " + name);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
private:
|
||||
SymbolTable& table_;
|
||||
};
|
||||
if (primary.exp()) {
|
||||
CheckExpr(*primary.exp(), table);
|
||||
return;
|
||||
}
|
||||
|
||||
throw std::runtime_error("[sema] 暂不支持的 primary 形式");
|
||||
}
|
||||
|
||||
// Checks every primary operand of `exp` against `table` via CheckPrimary.
// Throws std::runtime_error if the expression has no additive child.
void CheckExpr(SysYParser::ExpContext& exp, const SymbolTable& table) {
  auto* additive = exp.addExp();
  if (additive == nullptr) {
    throw std::runtime_error("[sema] 非法表达式");
  }
  for (auto* operand : additive->primary()) {
    CheckPrimary(*operand, table);
  }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
std::shared_ptr<ast::CompUnit> RunSema(std::shared_ptr<ast::CompUnit> ast) {
|
||||
if (!ast || !ast->func || !ast->func->body) return ast;
|
||||
void RunSema(SysYParser::CompUnitContext& comp_unit) {
|
||||
auto* func = comp_unit.funcDef();
|
||||
if (!func || !func->block()) {
|
||||
throw std::runtime_error("[sema] 缺少 main 函数定义");
|
||||
}
|
||||
|
||||
SymbolTable table;
|
||||
SemaVisitor visitor(table);
|
||||
visitor.CheckBlock(*ast->func->body);
|
||||
return ast;
|
||||
|
||||
for (auto* stmt : func->block()->stmt()) {
|
||||
if (!stmt) {
|
||||
continue;
|
||||
}
|
||||
if (auto* decl = stmt->varDecl()) {
|
||||
const std::string name = decl->Ident()->getText();
|
||||
if (table.Contains(name)) {
|
||||
throw std::runtime_error("[sema] 重复定义变量: " + name);
|
||||
}
|
||||
if (decl->exp()) {
|
||||
CheckExpr(*decl->exp(), table);
|
||||
}
|
||||
table.Add(name);
|
||||
continue;
|
||||
}
|
||||
if (auto* ret = stmt->returnStmt()) {
|
||||
CheckExpr(*ret->exp(), table);
|
||||
break;
|
||||
}
|
||||
throw std::runtime_error("[sema] 暂不支持的语句类型");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
// 语义检查
|
||||
// 基于语法树的极简语义检查。
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include "SysYParser.h"
|
||||
|
||||
namespace ast {
|
||||
struct CompUnit;
|
||||
}
|
||||
|
||||
// 返回经过检查的 AST(当前直接返回原 AST)。
|
||||
std::shared_ptr<ast::CompUnit> RunSema(std::shared_ptr<ast::CompUnit> ast);
|
||||
// 目前仅检查:
|
||||
// - 变量先声明后使用
|
||||
// - 局部变量不允许重复定义
|
||||
void RunSema(SysYParser::CompUnitContext& comp_unit);
|
||||
|
||||
@@ -14,7 +14,7 @@ void PrintHelp(std::ostream& os) {
|
||||
<< "\n"
|
||||
<< "选项:\n"
|
||||
<< " -h, --help 打印帮助信息并退出\n"
|
||||
<< " --emit-parse-tree 仅在显式模式下启用 ANTLR 语法树输出\n"
|
||||
<< " --emit-parse-tree 仅在显式模式下启用语法树输出\n"
|
||||
<< " --emit-ir 仅在显式模式下启用 IR 输出\n"
|
||||
<< " --emit-asm 仅在显式模式下启用 AArch64 汇编输出\n"
|
||||
<< "\n"
|
||||
|
||||
Reference in New Issue
Block a user