语法/前端解析,构建AST
This commit is contained in:
@@ -1,35 +1,73 @@
|
||||
// SysY 子集语法:支持形如
|
||||
// int main() { int a = 1; int b = 2; return a + b; }
|
||||
// 的最小返回表达式编译。
|
||||
|
||||
// 后续需要自行添加
|
||||
grammar SysY;
|
||||
|
||||
// 说明:
|
||||
// - 这是一个“最小可用”的 SysY.g4,用于避免空文件导致的 ANTLR 解析报错。
|
||||
// - 后续请按 SysY 语言规范逐步补全 lexer/parser 规则。
|
||||
// - 本工程约定:ANTLR 生成的 C++ 源码/头文件不进入仓库,统一生成到构建目录(例如 build/generated/antlr4/)。
|
||||
// Entry rule: a translation unit is exactly one function definition followed by end of input.
compUnit : funcDef EOF ;
|
||||
|
||||
compilationUnit
|
||||
: (statement)* EOF
|
||||
;
|
||||
// Function definition: the tokens Int Main, an empty parameter list, then the body block.
funcDef : Int Main L_PAREN R_PAREN block ;
|
||||
|
||||
statement
|
||||
: 'return' expression? ';'
|
||||
| ';'
|
||||
;
|
||||
// Brace-delimited sequence of zero or more statements.
block : L_BRACE stmt* R_BRACE ;
|
||||
|
||||
expression
|
||||
: IntegerLiteral
|
||||
| Identifier
|
||||
;
|
||||
// A statement is either a variable declaration or a return statement.
stmt : varDecl | returnStmt ;
|
||||
|
||||
// -------- lexer --------
|
||||
// Declaration of a single identifier with an optional `Assign exp` initializer.
varDecl : Int Ident (Assign exp)? Semi ;
|
||||
|
||||
IntegerLiteral
|
||||
: [0-9]+
|
||||
;
|
||||
// Return statement; the expression is mandatory in this subset.
returnStmt : Return exp Semi ;
|
||||
|
||||
Identifier
|
||||
: [a-zA-Z_] [a-zA-Z0-9_]*
|
||||
;
|
||||
// Top-level expression; currently a plain alias for addExp.
exp : addExp ;
|
||||
|
||||
Whitespace
|
||||
: [ \t\r\n]+ -> skip
|
||||
;
|
||||
// One primary followed by zero or more `AddOp primary` pairs.
addExp : primary (AddOp primary)* ;
|
||||
|
||||
// Operand: an integer literal, an identifier, or a parenthesised expression.
primary : Number | Ident | L_PAREN exp R_PAREN ;
|
||||
|
||||
// -------- lexer --------

// Keywords. They are declared before Ident so that, when a keyword and Ident
// match the same text, the earlier rule (the keyword) is chosen.
Int    : 'int' ;
Return : 'return' ;
Main   : 'main' ;

// Operators and punctuation. AddOp currently matches '+' only.
AddOp   : '+' ;
Assign  : '=' ;
Semi    : ';' ;
L_PAREN : '(' ;
R_PAREN : ')' ;
L_BRACE : '{' ;
R_BRACE : '}' ;

// Identifier: a letter or underscore followed by letters, digits or underscores.
Ident : [a-zA-Z_][a-zA-Z_0-9]* ;

// Decimal integer literal.
Number : [0-9]+ ;

// Whitespace is discarded.
WS : [ \t\r\n]+ -> skip ;

// Line comment: from `//` up to (not including) the end of the line.
COMMENT : '//' ~[\r\n]* -> skip ;

// Block comment: non-greedy match up to the first closing `*/`. Declared last
// so the token types of the rules above keep their numbering.
BLOCK_COMMENT : '/*' .*? '*/' -> skip ;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
|
||||
// AST 节点定义与实现:
|
||||
// - 表达式、语句、声明、函数、类型等节点
|
||||
// - 支持后续阶段在节点上附加信息(类型、符号绑定、常量值等)
|
||||
|
||||
#include "ast/AstNodes.h"
|
||||
|
||||
70
src/ast/AstNodes.h
Normal file
70
src/ast/AstNodes.h
Normal file
@@ -0,0 +1,70 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace ast {

/// Operator kinds a BinaryExpr can carry.
enum class BinaryOp { Add, Sub, Mul, Div };

/// Polymorphic base of every expression node.
struct Expr {
  virtual ~Expr() = default;
};

/// Integer literal expression.
struct NumberExpr : Expr {
  int value{};

  explicit NumberExpr(int literal) : value(literal) {}
};

/// Reference to a variable by name.
struct VarExpr : Expr {
  std::string name;

  explicit VarExpr(std::string ident) : name(std::move(ident)) {}
};

/// Binary operation applied to two sub-expressions.
struct BinaryExpr : Expr {
  BinaryOp op;
  std::shared_ptr<Expr> lhs;
  std::shared_ptr<Expr> rhs;

  BinaryExpr(BinaryOp kind, std::shared_ptr<Expr> left,
             std::shared_ptr<Expr> right)
      : op(kind), lhs(std::move(left)), rhs(std::move(right)) {}
};

/// Polymorphic base of every statement node.
struct Stmt {
  virtual ~Stmt() = default;
};

/// `return <value>;`
struct ReturnStmt : Stmt {
  std::shared_ptr<Expr> value;

  explicit ReturnStmt(std::shared_ptr<Expr> result)
      : value(std::move(result)) {}
};

/// Variable declaration. Note that it is not a Stmt; blocks store
/// declarations in their own list.
struct VarDecl {
  std::string name;
  std::shared_ptr<Expr> init;  // nullptr when there is no initializer

  VarDecl(std::string ident, std::shared_ptr<Expr> initializer)
      : name(std::move(ident)), init(std::move(initializer)) {}
};

/// Function body: declarations and statements are kept in two separate lists.
struct Block {
  std::vector<std::shared_ptr<VarDecl>> varDecls;
  std::vector<std::shared_ptr<Stmt>> stmts;
};

/// Function definition: a name plus its body block.
struct FuncDef {
  std::string name;
  std::shared_ptr<Block> body;

  FuncDef(std::string ident, std::shared_ptr<Block> block)
      : name(std::move(ident)), body(std::move(block)) {}
};

/// Root of the AST: holds the single function of the translation unit.
struct CompUnit {
  std::shared_ptr<FuncDef> func;

  explicit CompUnit(std::shared_ptr<FuncDef> function)
      : func(std::move(function)) {}
};

/// Debug dump of the tree rooted at `cu`.
void PrintAST(const CompUnit& cu);

}  // namespace ast
|
||||
@@ -1,4 +1,72 @@
|
||||
// AST 调试打印:
|
||||
// - 以可读形式打印 AST 结构
|
||||
// - 用于验证 AST 构建与语义分析结果,便于定位问题
|
||||
// 简单 AST 调试打印,便于前端验证。
|
||||
|
||||
#include "ast/AstNodes.h"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
namespace ast {
|
||||
|
||||
static void PrintExpr(const Expr* expr);
|
||||
|
||||
// Writes the indent unit to std::cout `depth` times; a depth of zero or less
// writes nothing.
static void PrintIndent(int depth) {
  int remaining = depth;
  while (remaining > 0) {
    std::cout << " ";
    --remaining;
  }
}
|
||||
|
||||
static void PrintExpr(const Expr* expr) {
|
||||
if (auto num = dynamic_cast<const NumberExpr*>(expr)) {
|
||||
std::cout << num->value;
|
||||
} else if (auto var = dynamic_cast<const VarExpr*>(expr)) {
|
||||
std::cout << var->name;
|
||||
} else if (auto bin = dynamic_cast<const BinaryExpr*>(expr)) {
|
||||
std::cout << "(";
|
||||
PrintExpr(bin->lhs.get());
|
||||
const char* op = "?";
|
||||
switch (bin->op) {
|
||||
case BinaryOp::Add:
|
||||
op = "+";
|
||||
break;
|
||||
case BinaryOp::Sub:
|
||||
op = "-";
|
||||
break;
|
||||
case BinaryOp::Mul:
|
||||
op = "*";
|
||||
break;
|
||||
case BinaryOp::Div:
|
||||
op = "/";
|
||||
break;
|
||||
}
|
||||
std::cout << " " << op << " ";
|
||||
PrintExpr(bin->rhs.get());
|
||||
std::cout << ")";
|
||||
}
|
||||
}
|
||||
|
||||
void PrintAST(const CompUnit& cu) {
|
||||
if (!cu.func) return;
|
||||
std::cout << "func " << cu.func->name << " () {\n";
|
||||
const auto& body = cu.func->body;
|
||||
if (!body) {
|
||||
std::cout << "}\n";
|
||||
return;
|
||||
}
|
||||
for (const auto& decl : body->varDecls) {
|
||||
PrintIndent(1);
|
||||
std::cout << "var " << decl->name;
|
||||
if (decl->init) {
|
||||
std::cout << " = ";
|
||||
PrintExpr(decl->init.get());
|
||||
}
|
||||
std::cout << ";\n";
|
||||
}
|
||||
for (const auto& stmt : body->stmts) {
|
||||
if (auto ret = dynamic_cast<ReturnStmt*>(stmt.get())) {
|
||||
PrintIndent(1);
|
||||
std::cout << "return ";
|
||||
PrintExpr(ret->value.get());
|
||||
std::cout << ";\n";
|
||||
}
|
||||
}
|
||||
std::cout << "}\n";
|
||||
}
|
||||
|
||||
} // namespace ast
|
||||
|
||||
@@ -1,5 +1,34 @@
|
||||
// 前端解析驱动:
|
||||
// - 读取源代码
|
||||
// - 调用 ANTLR 生成的 lexer/parser 得到 parse tree
|
||||
// - 对外提供“可用的解析入口”(语法正确性由测试保证)
|
||||
// 调用 ANTLR 生成的 Lexer/Parser,返回 parse tree。
|
||||
#include "frontend/AntlrDriver.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "SysYLexer.h"
|
||||
#include "SysYParser.h"
|
||||
#include "antlr4-runtime.h"
|
||||
|
||||
// Reads the file at `path`, runs the ANTLR-generated SysY lexer and parser over
// its contents starting at rule `compUnit`, and returns the parse tree together
// with the objects that must outlive it.
//
// Throws std::runtime_error when the file cannot be opened, or when the lexer
// or parser reported at least one syntax error. Previously syntax errors were
// dropped silently because the parser's listeners were removed and nothing
// inspected the error count.
AntlrResult ParseFileWithAntlr(const std::string& path) {
  std::ifstream fin(path);
  if (!fin.is_open()) {
    throw std::runtime_error("无法打开输入文件: " + path);
  }
  std::ostringstream ss;
  ss << fin.rdbuf();

  // Build the pipeline directly inside the result; each stage keeps a raw
  // pointer to the previous one, so they must all stay alive together.
  AntlrResult result;
  result.input = std::make_unique<antlr4::ANTLRInputStream>(ss.str());
  result.lexer = std::make_unique<SysYLexer>(result.input.get());
  result.tokens =
      std::make_unique<antlr4::CommonTokenStream>(result.lexer.get());
  result.parser = std::make_unique<SysYParser>(result.tokens.get());

  // Keep the parser quiet on the console; failures are reported through the
  // exception below instead.
  result.parser->removeErrorListeners();
  result.tree = result.parser->compUnit();

  const size_t errorCount = result.lexer->getNumberOfSyntaxErrors() +
                            result.parser->getNumberOfSyntaxErrors();
  if (errorCount != 0) {
    throw std::runtime_error("语法错误: " + path + " (" +
                             std::to_string(errorCount) + " 处)");
  }
  return result;
}
|
||||
|
||||
20
src/frontend/AntlrDriver.h
Normal file
20
src/frontend/AntlrDriver.h
Normal file
@@ -0,0 +1,20 @@
|
||||
// 包装 ANTLR4,提供简易的解析入口。
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "SysYLexer.h"
|
||||
#include "SysYParser.h"
|
||||
#include "antlr4-runtime.h"
|
||||
|
||||
struct AntlrResult {
|
||||
std::unique_ptr<antlr4::ANTLRInputStream> input;
|
||||
std::unique_ptr<SysYLexer> lexer;
|
||||
std::unique_ptr<antlr4::CommonTokenStream> tokens;
|
||||
std::unique_ptr<SysYParser> parser;
|
||||
antlr4::tree::ParseTree* tree = nullptr; // owned by parser
|
||||
};
|
||||
|
||||
// 解析指定文件,发生错误时抛出 std::runtime_error。
|
||||
AntlrResult ParseFileWithAntlr(const std::string& path);
|
||||
@@ -1,4 +1,114 @@
|
||||
// AST 构建:
|
||||
// - 将 ANTLR parse tree 转换为 AST(对应 src/ast/*)
|
||||
// - 在 AST 节点上保留必要的定位信息(可选,用于调试/日志)
|
||||
// 将 parse tree 转换为 AST。
|
||||
#include "frontend/AstBuilder.h"
|
||||
|
||||
#include <any>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "SysYBaseVisitor.h"
|
||||
#include "SysYParser.h"
|
||||
#include "ast/AstNodes.h"
|
||||
#include "antlr4-runtime.h"
|
||||
|
||||
namespace {
|
||||
|
||||
using ast::BinaryExpr;
|
||||
using ast::BinaryOp;
|
||||
using ast::Block;
|
||||
using ast::CompUnit;
|
||||
using ast::FuncDef;
|
||||
using ast::NumberExpr;
|
||||
using ast::ReturnStmt;
|
||||
using ast::VarDecl;
|
||||
using ast::VarExpr;
|
||||
|
||||
// Moves the T stored in `value` out of it and returns it.
//
// The stored type must be exactly T (std::any performs no conversions).
// Throws std::runtime_error when `value` is empty or holds a different type.
// The empty case gets its own message: an empty std::any is what a visitor
// returns when a node produced no result, which is a different failure from a
// genuine type mismatch.
template <typename T>
T Take(std::any&& value) {
  if (!value.has_value()) {
    throw std::runtime_error("AST 构建失败:子节点没有产生结果");
  }
  T* held = std::any_cast<T>(&value);
  if (held == nullptr) {
    throw std::runtime_error("AST 构建失败:类型不匹配");
  }
  return std::move(*held);
}
|
||||
|
||||
class Builder : public SysYBaseVisitor {
|
||||
public:
|
||||
std::any visitCompUnit(SysYParser::CompUnitContext* ctx) override {
|
||||
auto func = Take<std::shared_ptr<FuncDef>>(visit(ctx->funcDef()));
|
||||
return std::make_shared<CompUnit>(std::move(func));
|
||||
}
|
||||
|
||||
std::any visitFuncDef(SysYParser::FuncDefContext* ctx) override {
|
||||
auto body = Take<std::shared_ptr<Block>>(visit(ctx->block()));
|
||||
return std::make_shared<FuncDef>("main", std::move(body));
|
||||
}
|
||||
|
||||
std::any visitBlock(SysYParser::BlockContext* ctx) override {
|
||||
auto block = std::make_shared<Block>();
|
||||
for (auto stmtCtx : ctx->stmt()) {
|
||||
if (stmtCtx->varDecl()) {
|
||||
block->varDecls.emplace_back(
|
||||
Take<std::shared_ptr<VarDecl>>(visit(stmtCtx->varDecl())));
|
||||
} else if (stmtCtx->returnStmt()) {
|
||||
block->stmts.emplace_back(
|
||||
Take<std::shared_ptr<ReturnStmt>>(visit(stmtCtx->returnStmt())));
|
||||
}
|
||||
}
|
||||
return block;
|
||||
}
|
||||
|
||||
std::any visitVarDecl(SysYParser::VarDeclContext* ctx) override {
|
||||
std::shared_ptr<ast::Expr> init;
|
||||
if (ctx->exp()) {
|
||||
init = Take<std::shared_ptr<ast::Expr>>(visit(ctx->exp()));
|
||||
}
|
||||
return std::make_shared<VarDecl>(ctx->Ident()->getText(), std::move(init));
|
||||
}
|
||||
|
||||
std::any visitReturnStmt(SysYParser::ReturnStmtContext* ctx) override {
|
||||
auto expr = Take<std::shared_ptr<ast::Expr>>(visit(ctx->exp()));
|
||||
return std::make_shared<ReturnStmt>(std::move(expr));
|
||||
}
|
||||
|
||||
std::any visitExp(SysYParser::ExpContext* ctx) override {
|
||||
return visit(ctx->addExp());
|
||||
}
|
||||
|
||||
std::any visitAddExp(SysYParser::AddExpContext* ctx) override {
|
||||
auto node = Take<std::shared_ptr<ast::Expr>>(visit(ctx->primary(0)));
|
||||
for (size_t i = 1; i < ctx->primary().size(); ++i) {
|
||||
auto rhs = Take<std::shared_ptr<ast::Expr>>(visit(ctx->primary(i)));
|
||||
auto opToken = ctx->AddOp(i - 1);
|
||||
BinaryOp op = BinaryOp::Add;
|
||||
if (opToken->getText() == "-") op = BinaryOp::Sub;
|
||||
node = std::make_shared<BinaryExpr>(op, std::move(node), std::move(rhs));
|
||||
}
|
||||
return node;
|
||||
}
|
||||
|
||||
std::any visitPrimary(SysYParser::PrimaryContext* ctx) override {
|
||||
if (ctx->Number()) {
|
||||
std::shared_ptr<ast::Expr> expr =
|
||||
std::make_shared<NumberExpr>(std::stoi(ctx->Number()->getText()));
|
||||
return expr;
|
||||
}
|
||||
if (ctx->Ident()) {
|
||||
std::shared_ptr<ast::Expr> expr =
|
||||
std::make_shared<VarExpr>(ctx->Ident()->getText());
|
||||
return expr;
|
||||
}
|
||||
return visit(ctx->exp());
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
// Converts the parse tree rooted at `tree` into the internal AST and returns
// its root. Throws std::runtime_error when `tree` is null or when the visitor
// result is not a std::shared_ptr<ast::CompUnit>.
std::shared_ptr<ast::CompUnit> BuildAst(antlr4::tree::ParseTree* tree) {
  if (tree == nullptr) {
    throw std::runtime_error("parse tree 为空");
  }

  Builder builder;
  return Take<std::shared_ptr<ast::CompUnit>>(builder.visit(tree));
}
|
||||
|
||||
16
src/frontend/AstBuilder.h
Normal file
16
src/frontend/AstBuilder.h
Normal file
@@ -0,0 +1,16 @@
|
||||
// 将 ANTLR parse tree 转换为内部 AST。
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace antlr4 {
|
||||
namespace tree {
|
||||
class ParseTree;
|
||||
}
|
||||
} // namespace antlr4
|
||||
|
||||
namespace ast {
|
||||
struct CompUnit;
|
||||
}
|
||||
|
||||
std::shared_ptr<ast::CompUnit> BuildAst(antlr4::tree::ParseTree* tree);
|
||||
Reference in New Issue
Block a user