vig/src/parser.zig

823 lines
23 KiB
Zig

const std = @import("std");
const scanners = @import("scanner.zig");
const main = @import("main.zig");
const ast = @import("ast.zig");
const tokens = @import("tokens.zig");
const err = @import("errors.zig");
const printer = @import("ast_printer.zig");
const Allocator = std.mem.Allocator;
const Scanner = scanners.Scanner;
const Token = tokens.Token;
const TokenType = tokens.TokenType;
const Result = main.Result;
const Node = ast.Node;
const Expr = ast.Expr;
const Stmt = ast.Stmt;
const TokenList = std.ArrayList(Token);
/// Recursive-descent parser. Tokens are pulled from `scanner` one at a time
/// and AST nodes are allocated with `allocator`.
pub const Parser = struct {
/// Allocator used for the token list and for every AST node created here.
allocator: *Allocator,
/// Token source; `nextToken` reads from it.
scanner: *Scanner,
/// Every token read so far. The last entry is the current token (see `peek`).
tokens: TokenList,
/// Set by `doError`; checked at the end of `parse`.
hadError: bool = false,
/// Builds a parser reading from `scanner`, starting with an empty token
/// list. `hadError` keeps its field default (false).
pub fn init(allocator: *Allocator, scanner: *Scanner) Parser {
    const token_list = TokenList.init(allocator);
    return Parser{
        .scanner = scanner,
        .allocator = allocator,
        .tokens = token_list,
    };
}
/// Releases the token list. AST nodes created by the parser are not
/// touched here.
pub fn deinit(self: *@This()) void {
self.tokens.deinit();
}
/// Marks the parser as failed and prints a diagnostic through
/// `std.debug.warn`: a header with the scanner's current line, then the
/// caller's formatted message, then a newline.
fn doError(self: *Parser, comptime fmt: []const u8, args: ...) void {
    const line = self.scanner.line;
    self.hadError = true;

    std.debug.warn("parser error at line {}\n\t", line);
    std.debug.warn(fmt, args);
    std.debug.warn("\n");
}
/// Returns the current token, i.e. the last one appended by `nextToken`.
/// The token list must not be empty: `len - 1` would underflow.
fn peek(self: *Parser) Token {
    const last_index = self.tokens.len - 1;
    return self.tokens.at(last_index);
}
/// Returns the token read just before the current one.
/// At least two tokens must have been read: `len - 2` would underflow.
fn previous(self: *Parser) Token {
    const before_last = self.tokens.len - 2;
    return self.tokens.at(before_last);
}
/// Reports `msg` against `token` through the `errors` module and always
/// returns `Result.CompileError`. An EOF token is reported as " at end";
/// any other token is reported together with its lexeme.
fn tokenError(self: *Parser, token: Token, msg: []const u8) Result!void {
    switch (token.ttype) {
        .EOF => err.report(token.line, " at end", msg),
        else => err.reportFmt(token.line, " at '{}': {}", token.lexeme, msg),
    }
    return Result.CompileError;
}
/// True when the current token is EOF.
fn isAtEnd(self: *Parser) bool {
return self.peek().ttype == .EOF;
}
/// True when the current token has type `ttype`. Nothing is consumed.
/// Because of the EOF guard, `check(.EOF)` is always false.
fn check(self: *Parser, ttype: TokenType) bool {
    return !self.isAtEnd() and self.peek().ttype == ttype;
}
/// Pulls the next token from the scanner, appends it to the token list
/// (making it the current token) and returns it. Null results from the
/// scanner are skipped; scanner and allocation errors are propagated.
fn nextToken(self: *Parser) !Token {
    while (true) {
        const scanned = (try self.scanner.nextToken()) orelse continue;
        try self.tokens.append(scanned);
        std.debug.warn("skip to {}\n", scanned);
        return scanned;
    }
}
/// Consumes the current token if it has type `ttype` and returns that
/// token. Otherwise reports `msg` against the current token and returns
/// `Result.CompileError`.
///
/// The matched token is captured before advancing: `nextToken` returns the
/// newly scanned token, so returning its result would hand the caller the
/// token after the one that matched.
fn consume(self: *Parser, ttype: TokenType, comptime msg: []const u8) !Token {
    if (self.check(ttype)) {
        const matched = self.peek();
        _ = try self.nextToken();
        return matched;
    }

    // tokenError always fails; the trailing return only satisfies the
    // function's return type.
    try self.tokenError(self.peek(), msg);
    return Result.CompileError;
}
/// Consumes the current token if it has type `ttype` and returns it.
/// Otherwise reports an "expected X, got Y" message against the current
/// token and returns `Result.CompileError`.
fn consumeSingle(self: *Parser, ttype: TokenType) !Token {
    std.debug.warn("consume {}..?", ttype);

    if (!self.check(ttype)) {
        // the message is built on the stack and consumed by tokenError
        // before this frame is left
        var storage: [1000]u8 = undefined;
        const message = try std.fmt.bufPrint(
            storage[0..],
            "expected {}, got {}",
            ttype,
            self.peek().ttype,
        );
        try self.tokenError(self.peek(), message);
        return Result.CompileError;
    }

    const matched = self.peek();
    _ = try self.nextToken();
    std.debug.warn(" now has {}\n", self.peek());
    return matched;
}
/// True when the current token's type is any of `ttypes`. Nothing is
/// consumed. The slice is only read, so it is taken as `[]const`; both
/// mutable slices and pointers to constant array literals coerce to it.
fn compareAnyOf(self: *@This(), ttypes: []const TokenType) bool {
    for (ttypes) |ttype| {
        if (self.check(ttype)) return true;
    }
    return false;
}
/// Allocates a `Node.FnDecl` from the parsed pieces of a function
/// declaration. Fails only if the allocation fails.
fn mkFnDecl(
    self: *Parser,
    name: Token,
    params: ast.ParamList,
    return_type: Token,
    block: ast.StmtList,
) !*ast.Node {
    const decl = ast.FnDecl{
        .func_name = name,
        .params = params,
        .return_type = return_type,
        .body = block,
    };
    const fn_node = try self.allocator.create(Node);
    fn_node.* = Node{ .FnDecl = decl };
    return fn_node;
}
/// Allocates a `Node.ConstDecl` holding the given list of constants.
fn mkConstDecl(self: *Parser, consts: ast.ConstList) !*ast.Node {
    const decl_node = try self.allocator.create(Node);
    decl_node.* = Node{ .ConstDecl = consts };
    return decl_node;
}
/// Allocates a `Node.Block` holding the given statement list.
fn mkBlock(self: *Parser, stmts: ast.StmtList) !*ast.Node {
    const block_node = try self.allocator.create(Node);
    block_node.* = Node{ .Block = stmts };
    return block_node;
}
/// Allocates a `Node.Expr` that points at `expr`.
fn mkExpr(self: *Parser, expr: *Expr) !*ast.Node {
    const wrapper = try self.allocator.create(Node);
    wrapper.* = Node{ .Expr = expr };
    return wrapper;
}
/// Allocates a `Node.Stmt` that points at `stmt`.
fn mkStmt(self: *Parser, stmt: *Stmt) !*ast.Node {
    const wrapper = try self.allocator.create(Node);
    wrapper.* = Node{ .Stmt = stmt };
    return wrapper;
}
/// Allocates an expression statement (`Stmt.Expr`) around `expr`.
fn mkStmtExpr(self: *Parser, expr: *Expr) !*Stmt {
    const expr_stmt = try self.allocator.create(Stmt);
    expr_stmt.* = Stmt{ .Expr = expr };
    return expr_stmt;
}
/// Allocates an `Expr.Grouping` (parenthesised expression) around `expr`.
fn mkGrouping(self: *Parser, expr: *Expr) !*ast.Expr {
    const group = try self.allocator.create(Expr);
    group.* = Expr{ .Grouping = expr };
    return group;
}
/// Allocates an `Expr.Unary` applying `op` to `right`. Also emits a debug
/// trace line.
fn mkUnary(self: *Parser, op: Token, right: *Expr) !*Expr {
    std.debug.warn("Unary\n");
    const unary = ast.UnaryExpr{
        .op = op,
        .right = right,
    };
    const result = try self.allocator.create(Expr);
    result.* = Expr{ .Unary = unary };
    return result;
}
/// Allocates an `Expr.Binary` for `left op right`.
fn mkBinary(self: *Parser, left: *Expr, op: Token, right: *Expr) !*Expr {
    const binary = ast.BinaryExpr{
        .left = left,
        .op = op,
        .right = right,
    };
    const result = try self.allocator.create(Expr);
    result.* = Expr{ .Binary = binary };
    return result;
}
/// Allocates an `Expr.Logical` for `left op right`.
fn mkLogical(self: *Parser, left: *Expr, op: Token, right: *Expr) !*Expr {
    const logical = ast.LogicalExpr{
        .left = left,
        .op = op,
        .right = right,
    };
    const result = try self.allocator.create(Expr);
    result.* = Expr{ .Logical = logical };
    return result;
}
/// Allocates an `Expr.Assign` storing `value` into the variable `name`.
fn mkAssign(self: *Parser, name: Token, value: *Expr) !*Expr {
    const assignment = ast.AssignExpr{
        .name = name,
        .value = value,
    };
    const result = try self.allocator.create(Expr);
    result.* = Expr{ .Assign = assignment };
    return result;
}
/// Allocates an `Expr.VarDecl` declaring `name` with initial `value`;
/// `mutable` is stored alongside the assignment.
fn mkVarDecl(self: *@This(), name: Token, value: *Expr, mutable: bool) !*Expr {
    const initial = ast.AssignExpr{
        .name = name,
        .value = value,
    };
    const decl = ast.VarDecl{
        .assign = initial,
        .mutable = mutable,
    };
    const result = try self.allocator.create(Expr);
    result.* = Expr{ .VarDecl = decl };
    return result;
}
/// Allocates an `Expr.Call` for `callee(args...)`; `paren` is stored in
/// the call node.
fn mkCall(self: *@This(), callee: *Expr, paren: Token, args: ast.ExprList) !*Expr {
    const call = ast.CallExpr{
        .callee = callee,
        .paren = paren,
        .arguments = args,
    };
    const result = try self.allocator.create(Expr);
    result.* = Expr{ .Call = call };
    return result;
}
/// Allocates an `Expr.Struct` (struct literal) of type `name` with the
/// given field initialisers.
fn mkStructExpr(self: *@This(), name: Token, args: ast.StructInitList) !*Expr {
    const literal = ast.StructExpr{
        .name = name,
        .inits = args,
    };
    const result = try self.allocator.create(Expr);
    result.* = Expr{ .Struct = literal };
    return result;
}
/// Allocates a boolean literal expression.
fn mkBool(self: *Parser, val: bool) !*ast.Expr {
    const literal = ast.LiteralExpr{ .Bool = val };
    const result = try self.allocator.create(Expr);
    result.* = Expr{ .Literal = literal };
    return result;
}
/// Allocates an integer literal expression. `val` is stored as given (the
/// slice is not copied or parsed here).
fn mkInteger(self: *Parser, val: []const u8) !*ast.Expr {
    const literal = ast.LiteralExpr{ .Integer = val };
    const result = try self.allocator.create(Expr);
    result.* = Expr{ .Literal = literal };
    return result;
}
/// Allocates a float literal expression. `val` is stored as given (the
/// slice is not copied or parsed here).
fn mkFloat(self: *Parser, val: []const u8) !*ast.Expr {
    const literal = ast.LiteralExpr{ .Float = val };
    const result = try self.allocator.create(Expr);
    result.* = Expr{ .Literal = literal };
    return result;
}
/// Allocates a string literal expression. `val` is stored as given (the
/// slice is not copied).
fn mkString(self: *Parser, val: []const u8) !*ast.Expr {
    const literal = ast.LiteralExpr{ .String = val };
    const result = try self.allocator.create(Expr);
    result.* = Expr{ .Literal = literal };
    return result;
}
/// Allocates an `Expr.Variable` referring to the identifier token
/// `variable`.
fn mkVariable(self: *Parser, variable: Token) !*ast.Expr {
    const reference = try self.allocator.create(Expr);
    reference.* = Expr{ .Variable = variable };
    return reference;
}
/// Parses the whole input into a root node whose children are the
/// top-level declarations, in source order.
///
/// Errors from the scanner, the allocator and the individual parse
/// functions are propagated. `error.ParseError` is returned if `hadError`
/// is set once EOF is reached.
pub fn parse(self: *Parser) !*ast.Node {
    const root = try Node.mkRoot(self.allocator);

    // Read the first token so that peek() has something to look at; each
    // top-level parser leaves the token after its declaration as current.
    _ = try self.nextToken();
    while (self.peek().ttype != .EOF) {
        const decl = try self.parseTopDecl();
        try root.Root.append(decl);
    }

    if (self.hadError) return error.ParseError;
    return root;
}
/// Parses `fn <name>(<param> <type> ...) <return type> { ... }` into a
/// `Node.FnDecl`. Parameters are read as identifier pairs until `)`.
///
/// `parseBlock` returns a heap-allocated `Node` wrapper, but only its
/// statement list is kept, so the wrapper is destroyed here instead of
/// being leaked.
fn parseFnDecl(self: *@This()) !*Node {
    var param_list = ast.ParamList.init(self.allocator);
    errdefer param_list.deinit();

    _ = try self.consumeSingle(.Fn);
    const name = try self.consumeSingle(.Identifier);

    _ = try self.consumeSingle(.LeftParen);
    while (self.peek().ttype != .RightParen) {
        const param_name = try self.consumeSingle(.Identifier);
        // TODO dedicated function to consume a type
        const param_type = try self.consumeSingle(.Identifier);
        try param_list.append(ast.ParamDecl{
            .name = param_name,
            .typ = param_type,
        });
    }
    _ = try self.consumeSingle(.RightParen);

    // TODO dedicated function to consume a type
    const return_type = try self.consumeSingle(.Identifier);

    const block_node = try self.parseBlock();
    var body = block_node.Block;
    self.allocator.destroy(block_node);
    errdefer body.deinit();

    return try self.mkFnDecl(name, param_list, return_type, body);
}
/// Parses `const ( <name> = <expr> ... )` into a `Node.ConstDecl`.
///
/// Each value is parsed with `parseAssignment` directly. `parseExpr` would
/// heap-allocate a `Node` wrapper around the same expression, which would
/// then be unwrapped and never freed.
fn parseConstDecl(self: *@This()) !*Node {
    var consts = ast.ConstList.init(self.allocator);
    errdefer consts.deinit();

    _ = try self.consumeSingle(.Const);
    _ = try self.consumeSingle(.LeftParen);
    while (self.peek().ttype != .RightParen) {
        const const_name = try self.consumeSingle(.Identifier);
        _ = try self.consumeSingle(.Equal);
        const value = try self.parseAssignment();
        try consts.append(ast.SingleConst{
            .name = const_name,
            .expr = value,
        });
    }
    _ = try self.consumeSingle(.RightParen);

    return self.mkConstDecl(consts);
}
/// Parses `struct <name> { <field> <type> ... }` into a struct declaration
/// node. Fields are read as identifier pairs until `}`.
fn parseStructDecl(self: *@This()) !*Node {
    var field_list = ast.FieldList.init(self.allocator);
    errdefer field_list.deinit();

    _ = try self.consumeSingle(.Struct);
    const struct_name = try self.consumeSingle(.Identifier);

    _ = try self.consumeSingle(.LeftBrace);
    while (!self.check(.RightBrace)) {
        // TODO mut and pub
        const field_name = try self.consumeSingle(.Identifier);
        const field_type = try self.consumeSingle(.Identifier);
        const field = ast.StructField{
            .name = field_name,
            .typ = field_type,
        };
        try field_list.append(field);
    }
    _ = try self.consumeSingle(.RightBrace);

    return Node.mkStructDecl(self.allocator, struct_name, field_list);
}
/// Dispatches on the current token to the matching top-level declaration
/// parser (`fn`, `const` or `struct`). Any other token is reported through
/// `doError` and yields `Result.CompileError`.
fn parseTopDecl(self: *@This()) !*Node {
    return switch (self.peek().ttype) {
        .Fn => try self.parseFnDecl(),
        .Const => try self.parseConstDecl(),
        .Struct => try self.parseStructDecl(),
        else => |ttype| {
            // the message lists every keyword accepted above
            self.doError("(basic) expected fn/const/struct, got {}\n", ttype);
            return Result.CompileError;
        },
    };
}
/// Parses `{ <stmt>* }` into a `Node.Block`.
///
/// Each statement is also dumped through the AST printer. The `Node`
/// allocated for that dump is temporary and is destroyed right after
/// printing instead of being leaked once per statement.
fn parseBlock(self: *@This()) !*Node {
    var stmts = ast.StmtList.init(self.allocator);
    errdefer stmts.deinit();

    _ = try self.consumeSingle(.LeftBrace);
    while (self.peek().ttype != .RightBrace) {
        const stmt = try self.parseDecl();
        {
            // NOTE(review): assumes printNode does not keep the pointer
            // after it returns - confirm against ast_printer.zig.
            const debug_node = try self.mkStmt(stmt);
            defer self.allocator.destroy(debug_node);
            printer.printNode(debug_node, 0);
        }
        try stmts.append(stmt);
    }
    _ = try self.consumeSingle(.RightBrace);

    return try self.mkBlock(stmts);
}
/// Parses one declaration inside a block. Currently this only forwards to
/// `parseStmt`.
fn parseDecl(self: *@This()) !*Stmt {
return try self.parseStmt();
}
/// Parses one statement, chosen by the current token: `if`, `loop`,
/// `println` and `return` have dedicated parsers; anything else is parsed
/// as an expression statement.
fn parseStmt(self: *@This()) anyerror!*Stmt {
    const kind = self.peek().ttype;
    switch (kind) {
        .If => return try self.parseIfStmt(),
        .Loop => return try self.parseLoop(),
        .Println => return try self.parsePrintln(),
        .Return => return try self.parseReturn(),

        // TODO make newlines tokens and consume newline?
        else => return try self.parseStmtExpr(),
    }
}
/// Copy of parseBlock for blocks in statements: parses `{ <stmt>* }` and
/// returns the statement list itself rather than a `Node`.
///
/// Each statement is also dumped through the AST printer. The `Node`
/// allocated for that dump is temporary and is destroyed right after
/// printing instead of being leaked once per statement.
fn parseStmtBlock(self: *@This()) !ast.Block {
    var block = ast.Block.init(self.allocator);
    errdefer block.deinit();

    _ = try self.consumeSingle(.LeftBrace);
    while (self.peek().ttype != .RightBrace) {
        const stmt = try self.parseDecl();
        {
            // NOTE(review): assumes printNode does not keep the pointer
            // after it returns - confirm against ast_printer.zig.
            const debug_node = try self.mkStmt(stmt);
            defer self.allocator.destroy(debug_node);
            printer.printNode(debug_node, 0);
        }
        try block.append(stmt);
    }
    _ = try self.consumeSingle(.RightBrace);

    return block;
}
/// Parses `if <expr> { ... }` with an optional `else { ... }`.
///
/// The condition is parsed with `parseAssignment` directly. `parseExpr`
/// would heap-allocate a `Node` wrapper around the same expression, which
/// would then be unwrapped and never freed.
fn parseIfStmt(self: *@This()) !*Stmt {
    _ = try self.consumeSingle(.If);

    const condition = try self.parseAssignment();
    const then_branch = try self.parseStmtBlock();

    var else_branch: ?ast.Block = null;
    if (self.check(.Else)) {
        _ = try self.consumeSingle(.Else);
        else_branch = try self.parseStmtBlock();
    }

    return try Stmt.mkIfStmt(
        self.allocator,
        condition,
        then_branch,
        else_branch,
    );
}
/// Parses `loop { ... }` (no condition) or `loop <expr> { ... }`.
///
/// The condition is parsed with `parseAssignment` directly. `parseExpr`
/// would heap-allocate a `Node` wrapper around the same expression, which
/// would then be unwrapped and never freed.
fn parseLoop(self: *@This()) !*Stmt {
    _ = try self.consumeSingle(.Loop);

    // a `{` straight after `loop` means an infinite loop: no condition
    var condition: ?*Expr = null;
    if (!self.check(.LeftBrace)) {
        condition = try self.parseAssignment();
    }

    const body = try self.parseStmtBlock();
    return try Stmt.mkLoop(self.allocator, condition, body);
}
/// Parses `return <expr>`. An expression is always required after the
/// keyword.
///
/// The value is parsed with `parseAssignment` directly. `parseExpr` would
/// heap-allocate a `Node` wrapper around the same expression, which would
/// then be unwrapped and never freed.
fn parseReturn(self: *@This()) !*Stmt {
    const keyword = try self.consumeSingle(.Return);
    const value = try self.parseAssignment();
    return try Stmt.mkReturn(self.allocator, keyword, value);
}
/// Parses `println(<expr>)`.
///
/// The argument is parsed with `parseAssignment` directly. `parseExpr`
/// would heap-allocate a `Node` wrapper around the same expression, which
/// would then be unwrapped and never freed.
fn parsePrintln(self: *@This()) !*Stmt {
    _ = try self.consumeSingle(.Println);
    _ = try self.consumeSingle(.LeftParen);
    const argument = try self.parseAssignment();
    _ = try self.consumeSingle(.RightParen);
    return try Stmt.mkPrintln(self.allocator, argument);
}
/// Parses an expression and wraps it as an expression statement.
///
/// The expression is parsed with `parseAssignment` directly. `parseExpr`
/// would heap-allocate a `Node` wrapper around the same expression, which
/// would then be unwrapped and never freed.
fn parseStmtExpr(self: *@This()) !*Stmt {
    const expr = try self.parseAssignment();
    return try self.mkStmtExpr(expr);
}
/// Parses a full expression (starting at assignment level) and returns it
/// wrapped in a freshly allocated `Node.Expr`.
fn parseExpr(self: *@This()) anyerror!*Node {
    const parsed = try self.parseAssignment();
    return self.mkExpr(parsed);
}
/// Parses the lowest-precedence expression level. Three shapes can come
/// out of this function:
/// - `[mut] name := value`, a variable declaration (`Expr.VarDecl`)
/// - `name = value`, an assignment to an existing variable (`Expr.Assign`)
/// - any other expression, returned unchanged
///
/// A declaration is conceptually a statement, but it is wrapped in an
/// `Expr` so that every result of this function has the same type.
///
/// The right-hand side is parsed recursively, so assignments chain to the
/// right. The target must be a plain variable. A leading `mut` is only
/// accepted together with `:=`; in any other position it is reported as an
/// error rather than silently dropped.
fn parseAssignment(self: *@This()) anyerror!*Expr {
    var mutable = false;

    std.debug.warn("start assignment pass with cur={}\n", self.peek());
    if (self.check(.Mut)) {
        _ = try self.consumeSingle(.Mut);
        mutable = true;
    }

    const expr = try self.parseOr();
    std.debug.warn("lvalue: {}, cur: {}\n", expr, self.peek());

    if (!(self.check(.ColonEqual) or self.check(.Equal))) {
        if (mutable) {
            self.doError("'mut' is only valid in a ':=' declaration");
            return Result.CompileError;
        }
        return expr;
    }

    const op = self.peek();
    _ = try self.nextToken();
    const value = try self.parseAssignment();

    if (ast.ExprType(expr.*) != .Variable) {
        self.doError("Invalid assignment target");
        return Result.CompileError;
    }

    switch (op.ttype) {
        .ColonEqual => return try self.mkVarDecl(expr.Variable, value, mutable),
        .Equal => {
            if (mutable) {
                self.doError("'mut' is only valid in a ':=' declaration");
                return Result.CompileError;
            }
            return try self.mkAssign(expr.Variable, value);
        },
        else => unreachable,
    }
}
/// Parses a left-associative chain of `or` operators over `parseAnd`
/// operands.
fn parseOr(self: *@This()) !*Expr {
    var left = try self.parseAnd();
    while (self.check(.Or)) {
        const operator = self.peek();
        _ = try self.nextToken();
        const operand = try self.parseAnd();
        left = try self.mkLogical(left, operator, operand);
    }
    return left;
}
/// Parses a left-associative chain of `and` operators over `parseEquality`
/// operands.
fn parseAnd(self: *@This()) !*Expr {
    var left = try self.parseEquality();
    while (self.check(.And)) {
        const operator = self.peek();
        _ = try self.nextToken();
        const operand = try self.parseEquality();
        left = try self.mkLogical(left, operator, operand);
    }
    return left;
}
/// Parses a left-associative chain of `==` operators over
/// `parseComparison` operands.
fn parseEquality(self: *@This()) !*Expr {
    var left = try self.parseComparison();
    while (self.check(.EqualEqual)) {
        const operator = self.peek();
        _ = try self.nextToken();
        const operand = try self.parseComparison();
        left = try self.mkBinary(left, operator, operand);
    }
    return left;
}
/// Parses a left-associative chain of `>`, `>=`, `<`, `<=` operators over
/// `parseAddition` operands.
fn parseComparison(self: *@This()) !*Expr {
    var left = try self.parseAddition();
    while (self.compareAnyOf(&[_]TokenType{ .Greater, .GreaterEqual, .Less, .LessEqual })) {
        const operator = self.peek();
        _ = try self.nextToken();
        const operand = try self.parseAddition();
        left = try self.mkBinary(left, operator, operand);
    }
    return left;
}
/// Parses a left-associative chain of `-` and `+` operators over
/// `parseMultiplication` operands.
fn parseAddition(self: *@This()) !*Expr {
    var left = try self.parseMultiplication();
    while (self.compareAnyOf(&[_]TokenType{ .Minus, .Plus })) {
        const operator = self.peek();
        _ = try self.nextToken();
        const operand = try self.parseMultiplication();
        left = try self.mkBinary(left, operator, operand);
    }
    return left;
}
/// Parses a left-associative chain of `*` and `/` operators over
/// `parseUnary` operands.
fn parseMultiplication(self: *@This()) !*Expr {
    var left = try self.parseUnary();
    while (self.compareAnyOf(&[_]TokenType{ .Star, .Slash })) {
        const operator = self.peek();
        _ = try self.nextToken();
        const operand = try self.parseUnary();
        left = try self.mkBinary(left, operator, operand);
    }
    return left;
}
/// Parses a prefix `!` or `-` applied to another unary expression, or
/// falls through to `parseCall`.
///
/// `compareAnyOf` only inspects the current token, so the operator is
/// captured with `peek` and then consumed before recursing, the same way
/// the binary-operator parsers do it. Without the consume step the
/// recursion would see the same operator again and never terminate.
fn parseUnary(self: *@This()) anyerror!*Expr {
    if (self.compareAnyOf(&[_]TokenType{ .Bang, .Minus })) {
        const op = self.peek();
        _ = try self.nextToken();
        const right = try self.parseUnary();
        return try self.mkUnary(op, right);
    }

    return try self.parseCall();
}
/// Parses a primary expression followed by any number of postfix forms:
/// - `(` args `)`, a function call; the callee must be a plain variable
/// - `.{` inits `}`, a struct literal (validated in `finishStructVal`)
///
/// A primary is parsed rather than a bare identifier because this rule
/// sits in the precedence chain and the rules above it expect any primary
/// to come through here.
fn parseCall(self: *@This()) !*Expr {
    var result = try self.parsePrimary();

    while (true) {
        std.debug.warn("maybe fncall / struct: {}\n", self.peek().ttype);
        printer.printExpr(result);

        if (self.check(.LeftParen)) {
            if (ast.ExprType(result.*) != .Variable) {
                self.doError("cannot call non-variable {}", ast.ExprType(result.*));
                return Result.CompileError;
            }
            _ = try self.consumeSingle(.LeftParen);
            result = try self.finishCall(result);
            continue;
        }

        if (!self.check(.Dot)) break;

        _ = try self.consumeSingle(.Dot);
        _ = try self.consumeSingle(.LeftBrace);
        result = try self.finishStructVal(result);
    }

    return result;
}
/// Parses the comma-separated argument list and closing `)` of a call
/// whose `(` has already been consumed, and builds the `Expr.Call`.
///
/// Arguments are parsed with `parseAssignment` directly. `parseExpr` would
/// heap-allocate a `Node` wrapper around each argument, which would then
/// be unwrapped and never freed.
fn finishCall(self: *@This(), callee: *Expr) !*Expr {
    var args = ast.ExprList.init(self.allocator);
    errdefer args.deinit();

    if (!self.check(.RightParen)) {
        // at least one argument, then more for as long as a comma follows
        while (true) {
            const arg = try self.parseAssignment();
            try args.append(arg);
            if (!self.check(.Comma)) break;
            _ = try self.consumeSingle(.Comma);
        }
    }

    const paren = try self.consume(.RightParen, "Expected ')' after arguments");
    return self.mkCall(callee, paren, args);
}
/// Parses the body of a struct literal, `<expr>.{a: 10 b: 10}`, after
/// `parseCall` has consumed the `.` and the `{`. Reads `<field>: <value>`
/// pairs until `}` and builds the `Expr.Struct`.
///
/// `expr` names the struct type, so it must be a plain variable.
///
/// Field values are parsed with `parseAssignment` directly. `parseExpr`
/// would heap-allocate a `Node` wrapper around each value, which would
/// then be unwrapped and never freed.
fn finishStructVal(self: *@This(), expr: *Expr) !*Expr {
    if (ast.ExprType(expr.*) != .Variable) {
        self.doError("Expected variable for struct type, got {}", ast.ExprType(expr.*));
        return Result.CompileError;
    }

    var inits = ast.StructInitList.init(self.allocator);
    errdefer inits.deinit();

    while (!self.check(.RightBrace)) {
        const field_name = try self.consumeSingle(.Identifier);
        // TODO check .Comma for the quick initialization {val,val,val}
        _ = try self.consumeSingle(.Colon);
        const field_value = try self.parseAssignment();

        try inits.append(ast.StructInit{
            .field = field_name,
            .expr = field_value,
        });
    }
    _ = try self.consumeSingle(.RightBrace);

    return try self.mkStructExpr(expr.Variable, inits);
}
/// Parses a primary expression: a boolean, integer, float or string
/// literal, an identifier, or a parenthesised expression. Any other token
/// is reported through `doError` and yields `Result.CompileError`.
///
/// For literals and identifiers the node is built from the current token,
/// which is then consumed at the end. The grouping branch consumes its own
/// tokens, including the `)`, and returns early.
///
/// The grouped expression is parsed with `parseAssignment` directly.
/// `parseExpr` would heap-allocate a `Node` wrapper around it, which would
/// then be unwrapped and never freed.
fn parsePrimary(self: *@This()) !*Expr {
    const current = self.peek();
    const lexeme = current.lexeme;

    const expr = switch (current.ttype) {
        .False => try self.mkBool(false),
        .True => try self.mkBool(true),
        .Integer => try self.mkInteger(lexeme),
        .Float => try self.mkFloat(lexeme),
        .String => try self.mkString(lexeme),
        .Identifier => try self.mkVariable(current),
        .LeftParen => {
            _ = try self.nextToken();
            const inner = try self.parseAssignment();
            _ = try self.consume(.RightParen, "Expected ')' after expression");
            // RightParen is already consumed, so skip the shared
            // nextToken() below.
            return try self.mkGrouping(inner);
        },
        else => {
            self.doError("expected literal, got {}", current.ttype);
            return Result.CompileError;
        },
    };

    _ = try self.nextToken();
    return expr;
}
};