From f70f2ea1aecf43eeac2f61cc50d3b526ab5dde89 Mon Sep 17 00:00:00 2001 From: Luna Date: Wed, 5 Jun 2019 22:06:12 -0300 Subject: [PATCH] add basic parser --- src/ast.zig | 10 ++ src/errors.zig | 14 +++ src/main.zig | 4 + src/parser.zig | 237 ++++++++++++++++++++++++++++++++++++++++++------ src/runner.zig | 30 +++--- src/scanner.zig | 6 ++ src/tokens.zig | 3 + 7 files changed, 263 insertions(+), 41 deletions(-) create mode 100644 src/errors.zig diff --git a/src/ast.zig b/src/ast.zig index 7588cc7..c9d44e8 100644 --- a/src/ast.zig +++ b/src/ast.zig @@ -6,6 +6,9 @@ pub const ExprType = enum { Binary, Grouping, Number, + Bool, + Nil, + String, }; pub const Expr = union(ExprType) { @@ -13,6 +16,9 @@ pub const Expr = union(ExprType) { Binary: BinaryExpr, Grouping: Grouping, Number: Number, + Bool: Bool, + Nil: Nil, + String: String, }; pub const UnaryExpr = struct { @@ -91,3 +97,7 @@ pub fn mkNum(comptime T: type, num: T) Expr { return expr; } + +pub const Bool = bool; +pub const Nil = void; +pub const String = []u8; diff --git a/src/errors.zig b/src/errors.zig new file mode 100644 index 0000000..201afb0 --- /dev/null +++ b/src/errors.zig @@ -0,0 +1,14 @@ +const std = @import("std"); +pub fn report(line: usize, where: []const u8, message: []const u8) void { + std.debug.warn("[line {}] Error{}: {}", line, where, message); +} + +pub fn reportN(line: usize, message: []const u8) void { + report(line, "", message); +} + +pub fn reportFmt(line: usize, comptime fmt: []const u8, args: ...) void { + std.debug.warn("[line {}] Error", line); + std.debug.warn(fmt, args); + std.debug.warn("\n"); +} diff --git a/src/main.zig b/src/main.zig index b9ca981..96c4c58 100644 --- a/src/main.zig +++ b/src/main.zig @@ -6,6 +6,7 @@ const Allocator = std.mem.Allocator; pub const Result = error{ Ok, + ScannerError, CompileError, }; @@ -53,6 +54,9 @@ fn runPrompt(allocator: *Allocator) !void { run(allocator, line) catch |err| { switch (err) { Result.Ok => {}, + Result.ScannerError => blk: { + try stdout.print("scanner error.\n"); + }, Result.CompileError => blk: { try stdout.print("compile error.\n"); }, diff --git a/src/parser.zig b/src/parser.zig index 5144bb5..2e84369 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -3,19 +3,22 @@ const scanners = @import("scanner.zig"); const main = @import("main.zig"); const ast = @import("ast.zig"); const tokens = @import("tokens.zig"); +const err = @import("errors.zig"); const Allocator = std.mem.Allocator; const Scanner = scanners.Scanner; -const AstNode = ast.AstNode; const Token = tokens.Token; const TokenType = tokens.TokenType; const Result = main.Result; +const Expr = ast.Expr; + pub const Parser = struct { allocator: *Allocator, scanner: *Scanner, - current: Token = undefined, - root: AstNode = undefined, + + tokens: []Token = undefined, + current: usize = 0, pub fn init(allocator: *Allocator, scanner: *Scanner) Parser { return Parser{ .allocator = allocator, .scanner = scanner }; @@ -29,45 +32,223 @@ pub const Parser = struct { return Result.CompileError; } - fn advance(self: *Parser) !void { - var tok_opt = try self.scanner.nextToken(); - if (tok_opt) |tok| { - self.current = tok; - } + fn peek(self: *Parser) Token { + return self.tokens[self.current]; } - fn accept(self: *Parser, ttype: TokenType) !bool { - if (self.current.ttype == ttype) { - try self.advance(); - return true; + fn previous(self: *Parser) Token { + return self.tokens[self.current - 1]; + } + + fn tokenError(self: *Parser, token: Token, msg: []const u8) Result!void { + if (token.ttype == .EOF) { + err.report(token.line, " at end", msg); } else { - return false; + err.reportFmt(token.line, " at '{}': {}", token.lexeme, msg); + } + + return Result.CompileError; + } + + fn synchronize(self: *Parser) void { + _ = self.advance(); + while (!self.isAtEnd()) { + if (self.previous().ttype == .Semicolon) return; + + switch (self.peek().ttype) { + .Struct, .Fn, .For, .If, .Return => return, + else => {}, + } + + _ = self.advance(); } } - fn expect(self: *Parser, ttype: TokenType) !void { - if (!try self.accept(ttype)) { - try self.doError("expected {x}, got {}", ttype, self.current.ttype); + fn isAtEnd(self: *Parser) bool { + return self.peek().ttype == .EOF; + } + + fn advance(self: *Parser) Token { + if (!self.isAtEnd()) self.current += 1; + return self.previous(); + } + + fn check(self: *Parser, ttype: TokenType) bool { + if (self.isAtEnd()) return false; + return self.peek().ttype == ttype; + } + + fn match(self: *Parser, ttypes: []TokenType) bool { + for (ttypes) |ttype| { + if (self.check(ttype)) { + _ = self.advance(); + return true; + } } + + return false; } - fn statement(self: *Parser) !void {} + fn matchSingle(self: *Parser, ttype: TokenType) bool { + if (self.check(ttype)) { + _ = self.advance(); + return true; + } - fn block(self: *Parser) !void {} - - fn program(self: *Parser) !void { - try self.advance(); - try self.block(); - try self.expect(.EOF); + return false; } - pub fn parse(self: *Parser) !AstNode { - self.root = AstNode{ - .Program = try self.allocator.alloc(AstNode, 0), + fn consume(self: *Parser, ttype: TokenType, comptime msg: []const u8) Result!Token { + if (self.check(ttype)) return self.advance(); + + try self.tokenError(self.peek(), msg); + return Result.CompileError; + } + + fn equality(self: *Parser) !Expr { + var expr = try self.comparison(); + + while (self.match(&[]TokenType{ TokenType.BangEqual, TokenType.EqualEqual })) { + var operator = self.previous(); + var right = try self.comparison(); + + expr = ast.mkBinary(&expr, operator, &right); + } + + return expr; + } + + fn comparison(self: *Parser) !Expr { + var expr = try self.addition(); + + while (self.match(&[]TokenType{ + TokenType.Greater, + TokenType.GreaterEqual, + TokenType.Less, + TokenType.LessEqual, + })) { + var operator = self.previous(); + var right = try self.addition(); + expr = ast.mkBinary(&expr, operator, &right); + } + + return expr; + } + + fn addition(self: *Parser) !Expr { + var expr = try self.multiplication(); + + while (self.match(&[]TokenType{ TokenType.Minus, TokenType.Plus })) { + var operator = self.previous(); + var right = try self.multiplication(); + expr = ast.mkBinary(&expr, operator, &right); + } + + return expr; + } + + fn multiplication(self: *Parser) anyerror!Expr { + var expr = try self.unary(); + + while (self.match(&[]TokenType{ TokenType.Slash, TokenType.Star })) { + var operator = self.previous(); + var right = try self.unary(); + expr = ast.mkBinary(&expr, operator, &right); + } + + return expr; + } + + fn unary(self: *Parser) anyerror!Expr { + if (self.match(&[]TokenType{ TokenType.Bang, TokenType.Minus })) { + var operator = self.previous(); + var right = try self.unary(); + return ast.mkUnary(operator, &right); + } + + return try self.primary(); + } + + fn doInt(self: *Parser) !Expr { + var token = self.previous(); + + // try to parse it as an i32 first, if that fails, do i64. + + var num_32 = std.fmt.parseInt(i32, token.lexeme, 10) catch |pi_err| { + if (pi_err == error.Overflow) { + var num_64 = std.fmt.parseInt(i64, token.lexeme, 10) catch |pi_err2| { + if (pi_err2 == error.Overflow) { + try self.tokenError(token, "Failed to parse number: overflow"); + return Result.CompileError; + } else { + return pi_err2; + } + }; + + return ast.mkNum(i64, num_64); + } else { + return pi_err; + } }; - try self.program(); + return ast.mkNum(i32, num_32); + } - return self.root; + fn primary(self: *Parser) !Expr { + if (self.matchSingle(TokenType.False)) return ast.Expr{ .Bool = false }; + if (self.matchSingle(TokenType.True)) return ast.Expr{ .Bool = true }; + if (self.matchSingle(TokenType.None)) return ast.Expr{ .Nil = {} }; + + if (self.matchSingle(TokenType.Integer)) { + return try self.doInt(); + } + + if (self.matchSingle(TokenType.String)) { + var lexeme = self.previous().lexeme; + var slice = try self.allocator.alloc(u8, lexeme.len); + std.mem.copy(u8, slice, lexeme); + + return ast.Expr{ .String = slice }; + } + + if (self.matchSingle(TokenType.LeftParen)) { + var expr = try self.expression(); + _ = try self.consume(.RightParen, "Expect ')' after expression"); + + return ast.mkGrouping(&expr); + } + + try self.tokenError(self.peek(), "Expect expression"); + return Result.CompileError; + } + + fn expression(self: *Parser) !Expr { + return try self.equality(); + } + + pub fn parse(self: *Parser) !?*Expr { + self.tokens = try self.allocator.alloc(Token, 0); + var i: usize = 0; + + while (true) { + var tok_opt = try self.scanner.nextToken(); + if (tok_opt) |token| { + self.tokens = try self.allocator.realloc(self.tokens, i + 1); + self.tokens[i] = token; + i += 1; + + if (token.ttype == .EOF) break; + } + } + + std.debug.warn("{} tokens\n", i); + + //return Expr{ .Number = ast.Number{ .Integer32 = 69 } }; + //return self.root; + var expr = self.expression() catch |parse_err| { + return null; + }; + + return &expr; } }; diff --git a/src/runner.zig b/src/runner.zig index 209842d..c33eef5 100644 --- a/src/runner.zig +++ b/src/runner.zig @@ -28,7 +28,7 @@ pub const Runner = struct { err, ); - return Result.CompileError; + return Result.ScannerError; }; if (tok_opt) |tok| { @@ -43,20 +43,24 @@ pub const Runner = struct { try self.testScanner(&scanner); scanner = scanners.Scanner.init(self.allocator, code); - //var parser = Parser.init(self.allocator, &scanner); - //var tree = try parser.parse(); + var parser = Parser.init(self.allocator, &scanner); + var expr_opt = try parser.parse(); - var expr = ast.mkBinary( - &ast.mkUnary( - tokens.Token{ .ttype = .Minus, .lexeme = "-", .line = 1 }, - &ast.mkNum(i32, 123), - ), - tokens.Token{ .ttype = .Star, .lexeme = "*", .line = 1 }, - &ast.mkGrouping(&ast.mkNum(f32, 45.67)), - ); + //var expr = ast.mkBinary( + // &ast.mkUnary( + // tokens.Token{ .ttype = .Minus, .lexeme = "-", .line = 1 }, + // &ast.mkNum(i32, 123), + // ), + // tokens.Token{ .ttype = .Star, .lexeme = "*", .line = 1 }, + // &ast.mkGrouping(&ast.mkNum(f32, 45.67)), + //); - printer.printAst(&expr); - std.debug.warn("\n"); + if (expr_opt) |expr_ptr| { + printer.printAst(expr_ptr); + std.debug.warn("\n"); + } else { + return Result.CompileError; + } return Result.Ok; } diff --git a/src/scanner.zig b/src/scanner.zig index 68a0c20..0a632ee 100644 --- a/src/scanner.zig +++ b/src/scanner.zig @@ -46,6 +46,9 @@ const keywords = [][]const u8{ "return", "struct", "type", + "true", + "false", + "None", }; const keyword_ttypes = []TokenType{ @@ -70,6 +73,9 @@ const keyword_ttypes = []TokenType{ .Return, .Struct, .Type, + .True, + .False, + .None, }; fn getKeyword(keyword: []const u8) ?TokenType { diff --git a/src/tokens.zig b/src/tokens.zig index 20ab7e3..57973de 100644 --- a/src/tokens.zig +++ b/src/tokens.zig @@ -67,6 +67,9 @@ pub const TokenType = enum { Return, Struct, Type, + True, + False, + None, EOF, };