From 927c0f6a1c4d11eff9084d8bc20c7bb5403ad680 Mon Sep 17 00:00:00 2001 From: Luna Date: Wed, 18 Sep 2019 15:59:11 -0300 Subject: [PATCH] add vig's parser --- src/ast.zig | 311 ++++++++++++ src/ast_printer.zig | 288 +++++++++++ src/errors.zig | 15 + src/main.zig | 16 +- src/parsers.zig | 1107 +++++++++++++++++++++++++++++++++++++++++++ src/scanners.zig | 6 + 6 files changed, 1737 insertions(+), 6 deletions(-) create mode 100644 src/ast.zig create mode 100644 src/ast_printer.zig create mode 100644 src/errors.zig create mode 100644 src/parsers.zig diff --git a/src/ast.zig b/src/ast.zig new file mode 100644 index 0000000..5818a37 --- /dev/null +++ b/src/ast.zig @@ -0,0 +1,311 @@ +const std = @import("std"); +const tokens = @import("tokens.zig"); +const Token = tokens.Token; + +pub const NodeList = std.ArrayList(*Node); +pub const StmtList = std.ArrayList(*Stmt); +pub const ExprList = std.ArrayList(*Expr); +pub const TokenList = std.ArrayList(Token); +pub const ParamList = std.ArrayList(ParamDecl); +pub const ConstList = std.ArrayList(SingleConst); + +pub const NodeType = enum { + Root, + FnDecl, + ConstDecl, + Struct, + Enum, + Block, + Stmt, +}; + +pub const ParamDecl = struct { + name: Token, + typ: Token, +}; + +pub const MethodData = struct { + variable: Token, + typ: Token, + mutable: bool, +}; + +pub const FnDecl = struct { + func_name: Token, + params: ParamList, + return_type: Token, + body: StmtList, + method: ?*MethodData, +}; + +pub const SingleConst = struct { + name: Token, + expr: *Expr, +}; + +pub const BinaryExpr = struct { + left: *Expr, + op: Token, + right: *Expr, +}; + +pub const UnaryExpr = struct { + op: Token, + right: *Expr, +}; + +// looks like a BinaryExpr, but is not a BinaryExpr +pub const LogicalExpr = struct { + left: *Expr, + op: Token, + right: *Expr, +}; + +pub const LiteralExpr = union(enum) { + Bool: bool, + Integer: []const u8, + Float: []const u8, + String: []const u8, + Array: ExprList, +}; + +pub const AssignExpr = struct { + 
name: Token, + value: *Expr, +}; + +pub const ExprType = enum { + Assign, + + // vardecls as expressions is a hack + VarDecl, + + Binary, + Unary, + Logical, + Literal, + Variable, + Call, + Struct, + + Grouping, + Get, + Set, +}; + +pub const VarDecl = struct { + assign: AssignExpr, + mutable: bool = false, +}; + +pub const CallExpr = struct { + callee: *Expr, + paren: Token, + arguments: ExprList, +}; + +pub const StructInit = struct { + field: Token, + expr: *Expr, +}; + +pub const StructInitList = std.ArrayList(StructInit); + +pub const StructExpr = struct { + name: Token, + inits: StructInitList, +}; + +pub const GetExpr = struct { + struc: *Expr, + name: Token, +}; + +pub const SetExpr = struct { + struc: *Expr, + field: Token, + value: *Expr, +}; + +pub const Expr = union(ExprType) { + Assign: AssignExpr, + VarDecl: VarDecl, + + Binary: BinaryExpr, + Unary: UnaryExpr, + Logical: LogicalExpr, + Literal: LiteralExpr, + Struct: StructExpr, + + Variable: Token, + Grouping: *Expr, + Call: CallExpr, + + Get: GetExpr, + Set: SetExpr, +}; + +pub const Block = std.ArrayList(*Stmt); + +pub const IfStmt = struct { + condition: *Expr, + then_branch: Block, + else_branch: ?Block, +}; + +pub const LoopStmt = struct { + condition: ?*Expr, + then_branch: Block, +}; + +pub const ForStmt = struct { + index: ?Token, + value: Token, + array: Token, + block: Block, +}; + +pub const Stmt = union(enum) { + Expr: *Expr, + Println: *Expr, + + If: IfStmt, + Loop: LoopStmt, + For: ForStmt, + + Return: ReturnStmt, + + pub const ReturnStmt = struct { + keyword: Token, + value: *Expr, + }; + + pub fn mkPrintln(allocator: *std.mem.Allocator, expr: *Expr) !*Stmt { + var stmt = try allocator.create(Stmt); + stmt.* = Stmt{ .Println = expr }; + return stmt; + } + + pub fn mkIfStmt( + allocator: *std.mem.Allocator, + condition: *Expr, + then: Block, + else_branch: ?Block, + ) !*Stmt { + var stmt = try allocator.create(Stmt); + stmt.* = Stmt{ + .If = IfStmt{ + .condition = condition, + 
.then_branch = then, + .else_branch = else_branch, + }, + }; + + return stmt; + } + + pub fn mkLoop( + allocator: *std.mem.Allocator, + condition: ?*Expr, + then: Block, + ) !*Stmt { + var stmt = try allocator.create(Stmt); + stmt.* = Stmt{ + .Loop = LoopStmt{ + .condition = condition, + .then_branch = then, + }, + }; + + return stmt; + } + + pub fn mkFor(allocator: *std.mem.Allocator, index: ?Token, value: Token, array: Token, block: Block) !*Stmt { + var stmt = try allocator.create(Stmt); + stmt.* = Stmt{ + .For = ForStmt{ + .index = index, + .value = value, + .array = array, + .block = block, + }, + }; + + return stmt; + } + + pub fn mkReturn(allocator: *std.mem.Allocator, tok: Token, value: *Expr) !*Stmt { + var stmt = try allocator.create(Stmt); + stmt.* = Stmt{ + .Return = ReturnStmt{ + .keyword = tok, + .value = value, + }, + }; + + return stmt; + } +}; + +pub const FieldList = std.ArrayList(StructField); + +pub const StructField = struct { + name: Token, + typ: Token, + + mutable: bool = false, + public: bool = false, + mutable_outside: bool = false, +}; + +pub const Struct = struct { + name: Token, + fields: FieldList, +}; + +pub const Enum = struct { + name: Token, + fields: TokenList, +}; + +pub const Node = union(NodeType) { + Root: NodeList, + FnDecl: FnDecl, + ConstDecl: ConstList, + Struct: Struct, + Enum: Enum, + + Block: StmtList, + + Stmt: *Stmt, + + pub fn mkRoot(allocator: *std.mem.Allocator) !*Node { + var node = try allocator.create(Node); + node.* = Node{ .Root = NodeList.init(allocator) }; + return node; + } + + pub fn mkStructDecl(allocator: *std.mem.Allocator, name: Token, fields: FieldList) !*Node { + var node = try allocator.create(Node); + node.* = Node{ + .Struct = Struct{ + .name = name, + .fields = fields, + }, + }; + + return node; + } + + pub fn mkEnumDecl(allocator: *std.mem.Allocator, name: Token, fields: TokenList) !*Node { + var node = try allocator.create(Node); + node.* = Node{ + .Enum = Enum{ + .name = name, + .fields = 
fields, + }, + }; + + return node; + } +}; diff --git a/src/ast_printer.zig b/src/ast_printer.zig new file mode 100644 index 0000000..b770f4e --- /dev/null +++ b/src/ast_printer.zig @@ -0,0 +1,288 @@ +const std = @import("std"); +const tokens = @import("tokens.zig"); +const Token = tokens.Token; + +usingnamespace @import("ast.zig"); + +const warn = std.debug.warn; + +fn printIdent(ident: usize) void { + var i: usize = 0; + while (i < ident) : (i += 1) { + std.debug.warn("\t"); + } +} + +fn print(ident: usize, comptime fmt: []const u8, args: ...) void { + printIdent(ident); + std.debug.warn(fmt, args); +} + +fn printBlock(ident: usize, block: var, endNewline: bool) void { + std.debug.warn("(\n"); + + for (block.toSlice()) |stmt| { + printIdent(ident); + printStmt(ident, stmt); + std.debug.warn("\n"); + } + + if (endNewline) { + print(ident - 1, ")\n"); + } else { + print(ident - 1, ")"); + } +} + +pub fn printNode(node: *Node, ident: usize) void { + switch (node.*) { + .FnDecl => |decl| { + const name = decl.func_name.lexeme; + + printIdent(ident); + + const ret_type = decl.return_type.lexeme; + + if (decl.method) |method| { + const vari = method.variable.lexeme; + const typ = method.typ.lexeme; + + if (method.mutable) { + warn("(method mut {} {} {} {} ", vari, typ, name, ret_type); + } else { + warn("(method {} {} {} {} ", vari, typ, name, ret_type); + } + } else { + warn("(fn {} {} (", name, ret_type); + } + + for (decl.params.toSlice()) |param| { + warn("({} {}) ", param.name.lexeme, param.typ.lexeme); + } + + printBlock(ident + 1, decl.body, false); + warn(")\n"); + }, + + .ConstDecl => |consts| { + print(ident, "(const (\n"); + + for (consts.toSlice()) |const_decl| { + print( + ident + 1, + "({} ", + const_decl.name.lexeme, + ); + + printExpr(const_decl.expr); + std.debug.warn(")\n"); + } + + print(ident, "))\n"); + }, + + .Enum => |decl| { + print(ident, "(enum {} (\n", decl.name.lexeme); + + for (decl.fields.toSlice()) |field| { + print( + ident + 1, + 
"{}\n", + field.lexeme, + ); + } + + print(ident, "))\n"); + }, + + .Root => { + for (node.Root.toSlice()) |child| { + printNode(child, ident + 1); + } + }, + + .Stmt => |stmt| { + printIdent(ident); + printStmt(ident, stmt); + std.debug.warn("\n"); + }, + + .Struct => |struc| { + print(ident, "(struct {} (\n", struc.name.lexeme); + for (struc.fields.toSlice()) |field| { + printIdent(ident + 1); + if (field.mutable) { + std.debug.warn("(mut "); + } else { + std.debug.warn("("); + } + + if (field.public) { + std.debug.warn("pub "); + } + + if (field.mutable_outside) { + std.debug.warn("MUT_OUT "); + } + + std.debug.warn("{} {})\n", field.name.lexeme, field.typ.lexeme); + } + print(ident, "))\n"); + }, + + else => { + print(ident, "unknown node: {}\n", node); + }, + } +} + +fn parenthetize(name: []const u8, exprs: []*Expr) void { + std.debug.warn("({}", name); + + for (exprs) |expr| { + std.debug.warn(" "); + printExpr(expr); + } + + std.debug.warn(")"); +} + +pub fn printExpr(expr: *Expr) void { + switch (expr.*) { + .Binary => |binary| parenthetize(binary.op.lexeme, &[_]*Expr{ binary.left, binary.right }), + .Logical => |binary| parenthetize(binary.op.lexeme, &[_]*Expr{ binary.left, binary.right }), + .Unary => |unary| parenthetize(unary.op.lexeme, &[_]*Expr{unary.right}), + .Grouping => |expr_ptr| parenthetize("group", &[_]*Expr{expr_ptr}), + + .Literal => |literal| { + switch (literal) { + .Bool => |val| std.debug.warn("{}", val), + .Integer => |val| std.debug.warn("{}", val), + .Float => |val| std.debug.warn("{}", val), + .String => |val| std.debug.warn("'{}'", val), + .Array => |exprs| { + parenthetize("array", exprs.toSlice()); + }, + else => |typ| std.debug.warn("UnknownLiteral-{}", typ), + } + }, + + .Variable => |token| std.debug.warn("{}", token.lexeme), + + .VarDecl => |decl| { + if (decl.mutable) { + std.debug.warn("(mut "); + } else { + std.debug.warn("("); + } + + std.debug.warn("let {} ", decl.assign.name.lexeme); + printExpr(decl.assign.value); + 
std.debug.warn(")"); + }, + + .Assign => |assign| { + std.debug.warn("(set "); + std.debug.warn("{} ", assign.name.lexeme); + printExpr(assign.value); + std.debug.warn(")"); + }, + + .Call => |call| { + std.debug.warn("("); + printExpr(call.callee); + + for (call.arguments.toSlice()) |arg| { + std.debug.warn(" "); + printExpr(arg); + } + + std.debug.warn(")"); + }, + + .Struct => |val| { + std.debug.warn("({} (", val.name.lexeme); + + for (val.inits.toSlice()) |init| { + std.debug.warn(" ({} ", init.field.lexeme); + printExpr(init.expr); + std.debug.warn(")"); + } + + std.debug.warn("))"); + }, + + .Get => |get| { + warn("("); + printExpr(get.struc); + warn(".{})", get.name.lexeme); + }, + + .Set => |set| { + warn("(set "); + printExpr(set.struc); + warn(" {} ", set.field.lexeme); + printExpr(set.value); + warn(")"); + }, + + else => std.debug.warn("UnknownExpr-{}", @tagName(expr.*)), + } +} + +pub fn printStmt(ident: usize, stmt: *Stmt) void { + switch (stmt.*) { + .Println => |expr| parenthetize("println", &[_]*Expr{expr}), + .Expr => |expr| printExpr(expr), + + .If => |ifstmt| { + std.debug.warn("(if "); + printExpr(ifstmt.condition); + std.debug.warn(" "); + + printBlock(ident + 1, ifstmt.then_branch, false); + if (ifstmt.else_branch) |else_branch| { + std.debug.warn(" else "); + printBlock(ident + 1, else_branch, false); + } + + std.debug.warn(")\n"); + }, + + .Loop => |loop| { + std.debug.warn("(loop "); + if (loop.condition) |cond| { + printExpr(cond); + } else { + std.debug.warn("true"); + } + std.debug.warn(" "); + + printBlock(ident + 1, loop.then_branch, false); + std.debug.warn(")\n"); + }, + + .For => |forstmt| { + std.debug.warn("(for "); + + if (forstmt.index) |index| { + std.debug.warn("({} {}) ", index.lexeme, forstmt.value.lexeme); + } else { + std.debug.warn("{} ", forstmt.value.lexeme); + } + + std.debug.warn("{} ", forstmt.array.lexeme); + + printBlock(ident + 1, forstmt.block, false); + std.debug.warn(")\n"); + }, + + .Return => |ret| { + 
std.debug.warn("(return "); + printExpr(ret.value); + std.debug.warn(")\n"); + }, + + else => std.debug.warn("UnknownStmt-{}", @tagName(stmt.*)), + } +} diff --git a/src/errors.zig b/src/errors.zig new file mode 100644 index 0000000..c7e9c2b --- /dev/null +++ b/src/errors.zig @@ -0,0 +1,15 @@ +const std = @import("std"); + +pub fn report(line: usize, where: []const u8, message: []const u8) void { + std.debug.warn("[line {}] Error{}: {}", line, where, message); +} + +pub fn reportN(line: usize, message: []const u8) void { + report(line, "", message); +} + +pub fn reportFmt(line: usize, comptime fmt: []const u8, args: ...) void { + std.debug.warn("[line {}] Error", line); + std.debug.warn(fmt, args); + std.debug.warn("\n"); +} diff --git a/src/main.zig b/src/main.zig index 655520e..cedf273 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,6 +1,8 @@ const std = @import("std"); const scanners = @import("scanners.zig"); +const parsers = @import("parsers.zig"); +const printer = @import("ast_printer.zig"); pub const Result = enum { Ok, @@ -9,9 +11,8 @@ pub const Result = enum { CompileError, }; -pub fn run(allocator: *std.mem.Allocator, slice: []const u8) Result { +pub fn run(allocator: *std.mem.Allocator, slice: []const u8) !Result { var scan = scanners.Scanner.init(allocator, slice); - //defer scan.deinit(); // do a full scan pass, then reset, then do it again (with parser) while (true) { @@ -35,8 +36,12 @@ pub fn run(allocator: *std.mem.Allocator, slice: []const u8) Result { // scan.reset(); - //var parser = parsers.Parser.init(allocator, scan); - //defer parser.deinit(); + var parser = parsers.Parser.init(allocator, &scan); + var root = try parser.parse(); + var it = root.Root.iterator(); + + std.debug.warn("parse tree\n"); + //printer.printNode(root, 0); return Result.Ok; } @@ -58,8 +63,7 @@ pub fn main() anyerror!void { _ = try file.read(slice); - const result = run(allocator, slice); - //const result = try run(allocator, slice); + const result = try run(allocator, 
slice); switch (result) { .Ok => std.os.exit(0), diff --git a/src/parsers.zig b/src/parsers.zig new file mode 100644 index 0000000..04e49d6 --- /dev/null +++ b/src/parsers.zig @@ -0,0 +1,1107 @@ +const std = @import("std"); +const scanners = @import("scanners.zig"); +const main = @import("main.zig"); +const ast = @import("ast.zig"); +const tokens = @import("tokens.zig"); +const err = @import("errors.zig"); +const printer = @import("ast_printer.zig"); + +const Allocator = std.mem.Allocator; +const Scanner = scanners.Scanner; +const Token = tokens.Token; +const TokenType = tokens.TokenType; + +pub const ParseError = error{CompileError}; + +const Node = ast.Node; +const Expr = ast.Expr; +const Stmt = ast.Stmt; + +const TokenList = std.ArrayList(Token); + +const FieldState = struct { + public: bool = false, + mutable: bool = false, + mutable_outside: bool = false, +}; + +pub const Parser = struct { + allocator: *Allocator, + scanner: *Scanner, + tokens: TokenList, + + hadError: bool = false, + + pub fn init(allocator: *Allocator, scanner: *Scanner) Parser { + return Parser{ + .allocator = allocator, + .scanner = scanner, + .tokens = TokenList.init(allocator), + }; + } + + pub fn deinit(self: *@This()) void { + self.tokens.deinit(); + } + + fn doError(self: *Parser, comptime fmt: []const u8, args: ...) 
void { + self.hadError = true; + + std.debug.warn("parser error at line {}\n\t", self.scanner.line); + std.debug.warn(fmt, args); + std.debug.warn("\n"); + } + + fn peek(self: *Parser) Token { + return self.tokens.at(self.tokens.len - 1); + } + + fn previous(self: *Parser) Token { + return self.tokens.at(self.tokens.len - 2); + } + + fn tokenError(self: *Parser, token: Token, msg: []const u8) ParseError!void { + if (token.typ == .EOF) { + err.report(token.line, " at end", msg); + } else { + err.reportFmt(token.line, " at '{}': {}", token.lexeme, msg); + } + + return ParseError.CompileError; + } + + fn isAtEnd(self: *Parser) bool { + return self.peek().typ == .EOF; + } + + fn check(self: *Parser, typ: TokenType) bool { + if (self.isAtEnd()) return false; + return self.peek().typ == typ; + } + + fn nextToken(self: *Parser) !Token { + var token: Token = undefined; + + while (true) { + var next_token_opt = try self.scanner.nextToken(); + if (next_token_opt) |token_nice| { + token = token_nice; + break; + } + } + + try self.tokens.append(token); + std.debug.warn("skip to {}\n", token); + return token; + } + + /// Consume the current token type, then walk to the next token. + /// Returns the consumed token. + fn consume(self: *Parser, ttype: TokenType, comptime msg: []const u8) !Token { + if (self.check(ttype)) { + var tok = self.peek(); + _ = try self.nextToken(); + return tok; + } + + try self.tokenError(self.peek(), msg); + return ParseError.CompileError; + } + + /// Consume the current token. Gives default error messages + fn consumeSingle(self: *Parser, ttype: TokenType) !Token { + if (self.check(ttype)) { + var cur = self.peek(); + _ = try self.nextToken(); + return cur; + } + + // TODO maybe this could be entirely comptime? 
+ var buf_main: [1000]u8 = undefined; + var buf = try std.fmt.bufPrint( + buf_main[0..], + "expected {}, got {}", + ttype, + self.peek().typ, + ); + + try self.tokenError(self.peek(), buf); + return ParseError.CompileError; + } + + /// check() against multiple tokens + fn compareAnyOf(self: *@This(), ttypes: []TokenType) bool { + for (ttypes) |typ| { + if (self.check(typ)) return true; + } + + return false; + } + + // TODO maybe move helper functions to ast_helper.zig? + + fn mkFnDecl( + self: *Parser, + name: Token, + params: ast.ParamList, + return_type: Token, + block: ast.StmtList, + method: ?*ast.MethodData, + ) !*ast.Node { + var node = try self.allocator.create(Node); + node.* = Node{ + .FnDecl = ast.FnDecl{ + .func_name = name, + .params = params, + .return_type = return_type, + .body = block, + .method = method, + }, + }; + return node; + } + + fn mkConstDecl(self: *Parser, consts: ast.ConstList) !*ast.Node { + var node = try self.allocator.create(Node); + node.* = Node{ .ConstDecl = consts }; + return node; + } + + fn mkBlock(self: *Parser, stmts: ast.StmtList) !*ast.Node { + var node = try self.allocator.create(Node); + node.* = Node{ .Block = stmts }; + return node; + } + + fn mkStmt(self: *Parser, stmt: *Stmt) !*ast.Node { + var node = try self.allocator.create(Node); + node.* = Node{ .Stmt = stmt }; + return node; + } + + fn mkStmtExpr(self: *Parser, expr: *Expr) !*Stmt { + var stmt = try self.allocator.create(Stmt); + stmt.* = Stmt{ .Expr = expr }; + return stmt; + } + + fn mkGrouping(self: *Parser, expr: *Expr) !*ast.Expr { + var grouping = try self.allocator.create(Expr); + grouping.* = Expr{ .Grouping = expr }; + return grouping; + } + + fn mkUnary(self: *Parser, op: Token, right: *Expr) !*Expr { + std.debug.warn("Unary\n"); + + var expr = try self.allocator.create(Expr); + expr.* = Expr{ + .Unary = ast.UnaryExpr{ + .op = op, + .right = right, + }, + }; + return expr; + } + + fn mkBinary(self: *Parser, left: *Expr, op: Token, right: *Expr) !*Expr 
{ + var expr = try self.allocator.create(Expr); + expr.* = Expr{ + .Binary = ast.BinaryExpr{ + .left = left, + .op = op, + .right = right, + }, + }; + + return expr; + } + + fn mkLogical(self: *Parser, left: *Expr, op: Token, right: *Expr) !*Expr { + var expr = try self.allocator.create(Expr); + expr.* = Expr{ + .Logical = ast.LogicalExpr{ + .left = left, + .op = op, + .right = right, + }, + }; + + return expr; + } + + fn mkAssign(self: *Parser, name: Token, value: *Expr) !*Expr { + var expr = try self.allocator.create(Expr); + expr.* = Expr{ + .Assign = ast.AssignExpr{ + .name = name, + .value = value, + }, + }; + + return expr; + } + + fn mkVarDecl(self: *@This(), name: Token, value: *Expr, mutable: bool) !*Expr { + var vardecl = try self.allocator.create(Expr); + vardecl.* = Expr{ + .VarDecl = ast.VarDecl{ + .assign = ast.AssignExpr{ + .name = name, + .value = value, + }, + .mutable = mutable, + }, + }; + + return vardecl; + } + + fn mkCall(self: *@This(), callee: *Expr, paren: Token, args: ast.ExprList) !*Expr { + var expr = try self.allocator.create(Expr); + expr.* = Expr{ + .Call = ast.CallExpr{ + .callee = callee, + .paren = paren, + .arguments = args, + }, + }; + + return expr; + } + + fn mkStructExpr(self: *@This(), name: Token, args: ast.StructInitList) !*Expr { + var expr = try self.allocator.create(Expr); + expr.* = Expr{ + .Struct = ast.StructExpr{ + .name = name, + .inits = args, + }, + }; + + return expr; + } + + fn mkGet(self: *@This(), struc: *Expr, name: Token) !*Expr { + var expr = try self.allocator.create(Expr); + expr.* = Expr{ + .Get = ast.GetExpr{ + .struc = struc, + .name = name, + }, + }; + + return expr; + } + + fn mkSet(self: *@This(), struc: *Expr, field: Token, value: *Expr) !*Expr { + var expr = try self.allocator.create(Expr); + expr.* = Expr{ + .Set = ast.SetExpr{ + .struc = struc, + .field = field, + .value = value, + }, + }; + + return expr; + } + + fn mkBool(self: *Parser, val: bool) !*ast.Expr { + var expr = try 
self.allocator.create(Expr); + expr.* = Expr{ + .Literal = ast.LiteralExpr{ + .Bool = val, + }, + }; + + return expr; + } + + fn mkInteger(self: *Parser, val: []const u8) !*ast.Expr { + var expr = try self.allocator.create(Expr); + expr.* = Expr{ + .Literal = ast.LiteralExpr{ + .Integer = val, + }, + }; + + return expr; + } + + fn mkFloat(self: *Parser, val: []const u8) !*ast.Expr { + var expr = try self.allocator.create(Expr); + expr.* = Expr{ + .Literal = ast.LiteralExpr{ + .Float = val, + }, + }; + + return expr; + } + + fn mkString(self: *Parser, val: []const u8) !*ast.Expr { + var expr = try self.allocator.create(Expr); + expr.* = Expr{ + .Literal = ast.LiteralExpr{ + .String = val, + }, + }; + + return expr; + } + + fn mkArray(self: *Parser, exprs: ast.ExprList) !*ast.Expr { + var expr = try self.allocator.create(Expr); + expr.* = Expr{ + .Literal = ast.LiteralExpr{ + .Array = exprs, + }, + }; + + return expr; + } + + fn mkVariable(self: *Parser, variable: Token) !*ast.Expr { + var expr = try self.allocator.create(Expr); + expr.* = Expr{ .Variable = variable }; + + return expr; + } + + pub fn parse(self: *Parser) !*ast.Node { + var root = try Node.mkRoot(self.allocator); + + var token_opt: ?Token = null; + + while (true) { + if (token_opt == null) { + token_opt = try self.nextToken(); + } else { + token_opt = self.peek(); + } + + var token = token_opt.?; + if (token.typ == .EOF) break; + + var node = try self.parseTopDecl(); + try root.Root.append(node); + } + + if (self.hadError) { + return error.ParseError; + } + + return root; + } + + /// Copy a token with a different lexeme. 
+ fn mkToken(self: *@This(), ttype: TokenType, lexeme: []const u8, line: usize) !Token { + const owned_lexeme = try std.mem.dupe(self.allocator, u8, lexeme); + return Token{ + .typ = ttype, + .lexeme = owned_lexeme, + .line = line, + }; + } + + fn parseFnDecl(self: *@This()) !*Node { + var param_list = ast.ParamList.init(self.allocator); + errdefer param_list.deinit(); + + var method: ?*ast.MethodData = null; + + _ = try self.consumeSingle(.Fn); + + if (self.check(.LeftParen)) { + method = try self.parsePreMethod(); + } + + const name = try self.consumeSingle(.Identifier); + + _ = try self.consumeSingle(.LeftParen); + + while (self.peek().typ != .RightParen) { + const param_name = try self.consumeSingle(.Identifier); + const param_type = try self.consumeSingle(.Identifier); + + try param_list.append(ast.ParamDecl{ + .name = param_name, + .typ = param_type, + }); + } + + _ = try self.consumeSingle(.RightParen); + + // the return type is default void if a type + // is not provided + var return_type: Token = undefined; + if (self.check(.Identifier)) { + return_type = try self.consumeSingle(.Identifier); + } else { + return_type = try self.mkToken(.Identifier, "void", name.line); + } + + var block_node = try self.parseBlock(); + return try self.mkFnDecl(name, param_list, return_type, block_node.Block, method); + } + + /// parse the (v [mut] T) part of the method (defined here + /// as a premethod) + fn parsePreMethod(self: *@This()) !?*ast.MethodData { + _ = try self.consumeSingle(.LeftParen); + + var mutable_ref: bool = false; + const variable = try self.consumeSingle(.Identifier); + + if (self.check(.Mut)) { + _ = try self.consumeSingle(.Mut); + mutable_ref = true; + } + + const typ = try self.consumeSingle(.Identifier); + + _ = try self.consumeSingle(.RightParen); + + // create method data and assign the values we got into it + var method = try self.allocator.create(ast.MethodData); + method.* = ast.MethodData{ + .variable = variable, + .typ = typ, + .mutable = 
mutable_ref, + }; + + return method; + } + + fn parseConstDecl(self: *@This()) !*Node { + var consts = ast.ConstList.init(self.allocator); + errdefer consts.deinit(); + + _ = try self.consumeSingle(.Const); + _ = try self.consumeSingle(.LeftParen); + + while (self.peek().typ != .RightParen) { + const const_name = try self.consumeSingle(.Identifier); + _ = try self.consumeSingle(.Equal); + + // const declarations don't have a type, a future type system must + // check the output type of the expression and assign it to the + // const later on. + + var expr = try self.parseExpr(); + try consts.append(ast.SingleConst{ + .name = const_name, + .expr = expr, + }); + } + + _ = try self.consumeSingle(.RightParen); + + return self.mkConstDecl(consts); + } + + fn parseStructDecl(self: *@This()) !*Node { + var fields = ast.FieldList.init(self.allocator); + errdefer fields.deinit(); + + _ = try self.consumeSingle(.Struct); + + var name = try self.consumeSingle(.Identifier); + + _ = try self.consumeSingle(.LeftBrace); + + var field_state = FieldState{}; + + while (!self.check(.RightBrace)) { + try self.parseFieldModifiers(&field_state); + + const field_name = try self.consumeSingle(.Identifier); + const field_type = try self.consumeSingle(.Identifier); + + // we could create a FieldState on the heap and copy our current + // field state into a StructField.state, but copying via this makes + // things so much nicer. 
+ try fields.append(ast.StructField{ + .name = field_name, + .typ = field_type, + + .mutable = field_state.mutable, + .public = field_state.public, + .mutable_outside = field_state.mutable_outside, + }); + } + + _ = try self.consumeSingle(.RightBrace); + + return Node.mkStructDecl(self.allocator, name, fields); + } + + fn parseFieldModifiers(self: *@This(), field_state: *FieldState) !void { + + // there are five access modifiers: + // - none (private immutable) + // - mut (private mutable) + // - pub (public immutable) + // - pub mut (public mutable only in module) + // - pub mut mut (public mutable everywhere) + + // this function takes care of that by changing the current FieldState + // to what the modifiers dictate. + switch (self.peek().typ) { + .Mut => { + // There are no other modifiers that start with mut, so we + // can just go the way of marking it as mutable + _ = try self.consumeSingle(.Mut); + _ = try self.consumeSingle(.Colon); + + field_state.mutable = true; + }, + + // 'pub', 'pub mut', and 'pub mut mut' are all handled here + .Pub => { + _ = try self.consumeSingle(.Pub); + field_state.public = true; + + if (self.check(.Mut)) { + _ = try self.consumeSingle(.Mut); + + field_state.mutable = true; + if (self.check(.Mut)) { + _ = try self.consumeSingle(.Mut); + field_state.mutable_outside = true; + } + } + + _ = try self.consumeSingle(.Colon); + }, + + // if it isn't mut or pub we're likely in an identifier, just + // ignore it. 
+ else => return, + } + } + + fn parseEnumDecl(self: *@This()) !*Node { + _ = try self.consumeSingle(.Enum); + + var fields = ast.TokenList.init(self.allocator); + errdefer fields.deinit(); + + const name = try self.consumeSingle(.Identifier); + + _ = try self.consumeSingle(.LeftBrace); + + while (!self.check(.RightBrace)) { + try fields.append(try self.consumeSingle(.Identifier)); + } + + _ = try self.consumeSingle(.RightBrace); + + return try Node.mkEnumDecl(self.allocator, name, fields); + } + + fn parseTopDecl(self: *@This()) !*Node { + return switch (self.peek().typ) { + .Fn => try self.parseFnDecl(), + .Const => try self.parseConstDecl(), + .Struct => try self.parseStructDecl(), + .Enum => try self.parseEnumDecl(), + + else => |typ| blk: { + self.doError("expected Fn, Const, Struct, got {}\n", typ); + return ParseError.CompileError; + }, + }; + } + + fn parseBlockInternal(self: *@This(), comptime T: type) !T { + var stmts = T.init(self.allocator); + errdefer stmts.deinit(); + + _ = try self.consumeSingle(.LeftBrace); + + while (self.peek().typ != .RightBrace) { + var stmt = try self.parseStmt(); + printer.printNode(try self.mkStmt(stmt), 0); + try stmts.append(stmt); + } + + _ = try self.consumeSingle(.RightBrace); + + return stmts; + } + + fn parseStmt(self: *@This()) anyerror!*Stmt { + return switch (self.peek().typ) { + .If => try self.parseIfStmt(), + .Loop => try self.parseLoop(), + .For => try self.parseForStmt(), + .Println => try self.parsePrintln(), + .Return => try self.parseReturn(), + else => try self.parseStmtExpr(), + }; + } + + /// Parse a list of statements. 
+ fn parseBlock(self: *@This()) !*Node { + var stmts = try self.parseBlockInternal(ast.StmtList); + return try self.mkBlock(stmts); + } + + /// parse blocks inside statements + fn parseStmtBlock(self: *@This()) !ast.Block { + var block = try self.parseBlockInternal(ast.Block); + return block; + } + + fn parseIfStmt(self: *@This()) !*Stmt { + _ = try self.consumeSingle(.If); + var condition = try self.parseExpr(); + + const then_branch = try self.parseStmtBlock(); + + var else_branch: ?ast.Block = null; + + if (self.check(.Else)) { + _ = try self.consumeSingle(.Else); + else_branch = try self.parseStmtBlock(); + } + + return try Stmt.mkIfStmt( + self.allocator, + condition, + then_branch, + else_branch, + ); + } + + fn parseForStmt(self: *@This()) !*Stmt { + // There are two types of for in vig's V subset: + // - for x in y + // - for idx, x in y + _ = try self.consumeSingle(.For); + + var index_var: ?Token = null; + var value_var: Token = undefined; + + const subject_1 = try self.consumeSingle(.Identifier); + + if (self.check(.Comma)) { + _ = try self.consumeSingle(.Comma); + + const subject_2 = try self.consumeSingle(.Identifier); + index_var = subject_1; + value_var = subject_2; + } else { + value_var = subject_1; + } + + _ = try self.consumeSingle(.In); + + // MUST be identifier + var array = try self.consumeSingle(.Identifier); + var block = try self.parseStmtBlock(); + + return try Stmt.mkFor( + self.allocator, + index_var, + value_var, + array, + block, + ); + } + + fn parseLoop(self: *@This()) !*Stmt { + _ = try self.consumeSingle(.Loop); + var expr: ?*Expr = null; + var body: ast.Block = undefined; + + // 'loop {' = infinite loop + if (self.check(.LeftBrace)) { + body = try self.parseStmtBlock(); + } else { + expr = try self.parseExpr(); + body = try self.parseStmtBlock(); + } + + return try Stmt.mkLoop(self.allocator, expr, body); + } + + fn parseReturn(self: *@This()) !*Stmt { + const tok = try self.consumeSingle(.Return); + const expr = try 
self.parseExpr();
+        return try Stmt.mkReturn(self.allocator, tok, expr);
+    }
+
+    /// Parse a println statement: the `.Println` token followed by a single
+    /// parenthesised expression. Returns the statement built by
+    /// `Stmt.mkPrintln`.
+    fn parsePrintln(self: *@This()) !*Stmt {
+        _ = try self.consumeSingle(.Println);
+
+        _ = try self.consumeSingle(.LeftParen);
+        var expr = try self.parseExpr();
+        _ = try self.consumeSingle(.RightParen);
+
+        return try Stmt.mkPrintln(self.allocator, expr);
+    }
+
+    /// Parse one expression and wrap it into a statement via `mkStmtExpr`.
+    fn parseStmtExpr(self: *@This()) !*Stmt {
+        var expr = try self.parseExpr();
+        return try self.mkStmtExpr(expr);
+    }
+
+    /// Entry point for expression parsing; starts at the lowest-precedence
+    /// level (assignment).
+    // NOTE(review): the error set is spelled `anyerror`, presumably because
+    // the expression parsers call each other recursively - confirm.
+    fn parseExpr(self: *@This()) anyerror!*Expr {
+        return try self.parseAssignment();
+    }
+
+    /// Parse an optional `.Mut` marker, then an or-level expression, and, if
+    /// the current token is an assignment operator, hand over to
+    /// `finishAssignment`. Otherwise the or-level expression is returned as-is.
+    fn parseAssignment(self: *@This()) anyerror!*Expr {
+        // there can be two assignments coming out of this function:
+        // - a mutable/immutable variable declaration with :=
+        // - an assignment to a variable with =, +=, -=
+
+        // one is a statement, other is an expression. since the normal result
+        // of this is an Expr, we wrap variable assignments in an Expr as well.
+        var mutable: bool = false;
+
+        if (self.check(.Mut)) {
+            _ = try self.consumeSingle(.Mut);
+            mutable = true;
+        }
+
+        var expr = try self.parseOr();
+
+        if (self.compareAnyOf(&[_]TokenType{
+            .ColonEqual, .Equal, .PlusEqual, .MinusEqual, .StarEqual,
+            .SlashEqual,
+        })) {
+            return try self.finishAssignment(expr, mutable);
+        }
+
+        return expr;
+    }
+
+    /// Build the node for an assignment whose target `expr` has already been
+    /// parsed. The current token must be one of the assignment operators
+    /// checked by `parseAssignment`; any other token hits `unreachable`.
+    ///
+    /// - target is a Variable: `:=` gives a VarDecl (using `mutable`), `=`
+    ///   gives an Assign, compound operators give an Assign whose value is a
+    ///   Binary of the target and the right-hand side.
+    /// - target is a Get: `=` and compound operators give a Set; `:=` is
+    ///   reported through `doError` and fails with `ParseError.CompileError`.
+    /// - anything else is reported as an invalid assignment target.
+    // NOTE(review): `mutable` is only read by the `:=`-on-Variable arm; a
+    // `.Mut` marker in front of any other assignment is silently ignored.
+    fn finishAssignment(self: *@This(), expr: *Expr, mutable: bool) !*Expr {
+        var op = self.peek();
+        _ = try self.nextToken();
+        // The right-hand side goes through parseAssignment again, so chained
+        // assignments nest to the right.
+        var value = try self.parseAssignment();
+
+        // TODO convert binary's op field from Token to
+        // something else, maybe enum'd
+
+        const new_op_ttype: TokenType = switch (op.typ) {
+            .ColonEqual => TokenType.ColonEqual,
+            .Equal => .Equal,
+
+            .PlusEqual => .Plus,
+            .MinusEqual => .Minus,
+            .StarEqual => .Star,
+            .SlashEqual => .Slash,
+
+            else => unreachable,
+        };
+
+        // we create new_lexeme so that
+        // the AST printer properly prints
+        // x += 1
+        // as
+        // (set x (+ x 1))
+        // and not
+        // (set x (+= x 1))
+        const new_lexeme: []const u8 = switch (op.typ) {
+            .ColonEqual => ":=",
+            .Equal => "=",
+
+            .PlusEqual => "+",
+            .MinusEqual => "-",
+            .StarEqual => "*",
+            .SlashEqual => "/",
+
+            else => unreachable,
+        };
+
+        switch (expr.*) {
+            .Variable => {
+                switch (op.typ) {
+                    .ColonEqual => return try self.mkVarDecl(expr.Variable, value, mutable),
+                    .Equal => return try self.mkAssign(expr.Variable, value),
+
+                    .PlusEqual, .MinusEqual, .StarEqual, .SlashEqual => {
+                        // desugar `x op= v` into `x = x op v`
+                        var new_op = try self.mkToken(new_op_ttype, new_lexeme, op.line);
+                        return try self.mkAssign(
+                            expr.Variable,
+                            try self.mkBinary(expr, new_op, value),
+                        );
+                    },
+
+                    else => unreachable,
+                }
+            },
+
+            .Get => |get| {
+                switch (op.typ) {
+                    .ColonEqual => {
+                        self.doError("can not initialize struct field");
+                        return ParseError.CompileError;
+                    },
+
+                    .Equal => return try self.mkSet(get.struc, get.name, value),
+
+                    .PlusEqual, .MinusEqual, .StarEqual, .SlashEqual => {
+                        // desugar `s.f op= v` into `s.f = s.f op v`
+                        var new_op = try self.mkToken(new_op_ttype, new_lexeme, op.line);
+                        return try self.mkSet(
+                            get.struc,
+                            get.name,
+                            try self.mkBinary(expr, new_op, value),
+                        );
+                    },
+
+                    else => unreachable,
+                }
+            },
+
+            else => |expr_typ| {
+                self.doError("Invalid assignment target {}", expr_typ);
+                return ParseError.CompileError;
+            },
+        }
+    }
+
+    /// Parse a chain of and-level expressions joined by `.Or` tokens into
+    /// left-nested Logical nodes.
+    fn parseOr(self: *@This()) !*Expr {
+        var expr = try self.parseAnd();
+
+        while (self.check(.Or)) {
+            var op = self.peek();
+            _ = try self.nextToken();
+
+            var right = try self.parseAnd();
+            expr = try self.mkLogical(expr, op, right);
+        }
+
+        return expr;
+    }
+
+    /// Parse a chain of equality-level expressions joined by `.And` tokens
+    /// into left-nested Logical nodes.
+    fn parseAnd(self: *@This()) !*Expr {
+        var expr = try self.parseEquality();
+
+        while (self.check(.And)) {
+            var op = self.peek();
+            _ = try self.nextToken();
+
+            var right = try self.parseEquality();
+            expr = try self.mkLogical(expr, op, right);
+        }
+
+        return expr;
+    }
+
+    /// Parse a chain of comparison-level expressions joined by `.EqualEqual`
+    /// into left-nested Binary nodes. `.EqualEqual` is the only operator
+    /// handled at this level.
+    fn parseEquality(self: *@This()) !*Expr {
+        var expr = try self.parseComparison();
+
+        while (self.check(.EqualEqual)) {
+            var op = self.peek();
+            _ = try self.nextToken();
+
+            var right = try self.parseComparison();
+            expr = try self.mkBinary(expr, op, right);
+        }
+
+        return expr;
+    }
+
+    /// Parse a chain of additive expressions joined by `.Greater`,
+    /// `.GreaterEqual`, `.Less` or `.LessEqual` into left-nested Binary nodes.
+    fn parseComparison(self: *@This()) !*Expr {
+        var expr = try self.parseAddition();
+
+        while (self.compareAnyOf(&[_]TokenType{
+            .Greater,
+            .GreaterEqual,
+            .Less,
+            .LessEqual,
+        })) {
+            var op = self.peek();
+            _ = try self.nextToken();
+
+            var right = try self.parseAddition();
+            expr = try self.mkBinary(expr, op, right);
+        }
+
+        return expr;
+    }
+
+    /// Parse a chain of multiplicative expressions joined by `.Minus` or
+    /// `.Plus` into left-nested Binary nodes.
+    fn parseAddition(self: *@This()) !*Expr {
+        var expr = try self.parseMultiplication();
+
+        while (self.compareAnyOf(&[_]TokenType{
+            .Minus, .Plus,
+        })) {
+            var op = self.peek();
+            _ = try self.nextToken();
+
+            var right = try self.parseMultiplication();
+            expr = try self.mkBinary(expr, op, right);
+        }
+
+        return expr;
+    }
+
+    /// Parse a chain of unary expressions joined by `.Star` or `.Slash` into
+    /// left-nested Binary nodes.
+    fn parseMultiplication(self: *@This()) !*Expr {
+        var expr = try self.parseUnary();
+
+        while (self.compareAnyOf(&[_]TokenType{
+            .Star, .Slash,
+        })) {
+            var op = self.peek();
+            _ = try self.nextToken();
+            var right = try self.parseUnary();
+
+            expr = try self.mkBinary(expr, op, right);
+        }
+
+        return expr;
+    }
+
+    /// Parse a prefix `.Bang` / `.Minus` operator applied to another unary
+    /// expression (so prefixes may stack), or fall through to `parseCall`.
+    fn parseUnary(self: *@This()) anyerror!*Expr {
+        if (self.compareAnyOf(&[_]TokenType{ .Bang, .Minus })) {
+            // compareAnyOf only inspects the current token: every other
+            // caller in this parser follows it with peek() + nextToken().
+            // The operator therefore has to be read from peek() and consumed
+            // here; taking previous() without advancing would pick up the
+            // wrong token and recurse on the same operator without end.
+            var op = self.peek();
+            _ = try self.nextToken();
+            var right = try self.parseUnary();
+
+            return try self.mkUnary(op, right);
+        }
+
+        return try self.parseCall();
+    }
+
+    /// Parse either:
+    /// - A function call
+    /// - A struct initialization (Point.{...})
+    /// - A struct Get expression (p.x)
+    ///
+    /// Starts from a primary expression and keeps applying `(`...`)`, `.{`...`}`
+    /// and `.name` suffixes for as long as one follows.
+    fn parseCall(self: *@This()) !*Expr {
+        var expr = try self.parsePrimary();
+
+        while (true) {
+            if (self.check(.LeftParen)) {
+                _ = try self.consumeSingle(.LeftParen);
+                expr = try self.finishCall(expr);
+            } else if (self.check(.Dot)) {
+                _ = try self.consumeSingle(.Dot);
+
+                if (self.check(.LeftBrace)) {
+                    _ = try self.consumeSingle(.LeftBrace);
+                    expr = try self.finishStructVal(expr);
+                } else {
+                    var name = try self.consume(
+                        .Identifier,
+                        "Expect property name after '.'",
+                    );
+
+                    expr = try self.mkGet(expr, name);
+                }
+            } else {
+                break;
+            }
+        }
+
+        return expr;
+    }
+
+    /// Parse the argument list of a call to `callee`. The caller has already
+    /// consumed the opening `.LeftParen`; this consumes the comma-separated
+    /// arguments and the closing `.RightParen`, and returns the Call node.
+    /// The argument list is released again if parsing fails part-way.
+    fn finishCall(self: *@This(), callee: *Expr) !*Expr {
+        var args = ast.ExprList.init(self.allocator);
+        errdefer args.deinit();
+
+        if (!self.check(.RightParen)) {
+            // emulating do-while really badly
+            var arg = try self.parseExpr();
+            try args.append(arg);
+
+            while (self.check(.Comma)) {
+                _ = try self.consumeSingle(.Comma);
+
+                arg = try self.parseExpr();
+                try args.append(arg);
+            }
+        }
+
+        var paren = try self.consume(.RightParen, "Expected ')' after arguments");
+
+        return self.mkCall(callee, paren, args);
+    }
+
+    /// Parse the body of a struct initialization. The caller has already
+    /// consumed `.` and `.LeftBrace`; `expr` is what preceded them and must be
+    /// a Variable (the struct's type name), otherwise a CompileError is
+    /// reported. Fields are read as `name: value` pairs until `.RightBrace`;
+    /// no separator between pairs is consumed.
+    fn finishStructVal(self: *@This(), expr: *Expr) !*Expr {
+        // {a: 10 b: 10}
+        // for this to work properly, must be Variable, since it's a type.
+        if (ast.ExprType(expr.*) != .Variable) {
+            self.doError("Expected variable for struct type, got {}", ast.ExprType(expr.*));
+            return ParseError.CompileError;
+        }
+
+        var inits = ast.StructInitList.init(self.allocator);
+        errdefer inits.deinit();
+
+        while (!self.check(.RightBrace)) {
+            const field_name = try self.consumeSingle(.Identifier);
+            // TODO check .Comma for the quick initialization {val,val,val}
+
+            _ = try self.consumeSingle(.Colon);
+            const field_value = try self.parseExpr();
+
+            try inits.append(ast.StructInit{
+                .field = field_name,
+                .expr = field_value,
+            });
+        }
+
+        _ = try self.consumeSingle(.RightBrace);
+
+        return try self.mkStructExpr(expr.Variable, inits);
+    }
+
+    /// Parse a primary expression: a bool / integer / float / string literal,
+    /// an identifier, an array literal `[...]`, or a parenthesised grouping.
+    /// Any other token is reported and fails with `ParseError.CompileError`.
+    ///
+    /// The literal and identifier arms leave their token in place and rely on
+    /// the `nextToken()` after the switch to consume it; the array and
+    /// grouping arms consume their own closing token and return early.
+    fn parsePrimary(self: *@This()) !*Expr {
+        const curtype = self.peek().typ;
+        const lexeme = self.peek().lexeme;
+
+        var expr = switch (curtype) {
+            .False => try self.mkBool(false),
+            .True => try self.mkBool(true),
+
+            .Integer => try self.mkInteger(lexeme),
+            .Float => try self.mkFloat(lexeme),
+            .String => try self.mkString(lexeme),
+            .Identifier => try self.mkVariable(self.peek()),
+
+            // type checking for arrays happens at later stages
+            .LeftSquare => {
+                _ = try self.consumeSingle(.LeftSquare);
+
+                var exprs = ast.ExprList.init(self.allocator);
+                errdefer exprs.deinit();
+
+                // a comma after an element is consumed when present but is
+                // not required
+                while (!self.check(.RightSquare)) {
+                    try exprs.append(try self.parseExpr());
+                    if (self.check(.Comma)) _ = try self.consumeSingle(.Comma);
+                }
+
+                _ = try self.consumeSingle(.RightSquare);
+                return try self.mkArray(exprs);
+            },
+
+            .LeftParen => {
+                _ = try self.nextToken();
+                var inner = try self.parseExpr();
+                _ = try self.consume(.RightParen, "Expected ')' after expression");
+
+                // for groupings, we don't want to skip tokens as we already
+                // consumed RightParen.
+                return try self.mkGrouping(inner);
+            },
+
+            else => {
+                self.doError("expected literal, got {}", curtype);
+                return ParseError.CompileError;
+            },
+        };
+
+        _ = try self.nextToken();
+        return expr;
+    }
+};
diff --git a/src/scanners.zig b/src/scanners.zig
index 0a10196..ced1da8 100644
--- a/src/scanners.zig
+++ b/src/scanners.zig
@@ -107,6 +107,12 @@ pub const Scanner = struct {
         return Scanner{ .allocator = allocator, .source = source };
     }
 
+    /// Put the cursor fields back to their starting values: `start` and
+    /// `current` at offset 0, `line` at 1.
+    pub fn reset(self: *Scanner) void {
+        self.start = 0;
+        self.current = 0;
+        self.line = 1;
+    }
+
     fn isAtEnd(self: *Scanner) bool {
         return self.current >= self.source.len;
     }