const std = @import("std");
const scanner = @import("scanner.zig");
const vm = @import("vm.zig");
const chunks = @import("chunk.zig");
const tokens = @import("token.zig");
const values = @import("value.zig");
const objects = @import("object.zig");

const Allocator = std.mem.Allocator;
const Scanner = scanner.Scanner;
const Chunk = chunks.Chunk;
const Token = tokens.Token;
const TokenType = tokens.TokenType;
const Value = values.Value;
const OpCode = chunks.OpCode;

/// Holds parser state for the compiler.
const Parser = struct {
    /// The token that was consumed last (see `Compiler.advance`).
    previous: Token = undefined,
    /// The token currently being looked at; not yet consumed.
    current: Token = undefined,

    // TODO are those needed
    /// Set once any error has been reported; `Compiler.compile` returns
    /// `false` when this is set.
    hadError: bool = false,
    /// While set, `Compiler.errorAt` stays silent. Cleared again by
    /// `Compiler.synchronize`.
    panicMode: bool = false,
};

/// Represents the order of operations in the parser.
/// Members are declared from lowest to highest precedence; the parser
/// compares them through their integer values.
const Precedence = enum(u5) {
    None,
    Assignment, // =
    Or, // or
    And, // and
    Equality, // == !=
    Comparison, // < > <= >=
    Term, // + -
    Factor, // * /
    Unary, // ! -
    Call, // . () []
    Primary,
};

/// Signature shared by every prefix/infix parse function. The `bool`
/// tells the function whether an assignment target is allowed.
const ParseFn = fn (*Compiler, bool) anyerror!void;

/// Parse behaviour attached to one token type.
const ParseRule = struct {
    /// Called when the token starts an expression.
    prefix: ?ParseFn = null,
    /// Called when the token appears after a left-hand operand.
    infix: ?ParseFn = null,
    /// Precedence of the token when used as an infix operator.
    precedence: Precedence = Precedence.None,
};

/// For each token, this defines a parse rule for it.
///
/// `Compiler.getRule` indexes this table with `@enumToInt(ttype)`, so the
/// entry at position N serves the token type whose integer value is N.
/// The token enum lives in token.zig; keep both in the same order.
var rules = []ParseRule{
    // for LEFT_PAREN, we determine it as a call precedence
    // plus a prefix parse function of grouping
    ParseRule{ .prefix = Compiler.grouping, .precedence = .Call },
    ParseRule{},
    ParseRule{},
    ParseRule{},
    ParseRule{},

    // dot token, means a call too, for things like a.b
    ParseRule{ .precedence = .Call },

    // specific to -, as it can be a unary operator when it is a prefix
    // of something, or a binary one when it is an infix of another thing.
    ParseRule{
        .prefix = Compiler.unary,
        .infix = Compiler.binary,
        .precedence = .Term,
    },

    ParseRule{ .infix = Compiler.binary, .precedence = .Term },
    ParseRule{},

    // slash is a binary operator, as well as star.
    ParseRule{ .infix = Compiler.binary, .precedence = .Factor },
    ParseRule{ .infix = Compiler.binary, .precedence = .Factor },

    // as the token enum says, those are 1/2 char tokens.
    ParseRule{ .prefix = Compiler.unary },

    // this is specifically for the != operator
    ParseRule{ .infix = Compiler.binary, .precedence = .Equality },
    ParseRule{},

    // this is specifically for the == operator
    ParseRule{ .infix = Compiler.binary, .precedence = .Equality },

    // all the comparison ones
    ParseRule{ .infix = Compiler.binary, .precedence = .Comparison },
    ParseRule{ .infix = Compiler.binary, .precedence = .Comparison },
    ParseRule{ .infix = Compiler.binary, .precedence = .Comparison },
    ParseRule{ .infix = Compiler.binary, .precedence = .Comparison },

    ParseRule{ .prefix = Compiler.variable },
    ParseRule{ .prefix = Compiler.string },
    ParseRule{ .prefix = Compiler.number },
    ParseRule{ .precedence = .And },
    ParseRule{},
    ParseRule{},

    // false
    ParseRule{ .prefix = Compiler.literal },
    ParseRule{},
    ParseRule{},
    ParseRule{},
    ParseRule{ .prefix = Compiler.literal },
    ParseRule{ .precedence = .Or },
    ParseRule{},
    ParseRule{},
    ParseRule{},
    ParseRule{},
    ParseRule{ .prefix = Compiler.literal },
    ParseRule{},
    ParseRule{},
    ParseRule{},
};

/// A local variable known to the compiler.
pub const Local = struct {
    /// Identifier token that named the variable.
    name: tokens.Token,
    /// Scope depth the variable was declared at, or -1 while the variable
    /// is declared but its initializer has not finished compiling.
    depth: i32,
};

/// Single-pass compiler: pulls tokens from the scanner and writes
/// bytecode straight into `chunk`.
pub const Compiler = struct {
    src: []const u8,
    stdout: vm.StdOut,
    allocator: *Allocator,
    parser: Parser,
    scanr: Scanner = undefined,
    chunk: *chunks.Chunk,
    debug_flag: bool = false,
    vmach: *vm.VM,

    /// Local variable slots. Live locals occupy indices
    /// `0 .. localCount - 1`; every function below relies on that.
    locals: [256]Local,
    /// Number of live entries in `locals`.
    localCount: i32 = 0,
    /// Current block nesting depth; 0 is the global scope.
    scopeDepth: i32 = 0,

    /// Builds a compiler over `source` that writes into `chunk`.
    /// The scanner is not created here; `compile` does that.
    pub fn init(
        allocator: *Allocator,
        chunk: *chunks.Chunk,
        stdout: vm.StdOut,
        source: []const u8,
        debug_flag: bool,
        vmach: *vm.VM,
    ) Compiler {
        return Compiler{
            .src = source,
            .chunk = chunk,
            .allocator = allocator,
            .stdout = stdout,
            .parser = Parser{},
            .debug_flag = debug_flag,
            .vmach = vmach,

            // local variable resolution
            .locals = []Local{Local{
                .name = Token{},
                .depth = -1,
            }} ** 256,
        };
    }

    /// Reports `msg` at `token` on stderr and records that an error
    /// happened. Does nothing while panic mode is active, so one mistake
    /// does not produce a cascade of messages.
    fn errorAt(self: *Compiler, token: Token, msg: []const u8) void {
        if (self.parser.panicMode) return;
        self.parser.panicMode = true;

        std.debug.warn("[line {}] Error", token.line);
        if (token.ttype == TokenType.EOF) {
            std.debug.warn(" at end");
        } else {
            std.debug.warn(" at '{}'", token.lexeme);
        }

        std.debug.warn(": {}\n", msg);
        self.parser.hadError = true;
    }

    /// Reports an error at the current (not yet consumed) token.
    fn errorCurrent(self: *Compiler, msg: []const u8) void {
        self.errorAt(self.parser.current, msg);
    }

    /// Reports an error at the token that was just consumed.
    fn errorPrevious(self: *Compiler, msg: []const u8) void {
        self.errorAt(self.parser.previous, msg);
    }

    /// Moves `current` into `previous` and scans the next token.
    /// Keeps scanning for as long as the scanner returns null.
    fn advance(self: *Compiler) !void {
        self.parser.previous = self.parser.current;

        while (true) {
            var token_opt = try self.scanr.scanToken();
            if (token_opt) |token| {
                self.parser.current = token;
                break;
            }
        }
    }

    /// Consumes the current token if it has type `ttype`; otherwise
    /// reports `msg` at the current token without consuming anything.
    fn consume(self: *Compiler, ttype: TokenType, msg: []const u8) !void {
        if (self.parser.current.ttype == ttype) {
            try self.advance();
            return;
        }

        self.errorCurrent(msg);
    }

    /// Returns whether the current token has type `ttype`.
    fn check(self: *Compiler, ttype: TokenType) bool {
        return self.parser.current.ttype == ttype;
    }

    /// Consumes the current token and returns true if it has type
    /// `ttype`; returns false and consumes nothing otherwise.
    fn match(self: *Compiler, ttype: TokenType) !bool {
        if (!(self.check(ttype))) return false;

        try self.advance();
        return true;
    }

    /// Returns the chunk bytecode is currently written to.
    fn currentChunk(self: *Compiler) *chunks.Chunk {
        return self.chunk;
    }

    /// Appends one byte to the chunk, tagged with the line of the
    /// previously consumed token.
    fn emitByte(self: *Compiler, byte: u8) !void {
        try self.currentChunk().write(byte, self.parser.previous.line);
    }

    /// Appends two bytes, typically an opcode and its one-byte operand.
    fn emitBytes(self: *Compiler, byte1: u8, byte2: u8) !void {
        try self.emitByte(byte1);
        try self.emitByte(byte2);
    }

    fn emitReturn(self: *Compiler) !void {
        try self.emitByte(OpCode.Return);
    }

    /// Hands `value` to the chunk's `writeConstant`, tagged with the line
    /// of the previously consumed token. The returned value is discarded.
    fn emitConstant(self: *Compiler, value: Value) !void {
        _ = try self.currentChunk().writeConstant(
            value,
            self.parser.previous.line,
        );
    }

    /// Finishes compilation: emits the final return and, when debugging
    /// is enabled and no error occurred, disassembles the chunk.
    fn end(self: *Compiler) !void {
        try self.emitReturn();

        if (self.debug_flag and !self.parser.hadError) {
            try self.currentChunk().disassemble(self.stdout, "code");
        }
    }

    fn beginScope(self: *Compiler) void {
        self.scopeDepth += 1;
    }

    /// Leaves the current scope, emitting one Pop for every local that
    /// was declared deeper than the scope being returned to.
    fn endScope(self: *Compiler) !void {
        self.scopeDepth -= 1;

        // clear the current scope in the stack
        while (self.localCount > 0 and
            self.locals[@intCast(usize, self.localCount - 1)].depth > self.scopeDepth)
        {
            try self.emitByte(chunks.OpCode.Pop);
            self.localCount -= 1;
        }
    }

    /// Prefix rule for '(': compiles the inner expression and expects
    /// the closing ')'.
    fn grouping(self: *Compiler, canAssign: bool) !void {
        try self.expression();
        try self.consume(.RIGHT_PAREN, "Expect ')' after expression.");
    }

    /// Emits bytecode for a number being loaded into the code.
    fn number(self: *Compiler, canAssign: bool) !void {
        var value: f64 = try std.fmt.parseFloat(
            f64,
            self.parser.previous.lexeme,
        );
        try self.emitConstant(values.NumberVal(value));
    }

    /// Emits a string constant built from the previous token's lexeme
    /// with its first and last byte dropped (presumably the quotes).
    fn string(self: *Compiler, canAssign: bool) !void {
        const lexeme_len = self.parser.previous.lexeme.len;

        try self.emitConstant(values.ObjVal(try objects.copyString(
            self.vmach,
            self.parser.previous.lexeme[1 .. lexeme_len - 1],
        )));
    }

    /// Looks `name` up among the live locals, innermost first.
    /// Returns the slot index, or -1 when no local has that name.
    /// Reports an error when the match is still uninitialized (depth -1),
    /// but returns its slot anyway.
    fn resolveLocal(self: *Compiler, name: *Token) i32 {
        var i = self.localCount - 1;
        while (i >= 0) : (i -= 1) {
            var idx = @intCast(usize, i);
            var local = &self.locals[idx];
            if (std.mem.eql(u8, name.lexeme, local.name.lexeme)) {
                if (local.depth == -1) {
                    self.errorCurrent("Cannot read local variable in its own initializer.");
                }
                return i;
            }
        }

        return -1;
    }

    /// Emits a read of, or (when `canAssign` holds and '=' follows) an
    /// assignment to, the variable named by `tok`.
    fn namedVariable(self: *Compiler, tok: *Token, canAssign: bool) !void {
        // The name goes through identifierConstant rather than
        // emitConstant: per the original author's note, writeConstant
        // also writes an opcode, which is not wanted here.
        var getOp: u8 = undefined;
        var setOp: u8 = undefined;

        // we try to resolve the local. depending if it gets resolved
        // or not, we select the necessary get/set op codes.
        var arg: i32 = self.resolveLocal(tok);

        if (arg != -1) {
            getOp = chunks.OpCode.GetLocal;
            setOp = chunks.OpCode.SetLocal;
        } else {
            // NOTE(review): only the `.Small` form of the constant index
            // is handled here; confirm what should happen for `.Long`.
            arg = (try self.identifierConstant(tok)).Small;
            getOp = chunks.OpCode.GetGlobal;
            setOp = chunks.OpCode.SetGlobal;
        }

        var idx: u8 = @intCast(u8, arg);

        if (canAssign and try self.match(.EQUAL)) {
            try self.expression();
            try self.emitBytes(setOp, idx);
        } else {
            try self.emitBytes(getOp, idx);
        }
    }

    /// Prefix rule for identifiers.
    fn variable(self: *Compiler, canAssign: bool) !void {
        try self.namedVariable(&self.parser.previous, canAssign);
    }

    /// Emits bytecode for a given unary.
    /// The operand is compiled first, then the operator instruction.
    fn unary(self: *Compiler, canAssign: bool) !void {
        var ttype = self.parser.previous.ttype;
        try self.parsePrecedence(.Unary);

        switch (ttype) {
            .MINUS => try self.emitByte(OpCode.Negate),
            .BANG => try self.emitByte(OpCode.Not),
            else => unreachable,
        }
    }

    /// Infix rule for binary operators. The right operand is compiled at
    /// one precedence level above the operator's own, then the operator
    /// instruction(s) are emitted.
    fn binary(self: *Compiler, canAssign: bool) !void {
        var op_type = self.parser.previous.ttype;
        var rule: *ParseRule = self.getRule(op_type);
        try self.parsePrecedence(@intToEnum(Precedence, @enumToInt(rule.precedence) + 1));

        switch (op_type) {
            .PLUS => try self.emitByte(OpCode.Add),
            .MINUS => try self.emitByte(OpCode.Subtract),
            .STAR => try self.emitByte(OpCode.Multiply),
            .SLASH => try self.emitByte(OpCode.Divide),

            .EQUAL_EQUAL => try self.emitByte(OpCode.Equal),
            .GREATER => try self.emitByte(OpCode.Greater),
            .LESS => try self.emitByte(OpCode.Less),

            // the remaining comparisons are the negation of another one
            .BANG_EQUAL => try self.emitBytes(OpCode.Equal, OpCode.Not),
            .GREATER_EQUAL => try self.emitBytes(OpCode.Less, OpCode.Not),
            .LESS_EQUAL => try self.emitBytes(OpCode.Greater, OpCode.Not),

            else => unreachable,
        }
    }

    /// Prefix rule for `false`, `nil` and `true`.
    fn literal(self: *Compiler, canAssign: bool) !void {
        switch (self.parser.previous.ttype) {
            .FALSE => try self.emitByte(OpCode.False),
            .NIL => try self.emitByte(OpCode.Nil),
            .TRUE => try self.emitByte(OpCode.True),
            else => unreachable,
        }
    }

    /// Core of the Pratt parser: compiles an expression whose operators
    /// all have at least the given precedence.
    fn parsePrecedence(self: *Compiler, precedence: Precedence) anyerror!void {
        try self.advance();
        var as_int = @enumToInt(precedence);
        var prefix_rule_opt = self.getRule(self.parser.previous.ttype).prefix;

        if (prefix_rule_opt) |prefix_rule| {
            // assignment is only legal when parsing at the lowest levels
            var canAssign: bool = as_int <= @enumToInt(Precedence.Assignment);
            try prefix_rule(self, canAssign);

            while (as_int <= @enumToInt(self.getRule(self.parser.current.ttype).precedence)) {
                try self.advance();
                var infix_rule_opt = self.getRule(self.parser.previous.ttype).infix;
                if (infix_rule_opt) |infix_rule| {
                    try infix_rule(self, canAssign);
                }
            }

            // a '=' still pending here means nothing accepted it as an
            // assignment, so the left-hand side was not assignable
            if (canAssign and try self.match(.EQUAL)) {
                self.errorPrevious("Invalid assignment target.");
                try self.expression();
            }
        } else {
            self.errorPrevious("Expect expression.");
            return;
        }
    }

    /// Returns the parse rule for `ttype` from the `rules` table.
    fn getRule(self: *Compiler, ttype: TokenType) *ParseRule {
        return &rules[@enumToInt(ttype)];
    }

    /// Compiles one full expression.
    fn expression(self: *Compiler) anyerror!void {
        try self.parsePrecedence(.Assignment);
    }

    /// Compiles the rest of a print statement; `print` itself has
    /// already been consumed by the caller.
    fn printStmt(self: *Compiler) !void {
        try self.expression();
        try self.consume(.SEMICOLON, "Expect ';' after value.");
        try self.emitByte(OpCode.Print);
    }

    /// Compiles an expression statement; its value is popped afterwards.
    fn exprStmt(self: *Compiler) !void {
        try self.expression();
        try self.consume(.SEMICOLON, "Expect ';' after expression.");
        try self.emitByte(OpCode.Pop);
    }

    /// Leaves panic mode and skips tokens until just after a ';' or just
    /// before a token that starts a statement (or EOF).
    fn synchronize(self: *Compiler) !void {
        self.parser.panicMode = false;

        while (self.parser.current.ttype != .EOF) {
            if (self.parser.previous.ttype == .SEMICOLON) return;

            switch (self.parser.current.ttype) {
                .CLASS, .FUN, .VAR, .FOR, .IF, .WHILE, .PRINT, .RETURN => return,
                else => {},
            }

            try self.advance();
        }
    }

    /// Write an identifier constant to the bytecode.
    /// The token's lexeme is copied into a string object and handed to
    /// the chunk's `writeConstantRaw`; its result is returned.
    fn identifierConstant(
        self: *Compiler,
        token: *Token,
    ) !chunks.ConstantIndex {
        return try self.currentChunk().writeConstantRaw(values.ObjVal(try objects.copyString(
            self.vmach,
            token.lexeme,
        )), token.line);
    }

    /// Appends a new, not yet initialized local named `name`.
    /// Reports an error and adds nothing when all 256 slots are in use.
    fn addLocal(self: *Compiler, name: Token) void {
        if (self.localCount == 256) {
            self.errorCurrent("Too many variables in function.");
            return;
        }

        // Take the first free slot, then grow the count, so that live
        // locals stay in 0 .. localCount - 1 as endScope and
        // resolveLocal expect, and slot 255 is the last one written.
        var local: *Local = &self.locals[@intCast(usize, self.localCount)];
        self.localCount += 1;

        local.name = name;
        // -1 marks "declared, initializer not compiled yet";
        // markInitialized stores the real depth later.
        local.depth = -1;
    }

    /// Registers the previously consumed identifier as a local variable.
    /// Does nothing at global scope. Reports an error when the current
    /// scope already has a local of the same name.
    fn declareVariable(self: *Compiler) void {
        if (self.scopeDepth == 0) return;
        var name: *Token = &self.parser.previous;

        // check if we're redeclaring an existing variable
        // in the *CURRENT* scope.

        // go from the newest local down to the oldest
        var i = self.localCount - 1;
        while (i >= 0) : (i -= 1) {
            var local = self.locals[@intCast(usize, i)];

            // An initialized local from a shallower scope means we have
            // left the current scope; anything beyond it may be shadowed.
            if (local.depth != -1 and local.depth < self.scopeDepth) break;

            if (std.mem.eql(u8, name.lexeme, local.name.lexeme)) {
                self.errorCurrent("Variable with this name already declared in this scope.");
            }
        }

        self.addLocal(name.*);
    }

    /// Consumes the variable name (reporting `msg` if it is missing) and
    /// declares it. For a global, returns the constant index of its name;
    /// for a local, returns a placeholder `.Small = 0` index.
    fn parseVariable(self: *Compiler, msg: []const u8) !chunks.ConstantIndex {
        try self.consume(.IDENTIFIER, msg);
        self.declareVariable();
        if (self.scopeDepth > 0) return chunks.ConstantIndex{ .Small = 0 };
        return try self.identifierConstant(&self.parser.previous);
    }

    /// Emits `op_short` with a one-byte operand for a `.Small` index, or
    /// `op_long` followed by three operand bytes for a `.Long` index.
    fn emitConstWithIndex(
        self: *Compiler,
        op_short: u8,
        op_long: u8,
        idx: chunks.ConstantIndex,
    ) !void {
        switch (idx) {
            .Small => |val| try self.emitBytes(op_short, val),
            .Long => |val| blk: {
                try self.emitByte(op_long);
                try self.emitByte(val[0]);
                try self.emitByte(val[1]);
                try self.emitByte(val[2]);
            },
            else => unreachable,
        }
    }

    /// Marks the most recently added local as initialized by giving it
    /// the current scope depth. Does nothing at global scope.
    fn markInitialized(self: *Compiler) void {
        if (self.scopeDepth == 0) return;
        // the newest local sits in the last live slot
        var idx = @intCast(usize, self.localCount - 1);
        self.locals[idx].depth = self.scopeDepth;
    }

    /// Finishes a variable declaration: a local is only marked as
    /// initialized, a global gets a define instruction for `global`.
    fn defineVariable(self: *Compiler, global: chunks.ConstantIndex) !void {
        if (self.scopeDepth > 0) {
            self.markInitialized();
            return;
        }

        try self.emitConstWithIndex(
            chunks.OpCode.DefineGlobal,
            chunks.OpCode.DefineGlobalLong,
            global,
        );
    }

    /// Compiles the rest of a `var` declaration; `var` itself has already
    /// been consumed. A missing initializer defaults to nil.
    fn varDecl(self: *Compiler) !void {
        var global = try self.parseVariable("Expect variable name.");

        if (try self.match(.EQUAL)) {
            try self.expression();
        } else {
            try self.emitByte(chunks.OpCode.Nil);
        }

        // check scopeDepth here
        try self.consume(.SEMICOLON, "Expect ';' after variable declaration.");
        try self.defineVariable(global);
    }

    /// Compiles one declaration or statement, then resynchronizes if an
    /// error put the parser into panic mode.
    fn declaration(self: *Compiler) anyerror!void {
        if (try self.match(.VAR)) {
            try self.varDecl();
        } else {
            try self.statement();
        }
        if (self.parser.panicMode) try self.synchronize();
    }

    /// Compiles declarations up to the closing '}' (or EOF) and expects
    /// the '}'. The opening '{' has already been consumed.
    fn block(self: *Compiler) anyerror!void {
        while (!self.check(.RIGHT_BRACE) and !self.check(.EOF)) {
            try self.declaration();
        }

        try self.consume(.RIGHT_BRACE, "Expect '}' after block.");
    }

    /// Compiles a print statement, a block (in its own scope), or an
    /// expression statement.
    fn statement(self: *Compiler) !void {
        if (try self.match(.PRINT)) {
            try self.printStmt();
        } else if (try self.match(.LEFT_BRACE)) {
            self.beginScope();
            try self.block();
            try self.endScope();
        } else {
            try self.exprStmt();
        }
    }

    /// Compile the source given when initializing the compiler
    /// into the given chunk.
    ///
    /// Returns true when no compile error was reported. Output goes to
    /// the chunk passed to `init`; the `chunk` parameter is not used.
    pub fn compile(self: *Compiler, chunk: *Chunk) !bool {
        self.scanr = try scanner.Scanner.init(self.allocator, self.src);

        try self.advance();
        while (!(try self.match(.EOF))) {
            try self.declaration();
        }

        try self.end();

        return !self.parser.hadError;
    }
};