// jorts/src/compiler.zig
//
// 588 lines
// 17 KiB
// Zig

const std = @import("std");
const scanner = @import("scanner.zig");
const vm = @import("vm.zig");
const chunks = @import("chunk.zig");
const tokens = @import("token.zig");
const values = @import("value.zig");
const objects = @import("object.zig");
const Allocator = std.mem.Allocator;
const Scanner = scanner.Scanner;
const Chunk = chunks.Chunk;
const Token = tokens.Token;
const TokenType = tokens.TokenType;
const Value = values.Value;
const OpCode = chunks.OpCode;
/// Parser state used by `Compiler`: a two-token window over the scanner
/// output plus the flags that drive error reporting and recovery.
const Parser = struct {
    /// Set once any compile error has been reported by `Compiler.errorAt`;
    /// `Compiler.compile` returns the negation of this flag.
    hadError: bool = false,
    /// Set while recovering from an error. `Compiler.errorAt` stays silent
    /// while it is set and `Compiler.synchronize` clears it again.
    panicMode: bool = false,
    /// The token the parser is currently looking at (not yet consumed).
    current: Token = undefined,
    /// The token that was consumed most recently.
    previous: Token = undefined,
};
/// Operator binding strength, listed from loosest to tightest.
///
/// The numeric values matter: `Compiler.parsePrecedence` compares them with
/// `<=`, and `Compiler.binary` adds one to step to the next tighter level.
const Precedence = enum(u5) {
    None = 0,
    Assignment = 1, // =
    Or = 2, // or
    And = 3, // and
    Equality = 4, // == !=
    Comparison = 5, // < > <= >=
    Term = 6, // + -
    Factor = 7, // * /
    Unary = 8, // ! -
    Call = 9, // . () []
    Primary = 10,
};
/// Signature shared by every prefix/infix parse handler. The `bool` tells
/// the handler whether an assignment (`=`) is allowed at this position.
const ParseFn = fn (*Compiler, bool) anyerror!void;

/// One row of the rule table: how a token behaves at the start of an
/// expression (`prefix`), between two operands (`infix`), and how tightly
/// it binds as an infix operator (`precedence`). Every field defaults to
/// "no role", so `ParseRule{}` describes a token that never appears in an
/// expression.
const ParseRule = struct {
    precedence: Precedence = Precedence.None,
    prefix: ?ParseFn = null,
    infix: ?ParseFn = null,
};
/// Pratt-parser rule table: one `ParseRule` per token type.
///
/// `Compiler.getRule` indexes this array with `@enumToInt(ttype)`, so the
/// entries must stay in exactly the same order as the `TokenType`
/// declarations. The table is `var` because `getRule` hands out a mutable
/// `*ParseRule` into it.
///
/// NOTE(review): entries tagged "presumably" below are labelled by inference
/// from the handler they use and from their position — confirm the names
/// against the `TokenType` enum in token.zig.
var rules = [_]ParseRule{
// LEFT_PAREN: given call precedence, with grouping as its prefix parse
// function. There is no infix handler, so a '(' in infix position is
// consumed by `parsePrecedence` without emitting anything.
ParseRule{ .prefix = Compiler.grouping, .precedence = .Call },
// presumably RIGHT_PAREN, LEFT_BRACE, RIGHT_BRACE, COMMA: no expression role.
ParseRule{},
ParseRule{},
ParseRule{},
ParseRule{},
// DOT: call precedence too, for things like a.b (no handler yet).
ParseRule{ .precedence = .Call },
// MINUS: can be a unary operator when it prefixes an operand, or a binary
// operator when it sits between two operands.
ParseRule{
.prefix = Compiler.unary,
.infix = Compiler.binary,
.precedence = .Term,
},
// presumably PLUS: binary only, same precedence as MINUS.
ParseRule{ .infix = Compiler.binary, .precedence = .Term },
// presumably SEMICOLON: no expression role.
ParseRule{},
// SLASH and STAR: binary operators at factor precedence.
ParseRule{ .infix = Compiler.binary, .precedence = .Factor },
ParseRule{ .infix = Compiler.binary, .precedence = .Factor },
// From here on: the one-or-two character tokens.
// presumably BANG: prefix-only unary operator.
ParseRule{ .prefix = Compiler.unary },
// BANG_EQUAL: the != operator.
ParseRule{ .infix = Compiler.binary, .precedence = .Equality },
// presumably EQUAL: assignment is handled inside `namedVariable`, not here.
ParseRule{},
// EQUAL_EQUAL: the == operator.
ParseRule{ .infix = Compiler.binary, .precedence = .Equality },
// The four comparison operators (presumably GREATER, GREATER_EQUAL, LESS,
// LESS_EQUAL, in the enum's order).
ParseRule{ .infix = Compiler.binary, .precedence = .Comparison },
ParseRule{ .infix = Compiler.binary, .precedence = .Comparison },
ParseRule{ .infix = Compiler.binary, .precedence = .Comparison },
ParseRule{ .infix = Compiler.binary, .precedence = .Comparison },
// presumably IDENTIFIER, STRING, NUMBER: each handled by the prefix
// function of the matching name.
ParseRule{ .prefix = Compiler.variable },
ParseRule{ .prefix = Compiler.string },
ParseRule{ .prefix = Compiler.number },
// presumably AND: has a precedence but no infix handler yet.
ParseRule{ .precedence = .And },
// Two keywords with no expression role.
ParseRule{},
ParseRule{},
// FALSE
ParseRule{ .prefix = Compiler.literal },
// Three keywords with no expression role.
ParseRule{},
ParseRule{},
ParseRule{},
// presumably NIL
ParseRule{ .prefix = Compiler.literal },
// presumably OR: has a precedence but no infix handler yet.
ParseRule{ .precedence = .Or },
// Four keywords with no expression role.
ParseRule{},
ParseRule{},
ParseRule{},
ParseRule{},
// presumably TRUE
ParseRule{ .prefix = Compiler.literal },
// Remaining token types (three entries): no expression role.
ParseRule{},
ParseRule{},
ParseRule{},
};
/// A local variable tracked by the compiler while it is in scope.
pub const Local = struct {
    /// Scope depth at which the variable was declared. `-1` is a sentinel
    /// meaning "declared, but its initializer has not finished compiling";
    /// `Compiler.resolveLocal` reports an error when such a local is read.
    depth: i32,
    /// The identifier token that named the variable; its lexeme is what
    /// later lookups are compared against.
    name: tokens.Token,
};
/// Single-pass compiler: pulls tokens from the scanner, parses them with a
/// Pratt parser driven by the `rules` table, and writes bytecode straight
/// into `chunk`.
///
/// Local variables live in `locals[0 .. localCount]`; the index of a local in
/// that array is the operand emitted for `GetLocal` / `SetLocal`.
pub const Compiler = struct {
    /// Source text being compiled.
    src: []const u8,
    /// Output handle used when disassembling the finished chunk.
    stdout: vm.StdOut,
    allocator: *Allocator,
    parser: Parser,
    /// Scanner over `src`; only valid once `compile` has initialised it.
    scanr: Scanner = undefined,
    /// Destination for all emitted bytecode and constants.
    chunk: *chunks.Chunk,
    /// When set, the chunk is disassembled after an error-free compile.
    debug_flag: bool = false,
    /// VM handle passed to `objects.copyString` when creating string objects.
    vmach: *vm.VM,
    /// Locals currently in scope, innermost last.
    locals: [max_locals]Local,
    /// Number of live entries at the front of `locals`.
    localCount: i32 = 0,
    /// Current block nesting depth; 0 means global scope.
    scopeDepth: i32 = 0,

    /// Capacity of `locals`. Local slots are emitted as a single `u8`
    /// operand, so this must not exceed 256.
    const max_locals = 256;

    /// Builds a compiler for `source` that will emit into `chunk`.
    /// Nothing is scanned or emitted until `compile` is called.
    pub fn init(
        allocator: *Allocator,
        chunk: *chunks.Chunk,
        stdout: vm.StdOut,
        source: []const u8,
        debug_flag: bool,
        vmach: *vm.VM,
    ) Compiler {
        return Compiler{
            .src = source,
            .chunk = chunk,
            .allocator = allocator,
            .stdout = stdout,
            .parser = Parser{},
            .debug_flag = debug_flag,
            .vmach = vmach,
            // Every slot starts out as "declared but uninitialised".
            .locals = [_]Local{Local{
                .name = Token{},
                .depth = -1,
            }} ** max_locals,
        };
    }

    /// Reports a compile error located at `token` and enters panic mode.
    /// While panic mode is active further errors are dropped, so a single
    /// mistake does not produce a cascade of messages.
    fn errorAt(self: *Compiler, token: Token, msg: []const u8) void {
        if (self.parser.panicMode) return;
        self.parser.panicMode = true;
        std.debug.warn("[line {}] Error", token.line);
        if (token.ttype == TokenType.EOF) {
            std.debug.warn(" at end");
        } else {
            std.debug.warn(" at '{}'", token.lexeme);
        }
        std.debug.warn(": {}\n", msg);
        self.parser.hadError = true;
    }

    /// Reports an error at the token that has not been consumed yet.
    fn errorCurrent(self: *Compiler, msg: []const u8) void {
        self.errorAt(self.parser.current, msg);
    }

    /// Reports an error at the token that was consumed most recently.
    fn errorPrevious(self: *Compiler, msg: []const u8) void {
        self.errorAt(self.parser.previous, msg);
    }

    /// Shifts the token window forward by one: `current` becomes `previous`
    /// and the next token from the scanner becomes `current`.
    fn advance(self: *Compiler) !void {
        self.parser.previous = self.parser.current;
        // scanToken may yield null; keep asking until a real token arrives.
        while (true) {
            const token_opt = try self.scanr.scanToken();
            if (token_opt) |token| {
                self.parser.current = token;
                break;
            }
        }
    }

    /// Consumes the current token if it has type `ttype`; otherwise reports
    /// `msg` at the current token and consumes nothing.
    fn consume(self: *Compiler, ttype: TokenType, msg: []const u8) !void {
        if (self.parser.current.ttype == ttype) {
            try self.advance();
            return;
        }
        self.errorCurrent(msg);
    }

    /// Returns whether the current token has type `ttype`, without consuming it.
    fn check(self: *Compiler, ttype: TokenType) bool {
        return self.parser.current.ttype == ttype;
    }

    /// Consumes the current token and returns true if it has type `ttype`;
    /// otherwise leaves it alone and returns false.
    fn match(self: *Compiler, ttype: TokenType) !bool {
        if (!self.check(ttype)) return false;
        try self.advance();
        return true;
    }

    /// The chunk bytecode is currently being written to.
    fn currentChunk(self: *Compiler) *chunks.Chunk {
        return self.chunk;
    }

    /// Appends one byte to the chunk, tagged with the line of the previous token.
    fn emitByte(self: *Compiler, byte: u8) !void {
        try self.currentChunk().write(byte, self.parser.previous.line);
    }

    /// Appends two bytes, typically an opcode followed by its operand.
    fn emitBytes(self: *Compiler, byte1: u8, byte2: u8) !void {
        try self.emitByte(byte1);
        try self.emitByte(byte2);
    }

    fn emitReturn(self: *Compiler) !void {
        try self.emitByte(OpCode.Return);
    }

    /// Adds `value` to the chunk via `writeConstant`, which also writes the
    /// instruction that loads it.
    fn emitConstant(self: *Compiler, value: Value) !void {
        _ = try self.currentChunk().writeConstant(
            value,
            self.parser.previous.line,
        );
    }

    /// Finishes the chunk with a return instruction and, when debugging is
    /// enabled and no error was reported, disassembles it to `stdout`.
    fn end(self: *Compiler) !void {
        try self.emitReturn();
        if (self.debug_flag and !self.parser.hadError) {
            try self.currentChunk().disassemble(self.stdout, "code");
        }
    }

    fn beginScope(self: *Compiler) void {
        self.scopeDepth += 1;
    }

    /// Leaves the current block scope, emitting one `Pop` for every local
    /// that was declared inside it and forgetting those locals.
    fn endScope(self: *Compiler) !void {
        self.scopeDepth -= 1;
        while (self.localCount > 0 and
            self.locals[@intCast(usize, self.localCount - 1)].depth > self.scopeDepth)
        {
            try self.emitByte(OpCode.Pop);
            self.localCount -= 1;
        }
    }

    /// Prefix handler for '(': parses the inner expression and requires ')'.
    fn grouping(self: *Compiler, canAssign: bool) !void {
        try self.expression();
        try self.consume(.RIGHT_PAREN, "Expect ')' after expression.");
    }

    /// Emits bytecode for a number being loaded into the code.
    fn number(self: *Compiler, canAssign: bool) !void {
        const value: f64 = try std.fmt.parseFloat(
            f64,
            self.parser.previous.lexeme,
        );
        try self.emitConstant(values.NumberVal(value));
    }

    /// Emits a string constant. The first and last bytes of the lexeme (the
    /// surrounding quote characters) are stripped before copying.
    fn string(self: *Compiler, canAssign: bool) !void {
        const lexeme_len = self.parser.previous.lexeme.len;
        try self.emitConstant(values.ObjVal(try objects.copyString(
            self.vmach,
            self.parser.previous.lexeme[1 .. lexeme_len - 1],
        )));
    }

    /// Looks `name` up among the live locals, innermost first.
    /// Returns the local's slot index, or -1 when no local has that name
    /// (the caller then treats it as a global).
    fn resolveLocal(self: *Compiler, name: *Token) i32 {
        var i = self.localCount - 1;
        while (i >= 0) : (i -= 1) {
            const local = &self.locals[@intCast(usize, i)];
            if (std.mem.eql(u8, name.lexeme, local.name.lexeme)) {
                if (local.depth == -1) {
                    // Reported at the identifier being read, which is the
                    // token that was just consumed.
                    self.errorPrevious("Cannot read local variable in its own initializer.");
                }
                return i;
            }
        }
        return -1;
    }

    /// Emits a get or set for the variable named by `tok`. A set is emitted
    /// only when `canAssign` is true and an '=' follows the name.
    fn namedVariable(self: *Compiler, tok: *Token, canAssign: bool) !void {
        // writeConstant always writes its own load opcode, which is not what
        // we want here, so the opcode and operand are emitted by hand.
        var getOp: u8 = undefined;
        var setOp: u8 = undefined;

        // Try to resolve a local first; whether that succeeds decides which
        // get/set opcode pair is used.
        var arg: i32 = self.resolveLocal(tok);
        if (arg != -1) {
            getOp = OpCode.GetLocal;
            setOp = OpCode.SetLocal;
        } else {
            // NOTE(review): only the `.Small` constant index is handled for
            // global access; a `.Long` index is not supported here — confirm
            // whether long-form get/set opcodes exist in chunk.zig.
            arg = (try self.identifierConstant(tok)).Small;
            getOp = OpCode.GetGlobal;
            setOp = OpCode.SetGlobal;
        }

        const idx: u8 = @intCast(u8, arg);
        if (canAssign and try self.match(.EQUAL)) {
            try self.expression();
            try self.emitBytes(setOp, idx);
        } else {
            try self.emitBytes(getOp, idx);
        }
    }

    /// Prefix handler for identifiers.
    fn variable(self: *Compiler, canAssign: bool) !void {
        try self.namedVariable(&self.parser.previous, canAssign);
    }

    /// Emits bytecode for a given unary.
    fn unary(self: *Compiler, canAssign: bool) !void {
        // Remember the operator: parsing the operand moves the token window.
        const ttype = self.parser.previous.ttype;
        try self.parsePrecedence(.Unary);
        switch (ttype) {
            .MINUS => try self.emitByte(OpCode.Negate),
            .BANG => try self.emitByte(OpCode.Not),
            else => unreachable,
        }
    }

    /// Infix handler for binary operators. The left operand has already been
    /// compiled; this compiles the right operand and then the operator.
    fn binary(self: *Compiler, canAssign: bool) !void {
        const op_type = self.parser.previous.ttype;
        const rule: *ParseRule = self.getRule(op_type);
        // Parse the right operand one level tighter than the operator itself,
        // which makes binary operators left-associative.
        try self.parsePrecedence(@intToEnum(Precedence, @enumToInt(rule.precedence) + 1));
        switch (op_type) {
            .PLUS => try self.emitByte(OpCode.Add),
            .MINUS => try self.emitByte(OpCode.Subtract),
            .STAR => try self.emitByte(OpCode.Multiply),
            .SLASH => try self.emitByte(OpCode.Divide),
            .EQUAL_EQUAL => try self.emitByte(OpCode.Equal),
            .GREATER => try self.emitByte(OpCode.Greater),
            .LESS => try self.emitByte(OpCode.Less),
            // The remaining comparisons are the negation of an existing one.
            .BANG_EQUAL => try self.emitBytes(OpCode.Equal, OpCode.Not),
            .GREATER_EQUAL => try self.emitBytes(OpCode.Less, OpCode.Not),
            .LESS_EQUAL => try self.emitBytes(OpCode.Greater, OpCode.Not),
            else => unreachable,
        }
    }

    /// Prefix handler for the keyword literals `false`, `nil` and `true`.
    fn literal(self: *Compiler, canAssign: bool) !void {
        switch (self.parser.previous.ttype) {
            .FALSE => try self.emitByte(OpCode.False),
            .NIL => try self.emitByte(OpCode.Nil),
            .TRUE => try self.emitByte(OpCode.True),
            else => unreachable,
        }
    }

    /// Core of the Pratt parser: parses an expression whose operators all
    /// bind at least as tightly as `precedence`.
    fn parsePrecedence(self: *Compiler, precedence: Precedence) anyerror!void {
        try self.advance();
        const as_int = @enumToInt(precedence);
        const prefix_rule_opt = self.getRule(self.parser.previous.ttype).prefix;

        if (prefix_rule_opt) |prefix_rule| {
            // Assignment is only legal when parsing at the loosest level.
            const canAssign: bool = as_int <= @enumToInt(Precedence.Assignment);
            try prefix_rule(self, canAssign);

            while (as_int <= @enumToInt(self.getRule(self.parser.current.ttype).precedence)) {
                try self.advance();
                // Tokens that carry a precedence but have no infix handler
                // are consumed without emitting anything.
                const infix_rule_opt = self.getRule(self.parser.previous.ttype).infix;
                if (infix_rule_opt) |infix_rule| {
                    try infix_rule(self, canAssign);
                }
            }

            // An '=' that nobody consumed means the left side was not
            // something that can be assigned to.
            if (canAssign and try self.match(.EQUAL)) {
                self.errorPrevious("Invalid assignment target.");
                try self.expression();
            }
        } else {
            self.errorPrevious("Expect expression.");
            return;
        }
    }

    /// Returns the parse rule for `ttype`; `rules` is indexed by the enum's
    /// integer value.
    fn getRule(self: *Compiler, ttype: TokenType) *ParseRule {
        return &rules[@enumToInt(ttype)];
    }

    /// Parses a full expression, starting at the loosest precedence level.
    fn expression(self: *Compiler) anyerror!void {
        try self.parsePrecedence(.Assignment);
    }

    /// `print <expr>;`
    fn printStmt(self: *Compiler) !void {
        try self.expression();
        try self.consume(.SEMICOLON, "Expect ';' after value.");
        try self.emitByte(OpCode.Print);
    }

    /// `<expr>;` — the value is computed and then discarded.
    fn exprStmt(self: *Compiler) !void {
        try self.expression();
        try self.consume(.SEMICOLON, "Expect ';' after expression.");
        try self.emitByte(OpCode.Pop);
    }

    /// Error recovery: leaves panic mode and skips tokens until just after a
    /// ';' or just before a keyword that begins a statement.
    fn synchronize(self: *Compiler) !void {
        self.parser.panicMode = false;
        while (self.parser.current.ttype != .EOF) {
            if (self.parser.previous.ttype == .SEMICOLON) return;
            switch (self.parser.current.ttype) {
                .CLASS, .FUN, .VAR, .FOR, .IF, .WHILE, .PRINT, .RETURN => return,
                else => {},
            }
            try self.advance();
        }
    }

    /// Write an identifier constant to the bytecode.
    /// The token's lexeme is copied into a string object and stored in the
    /// chunk's constants; the resulting index is returned.
    fn identifierConstant(
        self: *Compiler,
        token: *Token,
    ) !chunks.ConstantIndex {
        return try self.currentChunk().writeConstantRaw(values.ObjVal(try objects.copyString(
            self.vmach,
            token.lexeme,
        )), token.line);
    }

    /// Appends a new local named `name` to `locals`. The local starts with
    /// depth -1 ("uninitialised") until `markInitialized` is called.
    fn addLocal(self: *Compiler, name: Token) void {
        if (self.localCount == max_locals) {
            self.errorPrevious("Too many variables in function.");
            return;
        }
        // Fill the first free slot, then grow the count, so that the live
        // locals are always exactly locals[0 .. localCount].
        var local: *Local = &self.locals[@intCast(usize, self.localCount)];
        self.localCount += 1;
        local.name = name;
        local.depth = -1;
    }

    /// Records the identifier in `parser.previous` as a new local.
    /// Does nothing at global scope. Reports an error if the current scope
    /// already has a local with the same name.
    fn declareVariable(self: *Compiler) void {
        if (self.scopeDepth == 0) return;
        const name: *Token = &self.parser.previous;

        // Walk from the newest local outwards, looking for a redeclaration
        // in the *current* scope only.
        var i = self.localCount - 1;
        while (i >= 0) : (i -= 1) {
            const local = &self.locals[@intCast(usize, i)];
            // An initialised local from a shallower scope means we have left
            // the current scope; shadowing those is allowed.
            if (local.depth != -1 and local.depth < self.scopeDepth) break;
            if (std.mem.eql(u8, name.lexeme, local.name.lexeme)) {
                self.errorPrevious("Variable with this name already declared in this scope.");
            }
        }
        self.addLocal(name.*);
    }

    /// Consumes an identifier (reporting `msg` if there is none) and declares
    /// it. For globals, returns the constant index holding the name; for
    /// locals the returned index is a dummy that `defineVariable` ignores.
    fn parseVariable(self: *Compiler, msg: []const u8) !chunks.ConstantIndex {
        try self.consume(.IDENTIFIER, msg);
        self.declareVariable();
        if (self.scopeDepth > 0) return chunks.ConstantIndex{ .Small = 0 };
        return try self.identifierConstant(&self.parser.previous);
    }

    /// Emits `op_short` plus a one-byte index, or `op_long` plus a three-byte
    /// index, depending on which form `idx` holds.
    fn emitConstWithIndex(
        self: *Compiler,
        op_short: u8,
        op_long: u8,
        idx: chunks.ConstantIndex,
    ) !void {
        switch (idx) {
            .Small => |val| try self.emitBytes(op_short, val),
            .Long => |val| {
                try self.emitByte(op_long);
                try self.emitByte(val[0]);
                try self.emitByte(val[1]);
                try self.emitByte(val[2]);
            },
            else => unreachable,
        }
    }

    /// Marks the most recently added local as initialised by giving it the
    /// current scope depth. Does nothing at global scope.
    fn markInitialized(self: *Compiler) void {
        if (self.scopeDepth == 0) return;
        const idx = @intCast(usize, self.localCount - 1);
        self.locals[idx].depth = self.scopeDepth;
    }

    /// Completes a variable declaration. A local's value is already in place
    /// on the stack, so it only needs marking as initialised; a global gets a
    /// `DefineGlobal` / `DefineGlobalLong` instruction.
    fn defineVariable(self: *Compiler, global: chunks.ConstantIndex) !void {
        if (self.scopeDepth > 0) {
            self.markInitialized();
            return;
        }
        try self.emitConstWithIndex(
            OpCode.DefineGlobal,
            OpCode.DefineGlobalLong,
            global,
        );
    }

    /// `var <name> [= <expr>];` — a missing initialiser defaults to nil.
    fn varDecl(self: *Compiler) !void {
        const global = try self.parseVariable("Expect variable name.");
        if (try self.match(.EQUAL)) {
            try self.expression();
        } else {
            try self.emitByte(OpCode.Nil);
        }
        try self.consume(.SEMICOLON, "Expect ';' after variable declaration.");
        // Defined only after the initialiser has been compiled, so the
        // initialiser cannot read the variable it is initialising.
        try self.defineVariable(global);
    }

    /// Parses one declaration or statement, then resynchronises if an error
    /// left the parser in panic mode.
    fn declaration(self: *Compiler) anyerror!void {
        if (try self.match(.VAR)) {
            try self.varDecl();
        } else {
            try self.statement();
        }
        if (self.parser.panicMode) try self.synchronize();
    }

    /// Parses declarations up to the closing '}' (the opening '{' has already
    /// been consumed by the caller).
    fn block(self: *Compiler) anyerror!void {
        while (!self.check(.RIGHT_BRACE) and !self.check(.EOF)) {
            try self.declaration();
        }
        try self.consume(.RIGHT_BRACE, "Expect '}' after block.");
    }

    /// Parses a print statement, a braced block, or an expression statement.
    fn statement(self: *Compiler) !void {
        if (try self.match(.PRINT)) {
            try self.printStmt();
        } else if (try self.match(.LEFT_BRACE)) {
            self.beginScope();
            try self.block();
            try self.endScope();
        } else {
            try self.exprStmt();
        }
    }

    /// Compile the source given when initializing the compiler
    /// into the chunk given to `init`.
    ///
    /// The `chunk` parameter is not used by this function; it is kept so
    /// existing callers continue to compile.
    /// Returns true when no compile error was reported.
    pub fn compile(self: *Compiler, chunk: *Chunk) !bool {
        self.scanr = try scanner.Scanner.init(self.allocator, self.src);
        // Prime the token window so `current` holds the first token.
        try self.advance();
        while (!(try self.match(.EOF))) {
            try self.declaration();
        }
        try self.end();
        return !self.parser.hadError;
    }
};