diff --git a/src/compiler.zig b/src/compiler.zig index 459adda..7b75e0e 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -1,5 +1,5 @@ const std = @import("std"); -const scanner = @import("new_scanner.zig"); +const scanner = @import("scanner.zig"); const vm = @import("vm.zig"); const chunks = @import("chunk.zig"); const tokens = @import("token.zig"); diff --git a/src/new_scanner.zig b/src/new_scanner.zig deleted file mode 100644 index 93f8160..0000000 --- a/src/new_scanner.zig +++ /dev/null @@ -1,276 +0,0 @@ -const std = @import("std"); -const tokens = @import("token.zig"); - -const Token = tokens.Token; -const TokenType = tokens.TokenType; - -const Allocator = std.mem.Allocator; - -pub const TokenError = error{ - Unexpected, - Unterminated, -}; - -fn isDigit(char: u8) bool { - return char >= '0' and char <= '9'; -} - -fn isAlpha(c: u8) bool { - return (c >= 'a' and c <= 'z') or - (c >= 'A' and c <= 'Z') or - c == '_'; -} - -fn isAlphaNumeric(char: u8) bool { - return isAlpha(char) or isDigit(char); -} - -pub const KeywordMap = std.AutoHashMap([]const u8, u6); - -/// The book does say that C doesn't have hashmaps. but Zig does. and I can -/// use it here. -fn initKeywordMap(allocator: *std.mem.Allocator) !KeywordMap { - var map = KeywordMap.init(allocator); - - const keywords = [][]const u8{ - "and"[0..], - "class"[0..], - "else"[0..], - "false"[0..], - "for"[0..], - "fun"[0..], - "if"[0..], - "nil"[0..], - "or"[0..], - "print"[0..], - "return"[0..], - "super"[0..], - "this"[0..], - "true"[0..], - "var"[0..], - "while"[0..], - }; - - const tags = []TokenType{ - TokenType.AND, - TokenType.CLASS, - TokenType.ELSE, - TokenType.FALSE, - TokenType.FOR, - TokenType.FUN, - TokenType.IF, - TokenType.NIL, - TokenType.OR, - TokenType.PRINT, - TokenType.RETURN, - TokenType.SUPER, - TokenType.THIS, - TokenType.TRUE, - TokenType.VAR, - TokenType.WHILE, - }; - - for (keywords) |keyword, idx| { - var tag = @enumToInt(tags[idx]); - _ = try map.put(keyword, tag); - } - - return map; -} - -pub const Scanner = struct { - source: []const u8, - keywords: KeywordMap, - - start: usize = 0, - current: usize = 0, - line: usize = 1, - - allocator: *Allocator, - - pub fn init(allocator: *Allocator, data: []const u8) !Scanner { - return Scanner{ - .source = data, - .keywords = try initKeywordMap(allocator), - .allocator = allocator, - }; - } - - fn isAtEnd(self: *Scanner) bool { - return self.current >= self.source.len; - } - - fn advance(self: *Scanner) u8 { - self.current += 1; - return self.source[self.current - 1]; - } - - pub fn currentLexeme(self: *Scanner) []const u8 { - return self.source[self.start..self.current]; - } - - fn makeToken(self: *Scanner, ttype: TokenType) Token { - return Token{ - .ttype = ttype, - .lexeme = self.currentLexeme(), - .line = self.line, - }; - } - - /// Check if the next character matches what is expected. - fn match(self: *Scanner, expected: u8) bool { - if (self.isAtEnd()) return false; - if (self.source[self.current] != expected) return false; - - self.current += 1; - return true; - } - - /// Add a SimpleToken of type_match if the next character is - /// `expected`. Adds a SimpleToken of type_nomatch when it is not. - fn makeMatchToken( - self: *Scanner, - expected: u8, - type_match: TokenType, - type_nomatch: TokenType, - ) Token { - if (self.match(expected)) { - return self.makeToken(type_match); - } else { - return self.makeToken(type_nomatch); - } - } - - fn peek(self: *Scanner) u8 { - if (self.isAtEnd()) return 0; - return self.source[self.current]; - } - - fn peekNext(self: *Scanner) u8 { - if (self.current + 1 >= self.source.len) return 0; - return self.source[self.current + 1]; - } - - fn skipWhitespace(self: *Scanner) void { - while (true) { - var c = self.peek(); - switch (c) { - ' ', '\r', '\t' => blk: { - _ = self.advance(); - }, - '\n' => blk: { - self.line += 1; - _ = self.advance(); - }, - else => return, - } - } - } - - fn doString(self: *Scanner) !Token { - // consume entire string - while (self.peek() != '"' and !self.isAtEnd()) { - if (self.peek() == '\n') self.line += 1; - _ = self.advance(); - } - - // unterminated string. - if (self.isAtEnd()) { - return TokenError.Unterminated; - } - - // the closing ". - _ = self.advance(); - - // trim the surrounding quotes. - return self.makeToken(.STRING); - } - - /// Consume a number - fn doNumber(self: *Scanner) Token { - while (isDigit(self.peek())) { - _ = self.advance(); - } - - // check if its a number like 12.34, where the '.' character - // exists and the one next to it is a digit. - if (self.peek() == '.' and isDigit(self.peekNext())) { - _ = self.advance(); - - while (isDigit(self.peek())) { - _ = self.advance(); - } - } - - return self.makeToken(.NUMBER); - } - - /// Either a keyword or an identifier come out of this. - fn doIdentifier(self: *Scanner) Token { - while (isAlphaNumeric(self.peek())) { - _ = self.advance(); - } - - // after reading the identifier, we check - // if it is any of our keywords, if it is, then we add - // the specificed keyword type. if not, just .IDENTIFIER - var text = self.source[self.start..self.current]; - var type_opt = self.keywords.get(text); - var toktype: TokenType = undefined; - - if (type_opt) |kv| { - toktype = @intToEnum(TokenType, kv.value); - } else { - toktype = TokenType.IDENTIFIER; - } - - return self.makeToken(toktype); - } - - pub fn scanToken(self: *Scanner) !?Token { - self.skipWhitespace(); - self.start = self.current; - - if (self.isAtEnd()) return self.makeToken(TokenType.EOF); - - var c = self.advance(); - if (isAlpha(c)) return self.doIdentifier(); - if (isDigit(c)) return self.doNumber(); - - var token = switch (c) { - '(' => self.makeToken(.LEFT_PAREN), - ')' => self.makeToken(.RIGHT_PAREN), - '{' => self.makeToken(.LEFT_BRACE), - '}' => self.makeToken(.RIGHT_BRACE), - ',' => self.makeToken(.COMMA), - '.' => self.makeToken(.DOT), - '-' => self.makeToken(.MINUS), - '+' => self.makeToken(.PLUS), - ';' => self.makeToken(.SEMICOLON), - '*' => self.makeToken(.STAR), - - '!' => self.makeMatchToken('=', .BANG_EQUAL, .BANG), - '=' => self.makeMatchToken('=', .EQUAL_EQUAL, .EQUAL), - '<' => self.makeMatchToken('=', .LESS_EQUAL, .LESS), - '>' => self.makeMatchToken('=', .GREATER_EQUAL, .GREATER), - - '/' => blk: { - if (self.peekNext() == '/') { - while (self.peek() != '\n' and !self.isAtEnd()) { - _ = self.advance(); - } - - break :blk null; - } else { - break :blk self.makeToken(.SLASH); - } - }, - - '"' => try self.doString(), - - else => return TokenError.Unexpected, - }; - - return token; - } -}; diff --git a/src/scanner.zig b/src/scanner.zig index b83a7a4..93f8160 100644 --- a/src/scanner.zig +++ b/src/scanner.zig @@ -1,10 +1,15 @@ const std = @import("std"); +const tokens = @import("token.zig"); -const token = @import("token.zig"); -const main = @import("main.zig"); +const Token = tokens.Token; +const TokenType = tokens.TokenType; -const TokenList = std.ArrayList(token.Token); -const TokenType = token.TokenType; +const Allocator = std.mem.Allocator; + +pub const TokenError = error{ + Unexpected, + Unterminated, +}; fn isDigit(char: u8) bool { return char >= '0' and char <= '9'; @@ -22,6 +27,8 @@ fn isAlphaNumeric(char: u8) bool { pub const KeywordMap = std.AutoHashMap([]const u8, u6); +/// The book does say that C doesn't have hashmaps. but Zig does. and I can +/// use it here. fn initKeywordMap(allocator: *std.mem.Allocator) !KeywordMap { var map = KeywordMap.init(allocator); @@ -72,19 +79,20 @@ fn initKeywordMap(allocator: *std.mem.Allocator) !KeywordMap { } pub const Scanner = struct { - source: []u8, - tokens: TokenList, + source: []const u8, keywords: KeywordMap, start: usize = 0, current: usize = 0, line: usize = 1, - pub fn init(allocator: *std.mem.Allocator, data: []u8) !Scanner { + allocator: *Allocator, + + pub fn init(allocator: *Allocator, data: []const u8) !Scanner { return Scanner{ .source = data, - .tokens = TokenList.init(allocator), .keywords = try initKeywordMap(allocator), + .allocator = allocator, }; } @@ -97,49 +105,16 @@ pub const Scanner = struct { return self.source[self.current - 1]; } - pub fn currentLexeme(self: *Scanner) []u8 { + pub fn currentLexeme(self: *Scanner) []const u8 { return self.source[self.start..self.current]; } - fn addSimpleToken(self: *Scanner, ttype: token.TokenType) !void { - try self.addToken(token.Token{ - .Simple = token.SimpleToken.init( - ttype, - self.currentLexeme(), - self.line, - {}, - ), - }); - } - - fn addSliceToken(self: *Scanner, ttype: token.TokenType, slice: []u8) !void { - try self.addToken(token.Token{ - .Slice = token.SliceToken.init( - ttype, - self.currentLexeme(), - self.line, - slice, - ), - }); - } - - /// Keep in mind Lox only has a single number type and that is a float one. - fn addNumberToken(self: *Scanner, ttype: token.TokenType, num: f32) !void { - try self.addToken(token.Token{ - .Number = token.NumberToken.init( - ttype, - self.currentLexeme(), - self.line, - num, - ), - }); - } - - fn addToken( - self: *Scanner, - tok: token.Token, - ) !void { - try self.tokens.append(tok); + fn makeToken(self: *Scanner, ttype: TokenType) Token { + return Token{ + .ttype = ttype, + .lexeme = self.currentLexeme(), + .line = self.line, + }; } /// Check if the next character matches what is expected. @@ -153,16 +128,16 @@ pub const Scanner = struct { /// Add a SimpleToken of type_match if the next character is /// `expected`. Adds a SimpleToken of type_nomatch when it is not. - fn addMatchToken( + fn makeMatchToken( self: *Scanner, expected: u8, - type_match: token.TokenType, - type_nomatch: token.TokenType, - ) !void { + type_match: TokenType, + type_nomatch: TokenType, + ) Token { if (self.match(expected)) { - try self.addSimpleToken(type_match); + return self.makeToken(type_match); } else { - try self.addSimpleToken(type_nomatch); + return self.makeToken(type_nomatch); } } @@ -171,7 +146,28 @@ pub const Scanner = struct { return self.source[self.current]; } - fn doString(self: *Scanner) !void { + fn peekNext(self: *Scanner) u8 { + if (self.current + 1 >= self.source.len) return 0; + return self.source[self.current + 1]; + } + + fn skipWhitespace(self: *Scanner) void { + while (true) { + var c = self.peek(); + switch (c) { + ' ', '\r', '\t' => blk: { + _ = self.advance(); + }, + '\n' => blk: { + self.line += 1; + _ = self.advance(); + }, + else => return, + } + } + } + + fn doString(self: *Scanner) !Token { // consume entire string while (self.peek() != '"' and !self.isAtEnd()) { if (self.peek() == '\n') self.line += 1; @@ -180,27 +176,18 @@ pub const Scanner = struct { // unterminated string. if (self.isAtEnd()) { - try main.doError(self.line, "Unterminated string."); - return; + return TokenError.Unterminated; } // the closing ". _ = self.advance(); // trim the surrounding quotes. - try self.addSliceToken( - .STRING, - self.source[self.start + 1 .. self.current - 1], - ); - } - - fn peekNext(self: *Scanner) u8 { - if (self.current + 1 >= self.source.len) return 0; - return self.source[self.current + 1]; + return self.makeToken(.STRING); } /// Consume a number - fn doNumber(self: *Scanner) !void { + fn doNumber(self: *Scanner) Token { while (isDigit(self.peek())) { _ = self.advance(); } @@ -215,17 +202,11 @@ pub const Scanner = struct { } } - // after going through all of the number, we can just use fmt.parseFloat - - var num = try std.fmt.parseFloat( - f32, - self.source[self.start..self.current], - ); - - try self.addNumberToken(.NUMBER, num); + return self.makeToken(.NUMBER); } - fn doIdentifier(self: *Scanner) !void { + /// Either a keyword or an identifier come out of this. + fn doIdentifier(self: *Scanner) Token { while (isAlphaNumeric(self.peek())) { _ = self.advance(); } @@ -243,90 +224,53 @@ pub const Scanner = struct { toktype = TokenType.IDENTIFIER; } - try self.addSimpleToken(toktype); + return self.makeToken(toktype); } - /// Scan through our tokens and add them to the Scanner's token list. - fn scanToken(self: *Scanner) !void { + pub fn scanToken(self: *Scanner) !?Token { + self.skipWhitespace(); + self.start = self.current; + + if (self.isAtEnd()) return self.makeToken(TokenType.EOF); + var c = self.advance(); + if (isAlpha(c)) return self.doIdentifier(); + if (isDigit(c)) return self.doNumber(); - switch (c) { - '(' => try self.addSimpleToken(.LEFT_PAREN), - ')' => try self.addSimpleToken(.RIGHT_PAREN), - '{' => try self.addSimpleToken(.LEFT_BRACE), - '}' => try self.addSimpleToken(.RIGHT_BRACE), - ',' => try self.addSimpleToken(.COMMA), - '.' => try self.addSimpleToken(.DOT), - '-' => try self.addSimpleToken(.MINUS), - '+' => try self.addSimpleToken(.PLUS), - ';' => try self.addSimpleToken(.SEMICOLON), - '*' => try self.addSimpleToken(.STAR), + var token = switch (c) { + '(' => self.makeToken(.LEFT_PAREN), + ')' => self.makeToken(.RIGHT_PAREN), + '{' => self.makeToken(.LEFT_BRACE), + '}' => self.makeToken(.RIGHT_BRACE), + ',' => self.makeToken(.COMMA), + '.' => self.makeToken(.DOT), + '-' => self.makeToken(.MINUS), + '+' => self.makeToken(.PLUS), + ';' => self.makeToken(.SEMICOLON), + '*' => self.makeToken(.STAR), - '!' => try self.addMatchToken('=', .BANG_EQUAL, .BANG), - '=' => try self.addMatchToken('=', .EQUAL_EQUAL, .EQUAL), - '<' => try self.addMatchToken('=', .LESS_EQUAL, .LESS), - '>' => try self.addMatchToken('=', .GREATER_EQUAL, .GREATER), + '!' => self.makeMatchToken('=', .BANG_EQUAL, .BANG), + '=' => self.makeMatchToken('=', .EQUAL_EQUAL, .EQUAL), + '<' => self.makeMatchToken('=', .LESS_EQUAL, .LESS), + '>' => self.makeMatchToken('=', .GREATER_EQUAL, .GREATER), '/' => blk: { - // consume comments - if (self.match('/')) { + if (self.peekNext() == '/') { while (self.peek() != '\n' and !self.isAtEnd()) { _ = self.advance(); } - } else if (self.match('*')) { - // multiline block comments are messier to work with, but - // we can still do it! - while (true) { - if (self.isAtEnd()) break; - // check '*/' - if (self.peek() == '*' and self.peekNext() == '/') { - self.current += 2; - break; - } - _ = self.advance(); - } + break :blk null; } else { - try self.addSimpleToken(.SLASH); + break :blk self.makeToken(.SLASH); } }, - ' ', '\r', '\t' => blk: {}, - '\n' => blk: { - self.line += 1; - }, - '"' => try self.doString(), - else => { - if (isDigit(c)) { - try self.doNumber(); - } else if (isAlpha(c)) { - try self.doIdentifier(); - } else { - try main.doError(self.line, "Unexpected character"); - } - }, - } - } + else => return TokenError.Unexpected, + }; - pub fn scanTokens(self: *Scanner) !TokenList { - // while we aren't at the end, we're still consuming - // tokens. - while (!self.isAtEnd()) { - self.start = self.current; - try self.scanToken(); - } - - try self.addToken(token.Token{ - .Simple = token.SimpleToken.init( - .EOF, - "", - self.line, - {}, - ), - }); - - return self.tokens; + return token; } };