const std = @import("std");

const token = @import("token.zig");
const main = @import("main.zig");

const TokenList = std.ArrayList(token.Token);
const TokenType = token.TokenType;

/// Returns true when `char` is an ASCII decimal digit ('0' through '9').
fn isDigit(char: u8) bool {
    return char >= '0' and char <= '9';
}

/// Returns true when `c` is an ASCII letter or an underscore.
fn isAlpha(c: u8) bool {
    return (c >= 'a' and c <= 'z') or
        (c >= 'A' and c <= 'Z') or
        c == '_';
}

/// Returns true when `char` may appear inside an identifier
/// (a letter, an underscore or a digit).
fn isAlphaNumeric(char: u8) bool {
    return isAlpha(char) or isDigit(char);
}

// Maps keyword text to the integer value of the matching TokenType tag.
// The tag is stored as an integer (via @enumToInt) instead of the enum
// itself; per the original author's note, storing the enum directly in
// the map did not work.
//
// NOTE(review): the key type is a slice. Whether AutoHashMap hashes the
// slice contents or only the slice pointer/length depends on the std
// version in use. If it is the latter, lookups made with slices of the
// scanned source can never match the literals inserted by initKeywordMap,
// and every keyword would be scanned as an IDENTIFIER. Confirm against the
// targeted std version and switch to a string-keyed map if needed.
pub const KeywordMap = std.AutoHashMap([]const u8, u6);

/// Builds the keyword lookup table.
///
/// The returned map is backed by `allocator`. If inserting any entry
/// fails, the partially filled map is released before the error is
/// returned.
fn initKeywordMap(allocator: *std.mem.Allocator) !KeywordMap {
    var map = KeywordMap.init(allocator);
    // Do not leak the entries inserted so far when a later put() fails.
    errdefer map.deinit();

    // `keywords` and `tags` are parallel arrays: keywords[i] is the
    // spelling of tags[i]. Keep both in the same order.
    const keywords = [][]const u8{
        "and"[0..],
        "class"[0..],
        "else"[0..],
        "false"[0..],
        "for"[0..],
        "fun"[0..],
        "if"[0..],
        "nil"[0..],
        "or"[0..],
        "print"[0..],
        "return"[0..],
        "super"[0..],
        "this"[0..],
        "true"[0..],
        "var"[0..],
        "while"[0..],
    };

    const tags = []TokenType{
        TokenType.AND,
        TokenType.CLASS,
        TokenType.ELSE,
        TokenType.FALSE,
        TokenType.FOR,
        TokenType.FUN,
        TokenType.IF,
        TokenType.NIL,
        TokenType.OR,
        TokenType.PRINT,
        TokenType.RETURN,
        TokenType.SUPER,
        TokenType.THIS,
        TokenType.TRUE,
        TokenType.VAR,
        TokenType.WHILE,
    };

    for (keywords) |keyword, idx| {
        const tag = @enumToInt(tags[idx]);
        _ = try map.put(keyword, tag);
    }

    return map;
}

/// Turns a buffer of source text into a list of tokens.
///
/// Create one with `init` and call `scanTokens` to run it over the
/// whole buffer.
pub const Scanner = struct {
    // The text being scanned.
    source: []u8,
    // Tokens produced so far.
    tokens: TokenList,
    // Keyword spelling -> TokenType tag value, built by initKeywordMap.
    keywords: KeywordMap,

    // Index of the first byte of the lexeme currently being scanned.
    start: usize = 0,
    // Index of the next byte to be consumed.
    current: usize = 0,
    // Line number attached to tokens and errors; starts at 1.
    line: usize = 1,

    /// Creates a scanner over `data`. The token list and the keyword
    /// map are both backed by `allocator`. Fails when the keyword map
    /// cannot be built.
    pub fn init(allocator: *std.mem.Allocator, data: []u8) !Scanner {
        return Scanner{
            .source = data,
            .tokens = TokenList.init(allocator),
            .keywords = try initKeywordMap(allocator),
        };
    }

    /// Returns true once every byte of the source has been consumed.
    fn isAtEnd(self: *Scanner) bool {
        return self.current >= self.source.len;
    }

    /// Consumes the next byte of the source and returns it.
    /// Callers must make sure the scanner is not at the end.
    fn advance(self: *Scanner) u8 {
        const char = self.source[self.current];
        self.current += 1;
        return char;
    }

    /// Returns the slice of the source between `start` and `current`,
    /// i.e. the text of the token being scanned.
    pub fn currentLexeme(self: *Scanner) []u8 {
        return self.source[self.start..self.current];
    }

    /// Appends a token that carries no value, using the current lexeme
    /// and line.
    fn addSimpleToken(self: *Scanner, ttype: token.TokenType) !void {
        try self.addToken(token.Token{
            .Simple = token.SimpleToken.init(
                ttype,
                self.currentLexeme(),
                self.line,
                {},
            ),
        });
    }

    /// Appends a token whose value is `slice`, using the current lexeme
    /// and line.
    fn addSliceToken(self: *Scanner, ttype: token.TokenType, slice: []u8) !void {
        try self.addToken(token.Token{
            .Slice = token.SliceToken.init(
                ttype,
                self.currentLexeme(),
                self.line,
                slice,
            ),
        });
    }

    /// Appends a token whose value is the number `num`, using the
    /// current lexeme and line.
    /// Keep in mind Lox only has a single number type and that is a
    /// float one.
    fn addNumberToken(self: *Scanner, ttype: token.TokenType, num: f32) !void {
        try self.addToken(token.Token{
            .Number = token.NumberToken.init(
                ttype,
                self.currentLexeme(),
                self.line,
                num,
            ),
        });
    }

    /// Appends `tok` to the token list. Fails when the list cannot grow.
    fn addToken(
        self: *Scanner,
        tok: token.Token,
    ) !void {
        try self.tokens.append(tok);
    }

    /// Consumes the next byte only if it equals `expected`.
    /// Returns whether the byte was consumed.
    fn match(self: *Scanner, expected: u8) bool {
        if (self.isAtEnd()) return false;
        if (self.source[self.current] != expected) return false;

        self.current += 1;
        return true;
    }

    /// Adds a SimpleToken of `type_match` if the next character is
    /// `expected` (consuming it). Adds a SimpleToken of `type_nomatch`
    /// when it is not.
    fn addMatchToken(
        self: *Scanner,
        expected: u8,
        type_match: token.TokenType,
        type_nomatch: token.TokenType,
    ) !void {
        if (self.match(expected)) {
            try self.addSimpleToken(type_match);
        } else {
            try self.addSimpleToken(type_nomatch);
        }
    }

    /// Returns the next byte without consuming it, or 0 at the end of
    /// the source.
    fn peek(self: *Scanner) u8 {
        if (self.isAtEnd()) return 0;
        return self.source[self.current];
    }

    /// Returns the byte after the next one without consuming anything,
    /// or 0 when there is no such byte.
    fn peekNext(self: *Scanner) u8 {
        if (self.current + 1 >= self.source.len) return 0;
        return self.source[self.current + 1];
    }

    /// Scans a string literal; the opening '"' has already been
    /// consumed. Newlines inside the literal bump the line counter.
    /// Reports "Unterminated string." through main.doError when the
    /// source ends before the closing quote, and adds no token then.
    fn doString(self: *Scanner) !void {
        // consume everything up to the closing quote (or end of input).
        while (self.peek() != '"' and !self.isAtEnd()) {
            if (self.peek() == '\n') self.line += 1;
            _ = self.advance();
        }

        // unterminated string.
        if (self.isAtEnd()) {
            try main.doError(self.line, "Unterminated string.");
            return;
        }

        // the closing ".
        _ = self.advance();

        // the token value is the text without the surrounding quotes.
        try self.addSliceToken(
            .STRING,
            self.source[self.start + 1 .. self.current - 1],
        );
    }

    /// Scans a number literal whose first digit has already been
    /// consumed: digits, optionally followed by '.' and more digits.
    /// Fails when std.fmt.parseFloat rejects the lexeme.
    fn doNumber(self: *Scanner) !void {
        while (isDigit(self.peek())) {
            _ = self.advance();
        }

        // only treat '.' as part of the number when a digit follows it,
        // as in 12.34; a trailing '.' is left for the next token.
        if (self.peek() == '.' and isDigit(self.peekNext())) {
            _ = self.advance();

            while (isDigit(self.peek())) {
                _ = self.advance();
            }
        }

        // the whole lexeme is now known, so let fmt.parseFloat convert it.
        const num = try std.fmt.parseFloat(
            f32,
            self.source[self.start..self.current],
        );

        try self.addNumberToken(.NUMBER, num);
    }

    /// Scans an identifier or keyword whose first character has already
    /// been consumed. Adds the keyword's token type when the text is
    /// found in the keyword map, and .IDENTIFIER otherwise.
    fn doIdentifier(self: *Scanner) !void {
        while (isAlphaNumeric(self.peek())) {
            _ = self.advance();
        }

        const text = self.source[self.start..self.current];

        // the map stores the tag as an integer, convert it back.
        const toktype: TokenType = if (self.keywords.get(text)) |kv|
            @intToEnum(TokenType, kv.value)
        else
            TokenType.IDENTIFIER;

        try self.addSimpleToken(toktype);
    }

    /// Scans one lexeme starting at `current` and adds the resulting
    /// token, if any, to the token list. Whitespace and comments add no
    /// token. Unknown characters are reported through main.doError.
    fn scanToken(self: *Scanner) !void {
        const c = self.advance();

        switch (c) {
            '(' => try self.addSimpleToken(.LEFT_PAREN),
            ')' => try self.addSimpleToken(.RIGHT_PAREN),
            '{' => try self.addSimpleToken(.LEFT_BRACE),
            '}' => try self.addSimpleToken(.RIGHT_BRACE),
            ',' => try self.addSimpleToken(.COMMA),
            '.' => try self.addSimpleToken(.DOT),
            '-' => try self.addSimpleToken(.MINUS),
            '+' => try self.addSimpleToken(.PLUS),
            ';' => try self.addSimpleToken(.SEMICOLON),
            '*' => try self.addSimpleToken(.STAR),

            '!' => try self.addMatchToken('=', .BANG_EQUAL, .BANG),
            '=' => try self.addMatchToken('=', .EQUAL_EQUAL, .EQUAL),
            '<' => try self.addMatchToken('=', .LESS_EQUAL, .LESS),
            '>' => try self.addMatchToken('=', .GREATER_EQUAL, .GREATER),

            '/' => {
                if (self.match('/')) {
                    // line comment: skip up to, but not including, the
                    // newline so the '\n' prong below counts the line.
                    while (self.peek() != '\n' and !self.isAtEnd()) {
                        _ = self.advance();
                    }
                } else if (self.match('*')) {
                    // block comment: skip until the closing "*/". A
                    // comment that is never closed silently runs to the
                    // end of the source.
                    while (!self.isAtEnd()) {
                        if (self.peek() == '*' and self.peekNext() == '/') {
                            self.current += 2;
                            break;
                        }

                        // newlines skipped here never reach the '\n'
                        // prong, so they have to be counted in place to
                        // keep line numbers right after the comment.
                        if (self.peek() == '\n') self.line += 1;
                        _ = self.advance();
                    }
                } else {
                    try self.addSimpleToken(.SLASH);
                }
            },

            // whitespace produces no token.
            ' ', '\r', '\t' => {},
            '\n' => {
                self.line += 1;
            },

            '"' => try self.doString(),

            else => {
                if (isDigit(c)) {
                    try self.doNumber();
                } else if (isAlpha(c)) {
                    try self.doIdentifier();
                } else {
                    try main.doError(self.line, "Unexpected character");
                }
            },
        }
    }

    /// Scans the whole source, appends a final EOF token and returns
    /// the scanner's token list.
    pub fn scanTokens(self: *Scanner) !TokenList {
        // each iteration scans one lexeme, so remember where it starts.
        while (!self.isAtEnd()) {
            self.start = self.current;
            try self.scanToken();
        }

        try self.addToken(token.Token{
            .Simple = token.SimpleToken.init(
                .EOF,
                "",
                self.line,
                {},
            ),
        });

        return self.tokens;
    }
};