const std = @import("std");
const tokens = @import("token.zig");
const Token = tokens.Token;
const TokenType = tokens.TokenType;

const Allocator = std.mem.Allocator;

pub const TokenError = error{
    Unexpected,
    Unterminated,
};

fn isDigit(char: u8) bool {
    return char >= '0' and char <= '9';
}

fn isAlpha(c: u8) bool {
    return (c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or c == '_';
}

fn isAlphaNumeric(char: u8) bool {
    return isAlpha(char) or isDigit(char);
}

pub const KeywordMap = std.StringHashMap(u6);

/// The book does say that C doesn't have hashmaps, but Zig does, and I can
/// use it here.
fn initKeywordMap(allocator: *std.mem.Allocator) !KeywordMap {
    var map = KeywordMap.init(allocator);

    const keywords = [_][]const u8{
        "and"[0..],  "class"[0..],  "else"[0..],  "false"[0..],
        "for"[0..],  "fun"[0..],    "if"[0..],    "nil"[0..],
        "or"[0..],   "print"[0..],  "return"[0..], "super"[0..],
        "this"[0..], "true"[0..],   "var"[0..],   "while"[0..],
    };

    const tags = [_]TokenType{
        TokenType.AND,  TokenType.CLASS, TokenType.ELSE,   TokenType.FALSE,
        TokenType.FOR,  TokenType.FUN,   TokenType.IF,     TokenType.NIL,
        TokenType.OR,   TokenType.PRINT, TokenType.RETURN, TokenType.SUPER,
        TokenType.THIS, TokenType.TRUE,  TokenType.VAR,    TokenType.WHILE,
    };

    for (keywords) |keyword, idx| {
        var tag = @enumToInt(tags[idx]);
        _ = try map.put(keyword, tag);
    }

    return map;
}

pub const Scanner = struct {
    source: []const u8,
    keywords: KeywordMap,

    start: usize = 0,
    current: usize = 0,
    line: usize = 1,

    allocator: *Allocator,

    pub fn init(allocator: *Allocator, data: []const u8) !Scanner {
        return Scanner{
            .source = data,
            .keywords = try initKeywordMap(allocator),
            .allocator = allocator,
        };
    }

    fn isAtEnd(self: *Scanner) bool {
        return self.current >= self.source.len;
    }

    fn advance(self: *Scanner) u8 {
        self.current += 1;
        return self.source[self.current - 1];
    }

    pub fn currentLexeme(self: *Scanner) []const u8 {
        return self.source[self.start..self.current];
    }

    fn makeToken(self: *Scanner, ttype: TokenType) Token {
        return Token{
            .ttype = ttype,
            .lexeme = self.currentLexeme(),
            .line = self.line,
        };
    }

    /// Check if the next character matches `expected`, consuming it if so.
    fn match(self: *Scanner, expected: u8) bool {
        if (self.isAtEnd()) return false;
        if (self.source[self.current] != expected) return false;

        self.current += 1;
        return true;
    }

    /// Return a token of type `type_match` if the next character is
    /// `expected`, or a token of type `type_nomatch` when it is not.
    fn makeMatchToken(
        self: *Scanner,
        expected: u8,
        type_match: TokenType,
        type_nomatch: TokenType,
    ) Token {
        if (self.match(expected)) {
            return self.makeToken(type_match);
        } else {
            return self.makeToken(type_nomatch);
        }
    }

    fn peek(self: *Scanner) u8 {
        if (self.isAtEnd()) return 0;
        return self.source[self.current];
    }

    fn peekNext(self: *Scanner) u8 {
        if (self.current + 1 >= self.source.len) return 0;
        return self.source[self.current + 1];
    }

    fn skipWhitespace(self: *Scanner) void {
        while (true) {
            var c = self.peek();
            switch (c) {
                ' ', '\r', '\t' => {
                    _ = self.advance();
                },
                '\n' => {
                    self.line += 1;
                    _ = self.advance();
                },
                else => return,
            }
        }
    }

    fn doString(self: *Scanner) !Token {
        // consume the entire string
        while (self.peek() != '"' and !self.isAtEnd()) {
            if (self.peek() == '\n') self.line += 1;
            _ = self.advance();
        }

        // unterminated string.
        if (self.isAtEnd()) {
            return TokenError.Unterminated;
        }

        // the closing ".
        _ = self.advance();

        // note: the lexeme still includes the surrounding quotes.
        return self.makeToken(.STRING);
    }

    /// Consume a number.
    fn doNumber(self: *Scanner) Token {
        while (isDigit(self.peek())) {
            _ = self.advance();
        }

        // check if it's a number like 12.34, where the '.' character
        // exists and the one next to it is a digit.
        if (self.peek() == '.' and isDigit(self.peekNext())) {
            _ = self.advance();

            while (isDigit(self.peek())) {
                _ = self.advance();
            }
        }

        return self.makeToken(.NUMBER);
    }

    /// Either a keyword or an identifier comes out of this.
    fn doIdentifier(self: *Scanner) Token {
        while (isAlphaNumeric(self.peek())) {
            _ = self.advance();
        }

        // after reading the identifier, we check if it is any of our
        // keywords. if it is, we use the specified keyword type;
        // if not, just .IDENTIFIER.
        var text = self.source[self.start..self.current];
        var type_opt = self.keywords.get(text);

        var toktype: TokenType = undefined;
        if (type_opt) |kv| {
            toktype = @intToEnum(TokenType, kv.value);
        } else {
            toktype = TokenType.IDENTIFIER;
        }

        return self.makeToken(toktype);
    }

    /// Scan and return the next token. Returns null when the consumed input
    /// produces no token at all (currently only `//` line comments).
    pub fn scanToken(self: *Scanner) !?Token {
        self.skipWhitespace();
        self.start = self.current;

        if (self.isAtEnd()) return self.makeToken(TokenType.EOF);

        var c = self.advance();

        if (isAlpha(c)) return self.doIdentifier();
        if (isDigit(c)) return self.doNumber();

        var token = switch (c) {
            '(' => self.makeToken(.LEFT_PAREN),
            ')' => self.makeToken(.RIGHT_PAREN),
            '{' => self.makeToken(.LEFT_BRACE),
            '}' => self.makeToken(.RIGHT_BRACE),
            ',' => self.makeToken(.COMMA),
            '.' => self.makeToken(.DOT),
            '-' => self.makeToken(.MINUS),
            '+' => self.makeToken(.PLUS),
            ';' => self.makeToken(.SEMICOLON),
            '*' => self.makeToken(.STAR),

            '!' => self.makeMatchToken('=', .BANG_EQUAL, .BANG),
            '=' => self.makeMatchToken('=', .EQUAL_EQUAL, .EQUAL),
            '<' => self.makeMatchToken('=', .LESS_EQUAL, .LESS),
            '>' => self.makeMatchToken('=', .GREATER_EQUAL, .GREATER),

            '/' => blk: {
                // the first '/' has already been consumed by advance(), so
                // a second one means this is a line comment, which produces
                // no token at all.
                if (self.match('/')) {
                    while (self.peek() != '\n' and !self.isAtEnd()) {
                        _ = self.advance();
                    }

                    break :blk null;
                } else {
                    break :blk self.makeToken(.SLASH);
                }
            },

            '"' => try self.doString(),

            else => return TokenError.Unexpected,
        };

        return token;
    }
};
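
// A minimal usage sketch of the Scanner, assuming the same Zig version this
// file already targets (the `*Allocator` / `@enumToInt` style above), that
// std.testing.allocator is a `*Allocator` in that version, and that token.zig
// defines the TokenType variants used below. The source string and expected
// token list are illustrative only.
test "scanner scans a simple statement" {
    var scanner = try Scanner.init(std.testing.allocator, "var answer = 42;");
    defer scanner.keywords.deinit();

    const expected = [_]TokenType{
        TokenType.VAR,    TokenType.IDENTIFIER, TokenType.EQUAL,
        TokenType.NUMBER, TokenType.SEMICOLON,  TokenType.EOF,
    };

    for (expected) |ttype| {
        // scanToken only returns null for input that yields no token at all
        // (a `//` line comment), so every scan here should produce a token.
        const token = (try scanner.scanToken()).?;
        std.debug.assert(token.ttype == ttype);
    }
}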