const std = @import("std");
const tokens = @import("tokens.zig");

const Allocator = std.mem.Allocator;
const Token = tokens.Token;
const TokenType = tokens.TokenType;

/// Errors reported while scanning source text.
pub const ScannerError = error{
    Unexpected,
    Unterminated,
};

/// True for the ASCII decimal digits '0' through '9'.
fn isDigit(char: u8) bool {
    return switch (char) {
        '0'...'9' => true,
        else => false,
    };
}

/// True for ASCII letters of either case and for the underscore.
fn isAlpha(c: u8) bool {
    return switch (c) {
        'a'...'z', 'A'...'Z', '_' => true,
        else => false,
    };
}

/// True when `char` is a letter, an underscore, or a decimal digit.
fn isAlphaNumeric(char: u8) bool {
    return isDigit(char) or isAlpha(char);
}

/// Reserved words, index-aligned with `keyword_ttypes`.
const keywords = [_][]const u8{
    "break",
    "const",
    "continue",
    "defer",
    "else",
    "enum",
    "fn",
    "for",
    "go",
    "goto",
    "if",
    "import",
    "in",
    "interface",
    "match",
    "module",
    "mut",
    "or",
    "return",
    "struct",
    "type",
    "true",
    "false",
    "None",
    "println",
    "loop",
    "pub",
};

/// Token type for each entry of `keywords`, at the same index.
const keyword_ttypes = [_]TokenType{
    .Break,
    .Const,
    .Continue,
    .Defer,
    .Else,
    .Enum,
    .Fn,
    .For,
    .Go,
    .Goto,
    .If,
    .Import,
    .In,
    .Interface,
    .Match,
    .Module,
    .Mut,
    .Or,
    .Return,
    .Struct,
    .Type,
    .True,
    .False,
    .None,
    .Println,
    .Loop,
    .Pub,
};

/// Map `keyword` to its token type by a linear search of `keywords`.
/// Returns null when the text is not a reserved word.
fn getKeyword(keyword: []const u8) ?TokenType {
    var i: usize = 0;
    while (i < keywords.len) : (i += 1) {
        if (std.mem.eql(u8, keywords[i], keyword)) return keyword_ttypes[i];
    }
    return null;
}

/// Scanner for vlang tokens.
/// Call `nextToken` repeatedly to walk `source`. A `null` result means the
/// consumed input produced no token (whitespace, newline, comment); an `EOF`
/// token is returned once every byte has been consumed.
pub const Scanner = struct {
    /// Stored by `init`; none of the methods in this struct allocate.
    allocator: *Allocator,
    /// The text being scanned. Token lexemes are slices into it.
    source: []const u8,

    /// Index of the first byte of the lexeme currently being scanned.
    start: usize = 0,
    /// Index of the next byte that has not been consumed yet.
    current: usize = 0,
    /// Line counter; starts at 1 and is incremented for every '\n' consumed.
    line: usize = 1,

    /// Create a scanner positioned at the beginning of `source`.
    pub fn init(allocator: *Allocator, source: []const u8) Scanner {
        return Scanner{ .allocator = allocator, .source = source };
    }

    /// True once every byte of `source` has been consumed.
    fn isAtEnd(self: *Scanner) bool {
        return self.current >= self.source.len;
    }

    /// Consume one byte and return it. Callers must ensure `!isAtEnd()`.
    fn advance(self: *Scanner) u8 {
        self.current += 1;
        return self.source[self.current - 1];
    }

    /// Un-consume the most recently consumed byte.
    fn rollback(self: *Scanner) void {
        self.current -= 1;
    }

    /// The slice of `source` covered by the lexeme scanned so far.
    pub fn currentLexeme(self: *Scanner) []const u8 {
        return self.source[self.start..self.current];
    }

    /// Build a token of `ttype` whose lexeme is the current lexeme.
    fn makeToken(self: *Scanner, ttype: TokenType) Token {
        return Token{
            .ttype = ttype,
            .lexeme = self.currentLexeme(),
            .line = self.line,
        };
    }

    /// Build a token of `ttype` with an explicitly supplied lexeme.
    fn makeTokenLexeme(
        self: *Scanner,
        ttype: TokenType,
        lexeme: []const u8,
    ) Token {
        return Token{
            .ttype = ttype,
            .lexeme = lexeme,
            .line = self.line,
        };
    }

    /// Consume the next byte only if it equals `expected`.
    /// Returns whether a byte was consumed.
    fn match(self: *Scanner, expected: u8) bool {
        if (self.isAtEnd()) return false;
        if (self.source[self.current] != expected) return false;

        self.current += 1;
        return true;
    }

    /// Make a `type_match` token when the next byte is `expected`
    /// (consuming it), otherwise a `type_nomatch` token.
    fn makeMatchToken(
        self: *Scanner,
        expected: u8,
        type_match: TokenType,
        type_nomatch: TokenType,
    ) Token {
        const ttype = if (self.match(expected)) type_match else type_nomatch;
        return self.makeToken(ttype);
    }

    /// Two-alternative version of `makeMatchToken`: `char1` is tried first,
    /// then `char2`, and `fallback` is used when neither follows.
    fn makeTripleMatchToken(
        self: *Scanner,
        char1: u8,
        ttype1: TokenType,
        char2: u8,
        ttype2: TokenType,
        fallback: TokenType,
    ) Token {
        if (self.match(char1)) return self.makeToken(ttype1);
        if (self.match(char2)) return self.makeToken(ttype2);
        return self.makeToken(fallback);
    }

    /// Look at the next unconsumed byte without consuming it.
    /// Returns 0 at end of input.
    fn peek(self: *Scanner) u8 {
        if (self.isAtEnd()) return 0;
        return self.source[self.current];
    }

    /// Look one byte past `peek` without consuming anything.
    /// Returns 0 when that position is past the end of input.
    fn peekNext(self: *Scanner) u8 {
        if (self.current + 1 >= self.source.len) return 0;
        return self.source[self.current + 1];
    }

    /// Consume the rest of a number whose first digit was already consumed.
    /// Returns either an Integer or a Float token. Proper typing
    /// of the number (i32 i64 u32 u64 f32 f64) is left to the parser.
    fn doNumber(self: *Scanner) Token {
        var ttype = TokenType.Integer;

        while (isDigit(self.peek())) {
            _ = self.advance();
        }

        // Only treat '.' as a decimal point when a digit follows it, so
        // that `12.foo` still scans as Integer, Dot, Identifier.
        if (self.peek() == '.' and isDigit(self.peekNext())) {
            ttype = TokenType.Float;

            // the '.' itself
            _ = self.advance();

            while (isDigit(self.peek())) {
                _ = self.advance();
            }
        }

        return self.makeToken(ttype);
    }

    /// Consume a string whose opening quote was already consumed.
    /// `stop_char` is the quote character that terminates it. The returned
    /// token's lexeme excludes both quotes. Returns
    /// `ScannerError.Unterminated` when the input ends before `stop_char`.
    fn doString(self: *Scanner, stop_char: u8) !Token {
        while (!self.isAtEnd() and self.peek() != stop_char) {
            // count every newline inside the string as it is consumed
            if (self.advance() == '\n') self.line += 1;
        }

        if (self.isAtEnd()) return ScannerError.Unterminated;

        // the closing quote
        _ = self.advance();

        // strip the opening and closing quotes
        const lexeme = self.currentLexeme();
        return self.makeTokenLexeme(
            .String,
            lexeme[1 .. lexeme.len - 1],
        );
    }

    /// Consume the rest of an identifier whose first byte was already
    /// consumed, and return a keyword token when the text is a reserved
    /// word or an Identifier token otherwise.
    fn doIdentifier(self: *Scanner) Token {
        // peek() yields 0 at end of input, which is not alphanumeric, so
        // an identifier that ends the source terminates the loop cleanly.
        while (isAlphaNumeric(self.peek())) {
            _ = self.advance();
        }

        const ttype = getKeyword(self.currentLexeme()) orelse TokenType.Identifier;
        return self.makeToken(ttype);
    }

    /// Skip a `//` comment up to, but not including, the newline, so the
    /// newline is still seen (and counted) by `nextToken`.
    fn skipLineComment(self: *Scanner) void {
        while (!self.isAtEnd() and self.peek() != '\n') {
            _ = self.advance();
        }
    }

    /// Skip a `/* ... */` comment. On entry the leading '/' is consumed and
    /// the next byte is the opening '*'. Newlines inside the comment are
    /// counted. Returns `ScannerError.Unterminated` when no closing `*/`
    /// exists.
    fn skipBlockComment(self: *Scanner) !void {
        // the opening '*'; consuming it first keeps `/*/` from being
        // mistaken for a complete comment
        _ = self.advance();

        while (!self.isAtEnd()) {
            const char = self.advance();
            if (char == '\n') {
                self.line += 1;
            } else if (char == '*' and self.peek() == '/') {
                // the closing '/'
                _ = self.advance();
                return;
            }
        }

        return ScannerError.Unterminated;
    }

    /// Handle everything that can start with '/', which was already
    /// consumed: `/=`, a line comment, a block comment, or a plain slash.
    /// Returns null when a comment was skipped.
    fn doSlash(self: *Scanner) !?Token {
        const next = self.peek();

        if (next == '=') {
            _ = self.advance();
            return self.makeToken(.SlashEqual);
        }

        if (next == '/') {
            self.skipLineComment();
            return null;
        }

        if (next == '*') {
            try self.skipBlockComment();
            return null;
        }

        return self.makeToken(.Slash);
    }

    /// Scan the next piece of input.
    ///
    /// Returns a token, or null when the consumed input produced none
    /// (whitespace, a newline, or a comment). Returns an EOF token when the
    /// source is exhausted.
    ///
    /// Errors: `ScannerError.Unexpected` for a byte that starts no token,
    /// `ScannerError.Unterminated` for an unclosed string or block comment.
    pub fn nextToken(self: *Scanner) !?Token {
        self.start = self.current;

        if (self.isAtEnd()) return self.makeToken(TokenType.EOF);

        const c = self.advance();
        if (isDigit(c)) return self.doNumber();
        if (isAlpha(c)) return self.doIdentifier();

        const token: ?Token = switch (c) {
            '(' => self.makeToken(.LeftParen),
            ')' => self.makeToken(.RightParen),
            '{' => self.makeToken(.LeftBrace),
            '}' => self.makeToken(.RightBrace),
            '[' => self.makeToken(.LeftSquare),
            ']' => self.makeToken(.RightSquare),
            '.' => self.makeToken(.Dot),
            ';' => self.makeToken(.Semicolon),
            ',' => self.makeToken(.Comma),
            '?' => self.makeToken(.QuestionMark),
            '$' => self.makeToken(.DollarSign),
            '%' => self.makeToken(.Modulo),

            ':' => self.makeMatchToken('=', .ColonEqual, .Colon),
            '*' => self.makeMatchToken('=', .StarEqual, .Star),
            '-' => self.makeMatchToken('=', .MinusEqual, .Minus),

            // `&&` and `||` reuse the .And and .Or token types.
            '&' => self.makeMatchToken('&', .And, .Address),
            '|' => self.makeMatchToken('|', .Or, .Pipe),

            '!' => self.makeMatchToken('=', .BangEqual, .Bang),
            '=' => self.makeMatchToken('=', .EqualEqual, .Equal),
            '>' => self.makeMatchToken('=', .GreaterEqual, .Greater),

            '+' => self.makeTripleMatchToken('+', .PlusPlus, '=', .PlusEqual, .Plus),
            '<' => self.makeTripleMatchToken('=', .LessEqual, '<', .LeftDoubleChevron, .Less),

            '/' => try self.doSlash(),

            '\'' => try self.doString('\''),
            '"' => try self.doString('"'),

            ' ', '\r', '\t' => null,
            '\n' => blk: {
                self.line += 1;
                break :blk null;
            },

            else => return ScannerError.Unexpected,
        };

        return token;
    }
};