diff --git a/src/main.zig b/src/main.zig
index 655520e..05b5fbb 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -1,7 +1,5 @@
 const std = @import("std");
 
-const scanners = @import("scanners.zig");
-
 pub const Result = enum {
     Ok,
     TokenizeError,
@@ -10,34 +8,6 @@ pub const Result = enum {
 };
 
 pub fn run(allocator: *std.mem.Allocator, slice: []const u8) Result {
-    var scan = scanners.Scanner.init(allocator, slice);
-    //defer scan.deinit();
-
-    // do a full scan pass, then reset, then do it again (with parser)
-    while (true) {
-        var tok_opt = scan.nextToken() catch |err| {
-            std.debug.warn(
-                "error at '{}': {}\n",
-                scan.currentLexeme(),
-                err,
-            );
-
-            return Result.TokenizeError;
-        };
-
-        if (tok_opt) |tok| {
-            if (tok.typ == .EOF) break;
-
-            // TODO remove
-            std.debug.warn("{x}\n", tok);
-        }
-    }
-
-    // scan.reset();
-
-    //var parser = parsers.Parser.init(allocator, scan);
-    //defer parser.deinit();
-
     return Result.Ok;
 }
 
@@ -58,8 +28,8 @@ pub fn main() anyerror!void {
 
     _ = try file.read(slice);
 
+    //switch (try run(allocator, slice)) {
     const result = run(allocator, slice);
-    //const result = try run(allocator, slice);
 
     switch (result) {
         .Ok => std.os.exit(0),
diff --git a/src/scanners.zig b/src/scanners.zig
deleted file mode 100644
index 0a10196..0000000
--- a/src/scanners.zig
+++ /dev/null
@@ -1,361 +0,0 @@
-const std = @import("std");
-const tokens = @import("tokens.zig");
-
-const Allocator = std.mem.Allocator;
-const Token = tokens.Token;
-const TokenType = tokens.TokenType;
-
-pub const ScannerError = error{
-    Unexpected,
-    Unterminated,
-};
-
-fn isDigit(char: u8) bool {
-    return char >= '0' and char <= '9';
-}
-
-fn isAlpha(c: u8) bool {
-    return (c >= 'a' and c <= 'z') or
-        (c >= 'A' and c <= 'Z') or
-        c == '_';
-}
-
-fn isAlphaNumeric(char: u8) bool {
-    return isAlpha(char) or isDigit(char);
-}
-
-const keywords = [_][]const u8{
-    "break",
-    "const",
-    "continue",
-    "defer",
-    "else",
-    "enum",
-    "fn",
-    "for",
-    "go",
-    "goto",
-    "if",
-    "import",
-    "in",
-    "interface",
-    "match",
-    "module",
-    "mut",
-    "or",
-    "return",
-    "struct",
-    "type",
-    "true",
-    "false",
-    "None",
-    "println",
-    "loop",
-    "pub",
-};
-
-const keyword_ttypes = [_]TokenType{
-    .Break,
-    .Const,
-    .Continue,
-    .Defer,
-    .Else,
-    .Enum,
-    .Fn,
-    .For,
-    .Go,
-    .Goto,
-    .If,
-    .Import,
-    .In,
-    .Interface,
-    .Match,
-    .Module,
-    .Mut,
-    .Or,
-    .Return,
-    .Struct,
-    .Type,
-    .True,
-    .False,
-    .None,
-    .Println,
-    .Loop,
-    .Pub,
-};
-
-fn getKeyword(keyword: []const u8) ?TokenType {
-    for (keywords) |kw, idx| {
-        if (std.mem.eql(u8, keyword, kw)) {
-            return keyword_ttypes[idx];
-        }
-    }
-
-    return null;
-}
-
-/// Scanner for vlang tokens.
-pub const Scanner = struct {
-    allocator: *Allocator,
-    source: []const u8,
-
-    start: usize = 0,
-    current: usize = 0,
-    line: usize = 1,
-
-    pub fn init(allocator: *Allocator, source: []const u8) Scanner {
-        return Scanner{ .allocator = allocator, .source = source };
-    }
-
-    fn isAtEnd(self: *Scanner) bool {
-        return self.current >= self.source.len;
-    }
-
-    fn advance(self: *Scanner) u8 {
-        self.current += 1;
-        return self.source[self.current - 1];
-    }
-
-    fn rollback(self: *Scanner) void {
-        self.current -= 1;
-    }
-
-    pub fn currentLexeme(self: *Scanner) []const u8 {
-        return self.source[self.start..self.current];
-    }
-
-    fn makeToken(self: *Scanner, ttype: TokenType) Token {
-        return Token{
-            .typ = ttype,
-            .lexeme = self.currentLexeme(),
-            .line = self.line,
-        };
-    }
-
-    fn makeTokenLexeme(
-        self: *Scanner,
-        ttype: TokenType,
-        lexeme: []const u8,
-    ) Token {
-        return Token{
-            .typ = ttype,
-            .lexeme = lexeme,
-            .line = self.line,
-        };
-    }
-
-    /// Check if the next character matches what is expected.
-    fn match(self: *Scanner, expected: u8) bool {
-        if (self.isAtEnd()) return false;
-        if (self.source[self.current] != expected) return false;
-
-        self.current += 1;
-        return true;
-    }
-
-    /// Add a SimpleToken of type_match if the next character is
-    /// `expected`. Adds a SimpleToken of type_nomatch when it is not.
-    fn makeMatchToken(
-        self: *Scanner,
-        expected: u8,
-        type_match: TokenType,
-        type_nomatch: TokenType,
-    ) Token {
-        if (self.match(expected)) {
-            return self.makeToken(type_match);
-        } else {
-            return self.makeToken(type_nomatch);
-        }
-    }
-
-    /// "triple" version of makeMatchToken.
-    /// Required per vlang's tokens.
-    fn makeTripleMatchToken(
-        self: *Scanner,
-        char1: u8,
-        ttype1: TokenType,
-        char2: u8,
-        ttype2: TokenType,
-        fallback: TokenType,
-    ) Token {
-        if (self.match(char1)) {
-            return self.makeToken(ttype1);
-        } else if (self.match(char2)) {
-            return self.makeToken(ttype2);
-        } else {
-            return self.makeToken(fallback);
-        }
-    }
-
-    /// Peek at the current character in the scanner
-    fn peek(self: *Scanner) u8 {
-        if (self.isAtEnd()) return 0;
-        if (self.current == 0) return 0;
-        return self.source[self.current - 1];
-    }
-
-    /// Peek at the next character in the scanner
-    fn peekNext(self: *Scanner) u8 {
-        if (self.current + 1 > self.source.len) return 0;
-        return self.source[self.current];
-    }
-
-    /// Consume a number.
-    /// Returns either an Integer or a Float token. Proper typing
-    /// of the number (i32 i64 u32 u64 f32 f64) are for the parser.
-    fn doNumber(self: *Scanner) Token {
-        var ttype = TokenType.Integer;
-
-        while (isDigit(self.peekNext())) {
-            _ = self.advance();
-        }
-
-        // check if its a number like 12.34, where the '.' character
-        // exists and the one next to it is a digit.
-        if (self.peek() == '.' and isDigit(self.peekNext())) {
-            ttype = TokenType.Float;
-
-            _ = self.advance();
-            while (isDigit(self.peek())) {
-                _ = self.advance();
-            }
-        }
-
-        return self.makeToken(ttype);
-    }
-
-    /// Consume a string. stop_char is used to determine
-    /// if the string is a single quote or double quote string
-    fn doString(self: *Scanner, stop_char: u8) !Token {
-        // consume entire string
-        while (self.peekNext() != stop_char and !self.isAtEnd()) {
-            if (self.peek() == '\n') self.line += 1;
-            _ = self.advance();
-        }
-
-        // unterminated string.
-        if (self.isAtEnd()) {
-            return ScannerError.Unterminated;
-        }
-
-        // the closing character of the string
-        _ = self.advance();
-
-        // remove the starting and ending chars of the string
-        const lexeme = self.currentLexeme();
-        return self.makeTokenLexeme(
-            .String,
-            lexeme[1 .. lexeme.len - 1],
-        );
-    }
-
-    /// Either a keyword or an identifier come out of this.
-    fn doIdentifier(self: *Scanner) Token {
-        while (isAlphaNumeric(self.peek())) {
-            _ = self.advance();
-        }
-
-        // ugly hack.
-        self.rollback();
-
-        // after reading the identifier, we check
-        // if it is any of our keywords, if it is, then we add
-        // the specificed keyword type. if not, just .IDENTIFIER
-        var toktype: TokenType = undefined;
-        var ttype_opt = getKeyword(self.currentLexeme());
-
-        if (ttype_opt) |ttype| {
-            toktype = ttype;
-        } else {
-            toktype = TokenType.Identifier;
-        }
-
-        return self.makeToken(toktype);
-    }
-
-    pub fn nextToken(self: *Scanner) !?Token {
-        self.start = self.current;
-
-        if (self.isAtEnd()) return self.makeToken(TokenType.EOF);
-
-        var c = self.advance();
-        if (isDigit(c)) return self.doNumber();
-        if (isAlpha(c)) return self.doIdentifier();
-
-        var token: ?Token = switch (c) {
-            '(' => self.makeToken(.LeftParen),
-            ')' => self.makeToken(.RightParen),
-            '{' => self.makeToken(.LeftBrace),
-            '}' => self.makeToken(.RightBrace),
-            '[' => self.makeToken(.LeftSquare),
-            ']' => self.makeToken(.RightSquare),
-            '.' => self.makeToken(.Dot),
-            ';' => self.makeToken(.Semicolon),
-            ',' => self.makeToken(.Comma),
-            '?' => self.makeToken(.QuestionMark),
-            '$' => self.makeToken(.DollarSign),
-
-            '%' => self.makeToken(.Modulo),
-
-            ':' => self.makeMatchToken('=', .ColonEqual, .Colon),
-            '*' => self.makeMatchToken('=', .StarEqual, .Star),
-            '-' => self.makeMatchToken('=', .MinusEqual, .Minus),
-
-            // we use the existing .And and .Or tokens
-            // representing the and and or keywords to
-            // also have || and &&
-            '&' => self.makeMatchToken('&', .And, .Address),
-            '|' => self.makeMatchToken('|', .Or, .Pipe),
-
-            '!' => self.makeMatchToken('=', .BangEqual, .Bang),
-            '=' => self.makeMatchToken('=', .EqualEqual, .Equal),
-            '>' => self.makeMatchToken('=', .GreaterEqual, .Greater),
-            '<' => self.makeMatchToken('=', .LessEqual, .Less),
-            '+' => self.makeTripleMatchToken('+', .PlusPlus, '=', .PlusEqual, .Plus),
-
-            '/' => blk: {
-                var next = self.peekNext();
-
-                switch (next) {
-                    '=' => {
-                        self.current += 1;
-                        return self.makeToken(.SlashEqual);
-                    },
-
-                    '/' => blk2: {
-                        while (self.peek() != '\n' and !self.isAtEnd()) {
-                            _ = self.advance();
-                        }
-
-                        return null;
-                    },
-
-                    '*' => blk2: {
-                        while (self.peek() != '*' or self.peekNext() != '/') {
-                            _ = self.advance();
-                        }
-
-                        // consume the ending slash
-                        _ = self.advance();
-                        return null;
-                    },
-
-                    else => break :blk self.makeToken(.Slash),
-                }
-            },
-
-            // '\'' => try self.doString('\''),
-            '"' => try self.doString('"'),
-
-            ' ', '\r', '\t' => null,
-            '\n' => blk: {
-                self.line += 1;
-                break :blk null;
-            },
-
-            else => return ScannerError.Unexpected,
-        };
-
-        return token;
-    }
-};
diff --git a/src/tokens.zig b/src/tokens.zig
deleted file mode 100644
index 1df9b37..0000000
--- a/src/tokens.zig
+++ /dev/null
@@ -1,88 +0,0 @@
-pub const TokenType = enum {
-    // basic tokens
-    LeftParen,
-    RightParen,
-    LeftBrace,
-    RightBrace,
-    LeftSquare,
-    RightSquare,
-    Dot,
-    Equal,
-    Semicolon,
-    Comma,
-    Colon,
-    Address,
-    Pipe,
-    QuestionMark,
-    DollarSign,
-
-    // math operators
-    Plus,
-    Minus,
-    Star,
-    Slash,
-    Modulo,
-
-    // one-two char tokens
-    DotEqual,
-    PlusPlus,
-    PlusEqual,
-    MinusEqual,
-    ColonEqual,
-    StarEqual,
-    SlashEqual,
-
-    // comparison ones
-    EqualEqual,
-    Less,
-    LessEqual,
-    Greater,
-    GreaterEqual,
-    Bang,
-    BangEqual,
-
-    // complex types
-    Integer,
-    Float,
-    String,
-    Identifier,
-
-    // keywords
-    Break,
-    Const,
-    Continue,
-    Defer,
-    Else,
-    Enum,
-    Fn,
-    For,
-    Loop,
-    Go,
-    Goto,
-    If,
-    Import,
-    In,
-    Interface,
-    Match,
-    Module,
-    Mut,
-    Or,
-    And,
-    Return,
-    Struct,
-    Type,
-    True,
-    False,
-    None,
-
-    Println,
-    Pub,
-
-    EOF,
-};
-
-pub const Token = struct {
-    typ: TokenType,
-    lexeme: []const u8,
-    line: usize,
-};
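
For whoever restores this later: the deleted Scanner kept two cursors into an immutable source slice — start marks the first byte of the token being built, current is the read head — and a token's lexeme is always the slice source[start..current]; nothing is copied. Below is a cut-down sketch of that pattern together with the match() fusion used for one-or-two character operators. MiniScanner is an illustrative name, not anything from the original file, and the syntax targets the same older Zig the rest of the tree uses (std.debug.warn-era).

const std = @import("std");

const MiniScanner = struct {
    source: []const u8,
    start: usize = 0,
    current: usize = 0,

    fn isAtEnd(self: *MiniScanner) bool {
        return self.current >= self.source.len;
    }

    /// Consume the next byte unconditionally.
    fn advance(self: *MiniScanner) u8 {
        self.current += 1;
        return self.source[self.current - 1];
    }

    /// Consume the next byte only if it equals `expected`; this is how
    /// one-or-two character operators ('=' vs "==") get fused.
    fn match(self: *MiniScanner, expected: u8) bool {
        if (self.isAtEnd()) return false;
        if (self.source[self.current] != expected) return false;
        self.current += 1;
        return true;
    }

    /// The token under construction is always a slice, never a copy.
    fn lexeme(self: *MiniScanner) []const u8 {
        return self.source[self.start..self.current];
    }
};

test "fusing '=' and '==' with match()" {
    var s = MiniScanner{ .source = "== =" };

    _ = s.advance(); // consume the first '='
    std.debug.assert(s.match('=')); // second '=' present: lexeme is "=="
    std.debug.assert(std.mem.eql(u8, s.lexeme(), "=="));

    s.start = s.current + 1; // skip the space, begin a new token
    s.current = s.start;
    _ = s.advance();
    std.debug.assert(!s.match('=')); // at end of input: a plain '='
    std.debug.assert(std.mem.eql(u8, s.lexeme(), "="));
}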
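
The keyword table was two parallel arrays that have to stay index-aligned by hand, with getKeyword() doing a linear scan over them. Here is a minimal sketch of the same lookup, shrunk to three entries, with the alignment invariant made explicit as a comptime assert; the enum and the names are placeholders, not vlang's full keyword set.

const std = @import("std");

const KwType = enum { If, Else, Return };

const kw_names = [_][]const u8{ "if", "else", "return" };
const kw_types = [_]KwType{ .If, .Else, .Return };

// The invariant the original arrays rely on implicitly: one type per name.
comptime {
    std.debug.assert(kw_names.len == kw_types.len);
}

/// Linear scan, same shape as the deleted getKeyword(); fine for a few
/// dozen keywords, and trivially replaceable by a hash map later.
fn getKeyword(word: []const u8) ?KwType {
    for (kw_names) |name, idx| {
        if (std.mem.eql(u8, word, name)) return kw_types[idx];
    }
    return null;
}

test "keyword lookup" {
    std.debug.assert(getKeyword("else").? == KwType.Else);
    std.debug.assert(getKeyword("elsewhere") == null);
}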
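
One behavior worth preserving if the scanner comes back: doString() never allocated — the String token's lexeme was a subslice of the source with the quotes sliced off via lexeme[1 .. lexeme.len - 1], which is also why the struct's allocator field went unused there. trimQuotes below is a hypothetical helper isolating just that step (@ptrToInt matches the older Zig this tree targets; newer releases renamed it).

const std = @import("std");

/// Hypothetical helper for the slice done at the end of doString():
/// drop the opening and closing quote, return a view into the source.
fn trimQuotes(lexeme: []const u8) []const u8 {
    std.debug.assert(lexeme.len >= 2);
    return lexeme[1 .. lexeme.len - 1];
}

test "string lexemes alias the source, quotes stripped" {
    const source: []const u8 = "\"hello\"";
    const inner = trimQuotes(source);

    std.debug.assert(std.mem.eql(u8, inner, "hello"));
    // zero-copy: the trimmed lexeme points one byte into the original
    std.debug.assert(@ptrToInt(inner.ptr) == @ptrToInt(source.ptr) + 1);
}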