From 59083198c698610c8994130e52d771e6fd182f4b Mon Sep 17 00:00:00 2001 From: Luna Date: Tue, 4 Jun 2019 15:06:57 -0300 Subject: [PATCH] add basic scanner logic --- src/main.zig | 11 +++ src/scanner.zig | 182 ++++++++++++++++++++++++++++++++++++++++++++++-- src/tokens.zig | 11 ++- 3 files changed, 197 insertions(+), 7 deletions(-) diff --git a/src/main.zig b/src/main.zig index 9280494..78ef22a 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,5 +1,7 @@ const std = @import("std"); +const scanners = @import("scanner.zig"); + const Allocator = std.mem.Allocator; pub const Result = error{ @@ -10,6 +12,15 @@ pub const Result = error{ fn run(allocator: *Allocator, data: []u8) !void { var stdout_file = try std.io.getStdOut(); const stdout = &stdout_file.outStream().stream; + + var scanner = scanners.Scanner.init(allocator, data); + + while (true) { + var tok = try scanner.nextToken(); + if (tok.ttype == .EOF) break; + try stdout.print("{x}\n", tok); + } + return Result.Ok; } diff --git a/src/scanner.zig b/src/scanner.zig index e2b7682..c7319fa 100644 --- a/src/scanner.zig +++ b/src/scanner.zig @@ -1,9 +1,11 @@ const std = @import("std"); -const tokens = @import("tokens.std"); +const tokens = @import("tokens.zig"); const Allocator = std.mem.Allocator; +const Token = tokens.Token; +const TokenType = tokens.TokenType; -pub const TokenError = error{ +pub const ScannerError = error{ Unexpected, Unterminated, }; @@ -22,10 +24,182 @@ fn isAlphaNumeric(char: u8) bool { return isAlpha(char) or isDigit(char); } +const keywords = [][]const u8{ + "break", + "const", + "continue", + "defer", + "else", + "enum", + "fn", + "for", + "go", + "goto", + "if", + "import", + "in", + "interface", + "match", + "module", + "mut", + "or", + "return", + "struct", + "type", +}; + +const keyword_ttypes = []TokenType{ + .Break, + .Const, + .Continue, + .Defer, + .Else, + .Enum, + .Fn, + .For, + .Go, + .Goto, + .If, + .Import, + .In, + .Interface, + .Match, + .Module, + .Mut, + .Or, + .Return, + 
.Struct, + .Type, +}; + +fn getKeyword(keyword: []const u8) ?TokenType { + for (keywords) |kw, idx| { + if (std.mem.eql(u8, keyword, kw)) { + return keyword_ttypes[idx]; + } + } + + return null; +} + +/// Scanner for vlang tokens. pub const Scanner = struct { allocator: *Allocator, + source: []u8, - pub fn init(allocator: *Allocator) Scanner { - return Scanner{ .allocator = allocator }; + start: usize = 0, + current: usize = 0, + line: usize = 1, + + pub fn init(allocator: *Allocator, source: []u8) Scanner { + return Scanner{ .allocator = allocator, .source = source }; + } + + fn isAtEnd(self: *Scanner) bool { + return self.current >= self.source.len; + } + + fn advance(self: *Scanner) u8 { + self.current += 1; + return self.source[self.current - 1]; + } + + fn currentLexeme(self: *Scanner) []const u8 { + return self.source[self.start..self.current]; + } + + fn makeToken(self: *Scanner, ttype: TokenType) Token { + return Token{ + .ttype = ttype, + .lexeme = self.currentLexeme(), + .line = self.line, + }; + } + + fn makeTokenAdvance(self: *Scanner, ttype: TokenType) Token { + var tok = self.makeToken(ttype); + self.current += 1; + return tok; + } + + /// Check if the next character matches what is expected. + fn match(self: *Scanner, expected: u8) bool { + if (self.isAtEnd()) return false; + if (self.source[self.current] != expected) return false; + + self.current += 1; + return true; + } + + /// Return a Token of type_match if the next character is `expected` + /// (consuming it). Returns a Token of type_nomatch when it is not. 
+ fn makeMatchToken( + self: *Scanner, + expected: u8, + type_match: TokenType, + type_nomatch: TokenType, + ) Token { + if (self.match(expected)) { + return self.makeToken(type_match); + } else { + return self.makeToken(type_nomatch); + } + } + + fn peek(self: *Scanner) u8 { + if (self.isAtEnd()) return 0; + return self.source[self.current]; + } + + fn peekNext(self: *Scanner) u8 { + if (self.current + 1 >= self.source.len) return 0; + return self.source[self.current + 1]; + } + + pub fn nextToken(self: *Scanner) !Token { + self.start = self.current; + + if (self.isAtEnd()) return self.makeToken(TokenType.EOF); + + var c = self.advance(); + + var token = switch (c) { + '(' => self.makeToken(.LeftParen), + ')' => self.makeToken(.RightParen), + '{' => self.makeToken(.LeftBrace), + '}' => self.makeToken(.RightBrace), + '[' => self.makeToken(.LeftSquare), + ']' => self.makeToken(.RightSquare), + '.' => self.makeToken(.Dot), + ';' => self.makeToken(.Semicolon), + ',' => self.makeToken(.Comma), + ':' => self.makeToken(.Colon), + '&' => self.makeToken(.Ampersand), + '|' => self.makeToken(.Pipe), + '?' => self.makeToken(.QuestionMark), + '$' => self.makeToken(.DollarSign), + + '!' 
=> self.makeMatchToken('=', .BangEqual, .Bang), + '=' => self.makeMatchToken('=', .EqualEqual, .Equal), + + // there can be three tokens from a < + // - <, which is Less + // - <=, which is LessEqual + // - <<, which is LeftDoubleChevron + '<' => blk: { + if (self.match('=')) { + break :blk self.makeToken(.LessEqual); + } else if (self.match('<')) { + break :blk self.makeToken(.LeftDoubleChevron); + } else { + break :blk self.makeToken(.Less); + } + }, + '>' => self.makeMatchToken('=', .GreaterEqual, .Greater), + + else => return ScannerError.Unexpected, + }; + + return token; } }; diff --git a/src/tokens.zig b/src/tokens.zig index fb0a6e7..20ab7e3 100644 --- a/src/tokens.zig +++ b/src/tokens.zig @@ -30,9 +30,11 @@ pub const TokenType = enum { PlusEqual, MinusEqual, - LessThan, + // comparison ones + EqualEqual, + Less, LessEqual, - GreaterThan, + Greater, GreaterEqual, Bang, BangEqual, @@ -65,9 +67,12 @@ pub const TokenType = enum { Return, Struct, Type, + + EOF, }; pub const Token = struct { ttype: TokenType, - lexeme: []u8, + lexeme: []const u8, + line: usize, };