add basic scanner logic

2019-06-04 15:06:57 -03:00 · 2019-06-04 15:06:57 -03:00 · 59083198c6
parent 3d243eefff
commit 59083198c6
3 changed files with 197 additions and 7 deletions
--- a/src/main.zig
+++ b/src/main.zig
@ -1,5 +1,7 @@
 const std = @import("std");

+const scanners = @import("scanner.zig");
+
 const Allocator = std.mem.Allocator;

 pub const Result = error{
@ -10,6 +12,15 @@ pub const Result = error{
 fn run(allocator: *Allocator, data: []u8) !void {
    var stdout_file = try std.io.getStdOut();
    const stdout = &stdout_file.outStream().stream;
+
+    // Tokenize `data` and dump every token to stdout, one per line.
+    var scanner = scanners.Scanner.init(allocator, data);
+
+    // Pull tokens until the scanner reports EOF; the EOF token itself is
+    // not printed. A scanner error propagates to the caller through `try`.
+    while (true) {
+        var tok = try scanner.nextToken();
+        if (tok.ttype == .EOF) break;
+        try stdout.print("{x}\n", tok);
+    }
+
+    // NOTE(review): `Result` is declared as an error set, so this returns an
+    // error value named `Ok` even on the success path; confirm this is the
+    // intended way to signal completion.
    return Result.Ok;
 }

--- a/src/scanner.zig
+++ b/src/scanner.zig
@ -1,9 +1,11 @@
 const std = @import("std");
-const tokens = @import("tokens.std");
+const tokens = @import("tokens.zig");

 const Allocator = std.mem.Allocator;
+const Token = tokens.Token;
+const TokenType = tokens.TokenType;

-pub const TokenError = error{
+pub const ScannerError = error{
    Unexpected,
    Unterminated,
 };
@ -22,10 +24,182 @@ fn isAlphaNumeric(char: u8) bool {
    return isAlpha(char) or isDigit(char);
 }

+/// A reserved word paired with the token type it scans to.
+const Keyword = struct {
+    name: []const u8,
+    ttype: TokenType,
+};
+
+/// Every reserved word known to the scanner. The spelling and the token
+/// type live in the same entry so the two cannot drift out of sync, which
+/// was possible when they were kept in two index-matched arrays.
+const keywords = []Keyword{
+    Keyword{ .name = "break", .ttype = .Break },
+    Keyword{ .name = "const", .ttype = .Const },
+    Keyword{ .name = "continue", .ttype = .Continue },
+    Keyword{ .name = "defer", .ttype = .Defer },
+    Keyword{ .name = "else", .ttype = .Else },
+    Keyword{ .name = "enum", .ttype = .Enum },
+    Keyword{ .name = "fn", .ttype = .Fn },
+    Keyword{ .name = "for", .ttype = .For },
+    Keyword{ .name = "go", .ttype = .Go },
+    Keyword{ .name = "goto", .ttype = .Goto },
+    Keyword{ .name = "if", .ttype = .If },
+    Keyword{ .name = "import", .ttype = .Import },
+    Keyword{ .name = "in", .ttype = .In },
+    Keyword{ .name = "interface", .ttype = .Interface },
+    Keyword{ .name = "match", .ttype = .Match },
+    Keyword{ .name = "module", .ttype = .Module },
+    Keyword{ .name = "mut", .ttype = .Mut },
+    Keyword{ .name = "or", .ttype = .Or },
+    Keyword{ .name = "return", .ttype = .Return },
+    Keyword{ .name = "struct", .ttype = .Struct },
+    Keyword{ .name = "type", .ttype = .Type },
+};
+
+/// Return the token type for `keyword` when it is a reserved word, or null
+/// when it is not. The comparison is an exact, byte-for-byte match.
+fn getKeyword(keyword: []const u8) ?TokenType {
+    for (keywords) |entry| {
+        if (std.mem.eql(u8, keyword, entry.name)) return entry.ttype;
+    }
+
+    return null;
+}
+
+/// Scanner for vlang tokens.
 pub const Scanner = struct {
    allocator: *Allocator,
+    source: []u8,

-    pub fn init(allocator: *Allocator) Scanner {
-        return Scanner{ .allocator = allocator };
+    start: usize = 0,
+    current: usize = 0,
+    line: usize = 1,
+
+    /// Build a scanner over `source`. The position fields keep their
+    /// declared defaults, so scanning starts at the first byte.
+    pub fn init(allocator: *Allocator, source: []u8) Scanner {
+        return Scanner{
+            .allocator = allocator,
+            .source = source,
+        };
+    }
+
+    /// True once the read position has reached or passed the end of the
+    /// source.
+    fn isAtEnd(self: *Scanner) bool {
+        return self.source.len <= self.current;
+    }
+
+    /// Consume one byte and return it. The caller must make sure the
+    /// scanner is not at the end; no bounds check is done here.
+    fn advance(self: *Scanner) u8 {
+        const consumed = self.source[self.current];
+        self.current += 1;
+        return consumed;
+    }
+
+    /// Slice of the source covering the token being scanned, from `start`
+    /// up to (not including) `current`.
+    fn currentLexeme(self: *Scanner) []const u8 {
+        const lexeme: []const u8 = self.source[self.start..self.current];
+        return lexeme;
+    }
+
+    /// Build a token of type `ttype` from the current lexeme and the
+    /// current line number.
+    fn makeToken(self: *Scanner, ttype: TokenType) Token {
+        const lexeme = self.currentLexeme();
+        return Token{ .ttype = ttype, .lexeme = lexeme, .line = self.line };
+    }
+
+    /// Build a token from the current lexeme, then step over one more byte.
+    /// The skipped byte is not part of the returned token's lexeme, because
+    /// the token is created before the position moves.
+    fn makeTokenAdvance(self: *Scanner, ttype: TokenType) Token {
+        defer self.current += 1;
+        return self.makeToken(ttype);
+    }
+
+    /// Consume the next byte only when it equals `expected`.
+    /// Returns true when a byte was consumed, false at the end of the
+    /// source or on a mismatch (the position is left untouched then).
+    fn match(self: *Scanner, expected: u8) bool {
+        const matched = !self.isAtEnd() and self.source[self.current] == expected;
+        if (matched) self.current += 1;
+        return matched;
+    }
+
+    /// Build a one- or two-byte token: when the next byte is `expected` it
+    /// is consumed and the token has type `type_match`; otherwise nothing
+    /// is consumed and the token has type `type_nomatch`.
+    fn makeMatchToken(
+        self: *Scanner,
+        expected: u8,
+        type_match: TokenType,
+        type_nomatch: TokenType,
+    ) Token {
+        const chosen = if (self.match(expected)) type_match else type_nomatch;
+        return self.makeToken(chosen);
+    }
+
+    /// Look at the next unread byte without consuming it; yields 0 at the
+    /// end of the source.
+    fn peek(self: *Scanner) u8 {
+        return if (self.isAtEnd()) 0 else self.source[self.current];
+    }
+
+    /// Look one byte past the next unread byte without consuming anything;
+    /// yields 0 when that position is outside the source.
+    fn peekNext(self: *Scanner) u8 {
+        const next = self.current + 1;
+        return if (next < self.source.len) self.source[next] else 0;
+    }
+
+    /// Scan and return the next token, starting at the current position.
+    ///
+    /// Returns an EOF token with an empty lexeme once the source is
+    /// exhausted, and `ScannerError.Unexpected` for any byte that does not
+    /// start one of the tokens handled below.
+    ///
+    /// NOTE(review): no prong handles whitespace, so spaces and newlines
+    /// are reported as `Unexpected`, and `line` is never advanced here.
+    pub fn nextToken(self: *Scanner) !Token {
+        // The new lexeme begins where the previous token ended.
+        self.start = self.current;
+
+        if (self.isAtEnd()) return self.makeToken(TokenType.EOF);
+
+        const first = self.advance();
+
+        const token = switch (first) {
+            // single-byte tokens
+            '(' => self.makeToken(.LeftParen),
+            ')' => self.makeToken(.RightParen),
+            '{' => self.makeToken(.LeftBrace),
+            '}' => self.makeToken(.RightBrace),
+            '[' => self.makeToken(.LeftSquare),
+            ']' => self.makeToken(.RightSquare),
+            '.' => self.makeToken(.Dot),
+            ';' => self.makeToken(.Semicolon),
+            ',' => self.makeToken(.Comma),
+            ':' => self.makeToken(.Colon),
+            '&' => self.makeToken(.Ampersand),
+            '|' => self.makeToken(.Pipe),
+            '?' => self.makeToken(.QuestionMark),
+            '$' => self.makeToken(.DollarSign),
+
+            // tokens that may be followed by '='
+            '!' => self.makeMatchToken('=', .BangEqual, .Bang),
+            '=' => self.makeMatchToken('=', .EqualEqual, .Equal),
+            '>' => self.makeMatchToken('=', .GreaterEqual, .Greater),
+
+            // '<' has three outcomes, tried in this order:
+            //  - "<=" is LessEqual
+            //  - "<<" is LeftDoubleChevron
+            //  - a lone "<" is Less
+            '<' => if (self.match('='))
+                self.makeToken(.LessEqual)
+            else if (self.match('<'))
+                self.makeToken(.LeftDoubleChevron)
+            else
+                self.makeToken(.Less),
+
+            else => return ScannerError.Unexpected,
+        };
+
+        return token;
    }
 };
--- a/src/tokens.zig
+++ b/src/tokens.zig
@ -30,9 +30,11 @@ pub const TokenType = enum {
    PlusEqual,
    MinusEqual,

-    LessThan,
+    // comparison ones
+    EqualEqual,
+    Less,
    LessEqual,
-    GreaterThan,
+    Greater,
    GreaterEqual,
    Bang,
    BangEqual,
@ -65,9 +67,12 @@ pub const TokenType = enum {
    Return,
    Struct,
    Type,
+
+    EOF,
 };

+/// A single lexical token.
 pub const Token = struct {
+    /// Kind of token.
    ttype: TokenType,
-    lexeme: []u8,
+    /// Text of the token as a read-only byte slice.
+    lexeme: []const u8,
+    /// Line number recorded for the token.
+    line: usize,
 };