vig/src/scanner.zig

const std = @import("std");
const tokens = @import("tokens.zig");

const Allocator = std.mem.Allocator;
const Token = tokens.Token;
const TokenType = tokens.TokenType;

/// Errors the scanner can report while tokenizing.
pub const ScannerError = error{
    // Returned by Scanner.nextToken when the consumed byte does not
    // begin any token it recognizes.
    Unexpected,
    // Not returned by any code in this file.
    // NOTE(review): presumably reserved for unterminated literals
    // (e.g. strings) -- confirm once that scanning exists.
    Unterminated,
};

/// Report whether `char` is an ASCII decimal digit ('0' through '9').
fn isDigit(char: u8) bool {
    return switch (char) {
        '0'...'9' => true,
        else => false,
    };
}

/// Report whether `c` may appear in an identifier as a letter:
/// an ASCII letter of either case, or an underscore.
fn isAlpha(c: u8) bool {
    return switch (c) {
        'a'...'z', 'A'...'Z', '_' => true,
        else => false,
    };
}

/// Report whether `char` is a letter, an underscore, or a decimal digit.
fn isAlphaNumeric(char: u8) bool {
    if (isAlpha(char)) return true;
    return isDigit(char);
}

/// Reserved words, as they are spelled in source text.
/// getKeyword maps entry N of this table to entry N of
/// `keyword_ttypes`, so the two tables must keep the same length
/// and the same order.
const keywords = [][]const u8{
    "break",
    "const",
    "continue",
    "defer",
    "else",
    "enum",
    "fn",
    "for",
    "go",
    "goto",
    "if",
    "import",
    "in",
    "interface",
    "match",
    "module",
    "mut",
    "or",
    "return",
    "struct",
    "type",
};

/// Token type for each reserved word.
/// Entry N is what getKeyword returns for entry N of `keywords`;
/// add, remove or reorder entries in both tables together.
const keyword_ttypes = []TokenType{
    .Break,
    .Const,
    .Continue,
    .Defer,
    .Else,
    .Enum,
    .Fn,
    .For,
    .Go,
    .Goto,
    .If,
    .Import,
    .In,
    .Interface,
    .Match,
    .Module,
    .Mut,
    .Or,
    .Return,
    .Struct,
    .Type,
};

/// Look up `keyword` in the reserved-word table.
/// Returns the matching token type, or null when the text is not a
/// reserved word. The comparison is an exact, case-sensitive byte match.
fn getKeyword(keyword: []const u8) ?TokenType {
    var idx: usize = 0;
    while (idx < keywords.len) : (idx += 1) {
        if (!std.mem.eql(u8, keyword, keywords[idx])) continue;
        // Both tables share indices, so the hit position selects the type.
        return keyword_ttypes[idx];
    }

    return null;
}

/// Scanner for vlang tokens.
///
/// Walks `source` one byte at a time and produces at most one token per
/// `nextToken` call. Token lexemes are slices into `source`, so that
/// buffer has to outlive every token handed out.
pub const Scanner = struct {
    /// Allocator given to `init`. It is only stored; no method of this
    /// struct allocates with it.
    allocator: *Allocator,
    /// Text being scanned. Borrowed from the caller, never copied.
    source: []u8,

    /// Index of the first byte of the token currently being scanned.
    start: usize = 0,
    /// Index of the next byte to consume.
    current: usize = 0,
    /// Line counter, starting at 1 and incremented for every '\n'
    /// consumed by `skipWhitespace`.
    line: usize = 1,

    /// Create a scanner positioned at the start of `source`.
    pub fn init(allocator: *Allocator, source: []u8) Scanner {
        return Scanner{ .allocator = allocator, .source = source };
    }

    /// True once every byte of `source` has been consumed.
    fn isAtEnd(self: *Scanner) bool {
        return self.current >= self.source.len;
    }

    /// Consume one byte and return it.
    /// Does no bounds check of its own; callers must make sure the
    /// scanner is not at the end.
    fn advance(self: *Scanner) u8 {
        self.current += 1;
        return self.source[self.current - 1];
    }

    /// Text of the token being scanned: `source[start..current]`.
    pub fn currentLexeme(self: *Scanner) []const u8 {
        return self.source[self.start..self.current];
    }

    /// Build a token of type `ttype` from the current lexeme and line.
    fn makeToken(self: *Scanner, ttype: TokenType) Token {
        return Token{
            .ttype = ttype,
            .lexeme = self.currentLexeme(),
            .line = self.line,
        };
    }

    /// Build a token from the current lexeme, then step past one more
    /// byte. The skipped byte is not part of the returned lexeme.
    fn makeTokenAdvance(self: *Scanner, ttype: TokenType) Token {
        const tok = self.makeToken(ttype);
        self.current += 1;
        return tok;
    }

    /// Consume the next byte only if it equals `expected`.
    /// Returns whether a byte was consumed; at end of input nothing is
    /// consumed and the result is false.
    fn match(self: *Scanner, expected: u8) bool {
        if (self.isAtEnd()) return false;
        if (self.source[self.current] != expected) return false;

        self.current += 1;
        return true;
    }

    /// Return a token of `type_match` if the next byte is `expected`
    /// (consuming it), or a token of `type_nomatch` otherwise.
    fn makeMatchToken(
        self: *Scanner,
        expected: u8,
        type_match: TokenType,
        type_nomatch: TokenType,
    ) Token {
        if (self.match(expected)) {
            return self.makeToken(type_match);
        } else {
            return self.makeToken(type_nomatch);
        }
    }

    /// Next unconsumed byte, or 0 at end of input. Consumes nothing.
    fn peek(self: *Scanner) u8 {
        if (self.isAtEnd()) return 0;
        return self.source[self.current];
    }

    /// Byte after the next unconsumed one, or 0 if there is none.
    /// Consumes nothing.
    fn peekNext(self: *Scanner) u8 {
        if (self.current + 1 >= self.source.len) return 0;
        return self.source[self.current + 1];
    }

    /// Consume spaces, carriage returns, tabs and newlines, counting
    /// each newline in `line`. Stops at the first other byte, or at end
    /// of input (where `peek` yields 0).
    fn skipWhitespace(self: *Scanner) void {
        while (true) {
            const c = self.peek();

            switch (c) {
                ' ', '\r', '\t' => {
                    _ = self.advance();
                },
                '\n' => {
                    self.line += 1;
                    _ = self.advance();
                },
                else => return,
            }
        }
    }

    /// Consume a number.
    /// Returns either an Integer or a Float token. Proper typing
    /// of the number (i32 i64 u32 u64 f32 f64) is left to the parser.
    /// Expects the first digit to have been consumed already.
    fn doNumber(self: *Scanner) Token {
        var ttype = TokenType.Integer;

        while (isDigit(self.peek())) {
            _ = self.advance();
        }

        // Only treat '.' as a decimal point when a digit follows it,
        // as in 12.34; otherwise the '.' is left for the next token.
        if (self.peek() == '.' and isDigit(self.peekNext())) {
            ttype = TokenType.Float;

            // consume the '.'
            _ = self.advance();
            while (isDigit(self.peek())) {
                _ = self.advance();
            }
        }

        return self.makeToken(ttype);
    }

    /// Scan the next token.
    ///
    /// Leading whitespace is skipped first. Returns:
    ///  - an EOF token once the input is exhausted,
    ///  - null when a `//` line comment was skipped instead of a token
    ///    (call again to continue),
    ///  - ScannerError.Unexpected for a byte that starts no token
    ///    handled here. Letters fall into this case: identifiers and
    ///    keywords are not scanned by this function.
    pub fn nextToken(self: *Scanner) !?Token {
        self.skipWhitespace();
        self.start = self.current;

        if (self.isAtEnd()) return self.makeToken(TokenType.EOF);

        const c = self.advance();
        if (isDigit(c)) return self.doNumber();

        const token: ?Token = switch (c) {
            '(' => self.makeToken(.LeftParen),
            ')' => self.makeToken(.RightParen),
            '{' => self.makeToken(.LeftBrace),
            '}' => self.makeToken(.RightBrace),
            '[' => self.makeToken(.LeftSquare),
            ']' => self.makeToken(.RightSquare),
            '.' => self.makeToken(.Dot),
            ';' => self.makeToken(.Semicolon),
            ',' => self.makeToken(.Comma),
            ':' => self.makeToken(.Colon),
            '&' => self.makeToken(.Ampersand),
            '|' => self.makeToken(.Pipe),
            '?' => self.makeToken(.QuestionMark),
            '$' => self.makeToken(.DollarSign),

            '-' => self.makeToken(.Minus),
            '+' => self.makeToken(.Plus),
            '*' => self.makeToken(.Star),

            '!' => self.makeMatchToken('=', .BangEqual, .Bang),
            '=' => self.makeMatchToken('=', .EqualEqual, .Equal),

            // there can be three tokens from a <
            //  - <, which is Less
            //  - <=, which is LessEqual
            //  - <<, which is LeftDoubleChevron
            '<' => blk: {
                if (self.match('=')) {
                    break :blk self.makeToken(.LessEqual);
                } else if (self.match('<')) {
                    break :blk self.makeToken(.LeftDoubleChevron);
                } else {
                    break :blk self.makeToken(.Less);
                }
            },
            '>' => self.makeMatchToken('=', .GreaterEqual, .Greater),

            '/' => blk: {
                // The first '/' is already consumed, so the byte that
                // decides between a comment and a division sign is the
                // next one (peek), not the one after it (peekNext).
                if (self.peek() == '/') {
                    // Skip to the end of the line; the '\n' itself is
                    // left for skipWhitespace so the line gets counted.
                    while (self.peek() != '\n' and !self.isAtEnd()) {
                        _ = self.advance();
                    }

                    break :blk null;
                } else {
                    break :blk self.makeToken(.Slash);
                }
            },

            else => return ScannerError.Unexpected,
        };

        return token;
    }
};
add runPrompt / runFile / run functions 2019-06-04 02:12:16 +00:00			`const std = @import("std");`
add basic scanner logic 2019-06-04 18:06:57 +00:00			`const tokens = @import("tokens.zig");`
add runPrompt / runFile / run functions 2019-06-04 02:12:16 +00:00
			`const Allocator = std.mem.Allocator;`
add basic scanner logic 2019-06-04 18:06:57 +00:00			`const Token = tokens.Token;`
			`const TokenType = tokens.TokenType;`
add runPrompt / runFile / run functions 2019-06-04 02:12:16 +00:00
add basic scanner logic 2019-06-04 18:06:57 +00:00			`pub const ScannerError = error{`
add runPrompt / runFile / run functions 2019-06-04 02:12:16 +00:00			`Unexpected,`
			`Unterminated,`
			`};`

			`fn isDigit(char: u8) bool {`
			`return char >= '0' and char <= '9';`
			`}`

			`fn isAlpha(c: u8) bool {`
			`return (c >= 'a' and c <= 'z') or`
			`(c >= 'A' and c <= 'Z') or`
			`c == '_';`
			`}`

			`fn isAlphaNumeric(char: u8) bool {`
			`return isAlpha(char) or isDigit(char);`
			`}`

add basic scanner logic 2019-06-04 18:06:57 +00:00			`const keywords = [][]const u8{`
			`"break",`
			`"const",`
			`"continue",`
			`"defer",`
			`"else",`
			`"enum",`
			`"fn",`
			`"for",`
			`"go",`
			`"goto",`
			`"if",`
			`"import",`
			`"in",`
			`"interface",`
			`"match",`
			`"module",`
			`"mut",`
			`"or",`
			`"return",`
			`"struct",`
			`"type",`
			`};`

			`const keyword_ttypes = []TokenType{`
			`.Break,`
			`.Const,`
			`.Continue,`
			`.Defer,`
			`.Else,`
			`.Enum,`
			`.Fn,`
			`.For,`
			`.Go,`
			`.Goto,`
			`.If,`
			`.Import,`
			`.In,`
			`.Interface,`
			`.Match,`
			`.Module,`
			`.Mut,`
			`.Or,`
			`.Return,`
			`.Struct,`
			`.Type,`
			`};`

			`fn getKeyword(keyword: []const u8) ?TokenType {`
			`for (keywords) \|kw, idx\| {`
			`if (std.mem.eql(u8, keyword, kw)) {`
			`return keyword_ttypes[idx];`
			`}`
			`}`

			`return null;`
			`}`

			`/// Scanner for vlang tokens.`
add runPrompt / runFile / run functions 2019-06-04 02:12:16 +00:00			`pub const Scanner = struct {`
			`allocator: *Allocator,`
add basic scanner logic 2019-06-04 18:06:57 +00:00			`source: []u8,`

			`start: usize = 0,`
			`current: usize = 0,`
			`line: usize = 1,`

			`pub fn init(allocator: *Allocator, source: []u8) Scanner {`
			`return Scanner{ .allocator = allocator, .source = source };`
			`}`

			`fn isAtEnd(self: *Scanner) bool {`
			`return self.current >= self.source.len;`
			`}`

			`fn advance(self: *Scanner) u8 {`
			`self.current += 1;`
			`return self.source[self.current - 1];`
			`}`

add nicer error handling, whitespace skipping 2019-06-04 18:18:52 +00:00			`pub fn currentLexeme(self: *Scanner) []const u8 {`
add basic scanner logic 2019-06-04 18:06:57 +00:00			`return self.source[self.start..self.current];`
			`}`

			`fn makeToken(self: *Scanner, ttype: TokenType) Token {`
			`return Token{`
			`.ttype = ttype,`
			`.lexeme = self.currentLexeme(),`
			`.line = self.line,`
			`};`
			`}`

			`fn makeTokenAdvance(self: *Scanner, ttype: TokenType) Token {`
			`var tok = self.makeToken(ttype);`
			`self.current += 1;`
			`return tok;`
			`}`

			`/// Check if the next character matches what is expected.`
			`fn match(self: *Scanner, expected: u8) bool {`
			`if (self.isAtEnd()) return false;`
			`if (self.source[self.current] != expected) return false;`

			`self.current += 1;`
			`return true;`
			`}`

			`/// Add a SimpleToken of type_match if the next character is`
			/// `expected`. Adds a SimpleToken of type_nomatch when it is not.
			`fn makeMatchToken(`
			`self: *Scanner,`
			`expected: u8,`
			`type_match: TokenType,`
			`type_nomatch: TokenType,`
			`) Token {`
			`if (self.match(expected)) {`
			`return self.makeToken(type_match);`
			`} else {`
			`return self.makeToken(type_nomatch);`
			`}`
			`}`

			`fn peek(self: *Scanner) u8 {`
			`if (self.isAtEnd()) return 0;`
			`return self.source[self.current];`
			`}`

			`fn peekNext(self: *Scanner) u8 {`
			`if (self.current + 1 >= self.source.len) return 0;`
			`return self.source[self.current + 1];`
			`}`

add nicer error handling, whitespace skipping 2019-06-04 18:18:52 +00:00			`fn skipWhitespace(self: *Scanner) void {`
			`while (true) {`
			`var c = self.peek();`

			`switch (c) {`
			`' ', '\r', '\t' => blk: {`
			`_ = self.advance();`
			`},`
			`'\n' => blk: {`
			`self.line += 1;`
			`_ = self.advance();`
			`},`
			`else => return,`
			`}`
			`}`
			`}`

add number and comment support 2019-06-04 20:24:07 +00:00			`/// Consume a number.`
			`/// Returns either an Integer or a Float token. Proper typing`
			`/// of the number (i32 i64 u32 u64 f32 f64) are for the parser.`
			`fn doNumber(self: *Scanner) Token {`
			`var ttype = TokenType.Integer;`

			`while (isDigit(self.peek())) {`
			`_ = self.advance();`
			`}`

			`// check if its a number like 12.34, where the '.' character`
			`// exists and the one next to it is a digit.`
			`if (self.peek() == '.' and isDigit(self.peekNext())) {`
			`ttype = TokenType.Float;`

			`_ = self.advance();`
			`while (isDigit(self.peek())) {`
			`_ = self.advance();`
			`}`
			`}`

			`return self.makeToken(ttype);`
			`}`

			`pub fn nextToken(self: *Scanner) !?Token {`
add nicer error handling, whitespace skipping 2019-06-04 18:18:52 +00:00			`self.skipWhitespace();`
add basic scanner logic 2019-06-04 18:06:57 +00:00			`self.start = self.current;`

			`if (self.isAtEnd()) return self.makeToken(TokenType.EOF);`

			`var c = self.advance();`
add number and comment support 2019-06-04 20:24:07 +00:00			`if (isDigit(c)) return self.doNumber();`
add basic scanner logic 2019-06-04 18:06:57 +00:00
add number and comment support 2019-06-04 20:24:07 +00:00			`var token: ?Token = switch (c) {`
add basic scanner logic 2019-06-04 18:06:57 +00:00			`'(' => self.makeToken(.LeftParen),`
			`')' => self.makeToken(.RightParen),`
			`'{' => self.makeToken(.LeftBrace),`
			`'}' => self.makeToken(.RightBrace),`
			`'[' => self.makeToken(.LeftSquare),`
			`']' => self.makeToken(.RightSquare),`
			`'.' => self.makeToken(.Dot),`
			`';' => self.makeToken(.Semicolon),`
			`',' => self.makeToken(.Comma),`
			`':' => self.makeToken(.Colon),`
			`'&' => self.makeToken(.Ampersand),`
			`'\|' => self.makeToken(.Pipe),`
			`'?' => self.makeToken(.QuestionMark),`
			`'$' => self.makeToken(.DollarSign),`

add number and comment support 2019-06-04 20:24:07 +00:00			`'-' => self.makeToken(.Minus),`
			`'+' => self.makeToken(.Plus),`
			`'*' => self.makeToken(.Star),`

add basic scanner logic 2019-06-04 18:06:57 +00:00			`'!' => self.makeMatchToken('=', .BangEqual, .Bang),`
			`'=' => self.makeMatchToken('=', .EqualEqual, .Equal),`

			`// there can be three tokens from a <`
			`// - <, which is LessThan`
			`// - <=, which is LessEqual`
			`// - <<, which is LeftDoubleChevron`
			`'<' => blk: {`
			`if (self.match('=')) {`
			`break :blk self.makeToken(.LessEqual);`
			`} else if (self.match('<')) {`
			`break :blk self.makeToken(.LeftDoubleChevron);`
			`} else {`
			`break :blk self.makeToken(.Less);`
			`}`
			`},`
			`'>' => self.makeMatchToken('=', .GreaterEqual, .Greater),`

add number and comment support 2019-06-04 20:24:07 +00:00			`'/' => blk: {`
			`if (self.peekNext() == '/') {`
			`while (self.peek() != '\n' and !self.isAtEnd()) {`
			`_ = self.advance();`
			`}`

			`break :blk null;`
			`} else {`
			`break :blk self.makeToken(.Slash);`
			`}`
			`},`

add basic scanner logic 2019-06-04 18:06:57 +00:00			`else => return ScannerError.Unexpected,`
			`};`
add runPrompt / runFile / run functions 2019-06-04 02:12:16 +00:00
add basic scanner logic 2019-06-04 18:06:57 +00:00			`return token;`
add runPrompt / runFile / run functions 2019-06-04 02:12:16 +00:00			`}`
			`};`