vig/src/scanner.zig

265 lines
6.5 KiB
Zig
Raw Normal View History

const std = @import("std");
2019-06-04 18:06:57 +00:00
const tokens = @import("tokens.zig");
const Allocator = std.mem.Allocator;
2019-06-04 18:06:57 +00:00
const Token = tokens.Token;
const TokenType = tokens.TokenType;
2019-06-04 18:06:57 +00:00
/// Errors reported by the scanner.
pub const ScannerError = error{
/// Returned by `Scanner.nextToken` when a character does not start any token it recognises.
Unexpected,
// NOTE(review): not returned anywhere in the visible code; presumably reserved
// for unterminated literals such as strings -- confirm against callers.
Unterminated,
};
/// Report whether `char` is an ASCII decimal digit (`'0'` through `'9'`).
fn isDigit(char: u8) bool {
    return switch (char) {
        '0'...'9' => true,
        else => false,
    };
}
/// Report whether `c` is an ASCII letter (either case) or an underscore.
fn isAlpha(c: u8) bool {
    return switch (c) {
        'a'...'z', 'A'...'Z', '_' => true,
        else => false,
    };
}
/// Report whether `char` is accepted by either `isDigit` or `isAlpha`,
/// i.e. it is an ASCII digit, an ASCII letter, or an underscore.
fn isAlphaNumeric(char: u8) bool {
    if (isDigit(char)) return true;
    return isAlpha(char);
}
2019-06-04 18:06:57 +00:00
/// Spellings of the reserved words looked up by `getKeyword`.
///
/// The order of the entries is significant: `getKeyword` uses the index of a
/// match in this table to index `keyword_ttypes`, so entry `i` here must
/// correspond to entry `i` there. Add, remove, or reorder both tables together.
const keywords = [][]const u8{
"break",
"const",
"continue",
"defer",
"else",
"enum",
"fn",
"for",
"go",
"goto",
"if",
"import",
"in",
"interface",
"match",
"module",
"mut",
"or",
"return",
"struct",
"type",
};
/// Token types for the reserved words, parallel to `keywords`.
///
/// `getKeyword` returns `keyword_ttypes[i]` for a match at `keywords[i]`, so
/// this table must have the same length and the same ordering as `keywords`.
const keyword_ttypes = []TokenType{
.Break,
.Const,
.Continue,
.Defer,
.Else,
.Enum,
.Fn,
.For,
.Go,
.Goto,
.If,
.Import,
.In,
.Interface,
.Match,
.Module,
.Mut,
.Or,
.Return,
.Struct,
.Type,
};
/// Look up `keyword` in the reserved-word table.
///
/// Returns the token type stored in `keyword_ttypes` at the same position as
/// the matching entry of `keywords`, or `null` when `keyword` is not a
/// reserved word. The comparison is an exact, case-sensitive byte comparison.
fn getKeyword(keyword: []const u8) ?TokenType {
    var i: usize = 0;
    while (i < keywords.len) : (i += 1) {
        if (!std.mem.eql(u8, keyword, keywords[i])) continue;
        return keyword_ttypes[i];
    }
    return null;
}
/// Scanner for vlang tokens.
///
/// Walks a source buffer byte by byte and produces one `Token` per call to
/// `nextToken`. The lexeme of every token is a slice into `source`, so the
/// buffer must stay alive for as long as the tokens are in use.
pub const Scanner = struct {
    /// Allocator handed to `init`. It is stored but not used by any code in
    /// this struct.
    allocator: *Allocator,

    /// The complete source text being scanned.
    source: []u8,

    /// Index in `source` of the first byte of the lexeme being scanned.
    start: usize = 0,

    /// Index in `source` of the next byte to be consumed.
    current: usize = 0,

    /// Current line number; starts at 1 and is incremented for every '\n'
    /// consumed by `skipWhitespace`.
    line: usize = 1,

    /// Create a scanner positioned at the beginning of `source`.
    pub fn init(allocator: *Allocator, source: []u8) Scanner {
        return Scanner{ .allocator = allocator, .source = source };
    }

    /// Report whether every byte of the source has been consumed.
    fn isAtEnd(self: *Scanner) bool {
        return self.current >= self.source.len;
    }

    /// Consume the current byte and return it.
    /// The caller must make sure the scanner is not at the end of the source.
    fn advance(self: *Scanner) u8 {
        self.current += 1;
        return self.source[self.current - 1];
    }

    /// Return the text consumed since the start of the current token.
    pub fn currentLexeme(self: *Scanner) []const u8 {
        return self.source[self.start..self.current];
    }

    /// Build a token of type `ttype` from the current lexeme and line.
    fn makeToken(self: *Scanner, ttype: TokenType) Token {
        return Token{
            .ttype = ttype,
            .lexeme = self.currentLexeme(),
            .line = self.line,
        };
    }

    /// Build a token of type `ttype` from the current lexeme, then consume
    /// one more byte. The extra byte is not part of the returned lexeme.
    fn makeTokenAdvance(self: *Scanner, ttype: TokenType) Token {
        const tok = self.makeToken(ttype);
        self.current += 1;
        return tok;
    }

    /// Check if the next character matches what is expected.
    /// The character is consumed only when it matches.
    fn match(self: *Scanner, expected: u8) bool {
        if (self.isAtEnd()) return false;
        if (self.source[self.current] != expected) return false;

        self.current += 1;
        return true;
    }

    /// Return a token of `type_match` if the next character is `expected`
    /// (consuming it), or a token of `type_nomatch` when it is not.
    fn makeMatchToken(
        self: *Scanner,
        expected: u8,
        type_match: TokenType,
        type_nomatch: TokenType,
    ) Token {
        if (self.match(expected)) {
            return self.makeToken(type_match);
        } else {
            return self.makeToken(type_nomatch);
        }
    }

    /// Return the next unconsumed byte without consuming it, or 0 at the end
    /// of the source.
    fn peek(self: *Scanner) u8 {
        if (self.isAtEnd()) return 0;
        return self.source[self.current];
    }

    /// Return the byte after the next unconsumed one without consuming
    /// anything, or 0 when there is no such byte.
    fn peekNext(self: *Scanner) u8 {
        if (self.current + 1 >= self.source.len) return 0;
        return self.source[self.current + 1];
    }

    /// Consume spaces, carriage returns, tabs and newlines, counting lines
    /// as newlines are passed. Stops at the first other byte or at the end
    /// of the source (where `peek` yields 0).
    fn skipWhitespace(self: *Scanner) void {
        while (true) {
            switch (self.peek()) {
                ' ', '\r', '\t' => {
                    _ = self.advance();
                },
                '\n' => {
                    self.line += 1;
                    _ = self.advance();
                },
                else => return,
            }
        }
    }

    /// Consume a number.
    /// Returns either an Integer or a Float token. Proper typing
    /// of the number (i32 i64 u32 u64 f32 f64) are for the parser.
    /// The first digit has already been consumed by the caller.
    fn doNumber(self: *Scanner) Token {
        var ttype = TokenType.Integer;

        while (isDigit(self.peek())) {
            _ = self.advance();
        }

        // check if its a number like 12.34, where the '.' character
        // exists and the one next to it is a digit. A trailing '.' that is
        // not followed by a digit is left for the next token.
        if (self.peek() == '.' and isDigit(self.peekNext())) {
            ttype = TokenType.Float;

            // consume the '.'
            _ = self.advance();

            while (isDigit(self.peek())) {
                _ = self.advance();
            }
        }

        return self.makeToken(ttype);
    }

    /// Scan the next token from the source.
    ///
    /// Leading whitespace is skipped first. The result is:
    /// - an `EOF` token when the end of the source has been reached;
    /// - `null` when a `//` line comment was consumed, since no token is
    ///   produced for it;
    /// - `ScannerError.Unexpected` when the next character does not start
    ///   any token handled here;
    /// - otherwise the scanned token.
    pub fn nextToken(self: *Scanner) !?Token {
        self.skipWhitespace();
        self.start = self.current;

        if (self.isAtEnd()) return self.makeToken(TokenType.EOF);

        const c = self.advance();
        if (isDigit(c)) return self.doNumber();

        const token: ?Token = switch (c) {
            '(' => self.makeToken(.LeftParen),
            ')' => self.makeToken(.RightParen),
            '{' => self.makeToken(.LeftBrace),
            '}' => self.makeToken(.RightBrace),
            '[' => self.makeToken(.LeftSquare),
            ']' => self.makeToken(.RightSquare),
            '.' => self.makeToken(.Dot),
            ';' => self.makeToken(.Semicolon),
            ',' => self.makeToken(.Comma),
            ':' => self.makeToken(.Colon),
            '&' => self.makeToken(.Ampersand),
            '|' => self.makeToken(.Pipe),
            '?' => self.makeToken(.QuestionMark),
            '$' => self.makeToken(.DollarSign),

            '-' => self.makeToken(.Minus),
            '+' => self.makeToken(.Plus),
            '*' => self.makeToken(.Star),

            '!' => self.makeMatchToken('=', .BangEqual, .Bang),
            '=' => self.makeMatchToken('=', .EqualEqual, .Equal),

            // there can be three tokens from a <
            // - <, which is LessThan
            // - <=, which is LessEqual
            // - <<, which is LeftDoubleChevron
            '<' => blk: {
                if (self.match('=')) {
                    break :blk self.makeToken(.LessEqual);
                } else if (self.match('<')) {
                    break :blk self.makeToken(.LeftDoubleChevron);
                } else {
                    break :blk self.makeToken(.Less);
                }
            },

            '>' => self.makeMatchToken('=', .GreaterEqual, .Greater),

            '/' => blk: {
                // The first '/' was already consumed by `advance()` above,
                // so the second slash of a `//` comment is the *current*
                // byte. Looking one byte further (`peekNext`) would miss
                // `//x` and would misread `/x/` as a comment.
                if (self.match('/')) {
                    // Skip to the end of the line. The '\n' itself is left
                    // in place so `skipWhitespace` counts it on the next call.
                    while (self.peek() != '\n' and !self.isAtEnd()) {
                        _ = self.advance();
                    }

                    break :blk null;
                } else {
                    break :blk self.makeToken(.Slash);
                }
            },

            else => return ScannerError.Unexpected,
        };

        return token;
    }
};