// vig/src/scanner.zig

const std = @import("std");
const tokens = @import("tokens.zig");
const Allocator = std.mem.Allocator;
const Token = tokens.Token;
const TokenType = tokens.TokenType;
pub const ScannerError = error{
Unexpected,
Unterminated,
};
fn isDigit(char: u8) bool {
return char >= '0' and char <= '9';
}
fn isAlpha(c: u8) bool {
return (c >= 'a' and c <= 'z') or
(c >= 'A' and c <= 'Z') or
c == '_';
}
fn isAlphaNumeric(char: u8) bool {
return isAlpha(char) or isDigit(char);
}
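// NOTE: keywords and keyword_ttypes are parallel arrays: getKeyword maps a
// keyword to its token type by index, so both lists must stay in the same
// order.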
const keywords = [_][]const u8{
"break",
"const",
"continue",
"defer",
"else",
"enum",
"fn",
"for",
"go",
"goto",
"if",
"import",
"in",
"interface",
"match",
"module",
"mut",
"or",
"return",
"struct",
"type",
"true",
"false",
"None",
};
const keyword_ttypes = [_]TokenType{
.Break,
.Const,
.Continue,
.Defer,
.Else,
.Enum,
.Fn,
.For,
.Go,
.Goto,
.If,
.Import,
.In,
.Interface,
.Match,
.Module,
.Mut,
.Or,
.Return,
.Struct,
.Type,
.True,
.False,
.None,
};
fn getKeyword(keyword: []const u8) ?TokenType {
for (keywords) |kw, idx| {
if (std.mem.eql(u8, keyword, kw)) {
return keyword_ttypes[idx];
}
}
return null;
}
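// getKeyword relies on the two tables above being index-aligned; a small
// compile-time check keeps their lengths in sync.
comptime {
    std.debug.assert(keywords.len == keyword_ttypes.len);
}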
/// Scanner for vlang tokens.
pub const Scanner = struct {
allocator: *Allocator,
source: []u8,

// start of the lexeme currently being scanned
start: usize = 0,
// index of the next unconsumed character
current: usize = 0,
// current line number, starting at 1
line: usize = 1,
pub fn init(allocator: *Allocator, source: []u8) Scanner {
return Scanner{ .allocator = allocator, .source = source };
}
fn isAtEnd(self: *Scanner) bool {
return self.current >= self.source.len;
}
fn advance(self: *Scanner) u8 {
self.current += 1;
return self.source[self.current - 1];
}
pub fn currentLexeme(self: *Scanner) []const u8 {
return self.source[self.start..self.current];
}
fn makeToken(self: *Scanner, ttype: TokenType) Token {
return Token{
.ttype = ttype,
.lexeme = self.currentLexeme(),
.line = self.line,
};
}
fn makeTokenLexeme(
self: *Scanner,
ttype: TokenType,
lexeme: []const u8,
) Token {
return Token{
.ttype = ttype,
.lexeme = lexeme,
.line = self.line,
};
}
/// Check if the next character matches `expected`, consuming it when it does.
fn match(self: *Scanner, expected: u8) bool {
if (self.isAtEnd()) return false;
if (self.source[self.current] != expected) return false;
self.current += 1;
return true;
}
/// Make a token of type_match if the next character is `expected`
/// (consuming it); otherwise make a token of type_nomatch.
fn makeMatchToken(
self: *Scanner,
expected: u8,
type_match: TokenType,
type_nomatch: TokenType,
) Token {
if (self.match(expected)) {
return self.makeToken(type_match);
} else {
return self.makeToken(type_nomatch);
}
}
/// "triple" version of makeMatchToken.
/// Required per vlang's tokens.
fn makeTripleMatchToken(
self: *Scanner,
char1: u8,
ttype1: TokenType,
char2: u8,
ttype2: TokenType,
fallback: TokenType,
) Token {
if (self.match(char1)) {
return self.makeToken(ttype1);
} else if (self.match(char2)) {
return self.makeToken(ttype2);
} else {
return self.makeToken(fallback);
}
}
/// Peek at the most recently consumed character (the one just
/// returned by advance); 0 before the first advance or at end of input.
fn peek(self: *Scanner) u8 {
    if (self.isAtEnd()) return 0;
    if (self.current == 0) return 0;
    return self.source[self.current - 1];
}

/// Peek at the next unconsumed character (the one the next call to
/// advance would return); 0 at end of input.
fn peekNext(self: *Scanner) u8 {
    if (self.current + 1 > self.source.len) return 0;
    return self.source[self.current];
}
/// Consume a number.
/// Returns either an Integer or a Float token. Proper typing
/// of the number (i32 i64 u32 u64 f32 f64) is left to the parser.
fn doNumber(self: *Scanner) Token {
    var ttype = TokenType.Integer;

    // consume the integer part
    while (isDigit(self.peekNext())) {
        _ = self.advance();
    }

    // check if it's a number like 12.34: the next unconsumed character
    // is a '.' and the character after it is a digit.
    if (self.peekNext() == '.' and
        self.current + 1 < self.source.len and
        isDigit(self.source[self.current + 1]))
    {
        ttype = TokenType.Float;

        // consume the '.'
        _ = self.advance();

        // consume the fractional digits
        while (isDigit(self.peekNext())) {
            _ = self.advance();
        }
    }

    return self.makeToken(ttype);
}
/// Consume a string literal. stop_char is the delimiter that
/// terminates it (a single or a double quote).
fn doString(self: *Scanner, stop_char: u8) !Token {
    // consume the string's contents
    while (self.peekNext() != stop_char and !self.isAtEnd()) {
        // keep the line counter in sync across multi-line strings
        if (self.peekNext() == '\n') self.line += 1;
        _ = self.advance();
    }

    // unterminated string.
    if (self.isAtEnd()) {
        return ScannerError.Unterminated;
    }

    // consume the closing delimiter of the string
    _ = self.advance();

    // strip the opening and closing delimiters from the lexeme
    const lexeme = self.currentLexeme();
    return self.makeTokenLexeme(
        .String,
        lexeme[1 .. lexeme.len - 1],
    );
}
/// Either a keyword or an identifier comes out of this.
fn doIdentifier(self: *Scanner) Token {
    // consume while the next unconsumed character is alphanumeric;
    // checking peek() here would also swallow the character that
    // follows the identifier.
    while (isAlphaNumeric(self.peekNext())) {
        _ = self.advance();
    }

    // after reading the identifier, check whether it is one of our
    // keywords; if so, use that keyword's token type, otherwise it
    // is a plain .Identifier.
    const toktype = getKeyword(self.currentLexeme()) orelse TokenType.Identifier;
    return self.makeToken(toktype);
}
/// Scan and return the next token.
///
/// Returns a token of type .EOF once the end of the source is reached,
/// and null when the consumed input produces no token (whitespace or a
/// comment); callers should simply call nextToken again in that case.
/// May fail with a ScannerError.
pub fn nextToken(self: *Scanner) !?Token {
    self.start = self.current;
    if (self.isAtEnd()) return self.makeToken(TokenType.EOF);

    const c = self.advance();
    if (isDigit(c)) return self.doNumber();
    if (isAlpha(c)) return self.doIdentifier();

    const token: ?Token = switch (c) {
'(' => self.makeToken(.LeftParen),
')' => self.makeToken(.RightParen),
'{' => self.makeToken(.LeftBrace),
'}' => self.makeToken(.RightBrace),
'[' => self.makeToken(.LeftSquare),
']' => self.makeToken(.RightSquare),
'.' => self.makeToken(.Dot),
';' => self.makeToken(.Semicolon),
',' => self.makeToken(.Comma),
':' => self.makeToken(.Colon),
'&' => self.makeToken(.Ampersand),
'|' => self.makeToken(.Pipe),
'?' => self.makeToken(.QuestionMark),
'$' => self.makeToken(.DollarSign),
'-' => self.makeToken(.Minus),
'*' => self.makeToken(.Star),
'%' => self.makeToken(.Modulo),
'!' => self.makeMatchToken('=', .BangEqual, .Bang),
'=' => self.makeMatchToken('=', .EqualEqual, .Equal),
'>' => self.makeMatchToken('=', .GreaterEqual, .Greater),
'+' => self.makeTripleMatchToken('+', .PlusPlus, '=', .PlusEqual, .Plus),
'<' => self.makeTripleMatchToken('=', .LessEqual, '<', .LeftDoubleChevron, .Less),
'/' => blk: {
    const next = self.peekNext();
    switch (next) {
        // line comment: skip up to (but not including) the newline,
        // so the '\n' case below still bumps the line counter.
        '/' => {
            while (self.peekNext() != '\n' and !self.isAtEnd()) {
                _ = self.advance();
            }
            return null;
        },

        // block comment: skip until the closing "*/".
        '*' => {
            while (!self.isAtEnd() and
                (self.peek() != '*' or self.peekNext() != '/'))
            {
                if (self.peekNext() == '\n') self.line += 1;
                _ = self.advance();
            }

            // running out of input means the comment was never closed
            if (self.isAtEnd()) return ScannerError.Unterminated;

            // consume the ending slash
            _ = self.advance();
            return null;
        },

        else => break :blk self.makeToken(.Slash),
    }
},
'\'' => try self.doString('\''),
'"' => try self.doString('"'),
' ', '\r', '\t' => null,
'\n' => blk: {
self.line += 1;
break :blk null;
},
else => return ScannerError.Unexpected,
};
return token;
}
};
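
// A minimal usage sketch of the Scanner. The allocator only satisfies the
// init signature (nothing in this file allocates), so a FixedBufferAllocator
// over a tiny buffer is enough; swap in whatever allocator your project uses.
test "scan a tiny snippet" {
    var heap_buffer: [16]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(heap_buffer[0..]);

    // the source must be a mutable slice per Scanner.init's signature
    var source_buffer = [_]u8{ 'x', ' ', '=', ' ', '1' };
    var scanner = Scanner.init(&fba.allocator, source_buffer[0..]);

    var count: usize = 0;
    while (true) {
        // null means whitespace or a comment was consumed; keep scanning.
        const maybe_token = scanner.nextToken() catch unreachable;
        if (maybe_token) |token| {
            if (token.ttype == TokenType.EOF) break;
            count += 1;
        }
    }

    // expected tokens: an Identifier, an Equal, and an Integer
    std.debug.assert(count == 3);
}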