add basic scanner logic

This commit is contained in:
Luna 2019-06-04 15:06:57 -03:00
parent 3d243eefff
commit 59083198c6
3 changed files with 197 additions and 7 deletions

View file

@ -1,5 +1,7 @@
const std = @import("std");
const scanners = @import("scanner.zig");
const Allocator = std.mem.Allocator;
pub const Result = error{
@ -10,6 +12,15 @@ pub const Result = error{
/// Scan `data` and print every token to stdout, stopping at the first
/// EOF token. Errors from the scanner or from writing are propagated.
fn run(allocator: *Allocator, data: []u8) !void {
    var stdout_file = try std.io.getStdOut();
    const stdout = &stdout_file.outStream().stream;

    var scanner = scanners.Scanner.init(allocator, data);

    while (true) {
        const tok = try scanner.nextToken();
        if (tok.ttype != .EOF) {
            try stdout.print("{x}\n", tok);
            continue;
        }
        // EOF itself is never printed.
        break;
    }

    // NOTE(review): `Result` is declared as an error set, so this value
    // travels through the error channel — confirm callers treat
    // `Result.Ok` as success.
    return Result.Ok;
}

View file

@ -1,9 +1,11 @@
const std = @import("std");
const tokens = @import("tokens.std");
const tokens = @import("tokens.zig");
const Allocator = std.mem.Allocator;
const Token = tokens.Token;
const TokenType = tokens.TokenType;
pub const TokenError = error{
/// Errors the scanner can report while tokenizing.
pub const ScannerError = error{
// Returned by `Scanner.nextToken` for a byte that none of its switch
// cases handle.
Unexpected,
// NOTE(review): not returned by any code visible here — presumably meant
// for unterminated literals; confirm once that logic exists.
Unterminated,
};
@ -22,10 +24,182 @@ fn isAlphaNumeric(char: u8) bool {
return isAlpha(char) or isDigit(char);
}
/// Keyword spellings looked up by `getKeyword`.
/// This table is parallel to `keyword_ttypes`: the entry at index i here
/// maps to the token type at index i there, so both tables must keep the
/// same length and order.
const keywords = [][]const u8{
"break",
"const",
"continue",
"defer",
"else",
"enum",
"fn",
"for",
"go",
"goto",
"if",
"import",
"in",
"interface",
"match",
"module",
"mut",
"or",
"return",
"struct",
"type",
};
/// Token types returned by `getKeyword`.
/// This table is parallel to `keywords`: the entry at index i is the token
/// type for the spelling at index i there, so both tables must keep the
/// same length and order.
const keyword_ttypes = []TokenType{
.Break,
.Const,
.Continue,
.Defer,
.Else,
.Enum,
.Fn,
.For,
.Go,
.Goto,
.If,
.Import,
.In,
.Interface,
.Match,
.Module,
.Mut,
.Or,
.Return,
.Struct,
.Type,
};
/// Look up `keyword` in the `keywords` table.
/// Returns the token type stored at the same index in `keyword_ttypes`,
/// or null when `keyword` matches no entry. Comparison is byte-exact.
fn getKeyword(keyword: []const u8) ?TokenType {
    var i: usize = 0;
    while (i < keywords.len) : (i += 1) {
        if (!std.mem.eql(u8, keyword, keywords[i])) continue;
        return keyword_ttypes[i];
    }
    return null;
}
/// Scanner for vlang tokens.
pub const Scanner = struct {
allocator: *Allocator,
source: []u8,
pub fn init(allocator: *Allocator) Scanner {
return Scanner{ .allocator = allocator };
start: usize = 0,
current: usize = 0,
line: usize = 1,
/// Create a scanner over `source`. Only `allocator` and `source` are set
/// here; the remaining fields keep their declared defaults.
pub fn init(allocator: *Allocator, source: []u8) Scanner {
    return Scanner{
        .source = source,
        .allocator = allocator,
    };
}
/// True once `current` has reached or passed the end of `source`.
fn isAtEnd(self: *Scanner) bool {
    const remaining = self.current < self.source.len;
    return !remaining;
}
/// Consume the byte at `current` and return it.
/// There is no end-of-input check here; the index into `source` is made
/// unconditionally.
fn advance(self: *Scanner) u8 {
    const ch = self.source[self.current];
    self.current += 1;
    return ch;
}
/// Slice of `source` covering the lexeme being scanned, from `start`
/// up to but not including `current`.
fn currentLexeme(self: *Scanner) []const u8 {
    const begin = self.start;
    const end = self.current;
    return self.source[begin..end];
}
/// Build a token of type `ttype` whose lexeme is the current lexeme and
/// whose line is the scanner's current `line` value.
fn makeToken(self: *Scanner, ttype: TokenType) Token {
    const lexeme = self.currentLexeme();
    return Token{
        .line = self.line,
        .lexeme = lexeme,
        .ttype = ttype,
    };
}
/// Build a token of type `ttype` from the current lexeme, then move
/// `current` forward by one. The token is created before the increment,
/// so the skipped byte is not part of its lexeme.
fn makeTokenAdvance(self: *Scanner, ttype: TokenType) Token {
    // Runs after the return value has been computed.
    defer self.current += 1;
    return self.makeToken(ttype);
}
/// Consume the next byte only if it equals `expected`.
/// Returns true and advances `current` on a match; returns false and
/// leaves the scanner untouched at end of input or on a mismatch.
fn match(self: *Scanner, expected: u8) bool {
    // `or` short-circuits, so `source` is never indexed at end of input.
    if (self.isAtEnd() or self.source[self.current] != expected) {
        return false;
    }

    self.current += 1;
    return true;
}
/// Build a token for an operator with one optional trailing byte.
/// If the next byte is `expected` it is consumed and the token has type
/// `type_match`; otherwise nothing is consumed and the token has type
/// `type_nomatch`.
fn makeMatchToken(
    self: *Scanner,
    expected: u8,
    type_match: TokenType,
    type_nomatch: TokenType,
) Token {
    const chosen = if (self.match(expected)) type_match else type_nomatch;
    return self.makeToken(chosen);
}
/// Return the byte at `current` without consuming it, or 0 at end of input.
fn peek(self: *Scanner) u8 {
    return if (self.isAtEnd()) 0 else self.source[self.current];
}
/// Return the byte one past `current` without consuming anything,
/// or 0 when that position is outside `source`.
fn peekNext(self: *Scanner) u8 {
    const next = self.current + 1;
    if (next < self.source.len) {
        return self.source[next];
    }
    return 0;
}
/// Scan and return the next token.
/// At end of input an EOF token is returned. Any byte without a case in
/// the switch below (no whitespace case exists here) yields
/// `ScannerError.Unexpected`.
pub fn nextToken(self: *Scanner) !Token {
    // A new lexeme begins where the previous one stopped.
    self.start = self.current;
    if (self.isAtEnd()) return self.makeToken(.EOF);

    const first = self.advance();
    const token = switch (first) {
        // Tokens that are always a single byte.
        '(' => self.makeToken(.LeftParen),
        ')' => self.makeToken(.RightParen),
        '{' => self.makeToken(.LeftBrace),
        '}' => self.makeToken(.RightBrace),
        '[' => self.makeToken(.LeftSquare),
        ']' => self.makeToken(.RightSquare),
        '.' => self.makeToken(.Dot),
        ';' => self.makeToken(.Semicolon),
        ',' => self.makeToken(.Comma),
        ':' => self.makeToken(.Colon),
        '&' => self.makeToken(.Ampersand),
        '|' => self.makeToken(.Pipe),
        '?' => self.makeToken(.QuestionMark),
        '$' => self.makeToken(.DollarSign),

        // Tokens that may be followed by `=`.
        '!' => self.makeMatchToken('=', .BangEqual, .Bang),
        '=' => self.makeMatchToken('=', .EqualEqual, .Equal),
        '>' => self.makeMatchToken('=', .GreaterEqual, .Greater),

        // `<` can start three tokens, tried in this order:
        //   `<=` LessEqual, `<<` LeftDoubleChevron, `<` Less.
        '<' => if (self.match('='))
            self.makeToken(.LessEqual)
        else
            self.makeMatchToken('<', .LeftDoubleChevron, .Less),

        else => return ScannerError.Unexpected,
    };

    return token;
}
};

View file

@ -30,9 +30,11 @@ pub const TokenType = enum {
PlusEqual,
MinusEqual,
LessThan,
// comparison ones
EqualEqual,
Less,
LessEqual,
GreaterThan,
Greater,
GreaterEqual,
Bang,
BangEqual,
@ -65,9 +67,12 @@ pub const TokenType = enum {
Return,
Struct,
Type,
EOF,
};
/// A single token, as built by `Scanner.makeToken`.
pub const Token = struct {
// Kind of token.
ttype: TokenType,
// Text of the token; the scanner fills this with the slice of its source
// between its `start` and `current` positions.
lexeme: []u8,
lexeme: []const u8,
// Value of the scanner's `line` field at the time the token was built.
line: usize,
};