copy-paste vig's scanner into rayoko

Luna 2019-09-18 14:30:23 -03:00
parent 05509d1181
commit 93e7d9db7b
3 changed files with 481 additions and 1 deletion


@@ -1,5 +1,7 @@
const std = @import("std");
const scanners = @import("scanners.zig");

pub const Result = enum {
    Ok,
    TokenizeError,
@@ -8,6 +10,34 @@ pub const Result = enum {
};
pub fn run(allocator: *std.mem.Allocator, slice: []const u8) Result {
    var scan = scanners.Scanner.init(allocator, slice);
    //defer scan.deinit();

    // do a full scan pass, then reset, then do it again (with parser)
    while (true) {
        var tok_opt = scan.nextToken() catch |err| {
            std.debug.warn(
                "error at '{}': {}\n",
                scan.currentLexeme(),
                err,
            );
            return Result.TokenizeError;
        };

        if (tok_opt) |tok| {
            if (tok.typ == .EOF) break;

            // TODO remove
            std.debug.warn("{x}\n", tok);
        }
    }

    // scan.reset();
    //var parser = parsers.Parser.init(allocator, scan);
    //defer parser.deinit();

    return Result.Ok;
}
@@ -28,8 +58,8 @@ pub fn main() anyerror!void {
    _ = try file.read(slice);

    //switch (try run(allocator, slice)) {
    const result = run(allocator, slice);
    //const result = try run(allocator, slice);
    switch (result) {
        .Ok => std.os.exit(0),
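
For reference, a minimal sketch of how this entry point could be exercised; the input string is hypothetical, and it assumes the modified file above is src/main.zig (its hunk header shows pub fn main). This snippet is not part of the commit:

const std = @import("std");
const main = @import("main.zig");

// hypothetical smoke check: scan one small vlang-ish line end to end
fn smokeCheck(allocator: *std.mem.Allocator) void {
    const result = main.run(allocator, "mut x := 10");
    // run() prints each token with {x}, then reports success
    std.debug.assert(result == .Ok);
}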

src/scanners.zig (Normal file, +361 lines)

@@ -0,0 +1,361 @@
const std = @import("std");
const tokens = @import("tokens.zig");

const Allocator = std.mem.Allocator;
const Token = tokens.Token;
const TokenType = tokens.TokenType;

pub const ScannerError = error{
    Unexpected,
    Unterminated,
};
fn isDigit(char: u8) bool {
    return char >= '0' and char <= '9';
}

fn isAlpha(c: u8) bool {
    return (c >= 'a' and c <= 'z') or
        (c >= 'A' and c <= 'Z') or
        c == '_';
}

fn isAlphaNumeric(char: u8) bool {
    return isAlpha(char) or isDigit(char);
}
// NOTE: these two arrays are parallel; keywords[i] must stay
// index-aligned with keyword_ttypes[i] for getKeyword to work.
const keywords = [_][]const u8{
    "break",
    "const",
    "continue",
    "defer",
    "else",
    "enum",
    "fn",
    "for",
    "go",
    "goto",
    "if",
    "import",
    "in",
    "interface",
    "match",
    "module",
    "mut",
    "or",
    "return",
    "struct",
    "type",
    "true",
    "false",
    "None",
    "println",
    "loop",
    "pub",
};

const keyword_ttypes = [_]TokenType{
    .Break,
    .Const,
    .Continue,
    .Defer,
    .Else,
    .Enum,
    .Fn,
    .For,
    .Go,
    .Goto,
    .If,
    .Import,
    .In,
    .Interface,
    .Match,
    .Module,
    .Mut,
    .Or,
    .Return,
    .Struct,
    .Type,
    .True,
    .False,
    .None,
    .Println,
    .Loop,
    .Pub,
};
fn getKeyword(keyword: []const u8) ?TokenType {
    for (keywords) |kw, idx| {
        if (std.mem.eql(u8, keyword, kw)) {
            return keyword_ttypes[idx];
        }
    }

    return null;
}
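
A couple of hypothetical lookups make the parallel-array scheme concrete. This test is a sketch that could sit inside scanners.zig; it is not part of the commit:

test "keyword lookup" {
    // "mut" is in the table, so it maps to its token type
    std.debug.assert(getKeyword("mut").? == .Mut);
    // anything not in the table falls through to null
    std.debug.assert(getKeyword("rayoko") == null);
}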
/// Scanner for vlang tokens.
pub const Scanner = struct {
    allocator: *Allocator,
    source: []const u8,

    start: usize = 0,
    current: usize = 0,
    line: usize = 1,

    pub fn init(allocator: *Allocator, source: []const u8) Scanner {
        return Scanner{ .allocator = allocator, .source = source };
    }

    fn isAtEnd(self: *Scanner) bool {
        return self.current >= self.source.len;
    }

    /// Consume one character and return it.
    fn advance(self: *Scanner) u8 {
        self.current += 1;
        return self.source[self.current - 1];
    }

    /// The text spanned by the token currently being scanned.
    pub fn currentLexeme(self: *Scanner) []const u8 {
        return self.source[self.start..self.current];
    }
    fn makeToken(self: *Scanner, ttype: TokenType) Token {
        return Token{
            .typ = ttype,
            .lexeme = self.currentLexeme(),
            .line = self.line,
        };
    }

    fn makeTokenLexeme(
        self: *Scanner,
        ttype: TokenType,
        lexeme: []const u8,
    ) Token {
        return Token{
            .typ = ttype,
            .lexeme = lexeme,
            .line = self.line,
        };
    }
    /// Check if the next character matches what is expected,
    /// consuming it on a match.
    fn match(self: *Scanner, expected: u8) bool {
        if (self.isAtEnd()) return false;
        if (self.source[self.current] != expected) return false;

        self.current += 1;
        return true;
    }

    /// Make a Token of type_match if the next character is
    /// `expected`, and a Token of type_nomatch when it is not.
    fn makeMatchToken(
        self: *Scanner,
        expected: u8,
        type_match: TokenType,
        type_nomatch: TokenType,
    ) Token {
        if (self.match(expected)) {
            return self.makeToken(type_match);
        } else {
            return self.makeToken(type_nomatch);
        }
    }

    /// "triple" version of makeMatchToken.
    /// Required for vlang's three-way tokens such as '+', '++' and '+='.
    fn makeTripleMatchToken(
        self: *Scanner,
        char1: u8,
        ttype1: TokenType,
        char2: u8,
        ttype2: TokenType,
        fallback: TokenType,
    ) Token {
        if (self.match(char1)) {
            return self.makeToken(ttype1);
        } else if (self.match(char2)) {
            return self.makeToken(ttype2);
        } else {
            return self.makeToken(fallback);
        }
    }
    /// Peek at the last character consumed by the scanner.
    /// Returns 0 at the very start and at the end of input.
    fn peek(self: *Scanner) u8 {
        if (self.isAtEnd()) return 0;
        if (self.current == 0) return 0;
        return self.source[self.current - 1];
    }

    /// Peek at the next character to be consumed.
    /// Returns 0 at the end of input.
    fn peekNext(self: *Scanner) u8 {
        if (self.current >= self.source.len) return 0;
        return self.source[self.current];
    }
    /// Consume a number.
    /// Returns either an Integer or a Float token. Proper typing
    /// of the number (i32 i64 u32 u64 f32 f64) is left to the parser.
    fn doNumber(self: *Scanner) Token {
        var ttype = TokenType.Integer;

        while (isDigit(self.peekNext())) {
            _ = self.advance();
        }

        // check if it's a number like 12.34: the next character to
        // be consumed is '.' and the one after it is a digit
        // (remember that peek() here looks at the last consumed
        // character, so the lookahead must go through peekNext)
        if (self.peekNext() == '.' and
            self.current + 1 < self.source.len and
            isDigit(self.source[self.current + 1]))
        {
            ttype = TokenType.Float;

            // consume the '.'
            _ = self.advance();

            while (isDigit(self.peekNext())) {
                _ = self.advance();
            }
        }

        return self.makeToken(ttype);
    }
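
A sketch of what doNumber produces for a float literal, going through the nextToken entry point further down (not part of the commit):

test "scan a float literal" {
    var s = Scanner.init(undefined, "12.34");
    const tok = (try s.nextToken()).?;
    std.debug.assert(tok.typ == .Float);
    std.debug.assert(std.mem.eql(u8, tok.lexeme, "12.34"));
}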
    /// Consume a string. stop_char determines whether this is a
    /// single-quoted or a double-quoted string.
    fn doString(self: *Scanner, stop_char: u8) !Token {
        // consume the entire string
        while (self.peekNext() != stop_char and !self.isAtEnd()) {
            if (self.peek() == '\n') self.line += 1;
            _ = self.advance();
        }

        // unterminated string
        if (self.isAtEnd()) {
            return ScannerError.Unterminated;
        }

        // consume the closing character of the string
        _ = self.advance();

        // strip the starting and ending quotes from the lexeme
        const lexeme = self.currentLexeme();
        return self.makeTokenLexeme(
            .String,
            lexeme[1 .. lexeme.len - 1],
        );
    }
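
The same kind of sketch for strings; note the quotes are not part of the stored lexeme (not part of the commit):

test "scan a single-quoted string" {
    var s = Scanner.init(undefined, "'hello'");
    const tok = (try s.nextToken()).?;
    std.debug.assert(tok.typ == .String);
    std.debug.assert(std.mem.eql(u8, tok.lexeme, "hello"));
}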
    /// Either a keyword or an identifier comes out of this.
    fn doIdentifier(self: *Scanner) Token {
        // using peekNext means we stop *before* consuming the first
        // non-identifier character, so no rollback is needed, and
        // identifiers at the very end of the source keep their
        // last character.
        while (isAlphaNumeric(self.peekNext())) {
            _ = self.advance();
        }

        // after reading the identifier, check if it is any of our
        // keywords; if it is, use the keyword's token type,
        // if not, just .Identifier
        const toktype = getKeyword(self.currentLexeme()) orelse TokenType.Identifier;
        return self.makeToken(toktype);
    }
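
Keywords and identifiers come out of the same path, differing only in token type. A sketch (not part of the commit):

test "keyword vs identifier" {
    var s = Scanner.init(undefined, "mut foo");
    std.debug.assert((try s.nextToken()).?.typ == .Mut);

    // the space in between scans to null, which callers skip
    std.debug.assert((try s.nextToken()) == null);

    const ident = (try s.nextToken()).?;
    std.debug.assert(ident.typ == .Identifier);
    std.debug.assert(std.mem.eql(u8, ident.lexeme, "foo"));
}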
    pub fn nextToken(self: *Scanner) !?Token {
        self.start = self.current;
        if (self.isAtEnd()) return self.makeToken(TokenType.EOF);

        var c = self.advance();
        if (isDigit(c)) return self.doNumber();
        if (isAlpha(c)) return self.doIdentifier();

        var token: ?Token = switch (c) {
            '(' => self.makeToken(.LeftParen),
            ')' => self.makeToken(.RightParen),
            '{' => self.makeToken(.LeftBrace),
            '}' => self.makeToken(.RightBrace),
            '[' => self.makeToken(.LeftSquare),
            ']' => self.makeToken(.RightSquare),
            '.' => self.makeToken(.Dot),
            ';' => self.makeToken(.Semicolon),
            ',' => self.makeToken(.Comma),
            '?' => self.makeToken(.QuestionMark),
            '$' => self.makeToken(.DollarSign),
            '%' => self.makeToken(.Modulo),

            ':' => self.makeMatchToken('=', .ColonEqual, .Colon),
            '*' => self.makeMatchToken('=', .StarEqual, .Star),
            '-' => self.makeMatchToken('=', .MinusEqual, .Minus),

            // we use the existing .And and .Or tokens,
            // representing the 'and' and 'or' keywords, to
            // also cover && and ||
            '&' => self.makeMatchToken('&', .And, .Address),
            '|' => self.makeMatchToken('|', .Or, .Pipe),

            '!' => self.makeMatchToken('=', .BangEqual, .Bang),
            '=' => self.makeMatchToken('=', .EqualEqual, .Equal),
            '>' => self.makeMatchToken('=', .GreaterEqual, .Greater),

            '+' => self.makeTripleMatchToken('+', .PlusPlus, '=', .PlusEqual, .Plus),
            '<' => self.makeTripleMatchToken('=', .LessEqual, '<', .LeftDoubleChevron, .Less),

            '/' => blk: {
                var next = self.peekNext();
                switch (next) {
                    '=' => {
                        _ = self.advance();
                        return self.makeToken(.SlashEqual);
                    },

                    // line comment: skip to (but not past) the next
                    // newline, so the '\n' prong below still counts it
                    '/' => {
                        while (self.peekNext() != '\n' and !self.isAtEnd()) {
                            _ = self.advance();
                        }
                        return null;
                    },

                    // block comment: skip until the closing '*/'
                    '*' => {
                        while (!self.isAtEnd() and
                            (self.peek() != '*' or self.peekNext() != '/'))
                        {
                            if (self.peek() == '\n') self.line += 1;
                            _ = self.advance();
                        }

                        if (self.isAtEnd()) return ScannerError.Unterminated;

                        // consume the ending slash
                        _ = self.advance();
                        return null;
                    },

                    else => break :blk self.makeToken(.Slash),
                }
            },

            '\'' => try self.doString('\''),
            '"' => try self.doString('"'),

            ' ', '\r', '\t' => null,
            '\n' => blk: {
                self.line += 1;
                break :blk null;
            },

            else => return ScannerError.Unexpected,
        };

        return token;
    }
};
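
Putting the whole scanner together, a sketch of the token stream for one line of vlang-ish input; the expected types follow the tables above, and the test is not part of the commit:

test "token stream for a small line" {
    var s = Scanner.init(undefined, "if x == 10 { println('hi') }");
    const expected = [_]TokenType{
        .If,         .Identifier, .EqualEqual, .Integer,
        .LeftBrace,  .Println,    .LeftParen,  .String,
        .RightParen, .RightBrace, .EOF,
    };

    var i: usize = 0;
    while (true) {
        const tok_opt = try s.nextToken();
        if (tok_opt) |tok| {
            std.debug.assert(tok.typ == expected[i]);
            i += 1;
            if (tok.typ == .EOF) break;
        }
    }
}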

src/tokens.zig (Normal file, +89 lines)

@@ -0,0 +1,89 @@
pub const TokenType = enum {
    // basic tokens
    LeftParen,
    RightParen,
    LeftBrace,
    RightBrace,
    LeftSquare,
    RightSquare,
    Dot,
    Equal,
    Semicolon,
    Comma,
    Colon,
    Address,
    Pipe,
    QuestionMark,
    DollarSign,

    // math operators
    Plus,
    Minus,
    Star,
    Slash,
    Modulo,

    // one-two char tokens
    DotEqual,
    LeftDoubleChevron, // AKA "<<"
    PlusPlus,
    PlusEqual,
    MinusEqual,
    ColonEqual,
    StarEqual,
    SlashEqual,

    // comparison ones
    EqualEqual,
    Less,
    LessEqual,
    Greater,
    GreaterEqual,
    Bang,
    BangEqual,

    // complex types
    Integer,
    Float,
    String,
    Identifier,

    // keywords
    Break,
    Const,
    Continue,
    Defer,
    Else,
    Enum,
    Fn,
    For,
    Loop,
    Go,
    Goto,
    If,
    Import,
    In,
    Interface,
    Match,
    Module,
    Mut,
    Or,
    And,
    Return,
    Struct,
    Type,
    True,
    False,
    None,
    Println,
    Pub,

    EOF,
};
pub const Token = struct {
    typ: TokenType,
    lexeme: []const u8,
    line: usize,
};
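
The line and lexeme fields exist so later stages can point back at source locations. A hypothetical helper, not in this commit, showing how a parser built on these tokens could report errors; it assumes tokens.zig gains a std import and uses the same 2019-era std.debug.warn varargs style as main.zig above:

const std = @import("std");

// hypothetical: report a mismatch using Token's line and lexeme fields
pub fn expectType(tok: Token, wanted: TokenType) !void {
    if (tok.typ != wanted) {
        std.debug.warn(
            "line {}: expected {}, got {} ('{}')\n",
            tok.line,
            wanted,
            tok.typ,
            tok.lexeme,
        );
        return error.UnexpectedToken;
    }
}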