forked from luna/jorts
move new_scanner.zig to scanner.zig
This commit is contained in:
parent
71dba5c77d
commit
44c27f43b7
3 changed files with 88 additions and 420 deletions
|
@ -1,5 +1,5 @@
|
|||
const std = @import("std");
|
||||
const scanner = @import("new_scanner.zig");
|
||||
const scanner = @import("scanner.zig");
|
||||
const vm = @import("vm.zig");
|
||||
const chunks = @import("chunk.zig");
|
||||
const tokens = @import("token.zig");
|
||||
|
|
|
@ -1,276 +0,0 @@
|
|||
const std = @import("std");
|
||||
const tokens = @import("token.zig");
|
||||
|
||||
const Token = tokens.Token;
|
||||
const TokenType = tokens.TokenType;
|
||||
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
pub const TokenError = error{
|
||||
Unexpected,
|
||||
Unterminated,
|
||||
};
|
||||
|
||||
/// Report whether `char` is an ASCII decimal digit ('0' through '9').
fn isDigit(char: u8) bool {
    return switch (char) {
        '0'...'9' => true,
        else => false,
    };
}
|
||||
|
||||
/// Report whether `c` is an ASCII letter (either case) or an underscore.
fn isAlpha(c: u8) bool {
    return switch (c) {
        'a'...'z', 'A'...'Z', '_' => true,
        else => false,
    };
}
|
||||
|
||||
/// Report whether `char` is accepted by either `isAlpha` or `isDigit`.
fn isAlphaNumeric(char: u8) bool {
    if (isDigit(char)) return true;
    return isAlpha(char);
}
|
||||
|
||||
/// Map from a keyword's text to a `u6` value (the integer form of a
/// `TokenType` tag, as stored by `initKeywordMap`).
/// NOTE(review): the key type is a slice. Whether `AutoHashMap` hashes and
/// compares slice *contents* depends on the Zig std version in use — confirm
/// for the targeted compiler (newer std offers `StringHashMap` for this).
pub const KeywordMap = std.AutoHashMap([]const u8, u6);
|
||||
|
||||
/// Build the table that maps each reserved word to the integer value of its
/// `TokenType` tag.
///
/// The book this scanner follows notes that C has no hashmap; Zig's standard
/// library provides one, so keywords are resolved with a map lookup.
///
/// The keys are slices of string literals. Returns an error if inserting into
/// the map fails; in that case the partially filled map is released before
/// the error propagates.
fn initKeywordMap(allocator: *std.mem.Allocator) !KeywordMap {
    var map = KeywordMap.init(allocator);
    // Without this, a failed `put` would leak every entry inserted before it.
    errdefer map.deinit();

    // One row per keyword keeps the text and its token type side by side, so
    // the two can never get out of step the way parallel arrays can.
    const Keyword = struct {
        text: []const u8,
        ttype: TokenType,
    };

    const table = []Keyword{
        Keyword{ .text = "and"[0..], .ttype = TokenType.AND },
        Keyword{ .text = "class"[0..], .ttype = TokenType.CLASS },
        Keyword{ .text = "else"[0..], .ttype = TokenType.ELSE },
        Keyword{ .text = "false"[0..], .ttype = TokenType.FALSE },
        Keyword{ .text = "for"[0..], .ttype = TokenType.FOR },
        Keyword{ .text = "fun"[0..], .ttype = TokenType.FUN },
        Keyword{ .text = "if"[0..], .ttype = TokenType.IF },
        Keyword{ .text = "nil"[0..], .ttype = TokenType.NIL },
        Keyword{ .text = "or"[0..], .ttype = TokenType.OR },
        Keyword{ .text = "print"[0..], .ttype = TokenType.PRINT },
        Keyword{ .text = "return"[0..], .ttype = TokenType.RETURN },
        Keyword{ .text = "super"[0..], .ttype = TokenType.SUPER },
        Keyword{ .text = "this"[0..], .ttype = TokenType.THIS },
        Keyword{ .text = "true"[0..], .ttype = TokenType.TRUE },
        Keyword{ .text = "var"[0..], .ttype = TokenType.VAR },
        Keyword{ .text = "while"[0..], .ttype = TokenType.WHILE },
    };

    for (table) |entry| {
        _ = try map.put(entry.text, @enumToInt(entry.ttype));
    }

    return map;
}
|
||||
|
||||
/// On-demand tokenizer over a source buffer.
///
/// `scanToken` produces one token per call. Every token's lexeme is a slice
/// into `source`, so the source buffer must stay alive while tokens are used.
pub const Scanner = struct {
    /// The text being scanned.
    source: []const u8,
    /// Keyword text to `TokenType` integer value, built by `initKeywordMap`.
    keywords: KeywordMap,

    /// Index of the first byte of the lexeme currently being scanned.
    start: usize = 0,
    /// Index of the next byte to consume.
    current: usize = 0,
    /// Line counter; starts at 1 and is bumped for every '\n' consumed.
    line: usize = 1,

    allocator: *Allocator,

    /// Create a scanner over `data`. Fails if the keyword map cannot be built.
    pub fn init(allocator: *Allocator, data: []const u8) !Scanner {
        return Scanner{
            .source = data,
            .keywords = try initKeywordMap(allocator),
            .allocator = allocator,
        };
    }

    /// Release the keyword map owned by this scanner.
    pub fn deinit(self: *Scanner) void {
        self.keywords.deinit();
    }

    /// True once every byte of `source` has been consumed.
    fn isAtEnd(self: *Scanner) bool {
        return self.current >= self.source.len;
    }

    /// Consume one byte and return it. The caller must ensure the scanner is
    /// not at the end of the source.
    fn advance(self: *Scanner) u8 {
        self.current += 1;
        return self.source[self.current - 1];
    }

    /// The slice of `source` from `start` up to (not including) `current`.
    pub fn currentLexeme(self: *Scanner) []const u8 {
        return self.source[self.start..self.current];
    }

    /// Build a token of type `ttype` from the current lexeme and line.
    fn makeToken(self: *Scanner, ttype: TokenType) Token {
        return Token{
            .ttype = ttype,
            .lexeme = self.currentLexeme(),
            .line = self.line,
        };
    }

    /// Consume the next byte and return true only if it equals `expected`;
    /// otherwise leave the position untouched and return false.
    fn match(self: *Scanner, expected: u8) bool {
        if (self.isAtEnd()) return false;
        if (self.source[self.current] != expected) return false;

        self.current += 1;
        return true;
    }

    /// Make a token of `type_match` if the next byte is `expected` (consuming
    /// it), or a token of `type_nomatch` when it is not.
    fn makeMatchToken(
        self: *Scanner,
        expected: u8,
        type_match: TokenType,
        type_nomatch: TokenType,
    ) Token {
        // `match` must run before `makeToken` so the lexeme includes the
        // second byte when it was consumed.
        const ttype = if (self.match(expected)) type_match else type_nomatch;
        return self.makeToken(ttype);
    }

    /// The next unconsumed byte, or 0 at the end of the source.
    fn peek(self: *Scanner) u8 {
        if (self.isAtEnd()) return 0;
        return self.source[self.current];
    }

    /// The byte after the next unconsumed one, or 0 if there is none.
    fn peekNext(self: *Scanner) u8 {
        if (self.current + 1 >= self.source.len) return 0;
        return self.source[self.current + 1];
    }

    /// Consume spaces, carriage returns, tabs and newlines, counting lines.
    fn skipWhitespace(self: *Scanner) void {
        while (true) {
            switch (self.peek()) {
                ' ', '\r', '\t' => {
                    _ = self.advance();
                },
                '\n' => {
                    self.line += 1;
                    _ = self.advance();
                },
                // Includes the 0 that `peek` yields at end of input.
                else => return,
            }
        }
    }

    /// Scan the rest of a string literal; the opening '"' is already consumed.
    /// Returns `TokenError.Unterminated` if the source ends before a closing '"'.
    fn doString(self: *Scanner) !Token {
        // consume the whole string body, counting embedded newlines.
        while (self.peek() != '"' and !self.isAtEnd()) {
            if (self.peek() == '\n') self.line += 1;
            _ = self.advance();
        }

        // unterminated string.
        if (self.isAtEnd()) {
            return TokenError.Unterminated;
        }

        // the closing ".
        _ = self.advance();

        // the lexeme keeps both surrounding quotes.
        return self.makeToken(.STRING);
    }

    /// Consume a number; its first digit is already consumed.
    fn doNumber(self: *Scanner) Token {
        while (isDigit(self.peek())) {
            _ = self.advance();
        }

        // check if it is a number like 12.34, where the '.' character
        // exists and the one next to it is a digit.
        if (self.peek() == '.' and isDigit(self.peekNext())) {
            _ = self.advance();

            while (isDigit(self.peek())) {
                _ = self.advance();
            }
        }

        return self.makeToken(.NUMBER);
    }

    /// Either a keyword or an identifier comes out of this.
    fn doIdentifier(self: *Scanner) Token {
        while (isAlphaNumeric(self.peek())) {
            _ = self.advance();
        }

        // after reading the identifier, check whether it is one of the
        // keywords. if it is, use the specified keyword type; if not,
        // it is a plain .IDENTIFIER
        const text = self.currentLexeme();
        const toktype = if (self.keywords.get(text)) |kv|
            @intToEnum(TokenType, kv.value)
        else
            TokenType.IDENTIFIER;

        return self.makeToken(toktype);
    }

    /// Scan the next token.
    ///
    /// Returns an EOF token at the end of the source, and `null` when a `//`
    /// line comment was consumed instead of a token. Returns
    /// `TokenError.Unexpected` for a byte that starts no token and
    /// `TokenError.Unterminated` for an unclosed string.
    pub fn scanToken(self: *Scanner) !?Token {
        self.skipWhitespace();
        self.start = self.current;

        if (self.isAtEnd()) return self.makeToken(TokenType.EOF);

        const c = self.advance();
        if (isAlpha(c)) return self.doIdentifier();
        if (isDigit(c)) return self.doNumber();

        const result: ?Token = switch (c) {
            '(' => self.makeToken(.LEFT_PAREN),
            ')' => self.makeToken(.RIGHT_PAREN),
            '{' => self.makeToken(.LEFT_BRACE),
            '}' => self.makeToken(.RIGHT_BRACE),
            ',' => self.makeToken(.COMMA),
            '.' => self.makeToken(.DOT),
            '-' => self.makeToken(.MINUS),
            '+' => self.makeToken(.PLUS),
            ';' => self.makeToken(.SEMICOLON),
            '*' => self.makeToken(.STAR),

            '!' => self.makeMatchToken('=', .BANG_EQUAL, .BANG),
            '=' => self.makeMatchToken('=', .EQUAL_EQUAL, .EQUAL),
            '<' => self.makeMatchToken('=', .LESS_EQUAL, .LESS),
            '>' => self.makeMatchToken('=', .GREATER_EQUAL, .GREATER),

            '/' => blk: {
                // The first '/' was consumed by `advance` above, so the
                // second one is the *current* byte (`peek`), not the byte
                // after it.
                if (self.peek() == '/') {
                    while (self.peek() != '\n' and !self.isAtEnd()) {
                        _ = self.advance();
                    }

                    break :blk null;
                } else {
                    break :blk self.makeToken(.SLASH);
                }
            },

            '"' => try self.doString(),

            else => return TokenError.Unexpected,
        };

        return result;
    }
};
|
230
src/scanner.zig
230
src/scanner.zig
|
@ -1,10 +1,15 @@
|
|||
const std = @import("std");
|
||||
const tokens = @import("token.zig");
|
||||
|
||||
const token = @import("token.zig");
|
||||
const main = @import("main.zig");
|
||||
const Token = tokens.Token;
|
||||
const TokenType = tokens.TokenType;
|
||||
|
||||
const TokenList = std.ArrayList(token.Token);
|
||||
const TokenType = token.TokenType;
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
pub const TokenError = error{
|
||||
Unexpected,
|
||||
Unterminated,
|
||||
};
|
||||
|
||||
fn isDigit(char: u8) bool {
|
||||
return char >= '0' and char <= '9';
|
||||
|
@ -22,6 +27,8 @@ fn isAlphaNumeric(char: u8) bool {
|
|||
|
||||
pub const KeywordMap = std.AutoHashMap([]const u8, u6);
|
||||
|
||||
/// The book notes that C has no built-in hashmap, but Zig's standard library
|
||||
/// does, so one is used here.
|
||||
fn initKeywordMap(allocator: *std.mem.Allocator) !KeywordMap {
|
||||
var map = KeywordMap.init(allocator);
|
||||
|
||||
|
@ -72,19 +79,20 @@ fn initKeywordMap(allocator: *std.mem.Allocator) !KeywordMap {
|
|||
}
|
||||
|
||||
pub const Scanner = struct {
|
||||
source: []u8,
|
||||
tokens: TokenList,
|
||||
source: []const u8,
|
||||
keywords: KeywordMap,
|
||||
|
||||
start: usize = 0,
|
||||
current: usize = 0,
|
||||
line: usize = 1,
|
||||
|
||||
pub fn init(allocator: *std.mem.Allocator, data: []u8) !Scanner {
|
||||
allocator: *Allocator,
|
||||
|
||||
pub fn init(allocator: *Allocator, data: []const u8) !Scanner {
|
||||
return Scanner{
|
||||
.source = data,
|
||||
.tokens = TokenList.init(allocator),
|
||||
.keywords = try initKeywordMap(allocator),
|
||||
.allocator = allocator,
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -97,49 +105,16 @@ pub const Scanner = struct {
|
|||
return self.source[self.current - 1];
|
||||
}
|
||||
|
||||
pub fn currentLexeme(self: *Scanner) []u8 {
|
||||
pub fn currentLexeme(self: *Scanner) []const u8 {
|
||||
return self.source[self.start..self.current];
|
||||
}
|
||||
|
||||
/// Build a `Simple` token of type `ttype` from the current lexeme and line,
/// and hand it to `addToken`. Propagates any error `addToken` returns.
fn addSimpleToken(self: *Scanner, ttype: token.TokenType) !void {
|
||||
try self.addToken(token.Token{
|
||||
.Simple = token.SimpleToken.init(
|
||||
ttype,
|
||||
self.currentLexeme(),
|
||||
self.line,
|
||||
// NOTE(review): `{}` is the void value; presumably a SimpleToken carries
// no extra payload — confirm against token.zig.
{},
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
/// Build a `Slice` token of type `ttype` from the current lexeme and line,
/// passing `slice` as the final initializer argument, and hand it to
/// `addToken`. Propagates any error `addToken` returns.
fn addSliceToken(self: *Scanner, ttype: token.TokenType, slice: []u8) !void {
|
||||
try self.addToken(token.Token{
|
||||
.Slice = token.SliceToken.init(
|
||||
ttype,
|
||||
self.currentLexeme(),
|
||||
self.line,
|
||||
// NOTE(review): presumably the token's value, distinct from the raw
// lexeme above — confirm against token.zig.
slice,
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
/// Build a `Number` token of type `ttype` from the current lexeme and line,
/// passing `num` as the final initializer argument, and hand it to
/// `addToken`. Propagates any error `addToken` returns.
///
/// Keep in mind Lox only has a single number type and that is a float one.
|
||||
fn addNumberToken(self: *Scanner, ttype: token.TokenType, num: f32) !void {
|
||||
try self.addToken(token.Token{
|
||||
.Number = token.NumberToken.init(
|
||||
ttype,
|
||||
self.currentLexeme(),
|
||||
self.line,
|
||||
num,
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
fn addToken(
|
||||
self: *Scanner,
|
||||
tok: token.Token,
|
||||
) !void {
|
||||
try self.tokens.append(tok);
|
||||
/// Build a token of type `ttype` whose lexeme is the scanner's current
/// lexeme and whose line is the scanner's current line.
fn makeToken(self: *Scanner, ttype: TokenType) Token {
    const lexeme = self.currentLexeme();
    const line = self.line;

    return Token{
        .ttype = ttype,
        .lexeme = lexeme,
        .line = line,
    };
}
|
||||
|
||||
/// Check if the next character matches what is expected.
|
||||
|
@ -153,16 +128,16 @@ pub const Scanner = struct {
|
|||
|
||||
/// Add a SimpleToken of type_match if the next character is
|
||||
/// `expected`. Adds a SimpleToken of type_nomatch when it is not.
|
||||
fn addMatchToken(
|
||||
fn makeMatchToken(
|
||||
self: *Scanner,
|
||||
expected: u8,
|
||||
type_match: token.TokenType,
|
||||
type_nomatch: token.TokenType,
|
||||
) !void {
|
||||
type_match: TokenType,
|
||||
type_nomatch: TokenType,
|
||||
) Token {
|
||||
if (self.match(expected)) {
|
||||
try self.addSimpleToken(type_match);
|
||||
return self.makeToken(type_match);
|
||||
} else {
|
||||
try self.addSimpleToken(type_nomatch);
|
||||
return self.makeToken(type_nomatch);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -171,7 +146,28 @@ pub const Scanner = struct {
|
|||
return self.source[self.current];
|
||||
}
|
||||
|
||||
fn doString(self: *Scanner) !void {
|
||||
/// Return the byte one past the next unconsumed byte, or 0 when that
/// position lies beyond the end of the source.
fn peekNext(self: *Scanner) u8 {
    const next_index = self.current + 1;
    return if (next_index < self.source.len) self.source[next_index] else 0;
}
|
||||
|
||||
/// Consume consecutive spaces, carriage returns, tabs and newlines,
/// incrementing `line` once per newline. Stops at the first other byte.
fn skipWhitespace(self: *Scanner) void {
    while (true) {
        const c = self.peek();

        if (c == '\n') {
            self.line += 1;
        } else if (c != ' ' and c != '\r' and c != '\t') {
            // Anything else ends the run of whitespace.
            return;
        }

        _ = self.advance();
    }
}
|
||||
|
||||
fn doString(self: *Scanner) !Token {
|
||||
// consume entire string
|
||||
while (self.peek() != '"' and !self.isAtEnd()) {
|
||||
if (self.peek() == '\n') self.line += 1;
|
||||
|
@ -180,27 +176,18 @@ pub const Scanner = struct {
|
|||
|
||||
// unterminated string.
|
||||
if (self.isAtEnd()) {
|
||||
try main.doError(self.line, "Unterminated string.");
|
||||
return;
|
||||
return TokenError.Unterminated;
|
||||
}
|
||||
|
||||
// the closing ".
|
||||
_ = self.advance();
|
||||
|
||||
// trim the surrounding quotes.
|
||||
try self.addSliceToken(
|
||||
.STRING,
|
||||
self.source[self.start + 1 .. self.current - 1],
|
||||
);
|
||||
}
|
||||
|
||||
fn peekNext(self: *Scanner) u8 {
|
||||
if (self.current + 1 >= self.source.len) return 0;
|
||||
return self.source[self.current + 1];
|
||||
return self.makeToken(.STRING);
|
||||
}
|
||||
|
||||
/// Consume a number
|
||||
fn doNumber(self: *Scanner) !void {
|
||||
fn doNumber(self: *Scanner) Token {
|
||||
while (isDigit(self.peek())) {
|
||||
_ = self.advance();
|
||||
}
|
||||
|
@ -215,17 +202,11 @@ pub const Scanner = struct {
|
|||
}
|
||||
}
|
||||
|
||||
// after going through all of the number, we can just use fmt.parseFloat
|
||||
|
||||
var num = try std.fmt.parseFloat(
|
||||
f32,
|
||||
self.source[self.start..self.current],
|
||||
);
|
||||
|
||||
try self.addNumberToken(.NUMBER, num);
|
||||
return self.makeToken(.NUMBER);
|
||||
}
|
||||
|
||||
fn doIdentifier(self: *Scanner) !void {
|
||||
/// Either a keyword or an identifier come out of this.
|
||||
fn doIdentifier(self: *Scanner) Token {
|
||||
while (isAlphaNumeric(self.peek())) {
|
||||
_ = self.advance();
|
||||
}
|
||||
|
@ -243,90 +224,53 @@ pub const Scanner = struct {
|
|||
toktype = TokenType.IDENTIFIER;
|
||||
}
|
||||
|
||||
try self.addSimpleToken(toktype);
|
||||
return self.makeToken(toktype);
|
||||
}
|
||||
|
||||
/// Scan through our tokens and add them to the Scanner's token list.
|
||||
fn scanToken(self: *Scanner) !void {
|
||||
pub fn scanToken(self: *Scanner) !?Token {
|
||||
self.skipWhitespace();
|
||||
self.start = self.current;
|
||||
|
||||
if (self.isAtEnd()) return self.makeToken(TokenType.EOF);
|
||||
|
||||
var c = self.advance();
|
||||
if (isAlpha(c)) return self.doIdentifier();
|
||||
if (isDigit(c)) return self.doNumber();
|
||||
|
||||
switch (c) {
|
||||
'(' => try self.addSimpleToken(.LEFT_PAREN),
|
||||
')' => try self.addSimpleToken(.RIGHT_PAREN),
|
||||
'{' => try self.addSimpleToken(.LEFT_BRACE),
|
||||
'}' => try self.addSimpleToken(.RIGHT_BRACE),
|
||||
',' => try self.addSimpleToken(.COMMA),
|
||||
'.' => try self.addSimpleToken(.DOT),
|
||||
'-' => try self.addSimpleToken(.MINUS),
|
||||
'+' => try self.addSimpleToken(.PLUS),
|
||||
';' => try self.addSimpleToken(.SEMICOLON),
|
||||
'*' => try self.addSimpleToken(.STAR),
|
||||
var token = switch (c) {
|
||||
'(' => self.makeToken(.LEFT_PAREN),
|
||||
')' => self.makeToken(.RIGHT_PAREN),
|
||||
'{' => self.makeToken(.LEFT_BRACE),
|
||||
'}' => self.makeToken(.RIGHT_BRACE),
|
||||
',' => self.makeToken(.COMMA),
|
||||
'.' => self.makeToken(.DOT),
|
||||
'-' => self.makeToken(.MINUS),
|
||||
'+' => self.makeToken(.PLUS),
|
||||
';' => self.makeToken(.SEMICOLON),
|
||||
'*' => self.makeToken(.STAR),
|
||||
|
||||
'!' => try self.addMatchToken('=', .BANG_EQUAL, .BANG),
|
||||
'=' => try self.addMatchToken('=', .EQUAL_EQUAL, .EQUAL),
|
||||
'<' => try self.addMatchToken('=', .LESS_EQUAL, .LESS),
|
||||
'>' => try self.addMatchToken('=', .GREATER_EQUAL, .GREATER),
|
||||
'!' => self.makeMatchToken('=', .BANG_EQUAL, .BANG),
|
||||
'=' => self.makeMatchToken('=', .EQUAL_EQUAL, .EQUAL),
|
||||
'<' => self.makeMatchToken('=', .LESS_EQUAL, .LESS),
|
||||
'>' => self.makeMatchToken('=', .GREATER_EQUAL, .GREATER),
|
||||
|
||||
'/' => blk: {
|
||||
// consume comments
|
||||
if (self.match('/')) {
|
||||
if (self.peekNext() == '/') {
|
||||
while (self.peek() != '\n' and !self.isAtEnd()) {
|
||||
_ = self.advance();
|
||||
}
|
||||
} else if (self.match('*')) {
|
||||
// multiline block comments are messier to work with, but
|
||||
// we can still do it!
|
||||
while (true) {
|
||||
if (self.isAtEnd()) break;
|
||||
// check '*/'
|
||||
if (self.peek() == '*' and self.peekNext() == '/') {
|
||||
self.current += 2;
|
||||
break;
|
||||
}
|
||||
|
||||
_ = self.advance();
|
||||
}
|
||||
break :blk null;
|
||||
} else {
|
||||
try self.addSimpleToken(.SLASH);
|
||||
break :blk self.makeToken(.SLASH);
|
||||
}
|
||||
},
|
||||
|
||||
' ', '\r', '\t' => blk: {},
|
||||
'\n' => blk: {
|
||||
self.line += 1;
|
||||
},
|
||||
|
||||
'"' => try self.doString(),
|
||||
|
||||
else => {
|
||||
if (isDigit(c)) {
|
||||
try self.doNumber();
|
||||
} else if (isAlpha(c)) {
|
||||
try self.doIdentifier();
|
||||
} else {
|
||||
try main.doError(self.line, "Unexpected character");
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
else => return TokenError.Unexpected,
|
||||
};
|
||||
|
||||
pub fn scanTokens(self: *Scanner) !TokenList {
|
||||
// while we aren't at the end, we're still consuming
|
||||
// tokens.
|
||||
while (!self.isAtEnd()) {
|
||||
self.start = self.current;
|
||||
try self.scanToken();
|
||||
}
|
||||
|
||||
try self.addToken(token.Token{
|
||||
.Simple = token.SimpleToken.init(
|
||||
.EOF,
|
||||
"",
|
||||
self.line,
|
||||
{},
|
||||
),
|
||||
});
|
||||
|
||||
return self.tokens;
|
||||
return token;
|
||||
}
|
||||
};
|
||||
|
|
Loading…
Reference in a new issue