From 2c7cf356b3da98834c327498519dcc1763eb5f11 Mon Sep 17 00:00:00 2001
From: Luna
Date: Sat, 1 Jun 2019 16:50:48 -0300
Subject: [PATCH 1/4] scanner: add basic tokens and matched-tokens

---
 src/compiler.zig    |  2 +-
 src/new_scanner.zig | 79 +++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/src/compiler.zig b/src/compiler.zig
index a223c67..50a356f 100644
--- a/src/compiler.zig
+++ b/src/compiler.zig
@@ -26,7 +26,7 @@ pub const Compiler = struct {
         var scanr = scanner.Scanner.init(self.allocator, self.src);
         var line: usize = 0;
         while (true) {
-            var token = scanr.scanToken();
+            var token = try scanr.scanToken();
 
             if (token.line != line) {
                 try self.stdout.print("{} ", token.line);
diff --git a/src/new_scanner.zig b/src/new_scanner.zig
index 88b0bab..24e305c 100644
--- a/src/new_scanner.zig
+++ b/src/new_scanner.zig
@@ -1,8 +1,16 @@
 const std = @import("std");
 const tokens = @import("token.zig");
 
+const Token = tokens.Token;
+const TokenType = tokens.TokenType;
+
 const Allocator = std.mem.Allocator;
 
+pub const TokenError = error{
+    Unexpected,
+    Unterminated,
+};
+
 pub const Scanner = struct {
     source: []const u8,
 
@@ -19,7 +27,74 @@ pub const Scanner = struct {
         };
     }
 
-    pub fn scanToken(self: *Scanner) tokens.Token {
-        return tokens.Token{};
+    fn isAtEnd(self: *Scanner) bool {
+        return self.current >= self.source.len;
+    }
+
+    fn advance(self: *Scanner) u8 {
+        self.current += 1;
+        return self.source[self.current - 1];
+    }
+
+    fn makeToken(self: *Scanner, ttype: TokenType) Token {
+        return Token{
+            .ttype = ttype,
+            .lexeme = self.source[self.start..self.current],
+            .line = self.line,
+        };
+    }
+
+    /// Check if the next character matches what is expected.
+    fn match(self: *Scanner, expected: u8) bool {
+        if (self.isAtEnd()) return false;
+        if (self.source[self.current] != expected) return false;
+
+        self.current += 1;
+        return true;
+    }
+
+    /// Add a SimpleToken of type_match if the next character is
+    /// `expected`. Adds a SimpleToken of type_nomatch when it is not.
+    fn makeMatchToken(
+        self: *Scanner,
+        expected: u8,
+        type_match: TokenType,
+        type_nomatch: TokenType,
+    ) Token {
+        if (self.match(expected)) {
+            return self.makeToken(type_match);
+        } else {
+            return self.makeToken(type_nomatch);
+        }
+    }
+
+    pub fn scanToken(self: *Scanner) !tokens.Token {
+        self.start = self.current;
+
+        if (self.isAtEnd()) return self.makeToken(TokenType.EOF);
+
+        var c = self.advance();
+
+        var token = switch (c) {
+            '(' => self.makeToken(.LEFT_PAREN),
+            ')' => self.makeToken(.RIGHT_PAREN),
+            '{' => self.makeToken(.LEFT_BRACE),
+            '}' => self.makeToken(.RIGHT_BRACE),
+            ',' => self.makeToken(.COMMA),
+            '.' => self.makeToken(.DOT),
+            '-' => self.makeToken(.MINUS),
+            '+' => self.makeToken(.PLUS),
+            ';' => self.makeToken(.SEMICOLON),
+            '*' => self.makeToken(.STAR),
+
+            '!' => self.makeMatchToken('=', .BANG_EQUAL, .BANG),
+            '=' => self.makeMatchToken('=', .EQUAL_EQUAL, .EQUAL),
+            '<' => self.makeMatchToken('=', .LESS_EQUAL, .LESS),
+            '>' => self.makeMatchToken('=', .GREATER_EQUAL, .GREATER),
+
+            else => return TokenError.Unexpected,
+        };
+
+        return token;
     }
 };

From f4f1fe1fbc16ba4d2793ce8c07f1c26f2233e1b6 Mon Sep 17 00:00:00 2001
From: Luna
Date: Sat, 1 Jun 2019 16:54:15 -0300
Subject: [PATCH 2/4] scanner: add whitespace handling

---
 src/new_scanner.zig | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/new_scanner.zig b/src/new_scanner.zig
index 24e305c..0c97647 100644
--- a/src/new_scanner.zig
+++ b/src/new_scanner.zig
@@ -68,9 +68,30 @@ pub const Scanner = struct {
         }
     }
 
-    pub fn scanToken(self: *Scanner) !tokens.Token {
-        self.start = self.current;
+    fn peek(self: *Scanner) u8 {
+        if (self.isAtEnd()) return 0;
+        return self.source[self.current];
+    }
 
+    fn skipWhitespace(self: *Scanner) void {
+        while (true) {
+            var c = self.peek();
+            switch (c) {
+                ' ', '\r', '\t' => blk: {
+                    _ = self.advance();
+                },
+                '\n' => blk: {
+                    self.line += 1;
+                    _ = self.advance();
+                },
+                else => return,
+            }
+        }
+    }
+
+    pub fn scanToken(self: *Scanner) !tokens.Token {
+        self.skipWhitespace();
+        self.start = self.current;
         if (self.isAtEnd()) return self.makeToken(TokenType.EOF);
 
         var c = self.advance();

From 27b04e16123951068ab9cfd4963351b088df8297 Mon Sep 17 00:00:00 2001
From: Luna
Date: Sat, 1 Jun 2019 17:07:22 -0300
Subject: [PATCH 3/4] scanner: add basic error handling, strings, comments

---
 src/compiler.zig    | 28 ++++++++++++++++++++--------
 src/new_scanner.zig | 45 +++++++++++++++++++++++++++++++++++++++++++--
 src/scanner.zig     |  2 +-
 3 files changed, 64 insertions(+), 11 deletions(-)

diff --git a/src/compiler.zig b/src/compiler.zig
index 50a356f..f0771ff 100644
--- a/src/compiler.zig
+++ b/src/compiler.zig
@@ -26,17 +26,29 @@ pub const Compiler = struct {
         var scanr = scanner.Scanner.init(self.allocator, self.src);
         var line: usize = 0;
         while (true) {
-            var token = try scanr.scanToken();
+            var token_opt = scanr.scanToken() catch |err| {
+                std.debug.warn("Scan Error: {x}\n", err);
+                std.debug.warn(
+                    "line: {}, cur lexeme: {}\n",
+                    scanr.line,
+                    scanr.currentLexeme(),
+                );
+                break;
+            };
 
-            if (token.line != line) {
-                try self.stdout.print("{} ", token.line);
-                line = token.line;
+            if (token_opt) |token| {
+                if (token.line != line) {
+                    try self.stdout.print("{} ", token.line);
+                    line = token.line;
+                } else {
+                    try self.stdout.print(" | ");
+                }
+
+                try self.stdout.print("{} '{}'\n", token.ttype, token.lexeme);
+                if (token.ttype == TokenType.EOF) break;
             } else {
-                try self.stdout.print(" | ");
+                break;
             }
-
-            try self.stdout.print("{} '{}'\n", token.ttype, token.lexeme);
-            if (token.ttype == TokenType.EOF) break;
         }
     }
 };
diff --git a/src/new_scanner.zig b/src/new_scanner.zig
index 0c97647..f19b8a3 100644
--- a/src/new_scanner.zig
+++ b/src/new_scanner.zig
@@ -36,10 +36,14 @@ pub const Scanner = struct {
         return self.source[self.current - 1];
     }
 
+    pub fn currentLexeme(self: *Scanner) []const u8 {
+        return self.source[self.start..self.current];
+    }
+
     fn makeToken(self: *Scanner, ttype: TokenType) Token {
         return Token{
             .ttype = ttype,
-            .lexeme = self.source[self.start..self.current],
+            .lexeme = self.currentLexeme(),
             .line = self.line,
         };
     }
@@ -73,6 +77,11 @@ pub const Scanner = struct {
         return self.source[self.current];
     }
 
+    fn peekNext(self: *Scanner) u8 {
+        if (self.isAtEnd()) return 0;
+        return self.source[self.current + 1];
+    }
+
     fn skipWhitespace(self: *Scanner) void {
         while (true) {
             var c = self.peek();
@@ -89,7 +98,26 @@ pub const Scanner = struct {
         }
     }
 
-    pub fn scanToken(self: *Scanner) !tokens.Token {
+    fn doString(self: *Scanner) !Token {
+        // consume entire string
+        while (self.peek() != '"' and !self.isAtEnd()) {
+            if (self.peek() == '\n') self.line += 1;
+            _ = self.advance();
+        }
+
+        // unterminated string.
+        if (self.isAtEnd()) {
+            return TokenError.Unterminated;
+        }
+
+        // the closing ".
+        _ = self.advance();
+
+        // trim the surrounding quotes.
+        return self.makeToken(.STRING);
+    }
+
+    pub fn scanToken(self: *Scanner) !?Token {
         self.skipWhitespace();
         self.start = self.current;
         if (self.isAtEnd()) return self.makeToken(TokenType.EOF);
@@ -113,6 +141,19 @@ pub const Scanner = struct {
             '<' => self.makeMatchToken('=', .LESS_EQUAL, .LESS),
             '>' => self.makeMatchToken('=', .GREATER_EQUAL, .GREATER),
 
+            '/' => blk: {
+                if (self.peekNext() == '/') {
+                    while (self.peek() != '\n' and !self.isAtEnd()) {
+                        _ = self.advance();
+                    }
+                    break :blk null;
+                } else {
+                    break :blk self.makeToken(.SLASH);
+                }
+            },
+
+            '"' => try self.doString(),
+
             else => return TokenError.Unexpected,
         };
 
diff --git a/src/scanner.zig b/src/scanner.zig
index 0cffde2..ed5ec0e 100644
--- a/src/scanner.zig
+++ b/src/scanner.zig
@@ -98,7 +98,7 @@ pub const Scanner = struct {
         return self.source[self.current - 1];
     }
 
-    fn currentLexeme(self: *Scanner) []u8 {
+    pub fn currentLexeme(self: *Scanner) []u8 {
         return self.source[self.start..self.current];
     }
 

From 9d1d253c9479cbf40e06f72105f118ceb7f05cc5 Mon Sep 17 00:00:00 2001
From: Luna
Date: Sat, 1 Jun 2019 17:12:00 -0300
Subject: [PATCH 4/4] scanner: fix peekNext

peekNext() must return the byte one past the current one, or 0 when
that byte does not exist. Guard on `current + 1` instead of isAtEnd()
so the last byte of the source cannot trigger an out-of-bounds read.

Returning source[current - 1] is not a fix: that is the byte advance()
just consumed, so inside the '/' arm the comparison is always true and
every '/' is scanned as a comment (SLASH is never produced).

By the time the '/' arm runs, advance() has already consumed the first
slash, so the byte that decides "comment or SLASH" is the current one.
Use peek() at that call site.

---
 src/new_scanner.zig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/new_scanner.zig b/src/new_scanner.zig
index f19b8a3..b2005f7 100644
--- a/src/new_scanner.zig
+++ b/src/new_scanner.zig
@@ -78,7 +78,7 @@ pub const Scanner = struct {
     }
 
     fn peekNext(self: *Scanner) u8 {
-        if (self.isAtEnd()) return 0;
+        if (self.current + 1 >= self.source.len) return 0;
         return self.source[self.current + 1];
     }
 
@@ -142,7 +142,7 @@ pub const Scanner = struct {
             '>' => self.makeMatchToken('=', .GREATER_EQUAL, .GREATER),
 
             '/' => blk: {
-                if (self.peekNext() == '/') {
+                if (self.peek() == '/') {
                     while (self.peek() != '\n' and !self.isAtEnd()) {
                         _ = self.advance();
                     }