From 9a2c50a53ef27e47753063b1f08458254261d4d5 Mon Sep 17 00:00:00 2001
From: Luna
Date: Fri, 31 May 2019 21:23:51 -0300
Subject: [PATCH] add basic tokens and a basic lexer

---
 src/main.zig    |  46 ++++++++++++++++++--
 src/scanner.zig | 109 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/token.zig   | 100 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 252 insertions(+), 3 deletions(-)
 create mode 100644 src/scanner.zig
 create mode 100644 src/token.zig

diff --git a/src/main.zig b/src/main.zig
index 40d72e9..71cfd6b 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -1,8 +1,47 @@
 const std = @import("std");
 
 const Allocator = std.mem.Allocator;
+const Scanner = @import("scanner.zig").Scanner;
 
-fn run(data: []u8) void {}
+/// Set when a scan error is reported; `runFile` exits with status 65 if it
+/// is still set after `run` returns.
+pub var hadError = false;
+
+/// Scans `data` into tokens and prints every token to stdout.
+///
+/// `hadError` is cleared once, before scanning, so an error from an earlier
+/// call does not carry over while errors reported during this call remain
+/// visible to the caller.
+fn run(allocator: *Allocator, data: []u8) !void {
+    hadError = false;
+
+    var stdout_file = try std.io.getStdOut();
+    const stdout = &stdout_file.outStream().stream;
+
+    var scanner = Scanner.init(allocator, data);
+    var tokens = try scanner.scanTokens();
+    // The returned list owns its buffer; release it once printing is done.
+    defer tokens.deinit();
+    var it = tokens.iterator();
+
+    while (it.next()) |token| {
+        try token.Simple.printToken(stdout);
+    }
+}
+
+/// Reports `message` for `line` with an empty location string.
+pub fn doError(line: usize, message: []const u8) !void {
+    try errorReport(line, "", message);
+}
+
+/// Prints "[line N] Error <where>: <message>" to stdout and sets `hadError`.
+pub fn errorReport(line: usize, where: []const u8, message: []const u8) !void {
+    var stdout_file = try std.io.getStdOut();
+    const stdout = &stdout_file.outStream().stream;
+
+    try stdout.print("[line {}] Error {}: {}\n", line, where, message);
+    hadError = true;
+}
 
 fn runFile(allocator: *Allocator, path: []const u8) !void {
     var lox_file = try std.fs.File.openRead(path);
@@ -12,7 +51,8 @@ fn runFile(allocator: *Allocator, path: []const u8) !void {
     var slice = try allocator.alloc(u8, total_bytes);
     _ = try lox_file.read(slice);
 
-    run(slice);
+    try run(allocator, slice);
+    if (hadError) std.os.exit(65);
 }
 
 fn runPrompt(allocator: *Allocator) !void {
@@ -28,7 +68,7 @@ fn runPrompt(allocator: *Allocator) !void {
             return err;
         };
 
-        run(line);
+        try run(allocator, line);
     }
 }
 
diff --git a/src/scanner.zig b/src/scanner.zig
new file mode 100644
index 0000000..c4f9100
--- /dev/null
+++ b/src/scanner.zig
@@ -0,0 +1,109 @@
+const std = @import("std");
+
+const token = @import("token.zig");
+const main = @import("main.zig");
+
+const TokenList = std.ArrayList(token.Token);
+
+/// Splits a source buffer into tokens, one byte at a time.
+pub const Scanner = struct {
+    source: []u8,
+    tokens: TokenList,
+
+    /// Index of the first byte of the lexeme being scanned.
+    start: usize = 0,
+    /// Index of the next byte to consume.
+    current: usize = 0,
+    /// Line number attached to tokens and error reports (starts at 1).
+    line: usize = 1,
+
+    /// Creates a scanner over `data`; the token list allocates from `allocator`.
+    pub fn init(allocator: *std.mem.Allocator, data: []u8) Scanner {
+        return Scanner{
+            .source = data,
+            .tokens = TokenList.init(allocator),
+        };
+    }
+
+    fn isAtEnd(self: *Scanner) bool {
+        return self.current >= self.source.len;
+    }
+
+    /// Consumes and returns the next byte; `scanTokens` checks `isAtEnd` first.
+    fn advance(self: *Scanner) u8 {
+        self.current += 1;
+        return self.source[self.current - 1];
+    }
+
+    /// Returns the bytes consumed since `start`.
+    fn currentLexeme(self: *Scanner) []u8 {
+        return self.source[self.start..self.current];
+    }
+
+    /// Appends a literal-free token of type `ttype` for the current lexeme.
+    fn addSimpleToken(self: *Scanner, ttype: token.TokenType) !void {
+        try self.addToken(token.Token{
+            .Simple = token.SimpleToken.init(
+                ttype,
+                self.currentLexeme(),
+                self.line,
+                {},
+            ),
+        });
+    }
+
+    fn addToken(
+        self: *Scanner,
+        tok: token.Token,
+    ) !void {
+        try self.tokens.append(tok);
+    }
+
+    /// Consumes one byte and emits the matching token, if any.
+    fn scanToken(self: *Scanner) !void {
+        const c = self.advance();
+
+        switch (c) {
+            '(' => try self.addSimpleToken(.LEFT_PAREN),
+            ')' => try self.addSimpleToken(.RIGHT_PAREN),
+            '{' => try self.addSimpleToken(.LEFT_BRACE),
+            '}' => try self.addSimpleToken(.RIGHT_BRACE),
+            ',' => try self.addSimpleToken(.COMMA),
+            '.' => try self.addSimpleToken(.DOT),
+            '-' => try self.addSimpleToken(.MINUS),
+            '+' => try self.addSimpleToken(.PLUS),
+            ';' => try self.addSimpleToken(.SEMICOLON),
+            '*' => try self.addSimpleToken(.STAR),
+            // Whitespace separates tokens and produces none of its own.
+            ' ', '\r', '\t' => {},
+            // Keep `line` accurate for tokens and error reports.
+            '\n' => self.line += 1,
+            else => {
+                try main.doError(self.line, "Unexpected character");
+            },
+        }
+    }
+
+    /// Scans the whole source and returns the token list, ending with EOF.
+    ///
+    /// Unexpected bytes are reported through `main.doError` and skipped.
+    /// The caller is responsible for calling `deinit` on the returned list.
+    pub fn scanTokens(self: *Scanner) !TokenList {
+        // Keep consuming until every byte of the source has been visited.
+        while (!self.isAtEnd()) {
+            self.start = self.current;
+            try self.scanToken();
+        }
+
+        try self.addToken(token.Token{
+            .Simple = token.SimpleToken.init(
+                .EOF,
+                "",
+                self.line,
+                {},
+            ),
+        });
+
+        return self.tokens;
+    }
+};
diff --git a/src/token.zig b/src/token.zig
new file mode 100644
index 0000000..0ce1283
--- /dev/null
+++ b/src/token.zig
@@ -0,0 +1,100 @@
+const std = @import("std");
+
+/// Every kind of token the scanner can produce.
+pub const TokenType = enum {
+    // Single-character tokens.
+    LEFT_PAREN,
+    RIGHT_PAREN,
+    LEFT_BRACE,
+    RIGHT_BRACE,
+    COMMA,
+    DOT,
+    MINUS,
+    PLUS,
+    SEMICOLON,
+    SLASH,
+    STAR,
+
+    // One or two character tokens.
+    BANG,
+    BANG_EQUAL,
+    EQUAL,
+    EQUAL_EQUAL,
+    GREATER,
+    GREATER_EQUAL,
+    LESS,
+    LESS_EQUAL,
+
+    // Literals.
+    IDENTIFIER,
+    STRING,
+    NUMBER,
+
+    // Keywords.
+    AND,
+    CLASS,
+    ELSE,
+    FALSE,
+    FUN,
+    FOR,
+    IF,
+    NIL,
+    OR,
+    PRINT,
+    RETURN,
+    SUPER,
+    THIS,
+    TRUE,
+    VAR,
+    WHILE,
+
+    EOF,
+};
+
+/// Builds a token struct whose `literal` payload has type `T`.
+pub fn TokenFactory(
+    comptime T: type,
+) type {
+    return struct {
+        const Self = @This();
+
+        ttype: TokenType,
+        /// Source text of the token; read-only so string literals (such as
+        /// the empty EOF lexeme) can be stored as well as source slices.
+        lexeme: []const u8,
+        line: usize,
+        literal: T,
+
+        pub fn init(
+            ttype: TokenType,
+            lexeme: []const u8,
+            line: usize,
+            literal: T,
+        ) Self {
+            return Self{
+                .ttype = ttype,
+                .lexeme = lexeme,
+                .line = line,
+                .literal = literal,
+            };
+        }
+
+        /// Writes a one-line description of the token to `stdout`.
+        pub fn printToken(self: Self, stdout: var) !void {
+            try stdout.print(
+                "Token(type={x}, lexeme='{}', line={})\n",
+                self.ttype,
+                self.lexeme,
+                self.line,
+            );
+        }
+    };
+}
+
+/// Token that carries no literal value.
+pub const SimpleToken = TokenFactory(void);
+
+/// All token variants; only `Simple` exists so far.
+pub const Token = union {
+    Simple: SimpleToken,
+};