From 566d8313f3c8ec83177e52a014be0eb1d49810bb Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 17:13:50 -0300 Subject: [PATCH 1/3] add number tokens --- src/new_scanner.zig | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/new_scanner.zig b/src/new_scanner.zig index b2005f7..2b48f48 100644 --- a/src/new_scanner.zig +++ b/src/new_scanner.zig @@ -11,6 +11,20 @@ pub const TokenError = error{ Unterminated, }; +fn isDigit(char: u8) bool { + return char >= '0' and char <= '9'; +} + +fn isAlpha(c: u8) bool { + return (c >= 'a' and c <= 'z') or + (c >= 'A' and c <= 'Z') or + c == '_'; +} + +fn isAlphaNumeric(char: u8) bool { + return isAlpha(char) or isDigit(char); +} + pub const Scanner = struct { source: []const u8, @@ -117,12 +131,32 @@ pub const Scanner = struct { return self.makeToken(.STRING); } + /// Consume a number + fn doNumber(self: *Scanner) Token { + while (isDigit(self.peek())) { + _ = self.advance(); + } + + // check if it's a number like 12.34, where the '.' character + // exists and the one next to it is a digit. + if (self.peek() == '.' 
and isDigit(self.peekNext())) { + _ = self.advance(); + + while (isDigit(self.peek())) { + _ = self.advance(); + } + } + + return self.makeToken(.NUMBER); + } + pub fn scanToken(self: *Scanner) !?Token { self.skipWhitespace(); self.start = self.current; if (self.isAtEnd()) return self.makeToken(TokenType.EOF); var c = self.advance(); + if (isDigit(c)) return self.doNumber(); var token = switch (c) { '(' => self.makeToken(.LEFT_PAREN), From d62c58a1956989a5e001f981005b6590039a0fc0 Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 17:15:27 -0300 Subject: [PATCH 2/3] scanner: add identifiers --- src/new_scanner.zig | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/new_scanner.zig b/src/new_scanner.zig index 2b48f48..340523b 100644 --- a/src/new_scanner.zig +++ b/src/new_scanner.zig @@ -150,12 +150,21 @@ pub const Scanner = struct { return self.makeToken(.NUMBER); } + fn doIdentifier(self: *Scanner) Token { + while (isAlphaNumeric(self.peek())) { + _ = self.advance(); + } + + return self.makeToken(.IDENTIFIER); + } + pub fn scanToken(self: *Scanner) !?Token { self.skipWhitespace(); self.start = self.current; if (self.isAtEnd()) return self.makeToken(TokenType.EOF); var c = self.advance(); + if (isAlpha(c)) return self.doIdentifier(); if (isDigit(c)) return self.doNumber(); var token = switch (c) { From 7d7aabbdd7df8da75cc510fd1165f475b5c7acc6 Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 17:20:50 -0300 Subject: [PATCH 3/3] scanner: add keyword handling (copied off the old scanner) as with most things, lol --- src/compiler.zig | 2 +- src/new_scanner.zig | 75 +++++++++++++++++++++++++++++++++++++++++++-- src/scanner.zig | 1 - 3 files changed, 73 insertions(+), 5 deletions(-) diff --git a/src/compiler.zig b/src/compiler.zig index f0771ff..fd96032 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -23,7 +23,7 @@ pub const Compiler = struct { } pub fn compile(self: *Compiler) !void { - var scanr = scanner.Scanner.init(self.allocator, 
self.src); + var scanr = try scanner.Scanner.init(self.allocator, self.src); var line: usize = 0; while (true) { var token_opt = scanr.scanToken() catch |err| { diff --git a/src/new_scanner.zig b/src/new_scanner.zig index 340523b..8a1c436 100644 --- a/src/new_scanner.zig +++ b/src/new_scanner.zig @@ -25,8 +25,62 @@ fn isAlphaNumeric(char: u8) bool { return isAlpha(char) or isDigit(char); } +pub const KeywordMap = std.AutoHashMap([]const u8, u6); + +/// The book does say that C doesn't have hashmaps, but Zig does, and I can +/// use it here. +fn initKeywordMap(allocator: *std.mem.Allocator) !KeywordMap { + var map = KeywordMap.init(allocator); + + const keywords = [][]const u8{ + "and"[0..], + "class"[0..], + "else"[0..], + "false"[0..], + "for"[0..], + "fun"[0..], + "if"[0..], + "nil"[0..], + "or"[0..], + "print"[0..], + "return"[0..], + "super"[0..], + "this"[0..], + "true"[0..], + "var"[0..], + "while"[0..], + }; + + const tags = []TokenType{ + TokenType.AND, + TokenType.CLASS, + TokenType.ELSE, + TokenType.FALSE, + TokenType.FOR, + TokenType.FUN, + TokenType.IF, + TokenType.NIL, + TokenType.OR, + TokenType.PRINT, + TokenType.RETURN, + TokenType.SUPER, + TokenType.THIS, + TokenType.TRUE, + TokenType.VAR, + TokenType.WHILE, + }; + + for (keywords) |keyword, idx| { + var tag = @enumToInt(tags[idx]); + _ = try map.put(keyword, tag); + } + + return map; +} + pub const Scanner = struct { source: []const u8, + keywords: KeywordMap, start: usize = 0, current: usize = 0, @@ -34,10 +88,11 @@ pub const Scanner = struct { allocator: *Allocator, - pub fn init(allocator: *Allocator, data: []const u8) Scanner { + pub fn init(allocator: *Allocator, data: []const u8) !Scanner { return Scanner{ - .allocator = allocator, .source = data, + .keywords = try initKeywordMap(allocator), + .allocator = allocator, }; } @@ -150,12 +205,26 @@ pub const Scanner = struct { return self.makeToken(.NUMBER); } + /// Either a keyword or an identifier comes out of this. 
fn doIdentifier(self: *Scanner) Token { while (isAlphaNumeric(self.peek())) { _ = self.advance(); } - return self.makeToken(.IDENTIFIER); + // after reading the identifier, we check + // if it is any of our keywords, if it is, then we add + // the specified keyword type. if not, just .IDENTIFIER + var text = self.source[self.start..self.current]; + var type_opt = self.keywords.get(text); + var toktype: TokenType = undefined; + + if (type_opt) |kv| { + toktype = @intToEnum(TokenType, kv.value); + } else { + toktype = TokenType.IDENTIFIER; + } + + return self.makeToken(toktype); } pub fn scanToken(self: *Scanner) !?Token { diff --git a/src/scanner.zig b/src/scanner.zig index ed5ec0e..b83a7a4 100644 --- a/src/scanner.zig +++ b/src/scanner.zig @@ -20,7 +20,6 @@ fn isAlphaNumeric(char: u8) bool { return isAlpha(char) or isDigit(char); } -// hashmaps don't work on HashMaps for some reason. anyways. pub const KeywordMap = std.AutoHashMap([]const u8, u6); fn initKeywordMap(allocator: *std.mem.Allocator) !KeywordMap {