add basic scanner logic
This commit is contained in:
		
							parent
							
								
									3d243eefff
								
							
						
					
					
						commit
						59083198c6
					
				
					 3 changed files with 197 additions and 7 deletions
				
			
		
							
								
								
									
										11
									
								
								src/main.zig
									
										
									
									
									
								
							
							
						
						
									
										11
									
								
								src/main.zig
									
										
									
									
									
								
							|  | @ -1,5 +1,7 @@ | |||
| const std = @import("std"); | ||||
| 
 | ||||
| const scanners = @import("scanner.zig"); | ||||
| 
 | ||||
| const Allocator = std.mem.Allocator; | ||||
| 
 | ||||
| pub const Result = error{ | ||||
|  | @ -10,6 +12,15 @@ pub const Result = error{ | |||
| /// Scan `data` and print each token to stdout until the scanner yields EOF. | ||||
| /// Failures from obtaining stdout, scanning, or printing propagate via `try`. | ||||
| fn run(allocator: *Allocator, data: []u8) !void { | ||||
|     var stdout_file = try std.io.getStdOut(); | ||||
|     const stdout = &stdout_file.outStream().stream; | ||||
| 
 | ||||
|     var scanner = scanners.Scanner.init(allocator, data); | ||||
| 
 | ||||
|     // The EOF token is never printed; it only terminates the loop. | ||||
|     var tok = try scanner.nextToken(); | ||||
|     while (tok.ttype != .EOF) : (tok = try scanner.nextToken()) { | ||||
|         try stdout.print("{x}\n", tok); | ||||
|     } | ||||
| 
 | ||||
|     // `Ok` is a member of the `Result` error set declared above. | ||||
|     return Result.Ok; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
										182
									
								
								src/scanner.zig
									
										
									
									
									
								
							
							
						
						
									
										182
									
								
								src/scanner.zig
									
										
									
									
									
								
							|  | @ -1,9 +1,11 @@ | |||
| const std = @import("std"); | ||||
| const tokens = @import("tokens.std"); | ||||
| const tokens = @import("tokens.zig"); | ||||
| 
 | ||||
| const Allocator = std.mem.Allocator; | ||||
| const Token = tokens.Token; | ||||
| const TokenType = tokens.TokenType; | ||||
| 
 | ||||
| pub const TokenError = error{ | ||||
| pub const ScannerError = error{ | ||||
|     Unexpected, | ||||
|     Unterminated, | ||||
| }; | ||||
|  | @ -22,10 +24,182 @@ fn isAlphaNumeric(char: u8) bool { | |||
|     return isAlpha(char) or isDigit(char); | ||||
| } | ||||
| 
 | ||||
| /// Keyword spellings. Each entry's position corresponds to the entry at the | ||||
| /// same position in `keyword_ttypes`; `getKeyword` relies on that pairing, | ||||
| /// so the two tables must be kept in the same order. | ||||
| const keywords = [][]const u8{ | ||||
|     "break", | ||||
|     "const", | ||||
|     "continue", | ||||
|     "defer", | ||||
|     "else", | ||||
|     "enum", | ||||
|     "fn", | ||||
|     "for", | ||||
|     "go", | ||||
|     "goto", | ||||
|     "if", | ||||
|     "import", | ||||
|     "in", | ||||
|     "interface", | ||||
|     "match", | ||||
|     "module", | ||||
|     "mut", | ||||
|     "or", | ||||
|     "return", | ||||
|     "struct", | ||||
|     "type", | ||||
| }; | ||||
| 
 | ||||
| /// Token types for the keywords, indexed in parallel with `keywords`: | ||||
| /// `getKeyword` returns the entry here whose index equals the index of the | ||||
| /// matching spelling in `keywords`. | ||||
| const keyword_ttypes = []TokenType{ | ||||
|     .Break, | ||||
|     .Const, | ||||
|     .Continue, | ||||
|     .Defer, | ||||
|     .Else, | ||||
|     .Enum, | ||||
|     .Fn, | ||||
|     .For, | ||||
|     .Go, | ||||
|     .Goto, | ||||
|     .If, | ||||
|     .Import, | ||||
|     .In, | ||||
|     .Interface, | ||||
|     .Match, | ||||
|     .Module, | ||||
|     .Mut, | ||||
|     .Or, | ||||
|     .Return, | ||||
|     .Struct, | ||||
|     .Type, | ||||
| }; | ||||
| 
 | ||||
| /// Look up `keyword` in the `keywords` table. | ||||
| /// Returns the token type stored at the same index in `keyword_ttypes`, | ||||
| /// or null when no entry is byte-for-byte equal to `keyword`. | ||||
| fn getKeyword(keyword: []const u8) ?TokenType { | ||||
|     var i: usize = 0; | ||||
|     while (i < keywords.len) : (i += 1) { | ||||
|         const candidate = keywords[i]; | ||||
|         if (!std.mem.eql(u8, keyword, candidate)) continue; | ||||
|         return keyword_ttypes[i]; | ||||
|     } | ||||
| 
 | ||||
|     return null; | ||||
| } | ||||
| 
 | ||||
| /// Scanner for vlang tokens. | ||||
| pub const Scanner = struct { | ||||
|     allocator: *Allocator, | ||||
|     source: []u8, | ||||
| 
 | ||||
|     pub fn init(allocator: *Allocator) Scanner { | ||||
|         return Scanner{ .allocator = allocator }; | ||||
|     start: usize = 0, | ||||
|     current: usize = 0, | ||||
|     line: usize = 1, | ||||
| 
 | ||||
|     /// Build a Scanner over `source`. Only `allocator` and `source` are set | ||||
|     /// here; the other fields keep their declared default values. | ||||
|     pub fn init(allocator: *Allocator, source: []u8) Scanner { | ||||
|         return Scanner{ | ||||
|             .source = source, | ||||
|             .allocator = allocator, | ||||
|         }; | ||||
|     } | ||||
| 
 | ||||
|     /// Report whether `current` has reached or passed the end of `source`. | ||||
|     fn isAtEnd(self: *Scanner) bool { | ||||
|         const has_more = self.current < self.source.len; | ||||
|         return !has_more; | ||||
|     } | ||||
| 
 | ||||
|     /// Consume one byte: return the byte at `current` and step `current` | ||||
|     /// past it. Performs no end-of-input check of its own. | ||||
|     fn advance(self: *Scanner) u8 { | ||||
|         const consumed = self.source[self.current]; | ||||
|         self.current += 1; | ||||
|         return consumed; | ||||
|     } | ||||
| 
 | ||||
|     /// Slice of `source` from `start` up to, but not including, `current`. | ||||
|     fn currentLexeme(self: *Scanner) []const u8 { | ||||
|         const begin = self.start; | ||||
|         const end = self.current; | ||||
|         return self.source[begin..end]; | ||||
|     } | ||||
| 
 | ||||
|     /// Build a Token of type `ttype` from the current lexeme and the | ||||
|     /// scanner's current `line` value. | ||||
|     fn makeToken(self: *Scanner, ttype: TokenType) Token { | ||||
|         const text = self.currentLexeme(); | ||||
|         return Token{ .ttype = ttype, .lexeme = text, .line = self.line }; | ||||
|     } | ||||
| 
 | ||||
|     /// Build a Token of type `ttype`, then move `current` forward by one. | ||||
|     /// The token is created before the increment, so its lexeme does not | ||||
|     /// include the byte that is stepped over. | ||||
|     fn makeTokenAdvance(self: *Scanner, ttype: TokenType) Token { | ||||
|         // The deferred increment runs after the token has been built. | ||||
|         defer self.current += 1; | ||||
|         return self.makeToken(ttype); | ||||
|     } | ||||
| 
 | ||||
|     /// Consume the next byte if it equals `expected`. | ||||
|     /// Returns true and advances `current` on a match; returns false and | ||||
|     /// leaves `current` unchanged at end of input or on a mismatch. | ||||
|     fn match(self: *Scanner, expected: u8) bool { | ||||
|         const matched = !self.isAtEnd() and self.source[self.current] == expected; | ||||
|         if (matched) self.current += 1; | ||||
|         return matched; | ||||
|     } | ||||
| 
 | ||||
|     /// Return a Token of `type_match` when the next byte is `expected` | ||||
|     /// (the byte is consumed by `match`), otherwise a Token of | ||||
|     /// `type_nomatch`. | ||||
|     fn makeMatchToken( | ||||
|         self: *Scanner, | ||||
|         expected: u8, | ||||
|         type_match: TokenType, | ||||
|         type_nomatch: TokenType, | ||||
|     ) Token { | ||||
|         const chosen = if (self.match(expected)) type_match else type_nomatch; | ||||
|         return self.makeToken(chosen); | ||||
|     } | ||||
| 
 | ||||
|     /// Return the byte at `current` without consuming it, or 0 at end of input. | ||||
|     fn peek(self: *Scanner) u8 { | ||||
|         return if (self.isAtEnd()) 0 else self.source[self.current]; | ||||
|     } | ||||
| 
 | ||||
|     /// Return the byte one position after `current` without consuming | ||||
|     /// anything, or 0 when that position is outside `source`. | ||||
|     fn peekNext(self: *Scanner) u8 { | ||||
|         const next = self.current + 1; | ||||
|         if (next < self.source.len) return self.source[next]; | ||||
|         return 0; | ||||
|     } | ||||
| 
 | ||||
|     /// Scan a single token beginning at `current`. | ||||
|     /// | ||||
|     /// Returns an EOF token once the input is exhausted, and | ||||
|     /// `ScannerError.Unexpected` for any byte without a case in the switch. | ||||
|     pub fn nextToken(self: *Scanner) !Token { | ||||
|         // Every token's lexeme starts where the previous one ended. | ||||
|         self.start = self.current; | ||||
|         if (self.isAtEnd()) return self.makeToken(.EOF); | ||||
| 
 | ||||
|         const token = switch (self.advance()) { | ||||
|             // single-byte tokens | ||||
|             '(' => self.makeToken(.LeftParen), | ||||
|             ')' => self.makeToken(.RightParen), | ||||
|             '{' => self.makeToken(.LeftBrace), | ||||
|             '}' => self.makeToken(.RightBrace), | ||||
|             '[' => self.makeToken(.LeftSquare), | ||||
|             ']' => self.makeToken(.RightSquare), | ||||
|             '.' => self.makeToken(.Dot), | ||||
|             ';' => self.makeToken(.Semicolon), | ||||
|             ',' => self.makeToken(.Comma), | ||||
|             ':' => self.makeToken(.Colon), | ||||
|             '&' => self.makeToken(.Ampersand), | ||||
|             '|' => self.makeToken(.Pipe), | ||||
|             '?' => self.makeToken(.QuestionMark), | ||||
|             '$' => self.makeToken(.DollarSign), | ||||
| 
 | ||||
|             // one byte, optionally followed by '=' | ||||
|             '!' => self.makeMatchToken('=', .BangEqual, .Bang), | ||||
|             '=' => self.makeMatchToken('=', .EqualEqual, .Equal), | ||||
|             '>' => self.makeMatchToken('=', .GreaterEqual, .Greater), | ||||
| 
 | ||||
|             // '<' can start three tokens: | ||||
|             //  - <=, which is LessEqual | ||||
|             //  - <<, which is LeftDoubleChevron | ||||
|             //  - <, which is Less | ||||
|             '<' => if (self.match('=')) | ||||
|                 self.makeToken(.LessEqual) | ||||
|             else if (self.match('<')) | ||||
|                 self.makeToken(.LeftDoubleChevron) | ||||
|             else | ||||
|                 self.makeToken(.Less), | ||||
| 
 | ||||
|             // NOTE(review): whitespace bytes have no case above and `line` | ||||
|             // is not updated in this function - confirm that is intended. | ||||
|             else => return ScannerError.Unexpected, | ||||
|         }; | ||||
| 
 | ||||
|         return token; | ||||
|     } | ||||
| }; | ||||
|  |  | |||
|  | @ -30,9 +30,11 @@ pub const TokenType = enum { | |||
|     PlusEqual, | ||||
|     MinusEqual, | ||||
| 
 | ||||
|     LessThan, | ||||
|     // comparison ones | ||||
|     EqualEqual, | ||||
|     Less, | ||||
|     LessEqual, | ||||
|     GreaterThan, | ||||
|     Greater, | ||||
|     GreaterEqual, | ||||
|     Bang, | ||||
|     BangEqual, | ||||
|  | @ -65,9 +67,12 @@ pub const TokenType = enum { | |||
|     Return, | ||||
|     Struct, | ||||
|     Type, | ||||
| 
 | ||||
|     EOF, | ||||
| }; | ||||
| 
 | ||||
| pub const Token = struct { | ||||
|     ttype: TokenType, | ||||
|     lexeme: []u8, | ||||
|     lexeme: []const u8, | ||||
|     line: usize, | ||||
| }; | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue