From 3d26da0144d10303ff9f8850dbc952305b52825c Mon Sep 17 00:00:00 2001 From: Luna Date: Fri, 31 May 2019 16:04:09 -0300 Subject: [PATCH 01/68] remove examples and main parser grammar moving to lox described in https://craftinginterpreters.com to go with. better learn something first, then walk towards things like a static typed lang lol anyways if you were here for jorts as my own language thing do leave --- examples/add.jt | 23 ------------ examples/closures.jt | 22 ----------- examples/custom-types.jt | 11 ------ examples/extending-structs.jt | 15 -------- examples/function-overload.jt | 15 -------- examples/hello.jt | 11 ------ examples/higher-order-functions.jt | 26 ------------- examples/sockets.jt | 10 ----- examples/strings.jt | 15 -------- examples/struct-functions.jt | 60 ------------------------------ examples/structs.jt | 14 ------- jortsc/main.py | 8 ++-- jortsc/parser/parser.py | 44 ---------------------- 13 files changed, 3 insertions(+), 271 deletions(-) delete mode 100644 examples/add.jt delete mode 100644 examples/closures.jt delete mode 100644 examples/custom-types.jt delete mode 100644 examples/extending-structs.jt delete mode 100644 examples/function-overload.jt delete mode 100644 examples/hello.jt delete mode 100644 examples/higher-order-functions.jt delete mode 100644 examples/sockets.jt delete mode 100644 examples/strings.jt delete mode 100644 examples/struct-functions.jt delete mode 100644 examples/structs.jt delete mode 100644 jortsc/parser/parser.py diff --git a/examples/add.jt b/examples/add.jt deleted file mode 100644 index 447355f..0000000 --- a/examples/add.jt +++ /dev/null @@ -1,23 +0,0 @@ -import io - -fn add (int a, int b) -> int { - a + b -} - -// return type is void by default -fn main () { - // explicit types, or - int val = add(2, 2) - - // type inferred from the functions' return value - val := add(2, 2) - - // variables are immutable, however, you can update them with - // the value of the old one. 
- val = val + 1 - - // a shorthand is val++, same for val--. - - // string interpolation is implicit - io.puts("2 plus 2 = {val}") -} diff --git a/examples/closures.jt b/examples/closures.jt deleted file mode 100644 index 6a1291a..0000000 --- a/examples/closures.jt +++ /dev/null @@ -1,22 +0,0 @@ -import io - -fn main () { - x := 0 - - // since variable are immutable but updatable, x is 1 inside clojure, but - // 0 inside main() - fn inner() { - x++ - } - - inner() - - // shows 0 - io.puts("x is {x}") - - // however, if you explicitly update x: - x = inner() - - // shows 1 - io.puts("x is {x}") -} diff --git a/examples/custom-types.jt b/examples/custom-types.jt deleted file mode 100644 index 534e670..0000000 --- a/examples/custom-types.jt +++ /dev/null @@ -1,11 +0,0 @@ -import io - -// you can create your own types with 'type' -type T = int - -fn main () { - T a = 2 - - // since T is int, io.puts with an int works - io.puts(a) -} diff --git a/examples/extending-structs.jt b/examples/extending-structs.jt deleted file mode 100644 index 3d8d8bc..0000000 --- a/examples/extending-structs.jt +++ /dev/null @@ -1,15 +0,0 @@ - -struct A { - int a, - int b -} - -struct B <- A { - int c -} - -fn main () { - a := A{1, 2} - b := B{1, 2, 3} -} - diff --git a/examples/function-overload.jt b/examples/function-overload.jt deleted file mode 100644 index e47830a..0000000 --- a/examples/function-overload.jt +++ /dev/null @@ -1,15 +0,0 @@ -import io -import integer - -fn my_puts(string str) { - io.puts(str) -} - -fn my_puts(int my_int) { - io.puts(integer.to_str(my_int)) -} - -fn main () { - my_puts(2) - my_puts("aaa") -} diff --git a/examples/hello.jt b/examples/hello.jt deleted file mode 100644 index 4bc50f6..0000000 --- a/examples/hello.jt +++ /dev/null @@ -1,11 +0,0 @@ -import io - -// if a return type is not defined, it is implicitly void and so the function -// returns nil (the only instance of void) - -// main can return int or void, void mains are handled by jortsc -fn main () 
-> int { - // todo: put it back to io.puts - ioputs("pants") - 0 -} diff --git a/examples/higher-order-functions.jt b/examples/higher-order-functions.jt deleted file mode 100644 index 34776dd..0000000 --- a/examples/higher-order-functions.jt +++ /dev/null @@ -1,26 +0,0 @@ -import io - -// takes a function that receives two ints, returns an int -// Func is the function type keyword, to not switch it with fn (which declares -// a function) -fn function_tester (Func func ([int, int] -> int)) -> int { - func(2, 2) -} - -fn add(int a, int b) -> int { - a + b -} - -fn main () { - // passes the function add to function_tester - res := function_tester(add) - - // you can also create functions and put them in variables. not putting a - // function name on the fn block makes it return a Func instance to be put - // in a variable - anonymous := (fn () {}) - - // anonymous has type Func ([] -> void) - - io.puts("res = {res}") -} diff --git a/examples/sockets.jt b/examples/sockets.jt deleted file mode 100644 index 08ab425..0000000 --- a/examples/sockets.jt +++ /dev/null @@ -1,10 +0,0 @@ -import socket -import io - -fn main () { - sock := socket.tcp_connect("example.com", 80) - sock.send("HTTP/1.1\n") - frame := sock.recv(1024) - sock.close() - io.puts(frame) -} diff --git a/examples/strings.jt b/examples/strings.jt deleted file mode 100644 index 75f0931..0000000 --- a/examples/strings.jt +++ /dev/null @@ -1,15 +0,0 @@ -import io - -fn main () { - s := "this is a string" - io.puts(s) - - s := "this is {s}" - io.puts(s) - - s := s + 2 // invalid - - // this however, is valid, there is an io.puts that handles int, - // more on function overload in a bit - io.puts(2) -} diff --git a/examples/struct-functions.jt b/examples/struct-functions.jt deleted file mode 100644 index 38df73e..0000000 --- a/examples/struct-functions.jt +++ /dev/null @@ -1,60 +0,0 @@ -import io - -struct A { - int val1, - int val2 -} - -// self is injected and represents the struct A -// from the functions' 
definition -fn A:sum_fields() -> int { - self.val1 + self.val2 -} - -// type of sum_fields is: -// Func ([A] -> int) - -// the mut keyword signals that self is a "reference" -// to self, instead of a copy - -// however, what actually happens is that an instance of -// A is returned from the function implicitly - -fn mut A:incr_both_fields() { - self.val1++ - self.val2++ -} - -// and so, the type becomes: -// Func ([A] -> A) - -fn mut A:incr_and_sum () { - self.val1++ - self.val2++ - - self.val1 + self.val2 -} - -// type is: -// Func ([A] -> (A, int)) - -fn main () { - a := A{0, 0} - - a.incr_both_fields() - - /* - translates to: - a := incr_both_fields(a) - */ - - sum := a.sum_fields() - io.puts(sum) - - val = a.incr_and_sum() - - /* - translates to: - a, val := incr_and_sum(a) - */ -} diff --git a/examples/structs.jt b/examples/structs.jt deleted file mode 100644 index 229058d..0000000 --- a/examples/structs.jt +++ /dev/null @@ -1,14 +0,0 @@ -import io - -struct MyStruct { - int var1, - int var2, - int var3 -} - -fn main () { - st = MyStruct{1, 2, 3} - - // TODO: define a way for printable things - io.puts(st) -} diff --git a/jortsc/main.py b/jortsc/main.py index 16d1ec2..a1b1820 100644 --- a/jortsc/main.py +++ b/jortsc/main.py @@ -5,23 +5,21 @@ import pprint import logging from jortsc.parser.lexer import lex_jorts -from jortsc.parser.syntatic import syntatic +# from jortsc.parser.parser import parse logging.basicConfig(level=logging.DEBUG) def main(): """main entry point""" try: - in_data = sys.stdin.read() + in_data = sys.stdin.read().strip() except EOFError: pass + print(repr(in_data)) tokens = lex_jorts(in_data) pprint.pprint(tokens) - tree = syntatic(tokens) - print(tree) - if __name__ == '__main__': main() diff --git a/jortsc/parser/parser.py b/jortsc/parser/parser.py deleted file mode 100644 index 7333bd6..0000000 --- a/jortsc/parser/parser.py +++ /dev/null @@ -1,44 +0,0 @@ - -from lark import Lark - -GRAMMAR = """ -FN: "fn" -IMPORT: "import" -COMMA: "," 
-DOT: "." -SINGLE_COMMENT: "//" -NEWLINE: /(\\r?\\n)+\\s*/ -ANY: /.+/ -WHITESPACE: " " -INTEGER: /[0-9]+/ -ARROW: "->" -COM_START: "/*" -COM_END: "*/" -QUOTE: "\\"" - -identifier: WHITESPACE* ANY WHITESPACE* - -single_comment: SINGLE_COMMENT ANY* NEWLINE -multi_comment: COM_START ANY* COM_END - -import_stmt: IMPORT identifier NEWLINE - -fn_arg: identifier identifier -parameters: fn_arg (COMMA fn_arg) -fn_stmt: FN identifier? "(" parameters? ")" [ARROW identifier] "{" NEWLINE? [stmt NEWLINE]* "}" - -sign_int: "+" | "-" -string: QUOTE ANY* QUOTE -value: (sign_int* INTEGER) | string - -call_stmt: [identifier DOT] identifier "(" [value COMMA]* ")" - -stmt: value | import_stmt | fn_stmt | call_stmt - -start: (NEWLINE | stmt)* -""" - -def parse(string: str): - """Parse using Lark""" - parser = Lark(GRAMMAR, parser='lalr', debug=True) - return parser.parse(string) From b3ea9637bd515b9f8f0ae5af6cd1555dc8f91dd1 Mon Sep 17 00:00:00 2001 From: Luna Date: Fri, 31 May 2019 16:15:27 -0300 Subject: [PATCH 02/68] move to zig --- .gitignore | 117 +--------------- README.md | 18 +-- build.zig | 15 ++ jortsc/__init__.py | 3 - jortsc/main.py | 25 ---- jortsc/parser/__init__.py | 0 jortsc/parser/ast_nodes.py | 45 ------ jortsc/parser/lexer.py | 112 --------------- jortsc/parser/syntatic.py | 272 ------------------------------------- setup.py | 14 -- src/main.zig | 5 + 11 files changed, 23 insertions(+), 603 deletions(-) create mode 100644 build.zig delete mode 100644 jortsc/__init__.py delete mode 100644 jortsc/main.py delete mode 100644 jortsc/parser/__init__.py delete mode 100644 jortsc/parser/ast_nodes.py delete mode 100644 jortsc/parser/lexer.py delete mode 100644 jortsc/parser/syntatic.py delete mode 100644 setup.py create mode 100644 src/main.zig diff --git a/.gitignore b/.gitignore index 0447b8b..3cef7be 100644 --- a/.gitignore +++ b/.gitignore @@ -1,116 +1 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution 
/ packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# celery beat schedule file -celerybeat-schedule - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ +zig-cache/ diff --git a/README.md b/README.md index 9b3bc4e..2168721 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,5 @@ # jorts -jorts programming language +an interpreter for the lox language from https://craftinginterpreters.com -## installing - -```sh -git clone https://gitdab.com/luna/jorts -cd jorts -pip install --user --editable . -``` - -## using - -right now, its not pretty, nor finished - -``` -cat examples/hello.jt | jortsc -``` +this is a learning project. 
diff --git a/build.zig b/build.zig new file mode 100644 index 0000000..371246c --- /dev/null +++ b/build.zig @@ -0,0 +1,15 @@ +const Builder = @import("std").build.Builder; + +pub fn build(b: *Builder) void { + const mode = b.standardReleaseOptions(); + const exe = b.addExecutable("jorts", "src/main.zig"); + exe.setBuildMode(mode); + + const run_cmd = exe.run(); + + const run_step = b.step("run", "Run the app"); + run_step.dependOn(&run_cmd.step); + + b.default_step.dependOn(&exe.step); + b.installArtifact(exe); +} diff --git a/jortsc/__init__.py b/jortsc/__init__.py deleted file mode 100644 index da3f54a..0000000 --- a/jortsc/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .main import main - -__all__ = ['main'] diff --git a/jortsc/main.py b/jortsc/main.py deleted file mode 100644 index a1b1820..0000000 --- a/jortsc/main.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/python3 - -import sys -import pprint -import logging - -from jortsc.parser.lexer import lex_jorts -# from jortsc.parser.parser import parse - -logging.basicConfig(level=logging.DEBUG) - -def main(): - """main entry point""" - try: - in_data = sys.stdin.read().strip() - except EOFError: - pass - - print(repr(in_data)) - tokens = lex_jorts(in_data) - pprint.pprint(tokens) - - -if __name__ == '__main__': - main() diff --git a/jortsc/parser/__init__.py b/jortsc/parser/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/jortsc/parser/ast_nodes.py b/jortsc/parser/ast_nodes.py deleted file mode 100644 index bff1c05..0000000 --- a/jortsc/parser/ast_nodes.py +++ /dev/null @@ -1,45 +0,0 @@ -from dataclasses import dataclass - -@dataclass -class TypedVar: - type_: str - name: str - - -@dataclass -class ReturnType: - type_: str - - -@dataclass -class Function: - name: str - arguments: str - ret_type: ReturnType - block: list - - -@dataclass -class Identifier: - name: str - - -@dataclass -class Import: - module: str - - -@dataclass -class String: - value: str - - -@dataclass -class Number: - 
value: str - - -@dataclass -class FunctionCall: - function: str - args: list diff --git a/jortsc/parser/lexer.py b/jortsc/parser/lexer.py deleted file mode 100644 index 7d61ff7..0000000 --- a/jortsc/parser/lexer.py +++ /dev/null @@ -1,112 +0,0 @@ -import re - -from dataclasses import dataclass -from enum import Enum, auto - - -class TokenType(Enum): - """Defines the type of a token""" - reserved = auto() - identifier = auto() - comment = auto() - comment_start = auto() - comment_end = auto() - whitespace = auto() - number = auto() - string = auto() - - -@dataclass -class Token: - value: str - type_: TokenType - - -class LexerError(Exception): - """Lexer error.""" - pass - - -TOKENS = [ - (r'[ \n\t]+', TokenType.whitespace), - - # single line comments and multiline comments - (r'//[^\n]*', TokenType.comment), - - # TODO: shouldnt this be /* */ instead of - # only tokenizing on the start and end? - (r'/\*', TokenType.comment_start), - (r'\*/', TokenType.comment_end), - - (r'fn', TokenType.reserved), - (r'if', TokenType.reserved), - (r'import', TokenType.reserved), - - (r'\(', TokenType.reserved), - (r'\)', TokenType.reserved), - - (r'\{', TokenType.reserved), - (r'\}', TokenType.reserved), - - (r'\-\>', TokenType.reserved), - (r'\.', TokenType.reserved), - - (r'\"[^\n]*\"', TokenType.string), - - # basic math ops - (r'[\+\-\/\*]', TokenType.reserved), - - (r'[0-9]+', TokenType.number), - (r'[A-Za-z][A-Za-z0-9_]*', TokenType.identifier) -] - - -def lex(string: str, token_defs: list) -> list: - """Generate tokens out of the given string.""" - pos = 0 - strlen = len(string) - tokens = [] - - # generate a dict for compiled regexes out of the token defs - # instead of compiling on each token definition per token. - compiled = {pattern: re.compile(pattern) - for pattern, _ in token_defs} - - # we use this instead of for pos in range(len(string)) because we - # need to increment pos to a whole token length's, and that wouldn't - # be easy on a for .. in range(..) 
- while pos < strlen: - valid = False - - for definition in token_defs: - pattern, tok_type = definition - regex = compiled[pattern] - - match = regex.match(string, pos) - - if not match: - continue - - text = match.group(0) - - # update pos to the end of the token - pos = match.end(0) - - valid = True - tokens.append(Token(text, tok_type)) - - # go to next token instead of checking other - # definitions for tokens, e.g if its a reserved token - # we shouldn't go down the path of an identifier. - break - - if not valid: - print(f'context: {pos} {len(string)} {string[pos-1:pos+20]!r}') - raise LexerError(f'Invalid character: {string[pos]!r}') - - return tokens - - -def lex_jorts(string: str) -> list: - """Lex with the jorts token definitions""" - return lex(string, TOKENS) diff --git a/jortsc/parser/syntatic.py b/jortsc/parser/syntatic.py deleted file mode 100644 index 7e9bc62..0000000 --- a/jortsc/parser/syntatic.py +++ /dev/null @@ -1,272 +0,0 @@ -from typing import Optional, Any, List - -from jortsc.parser.lexer import Token, TokenType -from jortsc.parser.ast_nodes import ( - Function, TypedVar, Identifier, Import, ReturnType, String, Number, - FunctionCall -) - - -class ParseError(Exception): - """Represents a parse error.""" - pass - - -class Reader: - """Main reader class""" - def __init__(self, tokens: List[Token]): - self.tokens = tokens - self.cur = 0 - - def __repr__(self): - return (f'') - - def peek(self) -> Optional[Token]: - """Peek at the current token.""" - try: - token = self.tokens[self.cur] - return token - except IndexError: - return None - - def next(self) -> Optional[Token]: - """Fetch the current token then skip to the next one.""" - token = self.peek() - self.cur += 1 - return token - - def expect(self, token_type: TokenType) -> Token: - """Check for a specific token type and error if it fails""" - token = self.next() - - if token.type_ != token_type: - raise ParseError(f'Expected {token_type}, got ' - f'{token.type_} {token.value!r}') - - 
return token - - def expect_val(self, value: str) -> Token: - """Check the next token to see if it matches against a given value, - instead of a type.""" - token = self.next() - - if token.value != value: - raise ParseError(f'Expected {value!r}, got ' - f'{token.type_} {token.value!r}') - - return token - - - def next_safe(self) -> Token: - """'Safe' version of next(). - - Raises an 'Unexpected EOF' error if next() returns None. - """ - token = self.next() - - if token is None: - raise ParseError('Unexpected EOF') - - return token - - def ignore(self, token_type: TokenType): - """Only increase self.cur if token_type is the upcoming token.""" - try: - assert self.tokens[self.cur].type_ == token_type - self.cur += 1 - except AssertionError: - pass - - -def _fn_read_args(reader: Reader, cur: List = None) -> List: - """Recursively read the arguments of the function.""" - if cur is None: - cur = [] - - # it can be an identifier for the arguments' type, OR a RPAREN - # if it is rparen, we stop - # if it isnt, we keep going until that happens - token = reader.next_safe() - - if token.value == ')': - return cur - - argtype = token - reader.expect(TokenType.whitespace) - argname = reader.next_safe() - - cur.append(TypedVar(argtype.value, argname.value)) - return _fn_read_args(reader, cur) - - -def _fn_ret_type(reader: Reader) -> ReturnType: - """Fetch the return type of a function. 
Defaults to void.""" - try: - reader.expect_val('->') - except ParseError: - return ReturnType('void') - - reader.ignore(TokenType.whitespace) - token = reader.expect(TokenType.identifier) - return ReturnType(token.value) - - -def read_function(reader: Reader): - """Read a function block.""" - reader.expect(TokenType.whitespace) - - token = reader.next() - - fn_name = '_anonymous' - fn_args = [] - - print('function token', token) - - if token.type_ == TokenType.identifier: - fn_name = token.value - - reader.expect(TokenType.whitespace) - reader.expect_val('(') - - fn_args = _fn_read_args(reader) - - reader.expect(TokenType.whitespace) - fn_ret_type = _fn_ret_type(reader) - - # only skip whitespace if we see it - reader.ignore(TokenType.whitespace) - block = read_start(reader) - elif token.value == '(': - fn_args = _fn_read_args(reader) - fn_ret_type = _fn_ret_type(reader) - block = read_start(reader) - - print('final function', fn_name, fn_args, fn_ret_type, block) - - return Function(fn_name, fn_args, fn_ret_type, block) - - -def read_import(reader): - """Read an import""" - reader.expect(TokenType.whitespace) - module = reader.next_safe() - return Import(module.value) - - -HANDLERS = { - 'fn': read_function, - 'import': read_import, -} - - -def read_reserved(token: Token, reader: Reader): - """Read reserved statements.""" - try: - handler = HANDLERS[token.value] - except KeyError: - raise ParseError(f'Unexpected reserved word {token.value!r}') - - return handler(reader) - - -def read_value(token: Token, _reader: Reader): - """Read a given value""" - if token.type_ == TokenType.string: - return String(token.value) - elif token.type_ == TokenType.number: - return Number(token.value) - - -def read_statement(token: Token, reader: Reader): - """Read a statement""" - # token is an identifier, so first check for a function call - - # TODO: handle more things than a function call - call_fn_name = token.value - token = reader.expect_val('(') - res = [] - - while True: - 
token = reader.next_safe() - - if token.value == ')': - break - - res.append(read_value(token, reader)) - - return FunctionCall(call_fn_name, res) - - -def read_start(reader: Reader): - """Read the start of a program.""" - print('reader', reader) - - token = reader.next() - - if token is None: - print('eof!') - return None - - ast = [] - res = [] - - # handle blocks - if token.value == '{': - # next can be a whitespace, or a } - token = reader.next() - - print('block start!, next:', token) - - if token.type_ == TokenType.whitespace: - # keep going on reading - while True: - token = reader.peek() - print('block append', token) - - if token.value == '}': - print('block end') - reader.next() - break - - res.extend(read_start(reader)) - elif token.value == '}': - res = [] - - # import, fn, etc - elif token.type_ == TokenType.reserved: - res = read_reserved(token, reader) - - elif token.type_ == TokenType.comment: - return [] - - elif token.type_ == TokenType.identifier: - res = read_statement(token, reader) - else: - res = read_value(token, reader) - - ast.append(res) - return ast - - -def read_loop(reader: Reader): - """Read the AST.""" - final_ast = [] - - while True: - ast = read_start(reader) - - # break when eof - if ast is None: - break - - # TODO: better ast cleanup - - final_ast.append(ast) - - return final_ast - - -def syntatic(tokens: List[Token]): - """Create an AST out of the tokens.""" - return read_loop(Reader(tokens)) diff --git a/setup.py b/setup.py deleted file mode 100644 index 45d4212..0000000 --- a/setup.py +++ /dev/null @@ -1,14 +0,0 @@ -from setuptools import setup - -setup( - name='jortsc', - version='0.1', - py_modules=['jortsc'], - install_requires=[ - 'lark-parser==0.6.7' - ], - entry_points=''' - [console_scripts] - jortsc=jortsc:main - ''' -) diff --git a/src/main.zig b/src/main.zig new file mode 100644 index 0000000..128820d --- /dev/null +++ b/src/main.zig @@ -0,0 +1,5 @@ +const std = @import("std"); + +pub fn main() anyerror!void { + 
std.debug.warn("All your base are belong to us.\n"); +} From 31b0fa783c96b57972ded5795d6a2ca8e7615795 Mon Sep 17 00:00:00 2001 From: Luna Date: Fri, 31 May 2019 17:07:08 -0300 Subject: [PATCH 03/68] add file reads and main prompt --- src/main.zig | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/src/main.zig b/src/main.zig index 128820d..40d72e9 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,5 +1,53 @@ const std = @import("std"); -pub fn main() anyerror!void { - std.debug.warn("All your base are belong to us.\n"); +const Allocator = std.mem.Allocator; + +fn run(data: []u8) void {} + +fn runFile(allocator: *Allocator, path: []const u8) !void { + var lox_file = try std.fs.File.openRead(path); + defer lox_file.close(); + + const total_bytes = try lox_file.getEndPos(); + var slice = try allocator.alloc(u8, total_bytes); + _ = try lox_file.read(slice); + + run(slice); +} + +fn runPrompt(allocator: *Allocator) !void { + var stdout_file = try std.io.getStdOut(); + const stdout = &stdout_file.outStream().stream; + + while (true) { + try stdout.print(">"); + var buffer = try std.Buffer.init(allocator, ""[0..]); + + var line = std.io.readLine(&buffer) catch |err| { + if (err == error.EndOfStream) return; + return err; + }; + + run(line); + } +} + +pub fn main() anyerror!void { + var da = std.heap.DirectAllocator.init(); + var arena = std.heap.ArenaAllocator.init(&da.allocator); + var allocator = &arena.allocator; + + var args_it = std.process.args(); + + const jorts_arg0 = try (args_it.next(allocator) orelse { + // if you ever reach this, tell me what is your os lmao + unreachable; + }); + + const lox_path = try (args_it.next(allocator) orelse { + try runPrompt(allocator); + return; + }); + + try runFile(allocator, lox_path); } From 9a2c50a53ef27e47753063b1f08458254261d4d5 Mon Sep 17 00:00:00 2001 From: Luna Date: Fri, 31 May 2019 21:23:51 -0300 Subject: [PATCH 04/68] add basic tokens and a basic lexer 
--- src/main.zig | 34 ++++++++++++++++-- src/scanner.zig | 93 +++++++++++++++++++++++++++++++++++++++++++++++++ src/token.zig | 93 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 217 insertions(+), 3 deletions(-) create mode 100644 src/scanner.zig create mode 100644 src/token.zig diff --git a/src/main.zig b/src/main.zig index 40d72e9..71cfd6b 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,8 +1,35 @@ const std = @import("std"); const Allocator = std.mem.Allocator; +const Scanner = @import("scanner.zig").Scanner; -fn run(data: []u8) void {} +pub var hadError = false; + +fn run(allocator: *Allocator, data: []u8) !void { + var stdout_file = try std.io.getStdOut(); + const stdout = &stdout_file.outStream().stream; + + var scanner = Scanner.init(allocator, data); + var tokens = try scanner.scanTokens(); + var it = tokens.iterator(); + + while (it.next()) |token| { + try token.Simple.printToken(stdout); + hadError = false; + } +} + +pub fn doError(line: usize, message: []const u8) !void { + try errorReport(line, "", message); +} + +pub fn errorReport(line: usize, where: []const u8, message: []const u8) !void { + var stdout_file = try std.io.getStdOut(); + const stdout = &stdout_file.outStream().stream; + + try stdout.print("[line {}] Error {}: {}\n", line, where, message); + hadError = true; +} fn runFile(allocator: *Allocator, path: []const u8) !void { var lox_file = try std.fs.File.openRead(path); @@ -12,7 +39,8 @@ fn runFile(allocator: *Allocator, path: []const u8) !void { var slice = try allocator.alloc(u8, total_bytes); _ = try lox_file.read(slice); - run(slice); + try run(allocator, slice); + if (hadError) std.os.exit(65); } fn runPrompt(allocator: *Allocator) !void { @@ -28,7 +56,7 @@ fn runPrompt(allocator: *Allocator) !void { return err; }; - run(line); + try run(allocator, line); } } diff --git a/src/scanner.zig b/src/scanner.zig new file mode 100644 index 0000000..c4f9100 --- /dev/null +++ b/src/scanner.zig @@ -0,0 +1,93 @@ +const std = 
@import("std"); + +const token = @import("token.zig"); +const main = @import("main.zig"); + +const TokenList = std.ArrayList(token.Token); + +pub const Scanner = struct { + source: []u8, + tokens: TokenList, + + start: usize = 0, + current: usize = 0, + line: usize = 1, + + pub fn init(allocator: *std.mem.Allocator, data: []u8) Scanner { + return Scanner{ + .source = data, + .tokens = TokenList.init(allocator), + }; + } + + fn isAtEnd(self: *Scanner) bool { + return self.current >= self.source.len; + } + + fn advance(self: *Scanner) u8 { + self.current += 1; + return self.source[self.current - 1]; + } + + fn currentLexeme(self: *Scanner) []u8 { + return self.source[self.start..self.current]; + } + + fn addSimpleToken(self: *Scanner, ttype: token.TokenType) !void { + try self.addToken(token.Token{ + .Simple = token.SimpleToken.init( + ttype, + self.currentLexeme(), + self.line, + {}, + ), + }); + } + + fn addToken( + self: *Scanner, + tok: token.Token, + ) !void { + try self.tokens.append(tok); + } + + fn scanToken(self: *Scanner) !void { + var c = self.advance(); + + switch (c) { + '(' => try self.addSimpleToken(.LEFT_PAREN), + ')' => try self.addSimpleToken(.RIGHT_PAREN), + '{' => try self.addSimpleToken(.LEFT_BRACE), + '}' => try self.addSimpleToken(.RIGHT_BRACE), + ',' => try self.addSimpleToken(.COMMA), + '.' => try self.addSimpleToken(.DOT), + '-' => try self.addSimpleToken(.MINUS), + '+' => try self.addSimpleToken(.PLUS), + ';' => try self.addSimpleToken(.SEMICOLON), + '*' => try self.addSimpleToken(.STAR), + else => { + try main.doError(self.line, "Unexpected character"); + }, + } + } + + pub fn scanTokens(self: *Scanner) !TokenList { + // while we aren't at the end, we're still consuming + // tokens. 
+ while (!self.isAtEnd()) { + self.start = self.current; + try self.scanToken(); + } + + try self.addToken(token.Token{ + .Simple = token.SimpleToken.init( + .EOF, + "", + self.line, + {}, + ), + }); + + return self.tokens; + } +}; diff --git a/src/token.zig b/src/token.zig new file mode 100644 index 0000000..0ce1283 --- /dev/null +++ b/src/token.zig @@ -0,0 +1,93 @@ +const std = @import("std"); + +pub const TokenType = enum { + // Single-character tokens. + LEFT_PAREN, + RIGHT_PAREN, + LEFT_BRACE, + RIGHT_BRACE, + COMMA, + DOT, + MINUS, + PLUS, + SEMICOLON, + SLASH, + STAR, + + // One or two character tokens. + BANG, + BANG_EQUAL, + EQUAL, + EQUAL_EQUAL, + GREATER, + GREATER_EQUAL, + LESS, + LESS_EQUAL, + + // Literals. + IDENTIFIER, + STRING, + NUMBER, + + // Keywords. + AND, + CLASS, + ELSE, + FALSE, + FUN, + FOR, + IF, + NIL, + OR, + PRINT, + RETURN, + SUPER, + THIS, + TRUE, + VAR, + WHILE, + + EOF, +}; + +pub fn TokenFactory( + comptime T: type, +) type { + return struct { + const Self = @This(); + + ttype: TokenType, + lexeme: []u8, + line: usize, + literal: T, + + pub fn init( + ttype: TokenType, + lexeme: []u8, + line: usize, + literal: T, + ) Self { + return Self{ + .ttype = ttype, + .lexeme = lexeme, + .line = line, + .literal = literal, + }; + } + + pub fn printToken(self: Self, stdout: var) !void { + try stdout.print( + "Token(type={x}, lexeme='{}', line={}\n", + self.ttype, + self.lexeme, + self.line, + ); + } + }; +} + +pub const SimpleToken = TokenFactory(void); + +pub const Token = union { + Simple: SimpleToken, +}; From d1db7a0bd976416ea5f38744f0075fa2d47ba56e Mon Sep 17 00:00:00 2001 From: Luna Date: Fri, 31 May 2019 21:46:18 -0300 Subject: [PATCH 05/68] add consumption of comments and strings --- src/main.zig | 10 +++++- src/scanner.zig | 88 +++++++++++++++++++++++++++++++++++++++++++++++++ src/token.zig | 26 +++++++++++---- 3 files changed, 116 insertions(+), 8 deletions(-) diff --git a/src/main.zig b/src/main.zig index 71cfd6b..3423204 100644 
--- a/src/main.zig +++ b/src/main.zig @@ -14,7 +14,15 @@ fn run(allocator: *Allocator, data: []u8) !void { var it = tokens.iterator(); while (it.next()) |token| { - try token.Simple.printToken(stdout); + switch (token) { + .Simple => |value| { + try value.printToken(stdout); + }, + .Slice => |value| { + try value.printToken(stdout); + }, + } + hadError = false; } } diff --git a/src/scanner.zig b/src/scanner.zig index c4f9100..8bbc5a8 100644 --- a/src/scanner.zig +++ b/src/scanner.zig @@ -44,6 +44,17 @@ pub const Scanner = struct { }); } + fn addSliceToken(self: *Scanner, ttype: token.TokenType, slice: []u8) !void { + try self.addToken(token.Token{ + .Slice = token.SliceToken.init( + ttype, + self.currentLexeme(), + self.line, + slice, + ), + }); + } + fn addToken( self: *Scanner, tok: token.Token, @@ -51,6 +62,59 @@ pub const Scanner = struct { try self.tokens.append(tok); } + /// Check if the next character matches what is expected. + fn match(self: *Scanner, expected: u8) bool { + if (self.isAtEnd()) return false; + if (self.source[self.current] != expected) return false; + + self.current += 1; + return true; + } + + /// Add a SimpleToken of type_match if the next character is + /// `expected`. Adds a SimpleToken of type_nomatch when it is not. + fn addMatchToken( + self: *Scanner, + expected: u8, + type_match: token.TokenType, + type_nomatch: token.TokenType, + ) !void { + if (self.match(expected)) { + try self.addSimpleToken(type_match); + } else { + try self.addSimpleToken(type_nomatch); + } + } + + fn peek(self: *Scanner) u8 { + if (self.isAtEnd()) return 0; + return self.source[self.current]; + } + + fn doString(self: *Scanner) !void { + // consume entire string + while (self.peek() != '"' and !self.isAtEnd()) { + if (self.peek() == '\n') self.line += 1; + _ = self.advance(); + } + + // unterminated string. + if (self.isAtEnd()) { + try main.doError(self.line, "Unterminated string."); + return; + } + + // the closing ". 
+ _ = self.advance(); + + // trim the surrounding quotes. + try self.addSliceToken( + .STRING, + self.source[self.start + 1 .. self.current - 1], + ); + } + + /// Scan through our tokens and add them to the Scanner's token list. fn scanToken(self: *Scanner) !void { var c = self.advance(); @@ -65,6 +129,30 @@ pub const Scanner = struct { '+' => try self.addSimpleToken(.PLUS), ';' => try self.addSimpleToken(.SEMICOLON), '*' => try self.addSimpleToken(.STAR), + + '!' => try self.addMatchToken('=', .BANG_EQUAL, .BANG), + '=' => try self.addMatchToken('=', .EQUAL_EQUAL, .EQUAL), + '<' => try self.addMatchToken('=', .LESS_EQUAL, .LESS), + '>' => try self.addMatchToken('=', .GREATER_EQUAL, .GREATER), + + '/' => blk: { + // consume comments + if (self.match('/')) { + while (self.peek() != '\n' and !self.isAtEnd()) { + _ = self.advance(); + } + } else { + try self.addSimpleToken(.SLASH); + } + }, + + ' ', '\r', '\t' => blk: {}, + '\n' => blk: { + self.line += 1; + }, + + '"' => try self.doString(), + else => { try main.doError(self.line, "Unexpected character"); }, diff --git a/src/token.zig b/src/token.zig index 0ce1283..4460dad 100644 --- a/src/token.zig +++ b/src/token.zig @@ -76,18 +76,30 @@ pub fn TokenFactory( } pub fn printToken(self: Self, stdout: var) !void { - try stdout.print( - "Token(type={x}, lexeme='{}', line={}\n", - self.ttype, - self.lexeme, - self.line, - ); + if (T == void) { + try stdout.print( + "Token(type={x}, lexeme='{}', line={})\n", + self.ttype, + self.lexeme, + self.line, + ); + } else { + try stdout.print( + "Token(type={x}, lexeme='{}', line={} literal='{}')\n", + self.ttype, + self.lexeme, + self.line, + self.literal, + ); + } } }; } pub const SimpleToken = TokenFactory(void); +pub const SliceToken = TokenFactory([]u8); -pub const Token = union { +pub const Token = union(enum) { Simple: SimpleToken, + Slice: SliceToken, }; From 69aa7b493da7a0536199ed4a69c734567ab6cbfc Mon Sep 17 00:00:00 2001 From: Luna Date: Fri, 31 May 2019 22:08:41 -0300 
Subject: [PATCH 06/68] add number tokens --- src/main.zig | 3 +++ src/scanner.zig | 53 ++++++++++++++++++++++++++++++++++++++++++++++++- src/token.zig | 2 ++ 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/src/main.zig b/src/main.zig index 3423204..807041d 100644 --- a/src/main.zig +++ b/src/main.zig @@ -21,6 +21,9 @@ fn run(allocator: *Allocator, data: []u8) !void { .Slice => |value| { try value.printToken(stdout); }, + .Number => |value| { + try value.printToken(stdout); + }, } hadError = false; diff --git a/src/scanner.zig b/src/scanner.zig index 8bbc5a8..051a98f 100644 --- a/src/scanner.zig +++ b/src/scanner.zig @@ -5,6 +5,10 @@ const main = @import("main.zig"); const TokenList = std.ArrayList(token.Token); +fn isDigit(char: u8) bool { + return char >= '0' and char <= '9'; +} + pub const Scanner = struct { source: []u8, tokens: TokenList, @@ -55,6 +59,18 @@ pub const Scanner = struct { }); } + /// Keep in mind Lox only has a single number type and that is a float one. + fn addNumberToken(self: *Scanner, ttype: token.TokenType, num: f32) !void { + try self.addToken(token.Token{ + .Number = token.NumberToken.init( + ttype, + self.currentLexeme(), + self.line, + num, + ), + }); + } + fn addToken( self: *Scanner, tok: token.Token, @@ -114,6 +130,37 @@ pub const Scanner = struct { ); } + fn peekNext(self: *Scanner) u8 { + if (self.current + 1 >= self.source.len) return 0; + return self.source[self.current + 1]; + } + + /// Consume a number + fn doNumber(self: *Scanner) !void { + while (isDigit(self.peek())) { + _ = self.advance(); + } + + // check if its a number like 12.34, where the '.' character + // exists and the one next to it is a digit. + if (self.peek() == '.' 
and isDigit(self.peekNext())) { + _ = self.advance(); + + while (isDigit(self.peek())) { + _ = self.advance(); + } + } + + // after going through all of the number, we can just use fmt.parseFloat + + var num = try std.fmt.parseFloat( + f32, + self.source[self.start..self.current], + ); + + try self.addNumberToken(.NUMBER, num); + } + /// Scan through our tokens and add them to the Scanner's token list. fn scanToken(self: *Scanner) !void { var c = self.advance(); @@ -154,7 +201,11 @@ pub const Scanner = struct { '"' => try self.doString(), else => { - try main.doError(self.line, "Unexpected character"); + if (isDigit(c)) { + try self.doNumber(); + } else { + try main.doError(self.line, "Unexpected character"); + } }, } } diff --git a/src/token.zig b/src/token.zig index 4460dad..c4400a3 100644 --- a/src/token.zig +++ b/src/token.zig @@ -98,8 +98,10 @@ pub fn TokenFactory( pub const SimpleToken = TokenFactory(void); pub const SliceToken = TokenFactory([]u8); +pub const NumberToken = TokenFactory(f32); pub const Token = union(enum) { Simple: SimpleToken, Slice: SliceToken, + Number: NumberToken, }; From 9d4c1249b4db4a1ea0800893f29e80ac514f12e7 Mon Sep 17 00:00:00 2001 From: Luna Date: Fri, 31 May 2019 22:34:10 -0300 Subject: [PATCH 07/68] add keyword map when scanner is initialized - add basic identifier reading --- src/main.zig | 2 +- src/scanner.zig | 77 ++++++++++++++++++++++++++++++++++++++++++++++++- src/token.zig | 2 +- 3 files changed, 78 insertions(+), 3 deletions(-) diff --git a/src/main.zig b/src/main.zig index 807041d..3dd56e1 100644 --- a/src/main.zig +++ b/src/main.zig @@ -9,7 +9,7 @@ fn run(allocator: *Allocator, data: []u8) !void { var stdout_file = try std.io.getStdOut(); const stdout = &stdout_file.outStream().stream; - var scanner = Scanner.init(allocator, data); + var scanner = try Scanner.init(allocator, data); var tokens = try scanner.scanTokens(); var it = tokens.iterator(); diff --git a/src/scanner.zig b/src/scanner.zig index 051a98f..020cd53 
100644 --- a/src/scanner.zig +++ b/src/scanner.zig @@ -4,23 +4,88 @@ const token = @import("token.zig"); const main = @import("main.zig"); const TokenList = std.ArrayList(token.Token); +const TokenType = token.TokenType; fn isDigit(char: u8) bool { return char >= '0' and char <= '9'; } +fn isAlpha(c: u8) bool { + return (c >= 'a' and c <= 'z') or + (c >= 'A' and c <= 'Z') or + c == '_'; +} + +fn isAlphaNumeric(char: u8) bool { + return isAlpha(char) or isDigit(char); +} + +// hashmaps don't work on HashMaps for some reason. anyways. +pub const KeywordMap = std.AutoHashMap([]const u8, u6); + +fn initKeywordMap(allocator: *std.mem.Allocator) !KeywordMap { + var map = KeywordMap.init(allocator); + + const keywords = [][]const u8{ + "and"[0..], + "class"[0..], + "else"[0..], + "false"[0..], + "for"[0..], + "fun"[0..], + "if"[0..], + "nil"[0..], + "or"[0..], + "print"[0..], + "return"[0..], + "super"[0..], + "this"[0..], + "true"[0..], + "var"[0..], + "while"[0..], + }; + + const tags = []TokenType{ + TokenType.AND, + TokenType.CLASS, + TokenType.ELSE, + TokenType.FALSE, + TokenType.FOR, + TokenType.FUN, + TokenType.IF, + TokenType.NIL, + TokenType.OR, + TokenType.PRINT, + TokenType.RETURN, + TokenType.SUPER, + TokenType.THIS, + TokenType.TRUE, + TokenType.VAR, + TokenType.WHILE, + }; + + for (keywords) |keyword, idx| { + var tag = @enumToInt(tags[idx]); + _ = try map.put(keyword, tag); + } + + return map; +} + pub const Scanner = struct { source: []u8, tokens: TokenList, + keywords: KeywordMap, start: usize = 0, current: usize = 0, line: usize = 1, - pub fn init(allocator: *std.mem.Allocator, data: []u8) Scanner { + pub fn init(allocator: *std.mem.Allocator, data: []u8) !Scanner { return Scanner{ .source = data, .tokens = TokenList.init(allocator), + .keywords = try initKeywordMap(allocator), }; } @@ -161,6 +226,14 @@ pub const Scanner = struct { try self.addNumberToken(.NUMBER, num); } + fn doIdentifier(self: *Scanner) !void { + while (isAlphaNumeric(self.peek())) { + 
_ = self.advance(); + } + + try self.addSimpleToken(.IDENTIFIER); + } + /// Scan through our tokens and add them to the Scanner's token list. fn scanToken(self: *Scanner) !void { var c = self.advance(); @@ -203,6 +276,8 @@ pub const Scanner = struct { else => { if (isDigit(c)) { try self.doNumber(); + } else if (isAlpha(c)) { + try self.doIdentifier(); } else { try main.doError(self.line, "Unexpected character"); } diff --git a/src/token.zig b/src/token.zig index c4400a3..9e88ee1 100644 --- a/src/token.zig +++ b/src/token.zig @@ -1,6 +1,6 @@ const std = @import("std"); -pub const TokenType = enum { +pub const TokenType = enum(u6) { // Single-character tokens. LEFT_PAREN, RIGHT_PAREN, From bba969922fb7dc8a80ef52285cafbc625f964335 Mon Sep 17 00:00:00 2001 From: Luna Date: Fri, 31 May 2019 22:39:53 -0300 Subject: [PATCH 08/68] add reading of keywords on doIdentifier --- src/scanner.zig | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/scanner.zig b/src/scanner.zig index 020cd53..f2073d1 100644 --- a/src/scanner.zig +++ b/src/scanner.zig @@ -231,7 +231,20 @@ pub const Scanner = struct { _ = self.advance(); } - try self.addSimpleToken(.IDENTIFIER); + // after reading the identifier, we check + // if it is any of our keywords, if it is, then we add + // the specificed keyword type. if not, just .IDENTIFIER + var text = self.source[self.start..self.current]; + var type_opt = self.keywords.get(text); + var toktype: TokenType = undefined; + + if (type_opt) |kv| { + toktype = @intToEnum(TokenType, kv.value); + } else { + toktype = TokenType.IDENTIFIER; + } + + try self.addSimpleToken(toktype); } /// Scan through our tokens and add them to the Scanner's token list. 
From 756f85d77de1490c7df09d36463801a3b5349172 Mon Sep 17 00:00:00 2001 From: Luna Date: Fri, 31 May 2019 22:45:23 -0300 Subject: [PATCH 09/68] add multiline block comments --- src/scanner.zig | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/scanner.zig b/src/scanner.zig index f2073d1..0cffde2 100644 --- a/src/scanner.zig +++ b/src/scanner.zig @@ -272,6 +272,19 @@ pub const Scanner = struct { // consume comments if (self.match('/')) { while (self.peek() != '\n' and !self.isAtEnd()) { + _ = self.advance(); + } + } else if (self.match('*')) { + // multiline block comments are messier to work with, but + // we can still do it! + while (true) { + if (self.isAtEnd()) break; + // check '*/' + if (self.peek() == '*' and self.peekNext() == '/') { + self.current += 2; + break; + } + _ = self.advance(); } } else { From 3b73978f40c5b198b1769ffc6c67547afe535ec0 Mon Sep 17 00:00:00 2001 From: Luna Date: Fri, 31 May 2019 23:37:45 -0300 Subject: [PATCH 10/68] add make_exprs.py script --- scripts/make_exprs.py | 49 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100755 scripts/make_exprs.py diff --git a/scripts/make_exprs.py b/scripts/make_exprs.py new file mode 100755 index 0000000..97ed36f --- /dev/null +++ b/scripts/make_exprs.py @@ -0,0 +1,49 @@ +#!/usr/bin/python3.6 + +from pathlib import Path + +EXPR_TYPES = { + 'Binary': ('left: Expr', 'operator: Token', 'right: Expr'), + 'Grouping': ('expression: Expr'), + 'Unary': ('operator: Token', 'right: Expr'), + + # NOTE: when adding new Literals, add new Literal types, instead of just + # doing Literal with an 'Object value'. it won't work. 
+} + +def _gen_expr_decls(): + res = [] + + for expr_type, expr_params in EXPR_TYPES.items(): + res.append(f'pub const {expr_type} = struct {{') + + for param in expr_params: + res.append(f'{param},') + + return '\n'.join(res) + + +def do_base_union(union_name: str): + res = [ + 'const Token = @import("token.zig").Token;\n' + ] + + res.extend(_gen_expr_decls()) + res.append(f'pub const {union_name} = union {{') + + for expr_type in EXPR_TYPES: + res.append(f'{expr_type}: {expr_type},') + + res.append('}\n') + + return '\n'.join(res) + + +def main(): + expr_file = Path('./src') / 'expr.zig' + expr_file.unlink() + expr_file.write_text(do_base_union('Expr')) + + +if __name__ == '__main__': + main() From 8007df6853ce4832ce5e7dc20a4253efc7d49a6f Mon Sep 17 00:00:00 2001 From: Luna Date: Fri, 31 May 2019 23:38:21 -0300 Subject: [PATCH 11/68] add mypy_cache to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 3cef7be..63487f0 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ zig-cache/ +*.mypy_cache* From 6ff75a09267f078127c03616c8da2909f791672b Mon Sep 17 00:00:00 2001 From: Luna Date: Fri, 31 May 2019 23:43:46 -0300 Subject: [PATCH 12/68] finish make_exprs.py script - add src/expr.zig --- scripts/make_exprs.py | 22 ++++++++++++++++------ src/expr.zig | 28 ++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 6 deletions(-) create mode 100644 src/expr.zig diff --git a/scripts/make_exprs.py b/scripts/make_exprs.py index 97ed36f..df4020f 100755 --- a/scripts/make_exprs.py +++ b/scripts/make_exprs.py @@ -4,7 +4,7 @@ from pathlib import Path EXPR_TYPES = { 'Binary': ('left: Expr', 'operator: Token', 'right: Expr'), - 'Grouping': ('expression: Expr'), + 'Grouping': ('expression: Expr',), 'Unary': ('operator: Token', 'right: Expr'), # NOTE: when adding new Literals, add new Literal types, instead of just @@ -18,7 +18,9 @@ def _gen_expr_decls(): res.append(f'pub const {expr_type} = struct {{') for param in 
expr_params: - res.append(f'{param},') + res.append(f' {param},') + + res.append('};\n') return '\n'.join(res) @@ -28,19 +30,27 @@ def do_base_union(union_name: str): 'const Token = @import("token.zig").Token;\n' ] - res.extend(_gen_expr_decls()) - res.append(f'pub const {union_name} = union {{') + res.append(_gen_expr_decls()) + + res.append(f'pub const {union_name}Type = enum {{') + for expr_type in EXPR_TYPES: + res.append(f' {expr_type},') + res.append('};\n') + + + res.append(f'pub const {union_name} = union({union_name}Type) {{') for expr_type in EXPR_TYPES: - res.append(f'{expr_type}: {expr_type},') + res.append(f' {expr_type}: {expr_type},') - res.append('}\n') + res.append('};\n') return '\n'.join(res) def main(): expr_file = Path('./src') / 'expr.zig' + expr_file.touch() expr_file.unlink() expr_file.write_text(do_base_union('Expr')) diff --git a/src/expr.zig b/src/expr.zig new file mode 100644 index 0000000..e00e76f --- /dev/null +++ b/src/expr.zig @@ -0,0 +1,28 @@ +const Token = @import("token.zig").Token; + +pub const Binary = struct { + left: Expr, + operator: Token, + right: Expr, +}; + +pub const Grouping = struct { + expression: Expr, +}; + +pub const Unary = struct { + operator: Token, + right: Expr, +}; + +pub const ExprType = enum { + Binary, + Grouping, + Unary, +}; + +pub const Expr = union(ExprType) { + Binary: Binary, + Grouping: Grouping, + Unary: Unary, +}; From 727a25963896416ddb49adff324e46316e3386bc Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 00:04:04 -0300 Subject: [PATCH 13/68] rm make_exprs.py, moving to the c part --- .gitignore | 1 - scripts/make_exprs.py | 59 ------------------------------------------- 2 files changed, 60 deletions(-) delete mode 100755 scripts/make_exprs.py diff --git a/.gitignore b/.gitignore index 63487f0..3cef7be 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1 @@ zig-cache/ -*.mypy_cache* diff --git a/scripts/make_exprs.py b/scripts/make_exprs.py deleted file mode 100755 index df4020f..0000000 
--- a/scripts/make_exprs.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/python3.6 - -from pathlib import Path - -EXPR_TYPES = { - 'Binary': ('left: Expr', 'operator: Token', 'right: Expr'), - 'Grouping': ('expression: Expr',), - 'Unary': ('operator: Token', 'right: Expr'), - - # NOTE: when adding new Literals, add new Literal types, instead of just - # doing Literal with an 'Object value'. it won't work. -} - -def _gen_expr_decls(): - res = [] - - for expr_type, expr_params in EXPR_TYPES.items(): - res.append(f'pub const {expr_type} = struct {{') - - for param in expr_params: - res.append(f' {param},') - - res.append('};\n') - - return '\n'.join(res) - - -def do_base_union(union_name: str): - res = [ - 'const Token = @import("token.zig").Token;\n' - ] - - res.append(_gen_expr_decls()) - - res.append(f'pub const {union_name}Type = enum {{') - for expr_type in EXPR_TYPES: - res.append(f' {expr_type},') - res.append('};\n') - - - res.append(f'pub const {union_name} = union({union_name}Type) {{') - - for expr_type in EXPR_TYPES: - res.append(f' {expr_type}: {expr_type},') - - res.append('};\n') - - return '\n'.join(res) - - -def main(): - expr_file = Path('./src') / 'expr.zig' - expr_file.touch() - expr_file.unlink() - expr_file.write_text(do_base_union('Expr')) - - -if __name__ == '__main__': - main() From c4401dc8cfc2dcf5c46cab13c5d9865831d5529f Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 01:20:06 -0300 Subject: [PATCH 14/68] moving to the virtual machine part of the book, pt 1 the java part would still fuck me up since it relies on OOP stuff that zig doesn't provide, so i'm skipping towards the C part of the book which will hopefully be more understandable from a zig perspective. 
--- src/chunk.zig | 71 ++++++++++++++++++++++++++++++++++++++++++++++++ src/compiler.zig | 13 +++++++++ src/expr.zig | 28 ------------------- src/main.zig | 41 +++++++++++++++++++++++----- src/vm.zig | 7 +++++ 5 files changed, 125 insertions(+), 35 deletions(-) create mode 100644 src/chunk.zig create mode 100644 src/compiler.zig delete mode 100644 src/expr.zig create mode 100644 src/vm.zig diff --git a/src/chunk.zig b/src/chunk.zig new file mode 100644 index 0000000..5553975 --- /dev/null +++ b/src/chunk.zig @@ -0,0 +1,71 @@ +const std = @import("std"); + +const Allocator = std.mem.Allocator; + +// hack. ugly hack. zig has compiler crash. +const AllOpcodes = struct { + pub Return: u8 = 0, +}; + +pub const OpCode = AllOpcodes{}; + +fn simpleInstruction( + stdout: var, + comptime name: []const u8, + offset: usize, +) !usize { + try stdout.print("{}\n", name); + return offset + 1; +} + +pub const Chunk = struct { + count: usize, + code: []u8, + allocator: *Allocator, + + pub fn init(allocator: *Allocator) !Chunk { + return Chunk{ + .count = 0, + .allocator = allocator, + .code = try allocator.alloc(u8, 0), + }; + } + + pub fn write(self: *Chunk, byte: u8) !void { + if (self.code.len < self.count + 1) { + self.code = try self.allocator.realloc( + self.code, + self.count + 1, + ); + } + + self.code[self.count] = byte; + self.count += 1; + } + + pub fn disassembleInstruction( + self: *Chunk, + stdout: var, + index: usize, + ) !usize { + try stdout.print("{} ", index); + + var instruction = self.code[index]; + + if (instruction == 0) { + return try simpleInstruction(stdout, "OP_RETURN", index); + } else { + try stdout.print("Unknown opcode: {}\n", instruction); + return index + 1; + } + } + + pub fn disassemble(self: *Chunk, stdout: var, name: []const u8) !void { + try stdout.print("== {} ==\n", name); + + var i: usize = 0; + while (i < self.count) : (i += 1) { + i = try self.disassembleInstruction(stdout, i); + } + } +}; diff --git a/src/compiler.zig b/src/compiler.zig 
new file mode 100644 index 0000000..28e9f15 --- /dev/null +++ b/src/compiler.zig @@ -0,0 +1,13 @@ +const token = @import("token.zig"); +const scanner = @import("scanner.zig"); +const main = @import("main.zig"); + +pub const Compiler = struct { + tokens: *scanner.TokenList, + + fn init(tokens: *scanner.TokenList) Compiler { + return Compiler{ .tokens = tokens }; + } + + fn advance(self: *Compiler) void {} +}; diff --git a/src/expr.zig b/src/expr.zig deleted file mode 100644 index e00e76f..0000000 --- a/src/expr.zig +++ /dev/null @@ -1,28 +0,0 @@ -const Token = @import("token.zig").Token; - -pub const Binary = struct { - left: Expr, - operator: Token, - right: Expr, -}; - -pub const Grouping = struct { - expression: Expr, -}; - -pub const Unary = struct { - operator: Token, - right: Expr, -}; - -pub const ExprType = enum { - Binary, - Grouping, - Unary, -}; - -pub const Expr = union(ExprType) { - Binary: Binary, - Grouping: Grouping, - Unary: Unary, -}; diff --git a/src/main.zig b/src/main.zig index 3dd56e1..c895b02 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,7 +1,11 @@ const std = @import("std"); const Allocator = std.mem.Allocator; -const Scanner = @import("scanner.zig").Scanner; + +// const Scanner = @import("scanner.zig").Scanner; +const chunk = @import("chunk.zig"); + +//const Compiler = @import("compiler.zig").Compiler; pub var hadError = false; @@ -11,6 +15,7 @@ fn run(allocator: *Allocator, data: []u8) !void { var scanner = try Scanner.init(allocator, data); var tokens = try scanner.scanTokens(); + var it = tokens.iterator(); while (it.next()) |token| { @@ -30,6 +35,8 @@ fn run(allocator: *Allocator, data: []u8) !void { } } +// fn run() !void {} + pub fn doError(line: usize, message: []const u8) !void { try errorReport(line, "", message); } @@ -71,22 +78,42 @@ fn runPrompt(allocator: *Allocator) !void { } } -pub fn main() anyerror!void { +pub fn mainOld() anyerror!void { var da = std.heap.DirectAllocator.init(); var arena = 
std.heap.ArenaAllocator.init(&da.allocator); + defer arena.deinit(); var allocator = &arena.allocator; var args_it = std.process.args(); - const jorts_arg0 = try (args_it.next(allocator) orelse { + var jorts_arg0 = try (args_it.next(allocator) orelse { // if you ever reach this, tell me what is your os lmao unreachable; }); - const lox_path = try (args_it.next(allocator) orelse { - try runPrompt(allocator); - return; + var lox_path = try (args_it.next(allocator) orelse { + // try runPrompt(allocator); + unreachable; }); - try runFile(allocator, lox_path); + //var vm = VM.init(); + //try runFile(allocator, lox_path); +} + +pub fn main() !void { + var da = std.heap.DirectAllocator.init(); + var arena = std.heap.ArenaAllocator.init(&da.allocator); + defer arena.deinit(); + var allocator = &arena.allocator; + + var stdout_file = try std.io.getStdOut(); + const stdout = &stdout_file.outStream().stream; + + var chk = try chunk.Chunk.init(allocator); + + // this crashes zig??? lol + //var opcode_byte: u8 = @enumToInt(chunk.OpCode.Return); + try chk.write(chunk.OpCode.Return); + + try chk.disassemble(stdout, "test chunk"); } diff --git a/src/vm.zig b/src/vm.zig new file mode 100644 index 0000000..b70fb93 --- /dev/null +++ b/src/vm.zig @@ -0,0 +1,7 @@ +const Chunk = @import("chunk.zig"); + +pub const VM = struct { + chunk: *Chunk, + + pub fn init() VM {} +}; From ba78b39300821d0367aa6abdc0b4b86226231bfe Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 01:46:01 -0300 Subject: [PATCH 15/68] add constant values to the virtual machine --- src/chunk.zig | 50 ++++++++++++++++++++++++++++++++++++++++++++++---- src/main.zig | 7 ++++++- src/value.zig | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 5 deletions(-) create mode 100644 src/value.zig diff --git a/src/chunk.zig b/src/chunk.zig index 5553975..cbb5317 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -1,10 +1,12 @@ const std = @import("std"); +const value = @import("value.zig"); const 
Allocator = std.mem.Allocator; // hack. ugly hack. zig has compiler crash. const AllOpcodes = struct { - pub Return: u8 = 0, + pub Constant: u8 = 0, + pub Return: u8 = 1, }; pub const OpCode = AllOpcodes{}; @@ -18,31 +20,63 @@ fn simpleInstruction( return offset + 1; } +fn constantInstruction( + stdout: var, + comptime name: []const u8, + chunk: *Chunk, + offset: usize, +) !usize { + // get the constant's index in constants slice + var idx = chunk.code[offset + 1]; + + try stdout.print("\t{}\t{} '", name, idx); + try value.printValue(stdout, chunk.constants.values[idx]); + try stdout.print("'\n"); + + return offset + 2; +} + pub const Chunk = struct { count: usize, + lines: []usize, code: []u8, + allocator: *Allocator, + constants: value.ValueList, pub fn init(allocator: *Allocator) !Chunk { return Chunk{ .count = 0, .allocator = allocator, .code = try allocator.alloc(u8, 0), + .lines = try allocator.alloc(usize, 0), + .constants = try value.ValueList.init(allocator), }; } - pub fn write(self: *Chunk, byte: u8) !void { + pub fn write(self: *Chunk, byte: u8, line: usize) !void { if (self.code.len < self.count + 1) { self.code = try self.allocator.realloc( self.code, self.count + 1, ); + + self.lines = try self.allocator.realloc( + self.lines, + self.count + 1, + ); } self.code[self.count] = byte; + self.lines[self.count] = line; self.count += 1; } + pub fn addConstant(self: *Chunk, val: value.Value) !u8 { + try self.constants.write(val); + return self.constants.count - 1; + } + pub fn disassembleInstruction( self: *Chunk, stdout: var, @@ -50,10 +84,18 @@ pub const Chunk = struct { ) !usize { try stdout.print("{} ", index); + if (index > 0 and self.lines[index] == self.lines[index - 1]) { + try stdout.print(" | "); + } else { + try stdout.print("{} ", self.lines[index]); + } + var instruction = self.code[index]; - if (instruction == 0) { + if (instruction == OpCode.Return) { return try simpleInstruction(stdout, "OP_RETURN", index); + } else if (instruction == 
OpCode.Constant) { + return try constantInstruction(stdout, "OP_CONSTANT", self, index); } else { try stdout.print("Unknown opcode: {}\n", instruction); return index + 1; @@ -64,7 +106,7 @@ pub const Chunk = struct { try stdout.print("== {} ==\n", name); var i: usize = 0; - while (i < self.count) : (i += 1) { + while (i < self.count) { i = try self.disassembleInstruction(stdout, i); } } diff --git a/src/main.zig b/src/main.zig index c895b02..46801f3 100644 --- a/src/main.zig +++ b/src/main.zig @@ -113,7 +113,12 @@ pub fn main() !void { // this crashes zig??? lol //var opcode_byte: u8 = @enumToInt(chunk.OpCode.Return); - try chk.write(chunk.OpCode.Return); + //try chk.write(chunk.OpCode.Return); + + var constant = try chk.addConstant(1.2); + try chk.write(chunk.OpCode.Constant, 123); + try chk.write(constant, 123); + try chk.write(chunk.OpCode.Return, 123); try chk.disassemble(stdout, "test chunk"); } diff --git a/src/value.zig b/src/value.zig new file mode 100644 index 0000000..a84a79e --- /dev/null +++ b/src/value.zig @@ -0,0 +1,36 @@ +const std = @import("std"); + +const Allocator = std.mem.Allocator; + +// NOTE: right now, only numbers. 
+pub const Value = f64; + +pub fn printValue(stdout: var, value: Value) !void { + try stdout.print("{}", value); +} + +pub const ValueList = struct { + count: u8, + values: []Value, + allocator: *Allocator, + + pub fn init(allocator: *Allocator) !ValueList { + return ValueList{ + .count = 0, + .allocator = allocator, + .values = try allocator.alloc(Value, 0), + }; + } + + pub fn write(self: *ValueList, value: Value) !void { + if (self.values.len < self.count + 1) { + self.values = try self.allocator.realloc( + self.values, + self.count + 1, + ); + } + + self.values[self.count] = value; + self.count += 1; + } +}; From 2d33e03efb9d9c0c3d40929ca3745638f9d5b99f Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 02:06:23 -0300 Subject: [PATCH 16/68] add incomplete ConstantLong instruction - move ValueList's count to usize for ConstantLong --- src/chunk.zig | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++- src/main.zig | 4 +--- src/value.zig | 2 +- 3 files changed, 55 insertions(+), 5 deletions(-) diff --git a/src/chunk.zig b/src/chunk.zig index cbb5317..148b42e 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -6,7 +6,8 @@ const Allocator = std.mem.Allocator; // hack. ugly hack. zig has compiler crash. const AllOpcodes = struct { pub Constant: u8 = 0, - pub Return: u8 = 1, + pub ConstantLong: u8 = 1, + pub Return: u8 = 2, }; pub const OpCode = AllOpcodes{}; @@ -36,6 +37,32 @@ fn constantInstruction( return offset + 2; } +fn constantLongInstruction( + stdout: var, + comptime name: []const u8, + chunk: *Chunk, + offset: usize, +) !usize { + // get the constant's index in constants slice + var v0: u8 = chunk.code[offset + 3]; + var v1: u8 = chunk.code[offset + 2]; + var v2: u8 = chunk.code[offset + 1]; + + // TODO: this does not work. just decreased first term + // to fix a compile error. + + // we should also move the actual printing into its own + // function too since constantInstruction and + // constantLongInstruction share code. 
+ var idx: u24 = (v2 << 4) | (v1 << 7) | v0; + + try stdout.print("\t{}\t{} '", name, idx); + try value.printValue(stdout, chunk.constants.values[idx]); + try stdout.print("'\n"); + + return offset + 2; +} + pub const Chunk = struct { count: usize, lines: []usize, @@ -77,6 +104,24 @@ pub const Chunk = struct { return self.constants.count - 1; } + pub fn writeConstant(self: *Chunk, val: value.Value, line: usize) !void { + try self.constants.write(val); + var constant_idx = self.constants.count - 1; + + if (constant_idx < 256) { + try self.write(OpCode.Constant, line); + try self.write(@intCast(u8, constant_idx), line); + } else { + // TODO: convert the usize to u24, and from + // u24, split it into three u8's. + + // also convert from u8 back to u24. + // i know that we can do from two u8's to go to a u16 + // with (v1 << 7) | v0. + try self.write(0, line); + } + } + pub fn disassembleInstruction( self: *Chunk, stdout: var, @@ -96,6 +141,13 @@ pub const Chunk = struct { return try simpleInstruction(stdout, "OP_RETURN", index); } else if (instruction == OpCode.Constant) { return try constantInstruction(stdout, "OP_CONSTANT", self, index); + } else if (instruction == OpCode.ConstantLong) { + return try constantLongInstruction( + stdout, + "OP_CONSTANT_LONG", + self, + index, + ); } else { try stdout.print("Unknown opcode: {}\n", instruction); return index + 1; diff --git a/src/main.zig b/src/main.zig index 46801f3..9b19dd9 100644 --- a/src/main.zig +++ b/src/main.zig @@ -115,9 +115,7 @@ pub fn main() !void { //var opcode_byte: u8 = @enumToInt(chunk.OpCode.Return); //try chk.write(chunk.OpCode.Return); - var constant = try chk.addConstant(1.2); - try chk.write(chunk.OpCode.Constant, 123); - try chk.write(constant, 123); + var constant = try chk.writeConstant(1.2, 123); try chk.write(chunk.OpCode.Return, 123); try chk.disassemble(stdout, "test chunk"); diff --git a/src/value.zig b/src/value.zig index a84a79e..b97d402 100644 --- a/src/value.zig +++ b/src/value.zig @@ 
-10,7 +10,7 @@ pub fn printValue(stdout: var, value: Value) !void { } pub const ValueList = struct { - count: u8, + count: usize, values: []Value, allocator: *Allocator, From 14fa63e1f6227bbdb3282abbd993ce79d681ac3c Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 14:18:44 -0300 Subject: [PATCH 17/68] finish impl for ConstantLong --- src/chunk.zig | 46 +++++++++++++++++++++++----------------------- src/main.zig | 5 ++++- 2 files changed, 27 insertions(+), 24 deletions(-) diff --git a/src/chunk.zig b/src/chunk.zig index 148b42e..491d52b 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -15,26 +15,26 @@ pub const OpCode = AllOpcodes{}; fn simpleInstruction( stdout: var, comptime name: []const u8, - offset: usize, + index: usize, ) !usize { try stdout.print("{}\n", name); - return offset + 1; + return index + 1; } fn constantInstruction( stdout: var, comptime name: []const u8, chunk: *Chunk, - offset: usize, + index: usize, ) !usize { // get the constant's index in constants slice - var idx = chunk.code[offset + 1]; + var idx = chunk.code[index + 1]; try stdout.print("\t{}\t{} '", name, idx); try value.printValue(stdout, chunk.constants.values[idx]); try stdout.print("'\n"); - return offset + 2; + return index + 2; } fn constantLongInstruction( @@ -43,24 +43,19 @@ fn constantLongInstruction( chunk: *Chunk, offset: usize, ) !usize { - // get the constant's index in constants slice - var v0: u8 = chunk.code[offset + 3]; - var v1: u8 = chunk.code[offset + 2]; - var v2: u8 = chunk.code[offset + 1]; + // constantLong uses three u8's that encode a u24 as the + // contants' index. + var v3: u8 = chunk.code[offset + 1]; + var v2: u8 = chunk.code[offset + 2]; + var v1: u8 = chunk.code[offset + 3]; - // TODO: this does not work. just decreased first term - // to fix a compile error. - - // we should also move the actual printing into its own - // function too since constantInstruction and - // constantLongInstruction share code. 
- var idx: u24 = (v2 << 4) | (v1 << 7) | v0; + var idx: u24 = (@intCast(u24, v3) << 16) | (@intCast(u24, v2) << 8) | v1; try stdout.print("\t{}\t{} '", name, idx); try value.printValue(stdout, chunk.constants.values[idx]); try stdout.print("'\n"); - return offset + 2; + return offset + 4; } pub const Chunk = struct { @@ -112,13 +107,18 @@ pub const Chunk = struct { try self.write(OpCode.Constant, line); try self.write(@intCast(u8, constant_idx), line); } else { - // TODO: convert the usize to u24, and from - // u24, split it into three u8's. + var idx_u24: u24 = @intCast(u24, constant_idx); - // also convert from u8 back to u24. - // i know that we can do from two u8's to go to a u16 - // with (v1 << 7) | v0. - try self.write(0, line); + const mask = @intCast(u24, 0xff); + + const v1: u8 = @intCast(u8, idx_u24 & mask); + const v2: u8 = @intCast(u8, (idx_u24 >> 8) & mask); + const v3: u8 = @intCast(u8, (idx_u24 >> 16) & mask); + + try self.write(OpCode.ConstantLong, line); + try self.write(v3, line); + try self.write(v2, line); + try self.write(v1, line); } } diff --git a/src/main.zig b/src/main.zig index 9b19dd9..99628ac 100644 --- a/src/main.zig +++ b/src/main.zig @@ -115,7 +115,10 @@ pub fn main() !void { //var opcode_byte: u8 = @enumToInt(chunk.OpCode.Return); //try chk.write(chunk.OpCode.Return); - var constant = try chk.writeConstant(1.2, 123); + var i: usize = 0; + while (i < 260) : (i += 1) { + var constant = try chk.writeConstant(1.2, 123); + } try chk.write(chunk.OpCode.Return, 123); try chk.disassemble(stdout, "test chunk"); From dae3c259fdcd6d8a091decaae65ab8ace451bf40 Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 14:55:11 -0300 Subject: [PATCH 18/68] add basic virtual machine code --- src/main.zig | 15 ++++++---- src/vm.zig | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 83 insertions(+), 9 deletions(-) diff --git a/src/main.zig b/src/main.zig index 99628ac..1c982e3 100644 --- a/src/main.zig +++ b/src/main.zig @@ 
-4,6 +4,7 @@ const Allocator = std.mem.Allocator; // const Scanner = @import("scanner.zig").Scanner; const chunk = @import("chunk.zig"); +const vm = @import("vm.zig"); //const Compiler = @import("compiler.zig").Compiler; @@ -107,7 +108,7 @@ pub fn main() !void { var allocator = &arena.allocator; var stdout_file = try std.io.getStdOut(); - const stdout = &stdout_file.outStream().stream; + var stdout = &stdout_file.outStream().stream; var chk = try chunk.Chunk.init(allocator); @@ -115,11 +116,13 @@ pub fn main() !void { //var opcode_byte: u8 = @enumToInt(chunk.OpCode.Return); //try chk.write(chunk.OpCode.Return); - var i: usize = 0; - while (i < 260) : (i += 1) { - var constant = try chk.writeConstant(1.2, 123); - } + var constant = try chk.writeConstant(1.2, 123); try chk.write(chunk.OpCode.Return, 123); - try chk.disassemble(stdout, "test chunk"); + + var vmach = vm.VM.init(stdout, &chk); + + std.debug.warn("vm start\n"); + _ = try vmach.interpret(); + std.debug.warn("vm end\n"); } diff --git a/src/vm.zig b/src/vm.zig index b70fb93..fd510b3 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -1,7 +1,78 @@ -const Chunk = @import("chunk.zig"); +const std = @import("std"); +const chunk = @import("chunk.zig"); +const value = @import("value.zig"); + +const Chunk = chunk.Chunk; +const StdOut = *std.io.OutStream(std.fs.File.WriteError); + +pub const InterpretResult = enum { + Ok, + CompileError, + RuntimeError, +}; pub const VM = struct { - chunk: *Chunk, + chk: *Chunk, + ip: usize, + stdout: StdOut, - pub fn init() VM {} + pub fn init(stdout: StdOut, chk: *Chunk) VM { + return VM{ + .stdout = stdout, + .chk = chk, + .ip = 0, + }; + } + + fn readByte(self: *VM) u8 { + var byte: u8 = self.chk.code[self.ip]; + self.ip += 1; + return byte; + } + + fn readConst(self: *VM) value.Value { + return self.chk.constants.values[self.readByte()]; + } + + fn readConstLong(self: *VM) value.Value { + const v3 = self.readByte(); + const v2 = self.readByte(); + const v1 = self.readByte(); + const 
const_idx = (@intCast(u24, v3) << 16) | + (@intCast(u24, v2) << 8) | + v1; + + return self.chk.constants.values[const_idx]; + } + + fn run(self: *VM) !InterpretResult { + while (true) { + var instruction = self.readByte(); + + switch (instruction) { + chunk.OpCode.Constant => blk: { + var constant = self.readConst(); + try value.printValue(self.stdout, constant); + break :blk; + }, + chunk.OpCode.ConstantLong => blk: { + var constant = self.readConstLong(); + try value.printValue(self.stdout, constant); + break :blk; + }, + chunk.OpCode.Return => blk: { + return InterpretResult.Ok; + }, + else => blk: { + std.debug.warn("Unknown instruction: {x}\n", instruction); + return InterpretResult.RuntimeError; + }, + } + } + } + + pub fn interpret(self: *VM) !InterpretResult { + self.ip = 0; + return try self.run(); + } }; From 456bc951382741e34c4d17af01406ff7bda32f7b Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 15:01:39 -0300 Subject: [PATCH 19/68] vm: add debug flags --- src/main.zig | 6 +----- src/vm.zig | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/main.zig b/src/main.zig index 1c982e3..1d4bdc7 100644 --- a/src/main.zig +++ b/src/main.zig @@ -118,11 +118,7 @@ pub fn main() !void { var constant = try chk.writeConstant(1.2, 123); try chk.write(chunk.OpCode.Return, 123); - try chk.disassemble(stdout, "test chunk"); - var vmach = vm.VM.init(stdout, &chk); - - std.debug.warn("vm start\n"); + var vmach = vm.VM.init(stdout, &chk, true); _ = try vmach.interpret(); - std.debug.warn("vm end\n"); } diff --git a/src/vm.zig b/src/vm.zig index fd510b3..1765c6c 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -15,15 +15,23 @@ pub const VM = struct { chk: *Chunk, ip: usize, stdout: StdOut, + debug_flag: bool, - pub fn init(stdout: StdOut, chk: *Chunk) VM { + pub fn init(stdout: StdOut, chk: *Chunk, debug_flag: bool) VM { return VM{ .stdout = stdout, .chk = chk, .ip = 0, + .debug_flag = debug_flag, }; } + pub fn debug(self: *VM, 
comptime fmt: []const u8, args: ...) void { + if (self.debug_flag) { + std.debug.warn(fmt, args); + } + } + fn readByte(self: *VM) u8 { var byte: u8 = self.chk.code[self.ip]; self.ip += 1; @@ -47,17 +55,23 @@ pub const VM = struct { fn run(self: *VM) !InterpretResult { while (true) { + if (self.debug_flag) { + _ = try self.chk.disassembleInstruction(self.stdout, self.ip); + } + var instruction = self.readByte(); switch (instruction) { chunk.OpCode.Constant => blk: { var constant = self.readConst(); try value.printValue(self.stdout, constant); + try self.stdout.write("\n"); break :blk; }, chunk.OpCode.ConstantLong => blk: { var constant = self.readConstLong(); try value.printValue(self.stdout, constant); + try self.stdout.write("\n"); break :blk; }, chunk.OpCode.Return => blk: { @@ -73,6 +87,10 @@ pub const VM = struct { pub fn interpret(self: *VM) !InterpretResult { self.ip = 0; - return try self.run(); + + self.debug("VM start\n"); + var res = try self.run(); + self.debug("VM end\n"); + return res; } }; From 61e463713c8b5ddc8fa7922cefae3d93b53d83d3 Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 15:23:23 -0300 Subject: [PATCH 20/68] vm: add stack --- src/vm.zig | 56 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 8 deletions(-) diff --git a/src/vm.zig b/src/vm.zig index 1765c6c..b4243b8 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -5,6 +5,8 @@ const value = @import("value.zig"); const Chunk = chunk.Chunk; const StdOut = *std.io.OutStream(std.fs.File.WriteError); +pub const STACK_MAX = 256; + pub const InterpretResult = enum { Ok, CompileError, @@ -13,17 +15,31 @@ pub const InterpretResult = enum { pub const VM = struct { chk: *Chunk, - ip: usize, + ip: usize = 0, + + stack: [STACK_MAX]value.Value, + stackTop: usize = 0, + stdout: StdOut, debug_flag: bool, + fn resetStack(self: *VM) void { + self.stackTop = 0; + } + pub fn init(stdout: StdOut, chk: *Chunk, debug_flag: bool) VM { - return VM{ - .stdout = stdout, + 
var self = VM{ .chk = chk, - .ip = 0, + + // TODO move this to a nil value or something. + .stack = []value.Value{0} ** STACK_MAX, + .stdout = stdout, .debug_flag = debug_flag, }; + + self.resetStack(); + + return self; } pub fn debug(self: *VM, comptime fmt: []const u8, args: ...) void { @@ -53,9 +69,22 @@ pub const VM = struct { return self.chk.constants.values[const_idx]; } + fn debugStack(self: *VM) !void { + try self.stdout.print(" "); + for (self.stack) |val, idx| { + if (idx >= self.stackTop) break; + + try self.stdout.print("[ "); + try value.printValue(self.stdout, val); + try self.stdout.print(" ]"); + } + try self.stdout.print("\n"); + } + fn run(self: *VM) !InterpretResult { while (true) { if (self.debug_flag) { + try self.debugStack(); _ = try self.chk.disassembleInstruction(self.stdout, self.ip); } @@ -64,17 +93,18 @@ pub const VM = struct { switch (instruction) { chunk.OpCode.Constant => blk: { var constant = self.readConst(); - try value.printValue(self.stdout, constant); - try self.stdout.write("\n"); + self.push(constant); break :blk; }, chunk.OpCode.ConstantLong => blk: { var constant = self.readConstLong(); - try value.printValue(self.stdout, constant); - try self.stdout.write("\n"); + self.push(constant); break :blk; }, + chunk.OpCode.Return => blk: { + try value.printValue(self.stdout, self.pop()); + try self.stdout.print("\n"); return InterpretResult.Ok; }, else => blk: { @@ -93,4 +123,14 @@ pub const VM = struct { self.debug("VM end\n"); return res; } + + pub fn push(self: *VM, val: value.Value) void { + self.stack[self.stackTop] = val; + self.stackTop += 1; + } + + pub fn pop(self: *VM) value.Value { + self.stackTop -= 1; + return self.stack[self.stackTop]; + } }; From 282267670714be07f05713191ca91cf31ea09f9e Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 15:27:19 -0300 Subject: [PATCH 21/68] vm: add negate opcode --- src/chunk.zig | 3 +++ src/main.zig | 1 + src/vm.zig | 2 ++ 3 files changed, 6 insertions(+) diff --git 
a/src/chunk.zig b/src/chunk.zig index 491d52b..db17a34 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -8,6 +8,7 @@ const AllOpcodes = struct { pub Constant: u8 = 0, pub ConstantLong: u8 = 1, pub Return: u8 = 2, + pub Negate: u8 = 3, }; pub const OpCode = AllOpcodes{}; @@ -148,6 +149,8 @@ pub const Chunk = struct { self, index, ); + } else if (instruction == OpCode.Negate) { + return try simpleInstruction(stdout, "OP_NEGATE", index); } else { try stdout.print("Unknown opcode: {}\n", instruction); return index + 1; diff --git a/src/main.zig b/src/main.zig index 1d4bdc7..d493e1a 100644 --- a/src/main.zig +++ b/src/main.zig @@ -117,6 +117,7 @@ pub fn main() !void { //try chk.write(chunk.OpCode.Return); var constant = try chk.writeConstant(1.2, 123); + try chk.write(chunk.OpCode.Negate, 123); try chk.write(chunk.OpCode.Return, 123); var vmach = vm.VM.init(stdout, &chk, true); diff --git a/src/vm.zig b/src/vm.zig index b4243b8..5d74853 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -107,6 +107,8 @@ pub const VM = struct { try self.stdout.print("\n"); return InterpretResult.Ok; }, + + chunk.OpCode.Negate => self.push(-self.pop()), else => blk: { std.debug.warn("Unknown instruction: {x}\n", instruction); return InterpretResult.RuntimeError; From 3377d1675cba2e96c427682d67f453594b550df2 Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 15:40:18 -0300 Subject: [PATCH 22/68] vm, chunk: add binary operators --- src/chunk.zig | 20 ++++++++++++++++---- src/main.zig | 7 +++++-- src/vm.zig | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/src/chunk.zig b/src/chunk.zig index db17a34..a3008c7 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -5,10 +5,14 @@ const Allocator = std.mem.Allocator; // hack. ugly hack. zig has compiler crash. 
const AllOpcodes = struct { - pub Constant: u8 = 0, - pub ConstantLong: u8 = 1, - pub Return: u8 = 2, - pub Negate: u8 = 3, + pub Return: u8 = 0, + pub Constant: u8 = 1, + pub ConstantLong: u8 = 2, + pub Add: u8 = 3, + pub Subtract: u8 = 4, + pub Multiply: u8 = 5, + pub Divide: u8 = 6, + pub Negate: u8 = 7, }; pub const OpCode = AllOpcodes{}; @@ -151,6 +155,14 @@ pub const Chunk = struct { ); } else if (instruction == OpCode.Negate) { return try simpleInstruction(stdout, "OP_NEGATE", index); + } else if (instruction == OpCode.Add) { + return try simpleInstruction(stdout, "OP_ADD", index); + } else if (instruction == OpCode.Subtract) { + return try simpleInstruction(stdout, "OP_SUBTRACT", index); + } else if (instruction == OpCode.Multiply) { + return try simpleInstruction(stdout, "OP_MULTIPLY", index); + } else if (instruction == OpCode.Divide) { + return try simpleInstruction(stdout, "OP_DIVIDE", index); } else { try stdout.print("Unknown opcode: {}\n", instruction); return index + 1; diff --git a/src/main.zig b/src/main.zig index d493e1a..8cd531f 100644 --- a/src/main.zig +++ b/src/main.zig @@ -116,8 +116,11 @@ pub fn main() !void { //var opcode_byte: u8 = @enumToInt(chunk.OpCode.Return); //try chk.write(chunk.OpCode.Return); - var constant = try chk.writeConstant(1.2, 123); - try chk.write(chunk.OpCode.Negate, 123); + try chk.writeConstant(1.2, 123); + try chk.writeConstant(3.4, 123); + try chk.write(chunk.OpCode.Add, 123); + try chk.writeConstant(5.6, 123); + try chk.write(chunk.OpCode.Divide, 123); try chk.write(chunk.OpCode.Return, 123); var vmach = vm.VM.init(stdout, &chk, true); diff --git a/src/vm.zig b/src/vm.zig index 5d74853..990f81f 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -81,6 +81,35 @@ pub const VM = struct { try self.stdout.print("\n"); } + /// gets a f64 out of a value on the top of the stack. 
+ fn popNum(self: *VM) f64 { + return self.pop(); + } + + fn doAdd(self: *VM) void { + var b = self.popNum(); + var a = self.popNum(); + self.push(a + b); + } + + fn doSub(self: *VM) void { + var b = self.popNum(); + var a = self.popNum(); + self.push(a * b); + } + + fn doMul(self: *VM) void { + var b = self.popNum(); + var a = self.popNum(); + self.push(a * b); + } + + fn doDiv(self: *VM) void { + var b = self.popNum(); + var a = self.popNum(); + self.push(a / b); + } + fn run(self: *VM) !InterpretResult { while (true) { if (self.debug_flag) { @@ -108,6 +137,10 @@ pub const VM = struct { return InterpretResult.Ok; }, + chunk.OpCode.Add => self.doAdd(), + chunk.OpCode.Subtract => self.doSub(), + chunk.OpCode.Multiply => self.doMul(), + chunk.OpCode.Divide => self.doDiv(), chunk.OpCode.Negate => self.push(-self.pop()), else => blk: { std.debug.warn("Unknown instruction: {x}\n", instruction); From 088674bf0b17cf494a4f57aba7685a73d14017df Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 15:45:30 -0300 Subject: [PATCH 23/68] vm: add dynamically-sized growing stack --- src/main.zig | 2 +- src/vm.zig | 60 ++++++++++++++++++++++++++++++---------------------- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/src/main.zig b/src/main.zig index 8cd531f..b43afc1 100644 --- a/src/main.zig +++ b/src/main.zig @@ -123,6 +123,6 @@ pub fn main() !void { try chk.write(chunk.OpCode.Divide, 123); try chk.write(chunk.OpCode.Return, 123); - var vmach = vm.VM.init(stdout, &chk, true); + var vmach = try vm.VM.init(allocator, stdout, &chk, true); _ = try vmach.interpret(); } diff --git a/src/vm.zig b/src/vm.zig index 990f81f..467651a 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -3,9 +3,9 @@ const chunk = @import("chunk.zig"); const value = @import("value.zig"); const Chunk = chunk.Chunk; -const StdOut = *std.io.OutStream(std.fs.File.WriteError); +const Value = value.Value; -pub const STACK_MAX = 256; +const StdOut = *std.io.OutStream(std.fs.File.WriteError); pub const 
InterpretResult = enum { Ok, @@ -17,24 +17,30 @@ pub const VM = struct { chk: *Chunk, ip: usize = 0, - stack: [STACK_MAX]value.Value, + stack: []Value, stackTop: usize = 0, stdout: StdOut, debug_flag: bool, + allocator: *std.mem.Allocator, fn resetStack(self: *VM) void { self.stackTop = 0; } - pub fn init(stdout: StdOut, chk: *Chunk, debug_flag: bool) VM { + pub fn init( + allocator: *std.mem.Allocator, + stdout: StdOut, + chk: *Chunk, + debug_flag: bool, + ) !VM { var self = VM{ .chk = chk, - // TODO move this to a nil value or something. - .stack = []value.Value{0} ** STACK_MAX, + .stack = try allocator.alloc(Value, 256), .stdout = stdout, .debug_flag = debug_flag, + .allocator = allocator, }; self.resetStack(); @@ -54,11 +60,11 @@ pub const VM = struct { return byte; } - fn readConst(self: *VM) value.Value { + fn readConst(self: *VM) Value { return self.chk.constants.values[self.readByte()]; } - fn readConstLong(self: *VM) value.Value { + fn readConstLong(self: *VM) Value { const v3 = self.readByte(); const v2 = self.readByte(); const v1 = self.readByte(); @@ -86,28 +92,28 @@ pub const VM = struct { return self.pop(); } - fn doAdd(self: *VM) void { + fn doAdd(self: *VM) !void { var b = self.popNum(); var a = self.popNum(); - self.push(a + b); + try self.push(a + b); } - fn doSub(self: *VM) void { + fn doSub(self: *VM) !void { var b = self.popNum(); var a = self.popNum(); - self.push(a * b); + try self.push(a * b); } - fn doMul(self: *VM) void { + fn doMul(self: *VM) !void { var b = self.popNum(); var a = self.popNum(); - self.push(a * b); + try self.push(a * b); } - fn doDiv(self: *VM) void { + fn doDiv(self: *VM) !void { var b = self.popNum(); var a = self.popNum(); - self.push(a / b); + try self.push(a / b); } fn run(self: *VM) !InterpretResult { @@ -122,12 +128,12 @@ pub const VM = struct { switch (instruction) { chunk.OpCode.Constant => blk: { var constant = self.readConst(); - self.push(constant); + try self.push(constant); break :blk; }, 
chunk.OpCode.ConstantLong => blk: { var constant = self.readConstLong(); - self.push(constant); + try self.push(constant); break :blk; }, @@ -137,11 +143,11 @@ pub const VM = struct { return InterpretResult.Ok; }, - chunk.OpCode.Add => self.doAdd(), - chunk.OpCode.Subtract => self.doSub(), - chunk.OpCode.Multiply => self.doMul(), - chunk.OpCode.Divide => self.doDiv(), - chunk.OpCode.Negate => self.push(-self.pop()), + chunk.OpCode.Add => try self.doAdd(), + chunk.OpCode.Subtract => try self.doSub(), + chunk.OpCode.Multiply => try self.doMul(), + chunk.OpCode.Divide => try self.doDiv(), + chunk.OpCode.Negate => try self.push(-self.pop()), else => blk: { std.debug.warn("Unknown instruction: {x}\n", instruction); return InterpretResult.RuntimeError; @@ -159,12 +165,16 @@ pub const VM = struct { return res; } - pub fn push(self: *VM, val: value.Value) void { + pub fn push(self: *VM, val: Value) !void { + if (self.stackTop > 0 and self.stackTop - 1 > self.stack.len) { + self.stack = try self.allocator.realloc(self.stack, self.stack.len + 1); + } + self.stack[self.stackTop] = val; self.stackTop += 1; } - pub fn pop(self: *VM) value.Value { + pub fn pop(self: *VM) Value { self.stackTop -= 1; return self.stack[self.stackTop]; } From a9dca436bd81a660de4451ae4cbcb9086d72027f Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 16:12:39 -0300 Subject: [PATCH 24/68] remove chunk running code, add draft compiler struct --- src/compiler.zig | 16 +++++++++------- src/main.zig | 9 +++++---- src/vm.zig | 25 +++++++++++++++---------- 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/src/compiler.zig b/src/compiler.zig index 28e9f15..e029b9c 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -1,13 +1,15 @@ -const token = @import("token.zig"); -const scanner = @import("scanner.zig"); -const main = @import("main.zig"); +const std = @import("std"); +const scanner = @import("new_scanner.zig"); + +const Allocator = std.mem.Allocator; pub const Compiler = struct { - 
tokens: *scanner.TokenList, + src: []const u8, + allocator: *Allocator, - fn init(tokens: *scanner.TokenList) Compiler { - return Compiler{ .tokens = tokens }; + pub fn init(allocator: *Allocator, source: []const u8) Compiler { + return Compiler{ .src = source, .allocator = allocator }; } - fn advance(self: *Compiler) void {} + pub fn compile(self: *Compiler) void {} }; diff --git a/src/main.zig b/src/main.zig index b43afc1..e74c6f8 100644 --- a/src/main.zig +++ b/src/main.zig @@ -58,8 +58,9 @@ fn runFile(allocator: *Allocator, path: []const u8) !void { var slice = try allocator.alloc(u8, total_bytes); _ = try lox_file.read(slice); - try run(allocator, slice); - if (hadError) std.os.exit(65); + var res = try run(allocator, slice); + if (res == vm.InterpretResult.CompileError) std.os.exit(65); + if (res == vm.InterpretResult.RuntimeError) std.os.exit(70); } fn runPrompt(allocator: *Allocator) !void { @@ -123,6 +124,6 @@ pub fn main() !void { try chk.write(chunk.OpCode.Divide, 123); try chk.write(chunk.OpCode.Return, 123); - var vmach = try vm.VM.init(allocator, stdout, &chk, true); - _ = try vmach.interpret(); + var vmach = try vm.VM.init(allocator, stdout, "", true); + _ = vmach.interpret(); } diff --git a/src/vm.zig b/src/vm.zig index 467651a..d09511a 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -1,9 +1,11 @@ const std = @import("std"); const chunk = @import("chunk.zig"); const value = @import("value.zig"); +const compiler = @import("compiler.zig"); const Chunk = chunk.Chunk; const Value = value.Value; +const Compiler = compiler.Compiler; const StdOut = *std.io.OutStream(std.fs.File.WriteError); @@ -14,7 +16,8 @@ pub const InterpretResult = enum { }; pub const VM = struct { - chk: *Chunk, + chk: *Chunk = undefined, + src: []const u8, ip: usize = 0, stack: []Value, @@ -31,11 +34,11 @@ pub const VM = struct { pub fn init( allocator: *std.mem.Allocator, stdout: StdOut, - chk: *Chunk, + source: []const u8, debug_flag: bool, ) !VM { var self = VM{ - .chk = chk, + .src = 
source, .stack = try allocator.alloc(Value, 256), .stdout = stdout, @@ -156,13 +159,15 @@ pub const VM = struct { } } - pub fn interpret(self: *VM) !InterpretResult { - self.ip = 0; - - self.debug("VM start\n"); - var res = try self.run(); - self.debug("VM end\n"); - return res; + pub fn interpret(self: *VM) InterpretResult { + //self.ip = 0; + //self.debug("VM start\n"); + //var res = try self.run(); + //self.debug("VM end\n"); + //return res; + var cmpr = Compiler.init(self.allocator, self.src); + cmpr.compile(); + return InterpretResult.Ok; } pub fn push(self: *VM, val: Value) !void { From b80cd52c5002695d35349fa257b35851ef4ccbe9 Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 16:17:28 -0300 Subject: [PATCH 25/68] main: readd runPrompt and runFile - main: make run() use the VM struct instance --- src/main.zig | 55 +++++++++++++--------------------------------------- 1 file changed, 13 insertions(+), 42 deletions(-) diff --git a/src/main.zig b/src/main.zig index e74c6f8..44463ca 100644 --- a/src/main.zig +++ b/src/main.zig @@ -6,38 +6,20 @@ const Allocator = std.mem.Allocator; const chunk = @import("chunk.zig"); const vm = @import("vm.zig"); +const InterpretResult = vm.InterpretResult; + //const Compiler = @import("compiler.zig").Compiler; pub var hadError = false; -fn run(allocator: *Allocator, data: []u8) !void { +fn run(allocator: *Allocator, data: []u8) !InterpretResult { var stdout_file = try std.io.getStdOut(); const stdout = &stdout_file.outStream().stream; - var scanner = try Scanner.init(allocator, data); - var tokens = try scanner.scanTokens(); - - var it = tokens.iterator(); - - while (it.next()) |token| { - switch (token) { - .Simple => |value| { - try value.printToken(stdout); - }, - .Slice => |value| { - try value.printToken(stdout); - }, - .Number => |value| { - try value.printToken(stdout); - }, - } - - hadError = false; - } + var vmach = try vm.VM.init(allocator, stdout, data, true); + return vmach.interpret(); } -// fn run() !void {} - 
pub fn doError(line: usize, message: []const u8) !void { try errorReport(line, "", message); } @@ -73,14 +55,15 @@ fn runPrompt(allocator: *Allocator) !void { var line = std.io.readLine(&buffer) catch |err| { if (err == error.EndOfStream) return; + return err; }; - try run(allocator, line); + _ = try run(allocator, line); } } -pub fn mainOld() anyerror!void { +pub fn main() anyerror!void { var da = std.heap.DirectAllocator.init(); var arena = std.heap.ArenaAllocator.init(&da.allocator); defer arena.deinit(); @@ -94,15 +77,14 @@ pub fn mainOld() anyerror!void { }); var lox_path = try (args_it.next(allocator) orelse { - // try runPrompt(allocator); - unreachable; + try runPrompt(allocator); + return; }); - //var vm = VM.init(); - //try runFile(allocator, lox_path); + try runFile(allocator, lox_path); } -pub fn main() !void { +pub fn oldMain() !void { var da = std.heap.DirectAllocator.init(); var arena = std.heap.ArenaAllocator.init(&da.allocator); defer arena.deinit(); @@ -111,19 +93,8 @@ pub fn main() !void { var stdout_file = try std.io.getStdOut(); var stdout = &stdout_file.outStream().stream; - var chk = try chunk.Chunk.init(allocator); - // this crashes zig??? 
lol + // var chk = try chunk.Chunk.init(allocator); //var opcode_byte: u8 = @enumToInt(chunk.OpCode.Return); //try chk.write(chunk.OpCode.Return); - - try chk.writeConstant(1.2, 123); - try chk.writeConstant(3.4, 123); - try chk.write(chunk.OpCode.Add, 123); - try chk.writeConstant(5.6, 123); - try chk.write(chunk.OpCode.Divide, 123); - try chk.write(chunk.OpCode.Return, 123); - - var vmach = try vm.VM.init(allocator, stdout, "", true); - _ = vmach.interpret(); } From 63045e4df5e8ba995ee9f27e380d641c8274394f Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 16:21:36 -0300 Subject: [PATCH 26/68] add src/new_scanner.zig --- src/compiler.zig | 4 +++- src/new_scanner.zig | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 src/new_scanner.zig diff --git a/src/compiler.zig b/src/compiler.zig index e029b9c..3545694 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -11,5 +11,7 @@ pub const Compiler = struct { return Compiler{ .src = source, .allocator = allocator }; } - pub fn compile(self: *Compiler) void {} + pub fn compile(self: *Compiler) void { + var scanr = scanner.Scanner.init(self.allocator, self.src); + } }; diff --git a/src/new_scanner.zig b/src/new_scanner.zig new file mode 100644 index 0000000..acc8e49 --- /dev/null +++ b/src/new_scanner.zig @@ -0,0 +1,15 @@ +const std = @import("std"); + +const Allocator = std.mem.Allocator; + +pub const Scanner = struct { + source: []const u8, + allocator: *Allocator, + + pub fn init(allocator: *Allocator, data: []const u8) Scanner { + return Scanner{ + .allocator = allocator, + .source = data, + }; + } +}; From 6b9cc575d9960e46f45ed4dba24499010a74e8e4 Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 16:37:24 -0300 Subject: [PATCH 27/68] add draft scanner, remove messy Token union, replace by struct --- src/compiler.zig | 31 +++++++++++++++++++++--- src/main.zig | 2 +- src/new_scanner.zig | 10 ++++++++ src/token.zig | 58 ++++----------------------------------------- 
src/vm.zig | 8 +++---- 5 files changed, 47 insertions(+), 62 deletions(-) diff --git a/src/compiler.zig b/src/compiler.zig index 3545694..a223c67 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -1,17 +1,42 @@ const std = @import("std"); const scanner = @import("new_scanner.zig"); +const vm = @import("vm.zig"); const Allocator = std.mem.Allocator; +const TokenType = @import("token.zig").TokenType; pub const Compiler = struct { src: []const u8, + stdout: vm.StdOut, allocator: *Allocator, - pub fn init(allocator: *Allocator, source: []const u8) Compiler { - return Compiler{ .src = source, .allocator = allocator }; + pub fn init( + allocator: *Allocator, + stdout: vm.StdOut, + source: []const u8, + ) Compiler { + return Compiler{ + .src = source, + .allocator = allocator, + .stdout = stdout, + }; } - pub fn compile(self: *Compiler) void { + pub fn compile(self: *Compiler) !void { var scanr = scanner.Scanner.init(self.allocator, self.src); + var line: usize = 0; + while (true) { + var token = scanr.scanToken(); + + if (token.line != line) { + try self.stdout.print("{} ", token.line); + line = token.line; + } else { + try self.stdout.print(" | "); + } + + try self.stdout.print("{} '{}'\n", token.ttype, token.lexeme); + if (token.ttype == TokenType.EOF) break; + } } }; diff --git a/src/main.zig b/src/main.zig index 44463ca..4611d6f 100644 --- a/src/main.zig +++ b/src/main.zig @@ -17,7 +17,7 @@ fn run(allocator: *Allocator, data: []u8) !InterpretResult { const stdout = &stdout_file.outStream().stream; var vmach = try vm.VM.init(allocator, stdout, data, true); - return vmach.interpret(); + return try vmach.interpret(); } pub fn doError(line: usize, message: []const u8) !void { diff --git a/src/new_scanner.zig b/src/new_scanner.zig index acc8e49..88b0bab 100644 --- a/src/new_scanner.zig +++ b/src/new_scanner.zig @@ -1,9 +1,15 @@ const std = @import("std"); +const tokens = @import("token.zig"); const Allocator = std.mem.Allocator; pub const Scanner = struct { source: 
[]const u8, + + start: usize = 0, + current: usize = 0, + line: usize = 1, + allocator: *Allocator, pub fn init(allocator: *Allocator, data: []const u8) Scanner { @@ -12,4 +18,8 @@ pub const Scanner = struct { .source = data, }; } + + pub fn scanToken(self: *Scanner) tokens.Token { + return tokens.Token{}; + } }; diff --git a/src/token.zig b/src/token.zig index 9e88ee1..308e60e 100644 --- a/src/token.zig +++ b/src/token.zig @@ -50,58 +50,8 @@ pub const TokenType = enum(u6) { EOF, }; -pub fn TokenFactory( - comptime T: type, -) type { - return struct { - const Self = @This(); - - ttype: TokenType, - lexeme: []u8, - line: usize, - literal: T, - - pub fn init( - ttype: TokenType, - lexeme: []u8, - line: usize, - literal: T, - ) Self { - return Self{ - .ttype = ttype, - .lexeme = lexeme, - .line = line, - .literal = literal, - }; - } - - pub fn printToken(self: Self, stdout: var) !void { - if (T == void) { - try stdout.print( - "Token(type={x}, lexeme='{}', line={})\n", - self.ttype, - self.lexeme, - self.line, - ); - } else { - try stdout.print( - "Token(type={x}, lexeme='{}', line={} literal='{}')\n", - self.ttype, - self.lexeme, - self.line, - self.literal, - ); - } - } - }; -} - -pub const SimpleToken = TokenFactory(void); -pub const SliceToken = TokenFactory([]u8); -pub const NumberToken = TokenFactory(f32); - -pub const Token = union(enum) { - Simple: SimpleToken, - Slice: SliceToken, - Number: NumberToken, +pub const Token = struct { + ttype: TokenType = TokenType.EOF, + lexeme: []const u8 = ""[0..], + line: usize = 0, }; diff --git a/src/vm.zig b/src/vm.zig index d09511a..175414c 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -7,7 +7,7 @@ const Chunk = chunk.Chunk; const Value = value.Value; const Compiler = compiler.Compiler; -const StdOut = *std.io.OutStream(std.fs.File.WriteError); +pub const StdOut = *std.io.OutStream(std.fs.File.WriteError); pub const InterpretResult = enum { Ok, @@ -159,14 +159,14 @@ pub const VM = struct { } } - pub fn interpret(self: *VM) 
InterpretResult { + pub fn interpret(self: *VM) !InterpretResult { //self.ip = 0; //self.debug("VM start\n"); //var res = try self.run(); //self.debug("VM end\n"); //return res; - var cmpr = Compiler.init(self.allocator, self.src); - cmpr.compile(); + var cmpr = Compiler.init(self.allocator, self.stdout, self.src); + try cmpr.compile(); return InterpretResult.Ok; } From 2c7cf356b3da98834c327498519dcc1763eb5f11 Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 16:50:48 -0300 Subject: [PATCH 28/68] scanner: add basic tokens and matched-tokens --- src/compiler.zig | 2 +- src/new_scanner.zig | 79 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 78 insertions(+), 3 deletions(-) diff --git a/src/compiler.zig b/src/compiler.zig index a223c67..50a356f 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -26,7 +26,7 @@ pub const Compiler = struct { var scanr = scanner.Scanner.init(self.allocator, self.src); var line: usize = 0; while (true) { - var token = scanr.scanToken(); + var token = try scanr.scanToken(); if (token.line != line) { try self.stdout.print("{} ", token.line); diff --git a/src/new_scanner.zig b/src/new_scanner.zig index 88b0bab..24e305c 100644 --- a/src/new_scanner.zig +++ b/src/new_scanner.zig @@ -1,8 +1,16 @@ const std = @import("std"); const tokens = @import("token.zig"); +const Token = tokens.Token; +const TokenType = tokens.TokenType; + const Allocator = std.mem.Allocator; +pub const TokenError = error{ + Unexpected, + Unterminated, +}; + pub const Scanner = struct { source: []const u8, @@ -19,7 +27,74 @@ pub const Scanner = struct { }; } - pub fn scanToken(self: *Scanner) tokens.Token { - return tokens.Token{}; + fn isAtEnd(self: *Scanner) bool { + return self.current >= self.source.len; + } + + fn advance(self: *Scanner) u8 { + self.current += 1; + return self.source[self.current - 1]; + } + + fn makeToken(self: *Scanner, ttype: TokenType) Token { + return Token{ + .ttype = ttype, + .lexeme = 
self.source[self.start..self.current], + .line = self.line, + }; + } + + /// Check if the next character matches what is expected. + fn match(self: *Scanner, expected: u8) bool { + if (self.isAtEnd()) return false; + if (self.source[self.current] != expected) return false; + + self.current += 1; + return true; + } + + /// Add a SimpleToken of type_match if the next character is + /// `expected`. Adds a SimpleToken of type_nomatch when it is not. + fn makeMatchToken( + self: *Scanner, + expected: u8, + type_match: TokenType, + type_nomatch: TokenType, + ) Token { + if (self.match(expected)) { + return self.makeToken(type_match); + } else { + return self.makeToken(type_nomatch); + } + } + + pub fn scanToken(self: *Scanner) !tokens.Token { + self.start = self.current; + + if (self.isAtEnd()) return self.makeToken(TokenType.EOF); + + var c = self.advance(); + + var token = switch (c) { + '(' => self.makeToken(.LEFT_PAREN), + ')' => self.makeToken(.RIGHT_PAREN), + '{' => self.makeToken(.LEFT_BRACE), + '}' => self.makeToken(.RIGHT_BRACE), + ',' => self.makeToken(.COMMA), + '.' => self.makeToken(.DOT), + '-' => self.makeToken(.MINUS), + '+' => self.makeToken(.PLUS), + ';' => self.makeToken(.SEMICOLON), + '*' => self.makeToken(.STAR), + + '!' 
=> self.makeMatchToken('=', .BANG_EQUAL, .BANG), + '=' => self.makeMatchToken('=', .EQUAL_EQUAL, .EQUAL), + '<' => self.makeMatchToken('=', .LESS_EQUAL, .LESS), + '>' => self.makeMatchToken('=', .GREATER_EQUAL, .GREATER), + + else => return TokenError.Unexpected, + }; + + return token; } }; From f4f1fe1fbc16ba4d2793ce8c07f1c26f2233e1b6 Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 16:54:15 -0300 Subject: [PATCH 29/68] scanner: add whitespace handling --- src/new_scanner.zig | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/src/new_scanner.zig b/src/new_scanner.zig index 24e305c..0c97647 100644 --- a/src/new_scanner.zig +++ b/src/new_scanner.zig @@ -68,9 +68,30 @@ pub const Scanner = struct { } } - pub fn scanToken(self: *Scanner) !tokens.Token { - self.start = self.current; + fn peek(self: *Scanner) u8 { + if (self.isAtEnd()) return 0; + return self.source[self.current]; + } + fn skipWhitespace(self: *Scanner) void { + while (true) { + var c = self.peek(); + switch (c) { + ' ', '\r', '\t' => blk: { + _ = self.advance(); + }, + '\n' => blk: { + self.line += 1; + _ = self.advance(); + }, + else => return, + } + } + } + + pub fn scanToken(self: *Scanner) !tokens.Token { + self.skipWhitespace(); + self.start = self.current; if (self.isAtEnd()) return self.makeToken(TokenType.EOF); var c = self.advance(); From 27b04e16123951068ab9cfd4963351b088df8297 Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 17:07:22 -0300 Subject: [PATCH 30/68] scanner: add basic error handling, strings, comments --- src/compiler.zig | 28 ++++++++++++++++++++-------- src/new_scanner.zig | 45 +++++++++++++++++++++++++++++++++++++++++++-- src/scanner.zig | 2 +- 3 files changed, 64 insertions(+), 11 deletions(-) diff --git a/src/compiler.zig b/src/compiler.zig index 50a356f..f0771ff 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -26,17 +26,29 @@ pub const Compiler = struct { var scanr = scanner.Scanner.init(self.allocator, 
self.src); var line: usize = 0; while (true) { - var token = try scanr.scanToken(); + var token_opt = scanr.scanToken() catch |err| { + std.debug.warn("Scan Error: {x}\n", err); + std.debug.warn( + "line: {}, cur lexeme: {}\n", + scanr.line, + scanr.currentLexeme(), + ); + break; + }; - if (token.line != line) { - try self.stdout.print("{} ", token.line); - line = token.line; + if (token_opt) |token| { + if (token.line != line) { + try self.stdout.print("{} ", token.line); + line = token.line; + } else { + try self.stdout.print(" | "); + } + + try self.stdout.print("{} '{}'\n", token.ttype, token.lexeme); + if (token.ttype == TokenType.EOF) break; } else { - try self.stdout.print(" | "); + break; } - - try self.stdout.print("{} '{}'\n", token.ttype, token.lexeme); - if (token.ttype == TokenType.EOF) break; } } }; diff --git a/src/new_scanner.zig b/src/new_scanner.zig index 0c97647..f19b8a3 100644 --- a/src/new_scanner.zig +++ b/src/new_scanner.zig @@ -36,10 +36,14 @@ pub const Scanner = struct { return self.source[self.current - 1]; } + pub fn currentLexeme(self: *Scanner) []const u8 { + return self.source[self.start..self.current]; + } + fn makeToken(self: *Scanner, ttype: TokenType) Token { return Token{ .ttype = ttype, - .lexeme = self.source[self.start..self.current], + .lexeme = self.currentLexeme(), .line = self.line, }; } @@ -73,6 +77,11 @@ pub const Scanner = struct { return self.source[self.current]; } + fn peekNext(self: *Scanner) u8 { + if (self.isAtEnd()) return 0; + return self.source[self.current + 1]; + } + fn skipWhitespace(self: *Scanner) void { while (true) { var c = self.peek(); @@ -89,7 +98,26 @@ pub const Scanner = struct { } } - pub fn scanToken(self: *Scanner) !tokens.Token { + fn doString(self: *Scanner) !Token { + // consume entire string + while (self.peek() != '"' and !self.isAtEnd()) { + if (self.peek() == '\n') self.line += 1; + _ = self.advance(); + } + + // unterminated string. 
+ if (self.isAtEnd()) { + return TokenError.Unterminated; + } + + // the closing ". + _ = self.advance(); + + // trim the surrounding quotes. + return self.makeToken(.STRING); + } + + pub fn scanToken(self: *Scanner) !?Token { self.skipWhitespace(); self.start = self.current; if (self.isAtEnd()) return self.makeToken(TokenType.EOF); @@ -113,6 +141,19 @@ pub const Scanner = struct { '<' => self.makeMatchToken('=', .LESS_EQUAL, .LESS), '>' => self.makeMatchToken('=', .GREATER_EQUAL, .GREATER), + '/' => blk: { + if (self.peekNext() == '/') { + while (self.peek() != '\n' and !self.isAtEnd()) { + _ = self.advance(); + } + break :blk null; + } else { + break :blk self.makeToken(.SLASH); + } + }, + + '"' => try self.doString(), + else => return TokenError.Unexpected, }; diff --git a/src/scanner.zig b/src/scanner.zig index 0cffde2..ed5ec0e 100644 --- a/src/scanner.zig +++ b/src/scanner.zig @@ -98,7 +98,7 @@ pub const Scanner = struct { return self.source[self.current - 1]; } - fn currentLexeme(self: *Scanner) []u8 { + pub fn currentLexeme(self: *Scanner) []u8 { return self.source[self.start..self.current]; } From 9d1d253c9479cbf40e06f72105f118ceb7f05cc5 Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 17:12:00 -0300 Subject: [PATCH 31/68] scanner: fix peekNext --- src/new_scanner.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/new_scanner.zig b/src/new_scanner.zig index f19b8a3..b2005f7 100644 --- a/src/new_scanner.zig +++ b/src/new_scanner.zig @@ -79,7 +79,7 @@ pub const Scanner = struct { fn peekNext(self: *Scanner) u8 { if (self.isAtEnd()) return 0; - return self.source[self.current + 1]; + return self.source[self.current - 1]; } fn skipWhitespace(self: *Scanner) void { From 566d8313f3c8ec83177e52a014be0eb1d49810bb Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 17:13:50 -0300 Subject: [PATCH 32/68] add number tokens --- src/new_scanner.zig | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git 
a/src/new_scanner.zig b/src/new_scanner.zig index b2005f7..2b48f48 100644 --- a/src/new_scanner.zig +++ b/src/new_scanner.zig @@ -11,6 +11,20 @@ pub const TokenError = error{ Unterminated, }; +fn isDigit(char: u8) bool { + return char >= '0' and char <= '9'; +} + +fn isAlpha(c: u8) bool { + return (c >= 'a' and c <= 'z') or + (c >= 'A' and c <= 'Z') or + c == '_'; +} + +fn isAlphaNumeric(char: u8) bool { + return isAlpha(char) or isDigit(char); +} + pub const Scanner = struct { source: []const u8, @@ -117,12 +131,32 @@ pub const Scanner = struct { return self.makeToken(.STRING); } + /// Consume a number + fn doNumber(self: *Scanner) Token { + while (isDigit(self.peek())) { + _ = self.advance(); + } + + // check if its a number like 12.34, where the '.' character + // exists and the one next to it is a digit. + if (self.peek() == '.' and isDigit(self.peekNext())) { + _ = self.advance(); + + while (isDigit(self.peek())) { + _ = self.advance(); + } + } + + return self.makeToken(.NUMBER); + } + pub fn scanToken(self: *Scanner) !?Token { self.skipWhitespace(); self.start = self.current; if (self.isAtEnd()) return self.makeToken(TokenType.EOF); var c = self.advance(); + if (isDigit(c)) return self.doNumber(); var token = switch (c) { '(' => self.makeToken(.LEFT_PAREN), From d62c58a1956989a5e001f981005b6590039a0fc0 Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 17:15:27 -0300 Subject: [PATCH 33/68] scanner: add identifiers --- src/new_scanner.zig | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/new_scanner.zig b/src/new_scanner.zig index 2b48f48..340523b 100644 --- a/src/new_scanner.zig +++ b/src/new_scanner.zig @@ -150,12 +150,21 @@ pub const Scanner = struct { return self.makeToken(.NUMBER); } + fn doIdentifier(self: *Scanner) Token { + while (isAlphaNumeric(self.peek())) { + _ = self.advance(); + } + + return self.makeToken(.IDENTIFIER); + } + pub fn scanToken(self: *Scanner) !?Token { self.skipWhitespace(); self.start = self.current; if 
(self.isAtEnd()) return self.makeToken(TokenType.EOF); var c = self.advance(); + if (isAlpha(c)) return self.doIdentifier(); if (isDigit(c)) return self.doNumber(); var token = switch (c) { From 7d7aabbdd7df8da75cc510fd1165f475b5c7acc6 Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 17:20:50 -0300 Subject: [PATCH 34/68] scanner: add keyword handling (copied off the old scanner) as with most things, lol --- src/compiler.zig | 2 +- src/new_scanner.zig | 75 +++++++++++++++++++++++++++++++++++++++++++-- src/scanner.zig | 1 - 3 files changed, 73 insertions(+), 5 deletions(-) diff --git a/src/compiler.zig b/src/compiler.zig index f0771ff..fd96032 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -23,7 +23,7 @@ pub const Compiler = struct { } pub fn compile(self: *Compiler) !void { - var scanr = scanner.Scanner.init(self.allocator, self.src); + var scanr = try scanner.Scanner.init(self.allocator, self.src); var line: usize = 0; while (true) { var token_opt = scanr.scanToken() catch |err| { diff --git a/src/new_scanner.zig b/src/new_scanner.zig index 340523b..8a1c436 100644 --- a/src/new_scanner.zig +++ b/src/new_scanner.zig @@ -25,8 +25,62 @@ fn isAlphaNumeric(char: u8) bool { return isAlpha(char) or isDigit(char); } +pub const KeywordMap = std.AutoHashMap([]const u8, u6); + +/// The book does say that C doesn't have hashmaps. but Zig does. and I can +/// use it here. 
+fn initKeywordMap(allocator: *std.mem.Allocator) !KeywordMap { + var map = KeywordMap.init(allocator); + + const keywords = [][]const u8{ + "and"[0..], + "class"[0..], + "else"[0..], + "false"[0..], + "for"[0..], + "fun"[0..], + "if"[0..], + "nil"[0..], + "or"[0..], + "print"[0..], + "return"[0..], + "super"[0..], + "this"[0..], + "true"[0..], + "var"[0..], + "while"[0..], + }; + + const tags = []TokenType{ + TokenType.AND, + TokenType.CLASS, + TokenType.ELSE, + TokenType.FALSE, + TokenType.FOR, + TokenType.FUN, + TokenType.IF, + TokenType.NIL, + TokenType.OR, + TokenType.PRINT, + TokenType.RETURN, + TokenType.SUPER, + TokenType.THIS, + TokenType.TRUE, + TokenType.VAR, + TokenType.WHILE, + }; + + for (keywords) |keyword, idx| { + var tag = @enumToInt(tags[idx]); + _ = try map.put(keyword, tag); + } + + return map; +} + pub const Scanner = struct { source: []const u8, + keywords: KeywordMap, start: usize = 0, current: usize = 0, @@ -34,10 +88,11 @@ pub const Scanner = struct { allocator: *Allocator, - pub fn init(allocator: *Allocator, data: []const u8) Scanner { + pub fn init(allocator: *Allocator, data: []const u8) !Scanner { return Scanner{ - .allocator = allocator, .source = data, + .keywords = try initKeywordMap(allocator), + .allocator = allocator, }; } @@ -150,12 +205,26 @@ pub const Scanner = struct { return self.makeToken(.NUMBER); } + /// Either a keyword or an identifier come out of this. fn doIdentifier(self: *Scanner) Token { while (isAlphaNumeric(self.peek())) { _ = self.advance(); } - return self.makeToken(.IDENTIFIER); + // after reading the identifier, we check + // if it is any of our keywords, if it is, then we add + // the specificed keyword type. 
if not, just .IDENTIFIER + var text = self.source[self.start..self.current]; + var type_opt = self.keywords.get(text); + var toktype: TokenType = undefined; + + if (type_opt) |kv| { + toktype = @intToEnum(TokenType, kv.value); + } else { + toktype = TokenType.IDENTIFIER; + } + + return self.makeToken(toktype); } pub fn scanToken(self: *Scanner) !?Token { diff --git a/src/scanner.zig b/src/scanner.zig index ed5ec0e..b83a7a4 100644 --- a/src/scanner.zig +++ b/src/scanner.zig @@ -20,7 +20,6 @@ fn isAlphaNumeric(char: u8) bool { return isAlpha(char) or isDigit(char); } -// hashmaps don't work on HashMaps for some reason. anyways. pub const KeywordMap = std.AutoHashMap([]const u8, u6); fn initKeywordMap(allocator: *std.mem.Allocator) !KeywordMap { From e1d0e3ec0b46a46e5d116857762aee480c030a6e Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 20:33:43 -0300 Subject: [PATCH 35/68] add basic compiler code --- src/compiler.zig | 118 +++++++++++++++++++++++++++++++++++--------- src/new_scanner.zig | 6 ++- src/vm.zig | 13 +++-- 3 files changed, 109 insertions(+), 28 deletions(-) diff --git a/src/compiler.zig b/src/compiler.zig index fd96032..a04fd1f 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -1,54 +1,124 @@ const std = @import("std"); const scanner = @import("new_scanner.zig"); const vm = @import("vm.zig"); +const chunks = @import("chunk.zig"); +const tokens = @import("token.zig"); const Allocator = std.mem.Allocator; -const TokenType = @import("token.zig").TokenType; +const Scanner = scanner.Scanner; +const Chunk = chunks.Chunk; +const Token = tokens.Token; +const TokenType = tokens.TokenType; + +pub const Parser = struct { + previous: Token = undefined, + current: Token = undefined, + + // TODO are those needed + hadError: bool = false, + panicMode: bool = false, +}; pub const Compiler = struct { src: []const u8, stdout: vm.StdOut, allocator: *Allocator, + parser: Parser, + scanr: Scanner = undefined, + chunk: *chunks.Chunk, pub fn init( allocator: 
*Allocator, + chunk: *chunks.Chunk, stdout: vm.StdOut, source: []const u8, ) Compiler { return Compiler{ .src = source, + .chunk = chunk, .allocator = allocator, .stdout = stdout, + .parser = Parser{}, }; } - pub fn compile(self: *Compiler) !void { - var scanr = try scanner.Scanner.init(self.allocator, self.src); - var line: usize = 0; + fn errorAt(self: *Compiler, token: Token, msg: []const u8) void { + if (self.parser.panicMode) return; + self.parser.panicMode = true; + + std.debug.warn("[line {}] Error", token.line); + if (token.ttype == TokenType.EOF) { + std.debug.warn(" at end"); + } else { + std.debug.warn(" at '{}'", token.lexeme); + } + + std.debug.warn(": {}\n", msg); + self.parser.hadError = true; + } + + fn errorCurrent(self: *Compiler, msg: []const u8) void { + self.errorAt(self.parser.current, msg); + } + + fn errorPrevious(self: *Compiler, msg: []const u8) void { + self.errorAt(self.parser.previous, msg); + } + + fn advance(self: *Compiler) !void { + self.parser.previous = self.parser.current; + while (true) { - var token_opt = scanr.scanToken() catch |err| { - std.debug.warn("Scan Error: {x}\n", err); - std.debug.warn( - "line: {}, cur lexeme: {}\n", - scanr.line, - scanr.currentLexeme(), - ); - break; - }; - + var token_opt = try self.scanr.scanToken(); if (token_opt) |token| { - if (token.line != line) { - try self.stdout.print("{} ", token.line); - line = token.line; - } else { - try self.stdout.print(" | "); - } - - try self.stdout.print("{} '{}'\n", token.ttype, token.lexeme); - if (token.ttype == TokenType.EOF) break; - } else { + self.parser.current = token; break; + } else { + self.errorCurrent(self.parser.current.lexeme); } } } + + fn consume(self: *Compiler, ttype: TokenType, msg: []const u8) !void { + if (self.parser.current.ttype == ttype) { + try self.advance(); + return; + } + + self.errorCurrent(msg); + } + + fn currentChunk(self: *Compiler) *chunks.Chunk { + return self.chunk; + } + + fn emitByte(self: *Compiler, byte: u8) !void { + 
try self.currentChunk().write(byte, self.parser.previous.line); + } + + fn emitBytes(self: *Compiler, byte1: u8, byte2: u82) !void { + try self.emitByte(byte1); + try self.emitByte(byte2); + } + + fn writeReturn(self: *Compiler) !void { + try self.emitByte(chunks.OpCode.Return); + } + + fn end(self: *Compiler) !void { + try self.writeReturn(); + } + + /// Compile the source given when initializing the compiler + /// into the given chunk. + pub fn compile(self: *Compiler, chunk: *Chunk) !bool { + self.scanr = try scanner.Scanner.init(self.allocator, self.src); + + try self.advance(); + //try self.expression(); + try self.consume(.EOF, "Expect end of expression."); + try self.end(); + + return !self.parser.hadError; + } }; diff --git a/src/new_scanner.zig b/src/new_scanner.zig index 8a1c436..2b4020d 100644 --- a/src/new_scanner.zig +++ b/src/new_scanner.zig @@ -230,7 +230,11 @@ pub const Scanner = struct { pub fn scanToken(self: *Scanner) !?Token { self.skipWhitespace(); self.start = self.current; - if (self.isAtEnd()) return self.makeToken(TokenType.EOF); + + if (self.isAtEnd()) { + std.debug.warn("got at end\n"); + return self.makeToken(TokenType.EOF); + } var c = self.advance(); if (isAlpha(c)) return self.doIdentifier(); diff --git a/src/vm.zig b/src/vm.zig index 175414c..3375684 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -165,9 +165,16 @@ pub const VM = struct { //var res = try self.run(); //self.debug("VM end\n"); //return res; - var cmpr = Compiler.init(self.allocator, self.stdout, self.src); - try cmpr.compile(); - return InterpretResult.Ok; + var chk = try Chunk.init(self.allocator); + + var cmpr = Compiler.init(self.allocator, &chk, self.stdout, self.src); + if (!try cmpr.compile(&chk)) { + return InterpretResult.CompileError; + } + + self.chk = &chk; + self.ip = 0; + return try self.run(); } pub fn push(self: *VM, val: Value) !void { From 2736bee8d8552ae80d7d4079a0ffa81792354b36 Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 20:34:09 -0300 
Subject: [PATCH 36/68] scanner: remove debug print --- src/new_scanner.zig | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/new_scanner.zig b/src/new_scanner.zig index 2b4020d..0e2cbb0 100644 --- a/src/new_scanner.zig +++ b/src/new_scanner.zig @@ -231,10 +231,7 @@ pub const Scanner = struct { self.skipWhitespace(); self.start = self.current; - if (self.isAtEnd()) { - std.debug.warn("got at end\n"); - return self.makeToken(TokenType.EOF); - } + if (self.isAtEnd()) return self.makeToken(TokenType.EOF); var c = self.advance(); if (isAlpha(c)) return self.doIdentifier(); From 230fef20b55e503499281ebcbb6717c0ae8547d0 Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 20:48:26 -0300 Subject: [PATCH 37/68] add other bytecode emitters --- src/compiler.zig | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/src/compiler.zig b/src/compiler.zig index a04fd1f..3fea307 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -3,12 +3,15 @@ const scanner = @import("new_scanner.zig"); const vm = @import("vm.zig"); const chunks = @import("chunk.zig"); const tokens = @import("token.zig"); +const values = @import("value.zig"); const Allocator = std.mem.Allocator; const Scanner = scanner.Scanner; const Chunk = chunks.Chunk; const Token = tokens.Token; const TokenType = tokens.TokenType; +const Value = values.Value; +const OpCode = chunks.OpCode; pub const Parser = struct { previous: Token = undefined, @@ -101,14 +104,44 @@ pub const Compiler = struct { try self.emitByte(byte2); } - fn writeReturn(self: *Compiler) !void { - try self.emitByte(chunks.OpCode.Return); + fn emitReturn(self: *Compiler) !void { + try self.emitByte(OpCode.Return); + } + + fn emitConstant(self: *Compiler, value: Value) !void { + try self.currentChunk().writeConstant( + value, + self.parser.previous.line, + ); } fn end(self: *Compiler) !void { - try self.writeReturn(); + try self.emitReturn(); } + fn grouping(self: *Compiler) 
!void { + try self.expression(); + try self.consume(.RIGHT_PAREN, "Expect ')' after expression."); + } + + /// Emits bytecode for a number being loaded into the code. + fn number(self: *Compiler) !void { + var value: f64 = try std.fmt.parseFloat(f64, parser.previous.lexeme); + try self.emitConstant(value); + } + + /// Emits bytecode for a given unary. + fn unary(self: *Compiler) !void { + var ttype = self.parser.previous.ttype; + try self.expression(); + switch (ttype) { + .MINUS => try self.emitByte(OpCode.Negate), + else => unreachable, + } + } + + fn expression(self: *Compiler) !void {} + /// Compile the source given when initializing the compiler /// into the given chunk. pub fn compile(self: *Compiler, chunk: *Chunk) !bool { From 0f8e19adf18c663b1b0fdc2f1d76c7b9e752cd0f Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 21:32:25 -0300 Subject: [PATCH 38/68] compiler: finish parser (for math expressions) --- src/compiler.zig | 150 +++++++++++++++++++++++++++++++++++++++++++++-- src/vm.zig | 8 ++- 2 files changed, 152 insertions(+), 6 deletions(-) diff --git a/src/compiler.zig b/src/compiler.zig index 3fea307..6695de9 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -13,7 +13,8 @@ const TokenType = tokens.TokenType; const Value = values.Value; const OpCode = chunks.OpCode; -pub const Parser = struct { +/// Holds parser state for the compiler. +const Parser = struct { previous: Token = undefined, current: Token = undefined, @@ -22,6 +23,93 @@ pub const Parser = struct { panicMode: bool = false, }; +/// Represents the order of operations in the parser. +const Precedence = enum(u5) { + None, + Assignment, // = + Or, // or + And, // and + Equality, // == != + Comparison, // < > <= >= + Term, // + - + Factor, // * / + Unary, // ! - + Call, // . 
() [] + Primary, +}; + +const ParseFn = fn (*Compiler) anyerror!void; + +const ParseRule = struct { + prefix: ?ParseFn = null, + infix: ?ParseFn = null, + precedence: Precedence = Precedence.None, +}; + +/// For each token, this defines a parse rule for it. +var rules = []ParseRule{ + // for LEFT_PAREN, we determine it as a call precedence + // plus a prefix parse function of grouping + ParseRule{ .prefix = Compiler.grouping, .precedence = .Call }, + ParseRule{}, + ParseRule{}, + ParseRule{}, + ParseRule{}, + + // dot token, means a call too, for things like a.b + ParseRule{ .precedence = .Call }, + + // specific to -, as it can be an unary operator when its a prefix + // of something, or a binary one, when its a infix or another thing. + ParseRule{ + .prefix = Compiler.unary, + .infix = Compiler.binary, + .precedence = .Term, + }, + + ParseRule{ .infix = Compiler.binary, .precedence = .Term }, + ParseRule{}, + + // slash is a binary operator, as well as star. + ParseRule{ .infix = Compiler.binary, .precedence = .Factor }, + ParseRule{ .infix = Compiler.binary, .precedence = .Factor }, + + // as the token enum says, those are 1/2 char tokens. 
+ ParseRule{}, + // this is specifically for the != operator + ParseRule{ .precedence = .Equality }, + ParseRule{}, + // this is specifically for the == operator + ParseRule{ .precedence = .Equality }, + + // all the comparison ones + ParseRule{ .precedence = .Comparison }, + ParseRule{ .precedence = .Comparison }, + ParseRule{ .precedence = .Comparison }, + ParseRule{ .precedence = .Comparison }, + + ParseRule{}, + ParseRule{}, + ParseRule{ .prefix = Compiler.number }, + ParseRule{ .precedence = .And }, + ParseRule{}, + ParseRule{}, + ParseRule{}, + ParseRule{}, + ParseRule{}, + ParseRule{}, + ParseRule{}, + ParseRule{ .precedence = .Or }, + ParseRule{}, + ParseRule{}, + ParseRule{}, + ParseRule{}, + ParseRule{}, + ParseRule{}, + ParseRule{}, + ParseRule{}, +}; + pub const Compiler = struct { src: []const u8, stdout: vm.StdOut, @@ -29,12 +117,14 @@ pub const Compiler = struct { parser: Parser, scanr: Scanner = undefined, chunk: *chunks.Chunk, + debug_flag: bool = false, pub fn init( allocator: *Allocator, chunk: *chunks.Chunk, stdout: vm.StdOut, source: []const u8, + debug_flag: bool, ) Compiler { return Compiler{ .src = source, @@ -42,6 +132,7 @@ pub const Compiler = struct { .allocator = allocator, .stdout = stdout, .parser = Parser{}, + .debug_flag = debug_flag, }; } @@ -117,6 +208,10 @@ pub const Compiler = struct { fn end(self: *Compiler) !void { try self.emitReturn(); + + if (self.debug_flag and !self.parser.hadError) { + try self.currentChunk().disassemble(self.stdout, "code"); + } } fn grouping(self: *Compiler) !void { @@ -126,21 +221,66 @@ pub const Compiler = struct { /// Emits bytecode for a number being loaded into the code. fn number(self: *Compiler) !void { - var value: f64 = try std.fmt.parseFloat(f64, parser.previous.lexeme); + var value: f64 = try std.fmt.parseFloat( + f64, + self.parser.previous.lexeme, + ); try self.emitConstant(value); } /// Emits bytecode for a given unary. 
fn unary(self: *Compiler) !void { var ttype = self.parser.previous.ttype; - try self.expression(); + try self.parsePrecedence(.Unary); + switch (ttype) { .MINUS => try self.emitByte(OpCode.Negate), else => unreachable, } } - fn expression(self: *Compiler) !void {} + fn binary(self: *Compiler) !void { + var op_type = self.parser.previous.ttype; + var rule: *ParseRule = self.getRule(op_type); + try self.parsePrecedence(@intToEnum(Precedence, @enumToInt(rule.precedence) + 1)); + + switch (op_type) { + .PLUS => try self.emitByte(OpCode.Add), + .MINUS => try self.emitByte(OpCode.Subtract), + .STAR => try self.emitByte(OpCode.Multiply), + .SLASH => try self.emitByte(OpCode.Divide), + else => unreachable, + } + } + + fn parsePrecedence(self: *Compiler, precedence: Precedence) !void { + try self.advance(); + var as_int = @enumToInt(precedence); + var prefix_rule_opt = self.getRule(self.parser.previous.ttype).prefix; + + if (prefix_rule_opt) |prefix_rule| { + try prefix_rule(self); + + while (as_int <= @enumToInt(self.getRule(self.parser.current.ttype).precedence)) { + try self.advance(); + var infix_rule_opt = self.getRule(self.parser.previous.ttype).infix; + if (infix_rule_opt) |infix_rule| { + try infix_rule(self); + } + } + } else { + self.errorPrevious("Expect expression."); + return; + } + } + + fn getRule(self: *Compiler, ttype: TokenType) *ParseRule { + return &rules[@enumToInt(ttype)]; + } + + fn expression(self: *Compiler) !void { + try self.parsePrecedence(.Assignment); + } /// Compile the source given when initializing the compiler /// into the given chunk. 
@@ -148,7 +288,7 @@ pub const Compiler = struct { self.scanr = try scanner.Scanner.init(self.allocator, self.src); try self.advance(); - //try self.expression(); + try self.expression(); try self.consume(.EOF, "Expect end of expression."); try self.end(); diff --git a/src/vm.zig b/src/vm.zig index 3375684..e58bc66 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -167,7 +167,13 @@ pub const VM = struct { //return res; var chk = try Chunk.init(self.allocator); - var cmpr = Compiler.init(self.allocator, &chk, self.stdout, self.src); + var cmpr = Compiler.init( + self.allocator, + &chk, + self.stdout, + self.src, + self.debug_flag, + ); if (!try cmpr.compile(&chk)) { return InterpretResult.CompileError; } From 589413488cfa91fd5744f7714afd204d590a5357 Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 23:33:53 -0300 Subject: [PATCH 39/68] move towards dynamically typed values - remove InterpretResult as an enum, replace by error. - scanner: fix peekNext() - vm: add runtime errors, add VM.peek() --- src/compiler.zig | 5 ++-- src/main.zig | 25 ++++++++++++---- src/new_scanner.zig | 6 ++-- src/value.zig | 36 +++++++++++++++++++++-- src/vm.zig | 71 +++++++++++++++++++++++++++++++++------------ 5 files changed, 112 insertions(+), 31 deletions(-) diff --git a/src/compiler.zig b/src/compiler.zig index 6695de9..ae21e84 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -167,8 +167,6 @@ pub const Compiler = struct { if (token_opt) |token| { self.parser.current = token; break; - } else { - self.errorCurrent(self.parser.current.lexeme); } } } @@ -221,11 +219,12 @@ pub const Compiler = struct { /// Emits bytecode for a number being loaded into the code. fn number(self: *Compiler) !void { + std.debug.warn("parsing number: '{}'\n", self.parser.previous.lexeme); var value: f64 = try std.fmt.parseFloat( f64, self.parser.previous.lexeme, ); - try self.emitConstant(value); + try self.emitConstant(values.NumberVal(value)); } /// Emits bytecode for a given unary. 
diff --git a/src/main.zig b/src/main.zig index 4611d6f..660a9e7 100644 --- a/src/main.zig +++ b/src/main.zig @@ -12,12 +12,12 @@ const InterpretResult = vm.InterpretResult; pub var hadError = false; -fn run(allocator: *Allocator, data: []u8) !InterpretResult { +fn run(allocator: *Allocator, data: []u8) !void { var stdout_file = try std.io.getStdOut(); const stdout = &stdout_file.outStream().stream; var vmach = try vm.VM.init(allocator, stdout, data, true); - return try vmach.interpret(); + try vmach.interpret(); } pub fn doError(line: usize, message: []const u8) !void { @@ -40,9 +40,14 @@ fn runFile(allocator: *Allocator, path: []const u8) !void { var slice = try allocator.alloc(u8, total_bytes); _ = try lox_file.read(slice); - var res = try run(allocator, slice); - if (res == vm.InterpretResult.CompileError) std.os.exit(65); - if (res == vm.InterpretResult.RuntimeError) std.os.exit(70); + run(allocator, slice) catch |err| { + switch (err) { + InterpretResult.Ok => {}, + InterpretResult.CompileError => std.os.exit(65), + InterpretResult.RuntimeError => std.os.exit(70), + else => return err, + } + }; } fn runPrompt(allocator: *Allocator) !void { @@ -59,7 +64,15 @@ fn runPrompt(allocator: *Allocator) !void { return err; }; - _ = try run(allocator, line); + run(allocator, line) catch |err| { + switch (err) { + InterpretResult.Ok => {}, + InterpretResult.CompileError, InterpretResult.RuntimeError => blk: { + try stdout.print("compile/runtime error.\n"); + }, + else => return err, + } + }; } } diff --git a/src/new_scanner.zig b/src/new_scanner.zig index 0e2cbb0..1a8efc4 100644 --- a/src/new_scanner.zig +++ b/src/new_scanner.zig @@ -147,8 +147,8 @@ pub const Scanner = struct { } fn peekNext(self: *Scanner) u8 { - if (self.isAtEnd()) return 0; - return self.source[self.current - 1]; + if (self.current + 1 >= self.source.len) return 0; + return self.source[self.current + 1]; } fn skipWhitespace(self: *Scanner) void { @@ -255,10 +255,12 @@ pub const Scanner = struct { '>' 
=> self.makeMatchToken('=', .GREATER_EQUAL, .GREATER), '/' => blk: { + std.debug.warn("next: {}\n", self.peekNext()); if (self.peekNext() == '/') { while (self.peek() != '\n' and !self.isAtEnd()) { _ = self.advance(); } + break :blk null; } else { break :blk self.makeToken(.SLASH); diff --git a/src/value.zig b/src/value.zig index b97d402..aa33ecc 100644 --- a/src/value.zig +++ b/src/value.zig @@ -3,10 +3,42 @@ const std = @import("std"); const Allocator = std.mem.Allocator; // NOTE: right now, only numbers. -pub const Value = f64; + +pub const ValueType = enum(u8) { + Bool, + Nil, + Number, +}; + +pub const ValueValue = union(ValueType) { + Bool: bool, + Nil: void, + Number: f64, +}; + +pub const Value = struct { + vtype: ValueType, + as: ValueValue, +}; + +// helper functions +pub fn BoolVal(val: bool) Value { + return Value{ .vtype = .Bool, .as = ValueValue{ .Bool = val } }; +} + +pub fn NilVal() Value { + return Value{ .vtype = .Nil, .as = ValueValue{ .Nil = {} } }; +} + +pub fn NumberVal(val: f64) Value { + return Value{ .vtype = .Number, .as = ValueValue{ .Number = val } }; +} pub fn printValue(stdout: var, value: Value) !void { - try stdout.print("{}", value); + switch (value.as) { + .Number => try stdout.print("{}", value.as.Number), + else => unreachable, + } } pub const ValueList = struct { diff --git a/src/vm.zig b/src/vm.zig index e58bc66..398f9ab 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -1,6 +1,7 @@ const std = @import("std"); const chunk = @import("chunk.zig"); const value = @import("value.zig"); +const values = value; const compiler = @import("compiler.zig"); const Chunk = chunk.Chunk; @@ -9,7 +10,7 @@ const Compiler = compiler.Compiler; pub const StdOut = *std.io.OutStream(std.fs.File.WriteError); -pub const InterpretResult = enum { +pub const InterpretResult = error{ Ok, CompileError, RuntimeError, @@ -91,35 +92,50 @@ pub const VM = struct { } /// gets a f64 out of a value on the top of the stack. 
- fn popNum(self: *VM) f64 { - return self.pop(); + fn popNum(self: *VM) !f64 { + var val: Value = self.pop(); + + switch (val.vtype) { + .Number => return val.as.Number, + + else => |vtype| blk: { + self.runtimeError("Expected number, got {x}", vtype); + return InterpretResult.RuntimeError; + }, + } } fn doAdd(self: *VM) !void { - var b = self.popNum(); - var a = self.popNum(); - try self.push(a + b); + var b = try self.popNum(); + var a = try self.popNum(); + try self.push(values.NumberVal(a + b)); } fn doSub(self: *VM) !void { - var b = self.popNum(); - var a = self.popNum(); - try self.push(a * b); + var b = try self.popNum(); + var a = try self.popNum(); + try self.push(values.NumberVal(a - b)); } fn doMul(self: *VM) !void { - var b = self.popNum(); - var a = self.popNum(); - try self.push(a * b); + var b = try self.popNum(); + var a = try self.popNum(); + try self.push(values.NumberVal(a * b)); } fn doDiv(self: *VM) !void { - var b = self.popNum(); - var a = self.popNum(); - try self.push(a / b); + var b = try self.popNum(); + var a = try self.popNum(); + try self.push(values.NumberVal(a / b)); } - fn run(self: *VM) !InterpretResult { + fn runtimeError(self: *VM, comptime fmt: []const u8, args: ...) 
void { + std.debug.warn(fmt, args); + std.debug.warn("\n[line {}] in script\n", self.chk.lines[self.ip]); + self.resetStack(); + } + + fn run(self: *VM) !void { while (true) { if (self.debug_flag) { try self.debugStack(); @@ -150,7 +166,22 @@ pub const VM = struct { chunk.OpCode.Subtract => try self.doSub(), chunk.OpCode.Multiply => try self.doMul(), chunk.OpCode.Divide => try self.doDiv(), - chunk.OpCode.Negate => try self.push(-self.pop()), + chunk.OpCode.Negate => blk: { + var val = self.peek(0); + if (val.vtype != .Bool) { + self.runtimeError("Operand must be a number."); + return InterpretResult.RuntimeError; + } + + val = self.pop(); + switch (val.as) { + .Number => |num| { + try self.push(values.NumberVal(-num)); + }, + else => unreachable, + } + }, + else => blk: { std.debug.warn("Unknown instruction: {x}\n", instruction); return InterpretResult.RuntimeError; @@ -159,7 +190,7 @@ pub const VM = struct { } } - pub fn interpret(self: *VM) !InterpretResult { + pub fn interpret(self: *VM) !void { //self.ip = 0; //self.debug("VM start\n"); //var res = try self.run(); @@ -196,4 +227,8 @@ pub const VM = struct { self.stackTop -= 1; return self.stack[self.stackTop]; } + + pub fn peek(self: *VM, distance: usize) Value { + return self.stack[self.stackTop - 1 - distance]; + } }; From ec652b29d91ec247a68ea6724bc1b92bac8c4557 Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 1 Jun 2019 23:35:13 -0300 Subject: [PATCH 40/68] remove print debug --- src/new_scanner.zig | 1 - 1 file changed, 1 deletion(-) diff --git a/src/new_scanner.zig b/src/new_scanner.zig index 1a8efc4..93f8160 100644 --- a/src/new_scanner.zig +++ b/src/new_scanner.zig @@ -255,7 +255,6 @@ pub const Scanner = struct { '>' => self.makeMatchToken('=', .GREATER_EQUAL, .GREATER), '/' => blk: { - std.debug.warn("next: {}\n", self.peekNext()); if (self.peekNext() == '/') { while (self.peek() != '\n' and !self.isAtEnd()) { _ = self.advance(); From 1d774c60117e1e5f189fe10c5fbe2761d932ac02 Mon Sep 17 00:00:00 2001 
From: Luna Date: Sat, 1 Jun 2019 23:44:59 -0300 Subject: [PATCH 41/68] add support for nil, true and false literal values --- src/chunk.zig | 11 +++++++++++ src/compiler.zig | 17 ++++++++++++++--- src/value.zig | 2 ++ src/vm.zig | 4 ++++ 4 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/chunk.zig b/src/chunk.zig index a3008c7..c32345e 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -13,6 +13,11 @@ const AllOpcodes = struct { pub Multiply: u8 = 5, pub Divide: u8 = 6, pub Negate: u8 = 7, + + // basic type op codes + pub Nil: u8 = 8, + pub True: u8 = 9, + pub False: u8 = 10, }; pub const OpCode = AllOpcodes{}; @@ -163,6 +168,12 @@ pub const Chunk = struct { return try simpleInstruction(stdout, "OP_MULTIPLY", index); } else if (instruction == OpCode.Divide) { return try simpleInstruction(stdout, "OP_DIVIDE", index); + } else if (instruction == OpCode.Nil) { + return try simpleInstruction(stdout, "OP_NIL", index); + } else if (instruction == OpCode.True) { + return try simpleInstruction(stdout, "OP_TRUE", index); + } else if (instruction == OpCode.False) { + return try simpleInstruction(stdout, "OP_FALSE", index); } else { try stdout.print("Unknown opcode: {}\n", instruction); return index + 1; diff --git a/src/compiler.zig b/src/compiler.zig index ae21e84..350e6ad 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -94,17 +94,19 @@ var rules = []ParseRule{ ParseRule{ .precedence = .And }, ParseRule{}, ParseRule{}, + + // false + ParseRule{ .prefix = Compiler.literal }, ParseRule{}, ParseRule{}, ParseRule{}, - ParseRule{}, - ParseRule{}, + ParseRule{ .prefix = Compiler.literal }, ParseRule{ .precedence = .Or }, ParseRule{}, ParseRule{}, ParseRule{}, ParseRule{}, - ParseRule{}, + ParseRule{ .prefix = Compiler.literal }, ParseRule{}, ParseRule{}, ParseRule{}, @@ -252,6 +254,15 @@ pub const Compiler = struct { } } + fn literal(self: *Compiler) !void { + switch (self.parser.previous.ttype) { + .FALSE => try self.emitByte(OpCode.False), + .NIL => try 
self.emitByte(OpCode.Nil), + .TRUE => try self.emitByte(OpCode.True), + else => unreachable, + } + } + fn parsePrecedence(self: *Compiler, precedence: Precedence) !void { try self.advance(); var as_int = @enumToInt(precedence); diff --git a/src/value.zig b/src/value.zig index aa33ecc..713c4b1 100644 --- a/src/value.zig +++ b/src/value.zig @@ -36,6 +36,8 @@ pub fn NumberVal(val: f64) Value { pub fn printValue(stdout: var, value: Value) !void { switch (value.as) { + .Nil => try stdout.print("nil"), + .Bool => try stdout.print("{}", value.as.Bool), .Number => try stdout.print("{}", value.as.Number), else => unreachable, } diff --git a/src/vm.zig b/src/vm.zig index 398f9ab..f1ce443 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -162,6 +162,10 @@ pub const VM = struct { return InterpretResult.Ok; }, + chunk.OpCode.Nil => try self.push(values.NilVal()), + chunk.OpCode.True => try self.push(values.BoolVal(true)), + chunk.OpCode.False => try self.push(values.BoolVal(false)), + chunk.OpCode.Add => try self.doAdd(), chunk.OpCode.Subtract => try self.doSub(), chunk.OpCode.Multiply => try self.doMul(), From c5d704a34ff6377dea5d651599864a9385d5c936 Mon Sep 17 00:00:00 2001 From: Luna Date: Sun, 2 Jun 2019 00:02:37 -0300 Subject: [PATCH 42/68] add not operator --- README.md | 17 +++++++++++++++-- src/chunk.zig | 2 ++ src/compiler.zig | 3 ++- src/vm.zig | 7 +++++++ 4 files changed, 26 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2168721..b8e6fc4 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,18 @@ # jorts -an interpreter for the lox language from https://craftinginterpreters.com +a compiler for the lox language from https://craftinginterpreters.com -this is a learning project. +this is a learning project. the implemtation is based heavily off the C part +of the book, but also the Java part for the scanner. + +## notes + + - jorts' lox bytecode is not compatible with any implementation. + +## how do? 
+ +``` +zig build run +``` + +and play around with it diff --git a/src/chunk.zig b/src/chunk.zig index c32345e..3f8b2f6 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -18,6 +18,8 @@ const AllOpcodes = struct { pub Nil: u8 = 8, pub True: u8 = 9, pub False: u8 = 10, + + pub Not: u8 = 11, }; pub const OpCode = AllOpcodes{}; diff --git a/src/compiler.zig b/src/compiler.zig index 350e6ad..5928b95 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -75,7 +75,7 @@ var rules = []ParseRule{ ParseRule{ .infix = Compiler.binary, .precedence = .Factor }, // as the token enum says, those are 1/2 char tokens. - ParseRule{}, + ParseRule{ .prefix = Compiler.unary }, // this is specifically for the != operator ParseRule{ .precedence = .Equality }, ParseRule{}, @@ -236,6 +236,7 @@ pub const Compiler = struct { switch (ttype) { .MINUS => try self.emitByte(OpCode.Negate), + .BANG => try self.emitByte(OpCode.Not), else => unreachable, } } diff --git a/src/vm.zig b/src/vm.zig index f1ce443..5b89051 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -16,6 +16,10 @@ pub const InterpretResult = error{ RuntimeError, }; +fn isFalsey(val: value.Value) bool { + return val.vtype == .Nil or (val.vtype == .Bool and !val.as.Bool); +} + pub const VM = struct { chk: *Chunk = undefined, src: []const u8, @@ -170,6 +174,9 @@ pub const VM = struct { chunk.OpCode.Subtract => try self.doSub(), chunk.OpCode.Multiply => try self.doMul(), chunk.OpCode.Divide => try self.doDiv(), + chunk.OpCode.Not => blk: { + try self.push(values.BoolVal(isFalsey(self.pop()))); + }, chunk.OpCode.Negate => blk: { var val = self.peek(0); if (val.vtype != .Bool) { From 39e28f01ace285e3e123da093e3d702cae3bd020 Mon Sep 17 00:00:00 2001 From: Luna Date: Sun, 2 Jun 2019 00:03:54 -0300 Subject: [PATCH 43/68] chunk: add printing of OP_NOT --- src/chunk.zig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/chunk.zig b/src/chunk.zig index 3f8b2f6..eb002d6 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -176,6 +176,8 @@ pub 
const Chunk = struct { return try simpleInstruction(stdout, "OP_TRUE", index); } else if (instruction == OpCode.False) { return try simpleInstruction(stdout, "OP_FALSE", index); + } else if (instruction == OpCode.Not) { + return try simpleInstruction(stdout, "OP_NOT", index); } else { try stdout.print("Unknown opcode: {}\n", instruction); return index + 1; From 15c58a2216c583b9bbf7d08686a68f952046d88e Mon Sep 17 00:00:00 2001 From: Luna Date: Sun, 2 Jun 2019 00:16:33 -0300 Subject: [PATCH 44/68] comparison operators, part 1 --- src/chunk.zig | 5 +++++ src/compiler.zig | 23 ++++++++++++++++------- src/vm.zig | 17 +++++++++++++++++ 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/src/chunk.zig b/src/chunk.zig index eb002d6..31985eb 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -20,6 +20,11 @@ const AllOpcodes = struct { pub False: u8 = 10, pub Not: u8 = 11, + + // comparison op codes! + pub Equal: u8 = 12, + pub Greater: u8 = 13, + pub Less: u8 = 14, }; pub const OpCode = AllOpcodes{}; diff --git a/src/compiler.zig b/src/compiler.zig index 5928b95..17b8a0b 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -77,16 +77,16 @@ var rules = []ParseRule{ // as the token enum says, those are 1/2 char tokens. 
ParseRule{ .prefix = Compiler.unary }, // this is specifically for the != operator - ParseRule{ .precedence = .Equality }, + ParseRule{ .infix = Compiler.binary, .precedence = .Equality }, ParseRule{}, // this is specifically for the == operator - ParseRule{ .precedence = .Equality }, + ParseRule{ .infix = Compiler.binary, .precedence = .Equality }, // all the comparison ones - ParseRule{ .precedence = .Comparison }, - ParseRule{ .precedence = .Comparison }, - ParseRule{ .precedence = .Comparison }, - ParseRule{ .precedence = .Comparison }, + ParseRule{ .infix = Compiler.binary, .precedence = .Comparison }, + ParseRule{ .infix = Compiler.binary, .precedence = .Comparison }, + ParseRule{ .infix = Compiler.binary, .precedence = .Comparison }, + ParseRule{ .infix = Compiler.binary, .precedence = .Comparison }, ParseRule{}, ParseRule{}, @@ -190,7 +190,7 @@ pub const Compiler = struct { try self.currentChunk().write(byte, self.parser.previous.line); } - fn emitBytes(self: *Compiler, byte1: u8, byte2: u82) !void { + fn emitBytes(self: *Compiler, byte1: u8, byte2: u8) !void { try self.emitByte(byte1); try self.emitByte(byte2); } @@ -251,6 +251,15 @@ pub const Compiler = struct { .MINUS => try self.emitByte(OpCode.Subtract), .STAR => try self.emitByte(OpCode.Multiply), .SLASH => try self.emitByte(OpCode.Divide), + + .EQUAL_EQUAL => try self.emitByte(OpCode.Equal), + .GREATER => try self.emitByte(OpCode.Greater), + .LESS => try self.emitByte(OpCode.Less), + + .BANG_EQUAL => try self.emitBytes(OpCode.Equal, OpCode.Not), + .GREATER_EQUAL => try self.emitBytes(OpCode.Less, OpCode.Not), + .LESS_EQUAL => try self.emitBytes(OpCode.Greater, OpCode.Not), + else => unreachable, } } diff --git a/src/vm.zig b/src/vm.zig index 5b89051..d712897 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -20,6 +20,16 @@ fn isFalsey(val: value.Value) bool { return val.vtype == .Nil or (val.vtype == .Bool and !val.as.Bool); } +fn valuesEqual(a: value.Value, b: value.Value) bool { + if (a.vtype != b.vtype) 
return false; + + switch (a.vtype) { + .Nil => return true, + .Bool => return a.as.Bool == b.as.Bool, + .Number => return a.as.Number == b.as.Number, + } +} + pub const VM = struct { chk: *Chunk = undefined, src: []const u8, @@ -170,6 +180,12 @@ pub const VM = struct { chunk.OpCode.True => try self.push(values.BoolVal(true)), chunk.OpCode.False => try self.push(values.BoolVal(false)), + chunk.OpCode.Equal => blk: { + var a = self.pop(); + var b = self.pop(); + try self.push(values.BoolVal(valuesEqual(a, b))); + }, + chunk.OpCode.Add => try self.doAdd(), chunk.OpCode.Subtract => try self.doSub(), chunk.OpCode.Multiply => try self.doMul(), @@ -177,6 +193,7 @@ pub const VM = struct { chunk.OpCode.Not => blk: { try self.push(values.BoolVal(isFalsey(self.pop()))); }, + chunk.OpCode.Negate => blk: { var val = self.peek(0); if (val.vtype != .Bool) { From 71dba5c77d82298975fae5d4ee6ae330ef3da799 Mon Sep 17 00:00:00 2001 From: Luna Date: Sun, 2 Jun 2019 00:23:50 -0300 Subject: [PATCH 45/68] vm: add greater and less --- src/chunk.zig | 6 ++++++ src/compiler.zig | 1 - src/vm.zig | 15 +++++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/chunk.zig b/src/chunk.zig index 31985eb..50aca63 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -183,6 +183,12 @@ pub const Chunk = struct { return try simpleInstruction(stdout, "OP_FALSE", index); } else if (instruction == OpCode.Not) { return try simpleInstruction(stdout, "OP_NOT", index); + } else if (instruction == OpCode.Equal) { + return try simpleInstruction(stdout, "OP_EQUAL", index); + } else if (instruction == OpCode.Greater) { + return try simpleInstruction(stdout, "OP_GREATER", index); + } else if (instruction == OpCode.Less) { + return try simpleInstruction(stdout, "OP_LESS", index); } else { try stdout.print("Unknown opcode: {}\n", instruction); return index + 1; diff --git a/src/compiler.zig b/src/compiler.zig index 17b8a0b..459adda 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -221,7 
+221,6 @@ pub const Compiler = struct { /// Emits bytecode for a number being loaded into the code. fn number(self: *Compiler) !void { - std.debug.warn("parsing number: '{}'\n", self.parser.previous.lexeme); var value: f64 = try std.fmt.parseFloat( f64, self.parser.previous.lexeme, diff --git a/src/vm.zig b/src/vm.zig index d712897..1fbf0ac 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -143,6 +143,18 @@ pub const VM = struct { try self.push(values.NumberVal(a / b)); } + fn doGreater(self: *VM) !void { + var b = try self.popNum(); + var a = try self.popNum(); + try self.push(values.BoolVal(a > b)); + } + + fn doLess(self: *VM) !void { + var b = try self.popNum(); + var a = try self.popNum(); + try self.push(values.BoolVal(a < b)); + } + fn runtimeError(self: *VM, comptime fmt: []const u8, args: ...) void { std.debug.warn(fmt, args); std.debug.warn("\n[line {}] in script\n", self.chk.lines[self.ip]); @@ -186,6 +198,9 @@ pub const VM = struct { try self.push(values.BoolVal(valuesEqual(a, b))); }, + chunk.OpCode.Greater => try self.doGreater(), + chunk.OpCode.Less => try self.doLess(), + chunk.OpCode.Add => try self.doAdd(), chunk.OpCode.Subtract => try self.doSub(), chunk.OpCode.Multiply => try self.doMul(), From 44c27f43b7c31d93c6e59665ea7ab268101055a8 Mon Sep 17 00:00:00 2001 From: Luna Date: Sun, 2 Jun 2019 13:17:32 -0300 Subject: [PATCH 46/68] move new_scanner.zig to scanner.zig --- src/compiler.zig | 2 +- src/new_scanner.zig | 276 -------------------------------------------- src/scanner.zig | 230 ++++++++++++++---------------------- 3 files changed, 88 insertions(+), 420 deletions(-) delete mode 100644 src/new_scanner.zig diff --git a/src/compiler.zig b/src/compiler.zig index 459adda..7b75e0e 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -1,5 +1,5 @@ const std = @import("std"); -const scanner = @import("new_scanner.zig"); +const scanner = @import("scanner.zig"); const vm = @import("vm.zig"); const chunks = @import("chunk.zig"); const tokens = 
@import("token.zig"); diff --git a/src/new_scanner.zig b/src/new_scanner.zig deleted file mode 100644 index 93f8160..0000000 --- a/src/new_scanner.zig +++ /dev/null @@ -1,276 +0,0 @@ -const std = @import("std"); -const tokens = @import("token.zig"); - -const Token = tokens.Token; -const TokenType = tokens.TokenType; - -const Allocator = std.mem.Allocator; - -pub const TokenError = error{ - Unexpected, - Unterminated, -}; - -fn isDigit(char: u8) bool { - return char >= '0' and char <= '9'; -} - -fn isAlpha(c: u8) bool { - return (c >= 'a' and c <= 'z') or - (c >= 'A' and c <= 'Z') or - c == '_'; -} - -fn isAlphaNumeric(char: u8) bool { - return isAlpha(char) or isDigit(char); -} - -pub const KeywordMap = std.AutoHashMap([]const u8, u6); - -/// The book does say that C doesn't have hashmaps. but Zig does. and I can -/// use it here. -fn initKeywordMap(allocator: *std.mem.Allocator) !KeywordMap { - var map = KeywordMap.init(allocator); - - const keywords = [][]const u8{ - "and"[0..], - "class"[0..], - "else"[0..], - "false"[0..], - "for"[0..], - "fun"[0..], - "if"[0..], - "nil"[0..], - "or"[0..], - "print"[0..], - "return"[0..], - "super"[0..], - "this"[0..], - "true"[0..], - "var"[0..], - "while"[0..], - }; - - const tags = []TokenType{ - TokenType.AND, - TokenType.CLASS, - TokenType.ELSE, - TokenType.FALSE, - TokenType.FOR, - TokenType.FUN, - TokenType.IF, - TokenType.NIL, - TokenType.OR, - TokenType.PRINT, - TokenType.RETURN, - TokenType.SUPER, - TokenType.THIS, - TokenType.TRUE, - TokenType.VAR, - TokenType.WHILE, - }; - - for (keywords) |keyword, idx| { - var tag = @enumToInt(tags[idx]); - _ = try map.put(keyword, tag); - } - - return map; -} - -pub const Scanner = struct { - source: []const u8, - keywords: KeywordMap, - - start: usize = 0, - current: usize = 0, - line: usize = 1, - - allocator: *Allocator, - - pub fn init(allocator: *Allocator, data: []const u8) !Scanner { - return Scanner{ - .source = data, - .keywords = try initKeywordMap(allocator), - 
.allocator = allocator, - }; - } - - fn isAtEnd(self: *Scanner) bool { - return self.current >= self.source.len; - } - - fn advance(self: *Scanner) u8 { - self.current += 1; - return self.source[self.current - 1]; - } - - pub fn currentLexeme(self: *Scanner) []const u8 { - return self.source[self.start..self.current]; - } - - fn makeToken(self: *Scanner, ttype: TokenType) Token { - return Token{ - .ttype = ttype, - .lexeme = self.currentLexeme(), - .line = self.line, - }; - } - - /// Check if the next character matches what is expected. - fn match(self: *Scanner, expected: u8) bool { - if (self.isAtEnd()) return false; - if (self.source[self.current] != expected) return false; - - self.current += 1; - return true; - } - - /// Add a SimpleToken of type_match if the next character is - /// `expected`. Adds a SimpleToken of type_nomatch when it is not. - fn makeMatchToken( - self: *Scanner, - expected: u8, - type_match: TokenType, - type_nomatch: TokenType, - ) Token { - if (self.match(expected)) { - return self.makeToken(type_match); - } else { - return self.makeToken(type_nomatch); - } - } - - fn peek(self: *Scanner) u8 { - if (self.isAtEnd()) return 0; - return self.source[self.current]; - } - - fn peekNext(self: *Scanner) u8 { - if (self.current + 1 >= self.source.len) return 0; - return self.source[self.current + 1]; - } - - fn skipWhitespace(self: *Scanner) void { - while (true) { - var c = self.peek(); - switch (c) { - ' ', '\r', '\t' => blk: { - _ = self.advance(); - }, - '\n' => blk: { - self.line += 1; - _ = self.advance(); - }, - else => return, - } - } - } - - fn doString(self: *Scanner) !Token { - // consume entire string - while (self.peek() != '"' and !self.isAtEnd()) { - if (self.peek() == '\n') self.line += 1; - _ = self.advance(); - } - - // unterminated string. - if (self.isAtEnd()) { - return TokenError.Unterminated; - } - - // the closing ". - _ = self.advance(); - - // trim the surrounding quotes. 
- return self.makeToken(.STRING); - } - - /// Consume a number - fn doNumber(self: *Scanner) Token { - while (isDigit(self.peek())) { - _ = self.advance(); - } - - // check if its a number like 12.34, where the '.' character - // exists and the one next to it is a digit. - if (self.peek() == '.' and isDigit(self.peekNext())) { - _ = self.advance(); - - while (isDigit(self.peek())) { - _ = self.advance(); - } - } - - return self.makeToken(.NUMBER); - } - - /// Either a keyword or an identifier come out of this. - fn doIdentifier(self: *Scanner) Token { - while (isAlphaNumeric(self.peek())) { - _ = self.advance(); - } - - // after reading the identifier, we check - // if it is any of our keywords, if it is, then we add - // the specificed keyword type. if not, just .IDENTIFIER - var text = self.source[self.start..self.current]; - var type_opt = self.keywords.get(text); - var toktype: TokenType = undefined; - - if (type_opt) |kv| { - toktype = @intToEnum(TokenType, kv.value); - } else { - toktype = TokenType.IDENTIFIER; - } - - return self.makeToken(toktype); - } - - pub fn scanToken(self: *Scanner) !?Token { - self.skipWhitespace(); - self.start = self.current; - - if (self.isAtEnd()) return self.makeToken(TokenType.EOF); - - var c = self.advance(); - if (isAlpha(c)) return self.doIdentifier(); - if (isDigit(c)) return self.doNumber(); - - var token = switch (c) { - '(' => self.makeToken(.LEFT_PAREN), - ')' => self.makeToken(.RIGHT_PAREN), - '{' => self.makeToken(.LEFT_BRACE), - '}' => self.makeToken(.RIGHT_BRACE), - ',' => self.makeToken(.COMMA), - '.' => self.makeToken(.DOT), - '-' => self.makeToken(.MINUS), - '+' => self.makeToken(.PLUS), - ';' => self.makeToken(.SEMICOLON), - '*' => self.makeToken(.STAR), - - '!' 
=> self.makeMatchToken('=', .BANG_EQUAL, .BANG), - '=' => self.makeMatchToken('=', .EQUAL_EQUAL, .EQUAL), - '<' => self.makeMatchToken('=', .LESS_EQUAL, .LESS), - '>' => self.makeMatchToken('=', .GREATER_EQUAL, .GREATER), - - '/' => blk: { - if (self.peekNext() == '/') { - while (self.peek() != '\n' and !self.isAtEnd()) { - _ = self.advance(); - } - - break :blk null; - } else { - break :blk self.makeToken(.SLASH); - } - }, - - '"' => try self.doString(), - - else => return TokenError.Unexpected, - }; - - return token; - } -}; diff --git a/src/scanner.zig b/src/scanner.zig index b83a7a4..93f8160 100644 --- a/src/scanner.zig +++ b/src/scanner.zig @@ -1,10 +1,15 @@ const std = @import("std"); +const tokens = @import("token.zig"); -const token = @import("token.zig"); -const main = @import("main.zig"); +const Token = tokens.Token; +const TokenType = tokens.TokenType; -const TokenList = std.ArrayList(token.Token); -const TokenType = token.TokenType; +const Allocator = std.mem.Allocator; + +pub const TokenError = error{ + Unexpected, + Unterminated, +}; fn isDigit(char: u8) bool { return char >= '0' and char <= '9'; @@ -22,6 +27,8 @@ fn isAlphaNumeric(char: u8) bool { pub const KeywordMap = std.AutoHashMap([]const u8, u6); +/// The book does say that C doesn't have hashmaps. but Zig does. and I can +/// use it here. 
fn initKeywordMap(allocator: *std.mem.Allocator) !KeywordMap { var map = KeywordMap.init(allocator); @@ -72,19 +79,20 @@ fn initKeywordMap(allocator: *std.mem.Allocator) !KeywordMap { } pub const Scanner = struct { - source: []u8, - tokens: TokenList, + source: []const u8, keywords: KeywordMap, start: usize = 0, current: usize = 0, line: usize = 1, - pub fn init(allocator: *std.mem.Allocator, data: []u8) !Scanner { + allocator: *Allocator, + + pub fn init(allocator: *Allocator, data: []const u8) !Scanner { return Scanner{ .source = data, - .tokens = TokenList.init(allocator), .keywords = try initKeywordMap(allocator), + .allocator = allocator, }; } @@ -97,49 +105,16 @@ pub const Scanner = struct { return self.source[self.current - 1]; } - pub fn currentLexeme(self: *Scanner) []u8 { + pub fn currentLexeme(self: *Scanner) []const u8 { return self.source[self.start..self.current]; } - fn addSimpleToken(self: *Scanner, ttype: token.TokenType) !void { - try self.addToken(token.Token{ - .Simple = token.SimpleToken.init( - ttype, - self.currentLexeme(), - self.line, - {}, - ), - }); - } - - fn addSliceToken(self: *Scanner, ttype: token.TokenType, slice: []u8) !void { - try self.addToken(token.Token{ - .Slice = token.SliceToken.init( - ttype, - self.currentLexeme(), - self.line, - slice, - ), - }); - } - - /// Keep in mind Lox only has a single number type and that is a float one. - fn addNumberToken(self: *Scanner, ttype: token.TokenType, num: f32) !void { - try self.addToken(token.Token{ - .Number = token.NumberToken.init( - ttype, - self.currentLexeme(), - self.line, - num, - ), - }); - } - - fn addToken( - self: *Scanner, - tok: token.Token, - ) !void { - try self.tokens.append(tok); + fn makeToken(self: *Scanner, ttype: TokenType) Token { + return Token{ + .ttype = ttype, + .lexeme = self.currentLexeme(), + .line = self.line, + }; } /// Check if the next character matches what is expected. 
@@ -153,16 +128,16 @@ pub const Scanner = struct { /// Add a SimpleToken of type_match if the next character is /// `expected`. Adds a SimpleToken of type_nomatch when it is not. - fn addMatchToken( + fn makeMatchToken( self: *Scanner, expected: u8, - type_match: token.TokenType, - type_nomatch: token.TokenType, - ) !void { + type_match: TokenType, + type_nomatch: TokenType, + ) Token { if (self.match(expected)) { - try self.addSimpleToken(type_match); + return self.makeToken(type_match); } else { - try self.addSimpleToken(type_nomatch); + return self.makeToken(type_nomatch); } } @@ -171,7 +146,28 @@ pub const Scanner = struct { return self.source[self.current]; } - fn doString(self: *Scanner) !void { + fn peekNext(self: *Scanner) u8 { + if (self.current + 1 >= self.source.len) return 0; + return self.source[self.current + 1]; + } + + fn skipWhitespace(self: *Scanner) void { + while (true) { + var c = self.peek(); + switch (c) { + ' ', '\r', '\t' => blk: { + _ = self.advance(); + }, + '\n' => blk: { + self.line += 1; + _ = self.advance(); + }, + else => return, + } + } + } + + fn doString(self: *Scanner) !Token { // consume entire string while (self.peek() != '"' and !self.isAtEnd()) { if (self.peek() == '\n') self.line += 1; @@ -180,27 +176,18 @@ pub const Scanner = struct { // unterminated string. if (self.isAtEnd()) { - try main.doError(self.line, "Unterminated string."); - return; + return TokenError.Unterminated; } // the closing ". _ = self.advance(); // trim the surrounding quotes. - try self.addSliceToken( - .STRING, - self.source[self.start + 1 .. 
self.current - 1], - ); - } - - fn peekNext(self: *Scanner) u8 { - if (self.current + 1 >= self.source.len) return 0; - return self.source[self.current + 1]; + return self.makeToken(.STRING); } /// Consume a number - fn doNumber(self: *Scanner) !void { + fn doNumber(self: *Scanner) Token { while (isDigit(self.peek())) { _ = self.advance(); } @@ -215,17 +202,11 @@ pub const Scanner = struct { } } - // after going through all of the number, we can just use fmt.parseFloat - - var num = try std.fmt.parseFloat( - f32, - self.source[self.start..self.current], - ); - - try self.addNumberToken(.NUMBER, num); + return self.makeToken(.NUMBER); } - fn doIdentifier(self: *Scanner) !void { + /// Either a keyword or an identifier come out of this. + fn doIdentifier(self: *Scanner) Token { while (isAlphaNumeric(self.peek())) { _ = self.advance(); } @@ -243,90 +224,53 @@ pub const Scanner = struct { toktype = TokenType.IDENTIFIER; } - try self.addSimpleToken(toktype); + return self.makeToken(toktype); } - /// Scan through our tokens and add them to the Scanner's token list. - fn scanToken(self: *Scanner) !void { + pub fn scanToken(self: *Scanner) !?Token { + self.skipWhitespace(); + self.start = self.current; + + if (self.isAtEnd()) return self.makeToken(TokenType.EOF); + var c = self.advance(); + if (isAlpha(c)) return self.doIdentifier(); + if (isDigit(c)) return self.doNumber(); - switch (c) { - '(' => try self.addSimpleToken(.LEFT_PAREN), - ')' => try self.addSimpleToken(.RIGHT_PAREN), - '{' => try self.addSimpleToken(.LEFT_BRACE), - '}' => try self.addSimpleToken(.RIGHT_BRACE), - ',' => try self.addSimpleToken(.COMMA), - '.' 
=> try self.addSimpleToken(.DOT), - '-' => try self.addSimpleToken(.MINUS), - '+' => try self.addSimpleToken(.PLUS), - ';' => try self.addSimpleToken(.SEMICOLON), - '*' => try self.addSimpleToken(.STAR), + var token = switch (c) { + '(' => self.makeToken(.LEFT_PAREN), + ')' => self.makeToken(.RIGHT_PAREN), + '{' => self.makeToken(.LEFT_BRACE), + '}' => self.makeToken(.RIGHT_BRACE), + ',' => self.makeToken(.COMMA), + '.' => self.makeToken(.DOT), + '-' => self.makeToken(.MINUS), + '+' => self.makeToken(.PLUS), + ';' => self.makeToken(.SEMICOLON), + '*' => self.makeToken(.STAR), - '!' => try self.addMatchToken('=', .BANG_EQUAL, .BANG), - '=' => try self.addMatchToken('=', .EQUAL_EQUAL, .EQUAL), - '<' => try self.addMatchToken('=', .LESS_EQUAL, .LESS), - '>' => try self.addMatchToken('=', .GREATER_EQUAL, .GREATER), + '!' => self.makeMatchToken('=', .BANG_EQUAL, .BANG), + '=' => self.makeMatchToken('=', .EQUAL_EQUAL, .EQUAL), + '<' => self.makeMatchToken('=', .LESS_EQUAL, .LESS), + '>' => self.makeMatchToken('=', .GREATER_EQUAL, .GREATER), '/' => blk: { - // consume comments - if (self.match('/')) { + if (self.peekNext() == '/') { while (self.peek() != '\n' and !self.isAtEnd()) { _ = self.advance(); } - } else if (self.match('*')) { - // multiline block comments are messier to work with, but - // we can still do it! 
- while (true) { - if (self.isAtEnd()) break; - // check '*/' - if (self.peek() == '*' and self.peekNext() == '/') { - self.current += 2; - break; - } - _ = self.advance(); - } + break :blk null; } else { - try self.addSimpleToken(.SLASH); + break :blk self.makeToken(.SLASH); } }, - ' ', '\r', '\t' => blk: {}, - '\n' => blk: { - self.line += 1; - }, - '"' => try self.doString(), - else => { - if (isDigit(c)) { - try self.doNumber(); - } else if (isAlpha(c)) { - try self.doIdentifier(); - } else { - try main.doError(self.line, "Unexpected character"); - } - }, - } - } + else => return TokenError.Unexpected, + }; - pub fn scanTokens(self: *Scanner) !TokenList { - // while we aren't at the end, we're still consuming - // tokens. - while (!self.isAtEnd()) { - self.start = self.current; - try self.scanToken(); - } - - try self.addToken(token.Token{ - .Simple = token.SimpleToken.init( - .EOF, - "", - self.line, - {}, - ), - }); - - return self.tokens; + return token; } }; From 9ac5fccc2b71aea8fc8a2703dbf0e0ed72bb7449 Mon Sep 17 00:00:00 2001 From: Luna Date: Sun, 2 Jun 2019 14:01:54 -0300 Subject: [PATCH 47/68] add objects and string comparison --- src/compiler.zig | 10 +++++++++- src/object.zig | 34 ++++++++++++++++++++++++++++++++++ src/value.zig | 14 ++++++++++++-- src/vm.zig | 5 +++++ 4 files changed, 60 insertions(+), 3 deletions(-) create mode 100644 src/object.zig diff --git a/src/compiler.zig b/src/compiler.zig index 7b75e0e..2aaff70 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -4,6 +4,7 @@ const vm = @import("vm.zig"); const chunks = @import("chunk.zig"); const tokens = @import("token.zig"); const values = @import("value.zig"); +const objects = @import("object.zig"); const Allocator = std.mem.Allocator; const Scanner = scanner.Scanner; @@ -89,7 +90,7 @@ var rules = []ParseRule{ ParseRule{ .infix = Compiler.binary, .precedence = .Comparison }, ParseRule{}, - ParseRule{}, + ParseRule{ .prefix = Compiler.string }, ParseRule{ .prefix = Compiler.number }, 
ParseRule{ .precedence = .And }, ParseRule{}, @@ -228,6 +229,13 @@ pub const Compiler = struct { try self.emitConstant(values.NumberVal(value)); } + fn string(self: *Compiler) !void { + try self.emitConstant(values.ObjVal(try objects.copyString( + self.allocator, + self.parser.previous.lexeme, + ))); + } + /// Emits bytecode for a given unary. fn unary(self: *Compiler) !void { var ttype = self.parser.previous.ttype; diff --git a/src/object.zig b/src/object.zig new file mode 100644 index 0000000..0971fe3 --- /dev/null +++ b/src/object.zig @@ -0,0 +1,34 @@ +const std = @import("std"); + +const Allocator = std.mem.Allocator; + +pub const ObjType = enum { + String, +}; + +pub const ObjValue = struct { + String: []u8, +}; + +pub const Object = struct { + otype: ObjType, + value: ObjValue, +}; + +pub fn copyString(allocator: *Allocator, data: []const u8) !*Object { + var str = try allocator.alloc(u8, data.len); + std.mem.copy(u8, str, data); + + var obj = try allocator.create(Object); + obj.otype = ObjType.String; + obj.value = ObjValue{ .String = str }; + + return obj; +} + +pub fn printObject(stdout: var, obj: Object) !void { + switch (obj.otype) { + .String => try stdout.print("{}", obj.value.String), + else => unreachable, + } +} diff --git a/src/value.zig b/src/value.zig index 713c4b1..b1c927c 100644 --- a/src/value.zig +++ b/src/value.zig @@ -1,19 +1,20 @@ const std = @import("std"); +const objects = @import("object.zig"); const Allocator = std.mem.Allocator; -// NOTE: right now, only numbers. 
- pub const ValueType = enum(u8) { Bool, Nil, Number, + Object, }; pub const ValueValue = union(ValueType) { Bool: bool, Nil: void, Number: f64, + Object: *objects.Object, }; pub const Value = struct { @@ -34,11 +35,20 @@ pub fn NumberVal(val: f64) Value { return Value{ .vtype = .Number, .as = ValueValue{ .Number = val } }; } +pub fn ObjVal(val: *objects.Object) Value { + return Value{ .vtype = .Object, .as = ValueValue{ .Object = val } }; +} + +pub fn isObjType(val: Value, otype: objects.ObjType) bool { + return val.vtype == .Object and val.as.Object.otype == otype; +} + pub fn printValue(stdout: var, value: Value) !void { switch (value.as) { .Nil => try stdout.print("nil"), .Bool => try stdout.print("{}", value.as.Bool), .Number => try stdout.print("{}", value.as.Number), + .Object => try objects.printObject(stdout, value.as.Object.*), else => unreachable, } } diff --git a/src/vm.zig b/src/vm.zig index 1fbf0ac..302f371 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -27,6 +27,11 @@ fn valuesEqual(a: value.Value, b: value.Value) bool { .Nil => return true, .Bool => return a.as.Bool == b.as.Bool, .Number => return a.as.Number == b.as.Number, + .Object => blk: { + var aStr = a.as.Object.value.String; + var bStr = b.as.Object.value.String; + return std.mem.compare(u8, aStr, bStr) == .Equal; + }, } } From 38715af2000a8f5686792996fa94d368d93d0165 Mon Sep 17 00:00:00 2001 From: Luna Date: Sun, 2 Jun 2019 14:18:01 -0300 Subject: [PATCH 48/68] vm: add string concatenation - compiler: fix string creation --- src/compiler.zig | 3 ++- src/object.zig | 16 ++++++++++++---- src/vm.zig | 21 +++++++++++++++++++++ 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/src/compiler.zig b/src/compiler.zig index 2aaff70..81fe6be 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -230,9 +230,10 @@ pub const Compiler = struct { } fn string(self: *Compiler) !void { + const lexeme_len = self.parser.previous.lexeme.len; try self.emitConstant(values.ObjVal(try objects.copyString( 
self.allocator, - self.parser.previous.lexeme, + self.parser.previous.lexeme[1 .. lexeme_len - 1], ))); } diff --git a/src/object.zig b/src/object.zig index 0971fe3..c47cf65 100644 --- a/src/object.zig +++ b/src/object.zig @@ -15,15 +15,23 @@ pub const Object = struct { value: ObjValue, }; +fn createString(allocator: *Allocator, data: []u8) !*Object { + var obj = try allocator.create(Object); + obj.otype = ObjType.String; + obj.value = ObjValue{ .String = data }; + return obj; +} + pub fn copyString(allocator: *Allocator, data: []const u8) !*Object { var str = try allocator.alloc(u8, data.len); std.mem.copy(u8, str, data); - var obj = try allocator.create(Object); - obj.otype = ObjType.String; - obj.value = ObjValue{ .String = str }; + return try createString(allocator, str); +} - return obj; +/// Assumes it can take ownership of the given data. +pub fn takeString(allocator: *Allocator, data: []u8) !*Object { + return try createString(allocator, data); } pub fn printObject(stdout: var, obj: Object) !void { diff --git a/src/vm.zig b/src/vm.zig index 302f371..40c1ac6 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -3,6 +3,7 @@ const chunk = @import("chunk.zig"); const value = @import("value.zig"); const values = value; const compiler = @import("compiler.zig"); +const objects = @import("object.zig"); const Chunk = chunk.Chunk; const Value = value.Value; @@ -124,7 +125,27 @@ pub const VM = struct { } } + fn concatenateStrings(self: *VM) !void { + var b = self.pop().as.Object.value.String; + var a = self.pop().as.Object.value.String; + + var res_str = try std.mem.join( + self.allocator, + "", + [][]u8{ a, b }, + ); + + var val = values.ObjVal(try objects.takeString(self.allocator, res_str)); + try self.push(val); + } + fn doAdd(self: *VM) !void { + if (values.isObjType(self.peek(0), .String) and + values.isObjType(self.peek(1), .String)) + { + return try self.concatenateStrings(); + } + var b = try self.popNum(); var a = try self.popNum(); try self.push(values.NumberVal(a + 
b)); From cf53b6fc8633bde08c7c4439c2cc5673e52e7b41 Mon Sep 17 00:00:00 2001 From: Luna Date: Sun, 2 Jun 2019 14:52:19 -0300 Subject: [PATCH 49/68] add basics of virtual machine object list - object: move functions to accept VM pointer, not Allocator --- src/compiler.zig | 6 +++++- src/object.zig | 32 ++++++++++++++++++++++---------- src/vm.zig | 7 +++++-- 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/src/compiler.zig b/src/compiler.zig index 81fe6be..8666e11 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -121,6 +121,7 @@ pub const Compiler = struct { scanr: Scanner = undefined, chunk: *chunks.Chunk, debug_flag: bool = false, + vmach: *vm.VM, pub fn init( allocator: *Allocator, @@ -128,6 +129,7 @@ pub const Compiler = struct { stdout: vm.StdOut, source: []const u8, debug_flag: bool, + vmach: *vm.VM, ) Compiler { return Compiler{ .src = source, @@ -136,6 +138,7 @@ pub const Compiler = struct { .stdout = stdout, .parser = Parser{}, .debug_flag = debug_flag, + .vmach = vmach, }; } @@ -231,8 +234,9 @@ pub const Compiler = struct { fn string(self: *Compiler) !void { const lexeme_len = self.parser.previous.lexeme.len; + try self.emitConstant(values.ObjVal(try objects.copyString( - self.allocator, + self.vmach, self.parser.previous.lexeme[1 .. 
lexeme_len - 1], ))); } diff --git a/src/object.zig b/src/object.zig index c47cf65..17418df 100644 --- a/src/object.zig +++ b/src/object.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const vm = @import("vm.zig"); const Allocator = std.mem.Allocator; @@ -13,25 +14,36 @@ pub const ObjValue = struct { pub const Object = struct { otype: ObjType, value: ObjValue, + next: ?*Object = null, }; -fn createString(allocator: *Allocator, data: []u8) !*Object { - var obj = try allocator.create(Object); - obj.otype = ObjType.String; - obj.value = ObjValue{ .String = data }; +pub fn allocateObject( + vmach: *vm.VM, + otype: ObjType, + value: ObjValue, +) !*Object { + var obj = try vmach.allocator.create(Object); + obj.otype = otype; + obj.value = value; + + obj.next = vmach.objs; + vmach.objs = obj; return obj; } -pub fn copyString(allocator: *Allocator, data: []const u8) !*Object { - var str = try allocator.alloc(u8, data.len); - std.mem.copy(u8, str, data); +fn createString(vmach: *vm.VM, data: []u8) !*Object { + return allocateObject(vmach, ObjType.String, ObjValue{ .String = data }); +} - return try createString(allocator, str); +pub fn copyString(vmach: *vm.VM, data: []const u8) !*Object { + var str = try vmach.allocator.alloc(u8, data.len); + std.mem.copy(u8, str, data); + return try createString(vmach, str); } /// Assumes it can take ownership of the given data. 
-pub fn takeString(allocator: *Allocator, data: []u8) !*Object { - return try createString(allocator, data); +pub fn takeString(vmach: *vm.VM, data: []u8) !*Object { + return try createString(vmach, data); } pub fn printObject(stdout: var, obj: Object) !void { diff --git a/src/vm.zig b/src/vm.zig index 40c1ac6..674f7f7 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -46,7 +46,9 @@ pub const VM = struct { stdout: StdOut, debug_flag: bool, - allocator: *std.mem.Allocator, + pub allocator: *std.mem.Allocator, + + objs: ?*objects.Object = null, fn resetStack(self: *VM) void { self.stackTop = 0; @@ -135,7 +137,7 @@ pub const VM = struct { [][]u8{ a, b }, ); - var val = values.ObjVal(try objects.takeString(self.allocator, res_str)); + var val = values.ObjVal(try objects.takeString(self, res_str)); try self.push(val); } @@ -273,6 +275,7 @@ pub const VM = struct { self.stdout, self.src, self.debug_flag, + self, ); if (!try cmpr.compile(&chk)) { return InterpretResult.CompileError; From 3f2a8f3801e5dfc0688768360cb5c808c51274e0 Mon Sep 17 00:00:00 2001 From: Luna Date: Sun, 2 Jun 2019 15:39:04 -0300 Subject: [PATCH 50/68] add object list cleaning on VM.deinit --- src/main.zig | 1 + src/vm.zig | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/src/main.zig b/src/main.zig index 660a9e7..8451723 100644 --- a/src/main.zig +++ b/src/main.zig @@ -17,6 +17,7 @@ fn run(allocator: *Allocator, data: []u8) !void { const stdout = &stdout_file.outStream().stream; var vmach = try vm.VM.init(allocator, stdout, data, true); + defer vmach.deinit(); try vmach.interpret(); } diff --git a/src/vm.zig b/src/vm.zig index 674f7f7..197ba46 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -74,6 +74,36 @@ pub const VM = struct { return self; } + fn deinitObject(self: *VM, obj: *objects.Object) void { + switch (obj.otype) { + .String => blk: { + self.allocator.free(obj.value.String); + self.allocator.destroy(obj); + break :blk; + }, + else => unreachable, + } + } + + fn 
deinitObjects(self: *VM) void { + var obj_opt: ?*objects.Object = self.objs; + + // doing a while(obj != null) but with optionals + while (true) { + if (obj_opt) |obj| { + var next = obj.next; + self.deinitObject(obj); + obj_opt = next; + } else { + break; + } + } + } + + pub fn deinit(self: *VM) void { + self.deinitObjects(); + } + pub fn debug(self: *VM, comptime fmt: []const u8, args: ...) void { if (self.debug_flag) { std.debug.warn(fmt, args); From bea6e34365819cdc152043386b1d6c4384ed128f Mon Sep 17 00:00:00 2001 From: Luna Date: Sun, 2 Jun 2019 17:28:54 -0300 Subject: [PATCH 51/68] add print statement - remove opcode return's use as debug --- src/chunk.zig | 4 ++++ src/compiler.zig | 34 ++++++++++++++++++++++++++++++++-- src/vm.zig | 7 ++++++- 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/src/chunk.zig b/src/chunk.zig index 50aca63..f4d295f 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -25,6 +25,8 @@ const AllOpcodes = struct { pub Equal: u8 = 12, pub Greater: u8 = 13, pub Less: u8 = 14, + + pub Print: u8 = 15, }; pub const OpCode = AllOpcodes{}; @@ -189,6 +191,8 @@ pub const Chunk = struct { return try simpleInstruction(stdout, "OP_GREATER", index); } else if (instruction == OpCode.Less) { return try simpleInstruction(stdout, "OP_LESS", index); + } else if (instruction == OpCode.Print) { + return try simpleInstruction(stdout, "OP_PRINT", index); } else { try stdout.print("Unknown opcode: {}\n", instruction); return index + 1; diff --git a/src/compiler.zig b/src/compiler.zig index 8666e11..eacbdd9 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -186,6 +186,17 @@ pub const Compiler = struct { self.errorCurrent(msg); } + fn check(self: *Compiler, ttype: TokenType) bool { + return self.parser.current.ttype == ttype; + } + + fn match(self: *Compiler, ttype: TokenType) !bool { + if (!(self.check(ttype))) return false; + + try self.advance(); + return true; + } + fn currentChunk(self: *Compiler) *chunks.Chunk { return self.chunk; } @@ 
-314,14 +325,33 @@ pub const Compiler = struct { try self.parsePrecedence(.Assignment); } + fn printStmt(self: *Compiler) !void { + try self.expression(); + try self.consume(.SEMICOLON, "Expect ';' after value."); + try self.emitByte(OpCode.Print); + } + + fn declaration(self: *Compiler) !void { + try self.statement(); + } + + fn statement(self: *Compiler) !void { + if (try self.match(.PRINT)) { + try self.printStmt(); + } + } + /// Compile the source given when initializing the compiler /// into the given chunk. pub fn compile(self: *Compiler, chunk: *Chunk) !bool { self.scanr = try scanner.Scanner.init(self.allocator, self.src); try self.advance(); - try self.expression(); - try self.consume(.EOF, "Expect end of expression."); + while (!(try self.match(.EOF))) { + try self.declaration(); + } + // try self.expression(); + // try self.consume(.EOF, "Expect end of expression."); try self.end(); return !self.parser.hadError; diff --git a/src/vm.zig b/src/vm.zig index 197ba46..c3514f5 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -240,9 +240,14 @@ pub const VM = struct { break :blk; }, - chunk.OpCode.Return => blk: { + chunk.OpCode.Print => blk: { try value.printValue(self.stdout, self.pop()); try self.stdout.print("\n"); + break :blk; + }, + + chunk.OpCode.Return => blk: { + // Exit VM return InterpretResult.Ok; }, From 06df2d37ee4fab65e02d8f240b0de10e4d12d018 Mon Sep 17 00:00:00 2001 From: Luna Date: Sun, 2 Jun 2019 18:04:36 -0300 Subject: [PATCH 52/68] add expression statements --- src/chunk.zig | 3 +++ src/compiler.zig | 8 ++++++++ src/vm.zig | 2 ++ 3 files changed, 13 insertions(+) diff --git a/src/chunk.zig b/src/chunk.zig index f4d295f..289ef33 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -27,6 +27,7 @@ const AllOpcodes = struct { pub Less: u8 = 14, pub Print: u8 = 15, + pub Pop: u8 = 16, }; pub const OpCode = AllOpcodes{}; @@ -193,6 +194,8 @@ pub const Chunk = struct { return try simpleInstruction(stdout, "OP_LESS", index); } else if (instruction == 
OpCode.Print) { return try simpleInstruction(stdout, "OP_PRINT", index); + } else if (instruction == OpCode.Pop) { + return try simpleInstruction(stdout, "OP_POP", index); } else { try stdout.print("Unknown opcode: {}\n", instruction); return index + 1; diff --git a/src/compiler.zig b/src/compiler.zig index eacbdd9..c6bdd5b 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -331,6 +331,12 @@ pub const Compiler = struct { try self.emitByte(OpCode.Print); } + fn exprStmt(self: *Compiler) !void { + try self.expression(); + try self.consume(.SEMICOLON, "Expect ';' after expression."); + try self.emitByte(OpCode.Pop); + } + fn declaration(self: *Compiler) !void { try self.statement(); } @@ -338,6 +344,8 @@ pub const Compiler = struct { fn statement(self: *Compiler) !void { if (try self.match(.PRINT)) { try self.printStmt(); + } else { + try self.exprStmt(); } } diff --git a/src/vm.zig b/src/vm.zig index c3514f5..6ffc733 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -255,6 +255,8 @@ pub const VM = struct { chunk.OpCode.True => try self.push(values.BoolVal(true)), chunk.OpCode.False => try self.push(values.BoolVal(false)), + chunk.OpCode.Pop => self.pop(), + chunk.OpCode.Equal => blk: { var a = self.pop(); var b = self.pop(); From aa94396e5121d1afa465e0030ec1e62b6579de96 Mon Sep 17 00:00:00 2001 From: Luna Date: Sun, 2 Jun 2019 18:11:23 -0300 Subject: [PATCH 53/68] compiler: add nicer error handling - vm: fix pop opcode handler --- src/compiler.zig | 16 ++++++++++++++++ src/vm.zig | 5 ++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/compiler.zig b/src/compiler.zig index c6bdd5b..71750da 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -337,8 +337,24 @@ pub const Compiler = struct { try self.emitByte(OpCode.Pop); } + fn synchronize(self: *Compiler) !void { + self.parser.panicMode = false; + + while (self.parser.current.ttype != .EOF) { + if (self.parser.previous.ttype == .SEMICOLON) return; + + switch (self.parser.current.ttype) { + .CLASS, 
.FUN, .VAR, .FOR, .IF, .WHILE, .PRINT, .RETURN => return, + else => {}, + } + + try self.advance(); + } + } + fn declaration(self: *Compiler) !void { try self.statement(); + if (self.parser.panicMode) try self.synchronize(); } fn statement(self: *Compiler) !void { diff --git a/src/vm.zig b/src/vm.zig index 6ffc733..98211c6 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -255,7 +255,10 @@ pub const VM = struct { chunk.OpCode.True => try self.push(values.BoolVal(true)), chunk.OpCode.False => try self.push(values.BoolVal(false)), - chunk.OpCode.Pop => self.pop(), + chunk.OpCode.Pop => blk: { + _ = self.pop(); + break :blk; + }, chunk.OpCode.Equal => blk: { var a = self.pop(); From 3a6df2d9eadb0b2fe3a2f925121a4ae1e0718f55 Mon Sep 17 00:00:00 2001 From: Luna Date: Sun, 2 Jun 2019 22:52:19 -0300 Subject: [PATCH 54/68] add basic global variable support - chunk: make writeConstant return a ConstantIndex for better integration with the (optional) OP_CONST_LONG - compiler: quickfix emitConstant() - vm: add a global ValueMap --- src/chunk.zig | 29 ++++++++++++++++++++++++++-- src/compiler.zig | 50 ++++++++++++++++++++++++++++++++++++++++++++++-- src/vm.zig | 22 +++++++++++++++++++++ 3 files changed, 97 insertions(+), 4 deletions(-) diff --git a/src/chunk.zig b/src/chunk.zig index 289ef33..76d4bb9 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -28,6 +28,9 @@ const AllOpcodes = struct { pub Print: u8 = 15, pub Pop: u8 = 16, + + pub DefineGlobal: u8 = 17, + pub DefineGlobalLong: u8 = 18, }; pub const OpCode = AllOpcodes{}; @@ -78,6 +81,16 @@ fn constantLongInstruction( return offset + 4; } +pub const ConstantIndexTag = enum { + Small, + Long, +}; + +pub const ConstantIndex = union(ConstantIndexTag) { + Small: u8, + Long: [3]u8, +}; + pub const Chunk = struct { count: usize, lines: []usize, @@ -119,13 +132,20 @@ pub const Chunk = struct { return self.constants.count - 1; } - pub fn writeConstant(self: *Chunk, val: value.Value, line: usize) !void { + pub fn writeConstant( + self: 
*Chunk, + val: value.Value, + line: usize, + ) !ConstantIndex { try self.constants.write(val); var constant_idx = self.constants.count - 1; if (constant_idx < 256) { try self.write(OpCode.Constant, line); - try self.write(@intCast(u8, constant_idx), line); + + var idx_small = @intCast(u8, constant_idx); + try self.write(idx_small, line); + return ConstantIndex{ .Small = idx_small }; } else { var idx_u24: u24 = @intCast(u24, constant_idx); @@ -139,6 +159,7 @@ pub const Chunk = struct { try self.write(v3, line); try self.write(v2, line); try self.write(v1, line); + return ConstantIndex{ .Long = []u8{ v3, v2, v1 } }; } } @@ -196,6 +217,10 @@ pub const Chunk = struct { return try simpleInstruction(stdout, "OP_PRINT", index); } else if (instruction == OpCode.Pop) { return try simpleInstruction(stdout, "OP_POP", index); + } else if (instruction == OpCode.DefineGlobal) { + return try simpleInstruction(stdout, "OP_DEFGLOBAL", index); + } else if (instruction == OpCode.DefineGlobalLong) { + return try simpleInstruction(stdout, "OP_DEFGLOBAL_LONG", index); } else { try stdout.print("Unknown opcode: {}\n", instruction); return index + 1; diff --git a/src/compiler.zig b/src/compiler.zig index 71750da..1b44624 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -215,7 +215,7 @@ pub const Compiler = struct { } fn emitConstant(self: *Compiler, value: Value) !void { - try self.currentChunk().writeConstant( + _ = try self.currentChunk().writeConstant( value, self.parser.previous.line, ); @@ -352,8 +352,54 @@ pub const Compiler = struct { } } + /// Write an identifier constant to the bytecode. 
+ fn identifierConstant( + self: *Compiler, + token: *Token, + ) !chunks.ConstantIndex { + return try self.currentChunk().writeConstant(values.ObjVal(try objects.copyString( + self.vmach, + token.lexeme, + )), token.line); + } + + fn parseVariable(self: *Compiler, msg: []const u8) !chunks.ConstantIndex { + try self.consume(.IDENTIFIER, msg); + return try self.identifierConstant(&self.parser.previous); + } + + fn defineVariable(self: *Compiler, global: chunks.ConstantIndex) !void { + switch (global) { + .Small => |val| try self.emitBytes(chunks.OpCode.DefineGlobal, val), + .Long => |val| blk: { + try self.emitByte(chunks.OpCode.DefineGlobalLong); + try self.emitByte(val[0]); + try self.emitByte(val[1]); + try self.emitByte(val[2]); + }, + else => unreachable, + } + } + + fn varDecl(self: *Compiler) !void { + var global = try self.parseVariable("Expect variable name."); + + if (try self.match(.EQUAL)) { + try self.expression(); + } else { + try self.emitByte(chunks.OpCode.Nil); + } + + try self.consume(.SEMICOLON, "Expect ';' after variable declaration."); + try self.defineVariable(global); + } + fn declaration(self: *Compiler) !void { - try self.statement(); + if (try self.match(.VAR)) { + try self.varDecl(); + } else { + try self.statement(); + } if (self.parser.panicMode) try self.synchronize(); } diff --git a/src/vm.zig b/src/vm.zig index 98211c6..0b6e3fb 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -36,6 +36,8 @@ fn valuesEqual(a: value.Value, b: value.Value) bool { } } +pub const ValueMap = std.AutoHashMap([]const u8, values.Value); + pub const VM = struct { chk: *Chunk = undefined, src: []const u8, @@ -49,6 +51,7 @@ pub const VM = struct { pub allocator: *std.mem.Allocator, objs: ?*objects.Object = null, + globals: ValueMap, fn resetStack(self: *VM) void { self.stackTop = 0; @@ -67,6 +70,8 @@ pub const VM = struct { .stdout = stdout, .debug_flag = debug_flag, .allocator = allocator, + + .globals = ValueMap.init(allocator), }; self.resetStack(); @@ -219,6 
+224,11 @@ pub const VM = struct { self.resetStack(); } + fn defGlobal(self: *VM, name: []const u8) !void { + _ = try self.globals.put(name, self.peek(0)); + _ = self.pop(); + } + fn run(self: *VM) !void { while (true) { if (self.debug_flag) { @@ -260,6 +270,18 @@ pub const VM = struct { break :blk; }, + // extracting the name is different depending of the + // op code since one just uses a single byte, the other + // uses three bytes since its a u24. + chunk.OpCode.DefineGlobal => blk: { + try self.defGlobal(self.readConst().as.Object.value.String); + break :blk; + }, + chunk.OpCode.DefineGlobalLong => blk: { + try self.defGlobal(self.readConstLong().as.Object.value.String); + break :blk; + }, + chunk.OpCode.Equal => blk: { var a = self.pop(); var b = self.pop(); From e3ac28d84e322fda781a61750765fbb66a42076f Mon Sep 17 00:00:00 2001 From: Luna Date: Sun, 2 Jun 2019 23:13:42 -0300 Subject: [PATCH 55/68] vm: deinit globals on vm deinit --- src/vm.zig | 1 + 1 file changed, 1 insertion(+) diff --git a/src/vm.zig b/src/vm.zig index 0b6e3fb..7482f71 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -106,6 +106,7 @@ pub const VM = struct { } pub fn deinit(self: *VM) void { + self.globals.deinit(); self.deinitObjects(); } From 887cb1adea74bafa0188b1fbc4bab75dfa498da1 Mon Sep 17 00:00:00 2001 From: Luna Date: Sun, 2 Jun 2019 23:43:12 -0300 Subject: [PATCH 56/68] add emitting of GetGlobal/GetGlobalLong --- src/chunk.zig | 2 ++ src/compiler.zig | 36 +++++++++++++++++++++++++++++++----- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/chunk.zig b/src/chunk.zig index 76d4bb9..1c0f58e 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -31,6 +31,8 @@ const AllOpcodes = struct { pub DefineGlobal: u8 = 17, pub DefineGlobalLong: u8 = 18, + pub GetGlobal: u8 = 19, + pub GetGlobalLong: u8 = 20, }; pub const OpCode = AllOpcodes{}; diff --git a/src/compiler.zig b/src/compiler.zig index 1b44624..3a8df90 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -89,7 +89,7 @@ 
var rules = []ParseRule{ ParseRule{ .infix = Compiler.binary, .precedence = .Comparison }, ParseRule{ .infix = Compiler.binary, .precedence = .Comparison }, - ParseRule{}, + ParseRule{ .prefix = Compiler.variable }, ParseRule{ .prefix = Compiler.string }, ParseRule{ .prefix = Compiler.number }, ParseRule{ .precedence = .And }, @@ -252,6 +252,19 @@ pub const Compiler = struct { ))); } + fn namedVariable(self: *Compiler, tok: *Token) !void { + var idx = try self.identifierConstant(tok); + try self.emitConstWithIndex( + chunks.OpCode.GetGlobal, + chunks.OpCode.GetGlobalLong, + idx, + ); + } + + fn variable(self: *Compiler) !void { + try self.namedVariable(self.parser.previous); + } + /// Emits bytecode for a given unary. fn unary(self: *Compiler) !void { var ttype = self.parser.previous.ttype; @@ -368,11 +381,16 @@ pub const Compiler = struct { return try self.identifierConstant(&self.parser.previous); } - fn defineVariable(self: *Compiler, global: chunks.ConstantIndex) !void { - switch (global) { - .Small => |val| try self.emitBytes(chunks.OpCode.DefineGlobal, val), + fn emitConstWithIndex( + self: *Compiler, + op_short: u8, + op_long: u8, + idx: chunks.ConstantIndex, + ) !void { + switch (idx) { + .Small => |val| try self.emitBytes(op_short, val), .Long => |val| blk: { - try self.emitByte(chunks.OpCode.DefineGlobalLong); + try self.emitByte(op_long); try self.emitByte(val[0]); try self.emitByte(val[1]); try self.emitByte(val[2]); @@ -381,6 +399,14 @@ pub const Compiler = struct { } } + fn defineVariable(self: *Compiler, global: chunks.ConstantIndex) !void { + try self.emitConstWithIndex( + chunks.OpCode.DefineGlobal, + chunks.OpCode.DefineGlobalLong, + global, + ); + } + fn varDecl(self: *Compiler) !void { var global = try self.parseVariable("Expect variable name."); From 005981fbbdffe5be773b10a17690a7938b17daa1 Mon Sep 17 00:00:00 2001 From: Luna Date: Sun, 2 Jun 2019 23:57:28 -0300 Subject: [PATCH 57/68] vm: add getglobal support --- src/chunk.zig | 4 ++++ 
src/compiler.zig | 2 +- src/vm.zig | 32 ++++++++++++++++++++++++++++++-- 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/src/chunk.zig b/src/chunk.zig index 1c0f58e..abc54af 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -223,6 +223,10 @@ pub const Chunk = struct { return try simpleInstruction(stdout, "OP_DEFGLOBAL", index); } else if (instruction == OpCode.DefineGlobalLong) { return try simpleInstruction(stdout, "OP_DEFGLOBAL_LONG", index); + } else if (instruction == OpCode.GetGlobal) { + return try simpleInstruction(stdout, "OP_GETGLOBAL", index); + } else if (instruction == OpCode.GetGlobalLong) { + return try simpleInstruction(stdout, "OP_GETGLOBAL_LONG", index); } else { try stdout.print("Unknown opcode: {}\n", instruction); return index + 1; diff --git a/src/compiler.zig b/src/compiler.zig index 3a8df90..de0f25a 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -262,7 +262,7 @@ pub const Compiler = struct { } fn variable(self: *Compiler) !void { - try self.namedVariable(self.parser.previous); + try self.namedVariable(&self.parser.previous); } /// Emits bytecode for a given unary. 
diff --git a/src/vm.zig b/src/vm.zig index 7482f71..646dfbe 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -230,6 +230,25 @@ pub const VM = struct { _ = self.pop(); } + fn readString(self: *VM) []u8 { + return self.readConst().as.Object.value.String; + } + + fn readStringLong(self: *VM) []u8 { + return self.readConstLong().as.Object.value.String; + } + + fn doGetGlobal(self: *VM, name: []u8) !void { + var kv_opt = self.globals.get(name); + + if (kv_opt) |kv| { + try self.push(kv.value); + } else { + self.runtimeError("Undefined variable '{}'.", name); + return InterpretResult.RuntimeError; + } + } + fn run(self: *VM) !void { while (true) { if (self.debug_flag) { @@ -271,15 +290,24 @@ pub const VM = struct { break :blk; }, + chunk.OpCode.GetGlobal => blk: { + try self.doGetGlobal(self.readString()); + break :blk; + }, + chunk.OpCode.GetGlobalLong => blk: { + try self.doGetGlobal(self.readStringLong()); + break :blk; + }, + // extracting the name is different depending of the // op code since one just uses a single byte, the other // uses three bytes since its a u24. chunk.OpCode.DefineGlobal => blk: { - try self.defGlobal(self.readConst().as.Object.value.String); + try self.defGlobal(self.readString()); break :blk; }, chunk.OpCode.DefineGlobalLong => blk: { - try self.defGlobal(self.readConstLong().as.Object.value.String); + try self.defGlobal(self.readStringLong()); break :blk; }, From 3936b4a4263995b2cc30624cd1c44c98217b0274 Mon Sep 17 00:00:00 2001 From: Luna Date: Mon, 3 Jun 2019 00:02:07 -0300 Subject: [PATCH 58/68] main: keep a long-running vm instance for REPL enables us to declare variables and keep them going for as long as the REPL goes. 
- vm: remove source arg from init(), move to interpret() --- src/main.zig | 17 ++++++++++++++--- src/vm.zig | 8 ++------ 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/main.zig b/src/main.zig index 8451723..a38e2a3 100644 --- a/src/main.zig +++ b/src/main.zig @@ -16,9 +16,17 @@ fn run(allocator: *Allocator, data: []u8) !void { var stdout_file = try std.io.getStdOut(); const stdout = &stdout_file.outStream().stream; - var vmach = try vm.VM.init(allocator, stdout, data, true); + var vmach = try vm.VM.init(allocator, stdout, true); defer vmach.deinit(); - try vmach.interpret(); + try vmach.interpret(data); +} + +fn runWithVM(vmach: *vm.VM, data: []u8) !void { + var stdout_file = try std.io.getStdOut(); + const stdout = &stdout_file.outStream().stream; + + defer vmach.deinit(); + try vmach.interpret(data); } pub fn doError(line: usize, message: []const u8) !void { @@ -55,6 +63,9 @@ fn runPrompt(allocator: *Allocator) !void { var stdout_file = try std.io.getStdOut(); const stdout = &stdout_file.outStream().stream; + var vmach = try vm.VM.init(allocator, stdout, true); + defer vmach.deinit(); + while (true) { try stdout.print(">"); var buffer = try std.Buffer.init(allocator, ""[0..]); @@ -65,7 +76,7 @@ fn runPrompt(allocator: *Allocator) !void { return err; }; - run(allocator, line) catch |err| { + runWithVM(&vmach, line) catch |err| { switch (err) { InterpretResult.Ok => {}, InterpretResult.CompileError, InterpretResult.RuntimeError => blk: { diff --git a/src/vm.zig b/src/vm.zig index 646dfbe..24a72f8 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -40,7 +40,6 @@ pub const ValueMap = std.AutoHashMap([]const u8, values.Value); pub const VM = struct { chk: *Chunk = undefined, - src: []const u8, ip: usize = 0, stack: []Value, @@ -60,12 +59,9 @@ pub const VM = struct { pub fn init( allocator: *std.mem.Allocator, stdout: StdOut, - source: []const u8, debug_flag: bool, ) !VM { var self = VM{ - .src = source, - .stack = try allocator.alloc(Value, 256), .stdout 
= stdout, .debug_flag = debug_flag, @@ -352,7 +348,7 @@ pub const VM = struct { } } - pub fn interpret(self: *VM) !void { + pub fn interpret(self: *VM, src: []const u8) !void { //self.ip = 0; //self.debug("VM start\n"); //var res = try self.run(); @@ -364,7 +360,7 @@ pub const VM = struct { self.allocator, &chk, self.stdout, - self.src, + src, self.debug_flag, self, ); From 922f3c530c69987e99972073c1ca7932b9262517 Mon Sep 17 00:00:00 2001 From: Luna Date: Mon, 3 Jun 2019 00:07:11 -0300 Subject: [PATCH 59/68] chunk: fix disasm on the new const-load op codes --- src/chunk.zig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/chunk.zig b/src/chunk.zig index abc54af..cbebaf1 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -220,13 +220,13 @@ pub const Chunk = struct { } else if (instruction == OpCode.Pop) { return try simpleInstruction(stdout, "OP_POP", index); } else if (instruction == OpCode.DefineGlobal) { - return try simpleInstruction(stdout, "OP_DEFGLOBAL", index); + return try constantInstruction(stdout, "OP_DEFGLOBAL", self, index); } else if (instruction == OpCode.DefineGlobalLong) { - return try simpleInstruction(stdout, "OP_DEFGLOBAL_LONG", index); + return try constantLongInstruction(stdout, "OP_DEFGLOBAL_LONG", self, index); } else if (instruction == OpCode.GetGlobal) { - return try simpleInstruction(stdout, "OP_GETGLOBAL", index); + return try constantInstruction(stdout, "OP_GETGLOBAL", self, index); } else if (instruction == OpCode.GetGlobalLong) { - return try simpleInstruction(stdout, "OP_GETGLOBAL_LONG", index); + return try constantLongInstruction(stdout, "OP_GETGLOBAL_LONG", self, index); } else { try stdout.print("Unknown opcode: {}\n", instruction); return index + 1; From 8bc220d2f88f6eca09206edf479bf83ccb934903 Mon Sep 17 00:00:00 2001 From: Luna Date: Mon, 3 Jun 2019 00:55:50 -0300 Subject: [PATCH 60/68] chunk: split writing side-effects into own function split writeConstant() into that and writeConstantRaw() for the 
places where we don't want OP_CONSTANT written as well. this caused a bug where doing `"some const string" + x` would cause an unecessary OP_CONSTANT to be added for the x variable and would cause the wrong result to be given. - main: reset stack on repl tick (?) --- src/chunk.zig | 34 ++++++++++++++++++++++++++-------- src/compiler.zig | 8 +++++++- src/main.zig | 2 ++ src/vm.zig | 7 +++++++ 4 files changed, 42 insertions(+), 9 deletions(-) diff --git a/src/chunk.zig b/src/chunk.zig index cbebaf1..7a89c3e 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -134,7 +134,7 @@ pub const Chunk = struct { return self.constants.count - 1; } - pub fn writeConstant( + pub fn writeConstantRaw( self: *Chunk, val: value.Value, line: usize, @@ -143,10 +143,7 @@ pub const Chunk = struct { var constant_idx = self.constants.count - 1; if (constant_idx < 256) { - try self.write(OpCode.Constant, line); - var idx_small = @intCast(u8, constant_idx); - try self.write(idx_small, line); return ConstantIndex{ .Small = idx_small }; } else { var idx_u24: u24 = @intCast(u24, constant_idx); @@ -157,14 +154,35 @@ pub const Chunk = struct { const v2: u8 = @intCast(u8, (idx_u24 >> 8) & mask); const v3: u8 = @intCast(u8, (idx_u24 >> 16) & mask); - try self.write(OpCode.ConstantLong, line); - try self.write(v3, line); - try self.write(v2, line); - try self.write(v1, line); return ConstantIndex{ .Long = []u8{ v3, v2, v1 } }; } } + pub fn writeConstant( + self: *Chunk, + val: value.Value, + line: usize, + ) !ConstantIndex { + var idx = try self.writeConstantRaw(val, line); + + switch (idx) { + .Small => |idx_small| blk: { + try self.write(OpCode.Constant, line); + try self.write(idx_small, line); + break :blk; + }, + .Long => |long_u8| blk: { + try self.write(OpCode.ConstantLong, line); + try self.write(long_u8[0], line); + try self.write(long_u8[1], line); + try self.write(long_u8[2], line); + }, + else => unreachable, + } + + return idx; + } + pub fn disassembleInstruction( self: *Chunk, stdout: 
var, diff --git a/src/compiler.zig b/src/compiler.zig index de0f25a..fb3ee39 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -253,7 +253,13 @@ pub const Compiler = struct { } fn namedVariable(self: *Compiler, tok: *Token) !void { - var idx = try self.identifierConstant(tok); + // writeConstant always writes OP_CODE which may be not + // what we want, so. + var idx = try self.currentChunk().writeConstantRaw(values.ObjVal(try objects.copyString( + self.vmach, + tok.lexeme, + )), tok.line); + try self.emitConstWithIndex( chunks.OpCode.GetGlobal, chunks.OpCode.GetGlobalLong, diff --git a/src/main.zig b/src/main.zig index a38e2a3..3983140 100644 --- a/src/main.zig +++ b/src/main.zig @@ -85,6 +85,8 @@ fn runPrompt(allocator: *Allocator) !void { else => return err, } }; + + vmach.resetStack(); } } diff --git a/src/vm.zig b/src/vm.zig index 24a72f8..a0decdf 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -237,6 +237,13 @@ pub const VM = struct { fn doGetGlobal(self: *VM, name: []u8) !void { var kv_opt = self.globals.get(name); + // take out the OP_CONST loaded before, if any + // note this is a complete hack. + //var val = self.peek(0); + //if (val.vtype == .Object and val.as.Object.otype == .String and std.mem.compare(u8, val.as.Object.value.String, name) == .Equal) { + // _ = self.pop(); + //} + if (kv_opt) |kv| { try self.push(kv.value); } else { From 9f45dea2c0308ebb19d2fde4cb8305facdea2d0e Mon Sep 17 00:00:00 2001 From: Luna Date: Mon, 3 Jun 2019 00:58:44 -0300 Subject: [PATCH 61/68] vm: remove uneeded hack --- src/vm.zig | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/vm.zig b/src/vm.zig index a0decdf..24a72f8 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -237,13 +237,6 @@ pub const VM = struct { fn doGetGlobal(self: *VM, name: []u8) !void { var kv_opt = self.globals.get(name); - // take out the OP_CONST loaded before, if any - // note this is a complete hack. 
- //var val = self.peek(0); - //if (val.vtype == .Object and val.as.Object.otype == .String and std.mem.compare(u8, val.as.Object.value.String, name) == .Equal) { - // _ = self.pop(); - //} - if (kv_opt) |kv| { try self.push(kv.value); } else { From 69dda36d168451070c6bfc3965776b211d1b53ad Mon Sep 17 00:00:00 2001 From: Luna Date: Mon, 3 Jun 2019 01:41:22 -0300 Subject: [PATCH 62/68] add SetGlobal/SetGlobalLong opcodes - main: split compile/runtime error messages --- src/chunk.zig | 6 ++++++ src/compiler.zig | 55 ++++++++++++++++++++++++++++++------------------ src/main.zig | 7 ++++-- src/vm.zig | 20 ++++++++++++++++++ 4 files changed, 66 insertions(+), 22 deletions(-) diff --git a/src/chunk.zig b/src/chunk.zig index 7a89c3e..5ce7062 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -33,6 +33,8 @@ const AllOpcodes = struct { pub DefineGlobalLong: u8 = 18, pub GetGlobal: u8 = 19, pub GetGlobalLong: u8 = 20, + pub SetGlobal: u8 = 21, + pub SetGlobalLong: u8 = 22, }; pub const OpCode = AllOpcodes{}; @@ -245,6 +247,10 @@ pub const Chunk = struct { return try constantInstruction(stdout, "OP_GETGLOBAL", self, index); } else if (instruction == OpCode.GetGlobalLong) { return try constantLongInstruction(stdout, "OP_GETGLOBAL_LONG", self, index); + } else if (instruction == OpCode.SetGlobal) { + return try constantInstruction(stdout, "OP_SETGLOBAL", self, index); + } else if (instruction == OpCode.SetGlobalLong) { + return try constantLongInstruction(stdout, "OP_SETGLOBAL_LONG", self, index); } else { try stdout.print("Unknown opcode: {}\n", instruction); return index + 1; diff --git a/src/compiler.zig b/src/compiler.zig index fb3ee39..ac6030a 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -39,7 +39,7 @@ const Precedence = enum(u5) { Primary, }; -const ParseFn = fn (*Compiler) anyerror!void; +const ParseFn = fn (*Compiler, bool) anyerror!void; const ParseRule = struct { prefix: ?ParseFn = null, @@ -229,13 +229,13 @@ pub const Compiler = struct { } } - fn 
grouping(self: *Compiler) !void { + fn grouping(self: *Compiler, canAssign: bool) !void { try self.expression(); try self.consume(.RIGHT_PAREN, "Expect ')' after expression."); } /// Emits bytecode for a number being loaded into the code. - fn number(self: *Compiler) !void { + fn number(self: *Compiler, canAssign: bool) !void { var value: f64 = try std.fmt.parseFloat( f64, self.parser.previous.lexeme, @@ -243,7 +243,7 @@ pub const Compiler = struct { try self.emitConstant(values.NumberVal(value)); } - fn string(self: *Compiler) !void { + fn string(self: *Compiler, canAssign: bool) !void { const lexeme_len = self.parser.previous.lexeme.len; try self.emitConstant(values.ObjVal(try objects.copyString( @@ -252,7 +252,7 @@ pub const Compiler = struct { ))); } - fn namedVariable(self: *Compiler, tok: *Token) !void { + fn namedVariable(self: *Compiler, tok: *Token, canAssign: bool) !void { // writeConstant always writes OP_CODE which may be not // what we want, so. var idx = try self.currentChunk().writeConstantRaw(values.ObjVal(try objects.copyString( @@ -260,19 +260,28 @@ pub const Compiler = struct { tok.lexeme, )), tok.line); - try self.emitConstWithIndex( - chunks.OpCode.GetGlobal, - chunks.OpCode.GetGlobalLong, - idx, - ); + if (canAssign and try self.match(.EQUAL)) { + try self.expression(); + try self.emitConstWithIndex( + chunks.OpCode.SetGlobal, + chunks.OpCode.SetGlobalLong, + idx, + ); + } else { + try self.emitConstWithIndex( + chunks.OpCode.GetGlobal, + chunks.OpCode.GetGlobalLong, + idx, + ); + } } - fn variable(self: *Compiler) !void { - try self.namedVariable(&self.parser.previous); + fn variable(self: *Compiler, canAssign: bool) !void { + try self.namedVariable(&self.parser.previous, canAssign); } /// Emits bytecode for a given unary. 
- fn unary(self: *Compiler) !void { + fn unary(self: *Compiler, canAssign: bool) !void { var ttype = self.parser.previous.ttype; try self.parsePrecedence(.Unary); @@ -283,7 +292,7 @@ pub const Compiler = struct { } } - fn binary(self: *Compiler) !void { + fn binary(self: *Compiler, canAssign: bool) !void { var op_type = self.parser.previous.ttype; var rule: *ParseRule = self.getRule(op_type); try self.parsePrecedence(@intToEnum(Precedence, @enumToInt(rule.precedence) + 1)); @@ -306,7 +315,7 @@ pub const Compiler = struct { } } - fn literal(self: *Compiler) !void { + fn literal(self: *Compiler, canAssign: bool) !void { switch (self.parser.previous.ttype) { .FALSE => try self.emitByte(OpCode.False), .NIL => try self.emitByte(OpCode.Nil), @@ -315,21 +324,27 @@ pub const Compiler = struct { } } - fn parsePrecedence(self: *Compiler, precedence: Precedence) !void { + fn parsePrecedence(self: *Compiler, precedence: Precedence) anyerror!void { try self.advance(); var as_int = @enumToInt(precedence); var prefix_rule_opt = self.getRule(self.parser.previous.ttype).prefix; if (prefix_rule_opt) |prefix_rule| { - try prefix_rule(self); + var canAssign: bool = as_int <= @enumToInt(Precedence.Assignment); + try prefix_rule(self, canAssign); while (as_int <= @enumToInt(self.getRule(self.parser.current.ttype).precedence)) { try self.advance(); var infix_rule_opt = self.getRule(self.parser.previous.ttype).infix; if (infix_rule_opt) |infix_rule| { - try infix_rule(self); + try infix_rule(self, canAssign); } } + + if (canAssign and try self.match(.EQUAL)) { + self.errorPrevious("Invalid assignment target."); + try self.expression(); + } } else { self.errorPrevious("Expect expression."); return; @@ -340,7 +355,7 @@ pub const Compiler = struct { return &rules[@enumToInt(ttype)]; } - fn expression(self: *Compiler) !void { + fn expression(self: *Compiler) anyerror!void { try self.parsePrecedence(.Assignment); } @@ -376,7 +391,7 @@ pub const Compiler = struct { self: *Compiler, token: 
*Token, ) !chunks.ConstantIndex { - return try self.currentChunk().writeConstant(values.ObjVal(try objects.copyString( + return try self.currentChunk().writeConstantRaw(values.ObjVal(try objects.copyString( self.vmach, token.lexeme, )), token.line); diff --git a/src/main.zig b/src/main.zig index 3983140..1162392 100644 --- a/src/main.zig +++ b/src/main.zig @@ -79,8 +79,11 @@ fn runPrompt(allocator: *Allocator) !void { runWithVM(&vmach, line) catch |err| { switch (err) { InterpretResult.Ok => {}, - InterpretResult.CompileError, InterpretResult.RuntimeError => blk: { - try stdout.print("compile/runtime error.\n"); + InterpretResult.CompileError => blk: { + try stdout.print("compile error.\n"); + }, + InterpretResult.RuntimeError => blk: { + try stdout.print("runtime error.\n"); }, else => return err, } diff --git a/src/vm.zig b/src/vm.zig index 24a72f8..d2b923d 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -245,6 +245,17 @@ pub const VM = struct { } } + fn doSetGlobal(self: *VM, name: []u8) !void { + var res = try self.globals.getOrPut(name); + + if (res.found_existing) { + res.kv.value = self.peek(0); + } else { + self.runtimeError("Undefined variable '{}'.", name); + return InterpretResult.RuntimeError; + } + } + fn run(self: *VM) !void { while (true) { if (self.debug_flag) { @@ -295,6 +306,15 @@ pub const VM = struct { break :blk; }, + chunk.OpCode.SetGlobal => blk: { + try self.doSetGlobal(self.readString()); + break :blk; + }, + chunk.OpCode.SetGlobalLong => blk: { + try self.doSetGlobal(self.readStringLong()); + break :blk; + }, + // extracting the name is different depending of the // op code since one just uses a single byte, the other // uses three bytes since its a u24. 
From 25ee586acb5f1d45ab53c9d35c48b8eb009dcf2f Mon Sep 17 00:00:00 2001 From: Luna Date: Mon, 3 Jun 2019 15:10:12 -0300 Subject: [PATCH 63/68] compiler: add local scope basics --- src/compiler.zig | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/compiler.zig b/src/compiler.zig index ac6030a..cd8eb85 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -113,6 +113,11 @@ var rules = []ParseRule{ ParseRule{}, }; +pub const Local = struct { + name: tokens.Token, + depth: i32, +}; + pub const Compiler = struct { src: []const u8, stdout: vm.StdOut, @@ -123,6 +128,10 @@ pub const Compiler = struct { debug_flag: bool = false, vmach: *vm.VM, + locals: [256]Local, + localCount: u8 = 0, + scopeDepth: u8 = 0, + pub fn init( allocator: *Allocator, chunk: *chunks.Chunk, @@ -139,6 +148,12 @@ pub const Compiler = struct { .parser = Parser{}, .debug_flag = debug_flag, .vmach = vmach, + + // local variable resolution + .locals = []Local{Local{ + .name = Token{}, + .depth = -1, + }} ** 256, }; } From 5138410be4e89a096b22ee4e68409760f2e115f8 Mon Sep 17 00:00:00 2001 From: Luna Date: Mon, 3 Jun 2019 15:17:07 -0300 Subject: [PATCH 64/68] compiler: add scope support --- src/compiler.zig | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/compiler.zig b/src/compiler.zig index cd8eb85..f468ee8 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -244,6 +244,14 @@ pub const Compiler = struct { } } + fn beginScope(self: *Compiler) void { + self.scopeDepth += 1; + } + + fn endScope(self: *Compiler) void { + self.scopeDepth -= 1; + } + fn grouping(self: *Compiler, canAssign: bool) !void { try self.expression(); try self.consume(.RIGHT_PAREN, "Expect ')' after expression."); @@ -456,7 +464,7 @@ pub const Compiler = struct { try self.defineVariable(global); } - fn declaration(self: *Compiler) !void { + fn declaration(self: *Compiler) anyerror!void { if (try self.match(.VAR)) { try self.varDecl(); } else { @@ -465,9 +473,21 @@ 
pub const Compiler = struct { if (self.parser.panicMode) try self.synchronize(); } + fn block(self: *Compiler) anyerror!void { + while (!self.check(.RIGHT_BRACE) and !self.check(.EOF)) { + try self.declaration(); + } + + try self.consume(.RIGHT_BRACE, "Expect '}' after block."); + } + fn statement(self: *Compiler) !void { if (try self.match(.PRINT)) { try self.printStmt(); + } else if (try self.match(.LEFT_BRACE)) { + self.beginScope(); + try self.block(); + self.endScope(); } else { try self.exprStmt(); } From 26d299cd2341e1af5ad2a3bfb6eab6abe0b98086 Mon Sep 17 00:00:00 2001 From: Luna Date: Mon, 3 Jun 2019 16:10:49 -0300 Subject: [PATCH 65/68] add local/global "switch" --- src/chunk.zig | 3 ++ src/compiler.zig | 99 +++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 84 insertions(+), 18 deletions(-) diff --git a/src/chunk.zig b/src/chunk.zig index 5ce7062..bcfc23e 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -35,6 +35,9 @@ const AllOpcodes = struct { pub GetGlobalLong: u8 = 20, pub SetGlobal: u8 = 21, pub SetGlobalLong: u8 = 22, + + pub GetLocal: u8 = 23, + pub SetLocal: u8 = 24, }; pub const OpCode = AllOpcodes{}; diff --git a/src/compiler.zig b/src/compiler.zig index f468ee8..7b51ca4 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -129,8 +129,8 @@ pub const Compiler = struct { vmach: *vm.VM, locals: [256]Local, - localCount: u8 = 0, - scopeDepth: u8 = 0, + localCount: i32 = 0, + scopeDepth: i32 = 0, pub fn init( allocator: *Allocator, @@ -248,8 +248,14 @@ pub const Compiler = struct { self.scopeDepth += 1; } - fn endScope(self: *Compiler) void { + fn endScope(self: *Compiler) !void { self.scopeDepth -= 1; + + // clear the current scope in the stack + while (self.localCount > 0 and self.locals[@intCast(usize, self.localCount - 1)].depth > self.scopeDepth) { + try self.emitByte(chunks.OpCode.Pop); + self.localCount -= 1; + } } fn grouping(self: *Compiler, canAssign: bool) !void { @@ -275,27 +281,45 @@ pub const Compiler = struct { ))); 
} + fn resolveLocal(self: *Compiler, name: *Token) i32 { + var i = self.localCount - 1; + while (i >= 0) : (i -= 1) { + var idx = @intCast(usize, i); + var local = &self.locals[idx]; + if (std.mem.eql(u8, name.lexeme, local.name.lexeme)) { + return i; + } + } + + return -1; + } + fn namedVariable(self: *Compiler, tok: *Token, canAssign: bool) !void { // writeConstant always writes OP_CODE which may be not // what we want, so. - var idx = try self.currentChunk().writeConstantRaw(values.ObjVal(try objects.copyString( - self.vmach, - tok.lexeme, - )), tok.line); + var getOp: u8 = undefined; + var setOp: u8 = undefined; + + // we try to resolve the local. depending if it gets resolved + // or not, we select the necessary get/set op codes. + var arg: i32 = self.resolveLocal(tok); + + if (arg != -1) { + getOp = chunks.OpCode.GetLocal; + setOp = chunks.OpCode.SetLocal; + } else { + arg = (try self.identifierConstant(tok)).Small; + getOp = chunks.OpCode.GetGlobal; + setOp = chunks.OpCode.SetGlobal; + } + + var idx: u8 = @intCast(u8, arg); if (canAssign and try self.match(.EQUAL)) { try self.expression(); - try self.emitConstWithIndex( - chunks.OpCode.SetGlobal, - chunks.OpCode.SetGlobalLong, - idx, - ); + try self.emitBytes(setOp, idx); } else { - try self.emitConstWithIndex( - chunks.OpCode.GetGlobal, - chunks.OpCode.GetGlobalLong, - idx, - ); + try self.emitBytes(getOp, idx); } } @@ -420,8 +444,43 @@ pub const Compiler = struct { )), token.line); } + fn addLocal(self: *Compiler, name: Token) void { + if (self.localCount == 256) { + self.errorCurrent("Too many variables in function."); + return; + } + + self.localCount += 1; + var local: *Local = &self.locals[@intCast(usize, self.localCount)]; + local.name = name; + local.depth = self.scopeDepth; + } + + fn declareVariable(self: *Compiler) void { + if (self.scopeDepth == 0) return; + var name: *Token = &self.parser.previous; + + // check if we're redeclaring an existing variable + // in the *CURRENT* scope. 
+ + // go from current down to global + var i = self.localCount; + while (i >= 0) : (i -= 1) { + var local = self.locals[@intCast(usize, i)]; + if (local.depth == -1 and local.depth < self.scopeDepth) break; + + if (std.mem.eql(u8, name.lexeme, local.name.lexeme)) { + self.errorCurrent("Variable with this name already declared in this scope."); + } + } + + self.addLocal(name.*); + } + fn parseVariable(self: *Compiler, msg: []const u8) !chunks.ConstantIndex { try self.consume(.IDENTIFIER, msg); + self.declareVariable(); + if (self.scopeDepth > 0) return chunks.ConstantIndex{ .Small = 0 }; return try self.identifierConstant(&self.parser.previous); } @@ -444,6 +503,8 @@ pub const Compiler = struct { } fn defineVariable(self: *Compiler, global: chunks.ConstantIndex) !void { + if (self.scopeDepth > 0) return; + try self.emitConstWithIndex( chunks.OpCode.DefineGlobal, chunks.OpCode.DefineGlobalLong, @@ -460,6 +521,8 @@ pub const Compiler = struct { try self.emitByte(chunks.OpCode.Nil); } + // check scopeDepth here + try self.consume(.SEMICOLON, "Expect ';' after variable declaration."); try self.defineVariable(global); } @@ -487,7 +550,7 @@ pub const Compiler = struct { } else if (try self.match(.LEFT_BRACE)) { self.beginScope(); try self.block(); - self.endScope(); + try self.endScope(); } else { try self.exprStmt(); } From d7b78e09e33fcf0af491a321a906f489a28cc73c Mon Sep 17 00:00:00 2001 From: Luna Date: Mon, 3 Jun 2019 16:24:54 -0300 Subject: [PATCH 66/68] finish impl for local vars --- src/chunk.zig | 15 +++++++++++++++ src/compiler.zig | 17 +++++++++++++++-- src/vm.zig | 20 +++++++++----------- 3 files changed, 39 insertions(+), 13 deletions(-) diff --git a/src/chunk.zig b/src/chunk.zig index bcfc23e..a244bd2 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -88,6 +88,17 @@ fn constantLongInstruction( return offset + 4; } +fn byteInstruction( + stdout: var, + name: []const u8, + chunk: *Chunk, + index: usize, +) !usize { + var slot: u8 = chunk.code[index + 1]; + try 
stdout.print("{} {}", name, slot); + return index + 2; +} + pub const ConstantIndexTag = enum { Small, Long, @@ -254,6 +265,10 @@ pub const Chunk = struct { return try constantInstruction(stdout, "OP_SETGLOBAL", self, index); } else if (instruction == OpCode.SetGlobalLong) { return try constantLongInstruction(stdout, "OP_SETGLOBAL_LONG", self, index); + } else if (instruction == OpCode.GetLocal) { + return try byteInstruction(stdout, "OP_GETLOCAL", self, index); + } else if (instruction == OpCode.SetLocal) { + return try byteInstruction(stdout, "OP_GETLOCAL", self, index); } else { try stdout.print("Unknown opcode: {}\n", instruction); return index + 1; diff --git a/src/compiler.zig b/src/compiler.zig index 7b51ca4..398741b 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -287,6 +287,9 @@ pub const Compiler = struct { var idx = @intCast(usize, i); var local = &self.locals[idx]; if (std.mem.eql(u8, name.lexeme, local.name.lexeme)) { + if (local.depth == -1) { + self.errorCurrent("Cannot read local variable in its own initializer."); + } return i; } } @@ -453,7 +456,8 @@ pub const Compiler = struct { self.localCount += 1; var local: *Local = &self.locals[@intCast(usize, self.localCount)]; local.name = name; - local.depth = self.scopeDepth; + //local.depth = self.scopeDepth; + local.depth = -1; } fn declareVariable(self: *Compiler) void { @@ -502,8 +506,17 @@ pub const Compiler = struct { } } + fn markInitialized(self: *Compiler) void { + if (self.scopeDepth == 0) return; + var idx = @intCast(usize, self.localCount); + self.locals[idx].depth = self.scopeDepth; + } + fn defineVariable(self: *Compiler, global: chunks.ConstantIndex) !void { - if (self.scopeDepth > 0) return; + if (self.scopeDepth > 0) { + self.markInitialized(); + return; + } try self.emitConstWithIndex( chunks.OpCode.DefineGlobal, diff --git a/src/vm.zig b/src/vm.zig index d2b923d..b3e3942 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -294,26 +294,24 @@ pub const VM = struct { chunk.OpCode.Pop => 
blk: { _ = self.pop(); - break :blk; + }, + + chunk.OpCode.GetLocal => blk: { + var slot = self.readByte(); + try self.push(self.stack[slot]); + }, + chunk.OpCode.SetLocal => blk: { + var slot = self.readByte(); + self.stack[slot] = self.peek(0); }, chunk.OpCode.GetGlobal => blk: { try self.doGetGlobal(self.readString()); - break :blk; }, - chunk.OpCode.GetGlobalLong => blk: { - try self.doGetGlobal(self.readStringLong()); - break :blk; - }, - chunk.OpCode.SetGlobal => blk: { try self.doSetGlobal(self.readString()); break :blk; }, - chunk.OpCode.SetGlobalLong => blk: { - try self.doSetGlobal(self.readStringLong()); - break :blk; - }, // extracting the name is different depending of the // op code since one just uses a single byte, the other From 036e76d3a57f22128eeef4d6d5913addbfcd4a30 Mon Sep 17 00:00:00 2001 From: Luna Date: Sun, 30 Jun 2019 00:01:03 -0300 Subject: [PATCH 67/68] fix for latest zig --- src/chunk.zig | 2 +- src/compiler.zig | 4 ++-- src/main.zig | 6 ++---- src/scanner.zig | 4 ++-- src/vm.zig | 2 +- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/chunk.zig b/src/chunk.zig index a244bd2..27af808 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -170,7 +170,7 @@ pub const Chunk = struct { const v2: u8 = @intCast(u8, (idx_u24 >> 8) & mask); const v3: u8 = @intCast(u8, (idx_u24 >> 16) & mask); - return ConstantIndex{ .Long = []u8{ v3, v2, v1 } }; + return ConstantIndex{ .Long = [_]u8{ v3, v2, v1 } }; } } diff --git a/src/compiler.zig b/src/compiler.zig index 398741b..12e710b 100644 --- a/src/compiler.zig +++ b/src/compiler.zig @@ -48,7 +48,7 @@ const ParseRule = struct { }; /// For each token, this defines a parse rule for it. 
-var rules = []ParseRule{ +var rules = [_]ParseRule{ // for LEFT_PAREN, we determine it as a call precedence // plus a prefix parse function of grouping ParseRule{ .prefix = Compiler.grouping, .precedence = .Call }, @@ -150,7 +150,7 @@ pub const Compiler = struct { .vmach = vmach, // local variable resolution - .locals = []Local{Local{ + .locals = [_]Local{Local{ .name = Token{}, .depth = -1, }} ** 256, diff --git a/src/main.zig b/src/main.zig index 1162392..4189e97 100644 --- a/src/main.zig +++ b/src/main.zig @@ -94,8 +94,7 @@ fn runPrompt(allocator: *Allocator) !void { } pub fn main() anyerror!void { - var da = std.heap.DirectAllocator.init(); - var arena = std.heap.ArenaAllocator.init(&da.allocator); + var arena = std.heap.ArenaAllocator.init(std.heap.direct_allocator); defer arena.deinit(); var allocator = &arena.allocator; @@ -115,8 +114,7 @@ pub fn main() anyerror!void { } pub fn oldMain() !void { - var da = std.heap.DirectAllocator.init(); - var arena = std.heap.ArenaAllocator.init(&da.allocator); + var arena = std.heap.ArenaAllocator.init(std.heap.direct_allocator); defer arena.deinit(); var allocator = &arena.allocator; diff --git a/src/scanner.zig b/src/scanner.zig index 93f8160..1d8d831 100644 --- a/src/scanner.zig +++ b/src/scanner.zig @@ -32,7 +32,7 @@ pub const KeywordMap = std.AutoHashMap([]const u8, u6); fn initKeywordMap(allocator: *std.mem.Allocator) !KeywordMap { var map = KeywordMap.init(allocator); - const keywords = [][]const u8{ + const keywords = [_][]const u8{ "and"[0..], "class"[0..], "else"[0..], @@ -51,7 +51,7 @@ fn initKeywordMap(allocator: *std.mem.Allocator) !KeywordMap { "while"[0..], }; - const tags = []TokenType{ + const tags = [_]TokenType{ TokenType.AND, TokenType.CLASS, TokenType.ELSE, diff --git a/src/vm.zig b/src/vm.zig index b3e3942..1a03f39 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -166,7 +166,7 @@ pub const VM = struct { var res_str = try std.mem.join( self.allocator, "", - [][]u8{ a, b }, + [_][]u8{ a, b }, ); var val = 
values.ObjVal(try objects.takeString(self, res_str)); From d2e865adfc3cb4bf930eb0d79a5ee627f386fb14 Mon Sep 17 00:00:00 2001 From: Luna Date: Sat, 2 Nov 2019 10:22:20 -0300 Subject: [PATCH 68/68] fix for latest zig --- src/chunk.zig | 50 ++++++++++++++++++++++++------------------------- src/scanner.zig | 2 +- src/vm.zig | 4 ++-- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/src/chunk.zig b/src/chunk.zig index 27af808..ae6b958 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -5,39 +5,39 @@ const Allocator = std.mem.Allocator; // hack. ugly hack. zig has compiler crash. const AllOpcodes = struct { - pub Return: u8 = 0, - pub Constant: u8 = 1, - pub ConstantLong: u8 = 2, - pub Add: u8 = 3, - pub Subtract: u8 = 4, - pub Multiply: u8 = 5, - pub Divide: u8 = 6, - pub Negate: u8 = 7, + Return: u8 = 0, + Constant: u8 = 1, + ConstantLong: u8 = 2, + Add: u8 = 3, + Subtract: u8 = 4, + Multiply: u8 = 5, + Divide: u8 = 6, + Negate: u8 = 7, // basic type op codes - pub Nil: u8 = 8, - pub True: u8 = 9, - pub False: u8 = 10, + Nil: u8 = 8, + True: u8 = 9, + False: u8 = 10, - pub Not: u8 = 11, + Not: u8 = 11, // comparison op codes! 
- pub Equal: u8 = 12, - pub Greater: u8 = 13, - pub Less: u8 = 14, + Equal: u8 = 12, + Greater: u8 = 13, + Less: u8 = 14, - pub Print: u8 = 15, - pub Pop: u8 = 16, + Print: u8 = 15, + Pop: u8 = 16, - pub DefineGlobal: u8 = 17, - pub DefineGlobalLong: u8 = 18, - pub GetGlobal: u8 = 19, - pub GetGlobalLong: u8 = 20, - pub SetGlobal: u8 = 21, - pub SetGlobalLong: u8 = 22, + DefineGlobal: u8 = 17, + DefineGlobalLong: u8 = 18, + GetGlobal: u8 = 19, + GetGlobalLong: u8 = 20, + SetGlobal: u8 = 21, + SetGlobalLong: u8 = 22, - pub GetLocal: u8 = 23, - pub SetLocal: u8 = 24, + GetLocal: u8 = 23, + SetLocal: u8 = 24, }; pub const OpCode = AllOpcodes{}; diff --git a/src/scanner.zig b/src/scanner.zig index 1d8d831..77807cd 100644 --- a/src/scanner.zig +++ b/src/scanner.zig @@ -25,7 +25,7 @@ fn isAlphaNumeric(char: u8) bool { return isAlpha(char) or isDigit(char); } -pub const KeywordMap = std.AutoHashMap([]const u8, u6); +pub const KeywordMap = std.StringHashMap(u6); /// The book does say that C doesn't have hashmaps. but Zig does. and I can /// use it here. diff --git a/src/vm.zig b/src/vm.zig index 1a03f39..1a41288 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -36,7 +36,7 @@ fn valuesEqual(a: value.Value, b: value.Value) bool { } } -pub const ValueMap = std.AutoHashMap([]const u8, values.Value); +pub const ValueMap = std.StringHashMap(values.Value); pub const VM = struct { chk: *Chunk = undefined, @@ -47,7 +47,7 @@ pub const VM = struct { stdout: StdOut, debug_flag: bool, - pub allocator: *std.mem.Allocator, + allocator: *std.mem.Allocator, objs: ?*objects.Object = null, globals: ValueMap,