diff --git a/.gitignore b/.gitignore index 0447b8b..3cef7be 100644 --- a/.gitignore +++ b/.gitignore @@ -1,116 +1 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# celery beat schedule file -celerybeat-schedule - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ +zig-cache/ diff --git a/README.md b/README.md index 9b3bc4e..b8e6fc4 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,18 @@ # jorts -jorts programming language +a compiler for the lox language from https://craftinginterpreters.com -## installing +this is a learning project. the implementation is based heavily off the C part +of the book, but also the Java part for the scanner. 
-```sh -git clone https://gitdab.com/luna/jorts -cd jorts -pip install --user --editable . -``` +## notes -## using + - jorts' lox bytecode is not compatible with any implementation. -right now, its not pretty, nor finished +## how do? ``` -cat examples/hello.jt | jortsc +zig build run ``` + +and play around with it diff --git a/build.zig b/build.zig new file mode 100644 index 0000000..371246c --- /dev/null +++ b/build.zig @@ -0,0 +1,15 @@ +const Builder = @import("std").build.Builder; + +pub fn build(b: *Builder) void { + const mode = b.standardReleaseOptions(); + const exe = b.addExecutable("jorts", "src/main.zig"); + exe.setBuildMode(mode); + + const run_cmd = exe.run(); + + const run_step = b.step("run", "Run the app"); + run_step.dependOn(&run_cmd.step); + + b.default_step.dependOn(&exe.step); + b.installArtifact(exe); +} diff --git a/examples/add.jt b/examples/add.jt deleted file mode 100644 index 447355f..0000000 --- a/examples/add.jt +++ /dev/null @@ -1,23 +0,0 @@ -import io - -fn add (int a, int b) -> int { - a + b -} - -// return type is void by default -fn main () { - // explicit types, or - int val = add(2, 2) - - // type inferred from the functions' return value - val := add(2, 2) - - // variables are immutable, however, you can update them with - // the value of the old one. - val = val + 1 - - // a shorthand is val++, same for val--. 
- - // string interpolation is implicit - io.puts("2 plus 2 = {val}") -} diff --git a/examples/closures.jt b/examples/closures.jt deleted file mode 100644 index 6a1291a..0000000 --- a/examples/closures.jt +++ /dev/null @@ -1,22 +0,0 @@ -import io - -fn main () { - x := 0 - - // since variable are immutable but updatable, x is 1 inside clojure, but - // 0 inside main() - fn inner() { - x++ - } - - inner() - - // shows 0 - io.puts("x is {x}") - - // however, if you explicitly update x: - x = inner() - - // shows 1 - io.puts("x is {x}") -} diff --git a/examples/custom-types.jt b/examples/custom-types.jt deleted file mode 100644 index 534e670..0000000 --- a/examples/custom-types.jt +++ /dev/null @@ -1,11 +0,0 @@ -import io - -// you can create your own types with 'type' -type T = int - -fn main () { - T a = 2 - - // since T is int, io.puts with an int works - io.puts(a) -} diff --git a/examples/extending-structs.jt b/examples/extending-structs.jt deleted file mode 100644 index 3d8d8bc..0000000 --- a/examples/extending-structs.jt +++ /dev/null @@ -1,15 +0,0 @@ - -struct A { - int a, - int b -} - -struct B <- A { - int c -} - -fn main () { - a := A{1, 2} - b := B{1, 2, 3} -} - diff --git a/examples/function-overload.jt b/examples/function-overload.jt deleted file mode 100644 index e47830a..0000000 --- a/examples/function-overload.jt +++ /dev/null @@ -1,15 +0,0 @@ -import io -import integer - -fn my_puts(string str) { - io.puts(str) -} - -fn my_puts(int my_int) { - io.puts(integer.to_str(my_int)) -} - -fn main () { - my_puts(2) - my_puts("aaa") -} diff --git a/examples/hello.jt b/examples/hello.jt deleted file mode 100644 index 4bc50f6..0000000 --- a/examples/hello.jt +++ /dev/null @@ -1,11 +0,0 @@ -import io - -// if a return type is not defined, it is implicitly void and so the function -// returns nil (the only instance of void) - -// main can return int or void, void mains are handled by jortsc -fn main () -> int { - // todo: put it back to io.puts - ioputs("pants") - 
0 -} diff --git a/examples/higher-order-functions.jt b/examples/higher-order-functions.jt deleted file mode 100644 index 34776dd..0000000 --- a/examples/higher-order-functions.jt +++ /dev/null @@ -1,26 +0,0 @@ -import io - -// takes a function that receives two ints, returns an int -// Func is the function type keyword, to not switch it with fn (which declares -// a function) -fn function_tester (Func func ([int, int] -> int)) -> int { - func(2, 2) -} - -fn add(int a, int b) -> int { - a + b -} - -fn main () { - // passes the function add to function_tester - res := function_tester(add) - - // you can also create functions and put them in variables. not putting a - // function name on the fn block makes it return a Func instance to be put - // in a variable - anonymous := (fn () {}) - - // anonymous has type Func ([] -> void) - - io.puts("res = {res}") -} diff --git a/examples/sockets.jt b/examples/sockets.jt deleted file mode 100644 index 08ab425..0000000 --- a/examples/sockets.jt +++ /dev/null @@ -1,10 +0,0 @@ -import socket -import io - -fn main () { - sock := socket.tcp_connect("example.com", 80) - sock.send("HTTP/1.1\n") - frame := sock.recv(1024) - sock.close() - io.puts(frame) -} diff --git a/examples/strings.jt b/examples/strings.jt deleted file mode 100644 index 75f0931..0000000 --- a/examples/strings.jt +++ /dev/null @@ -1,15 +0,0 @@ -import io - -fn main () { - s := "this is a string" - io.puts(s) - - s := "this is {s}" - io.puts(s) - - s := s + 2 // invalid - - // this however, is valid, there is an io.puts that handles int, - // more on function overload in a bit - io.puts(2) -} diff --git a/examples/struct-functions.jt b/examples/struct-functions.jt deleted file mode 100644 index 38df73e..0000000 --- a/examples/struct-functions.jt +++ /dev/null @@ -1,60 +0,0 @@ -import io - -struct A { - int val1, - int val2 -} - -// self is injected and represents the struct A -// from the functions' definition -fn A:sum_fields() -> int { - self.val1 + self.val2 -} - 
-// type of sum_fields is: -// Func ([A] -> int) - -// the mut keyword signals that self is a "reference" -// to self, instead of a copy - -// however, what actually happens is that an instance of -// A is returned from the function implicitly - -fn mut A:incr_both_fields() { - self.val1++ - self.val2++ -} - -// and so, the type becomes: -// Func ([A] -> A) - -fn mut A:incr_and_sum () { - self.val1++ - self.val2++ - - self.val1 + self.val2 -} - -// type is: -// Func ([A] -> (A, int)) - -fn main () { - a := A{0, 0} - - a.incr_both_fields() - - /* - translates to: - a := incr_both_fields(a) - */ - - sum := a.sum_fields() - io.puts(sum) - - val = a.incr_and_sum() - - /* - translates to: - a, val := incr_and_sum(a) - */ -} diff --git a/examples/structs.jt b/examples/structs.jt deleted file mode 100644 index 229058d..0000000 --- a/examples/structs.jt +++ /dev/null @@ -1,14 +0,0 @@ -import io - -struct MyStruct { - int var1, - int var2, - int var3 -} - -fn main () { - st = MyStruct{1, 2, 3} - - // TODO: define a way for printable things - io.puts(st) -} diff --git a/jortsc/__init__.py b/jortsc/__init__.py deleted file mode 100644 index da3f54a..0000000 --- a/jortsc/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .main import main - -__all__ = ['main'] diff --git a/jortsc/main.py b/jortsc/main.py deleted file mode 100644 index 16d1ec2..0000000 --- a/jortsc/main.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/python3 - -import sys -import pprint -import logging - -from jortsc.parser.lexer import lex_jorts -from jortsc.parser.syntatic import syntatic - -logging.basicConfig(level=logging.DEBUG) - -def main(): - """main entry point""" - try: - in_data = sys.stdin.read() - except EOFError: - pass - - tokens = lex_jorts(in_data) - pprint.pprint(tokens) - - tree = syntatic(tokens) - print(tree) - - -if __name__ == '__main__': - main() diff --git a/jortsc/parser/__init__.py b/jortsc/parser/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git 
a/jortsc/parser/ast_nodes.py b/jortsc/parser/ast_nodes.py deleted file mode 100644 index bff1c05..0000000 --- a/jortsc/parser/ast_nodes.py +++ /dev/null @@ -1,45 +0,0 @@ -from dataclasses import dataclass - -@dataclass -class TypedVar: - type_: str - name: str - - -@dataclass -class ReturnType: - type_: str - - -@dataclass -class Function: - name: str - arguments: str - ret_type: ReturnType - block: list - - -@dataclass -class Identifier: - name: str - - -@dataclass -class Import: - module: str - - -@dataclass -class String: - value: str - - -@dataclass -class Number: - value: str - - -@dataclass -class FunctionCall: - function: str - args: list diff --git a/jortsc/parser/lexer.py b/jortsc/parser/lexer.py deleted file mode 100644 index 7d61ff7..0000000 --- a/jortsc/parser/lexer.py +++ /dev/null @@ -1,112 +0,0 @@ -import re - -from dataclasses import dataclass -from enum import Enum, auto - - -class TokenType(Enum): - """Defines the type of a token""" - reserved = auto() - identifier = auto() - comment = auto() - comment_start = auto() - comment_end = auto() - whitespace = auto() - number = auto() - string = auto() - - -@dataclass -class Token: - value: str - type_: TokenType - - -class LexerError(Exception): - """Lexer error.""" - pass - - -TOKENS = [ - (r'[ \n\t]+', TokenType.whitespace), - - # single line comments and multiline comments - (r'//[^\n]*', TokenType.comment), - - # TODO: shouldnt this be /* */ instead of - # only tokenizing on the start and end? 
- (r'/\*', TokenType.comment_start), - (r'\*/', TokenType.comment_end), - - (r'fn', TokenType.reserved), - (r'if', TokenType.reserved), - (r'import', TokenType.reserved), - - (r'\(', TokenType.reserved), - (r'\)', TokenType.reserved), - - (r'\{', TokenType.reserved), - (r'\}', TokenType.reserved), - - (r'\-\>', TokenType.reserved), - (r'\.', TokenType.reserved), - - (r'\"[^\n]*\"', TokenType.string), - - # basic math ops - (r'[\+\-\/\*]', TokenType.reserved), - - (r'[0-9]+', TokenType.number), - (r'[A-Za-z][A-Za-z0-9_]*', TokenType.identifier) -] - - -def lex(string: str, token_defs: list) -> list: - """Generate tokens out of the given string.""" - pos = 0 - strlen = len(string) - tokens = [] - - # generate a dict for compiled regexes out of the token defs - # instead of compiling on each token definition per token. - compiled = {pattern: re.compile(pattern) - for pattern, _ in token_defs} - - # we use this instead of for pos in range(len(string)) because we - # need to increment pos to a whole token length's, and that wouldn't - # be easy on a for .. in range(..) - while pos < strlen: - valid = False - - for definition in token_defs: - pattern, tok_type = definition - regex = compiled[pattern] - - match = regex.match(string, pos) - - if not match: - continue - - text = match.group(0) - - # update pos to the end of the token - pos = match.end(0) - - valid = True - tokens.append(Token(text, tok_type)) - - # go to next token instead of checking other - # definitions for tokens, e.g if its a reserved token - # we shouldn't go down the path of an identifier. 
- break - - if not valid: - print(f'context: {pos} {len(string)} {string[pos-1:pos+20]!r}') - raise LexerError(f'Invalid character: {string[pos]!r}') - - return tokens - - -def lex_jorts(string: str) -> list: - """Lex with the jorts token definitions""" - return lex(string, TOKENS) diff --git a/jortsc/parser/parser.py b/jortsc/parser/parser.py deleted file mode 100644 index 7333bd6..0000000 --- a/jortsc/parser/parser.py +++ /dev/null @@ -1,44 +0,0 @@ - -from lark import Lark - -GRAMMAR = """ -FN: "fn" -IMPORT: "import" -COMMA: "," -DOT: "." -SINGLE_COMMENT: "//" -NEWLINE: /(\\r?\\n)+\\s*/ -ANY: /.+/ -WHITESPACE: " " -INTEGER: /[0-9]+/ -ARROW: "->" -COM_START: "/*" -COM_END: "*/" -QUOTE: "\\"" - -identifier: WHITESPACE* ANY WHITESPACE* - -single_comment: SINGLE_COMMENT ANY* NEWLINE -multi_comment: COM_START ANY* COM_END - -import_stmt: IMPORT identifier NEWLINE - -fn_arg: identifier identifier -parameters: fn_arg (COMMA fn_arg) -fn_stmt: FN identifier? "(" parameters? ")" [ARROW identifier] "{" NEWLINE? 
[stmt NEWLINE]* "}" - -sign_int: "+" | "-" -string: QUOTE ANY* QUOTE -value: (sign_int* INTEGER) | string - -call_stmt: [identifier DOT] identifier "(" [value COMMA]* ")" - -stmt: value | import_stmt | fn_stmt | call_stmt - -start: (NEWLINE | stmt)* -""" - -def parse(string: str): - """Parse using Lark""" - parser = Lark(GRAMMAR, parser='lalr', debug=True) - return parser.parse(string) diff --git a/jortsc/parser/syntatic.py b/jortsc/parser/syntatic.py deleted file mode 100644 index 7e9bc62..0000000 --- a/jortsc/parser/syntatic.py +++ /dev/null @@ -1,272 +0,0 @@ -from typing import Optional, Any, List - -from jortsc.parser.lexer import Token, TokenType -from jortsc.parser.ast_nodes import ( - Function, TypedVar, Identifier, Import, ReturnType, String, Number, - FunctionCall -) - - -class ParseError(Exception): - """Represents a parse error.""" - pass - - -class Reader: - """Main reader class""" - def __init__(self, tokens: List[Token]): - self.tokens = tokens - self.cur = 0 - - def __repr__(self): - return (f'') - - def peek(self) -> Optional[Token]: - """Peek at the current token.""" - try: - token = self.tokens[self.cur] - return token - except IndexError: - return None - - def next(self) -> Optional[Token]: - """Fetch the current token then skip to the next one.""" - token = self.peek() - self.cur += 1 - return token - - def expect(self, token_type: TokenType) -> Token: - """Check for a specific token type and error if it fails""" - token = self.next() - - if token.type_ != token_type: - raise ParseError(f'Expected {token_type}, got ' - f'{token.type_} {token.value!r}') - - return token - - def expect_val(self, value: str) -> Token: - """Check the next token to see if it matches against a given value, - instead of a type.""" - token = self.next() - - if token.value != value: - raise ParseError(f'Expected {value!r}, got ' - f'{token.type_} {token.value!r}') - - return token - - - def next_safe(self) -> Token: - """'Safe' version of next(). 
- - Raises an 'Unexpected EOF' error if next() returns None. - """ - token = self.next() - - if token is None: - raise ParseError('Unexpected EOF') - - return token - - def ignore(self, token_type: TokenType): - """Only increase self.cur if token_type is the upcoming token.""" - try: - assert self.tokens[self.cur].type_ == token_type - self.cur += 1 - except AssertionError: - pass - - -def _fn_read_args(reader: Reader, cur: List = None) -> List: - """Recursively read the arguments of the function.""" - if cur is None: - cur = [] - - # it can be an identifier for the arguments' type, OR a RPAREN - # if it is rparen, we stop - # if it isnt, we keep going until that happens - token = reader.next_safe() - - if token.value == ')': - return cur - - argtype = token - reader.expect(TokenType.whitespace) - argname = reader.next_safe() - - cur.append(TypedVar(argtype.value, argname.value)) - return _fn_read_args(reader, cur) - - -def _fn_ret_type(reader: Reader) -> ReturnType: - """Fetch the return type of a function. 
Defaults to void.""" - try: - reader.expect_val('->') - except ParseError: - return ReturnType('void') - - reader.ignore(TokenType.whitespace) - token = reader.expect(TokenType.identifier) - return ReturnType(token.value) - - -def read_function(reader: Reader): - """Read a function block.""" - reader.expect(TokenType.whitespace) - - token = reader.next() - - fn_name = '_anonymous' - fn_args = [] - - print('function token', token) - - if token.type_ == TokenType.identifier: - fn_name = token.value - - reader.expect(TokenType.whitespace) - reader.expect_val('(') - - fn_args = _fn_read_args(reader) - - reader.expect(TokenType.whitespace) - fn_ret_type = _fn_ret_type(reader) - - # only skip whitespace if we see it - reader.ignore(TokenType.whitespace) - block = read_start(reader) - elif token.value == '(': - fn_args = _fn_read_args(reader) - fn_ret_type = _fn_ret_type(reader) - block = read_start(reader) - - print('final function', fn_name, fn_args, fn_ret_type, block) - - return Function(fn_name, fn_args, fn_ret_type, block) - - -def read_import(reader): - """Read an import""" - reader.expect(TokenType.whitespace) - module = reader.next_safe() - return Import(module.value) - - -HANDLERS = { - 'fn': read_function, - 'import': read_import, -} - - -def read_reserved(token: Token, reader: Reader): - """Read reserved statements.""" - try: - handler = HANDLERS[token.value] - except KeyError: - raise ParseError(f'Unexpected reserved word {token.value!r}') - - return handler(reader) - - -def read_value(token: Token, _reader: Reader): - """Read a given value""" - if token.type_ == TokenType.string: - return String(token.value) - elif token.type_ == TokenType.number: - return Number(token.value) - - -def read_statement(token: Token, reader: Reader): - """Read a statement""" - # token is an identifier, so first check for a function call - - # TODO: handle more things than a function call - call_fn_name = token.value - token = reader.expect_val('(') - res = [] - - while True: - 
token = reader.next_safe() - - if token.value == ')': - break - - res.append(read_value(token, reader)) - - return FunctionCall(call_fn_name, res) - - -def read_start(reader: Reader): - """Read the start of a program.""" - print('reader', reader) - - token = reader.next() - - if token is None: - print('eof!') - return None - - ast = [] - res = [] - - # handle blocks - if token.value == '{': - # next can be a whitespace, or a } - token = reader.next() - - print('block start!, next:', token) - - if token.type_ == TokenType.whitespace: - # keep going on reading - while True: - token = reader.peek() - print('block append', token) - - if token.value == '}': - print('block end') - reader.next() - break - - res.extend(read_start(reader)) - elif token.value == '}': - res = [] - - # import, fn, etc - elif token.type_ == TokenType.reserved: - res = read_reserved(token, reader) - - elif token.type_ == TokenType.comment: - return [] - - elif token.type_ == TokenType.identifier: - res = read_statement(token, reader) - else: - res = read_value(token, reader) - - ast.append(res) - return ast - - -def read_loop(reader: Reader): - """Read the AST.""" - final_ast = [] - - while True: - ast = read_start(reader) - - # break when eof - if ast is None: - break - - # TODO: better ast cleanup - - final_ast.append(ast) - - return final_ast - - -def syntatic(tokens: List[Token]): - """Create an AST out of the tokens.""" - return read_loop(Reader(tokens)) diff --git a/setup.py b/setup.py deleted file mode 100644 index 45d4212..0000000 --- a/setup.py +++ /dev/null @@ -1,14 +0,0 @@ -from setuptools import setup - -setup( - name='jortsc', - version='0.1', - py_modules=['jortsc'], - install_requires=[ - 'lark-parser==0.6.7' - ], - entry_points=''' - [console_scripts] - jortsc=jortsc:main - ''' -) diff --git a/src/chunk.zig b/src/chunk.zig new file mode 100644 index 0000000..ae6b958 --- /dev/null +++ b/src/chunk.zig @@ -0,0 +1,286 @@ +const std = @import("std"); + +const value = 
@import("value.zig"); +const Allocator = std.mem.Allocator; + +// hack. ugly hack. zig has compiler crash. +const AllOpcodes = struct { + Return: u8 = 0, + Constant: u8 = 1, + ConstantLong: u8 = 2, + Add: u8 = 3, + Subtract: u8 = 4, + Multiply: u8 = 5, + Divide: u8 = 6, + Negate: u8 = 7, + + // basic type op codes + Nil: u8 = 8, + True: u8 = 9, + False: u8 = 10, + + Not: u8 = 11, + + // comparison op codes! + Equal: u8 = 12, + Greater: u8 = 13, + Less: u8 = 14, + + Print: u8 = 15, + Pop: u8 = 16, + + DefineGlobal: u8 = 17, + DefineGlobalLong: u8 = 18, + GetGlobal: u8 = 19, + GetGlobalLong: u8 = 20, + SetGlobal: u8 = 21, + SetGlobalLong: u8 = 22, + + GetLocal: u8 = 23, + SetLocal: u8 = 24, +}; + +pub const OpCode = AllOpcodes{}; + +fn simpleInstruction( + stdout: var, + comptime name: []const u8, + index: usize, +) !usize { + try stdout.print("{}\n", name); + return index + 1; +} + +fn constantInstruction( + stdout: var, + comptime name: []const u8, + chunk: *Chunk, + index: usize, +) !usize { + // get the constant's index in constants slice + var idx = chunk.code[index + 1]; + + try stdout.print("\t{}\t{} '", name, idx); + try value.printValue(stdout, chunk.constants.values[idx]); + try stdout.print("'\n"); + + return index + 2; +} + +fn constantLongInstruction( + stdout: var, + comptime name: []const u8, + chunk: *Chunk, + offset: usize, +) !usize { + // constantLong uses three u8's that encode a u24 as the + // contants' index. 
+ var v3: u8 = chunk.code[offset + 1]; + var v2: u8 = chunk.code[offset + 2]; + var v1: u8 = chunk.code[offset + 3]; + + var idx: u24 = (@intCast(u24, v3) << 16) | (@intCast(u24, v2) << 8) | v1; + + try stdout.print("\t{}\t{} '", name, idx); + try value.printValue(stdout, chunk.constants.values[idx]); + try stdout.print("'\n"); + + return offset + 4; +} + +fn byteInstruction( + stdout: var, + name: []const u8, + chunk: *Chunk, + index: usize, +) !usize { + var slot: u8 = chunk.code[index + 1]; + try stdout.print("{} {}\n", name, slot); + return index + 2; +} + +pub const ConstantIndexTag = enum { + Small, + Long, +}; + +pub const ConstantIndex = union(ConstantIndexTag) { + Small: u8, + Long: [3]u8, +}; + +pub const Chunk = struct { + count: usize, + lines: []usize, + code: []u8, + + allocator: *Allocator, + constants: value.ValueList, + + pub fn init(allocator: *Allocator) !Chunk { + return Chunk{ + .count = 0, + .allocator = allocator, + .code = try allocator.alloc(u8, 0), + .lines = try allocator.alloc(usize, 0), + .constants = try value.ValueList.init(allocator), + }; + } + + pub fn write(self: *Chunk, byte: u8, line: usize) !void { + if (self.code.len < self.count + 1) { + self.code = try self.allocator.realloc( + self.code, + self.count + 1, + ); + + self.lines = try self.allocator.realloc( + self.lines, + self.count + 1, + ); + } + + self.code[self.count] = byte; + self.lines[self.count] = line; + self.count += 1; + } + + pub fn addConstant(self: *Chunk, val: value.Value) !u8 { + try self.constants.write(val); + return self.constants.count - 1; + } + + pub fn writeConstantRaw( + self: *Chunk, + val: value.Value, + line: usize, + ) !ConstantIndex { + try self.constants.write(val); + var constant_idx = self.constants.count - 1; + + if (constant_idx < 256) { + var idx_small = @intCast(u8, constant_idx); + return ConstantIndex{ .Small = idx_small }; + } else { + var idx_u24: u24 = @intCast(u24, constant_idx); + + const mask = @intCast(u24, 0xff); + + const v1: u8 = 
@intCast(u8, idx_u24 & mask); + const v2: u8 = @intCast(u8, (idx_u24 >> 8) & mask); + const v3: u8 = @intCast(u8, (idx_u24 >> 16) & mask); + + return ConstantIndex{ .Long = [_]u8{ v3, v2, v1 } }; + } + } + + pub fn writeConstant( + self: *Chunk, + val: value.Value, + line: usize, + ) !ConstantIndex { + var idx = try self.writeConstantRaw(val, line); + + switch (idx) { + .Small => |idx_small| blk: { + try self.write(OpCode.Constant, line); + try self.write(idx_small, line); + break :blk; + }, + .Long => |long_u8| blk: { + try self.write(OpCode.ConstantLong, line); + try self.write(long_u8[0], line); + try self.write(long_u8[1], line); + try self.write(long_u8[2], line); + }, + else => unreachable, + } + + return idx; + } + + pub fn disassembleInstruction( + self: *Chunk, + stdout: var, + index: usize, + ) !usize { + try stdout.print("{} ", index); + + if (index > 0 and self.lines[index] == self.lines[index - 1]) { + try stdout.print(" | "); + } else { + try stdout.print("{} ", self.lines[index]); + } + + var instruction = self.code[index]; + + if (instruction == OpCode.Return) { + return try simpleInstruction(stdout, "OP_RETURN", index); + } else if (instruction == OpCode.Constant) { + return try constantInstruction(stdout, "OP_CONSTANT", self, index); + } else if (instruction == OpCode.ConstantLong) { + return try constantLongInstruction( + stdout, + "OP_CONSTANT_LONG", + self, + index, + ); + } else if (instruction == OpCode.Negate) { + return try simpleInstruction(stdout, "OP_NEGATE", index); + } else if (instruction == OpCode.Add) { + return try simpleInstruction(stdout, "OP_ADD", index); + } else if (instruction == OpCode.Subtract) { + return try simpleInstruction(stdout, "OP_SUBTRACT", index); + } else if (instruction == OpCode.Multiply) { + return try simpleInstruction(stdout, "OP_MULTIPLY", index); + } else if (instruction == OpCode.Divide) { + return try simpleInstruction(stdout, "OP_DIVIDE", index); + } else if (instruction == OpCode.Nil) { + return try 
simpleInstruction(stdout, "OP_NIL", index); + } else if (instruction == OpCode.True) { + return try simpleInstruction(stdout, "OP_TRUE", index); + } else if (instruction == OpCode.False) { + return try simpleInstruction(stdout, "OP_FALSE", index); + } else if (instruction == OpCode.Not) { + return try simpleInstruction(stdout, "OP_NOT", index); + } else if (instruction == OpCode.Equal) { + return try simpleInstruction(stdout, "OP_EQUAL", index); + } else if (instruction == OpCode.Greater) { + return try simpleInstruction(stdout, "OP_GREATER", index); + } else if (instruction == OpCode.Less) { + return try simpleInstruction(stdout, "OP_LESS", index); + } else if (instruction == OpCode.Print) { + return try simpleInstruction(stdout, "OP_PRINT", index); + } else if (instruction == OpCode.Pop) { + return try simpleInstruction(stdout, "OP_POP", index); + } else if (instruction == OpCode.DefineGlobal) { + return try constantInstruction(stdout, "OP_DEFGLOBAL", self, index); + } else if (instruction == OpCode.DefineGlobalLong) { + return try constantLongInstruction(stdout, "OP_DEFGLOBAL_LONG", self, index); + } else if (instruction == OpCode.GetGlobal) { + return try constantInstruction(stdout, "OP_GETGLOBAL", self, index); + } else if (instruction == OpCode.GetGlobalLong) { + return try constantLongInstruction(stdout, "OP_GETGLOBAL_LONG", self, index); + } else if (instruction == OpCode.SetGlobal) { + return try constantInstruction(stdout, "OP_SETGLOBAL", self, index); + } else if (instruction == OpCode.SetGlobalLong) { + return try constantLongInstruction(stdout, "OP_SETGLOBAL_LONG", self, index); + } else if (instruction == OpCode.GetLocal) { + return try byteInstruction(stdout, "OP_GETLOCAL", self, index); + } else if (instruction == OpCode.SetLocal) { + return try byteInstruction(stdout, "OP_SETLOCAL", self, index); + } else { + try stdout.print("Unknown opcode: {}\n", instruction); + return index + 1; + } + } + + pub fn disassemble(self: *Chunk, stdout: var, name: 
[]const u8) !void { + try stdout.print("== {} ==\n", name); + + var i: usize = 0; + while (i < self.count) { + i = try self.disassembleInstruction(stdout, i); + } + } +}; diff --git a/src/compiler.zig b/src/compiler.zig new file mode 100644 index 0000000..12e710b --- /dev/null +++ b/src/compiler.zig @@ -0,0 +1,587 @@ +const std = @import("std"); +const scanner = @import("scanner.zig"); +const vm = @import("vm.zig"); +const chunks = @import("chunk.zig"); +const tokens = @import("token.zig"); +const values = @import("value.zig"); +const objects = @import("object.zig"); + +const Allocator = std.mem.Allocator; +const Scanner = scanner.Scanner; +const Chunk = chunks.Chunk; +const Token = tokens.Token; +const TokenType = tokens.TokenType; +const Value = values.Value; +const OpCode = chunks.OpCode; + +/// Holds parser state for the compiler. +const Parser = struct { + previous: Token = undefined, + current: Token = undefined, + + // TODO are those needed + hadError: bool = false, + panicMode: bool = false, +}; + +/// Represents the order of operations in the parser. +const Precedence = enum(u5) { + None, + Assignment, // = + Or, // or + And, // and + Equality, // == != + Comparison, // < > <= >= + Term, // + - + Factor, // * / + Unary, // ! - + Call, // . () [] + Primary, +}; + +const ParseFn = fn (*Compiler, bool) anyerror!void; + +const ParseRule = struct { + prefix: ?ParseFn = null, + infix: ?ParseFn = null, + precedence: Precedence = Precedence.None, +}; + +/// For each token, this defines a parse rule for it. 
+var rules = [_]ParseRule{ + // for LEFT_PAREN, we determine it as a call precedence + // plus a prefix parse function of grouping + ParseRule{ .prefix = Compiler.grouping, .precedence = .Call }, + ParseRule{}, + ParseRule{}, + ParseRule{}, + ParseRule{}, + + // dot token, means a call too, for things like a.b + ParseRule{ .precedence = .Call }, + + // specific to -, as it can be an unary operator when its a prefix + // of something, or a binary one, when its a infix or another thing. + ParseRule{ + .prefix = Compiler.unary, + .infix = Compiler.binary, + .precedence = .Term, + }, + + ParseRule{ .infix = Compiler.binary, .precedence = .Term }, + ParseRule{}, + + // slash is a binary operator, as well as star. + ParseRule{ .infix = Compiler.binary, .precedence = .Factor }, + ParseRule{ .infix = Compiler.binary, .precedence = .Factor }, + + // as the token enum says, those are 1/2 char tokens. + ParseRule{ .prefix = Compiler.unary }, + // this is specifically for the != operator + ParseRule{ .infix = Compiler.binary, .precedence = .Equality }, + ParseRule{}, + // this is specifically for the == operator + ParseRule{ .infix = Compiler.binary, .precedence = .Equality }, + + // all the comparison ones + ParseRule{ .infix = Compiler.binary, .precedence = .Comparison }, + ParseRule{ .infix = Compiler.binary, .precedence = .Comparison }, + ParseRule{ .infix = Compiler.binary, .precedence = .Comparison }, + ParseRule{ .infix = Compiler.binary, .precedence = .Comparison }, + + ParseRule{ .prefix = Compiler.variable }, + ParseRule{ .prefix = Compiler.string }, + ParseRule{ .prefix = Compiler.number }, + ParseRule{ .precedence = .And }, + ParseRule{}, + ParseRule{}, + + // false + ParseRule{ .prefix = Compiler.literal }, + ParseRule{}, + ParseRule{}, + ParseRule{}, + ParseRule{ .prefix = Compiler.literal }, + ParseRule{ .precedence = .Or }, + ParseRule{}, + ParseRule{}, + ParseRule{}, + ParseRule{}, + ParseRule{ .prefix = Compiler.literal }, + ParseRule{}, + ParseRule{}, + 
ParseRule{},
+};
+
+/// A local variable tracked by the compiler: the token that declared it and
+/// the scope depth it belongs to. `depth` is -1 while the variable is
+/// declared but not yet initialized.
+pub const Local = struct {
+    name: tokens.Token,
+    depth: i32,
+};
+
+/// Single-pass compiler: pulls tokens from the scanner and writes bytecode
+/// directly into `chunk`.
+pub const Compiler = struct {
+    src: []const u8,
+    stdout: vm.StdOut,
+    allocator: *Allocator,
+    parser: Parser,
+    scanr: Scanner = undefined,
+    chunk: *chunks.Chunk,
+    debug_flag: bool = false,
+    vmach: *vm.VM,
+
+    locals: [256]Local,
+    localCount: i32 = 0,
+    scopeDepth: i32 = 0,
+
+    /// Build a compiler over `source` that writes into `chunk`.
+    /// The scanner itself is only created later, in `compile`.
+    pub fn init(
+        allocator: *Allocator,
+        chunk: *chunks.Chunk,
+        stdout: vm.StdOut,
+        source: []const u8,
+        debug_flag: bool,
+        vmach: *vm.VM,
+    ) Compiler {
+        return Compiler{
+            .src = source,
+            .chunk = chunk,
+            .allocator = allocator,
+            .stdout = stdout,
+            .parser = Parser{},
+            .debug_flag = debug_flag,
+            .vmach = vmach,
+
+            // local variable resolution
+            .locals = [_]Local{Local{
+                .name = Token{},
+                .depth = -1,
+            }} ** 256,
+        };
+    }
+
+    /// Report a compile error at `token`. Only the first error is printed
+    /// while panic mode is active; later ones are dropped.
+    fn errorAt(self: *Compiler, token: Token, msg: []const u8) void {
+        if (self.parser.panicMode) return;
+        self.parser.panicMode = true;
+
+        std.debug.warn("[line {}] Error", token.line);
+        if (token.ttype == TokenType.EOF) {
+            std.debug.warn(" at end");
+        } else {
+            std.debug.warn(" at '{}'", token.lexeme);
+        }
+
+        std.debug.warn(": {}\n", msg);
+        self.parser.hadError = true;
+    }
+
+    /// Report an error at the token currently being looked at.
+    fn errorCurrent(self: *Compiler, msg: []const u8) void {
+        self.errorAt(self.parser.current, msg);
+    }
+
+    /// Report an error at the token that was just consumed.
+    fn errorPrevious(self: *Compiler, msg: []const u8) void {
+        self.errorAt(self.parser.previous, msg);
+    }
+
+    /// Shift `current` into `previous` and scan the next token.
+    fn advance(self: *Compiler) !void {
+        self.parser.previous = self.parser.current;
+
+        // scanToken yields null for input that produces no token
+        // (comments), so keep scanning until a real token shows up.
+        while (true) {
+            var token_opt = try self.scanr.scanToken();
+            if (token_opt) |token| {
+                self.parser.current = token;
+                break;
+            }
+        }
+    }
+
+    /// Consume the current token if it has type `ttype`, otherwise report
+    /// `msg` as an error at the current token.
+    fn consume(self: *Compiler, ttype: TokenType, msg: []const u8) !void {
+        if (self.parser.current.ttype == ttype) {
+            try self.advance();
+            return;
+        }
+
+        self.errorCurrent(msg);
+    }
+
+    /// True when the current token has type `ttype`. Consumes nothing.
+    fn check(self: *Compiler, ttype: TokenType) bool {
+        return self.parser.current.ttype == ttype;
+    }
+
+    /// Consume the current token and return true if it has type `ttype`;
+    /// otherwise leave it alone and return false.
+    fn match(self: *Compiler, ttype: TokenType) !bool {
+        if (!(self.check(ttype))) return false;
+
+        try self.advance();
+        return true;
+    }
+
+    /// The chunk bytecode is currently being written to.
+    fn currentChunk(self: *Compiler) *chunks.Chunk {
+        return self.chunk;
+    }
+
+    /// Append one byte, tagged with the line of the previous token.
+    fn emitByte(self: *Compiler, byte: u8) !void {
+        try self.currentChunk().write(byte, self.parser.previous.line);
+    }
+
+    /// Append two bytes (typically an opcode and its operand).
+    fn emitBytes(self: *Compiler, byte1: u8, byte2: u8) !void {
+        try self.emitByte(byte1);
+        try self.emitByte(byte2);
+    }
+
+    fn emitReturn(self: *Compiler) !void {
+        try self.emitByte(OpCode.Return);
+    }
+
+    /// Add `value` to the chunk's constants and emit the load for it.
+    fn emitConstant(self: *Compiler, value: Value) !void {
+        _ = try self.currentChunk().writeConstant(
+            value,
+            self.parser.previous.line,
+        );
+    }
+
+    /// Finish compilation: emit the final return and, when debugging and
+    /// no error was reported, disassemble the chunk to stdout.
+    fn end(self: *Compiler) !void {
+        try self.emitReturn();
+
+        if (self.debug_flag and !self.parser.hadError) {
+            try self.currentChunk().disassemble(self.stdout, "code");
+        }
+    }
+
+    fn beginScope(self: *Compiler) void {
+        self.scopeDepth += 1;
+    }
+
+    /// Leave the current scope, emitting one Pop per local declared in it.
+    fn endScope(self: *Compiler) !void {
+        self.scopeDepth -= 1;
+
+        // clear the current scope in the stack
+        while (self.localCount > 0 and self.locals[@intCast(usize, self.localCount - 1)].depth > self.scopeDepth) {
+            try self.emitByte(chunks.OpCode.Pop);
+            self.localCount -= 1;
+        }
+    }
+
+    /// Prefix rule for '(': compile the inner expression and require ')'.
+    fn grouping(self: *Compiler, canAssign: bool) !void {
+        try self.expression();
+        try self.consume(.RIGHT_PAREN, "Expect ')' after expression.");
+    }
+
+    /// Emits bytecode for a number being loaded into the code.
+    fn number(self: *Compiler, canAssign: bool) !void {
+        var value: f64 = try std.fmt.parseFloat(
+            f64,
+            self.parser.previous.lexeme,
+        );
+        try self.emitConstant(values.NumberVal(value));
+    }
+
+    /// Emits a string constant. The lexeme still carries its surrounding
+    /// quotes, so they are sliced off before the string is copied.
+    fn string(self: *Compiler, canAssign: bool) !void {
+        const lexeme_len = self.parser.previous.lexeme.len;
+
+        try self.emitConstant(values.ObjVal(try objects.copyString(
+            self.vmach,
+            self.parser.previous.lexeme[1 .. lexeme_len - 1],
+        )));
+    }
+
+    /// Look `name` up among the locals, newest first. Returns the index
+    /// into `locals`, or -1 when no local has that name.
+    fn resolveLocal(self: *Compiler, name: *Token) i32 {
+        var i = self.localCount - 1;
+        while (i >= 0) : (i -= 1) {
+            var idx = @intCast(usize, i);
+            var local = &self.locals[idx];
+            if (std.mem.eql(u8, name.lexeme, local.name.lexeme)) {
+                // depth -1 means the local is still being initialized
+                if (local.depth == -1) {
+                    self.errorCurrent("Cannot read local variable in its own initializer.");
+                }
+                return i;
+            }
+        }
+
+        return -1;
+    }
+
+    /// Emit a get or set for the variable named by `tok`, choosing local
+    /// or global opcodes depending on whether the name resolves locally.
+    fn namedVariable(self: *Compiler, tok: *Token, canAssign: bool) !void {
+        // writeConstant always emits its own opcode, which is not what we
+        // want here, so the opcode is picked by hand.
+        var getOp: u8 = undefined;
+        var setOp: u8 = undefined;
+
+        // we try to resolve the local. depending if it gets resolved
+        // or not, we select the necessary get/set op codes.
+        var arg: i32 = self.resolveLocal(tok);
+
+        if (arg != -1) {
+            getOp = chunks.OpCode.GetLocal;
+            setOp = chunks.OpCode.SetLocal;
+        } else {
+            // NOTE(review): only the .Small variant is read here; presumably
+            // a .Long constant index is not supported on this path - confirm.
+            arg = (try self.identifierConstant(tok)).Small;
+            getOp = chunks.OpCode.GetGlobal;
+            setOp = chunks.OpCode.SetGlobal;
+        }
+
+        var idx: u8 = @intCast(u8, arg);
+
+        if (canAssign and try self.match(.EQUAL)) {
+            try self.expression();
+            try self.emitBytes(setOp, idx);
+        } else {
+            try self.emitBytes(getOp, idx);
+        }
+    }
+
+    /// Prefix rule for identifiers.
+    fn variable(self: *Compiler, canAssign: bool) !void {
+        try self.namedVariable(&self.parser.previous, canAssign);
+    }
+
+    /// Emits bytecode for a given unary.
+    fn unary(self: *Compiler, canAssign: bool) !void {
+        var ttype = self.parser.previous.ttype;
+        // compile the operand first; the operator is emitted after it
+        try self.parsePrecedence(.Unary);
+
+        switch (ttype) {
+            .MINUS => try self.emitByte(OpCode.Negate),
+            .BANG => try self.emitByte(OpCode.Not),
+            else => unreachable,
+        }
+    }
+
+    /// Infix rule for binary operators: compile the right-hand operand at
+    /// one precedence level higher, then emit the operator.
+    fn binary(self: *Compiler, canAssign: bool) !void {
+        var op_type = self.parser.previous.ttype;
+        var rule: *ParseRule = self.getRule(op_type);
+        try self.parsePrecedence(@intToEnum(Precedence, @enumToInt(rule.precedence) + 1));
+
+        switch (op_type) {
+            .PLUS => try self.emitByte(OpCode.Add),
+            .MINUS => try self.emitByte(OpCode.Subtract),
+            .STAR => try self.emitByte(OpCode.Multiply),
+            .SLASH => try self.emitByte(OpCode.Divide),
+
+            .EQUAL_EQUAL => try self.emitByte(OpCode.Equal),
+            .GREATER => try self.emitByte(OpCode.Greater),
+            .LESS => try self.emitByte(OpCode.Less),
+
+            // the remaining comparisons are the negation of another one
+            .BANG_EQUAL => try self.emitBytes(OpCode.Equal, OpCode.Not),
+            .GREATER_EQUAL => try self.emitBytes(OpCode.Less, OpCode.Not),
+            .LESS_EQUAL => try self.emitBytes(OpCode.Greater, OpCode.Not),
+
+            else => unreachable,
+        }
+    }
+
+    /// Prefix rule for `false`, `nil` and `true`.
+    fn literal(self: *Compiler, canAssign: bool) !void {
+        switch (self.parser.previous.ttype) {
+            .FALSE => try self.emitByte(OpCode.False),
+            .NIL => try self.emitByte(OpCode.Nil),
+            .TRUE => try self.emitByte(OpCode.True),
+            else => unreachable,
+        }
+    }
+
+    /// Core of the Pratt parser: compile everything at `precedence` or
+    /// higher, starting from the next token.
+    fn parsePrecedence(self: *Compiler, precedence: Precedence) anyerror!void {
+        try self.advance();
+        var as_int = @enumToInt(precedence);
+        var prefix_rule_opt = self.getRule(self.parser.previous.ttype).prefix;
+
+        if (prefix_rule_opt) |prefix_rule| {
+            // assignment is only legal when parsing at the lowest levels
+            var canAssign: bool = as_int <= @enumToInt(Precedence.Assignment);
+            try prefix_rule(self, canAssign);
+
+            while (as_int <= @enumToInt(self.getRule(self.parser.current.ttype).precedence)) {
+                try self.advance();
+                var infix_rule_opt = self.getRule(self.parser.previous.ttype).infix;
+                if (infix_rule_opt) |infix_rule| {
+                    try infix_rule(self, canAssign);
+                }
+            }
+
+            // a '=' still pending here means nothing accepted it as a target
+            if (canAssign and try self.match(.EQUAL)) {
+                self.errorPrevious("Invalid assignment target.");
+                try self.expression();
+            }
+        } else {
+            self.errorPrevious("Expect expression.");
+            return;
+        }
+    }
+
+    /// Parse rule registered for `ttype`.
+    fn getRule(self: *Compiler, ttype: TokenType) *ParseRule {
+        return &rules[@enumToInt(ttype)];
+    }
+
+    fn expression(self: *Compiler) anyerror!void {
+        try self.parsePrecedence(.Assignment);
+    }
+
+    fn printStmt(self: *Compiler) !void {
+        try self.expression();
+        try self.consume(.SEMICOLON, "Expect ';' after value.");
+        try self.emitByte(OpCode.Print);
+    }
+
+    /// Expression statement: evaluate, then discard the result.
+    fn exprStmt(self: *Compiler) !void {
+        try self.expression();
+        try self.consume(.SEMICOLON, "Expect ';' after expression.");
+        try self.emitByte(OpCode.Pop);
+    }
+
+    /// Leave panic mode by skipping tokens up to a statement boundary.
+    fn synchronize(self: *Compiler) !void {
+        self.parser.panicMode = false;
+
+        while (self.parser.current.ttype != .EOF) {
+            if (self.parser.previous.ttype == .SEMICOLON) return;
+
+            switch (self.parser.current.ttype) {
+                .CLASS, .FUN, .VAR, .FOR, .IF, .WHILE, .PRINT, .RETURN => return,
+                else => {},
+            }
+
+            try self.advance();
+        }
+    }
+
+    /// Write an identifier constant to the bytecode.
+    fn identifierConstant(
+        self: *Compiler,
+        token: *Token,
+    ) !chunks.ConstantIndex {
+        return try self.currentChunk().writeConstantRaw(values.ObjVal(try objects.copyString(
+            self.vmach,
+            token.lexeme,
+        )), token.line);
+    }
+
+    /// Record a new local named `name` in the next free slot, marked as
+    /// uninitialized (depth -1) until `markInitialized` runs.
+    fn addLocal(self: *Compiler, name: Token) void {
+        if (self.localCount == 256) {
+            self.errorCurrent("Too many variables in function.");
+            return;
+        }
+
+        // Fill slot `localCount` and only then bump the count, so the
+        // newest local always lives at `localCount - 1` - the index that
+        // resolveLocal and endScope treat as the top of the list.
+        var local: *Local = &self.locals[@intCast(usize, self.localCount)];
+        self.localCount += 1;
+        local.name = name;
+        local.depth = -1;
+    }
+
+    /// Declare the variable named by the previous token as a local.
+    /// Globals (scope depth 0) are late-bound and are not tracked here.
+    fn declareVariable(self: *Compiler) void {
+        if (self.scopeDepth == 0) return;
+        var name: *Token = &self.parser.previous;
+
+        // check if we're redeclaring an existing variable
+        // in the *CURRENT* scope.
+
+        // go from the newest local down towards the global scope
+        var i = self.localCount - 1;
+        while (i >= 0) : (i -= 1) {
+            var local = self.locals[@intCast(usize, i)];
+            // an initialized local from an enclosing scope ends the search:
+            // shadowing an outer variable is allowed.
+            if (local.depth != -1 and local.depth < self.scopeDepth) break;
+
+            if (std.mem.eql(u8, name.lexeme, local.name.lexeme)) {
+                self.errorCurrent("Variable with this name already declared in this scope.");
+            }
+        }
+
+        self.addLocal(name.*);
+    }
+
+    /// Consume an identifier and declare it. Returns the constant index of
+    /// its name for globals, or a dummy index for locals.
+    fn parseVariable(self: *Compiler, msg: []const u8) !chunks.ConstantIndex {
+        try self.consume(.IDENTIFIER, msg);
+        self.declareVariable();
+        if (self.scopeDepth > 0) return chunks.ConstantIndex{ .Small = 0 };
+        return try self.identifierConstant(&self.parser.previous);
+    }
+
+    /// Emit `op_short` with a one-byte operand or `op_long` with a
+    /// three-byte operand, depending on the kind of constant index.
+    fn emitConstWithIndex(
+        self: *Compiler,
+        op_short: u8,
+        op_long: u8,
+        idx: chunks.ConstantIndex,
+    ) !void {
+        switch (idx) {
+            .Small => |val| try self.emitBytes(op_short, val),
+            .Long => |val| blk: {
+                try self.emitByte(op_long);
+                try self.emitByte(val[0]);
+                try self.emitByte(val[1]);
+                try self.emitByte(val[2]);
+            },
+            else => unreachable,
+        }
+    }
+
+    /// Mark the most recently declared local as usable by giving it the
+    /// current scope depth.
+    fn markInitialized(self: *Compiler) void {
+        if (self.scopeDepth == 0) return;
+        // nothing to mark when addLocal bailed out before adding anything
+        if (self.localCount == 0) return;
+        var idx = @intCast(usize, self.localCount - 1);
+        self.locals[idx].depth = self.scopeDepth;
+    }
+
+    /// Finish a variable declaration: locals just become initialized (the
+    /// value already sits in their stack slot), globals get a DefineGlobal.
+    fn defineVariable(self: *Compiler, global: chunks.ConstantIndex) !void {
+        if (self.scopeDepth > 0) {
+            self.markInitialized();
+            return;
+        }
+
+        try self.emitConstWithIndex(
+            chunks.OpCode.DefineGlobal,
+            chunks.OpCode.DefineGlobalLong,
+            global,
+        );
+    }
+
+    /// `var name [= initializer];` - a missing initializer means nil.
+    fn varDecl(self: *Compiler) !void {
+        var global = try self.parseVariable("Expect variable name.");
+
+        if (try self.match(.EQUAL)) {
+            try self.expression();
+        } else {
+            try self.emitByte(chunks.OpCode.Nil);
+        }
+
+        // check scopeDepth here
+
+        try self.consume(.SEMICOLON, "Expect ';' after variable declaration.");
+        try self.defineVariable(global);
+    }
+
+    /// Compile one declaration, resynchronizing afterwards if an error
+    /// put the parser into panic mode.
+    fn declaration(self: *Compiler) anyerror!void {
+        if (try self.match(.VAR)) {
+            try self.varDecl();
+        } else {
+            try self.statement();
+        }
+        if (self.parser.panicMode) try self.synchronize();
+    }
+
+    /// Compile declarations up to the closing '}' of a block.
+    fn block(self: *Compiler) anyerror!void {
+        while (!self.check(.RIGHT_BRACE) and !self.check(.EOF)) {
+            try self.declaration();
+        }
+
+        try self.consume(.RIGHT_BRACE, "Expect '}' after block.");
+    }
+
+    fn statement(self: *Compiler) !void {
+        if (try self.match(.PRINT)) {
+            try self.printStmt();
+        } else if (try self.match(.LEFT_BRACE)) {
+            self.beginScope();
+            try self.block();
+            try self.endScope();
+        } else {
+            try self.exprStmt();
+        }
+    }
+
+    /// Compile the source given when initializing the compiler
+    /// into the given chunk. Returns false if any compile error was reported.
+    pub fn compile(self: *Compiler, chunk: *Chunk) !bool {
+        self.scanr = try scanner.Scanner.init(self.allocator, self.src);
+
+        try self.advance();
+        while (!(try self.match(.EOF))) {
+            try self.declaration();
+        }
+        // try self.expression();
+        // try self.consume(.EOF, "Expect end of expression.");
+        try self.end();
+
+        return !self.parser.hadError;
+    }
+};
diff --git a/src/main.zig b/src/main.zig
new file mode 100644
index 0000000..4189e97
--- /dev/null
+++ b/src/main.zig
@@ -0,0 +1,128 @@
+const std = @import("std");
+
+const Allocator = std.mem.Allocator;
+
+// const Scanner = @import("scanner.zig").Scanner;
+const chunk = @import("chunk.zig");
+const vm = @import("vm.zig");
+
+const InterpretResult = vm.InterpretResult;
+
+//const Compiler = @import("compiler.zig").Compiler;
+
+pub var hadError = false;
+
+/// Interpret `data` in a fresh VM that is torn down before returning.
+fn run(allocator: *Allocator, data: []u8) !void {
+    var stdout_file = try std.io.getStdOut();
+    const stdout = &stdout_file.outStream().stream;
+
+    var vmach = try vm.VM.init(allocator, stdout, true);
+    defer vmach.deinit();
+    try vmach.interpret(data);
+}
+
+/// Interpret `data` in a VM owned by the caller. The VM is deliberately
+/// NOT deinitialized here: the REPL reuses it for the next line and
+/// deinitializes it once on exit.
+fn runWithVM(vmach: *vm.VM, data: []u8) !void {
+    try vmach.interpret(data);
+}
+
+pub fn doError(line: usize, message: []const u8) !void {
+    try errorReport(line, "", message);
+}
+
+/// Print an error for `line` to stdout and set the global `hadError`.
+pub fn errorReport(line: usize, where: []const u8, message: []const u8) !void {
+    var stdout_file = try std.io.getStdOut();
+    const stdout = &stdout_file.outStream().stream;
+
+    try stdout.print("[line {}] Error {}: {}\n", line, where, message);
+    hadError = true;
+}
+
+/// Read the file at `path` and run it. Exits the process with status 65
+/// on a compile error and 70 on a runtime error.
+fn runFile(allocator: *Allocator, path: []const u8) !void {
+    var lox_file = try std.fs.File.openRead(path);
+    defer lox_file.close();
+
+    const total_bytes = try lox_file.getEndPos();
+    var slice = try allocator.alloc(u8, total_bytes);
+    _ = try lox_file.read(slice);
+
+    run(allocator, slice) catch |err| {
+        switch (err) {
+            // the VM signals a normal exit through InterpretResult.Ok
+            InterpretResult.Ok => {},
+            InterpretResult.CompileError => std.os.exit(65),
+            InterpretResult.RuntimeError => std.os.exit(70),
+            else => return err,
+        }
+    };
+}
+
+/// Interactive prompt: one VM is kept for the whole session.
+fn runPrompt(allocator: *Allocator) !void {
+    var stdout_file = try std.io.getStdOut();
+    const stdout = &stdout_file.outStream().stream;
+
+    var vmach = try vm.VM.init(allocator, stdout, true);
+    defer vmach.deinit();
+
+    while (true) {
+        try stdout.print(">");
+        var buffer = try std.Buffer.init(allocator, ""[0..]);
+
+        var line = std.io.readLine(&buffer) catch |err| {
+            if (err == error.EndOfStream) return;
+
+            return err;
+        };
+
+        runWithVM(&vmach, line) catch |err| {
+            switch (err) {
+                InterpretResult.Ok => {},
+                InterpretResult.CompileError => blk: {
+                    try stdout.print("compile error.\n");
+                },
+                InterpretResult.RuntimeError => blk: {
+                    try stdout.print("runtime error.\n");
+                },
+                else => return err,
+            }
+        };
+
+        vmach.resetStack();
+    }
+}
+
+/// Entry point: run the file given as first argument, or start the
+/// prompt when there is none.
+pub fn main() anyerror!void {
+    var arena = std.heap.ArenaAllocator.init(std.heap.direct_allocator);
+    defer arena.deinit();
+    var allocator = &arena.allocator;
+
+    var args_it = std.process.args();
+
+    var jorts_arg0 = try (args_it.next(allocator) orelse {
+        // if you ever reach this, tell me what is your os lmao
+        unreachable;
+    });
+
+    var lox_path = try (args_it.next(allocator) orelse {
+        try runPrompt(allocator);
+        return;
+    });
+
+    try runFile(allocator, lox_path);
+}
+
+pub fn oldMain() !void {
+    var arena = std.heap.ArenaAllocator.init(std.heap.direct_allocator);
+    defer arena.deinit();
+    var allocator = &arena.allocator;
+
+    var stdout_file = try std.io.getStdOut();
+    var stdout = &stdout_file.outStream().stream;
+
+    // this crashes zig??? lol
+    // var chk = try chunk.Chunk.init(allocator);
+    //var opcode_byte: u8 = @enumToInt(chunk.OpCode.Return);
+    //try chk.write(chunk.OpCode.Return);
+}
diff --git a/src/object.zig b/src/object.zig
new file mode 100644
index 0000000..17418df
--- /dev/null
+++ b/src/object.zig
@@ -0,0 +1,54 @@
+const std = @import("std");
+const vm = @import("vm.zig");
+
+const Allocator = std.mem.Allocator;
+
+pub const ObjType = enum {
+    String,
+};
+
+pub const ObjValue = struct {
+    String: []u8,
+};
+
+/// A heap object. Objects are chained through `next` so the VM can walk
+/// and free all of them.
+pub const Object = struct {
+    otype: ObjType,
+    value: ObjValue,
+    next: ?*Object = null,
+};
+
+/// Allocate an object with the VM's allocator and push it onto the front
+/// of the VM's object list.
+pub fn allocateObject(
+    vmach: *vm.VM,
+    otype: ObjType,
+    value: ObjValue,
+) !*Object {
+    var obj = try vmach.allocator.create(Object);
+    obj.otype = otype;
+    obj.value = value;
+
+    obj.next = vmach.objs;
+    vmach.objs = obj;
+    return obj;
+}
+
+/// Wrap `data` in a String object without copying it.
+fn createString(vmach: *vm.VM, data: []u8) !*Object {
+    return allocateObject(vmach, ObjType.String, ObjValue{ .String = data });
+}
+
+/// Create a String object holding a fresh copy of `data`.
+pub fn copyString(vmach: *vm.VM, data: []const u8) !*Object {
+    var str = try vmach.allocator.alloc(u8, data.len);
+    // the copy has no owner yet if allocating the object itself fails
+    errdefer vmach.allocator.free(str);
+    std.mem.copy(u8, str, data);
+    return try createString(vmach, str);
+}
+
+/// Assumes it can take ownership of the given data.
+pub fn takeString(vmach: *vm.VM, data: []u8) !*Object {
+    return createString(vmach, data);
+}
+
+/// Print `obj` to `stdout` according to its object type.
+pub fn printObject(stdout: var, obj: Object) !void {
+    switch (obj.otype) {
+        .String => {
+            try stdout.print("{}", obj.value.String);
+        },
+        else => unreachable,
+    }
+}
diff --git a/src/scanner.zig b/src/scanner.zig
new file mode 100644
index 0000000..77807cd
--- /dev/null
+++ b/src/scanner.zig
@@ -0,0 +1,276 @@
+const std = @import("std");
+const tokens = @import("token.zig");
+
+const Token = tokens.Token;
+const TokenType = tokens.TokenType;
+
+const Allocator = std.mem.Allocator;
+
+/// Errors the scanner can raise while tokenizing.
+pub const TokenError = error{
+    Unexpected,
+    Unterminated,
+};
+
+/// True for the ASCII digits '0' through '9'.
+fn isDigit(char: u8) bool {
+    return switch (char) {
+        '0'...'9' => true,
+        else => false,
+    };
+}
+
+/// True for ASCII letters and the underscore.
+fn isAlpha(c: u8) bool {
+    return switch (c) {
+        'a'...'z', 'A'...'Z', '_' => true,
+        else => false,
+    };
+}
+
+/// True for any character that may appear inside an identifier.
+fn isAlphaNumeric(char: u8) bool {
+    if (isAlpha(char)) return true;
+    return isDigit(char);
+}
+
+/// Maps keyword text to the integer value of its TokenType.
+pub const KeywordMap = std.StringHashMap(u6);
+
+/// The book does say that C doesn't have hashmaps. but Zig does. and I can
+/// use it here.
+fn initKeywordMap(allocator: *std.mem.Allocator) !KeywordMap {
+    var map = KeywordMap.init(allocator);
+
+    // `keywords` and `tags` are parallel arrays: entry i of one belongs
+    // to entry i of the other.
+    const keywords = [_][]const u8{
+        "and"[0..],
+        "class"[0..],
+        "else"[0..],
+        "false"[0..],
+        "for"[0..],
+        "fun"[0..],
+        "if"[0..],
+        "nil"[0..],
+        "or"[0..],
+        "print"[0..],
+        "return"[0..],
+        "super"[0..],
+        "this"[0..],
+        "true"[0..],
+        "var"[0..],
+        "while"[0..],
+    };
+
+    const tags = [_]TokenType{
+        TokenType.AND,
+        TokenType.CLASS,
+        TokenType.ELSE,
+        TokenType.FALSE,
+        TokenType.FOR,
+        TokenType.FUN,
+        TokenType.IF,
+        TokenType.NIL,
+        TokenType.OR,
+        TokenType.PRINT,
+        TokenType.RETURN,
+        TokenType.SUPER,
+        TokenType.THIS,
+        TokenType.TRUE,
+        TokenType.VAR,
+        TokenType.WHILE,
+    };
+
+    for (keywords) |keyword, idx| {
+        var tag = @enumToInt(tags[idx]);
+        _ = try map.put(keyword, tag);
+    }
+
+    return map;
+}
+
+/// Turns source text into tokens, one `scanToken` call at a time.
+/// Token lexemes are slices into `source`, not copies.
+pub const Scanner = struct {
+    source: []const u8,
+    keywords: KeywordMap,
+
+    start: usize = 0,
+    current: usize = 0,
+    line: usize = 1,
+
+    allocator: *Allocator,
+
+    pub fn init(allocator: *Allocator, data: []const u8) !Scanner {
+        return Scanner{
+            .source = data,
+            .keywords = try initKeywordMap(allocator),
+            .allocator = allocator,
+        };
+    }
+
+    fn isAtEnd(self: *Scanner) bool {
+        return self.current >= self.source.len;
+    }
+
+    /// Consume and return the current character.
+    fn advance(self: *Scanner) u8 {
+        self.current += 1;
+        return self.source[self.current - 1];
+    }
+
+    /// Text of the token being scanned: `source[start..current]`.
+    pub fn currentLexeme(self: *Scanner) []const u8 {
+        return self.source[self.start..self.current];
+    }
+
+    fn makeToken(self: *Scanner, ttype: TokenType) Token {
+        return Token{
+            .ttype = ttype,
+            .lexeme = self.currentLexeme(),
+            .line = self.line,
+        };
+    }
+
+    /// Check if the next character matches what is expected.
+    /// Consumes it when it does.
+    fn match(self: *Scanner, expected: u8) bool {
+        if (self.isAtEnd()) return false;
+        if (self.source[self.current] != expected) return false;
+
+        self.current += 1;
+        return true;
+    }
+
+    /// Make a token of type_match if the next character is
+    /// `expected`, or a token of type_nomatch when it is not.
+    fn makeMatchToken(
+        self: *Scanner,
+        expected: u8,
+        type_match: TokenType,
+        type_nomatch: TokenType,
+    ) Token {
+        if (self.match(expected)) {
+            return self.makeToken(type_match);
+        } else {
+            return self.makeToken(type_nomatch);
+        }
+    }
+
+    /// Current character without consuming it; 0 at end of input.
+    fn peek(self: *Scanner) u8 {
+        if (self.isAtEnd()) return 0;
+        return self.source[self.current];
+    }
+
+    /// Character after the current one; 0 when there is none.
+    fn peekNext(self: *Scanner) u8 {
+        if (self.current + 1 >= self.source.len) return 0;
+        return self.source[self.current + 1];
+    }
+
+    /// Skip spaces, tabs, carriage returns and newlines, counting lines.
+    fn skipWhitespace(self: *Scanner) void {
+        while (true) {
+            var c = self.peek();
+            switch (c) {
+                ' ', '\r', '\t' => blk: {
+                    _ = self.advance();
+                },
+                '\n' => blk: {
+                    self.line += 1;
+                    _ = self.advance();
+                },
+                else => return,
+            }
+        }
+    }
+
+    /// Scan a string literal; the opening quote is already consumed.
+    fn doString(self: *Scanner) !Token {
+        // consume entire string
+        while (self.peek() != '"' and !self.isAtEnd()) {
+            if (self.peek() == '\n') self.line += 1;
+            _ = self.advance();
+        }
+
+        // unterminated string.
+        if (self.isAtEnd()) {
+            return TokenError.Unterminated;
+        }
+
+        // the closing ".
+        _ = self.advance();
+
+        // the lexeme keeps both surrounding quotes; whoever consumes the
+        // token is responsible for slicing them off.
+        return self.makeToken(.STRING);
+    }
+
+    /// Consume a number
+    fn doNumber(self: *Scanner) Token {
+        while (isDigit(self.peek())) {
+            _ = self.advance();
+        }
+
+        // check if its a number like 12.34, where the '.' character
+        // exists and the one next to it is a digit.
+        if (self.peek() == '.' and isDigit(self.peekNext())) {
+            _ = self.advance();
+
+            while (isDigit(self.peek())) {
+                _ = self.advance();
+            }
+        }
+
+        return self.makeToken(.NUMBER);
+    }
+
+    /// Either a keyword or an identifier come out of this.
+    fn doIdentifier(self: *Scanner) Token {
+        while (isAlphaNumeric(self.peek())) {
+            _ = self.advance();
+        }
+
+        // after reading the identifier, we check
+        // if it is any of our keywords, if it is, then we use
+        // the specified keyword type. if not, just .IDENTIFIER
+        var text = self.source[self.start..self.current];
+        var type_opt = self.keywords.get(text);
+        var toktype: TokenType = undefined;
+
+        if (type_opt) |kv| {
+            toktype = @intToEnum(TokenType, kv.value);
+        } else {
+            toktype = TokenType.IDENTIFIER;
+        }
+
+        return self.makeToken(toktype);
+    }
+
+    /// Scan the next token. Returns null when the input at this position
+    /// produced no token (a `//` comment); the caller should call again.
+    pub fn scanToken(self: *Scanner) !?Token {
+        self.skipWhitespace();
+        self.start = self.current;
+
+        if (self.isAtEnd()) return self.makeToken(TokenType.EOF);
+
+        var c = self.advance();
+        if (isAlpha(c)) return self.doIdentifier();
+        if (isDigit(c)) return self.doNumber();
+
+        var token = switch (c) {
+            '(' => self.makeToken(.LEFT_PAREN),
+            ')' => self.makeToken(.RIGHT_PAREN),
+            '{' => self.makeToken(.LEFT_BRACE),
+            '}' => self.makeToken(.RIGHT_BRACE),
+            ',' => self.makeToken(.COMMA),
+            '.' => self.makeToken(.DOT),
+            '-' => self.makeToken(.MINUS),
+            '+' => self.makeToken(.PLUS),
+            ';' => self.makeToken(.SEMICOLON),
+            '*' => self.makeToken(.STAR),
+
+            '!' => self.makeMatchToken('=', .BANG_EQUAL, .BANG),
+            '=' => self.makeMatchToken('=', .EQUAL_EQUAL, .EQUAL),
+            '<' => self.makeMatchToken('=', .LESS_EQUAL, .LESS),
+            '>' => self.makeMatchToken('=', .GREATER_EQUAL, .GREATER),
+
+            '/' => blk: {
+                // the first '/' is already consumed by advance(), so the
+                // second one is the *current* character (peek), not the
+                // one after it (peekNext).
+                if (self.peek() == '/') {
+                    while (self.peek() != '\n' and !self.isAtEnd()) {
+                        _ = self.advance();
+                    }
+
+                    break :blk null;
+                } else {
+                    break :blk self.makeToken(.SLASH);
+                }
+            },
+
+            '"' => try self.doString(),
+
+            else => return TokenError.Unexpected,
+        };
+
+        return token;
+    }
+};
diff --git a/src/token.zig b/src/token.zig
new file mode 100644
index 0000000..308e60e
--- /dev/null
+++ b/src/token.zig
@@ -0,0 +1,57 @@
+const std = @import("std");
+
+pub const TokenType = enum(u6) {
+    // Single-character tokens.
+    LEFT_PAREN,
+    RIGHT_PAREN,
+    LEFT_BRACE,
+    RIGHT_BRACE,
+    COMMA,
+    DOT,
+    MINUS,
+    PLUS,
+    SEMICOLON,
+    SLASH,
+    STAR,
+
+    // One or two character tokens.
+    BANG,
+    BANG_EQUAL,
+    EQUAL,
+    EQUAL_EQUAL,
+    GREATER,
+    GREATER_EQUAL,
+    LESS,
+    LESS_EQUAL,
+
+    // Literals.
+    IDENTIFIER,
+    STRING,
+    NUMBER,
+
+    // Keywords.
+    AND,
+    CLASS,
+    ELSE,
+    FALSE,
+    FUN,
+    FOR,
+    IF,
+    NIL,
+    OR,
+    PRINT,
+    RETURN,
+    SUPER,
+    THIS,
+    TRUE,
+    VAR,
+    WHILE,
+
+    EOF,
+};
+
+/// A scanned token. `lexeme` is a slice of the source text.
+pub const Token = struct {
+    ttype: TokenType = TokenType.EOF,
+    lexeme: []const u8 = ""[0..],
+    line: usize = 0,
+};
diff --git a/src/value.zig b/src/value.zig
new file mode 100644
index 0000000..b1c927c
--- /dev/null
+++ b/src/value.zig
@@ -0,0 +1,80 @@
+const std = @import("std");
+const objects = @import("object.zig");
+
+const Allocator = std.mem.Allocator;
+
+pub const ValueType = enum(u8) {
+    Bool,
+    Nil,
+    Number,
+    Object,
+};
+
+pub const ValueValue = union(ValueType) {
+    Bool: bool,
+    Nil: void,
+    Number: f64,
+    Object: *objects.Object,
+};
+
+/// A runtime value: a type tag plus its payload.
+pub const Value = struct {
+    vtype: ValueType,
+    as: ValueValue,
+};
+
+// helper functions
+pub fn BoolVal(val: bool) Value {
+    return Value{ .vtype = .Bool, .as = ValueValue{ .Bool = val } };
+}
+
+pub fn NilVal() Value {
+    return Value{ .vtype = .Nil, .as = ValueValue{ .Nil = {} } };
+}
+
+pub fn NumberVal(val: f64) Value {
+    return Value{ .vtype = .Number, .as = ValueValue{ .Number = val } };
+}
+
+pub fn ObjVal(val: *objects.Object) Value {
+    return Value{ .vtype = .Object, .as = ValueValue{ .Object = val } };
+}
+
+/// True when `val` is an object of type `otype`.
+pub fn isObjType(val: Value, otype: objects.ObjType) bool {
+    return val.vtype == .Object and val.as.Object.otype == otype;
+}
+
+/// Print `value` to `stdout` in its user-visible form.
+pub fn printValue(stdout: var, value: Value) !void {
+    switch (value.as) {
+        .Nil => try stdout.print("nil"),
+        .Bool => try stdout.print("{}", value.as.Bool),
+        .Number => try stdout.print("{}", value.as.Number),
+        .Object => try objects.printObject(stdout, value.as.Object.*),
+        else => unreachable,
+    }
+}
+
+/// Growable list of values, used as a chunk's constant table.
+pub const ValueList = struct {
+    count: usize,
+    values: []Value,
+    allocator: *Allocator,
+
+    pub fn init(allocator: *Allocator) !ValueList {
+        return ValueList{
+            .count = 0,
+            .allocator = allocator,
+            .values = try allocator.alloc(Value, 0),
+        };
+    }
+
+    /// Append `value`, growing the backing slice by one slot if needed.
+    pub fn write(self: *ValueList, value: Value) !void {
+        if (self.values.len < self.count + 1) {
+            self.values = try self.allocator.realloc(
+                self.values,
+                self.count + 1,
+            );
+        }
+
+        self.values[self.count] = value;
+        self.count += 1;
+    }
+};
diff --git a/src/vm.zig b/src/vm.zig
new file mode 100644
index 0000000..1a41288
--- /dev/null
+++ b/src/vm.zig
@@ -0,0 +1,411 @@
+const std = @import("std");
+const chunk = @import("chunk.zig");
+const value = @import("value.zig");
+const values = value;
+const compiler = @import("compiler.zig");
+const objects = @import("object.zig");
+
+const Chunk = chunk.Chunk;
+const Value = value.Value;
+const Compiler = compiler.Compiler;
+
+pub const StdOut = *std.io.OutStream(std.fs.File.WriteError);
+
+/// Outcome of interpreting a program, modelled as an error set. Note that
+/// `Ok` is also delivered as an error and must be caught by callers.
+pub const InterpretResult = error{
+    Ok,
+    CompileError,
+    RuntimeError,
+};
+
+/// `nil` and `false` are falsey; every other value is truthy.
+fn isFalsey(val: value.Value) bool {
+    return val.vtype == .Nil or (val.vtype == .Bool and !val.as.Bool);
+}
+
+/// Value equality. Values of different types are never equal.
+fn valuesEqual(a: value.Value, b: value.Value) bool {
+    if (a.vtype != b.vtype) return false;
+
+    switch (a.vtype) {
+        .Nil => return true,
+        .Bool => return a.as.Bool == b.as.Bool,
+        .Number => return a.as.Number == b.as.Number,
+        .Object => blk: {
+            // String is the only object type, so compare the bytes
+            var aStr = a.as.Object.value.String;
+            var bStr = b.as.Object.value.String;
+            return std.mem.compare(u8, aStr, bStr) == .Equal;
+        },
+    }
+}
+
+pub const ValueMap = std.StringHashMap(values.Value);
+
+/// Stack-based bytecode virtual machine.
+pub const VM = struct {
+    chk: *Chunk = undefined,
+    ip: usize = 0,
+
+    stack: []Value,
+    stackTop: usize = 0,
+
+    stdout: StdOut,
+    debug_flag: bool,
+    allocator: *std.mem.Allocator,
+
+    objs: ?*objects.Object = null,
+    globals: ValueMap,
+
+    /// Empty the value stack. Public because the prompt loop in main.zig
+    /// calls it after every line.
+    pub fn resetStack(self: *VM) void {
+        self.stackTop = 0;
+    }
+
+    pub fn init(
+        allocator: *std.mem.Allocator,
+        stdout: StdOut,
+        debug_flag: bool,
+    ) !VM {
+        var self = VM{
+            .stack = try allocator.alloc(Value, 256),
+            .stdout = stdout,
+            .debug_flag = debug_flag,
+            .allocator = allocator,
+
+            .globals = ValueMap.init(allocator),
+        };
+
+        self.resetStack();
+
+        return self;
+    }
+
+    /// Free one object together with the memory it owns.
+    fn deinitObject(self: *VM, obj: *objects.Object) void {
+        switch (obj.otype) {
+            .String => blk: {
+                self.allocator.free(obj.value.String);
+                self.allocator.destroy(obj);
+                break :blk;
+            },
+            else => unreachable,
+        }
+    }
+
+    /// Walk the object list and free every object on it.
+    fn deinitObjects(self: *VM) void {
+        var obj_opt: ?*objects.Object = self.objs;
+
+        // doing a while(obj != null) but with optionals
+        while (true) {
+            if (obj_opt) |obj| {
+                // read `next` before the object is destroyed
+                var next = obj.next;
+                self.deinitObject(obj);
+                obj_opt = next;
+            } else {
+                break;
+            }
+        }
+    }
+
+    /// Release the globals table and all heap objects.
+    pub fn deinit(self: *VM) void {
+        self.globals.deinit();
+        self.deinitObjects();
+    }
+
+    /// Print to stderr, but only when the debug flag is set.
+    pub fn debug(self: *VM, comptime fmt: []const u8, args: ...) void {
+        if (self.debug_flag) {
+            std.debug.warn(fmt, args);
+        }
+    }
+
+    /// Read the byte at `ip` and advance past it.
+    fn readByte(self: *VM) u8 {
+        var byte: u8 = self.chk.code[self.ip];
+        self.ip += 1;
+        return byte;
+    }
+
+    /// Read a one-byte constant index and return that constant.
+    fn readConst(self: *VM) Value {
+        return self.chk.constants.values[self.readByte()];
+    }
+
+    /// Read a three-byte constant index (most significant byte first)
+    /// and return that constant.
+    fn readConstLong(self: *VM) Value {
+        const v3 = self.readByte();
+        const v2 = self.readByte();
+        const v1 = self.readByte();
+        const const_idx = (@intCast(u24, v3) << 16) |
+            (@intCast(u24, v2) << 8) |
+            v1;
+
+        return self.chk.constants.values[const_idx];
+    }
+
+    /// Print the live part of the stack, bottom first.
+    fn debugStack(self: *VM) !void {
+        try self.stdout.print(" ");
+        for (self.stack) |val, idx| {
+            if (idx >= self.stackTop) break;
+
+            try self.stdout.print("[ ");
+            try value.printValue(self.stdout, val);
+            try self.stdout.print(" ]");
+        }
+        try self.stdout.print("\n");
+    }
+
+    /// gets a f64 out of a value on the top of the stack.
+    fn popNum(self: *VM) !f64 {
+        var val: Value = self.pop();
+
+        switch (val.vtype) {
+            .Number => return val.as.Number,
+
+            else => |vtype| blk: {
+                self.runtimeError("Expected number, got {x}", vtype);
+                return InterpretResult.RuntimeError;
+            },
+        }
+    }
+
+    /// Pop two strings and push their concatenation as a new string
+    /// object. The caller has already checked both operands are strings.
+    fn concatenateStrings(self: *VM) !void {
+        var b = self.pop().as.Object.value.String;
+        var a = self.pop().as.Object.value.String;
+
+        var res_str = try std.mem.join(
+            self.allocator,
+            "",
+            [_][]u8{ a, b },
+        );
+
+        var val = values.ObjVal(try objects.takeString(self, res_str));
+        try self.push(val);
+    }
+
+    /// `+` concatenates two strings; otherwise it adds two numbers.
+    fn doAdd(self: *VM) !void {
+        if (values.isObjType(self.peek(0), .String) and
+            values.isObjType(self.peek(1), .String))
+        {
+            return try self.concatenateStrings();
+        }
+
+        var b = try self.popNum();
+        var a = try self.popNum();
+        try self.push(values.NumberVal(a + b));
+    }
+
+    // The binary helpers below pop the right operand first, because it
+    // was pushed last.
+
+    fn doSub(self: *VM) !void {
+        var b = try self.popNum();
+        var a = try self.popNum();
+        try self.push(values.NumberVal(a - b));
+    }
+
+    fn doMul(self: *VM) !void {
+        var b = try self.popNum();
+        var a = try self.popNum();
+        try self.push(values.NumberVal(a * b));
+    }
+
+    fn doDiv(self: *VM) !void {
+        var b = try self.popNum();
+        var a = try self.popNum();
+        try self.push(values.NumberVal(a / b));
+    }
+
+    fn doGreater(self: *VM) !void {
+        var b = try self.popNum();
+        var a = try self.popNum();
+        try self.push(values.BoolVal(a > b));
+    }
+
+    fn doLess(self: *VM) !void {
+        var b = try self.popNum();
+        var a = try self.popNum();
+        try self.push(values.BoolVal(a < b));
+    }
+
+    /// Print a runtime error plus a source line to stderr and reset the
+    /// stack. Callers still have to return the error themselves.
+    fn runtimeError(self: *VM, comptime fmt: []const u8, args: ...) void {
+        std.debug.warn(fmt, args);
+        // NOTE(review): `ip` has already moved past the instruction that
+        // failed, so this may index one entry too far - confirm against
+        // how Chunk fills `lines`.
+        std.debug.warn("\n[line {}] in script\n", self.chk.lines[self.ip]);
+        self.resetStack();
+    }
+
+    /// Define global `name` with the value on top of the stack, then pop it.
+    fn defGlobal(self: *VM, name: []const u8) !void {
+        _ = try self.globals.put(name, self.peek(0));
+        _ = self.pop();
+    }
+
+    /// Read a one-byte constant index and return that constant's string.
+    fn readString(self: *VM) []u8 {
+        return self.readConst().as.Object.value.String;
+    }
+
+    /// Read a three-byte constant index and return that constant's string.
+    fn readStringLong(self: *VM) []u8 {
+        return self.readConstLong().as.Object.value.String;
+    }
+
+    /// Push the value of global `name`; runtime error if it is undefined.
+    fn doGetGlobal(self: *VM, name: []u8) !void {
+        var kv_opt = self.globals.get(name);
+
+        if (kv_opt) |kv| {
+            try self.push(kv.value);
+        } else {
+            self.runtimeError("Undefined variable '{}'.", name);
+            return InterpretResult.RuntimeError;
+        }
+    }
+
+    /// Assign the value on top of the stack to the existing global `name`.
+    /// Assigning to an undefined global is a runtime error and must not
+    /// create the variable.
+    fn doSetGlobal(self: *VM, name: []u8) !void {
+        var res = try self.globals.getOrPut(name);
+
+        if (res.found_existing) {
+            res.kv.value = self.peek(0);
+        } else {
+            // getOrPut has just inserted an entry with an undefined value;
+            // take it out again so the name stays undefined.
+            _ = self.globals.remove(name);
+            self.runtimeError("Undefined variable '{}'.", name);
+            return InterpretResult.RuntimeError;
+        }
+    }
+
+    /// Main dispatch loop. It only ever leaves through an error value:
+    /// `InterpretResult.Ok` on Return, `RuntimeError` on failure.
+    fn run(self: *VM) !void {
+        while (true) {
+            if (self.debug_flag) {
+                try self.debugStack();
+                _ = try self.chk.disassembleInstruction(self.stdout, self.ip);
+            }
+
+            var instruction = self.readByte();
+
+            switch (instruction) {
+                chunk.OpCode.Constant => blk: {
+                    var constant = self.readConst();
+                    try self.push(constant);
+                    break :blk;
+                },
+                chunk.OpCode.ConstantLong => blk: {
+                    var constant = self.readConstLong();
+                    try self.push(constant);
+                    break :blk;
+                },
+
+                chunk.OpCode.Print => blk: {
+                    try value.printValue(self.stdout, self.pop());
+                    try self.stdout.print("\n");
+                    break :blk;
+                },
+
+                chunk.OpCode.Return => blk: {
+                    // Exit VM
+                    return InterpretResult.Ok;
+                },
+
+                chunk.OpCode.Nil => try self.push(values.NilVal()),
+                chunk.OpCode.True => try self.push(values.BoolVal(true)),
+                chunk.OpCode.False => try self.push(values.BoolVal(false)),
+
+                chunk.OpCode.Pop => blk: {
+                    _ = self.pop();
+                },
+
+                // locals live directly in their stack slot
+                chunk.OpCode.GetLocal => blk: {
+                    var slot = self.readByte();
+                    try self.push(self.stack[slot]);
+                },
+                chunk.OpCode.SetLocal => blk: {
+                    var slot = self.readByte();
+                    self.stack[slot] = self.peek(0);
+                },
+
+                chunk.OpCode.GetGlobal => blk: {
+                    try self.doGetGlobal(self.readString());
+                },
+                chunk.OpCode.SetGlobal => blk: {
+                    try self.doSetGlobal(self.readString());
+                    break :blk;
+                },
+
+                // extracting the name differs depending on the op code:
+                // one uses a single byte, the other uses three bytes
+                // since it's a u24.
+                chunk.OpCode.DefineGlobal => blk: {
+                    try self.defGlobal(self.readString());
+                    break :blk;
+                },
+                chunk.OpCode.DefineGlobalLong => blk: {
+                    try self.defGlobal(self.readStringLong());
+                    break :blk;
+                },
+
+                chunk.OpCode.Equal => blk: {
+                    var a = self.pop();
+                    var b = self.pop();
+                    try self.push(values.BoolVal(valuesEqual(a, b)));
+                },
+
+                chunk.OpCode.Greater => try self.doGreater(),
+                chunk.OpCode.Less => try self.doLess(),
+
+                chunk.OpCode.Add => try self.doAdd(),
+                chunk.OpCode.Subtract => try self.doSub(),
+                chunk.OpCode.Multiply => try self.doMul(),
+                chunk.OpCode.Divide => try self.doDiv(),
+                chunk.OpCode.Not => blk: {
+                    try self.push(values.BoolVal(isFalsey(self.pop())));
+                },
+
+                chunk.OpCode.Negate => blk: {
+                    // only numbers can be negated; check before popping
+                    var val = self.peek(0);
+                    if (val.vtype != .Number) {
+                        self.runtimeError("Operand must be a number.");
+                        return InterpretResult.RuntimeError;
+                    }
+
+                    val = self.pop();
+                    switch (val.as) {
+                        .Number => |num| {
+                            try self.push(values.NumberVal(-num));
+                        },
+                        else => unreachable,
+                    }
+                },
+
+                else => blk: {
+                    std.debug.warn("Unknown instruction: {x}\n", instruction);
+                    return InterpretResult.RuntimeError;
+                },
+            }
+        }
+    }
+
+    pub fn interpret(self: *VM, src: []const u8) !void {
+        //self.ip = 0;
+        //self.debug("VM start\n");
+        //var res = try self.run();
+        //self.debug("VM end\n");
+        //return res;
+        var chk = try Chunk.init(self.allocator);
+
+        var cmpr = Compiler.init(
+            self.allocator,
+            &chk,
+            self.stdout,
+            src,
+            self.debug_flag,
+            self,
+        );
+        if (!try cmpr.compile(&chk)) {
+            return InterpretResult.CompileError;
+        }
+
+ self.chk = &chk; + self.ip = 0; + return try self.run(); + } + + pub fn push(self: *VM, val: Value) !void { + if (self.stackTop > 0 and self.stackTop - 1 > self.stack.len) { + self.stack = try self.allocator.realloc(self.stack, self.stack.len + 1); + } + + self.stack[self.stackTop] = val; + self.stackTop += 1; + } + + pub fn pop(self: *VM) Value { + self.stackTop -= 1; + return self.stack[self.stackTop]; + } + + pub fn peek(self: *VM, distance: usize) Value { + return self.stack[self.stackTop - 1 - distance]; + } +};