diff --git a/.gitignore b/.gitignore index 3cef7be..0447b8b 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,116 @@ -zig-cache/ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/README.md b/README.md index b8e6fc4..9b3bc4e 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,19 @@ # jorts -a compiler for the lox language from https://craftinginterpreters.com +jorts programming language -this is a learning project. the implemtation is based heavily off the C part -of the book, but also the Java part for the scanner. 
+## installing -## notes - - - jorts' lox bytecode is not compatible with any implementation. - -## how do? - -``` -zig build run +```sh +git clone https://gitdab.com/luna/jorts +cd jorts +pip install --user --editable . ``` -and play around with it +## using + +right now, it's not pretty, nor finished + +``` +cat examples/hello.jt | jortsc +``` diff --git a/build.zig b/build.zig deleted file mode 100644 index 371246c..0000000 --- a/build.zig +++ /dev/null @@ -1,15 +0,0 @@ -const Builder = @import("std").build.Builder; - -pub fn build(b: *Builder) void { - const mode = b.standardReleaseOptions(); - const exe = b.addExecutable("jorts", "src/main.zig"); - exe.setBuildMode(mode); - - const run_cmd = exe.run(); - - const run_step = b.step("run", "Run the app"); - run_step.dependOn(&run_cmd.step); - - b.default_step.dependOn(&exe.step); - b.installArtifact(exe); -} diff --git a/examples/add.jt b/examples/add.jt new file mode 100644 index 0000000..447355f --- /dev/null +++ b/examples/add.jt @@ -0,0 +1,23 @@ +import io + +fn add (int a, int b) -> int { + a + b +} + +// return type is void by default +fn main () { + // explicit types, or + int val = add(2, 2) + + // type inferred from the function's return value + val := add(2, 2) + + // variables are immutable, however, you can update them with + // the value of the old one. + val = val + 1 + + // a shorthand is val++, same for val--. 
+ + // string interpolation is implicit + io.puts("2 plus 2 = {val}") +} diff --git a/examples/closures.jt b/examples/closures.jt new file mode 100644 index 0000000..6a1291a --- /dev/null +++ b/examples/closures.jt @@ -0,0 +1,22 @@ +import io + +fn main () { + x := 0 + + // since variables are immutable but updatable, x is 1 inside the closure, but + // 0 inside main() + fn inner() { + x++ + } + + inner() + + // shows 0 + io.puts("x is {x}") + + // however, if you explicitly update x: + x = inner() + + // shows 1 + io.puts("x is {x}") +} diff --git a/examples/custom-types.jt b/examples/custom-types.jt new file mode 100644 index 0000000..534e670 --- /dev/null +++ b/examples/custom-types.jt @@ -0,0 +1,11 @@ +import io + +// you can create your own types with 'type' +type T = int + +fn main () { + T a = 2 + + // since T is int, io.puts with an int works + io.puts(a) +} diff --git a/examples/extending-structs.jt b/examples/extending-structs.jt new file mode 100644 index 0000000..3d8d8bc --- /dev/null +++ b/examples/extending-structs.jt @@ -0,0 +1,15 @@ + +struct A { + int a, + int b +} + +struct B <- A { + int c +} + +fn main () { + a := A{1, 2} + b := B{1, 2, 3} +} + diff --git a/examples/function-overload.jt b/examples/function-overload.jt new file mode 100644 index 0000000..e47830a --- /dev/null +++ b/examples/function-overload.jt @@ -0,0 +1,15 @@ +import io +import integer + +fn my_puts(string str) { + io.puts(str) +} + +fn my_puts(int my_int) { + io.puts(integer.to_str(my_int)) +} + +fn main () { + my_puts(2) + my_puts("aaa") +} diff --git a/examples/hello.jt b/examples/hello.jt new file mode 100644 index 0000000..4bc50f6 --- /dev/null +++ b/examples/hello.jt @@ -0,0 +1,11 @@ +import io + +// if a return type is not defined, it is implicitly void and so the function +// returns nil (the only instance of void) + +// main can return int or void, void mains are handled by jortsc +fn main () -> int { + // todo: put it back to io.puts + ioputs("pants") + 0 +} diff --git 
a/examples/higher-order-functions.jt b/examples/higher-order-functions.jt new file mode 100644 index 0000000..34776dd --- /dev/null +++ b/examples/higher-order-functions.jt @@ -0,0 +1,26 @@ +import io + +// takes a function that receives two ints, returns an int +// Func is the function type keyword, to not switch it with fn (which declares +// a function) +fn function_tester (Func func ([int, int] -> int)) -> int { + func(2, 2) +} + +fn add(int a, int b) -> int { + a + b +} + +fn main () { + // passes the function add to function_tester + res := function_tester(add) + + // you can also create functions and put them in variables. not putting a + // function name on the fn block makes it return a Func instance to be put + // in a variable + anonymous := (fn () {}) + + // anonymous has type Func ([] -> void) + + io.puts("res = {res}") +} diff --git a/examples/sockets.jt b/examples/sockets.jt new file mode 100644 index 0000000..08ab425 --- /dev/null +++ b/examples/sockets.jt @@ -0,0 +1,10 @@ +import socket +import io + +fn main () { + sock := socket.tcp_connect("example.com", 80) + sock.send("HTTP/1.1\n") + frame := sock.recv(1024) + sock.close() + io.puts(frame) +} diff --git a/examples/strings.jt b/examples/strings.jt new file mode 100644 index 0000000..75f0931 --- /dev/null +++ b/examples/strings.jt @@ -0,0 +1,15 @@ +import io + +fn main () { + s := "this is a string" + io.puts(s) + + s := "this is {s}" + io.puts(s) + + s := s + 2 // invalid + + // this however, is valid, there is an io.puts that handles int, + // more on function overload in a bit + io.puts(2) +} diff --git a/examples/struct-functions.jt b/examples/struct-functions.jt new file mode 100644 index 0000000..38df73e --- /dev/null +++ b/examples/struct-functions.jt @@ -0,0 +1,60 @@ +import io + +struct A { + int val1, + int val2 +} + +// self is injected and represents the struct A +// from the functions' definition +fn A:sum_fields() -> int { + self.val1 + self.val2 +} + +// type of sum_fields is: +// 
def main():
    """Entry point of the ``jortsc`` command.

    Reads the whole program text from standard input, lexes it with
    ``lex_jorts``, pretty-prints the resulting token list, then builds the
    tree with ``syntatic`` and prints it.
    """
    # read() signals end of input by returning '' -- it does not raise
    # EOFError.  The previous ``except EOFError: pass`` arm was therefore
    # dead code, and had it ever run it would have left ``in_data`` unbound
    # for the call below.
    in_data = sys.stdin.read()

    tokens = lex_jorts(in_data)
    pprint.pprint(tokens)

    tree = syntatic(tokens)
    print(tree)
class TokenType(Enum):
    """Defines the type of a token"""
    reserved = auto()
    identifier = auto()
    comment = auto()
    comment_start = auto()
    comment_end = auto()
    whitespace = auto()
    number = auto()
    string = auto()


@dataclass
class Token:
    """One lexeme (``value``) plus the category it was matched as."""
    value: str
    type_: TokenType


class LexerError(Exception):
    """Lexer error."""


# Ordered (pattern, type) pairs.  lex() tries them top to bottom and keeps
# the first one that matches, so more specific patterns must come first
# (e.g. '->' and '/*' before the single-character math operators).
TOKENS = [
    (r'[ \n\t]+', TokenType.whitespace),

    # single line comments and multiline comments
    (r'//[^\n]*', TokenType.comment),

    # TODO: shouldnt this be /* */ instead of
    # only tokenizing on the start and end?
    (r'/\*', TokenType.comment_start),
    (r'\*/', TokenType.comment_end),

    # keywords end on a word boundary; without it an identifier such as
    # 'fnord' or 'important' was split into a keyword plus a remainder.
    (r'fn\b', TokenType.reserved),
    (r'if\b', TokenType.reserved),
    (r'import\b', TokenType.reserved),

    (r'\(', TokenType.reserved),
    (r'\)', TokenType.reserved),

    (r'\{', TokenType.reserved),
    (r'\}', TokenType.reserved),

    (r'\-\>', TokenType.reserved),
    (r'\.', TokenType.reserved),

    # a string stops at the first unescaped quote.  The previous greedy
    # pattern ran to the LAST quote on the line, merging '"a" + "b"' into
    # a single token.  Backslash escapes (\") stay inside the string.
    (r'"(?:[^"\\\n]|\\.)*"', TokenType.string),

    # basic math ops
    (r'[\+\-\/\*]', TokenType.reserved),

    (r'[0-9]+', TokenType.number),
    (r'[A-Za-z][A-Za-z0-9_]*', TokenType.identifier)
]


def lex(string: str, token_defs: list) -> list:
    """Generate tokens out of the given string.

    Args:
        string: source text to tokenize.
        token_defs: ordered ``(regex pattern, TokenType)`` pairs; at each
            position the first pattern that matches wins.

    Returns:
        A list of ``Token`` covering the whole input, whitespace and
        comments included.

    Raises:
        LexerError: when no definition matches at the current position.
    """
    # compile every pattern once up front instead of once per token.
    compiled = [(re.compile(pattern), tok_type)
                for pattern, tok_type in token_defs]

    pos = 0
    strlen = len(string)
    tokens = []

    # a while loop rather than ``for pos in range(...)`` because pos jumps
    # forward by a whole token length on every match.
    while pos < strlen:
        for regex, tok_type in compiled:
            match = regex.match(string, pos)

            # an empty match makes no progress; treating it as a match
            # would spin forever on the same position.
            if match is None or match.end() == pos:
                continue

            tokens.append(Token(match.group(0), tok_type))

            # jump to the end of the token and restart from the first
            # definition, so e.g. a reserved word never falls through to
            # the identifier pattern.
            pos = match.end()
            break
        else:
            # clamp the start: at pos == 0 a bare ``pos - 1`` is -1, which
            # slices from the END of the string and loses the context.
            context = string[max(pos - 1, 0):pos + 20]
            raise LexerError(f'Invalid character: {string[pos]!r} '
                             f'(position {pos}, near {context!r})')

    return tokens


def lex_jorts(string: str) -> list:
    """Lex with the jorts token definitions"""
    return lex(string, TOKENS)
class ParseError(Exception):
    """Represents a parse error."""


class Reader:
    """Cursor over a list of tokens.

    ``cur`` is the index of the next token to hand out.  Every accessor
    copes with ``cur`` being at or past the end of ``tokens``.
    """

    def __init__(self, tokens: List[Token]):
        self.tokens = tokens
        self.cur = 0

    def __repr__(self):
        # read_start() prints the reader for debugging; an empty repr made
        # that output useless.
        return (f'<Reader cur={self.cur} tot={len(self.tokens)} '
                f'cur_tok={self.peek()!r}>')

    def peek(self) -> Optional[Token]:
        """Return the current token without consuming it, None at EOF."""
        if self.cur < len(self.tokens):
            return self.tokens[self.cur]
        return None

    def next(self) -> Optional[Token]:
        """Fetch the current token then skip to the next one."""
        token = self.peek()
        self.cur += 1
        return token

    def next_safe(self) -> Token:
        """'Safe' version of next().

        Raises an 'Unexpected EOF' error if next() returns None.
        """
        token = self.next()

        if token is None:
            raise ParseError('Unexpected EOF')

        return token

    def expect(self, token_type: TokenType) -> Token:
        """Consume a token and raise ParseError unless it has the given type.

        Running out of tokens is reported as ParseError too (it used to
        surface as AttributeError on None).
        """
        token = self.next_safe()

        if token.type_ != token_type:
            raise ParseError(f'Expected {token_type}, got '
                             f'{token.type_} {token.value!r}')

        return token

    def expect_val(self, value: str) -> Token:
        """Consume a token and raise ParseError unless its text is ``value``."""
        token = self.next_safe()

        if token.value != value:
            raise ParseError(f'Expected {value!r}, got '
                             f'{token.type_} {token.value!r}')

        return token

    def ignore(self, token_type: TokenType):
        """Only increase self.cur if token_type is the upcoming token.

        A no-op at EOF.  Uses a plain comparison instead of ``assert``,
        which disappears under ``python -O`` and never caught the
        IndexError raised at end of input.
        """
        token = self.peek()

        if token is not None and token.type_ == token_type:
            self.cur += 1


def _fn_read_args(reader: Reader, cur: List = None) -> List:
    """Read ``type name`` pairs up to and including the closing ')'.

    ``cur`` lets a caller pass a list to append to; a fresh list is used
    when it is None.
    """
    if cur is None:
        cur = []

    while True:
        # either the type of the next argument or the closing paren
        token = reader.next_safe()

        if token.value == ')':
            return cur

        reader.expect(TokenType.whitespace)
        argname = reader.next_safe()

        cur.append(TypedVar(token.value, argname.value))


def _fn_ret_type(reader: Reader) -> ReturnType:
    """Fetch the return type of a function. Defaults to void.

    Only looks ahead: when the upcoming token is not '->' nothing is
    consumed.  The previous version consumed that token before giving up,
    so a function without a return type lost the '{' opening its body.
    """
    token = reader.peek()

    if token is None or token.value != '->':
        return ReturnType('void')

    reader.next()
    reader.ignore(TokenType.whitespace)
    return ReturnType(reader.expect(TokenType.identifier).value)


def read_function(reader: Reader):
    """Read a function block.

    Called right after the ``fn`` keyword.  Handles both ``fn name (...)``
    and the anonymous form ``fn (...)``, which gets the name
    ``_anonymous``.

    Raises:
        ParseError: when ``fn`` is followed by neither a name nor '('.
    """
    reader.expect(TokenType.whitespace)

    token = reader.next_safe()
    print('function token', token)

    fn_name = '_anonymous'

    if token.type_ == TokenType.identifier:
        fn_name = token.value
        reader.expect(TokenType.whitespace)
        reader.expect_val('(')
    elif token.value != '(':
        # previously fell through and died with an unbound-local error
        raise ParseError('Expected function name or "(", got '
                         f'{token.type_} {token.value!r}')

    fn_args = _fn_read_args(reader)

    # only skip whitespace if we see it
    reader.ignore(TokenType.whitespace)
    fn_ret_type = _fn_ret_type(reader)

    reader.ignore(TokenType.whitespace)
    block = read_start(reader)

    print('final function', fn_name, fn_args, fn_ret_type, block)

    return Function(fn_name, fn_args, fn_ret_type, block)


def read_import(reader):
    """Read an import"""
    reader.expect(TokenType.whitespace)
    module = reader.next_safe()
    return Import(module.value)


# reserved word -> function that parses what follows it
HANDLERS = {
    'fn': read_function,
    'import': read_import,
}


def read_reserved(token: Token, reader: Reader):
    """Read reserved statements."""
    handler = HANDLERS.get(token.value)

    if handler is None:
        raise ParseError(f'Unexpected reserved word {token.value!r}')

    return handler(reader)


def read_value(token: Token, _reader: Reader):
    """Turn a string or number token into its node; None for anything else."""
    if token.type_ == TokenType.string:
        return String(token.value)

    if token.type_ == TokenType.number:
        return Number(token.value)

    return None


def read_statement(token: Token, reader: Reader):
    """Read a statement that starts with the identifier ``token``."""
    # TODO: handle more things than a function call
    call_fn_name = token.value
    reader.expect_val('(')
    args = []

    while True:
        token = reader.next_safe()

        if token.value == ')':
            break

        args.append(read_value(token, reader))

    return FunctionCall(call_fn_name, args)


def read_start(reader: Reader):
    """Read one top-level item, or a whole ``{ ... }`` block.

    Returns None at end of input, an empty list for a comment, otherwise a
    one-element list holding the parsed item (for a block: the list of
    everything inside it).

    Raises:
        ParseError: when the input ends before a block is closed.
    """
    print('reader', reader)

    token = reader.next()

    if token is None:
        print('eof!')
        return None

    # handle blocks
    if token.value == '{':
        print('block start!, next:', reader.peek())
        reader.ignore(TokenType.whitespace)
        res = []

        while True:
            token = reader.peek()
            print('block append', token)

            if token is None:
                # used to crash on ``None.value``
                raise ParseError('Unexpected EOF inside block')

            if token.value == '}':
                print('block end')
                reader.next()
                break

            # peek() was not None, so read_start() returns a list here
            res.extend(read_start(reader))

    # import, fn, etc
    elif token.type_ == TokenType.reserved:
        res = read_reserved(token, reader)

    elif token.type_ == TokenType.comment:
        return []

    elif token.type_ == TokenType.identifier:
        res = read_statement(token, reader)

    else:
        res = read_value(token, reader)

    return [res]


def read_loop(reader: Reader):
    """Read the AST."""
    final_ast = []

    while True:
        ast = read_start(reader)

        # break when eof
        if ast is None:
            break

        # TODO: better ast cleanup

        final_ast.append(ast)

    return final_ast


def syntatic(tokens: List[Token]):
    """Create an AST out of the tokens."""
    return read_loop(Reader(tokens))
@import("value.zig"); -const Allocator = std.mem.Allocator; - -// hack. ugly hack. zig has compiler crash. -const AllOpcodes = struct { - Return: u8 = 0, - Constant: u8 = 1, - ConstantLong: u8 = 2, - Add: u8 = 3, - Subtract: u8 = 4, - Multiply: u8 = 5, - Divide: u8 = 6, - Negate: u8 = 7, - - // basic type op codes - Nil: u8 = 8, - True: u8 = 9, - False: u8 = 10, - - Not: u8 = 11, - - // comparison op codes! - Equal: u8 = 12, - Greater: u8 = 13, - Less: u8 = 14, - - Print: u8 = 15, - Pop: u8 = 16, - - DefineGlobal: u8 = 17, - DefineGlobalLong: u8 = 18, - GetGlobal: u8 = 19, - GetGlobalLong: u8 = 20, - SetGlobal: u8 = 21, - SetGlobalLong: u8 = 22, - - GetLocal: u8 = 23, - SetLocal: u8 = 24, -}; - -pub const OpCode = AllOpcodes{}; - -fn simpleInstruction( - stdout: var, - comptime name: []const u8, - index: usize, -) !usize { - try stdout.print("{}\n", name); - return index + 1; -} - -fn constantInstruction( - stdout: var, - comptime name: []const u8, - chunk: *Chunk, - index: usize, -) !usize { - // get the constant's index in constants slice - var idx = chunk.code[index + 1]; - - try stdout.print("\t{}\t{} '", name, idx); - try value.printValue(stdout, chunk.constants.values[idx]); - try stdout.print("'\n"); - - return index + 2; -} - -fn constantLongInstruction( - stdout: var, - comptime name: []const u8, - chunk: *Chunk, - offset: usize, -) !usize { - // constantLong uses three u8's that encode a u24 as the - // contants' index. 
- var v3: u8 = chunk.code[offset + 1]; - var v2: u8 = chunk.code[offset + 2]; - var v1: u8 = chunk.code[offset + 3]; - - var idx: u24 = (@intCast(u24, v3) << 16) | (@intCast(u24, v2) << 8) | v1; - - try stdout.print("\t{}\t{} '", name, idx); - try value.printValue(stdout, chunk.constants.values[idx]); - try stdout.print("'\n"); - - return offset + 4; -} - -fn byteInstruction( - stdout: var, - name: []const u8, - chunk: *Chunk, - index: usize, -) !usize { - var slot: u8 = chunk.code[index + 1]; - try stdout.print("{} {}", name, slot); - return index + 2; -} - -pub const ConstantIndexTag = enum { - Small, - Long, -}; - -pub const ConstantIndex = union(ConstantIndexTag) { - Small: u8, - Long: [3]u8, -}; - -pub const Chunk = struct { - count: usize, - lines: []usize, - code: []u8, - - allocator: *Allocator, - constants: value.ValueList, - - pub fn init(allocator: *Allocator) !Chunk { - return Chunk{ - .count = 0, - .allocator = allocator, - .code = try allocator.alloc(u8, 0), - .lines = try allocator.alloc(usize, 0), - .constants = try value.ValueList.init(allocator), - }; - } - - pub fn write(self: *Chunk, byte: u8, line: usize) !void { - if (self.code.len < self.count + 1) { - self.code = try self.allocator.realloc( - self.code, - self.count + 1, - ); - - self.lines = try self.allocator.realloc( - self.lines, - self.count + 1, - ); - } - - self.code[self.count] = byte; - self.lines[self.count] = line; - self.count += 1; - } - - pub fn addConstant(self: *Chunk, val: value.Value) !u8 { - try self.constants.write(val); - return self.constants.count - 1; - } - - pub fn writeConstantRaw( - self: *Chunk, - val: value.Value, - line: usize, - ) !ConstantIndex { - try self.constants.write(val); - var constant_idx = self.constants.count - 1; - - if (constant_idx < 256) { - var idx_small = @intCast(u8, constant_idx); - return ConstantIndex{ .Small = idx_small }; - } else { - var idx_u24: u24 = @intCast(u24, constant_idx); - - const mask = @intCast(u24, 0xff); - - const v1: u8 = 
@intCast(u8, idx_u24 & mask); - const v2: u8 = @intCast(u8, (idx_u24 >> 8) & mask); - const v3: u8 = @intCast(u8, (idx_u24 >> 16) & mask); - - return ConstantIndex{ .Long = [_]u8{ v3, v2, v1 } }; - } - } - - pub fn writeConstant( - self: *Chunk, - val: value.Value, - line: usize, - ) !ConstantIndex { - var idx = try self.writeConstantRaw(val, line); - - switch (idx) { - .Small => |idx_small| blk: { - try self.write(OpCode.Constant, line); - try self.write(idx_small, line); - break :blk; - }, - .Long => |long_u8| blk: { - try self.write(OpCode.ConstantLong, line); - try self.write(long_u8[0], line); - try self.write(long_u8[1], line); - try self.write(long_u8[2], line); - }, - else => unreachable, - } - - return idx; - } - - pub fn disassembleInstruction( - self: *Chunk, - stdout: var, - index: usize, - ) !usize { - try stdout.print("{} ", index); - - if (index > 0 and self.lines[index] == self.lines[index - 1]) { - try stdout.print(" | "); - } else { - try stdout.print("{} ", self.lines[index]); - } - - var instruction = self.code[index]; - - if (instruction == OpCode.Return) { - return try simpleInstruction(stdout, "OP_RETURN", index); - } else if (instruction == OpCode.Constant) { - return try constantInstruction(stdout, "OP_CONSTANT", self, index); - } else if (instruction == OpCode.ConstantLong) { - return try constantLongInstruction( - stdout, - "OP_CONSTANT_LONG", - self, - index, - ); - } else if (instruction == OpCode.Negate) { - return try simpleInstruction(stdout, "OP_NEGATE", index); - } else if (instruction == OpCode.Add) { - return try simpleInstruction(stdout, "OP_ADD", index); - } else if (instruction == OpCode.Subtract) { - return try simpleInstruction(stdout, "OP_SUBTRACT", index); - } else if (instruction == OpCode.Multiply) { - return try simpleInstruction(stdout, "OP_MULTIPLY", index); - } else if (instruction == OpCode.Divide) { - return try simpleInstruction(stdout, "OP_DIVIDE", index); - } else if (instruction == OpCode.Nil) { - return try 
simpleInstruction(stdout, "OP_NIL", index); - } else if (instruction == OpCode.True) { - return try simpleInstruction(stdout, "OP_TRUE", index); - } else if (instruction == OpCode.False) { - return try simpleInstruction(stdout, "OP_FALSE", index); - } else if (instruction == OpCode.Not) { - return try simpleInstruction(stdout, "OP_NOT", index); - } else if (instruction == OpCode.Equal) { - return try simpleInstruction(stdout, "OP_EQUAL", index); - } else if (instruction == OpCode.Greater) { - return try simpleInstruction(stdout, "OP_GREATER", index); - } else if (instruction == OpCode.Less) { - return try simpleInstruction(stdout, "OP_LESS", index); - } else if (instruction == OpCode.Print) { - return try simpleInstruction(stdout, "OP_PRINT", index); - } else if (instruction == OpCode.Pop) { - return try simpleInstruction(stdout, "OP_POP", index); - } else if (instruction == OpCode.DefineGlobal) { - return try constantInstruction(stdout, "OP_DEFGLOBAL", self, index); - } else if (instruction == OpCode.DefineGlobalLong) { - return try constantLongInstruction(stdout, "OP_DEFGLOBAL_LONG", self, index); - } else if (instruction == OpCode.GetGlobal) { - return try constantInstruction(stdout, "OP_GETGLOBAL", self, index); - } else if (instruction == OpCode.GetGlobalLong) { - return try constantLongInstruction(stdout, "OP_GETGLOBAL_LONG", self, index); - } else if (instruction == OpCode.SetGlobal) { - return try constantInstruction(stdout, "OP_SETGLOBAL", self, index); - } else if (instruction == OpCode.SetGlobalLong) { - return try constantLongInstruction(stdout, "OP_SETGLOBAL_LONG", self, index); - } else if (instruction == OpCode.GetLocal) { - return try byteInstruction(stdout, "OP_GETLOCAL", self, index); - } else if (instruction == OpCode.SetLocal) { - return try byteInstruction(stdout, "OP_GETLOCAL", self, index); - } else { - try stdout.print("Unknown opcode: {}\n", instruction); - return index + 1; - } - } - - pub fn disassemble(self: *Chunk, stdout: var, name: 
[]const u8) !void { - try stdout.print("== {} ==\n", name); - - var i: usize = 0; - while (i < self.count) { - i = try self.disassembleInstruction(stdout, i); - } - } -}; diff --git a/src/compiler.zig b/src/compiler.zig deleted file mode 100644 index 12e710b..0000000 --- a/src/compiler.zig +++ /dev/null @@ -1,587 +0,0 @@ -const std = @import("std"); -const scanner = @import("scanner.zig"); -const vm = @import("vm.zig"); -const chunks = @import("chunk.zig"); -const tokens = @import("token.zig"); -const values = @import("value.zig"); -const objects = @import("object.zig"); - -const Allocator = std.mem.Allocator; -const Scanner = scanner.Scanner; -const Chunk = chunks.Chunk; -const Token = tokens.Token; -const TokenType = tokens.TokenType; -const Value = values.Value; -const OpCode = chunks.OpCode; - -/// Holds parser state for the compiler. -const Parser = struct { - previous: Token = undefined, - current: Token = undefined, - - // TODO are those needed - hadError: bool = false, - panicMode: bool = false, -}; - -/// Represents the order of operations in the parser. -const Precedence = enum(u5) { - None, - Assignment, // = - Or, // or - And, // and - Equality, // == != - Comparison, // < > <= >= - Term, // + - - Factor, // * / - Unary, // ! - - Call, // . () [] - Primary, -}; - -const ParseFn = fn (*Compiler, bool) anyerror!void; - -const ParseRule = struct { - prefix: ?ParseFn = null, - infix: ?ParseFn = null, - precedence: Precedence = Precedence.None, -}; - -/// For each token, this defines a parse rule for it. 
-var rules = [_]ParseRule{ - // for LEFT_PAREN, we determine it as a call precedence - // plus a prefix parse function of grouping - ParseRule{ .prefix = Compiler.grouping, .precedence = .Call }, - ParseRule{}, - ParseRule{}, - ParseRule{}, - ParseRule{}, - - // dot token, means a call too, for things like a.b - ParseRule{ .precedence = .Call }, - - // specific to -, as it can be an unary operator when its a prefix - // of something, or a binary one, when its a infix or another thing. - ParseRule{ - .prefix = Compiler.unary, - .infix = Compiler.binary, - .precedence = .Term, - }, - - ParseRule{ .infix = Compiler.binary, .precedence = .Term }, - ParseRule{}, - - // slash is a binary operator, as well as star. - ParseRule{ .infix = Compiler.binary, .precedence = .Factor }, - ParseRule{ .infix = Compiler.binary, .precedence = .Factor }, - - // as the token enum says, those are 1/2 char tokens. - ParseRule{ .prefix = Compiler.unary }, - // this is specifically for the != operator - ParseRule{ .infix = Compiler.binary, .precedence = .Equality }, - ParseRule{}, - // this is specifically for the == operator - ParseRule{ .infix = Compiler.binary, .precedence = .Equality }, - - // all the comparison ones - ParseRule{ .infix = Compiler.binary, .precedence = .Comparison }, - ParseRule{ .infix = Compiler.binary, .precedence = .Comparison }, - ParseRule{ .infix = Compiler.binary, .precedence = .Comparison }, - ParseRule{ .infix = Compiler.binary, .precedence = .Comparison }, - - ParseRule{ .prefix = Compiler.variable }, - ParseRule{ .prefix = Compiler.string }, - ParseRule{ .prefix = Compiler.number }, - ParseRule{ .precedence = .And }, - ParseRule{}, - ParseRule{}, - - // false - ParseRule{ .prefix = Compiler.literal }, - ParseRule{}, - ParseRule{}, - ParseRule{}, - ParseRule{ .prefix = Compiler.literal }, - ParseRule{ .precedence = .Or }, - ParseRule{}, - ParseRule{}, - ParseRule{}, - ParseRule{}, - ParseRule{ .prefix = Compiler.literal }, - ParseRule{}, - ParseRule{}, - 
ParseRule{}, -}; - -pub const Local = struct { - name: tokens.Token, - depth: i32, -}; - -pub const Compiler = struct { - src: []const u8, - stdout: vm.StdOut, - allocator: *Allocator, - parser: Parser, - scanr: Scanner = undefined, - chunk: *chunks.Chunk, - debug_flag: bool = false, - vmach: *vm.VM, - - locals: [256]Local, - localCount: i32 = 0, - scopeDepth: i32 = 0, - - pub fn init( - allocator: *Allocator, - chunk: *chunks.Chunk, - stdout: vm.StdOut, - source: []const u8, - debug_flag: bool, - vmach: *vm.VM, - ) Compiler { - return Compiler{ - .src = source, - .chunk = chunk, - .allocator = allocator, - .stdout = stdout, - .parser = Parser{}, - .debug_flag = debug_flag, - .vmach = vmach, - - // local variable resolution - .locals = [_]Local{Local{ - .name = Token{}, - .depth = -1, - }} ** 256, - }; - } - - fn errorAt(self: *Compiler, token: Token, msg: []const u8) void { - if (self.parser.panicMode) return; - self.parser.panicMode = true; - - std.debug.warn("[line {}] Error", token.line); - if (token.ttype == TokenType.EOF) { - std.debug.warn(" at end"); - } else { - std.debug.warn(" at '{}'", token.lexeme); - } - - std.debug.warn(": {}\n", msg); - self.parser.hadError = true; - } - - fn errorCurrent(self: *Compiler, msg: []const u8) void { - self.errorAt(self.parser.current, msg); - } - - fn errorPrevious(self: *Compiler, msg: []const u8) void { - self.errorAt(self.parser.previous, msg); - } - - fn advance(self: *Compiler) !void { - self.parser.previous = self.parser.current; - - while (true) { - var token_opt = try self.scanr.scanToken(); - if (token_opt) |token| { - self.parser.current = token; - break; - } - } - } - - fn consume(self: *Compiler, ttype: TokenType, msg: []const u8) !void { - if (self.parser.current.ttype == ttype) { - try self.advance(); - return; - } - - self.errorCurrent(msg); - } - - fn check(self: *Compiler, ttype: TokenType) bool { - return self.parser.current.ttype == ttype; - } - - fn match(self: *Compiler, ttype: TokenType) !bool { - 
if (!(self.check(ttype))) return false; - - try self.advance(); - return true; - } - - fn currentChunk(self: *Compiler) *chunks.Chunk { - return self.chunk; - } - - fn emitByte(self: *Compiler, byte: u8) !void { - try self.currentChunk().write(byte, self.parser.previous.line); - } - - fn emitBytes(self: *Compiler, byte1: u8, byte2: u8) !void { - try self.emitByte(byte1); - try self.emitByte(byte2); - } - - fn emitReturn(self: *Compiler) !void { - try self.emitByte(OpCode.Return); - } - - fn emitConstant(self: *Compiler, value: Value) !void { - _ = try self.currentChunk().writeConstant( - value, - self.parser.previous.line, - ); - } - - fn end(self: *Compiler) !void { - try self.emitReturn(); - - if (self.debug_flag and !self.parser.hadError) { - try self.currentChunk().disassemble(self.stdout, "code"); - } - } - - fn beginScope(self: *Compiler) void { - self.scopeDepth += 1; - } - - fn endScope(self: *Compiler) !void { - self.scopeDepth -= 1; - - // clear the current scope in the stack - while (self.localCount > 0 and self.locals[@intCast(usize, self.localCount - 1)].depth > self.scopeDepth) { - try self.emitByte(chunks.OpCode.Pop); - self.localCount -= 1; - } - } - - fn grouping(self: *Compiler, canAssign: bool) !void { - try self.expression(); - try self.consume(.RIGHT_PAREN, "Expect ')' after expression."); - } - - /// Emits bytecode for a number being loaded into the code. - fn number(self: *Compiler, canAssign: bool) !void { - var value: f64 = try std.fmt.parseFloat( - f64, - self.parser.previous.lexeme, - ); - try self.emitConstant(values.NumberVal(value)); - } - - fn string(self: *Compiler, canAssign: bool) !void { - const lexeme_len = self.parser.previous.lexeme.len; - - try self.emitConstant(values.ObjVal(try objects.copyString( - self.vmach, - self.parser.previous.lexeme[1 .. 
lexeme_len - 1], - ))); - } - - fn resolveLocal(self: *Compiler, name: *Token) i32 { - var i = self.localCount - 1; - while (i >= 0) : (i -= 1) { - var idx = @intCast(usize, i); - var local = &self.locals[idx]; - if (std.mem.eql(u8, name.lexeme, local.name.lexeme)) { - if (local.depth == -1) { - self.errorCurrent("Cannot read local variable in its own initializer."); - } - return i; - } - } - - return -1; - } - - fn namedVariable(self: *Compiler, tok: *Token, canAssign: bool) !void { - // writeConstant always writes OP_CODE which may be not - // what we want, so. - var getOp: u8 = undefined; - var setOp: u8 = undefined; - - // we try to resolve the local. depending if it gets resolved - // or not, we select the necessary get/set op codes. - var arg: i32 = self.resolveLocal(tok); - - if (arg != -1) { - getOp = chunks.OpCode.GetLocal; - setOp = chunks.OpCode.SetLocal; - } else { - arg = (try self.identifierConstant(tok)).Small; - getOp = chunks.OpCode.GetGlobal; - setOp = chunks.OpCode.SetGlobal; - } - - var idx: u8 = @intCast(u8, arg); - - if (canAssign and try self.match(.EQUAL)) { - try self.expression(); - try self.emitBytes(setOp, idx); - } else { - try self.emitBytes(getOp, idx); - } - } - - fn variable(self: *Compiler, canAssign: bool) !void { - try self.namedVariable(&self.parser.previous, canAssign); - } - - /// Emits bytecode for a given unary. 
- fn unary(self: *Compiler, canAssign: bool) !void { - var ttype = self.parser.previous.ttype; - try self.parsePrecedence(.Unary); - - switch (ttype) { - .MINUS => try self.emitByte(OpCode.Negate), - .BANG => try self.emitByte(OpCode.Not), - else => unreachable, - } - } - - fn binary(self: *Compiler, canAssign: bool) !void { - var op_type = self.parser.previous.ttype; - var rule: *ParseRule = self.getRule(op_type); - try self.parsePrecedence(@intToEnum(Precedence, @enumToInt(rule.precedence) + 1)); - - switch (op_type) { - .PLUS => try self.emitByte(OpCode.Add), - .MINUS => try self.emitByte(OpCode.Subtract), - .STAR => try self.emitByte(OpCode.Multiply), - .SLASH => try self.emitByte(OpCode.Divide), - - .EQUAL_EQUAL => try self.emitByte(OpCode.Equal), - .GREATER => try self.emitByte(OpCode.Greater), - .LESS => try self.emitByte(OpCode.Less), - - .BANG_EQUAL => try self.emitBytes(OpCode.Equal, OpCode.Not), - .GREATER_EQUAL => try self.emitBytes(OpCode.Less, OpCode.Not), - .LESS_EQUAL => try self.emitBytes(OpCode.Greater, OpCode.Not), - - else => unreachable, - } - } - - fn literal(self: *Compiler, canAssign: bool) !void { - switch (self.parser.previous.ttype) { - .FALSE => try self.emitByte(OpCode.False), - .NIL => try self.emitByte(OpCode.Nil), - .TRUE => try self.emitByte(OpCode.True), - else => unreachable, - } - } - - fn parsePrecedence(self: *Compiler, precedence: Precedence) anyerror!void { - try self.advance(); - var as_int = @enumToInt(precedence); - var prefix_rule_opt = self.getRule(self.parser.previous.ttype).prefix; - - if (prefix_rule_opt) |prefix_rule| { - var canAssign: bool = as_int <= @enumToInt(Precedence.Assignment); - try prefix_rule(self, canAssign); - - while (as_int <= @enumToInt(self.getRule(self.parser.current.ttype).precedence)) { - try self.advance(); - var infix_rule_opt = self.getRule(self.parser.previous.ttype).infix; - if (infix_rule_opt) |infix_rule| { - try infix_rule(self, canAssign); - } - } - - if (canAssign and try 
self.match(.EQUAL)) { - self.errorPrevious("Invalid assignment target."); - try self.expression(); - } - } else { - self.errorPrevious("Expect expression."); - return; - } - } - - fn getRule(self: *Compiler, ttype: TokenType) *ParseRule { - return &rules[@enumToInt(ttype)]; - } - - fn expression(self: *Compiler) anyerror!void { - try self.parsePrecedence(.Assignment); - } - - fn printStmt(self: *Compiler) !void { - try self.expression(); - try self.consume(.SEMICOLON, "Expect ';' after value."); - try self.emitByte(OpCode.Print); - } - - fn exprStmt(self: *Compiler) !void { - try self.expression(); - try self.consume(.SEMICOLON, "Expect ';' after expression."); - try self.emitByte(OpCode.Pop); - } - - fn synchronize(self: *Compiler) !void { - self.parser.panicMode = false; - - while (self.parser.current.ttype != .EOF) { - if (self.parser.previous.ttype == .SEMICOLON) return; - - switch (self.parser.current.ttype) { - .CLASS, .FUN, .VAR, .FOR, .IF, .WHILE, .PRINT, .RETURN => return, - else => {}, - } - - try self.advance(); - } - } - - /// Write an identifier constant to the bytecode. - fn identifierConstant( - self: *Compiler, - token: *Token, - ) !chunks.ConstantIndex { - return try self.currentChunk().writeConstantRaw(values.ObjVal(try objects.copyString( - self.vmach, - token.lexeme, - )), token.line); - } - - fn addLocal(self: *Compiler, name: Token) void { - if (self.localCount == 256) { - self.errorCurrent("Too many variables in function."); - return; - } - - self.localCount += 1; - var local: *Local = &self.locals[@intCast(usize, self.localCount)]; - local.name = name; - //local.depth = self.scopeDepth; - local.depth = -1; - } - - fn declareVariable(self: *Compiler) void { - if (self.scopeDepth == 0) return; - var name: *Token = &self.parser.previous; - - // check if we're redeclaring an existing variable - // in the *CURRENT* scope. 
- - // go from current down to global - var i = self.localCount; - while (i >= 0) : (i -= 1) { - var local = self.locals[@intCast(usize, i)]; - if (local.depth == -1 and local.depth < self.scopeDepth) break; - - if (std.mem.eql(u8, name.lexeme, local.name.lexeme)) { - self.errorCurrent("Variable with this name already declared in this scope."); - } - } - - self.addLocal(name.*); - } - - fn parseVariable(self: *Compiler, msg: []const u8) !chunks.ConstantIndex { - try self.consume(.IDENTIFIER, msg); - self.declareVariable(); - if (self.scopeDepth > 0) return chunks.ConstantIndex{ .Small = 0 }; - return try self.identifierConstant(&self.parser.previous); - } - - fn emitConstWithIndex( - self: *Compiler, - op_short: u8, - op_long: u8, - idx: chunks.ConstantIndex, - ) !void { - switch (idx) { - .Small => |val| try self.emitBytes(op_short, val), - .Long => |val| blk: { - try self.emitByte(op_long); - try self.emitByte(val[0]); - try self.emitByte(val[1]); - try self.emitByte(val[2]); - }, - else => unreachable, - } - } - - fn markInitialized(self: *Compiler) void { - if (self.scopeDepth == 0) return; - var idx = @intCast(usize, self.localCount); - self.locals[idx].depth = self.scopeDepth; - } - - fn defineVariable(self: *Compiler, global: chunks.ConstantIndex) !void { - if (self.scopeDepth > 0) { - self.markInitialized(); - return; - } - - try self.emitConstWithIndex( - chunks.OpCode.DefineGlobal, - chunks.OpCode.DefineGlobalLong, - global, - ); - } - - fn varDecl(self: *Compiler) !void { - var global = try self.parseVariable("Expect variable name."); - - if (try self.match(.EQUAL)) { - try self.expression(); - } else { - try self.emitByte(chunks.OpCode.Nil); - } - - // check scopeDepth here - - try self.consume(.SEMICOLON, "Expect ';' after variable declaration."); - try self.defineVariable(global); - } - - fn declaration(self: *Compiler) anyerror!void { - if (try self.match(.VAR)) { - try self.varDecl(); - } else { - try self.statement(); - } - if 
(self.parser.panicMode) try self.synchronize(); - } - - fn block(self: *Compiler) anyerror!void { - while (!self.check(.RIGHT_BRACE) and !self.check(.EOF)) { - try self.declaration(); - } - - try self.consume(.RIGHT_BRACE, "Expect '}' after block."); - } - - fn statement(self: *Compiler) !void { - if (try self.match(.PRINT)) { - try self.printStmt(); - } else if (try self.match(.LEFT_BRACE)) { - self.beginScope(); - try self.block(); - try self.endScope(); - } else { - try self.exprStmt(); - } - } - - /// Compile the source given when initializing the compiler - /// into the given chunk. - pub fn compile(self: *Compiler, chunk: *Chunk) !bool { - self.scanr = try scanner.Scanner.init(self.allocator, self.src); - - try self.advance(); - while (!(try self.match(.EOF))) { - try self.declaration(); - } - // try self.expression(); - // try self.consume(.EOF, "Expect end of expression."); - try self.end(); - - return !self.parser.hadError; - } -}; diff --git a/src/main.zig b/src/main.zig deleted file mode 100644 index 4189e97..0000000 --- a/src/main.zig +++ /dev/null @@ -1,128 +0,0 @@ -const std = @import("std"); - -const Allocator = std.mem.Allocator; - -// const Scanner = @import("scanner.zig").Scanner; -const chunk = @import("chunk.zig"); -const vm = @import("vm.zig"); - -const InterpretResult = vm.InterpretResult; - -//const Compiler = @import("compiler.zig").Compiler; - -pub var hadError = false; - -fn run(allocator: *Allocator, data: []u8) !void { - var stdout_file = try std.io.getStdOut(); - const stdout = &stdout_file.outStream().stream; - - var vmach = try vm.VM.init(allocator, stdout, true); - defer vmach.deinit(); - try vmach.interpret(data); -} - -fn runWithVM(vmach: *vm.VM, data: []u8) !void { - var stdout_file = try std.io.getStdOut(); - const stdout = &stdout_file.outStream().stream; - - defer vmach.deinit(); - try vmach.interpret(data); -} - -pub fn doError(line: usize, message: []const u8) !void { - try errorReport(line, "", message); -} - -pub fn 
errorReport(line: usize, where: []const u8, message: []const u8) !void { - var stdout_file = try std.io.getStdOut(); - const stdout = &stdout_file.outStream().stream; - - try stdout.print("[line {}] Error {}: {}\n", line, where, message); - hadError = true; -} - -fn runFile(allocator: *Allocator, path: []const u8) !void { - var lox_file = try std.fs.File.openRead(path); - defer lox_file.close(); - - const total_bytes = try lox_file.getEndPos(); - var slice = try allocator.alloc(u8, total_bytes); - _ = try lox_file.read(slice); - - run(allocator, slice) catch |err| { - switch (err) { - InterpretResult.Ok => {}, - InterpretResult.CompileError => std.os.exit(65), - InterpretResult.RuntimeError => std.os.exit(70), - else => return err, - } - }; -} - -fn runPrompt(allocator: *Allocator) !void { - var stdout_file = try std.io.getStdOut(); - const stdout = &stdout_file.outStream().stream; - - var vmach = try vm.VM.init(allocator, stdout, true); - defer vmach.deinit(); - - while (true) { - try stdout.print(">"); - var buffer = try std.Buffer.init(allocator, ""[0..]); - - var line = std.io.readLine(&buffer) catch |err| { - if (err == error.EndOfStream) return; - - return err; - }; - - runWithVM(&vmach, line) catch |err| { - switch (err) { - InterpretResult.Ok => {}, - InterpretResult.CompileError => blk: { - try stdout.print("compile error.\n"); - }, - InterpretResult.RuntimeError => blk: { - try stdout.print("runtime error.\n"); - }, - else => return err, - } - }; - - vmach.resetStack(); - } -} - -pub fn main() anyerror!void { - var arena = std.heap.ArenaAllocator.init(std.heap.direct_allocator); - defer arena.deinit(); - var allocator = &arena.allocator; - - var args_it = std.process.args(); - - var jorts_arg0 = try (args_it.next(allocator) orelse { - // if you ever reach this, tell me what is your os lmao - unreachable; - }); - - var lox_path = try (args_it.next(allocator) orelse { - try runPrompt(allocator); - return; - }); - - try runFile(allocator, lox_path); -} - 
-pub fn oldMain() !void { - var arena = std.heap.ArenaAllocator.init(std.heap.direct_allocator); - defer arena.deinit(); - var allocator = &arena.allocator; - - var stdout_file = try std.io.getStdOut(); - var stdout = &stdout_file.outStream().stream; - - // this crashes zig??? lol - // var chk = try chunk.Chunk.init(allocator); - //var opcode_byte: u8 = @enumToInt(chunk.OpCode.Return); - //try chk.write(chunk.OpCode.Return); -} diff --git a/src/object.zig b/src/object.zig deleted file mode 100644 index 17418df..0000000 --- a/src/object.zig +++ /dev/null @@ -1,54 +0,0 @@ -const std = @import("std"); -const vm = @import("vm.zig"); - -const Allocator = std.mem.Allocator; - -pub const ObjType = enum { - String, -}; - -pub const ObjValue = struct { - String: []u8, -}; - -pub const Object = struct { - otype: ObjType, - value: ObjValue, - next: ?*Object = null, -}; - -pub fn allocateObject( - vmach: *vm.VM, - otype: ObjType, - value: ObjValue, -) !*Object { - var obj = try vmach.allocator.create(Object); - obj.otype = otype; - obj.value = value; - - obj.next = vmach.objs; - vmach.objs = obj; - return obj; -} - -fn createString(vmach: *vm.VM, data: []u8) !*Object { - return allocateObject(vmach, ObjType.String, ObjValue{ .String = data }); -} - -pub fn copyString(vmach: *vm.VM, data: []const u8) !*Object { - var str = try vmach.allocator.alloc(u8, data.len); - std.mem.copy(u8, str, data); - return try createString(vmach, str); -} - -/// Assumes it can take ownership of the given data. 
-pub fn takeString(vmach: *vm.VM, data: []u8) !*Object { - return try createString(vmach, data); -} - -pub fn printObject(stdout: var, obj: Object) !void { - switch (obj.otype) { - .String => try stdout.print("{}", obj.value.String), - else => unreachable, - } -} diff --git a/src/scanner.zig b/src/scanner.zig deleted file mode 100644 index 77807cd..0000000 --- a/src/scanner.zig +++ /dev/null @@ -1,276 +0,0 @@ -const std = @import("std"); -const tokens = @import("token.zig"); - -const Token = tokens.Token; -const TokenType = tokens.TokenType; - -const Allocator = std.mem.Allocator; - -pub const TokenError = error{ - Unexpected, - Unterminated, -}; - -fn isDigit(char: u8) bool { - return char >= '0' and char <= '9'; -} - -fn isAlpha(c: u8) bool { - return (c >= 'a' and c <= 'z') or - (c >= 'A' and c <= 'Z') or - c == '_'; -} - -fn isAlphaNumeric(char: u8) bool { - return isAlpha(char) or isDigit(char); -} - -pub const KeywordMap = std.StringHashMap(u6); - -/// The book does say that C doesn't have hashmaps. but Zig does. and I can -/// use it here. 
-fn initKeywordMap(allocator: *std.mem.Allocator) !KeywordMap { - var map = KeywordMap.init(allocator); - - const keywords = [_][]const u8{ - "and"[0..], - "class"[0..], - "else"[0..], - "false"[0..], - "for"[0..], - "fun"[0..], - "if"[0..], - "nil"[0..], - "or"[0..], - "print"[0..], - "return"[0..], - "super"[0..], - "this"[0..], - "true"[0..], - "var"[0..], - "while"[0..], - }; - - const tags = [_]TokenType{ - TokenType.AND, - TokenType.CLASS, - TokenType.ELSE, - TokenType.FALSE, - TokenType.FOR, - TokenType.FUN, - TokenType.IF, - TokenType.NIL, - TokenType.OR, - TokenType.PRINT, - TokenType.RETURN, - TokenType.SUPER, - TokenType.THIS, - TokenType.TRUE, - TokenType.VAR, - TokenType.WHILE, - }; - - for (keywords) |keyword, idx| { - var tag = @enumToInt(tags[idx]); - _ = try map.put(keyword, tag); - } - - return map; -} - -pub const Scanner = struct { - source: []const u8, - keywords: KeywordMap, - - start: usize = 0, - current: usize = 0, - line: usize = 1, - - allocator: *Allocator, - - pub fn init(allocator: *Allocator, data: []const u8) !Scanner { - return Scanner{ - .source = data, - .keywords = try initKeywordMap(allocator), - .allocator = allocator, - }; - } - - fn isAtEnd(self: *Scanner) bool { - return self.current >= self.source.len; - } - - fn advance(self: *Scanner) u8 { - self.current += 1; - return self.source[self.current - 1]; - } - - pub fn currentLexeme(self: *Scanner) []const u8 { - return self.source[self.start..self.current]; - } - - fn makeToken(self: *Scanner, ttype: TokenType) Token { - return Token{ - .ttype = ttype, - .lexeme = self.currentLexeme(), - .line = self.line, - }; - } - - /// Check if the next character matches what is expected. - fn match(self: *Scanner, expected: u8) bool { - if (self.isAtEnd()) return false; - if (self.source[self.current] != expected) return false; - - self.current += 1; - return true; - } - - /// Add a SimpleToken of type_match if the next character is - /// `expected`. 
Adds a SimpleToken of type_nomatch when it is not. - fn makeMatchToken( - self: *Scanner, - expected: u8, - type_match: TokenType, - type_nomatch: TokenType, - ) Token { - if (self.match(expected)) { - return self.makeToken(type_match); - } else { - return self.makeToken(type_nomatch); - } - } - - fn peek(self: *Scanner) u8 { - if (self.isAtEnd()) return 0; - return self.source[self.current]; - } - - fn peekNext(self: *Scanner) u8 { - if (self.current + 1 >= self.source.len) return 0; - return self.source[self.current + 1]; - } - - fn skipWhitespace(self: *Scanner) void { - while (true) { - var c = self.peek(); - switch (c) { - ' ', '\r', '\t' => blk: { - _ = self.advance(); - }, - '\n' => blk: { - self.line += 1; - _ = self.advance(); - }, - else => return, - } - } - } - - fn doString(self: *Scanner) !Token { - // consume entire string - while (self.peek() != '"' and !self.isAtEnd()) { - if (self.peek() == '\n') self.line += 1; - _ = self.advance(); - } - - // unterminated string. - if (self.isAtEnd()) { - return TokenError.Unterminated; - } - - // the closing ". - _ = self.advance(); - - // trim the surrounding quotes. - return self.makeToken(.STRING); - } - - /// Consume a number - fn doNumber(self: *Scanner) Token { - while (isDigit(self.peek())) { - _ = self.advance(); - } - - // check if its a number like 12.34, where the '.' character - // exists and the one next to it is a digit. - if (self.peek() == '.' and isDigit(self.peekNext())) { - _ = self.advance(); - - while (isDigit(self.peek())) { - _ = self.advance(); - } - } - - return self.makeToken(.NUMBER); - } - - /// Either a keyword or an identifier come out of this. - fn doIdentifier(self: *Scanner) Token { - while (isAlphaNumeric(self.peek())) { - _ = self.advance(); - } - - // after reading the identifier, we check - // if it is any of our keywords, if it is, then we add - // the specificed keyword type. 
if not, just .IDENTIFIER - var text = self.source[self.start..self.current]; - var type_opt = self.keywords.get(text); - var toktype: TokenType = undefined; - - if (type_opt) |kv| { - toktype = @intToEnum(TokenType, kv.value); - } else { - toktype = TokenType.IDENTIFIER; - } - - return self.makeToken(toktype); - } - - pub fn scanToken(self: *Scanner) !?Token { - self.skipWhitespace(); - self.start = self.current; - - if (self.isAtEnd()) return self.makeToken(TokenType.EOF); - - var c = self.advance(); - if (isAlpha(c)) return self.doIdentifier(); - if (isDigit(c)) return self.doNumber(); - - var token = switch (c) { - '(' => self.makeToken(.LEFT_PAREN), - ')' => self.makeToken(.RIGHT_PAREN), - '{' => self.makeToken(.LEFT_BRACE), - '}' => self.makeToken(.RIGHT_BRACE), - ',' => self.makeToken(.COMMA), - '.' => self.makeToken(.DOT), - '-' => self.makeToken(.MINUS), - '+' => self.makeToken(.PLUS), - ';' => self.makeToken(.SEMICOLON), - '*' => self.makeToken(.STAR), - - '!' => self.makeMatchToken('=', .BANG_EQUAL, .BANG), - '=' => self.makeMatchToken('=', .EQUAL_EQUAL, .EQUAL), - '<' => self.makeMatchToken('=', .LESS_EQUAL, .LESS), - '>' => self.makeMatchToken('=', .GREATER_EQUAL, .GREATER), - - '/' => blk: { - if (self.peekNext() == '/') { - while (self.peek() != '\n' and !self.isAtEnd()) { - _ = self.advance(); - } - - break :blk null; - } else { - break :blk self.makeToken(.SLASH); - } - }, - - '"' => try self.doString(), - - else => return TokenError.Unexpected, - }; - - return token; - } -}; diff --git a/src/token.zig b/src/token.zig deleted file mode 100644 index 308e60e..0000000 --- a/src/token.zig +++ /dev/null @@ -1,57 +0,0 @@ -const std = @import("std"); - -pub const TokenType = enum(u6) { - // Single-character tokens. - LEFT_PAREN, - RIGHT_PAREN, - LEFT_BRACE, - RIGHT_BRACE, - COMMA, - DOT, - MINUS, - PLUS, - SEMICOLON, - SLASH, - STAR, - - // One or two character tokens. 
- BANG, - BANG_EQUAL, - EQUAL, - EQUAL_EQUAL, - GREATER, - GREATER_EQUAL, - LESS, - LESS_EQUAL, - - // Literals. - IDENTIFIER, - STRING, - NUMBER, - - // Keywords. - AND, - CLASS, - ELSE, - FALSE, - FUN, - FOR, - IF, - NIL, - OR, - PRINT, - RETURN, - SUPER, - THIS, - TRUE, - VAR, - WHILE, - - EOF, -}; - -pub const Token = struct { - ttype: TokenType = TokenType.EOF, - lexeme: []const u8 = ""[0..], - line: usize = 0, -}; diff --git a/src/value.zig b/src/value.zig deleted file mode 100644 index b1c927c..0000000 --- a/src/value.zig +++ /dev/null @@ -1,80 +0,0 @@ -const std = @import("std"); -const objects = @import("object.zig"); - -const Allocator = std.mem.Allocator; - -pub const ValueType = enum(u8) { - Bool, - Nil, - Number, - Object, -}; - -pub const ValueValue = union(ValueType) { - Bool: bool, - Nil: void, - Number: f64, - Object: *objects.Object, -}; - -pub const Value = struct { - vtype: ValueType, - as: ValueValue, -}; - -// helper functions -pub fn BoolVal(val: bool) Value { - return Value{ .vtype = .Bool, .as = ValueValue{ .Bool = val } }; -} - -pub fn NilVal() Value { - return Value{ .vtype = .Nil, .as = ValueValue{ .Nil = {} } }; -} - -pub fn NumberVal(val: f64) Value { - return Value{ .vtype = .Number, .as = ValueValue{ .Number = val } }; -} - -pub fn ObjVal(val: *objects.Object) Value { - return Value{ .vtype = .Object, .as = ValueValue{ .Object = val } }; -} - -pub fn isObjType(val: Value, otype: objects.ObjType) bool { - return val.vtype == .Object and val.as.Object.otype == otype; -} - -pub fn printValue(stdout: var, value: Value) !void { - switch (value.as) { - .Nil => try stdout.print("nil"), - .Bool => try stdout.print("{}", value.as.Bool), - .Number => try stdout.print("{}", value.as.Number), - .Object => try objects.printObject(stdout, value.as.Object.*), - else => unreachable, - } -} - -pub const ValueList = struct { - count: usize, - values: []Value, - allocator: *Allocator, - - pub fn init(allocator: *Allocator) !ValueList { - return 
ValueList{ - .count = 0, - .allocator = allocator, - .values = try allocator.alloc(Value, 0), - }; - } - - pub fn write(self: *ValueList, value: Value) !void { - if (self.values.len < self.count + 1) { - self.values = try self.allocator.realloc( - self.values, - self.count + 1, - ); - } - - self.values[self.count] = value; - self.count += 1; - } -}; diff --git a/src/vm.zig b/src/vm.zig deleted file mode 100644 index 1a41288..0000000 --- a/src/vm.zig +++ /dev/null @@ -1,411 +0,0 @@ -const std = @import("std"); -const chunk = @import("chunk.zig"); -const value = @import("value.zig"); -const values = value; -const compiler = @import("compiler.zig"); -const objects = @import("object.zig"); - -const Chunk = chunk.Chunk; -const Value = value.Value; -const Compiler = compiler.Compiler; - -pub const StdOut = *std.io.OutStream(std.fs.File.WriteError); - -pub const InterpretResult = error{ - Ok, - CompileError, - RuntimeError, -}; - -fn isFalsey(val: value.Value) bool { - return val.vtype == .Nil or (val.vtype == .Bool and !val.as.Bool); -} - -fn valuesEqual(a: value.Value, b: value.Value) bool { - if (a.vtype != b.vtype) return false; - - switch (a.vtype) { - .Nil => return true, - .Bool => return a.as.Bool == b.as.Bool, - .Number => return a.as.Number == b.as.Number, - .Object => blk: { - var aStr = a.as.Object.value.String; - var bStr = b.as.Object.value.String; - return std.mem.compare(u8, aStr, bStr) == .Equal; - }, - } -} - -pub const ValueMap = std.StringHashMap(values.Value); - -pub const VM = struct { - chk: *Chunk = undefined, - ip: usize = 0, - - stack: []Value, - stackTop: usize = 0, - - stdout: StdOut, - debug_flag: bool, - allocator: *std.mem.Allocator, - - objs: ?*objects.Object = null, - globals: ValueMap, - - fn resetStack(self: *VM) void { - self.stackTop = 0; - } - - pub fn init( - allocator: *std.mem.Allocator, - stdout: StdOut, - debug_flag: bool, - ) !VM { - var self = VM{ - .stack = try allocator.alloc(Value, 256), - .stdout = stdout, - .debug_flag = 
debug_flag, - .allocator = allocator, - - .globals = ValueMap.init(allocator), - }; - - self.resetStack(); - - return self; - } - - fn deinitObject(self: *VM, obj: *objects.Object) void { - switch (obj.otype) { - .String => blk: { - self.allocator.free(obj.value.String); - self.allocator.destroy(obj); - break :blk; - }, - else => unreachable, - } - } - - fn deinitObjects(self: *VM) void { - var obj_opt: ?*objects.Object = self.objs; - - // doing a while(obj != null) but with optionals - while (true) { - if (obj_opt) |obj| { - var next = obj.next; - self.deinitObject(obj); - obj_opt = next; - } else { - break; - } - } - } - - pub fn deinit(self: *VM) void { - self.globals.deinit(); - self.deinitObjects(); - } - - pub fn debug(self: *VM, comptime fmt: []const u8, args: ...) void { - if (self.debug_flag) { - std.debug.warn(fmt, args); - } - } - - fn readByte(self: *VM) u8 { - var byte: u8 = self.chk.code[self.ip]; - self.ip += 1; - return byte; - } - - fn readConst(self: *VM) Value { - return self.chk.constants.values[self.readByte()]; - } - - fn readConstLong(self: *VM) Value { - const v3 = self.readByte(); - const v2 = self.readByte(); - const v1 = self.readByte(); - const const_idx = (@intCast(u24, v3) << 16) | - (@intCast(u24, v2) << 8) | - v1; - - return self.chk.constants.values[const_idx]; - } - - fn debugStack(self: *VM) !void { - try self.stdout.print(" "); - for (self.stack) |val, idx| { - if (idx >= self.stackTop) break; - - try self.stdout.print("[ "); - try value.printValue(self.stdout, val); - try self.stdout.print(" ]"); - } - try self.stdout.print("\n"); - } - - /// gets a f64 out of a value on the top of the stack. 
- fn popNum(self: *VM) !f64 { - var val: Value = self.pop(); - - switch (val.vtype) { - .Number => return val.as.Number, - - else => |vtype| blk: { - self.runtimeError("Expected number, got {x}", vtype); - return InterpretResult.RuntimeError; - }, - } - } - - fn concatenateStrings(self: *VM) !void { - var b = self.pop().as.Object.value.String; - var a = self.pop().as.Object.value.String; - - var res_str = try std.mem.join( - self.allocator, - "", - [_][]u8{ a, b }, - ); - - var val = values.ObjVal(try objects.takeString(self, res_str)); - try self.push(val); - } - - fn doAdd(self: *VM) !void { - if (values.isObjType(self.peek(0), .String) and - values.isObjType(self.peek(1), .String)) - { - return try self.concatenateStrings(); - } - - var b = try self.popNum(); - var a = try self.popNum(); - try self.push(values.NumberVal(a + b)); - } - - fn doSub(self: *VM) !void { - var b = try self.popNum(); - var a = try self.popNum(); - try self.push(values.NumberVal(a - b)); - } - - fn doMul(self: *VM) !void { - var b = try self.popNum(); - var a = try self.popNum(); - try self.push(values.NumberVal(a * b)); - } - - fn doDiv(self: *VM) !void { - var b = try self.popNum(); - var a = try self.popNum(); - try self.push(values.NumberVal(a / b)); - } - - fn doGreater(self: *VM) !void { - var b = try self.popNum(); - var a = try self.popNum(); - try self.push(values.BoolVal(a > b)); - } - - fn doLess(self: *VM) !void { - var b = try self.popNum(); - var a = try self.popNum(); - try self.push(values.BoolVal(a < b)); - } - - fn runtimeError(self: *VM, comptime fmt: []const u8, args: ...) 
void { - std.debug.warn(fmt, args); - std.debug.warn("\n[line {}] in script\n", self.chk.lines[self.ip]); - self.resetStack(); - } - - fn defGlobal(self: *VM, name: []const u8) !void { - _ = try self.globals.put(name, self.peek(0)); - _ = self.pop(); - } - - fn readString(self: *VM) []u8 { - return self.readConst().as.Object.value.String; - } - - fn readStringLong(self: *VM) []u8 { - return self.readConstLong().as.Object.value.String; - } - - fn doGetGlobal(self: *VM, name: []u8) !void { - var kv_opt = self.globals.get(name); - - if (kv_opt) |kv| { - try self.push(kv.value); - } else { - self.runtimeError("Undefined variable '{}'.", name); - return InterpretResult.RuntimeError; - } - } - - fn doSetGlobal(self: *VM, name: []u8) !void { - var res = try self.globals.getOrPut(name); - - if (res.found_existing) { - res.kv.value = self.peek(0); - } else { - self.runtimeError("Undefined variable '{}'.", name); - return InterpretResult.RuntimeError; - } - } - - fn run(self: *VM) !void { - while (true) { - if (self.debug_flag) { - try self.debugStack(); - _ = try self.chk.disassembleInstruction(self.stdout, self.ip); - } - - var instruction = self.readByte(); - - switch (instruction) { - chunk.OpCode.Constant => blk: { - var constant = self.readConst(); - try self.push(constant); - break :blk; - }, - chunk.OpCode.ConstantLong => blk: { - var constant = self.readConstLong(); - try self.push(constant); - break :blk; - }, - - chunk.OpCode.Print => blk: { - try value.printValue(self.stdout, self.pop()); - try self.stdout.print("\n"); - break :blk; - }, - - chunk.OpCode.Return => blk: { - // Exit VM - return InterpretResult.Ok; - }, - - chunk.OpCode.Nil => try self.push(values.NilVal()), - chunk.OpCode.True => try self.push(values.BoolVal(true)), - chunk.OpCode.False => try self.push(values.BoolVal(false)), - - chunk.OpCode.Pop => blk: { - _ = self.pop(); - }, - - chunk.OpCode.GetLocal => blk: { - var slot = self.readByte(); - try self.push(self.stack[slot]); - }, - 
chunk.OpCode.SetLocal => blk: { - var slot = self.readByte(); - self.stack[slot] = self.peek(0); - }, - - chunk.OpCode.GetGlobal => blk: { - try self.doGetGlobal(self.readString()); - }, - chunk.OpCode.SetGlobal => blk: { - try self.doSetGlobal(self.readString()); - break :blk; - }, - - // extracting the name is different depending of the - // op code since one just uses a single byte, the other - // uses three bytes since its a u24. - chunk.OpCode.DefineGlobal => blk: { - try self.defGlobal(self.readString()); - break :blk; - }, - chunk.OpCode.DefineGlobalLong => blk: { - try self.defGlobal(self.readStringLong()); - break :blk; - }, - - chunk.OpCode.Equal => blk: { - var a = self.pop(); - var b = self.pop(); - try self.push(values.BoolVal(valuesEqual(a, b))); - }, - - chunk.OpCode.Greater => try self.doGreater(), - chunk.OpCode.Less => try self.doLess(), - - chunk.OpCode.Add => try self.doAdd(), - chunk.OpCode.Subtract => try self.doSub(), - chunk.OpCode.Multiply => try self.doMul(), - chunk.OpCode.Divide => try self.doDiv(), - chunk.OpCode.Not => blk: { - try self.push(values.BoolVal(isFalsey(self.pop()))); - }, - - chunk.OpCode.Negate => blk: { - var val = self.peek(0); - if (val.vtype != .Bool) { - self.runtimeError("Operand must be a number."); - return InterpretResult.RuntimeError; - } - - val = self.pop(); - switch (val.as) { - .Number => |num| { - try self.push(values.NumberVal(-num)); - }, - else => unreachable, - } - }, - - else => blk: { - std.debug.warn("Unknown instruction: {x}\n", instruction); - return InterpretResult.RuntimeError; - }, - } - } - } - - pub fn interpret(self: *VM, src: []const u8) !void { - //self.ip = 0; - //self.debug("VM start\n"); - //var res = try self.run(); - //self.debug("VM end\n"); - //return res; - var chk = try Chunk.init(self.allocator); - - var cmpr = Compiler.init( - self.allocator, - &chk, - self.stdout, - src, - self.debug_flag, - self, - ); - if (!try cmpr.compile(&chk)) { - return InterpretResult.CompileError; - } - 
- self.chk = &chk; - self.ip = 0; - return try self.run(); - } - - pub fn push(self: *VM, val: Value) !void { - if (self.stackTop > 0 and self.stackTop - 1 > self.stack.len) { - self.stack = try self.allocator.realloc(self.stack, self.stack.len + 1); - } - - self.stack[self.stackTop] = val; - self.stackTop += 1; - } - - pub fn pop(self: *VM) Value { - self.stackTop -= 1; - return self.stack[self.stackTop]; - } - - pub fn peek(self: *VM, distance: usize) Value { - return self.stack[self.stackTop - 1 - distance]; - } -};