Compare commits

...

68 commits

Author SHA1 Message Date
d2e865adfc fix for latest zig 2019-11-02 10:22:23 -03:00
036e76d3a5 fix for latest zig 2019-06-30 00:01:03 -03:00
d7b78e09e3 finish impl for local vars 2019-06-03 16:24:54 -03:00
26d299cd23 add local/global "switch" 2019-06-03 16:10:49 -03:00
5138410be4 compiler: add scope support 2019-06-03 15:17:07 -03:00
25ee586acb compiler: add local scope basics 2019-06-03 15:10:12 -03:00
69dda36d16 add SetGlobal/SetGlobalLong opcodes
- main: split compile/runtime error messages
2019-06-03 01:41:22 -03:00
9f45dea2c0 vm: remove unneeded hack 2019-06-03 00:58:44 -03:00
8bc220d2f8 chunk: split writing side-effects into own function
split writeConstant() into that and writeConstantRaw() for the places
where we don't want OP_CONSTANT written as well.

this caused a bug where doing `"some const string" + x` would emit an
unnecessary OP_CONSTANT for the x variable and give the wrong result.

 - main: reset stack on repl tick (?)
2019-06-03 00:55:50 -03:00
922f3c530c chunk: fix disasm on the new const-load op codes 2019-06-03 00:07:11 -03:00
3936b4a426 main: keep a long-running vm instance for REPL
enables us to declare variables and keep them going for as long as the
REPL goes.

 - vm: remove source arg from init(), move to interpret()
2019-06-03 00:02:07 -03:00
005981fbbd vm: add getglobal support 2019-06-02 23:57:28 -03:00
887cb1adea add emitting of GetGlobal/GetGlobalLong 2019-06-02 23:43:12 -03:00
e3ac28d84e vm: deinit globals on vm deinit 2019-06-02 23:13:42 -03:00
3a6df2d9ea add basic global variable support
- chunk: make writeConstant return a ConstantIndex for better
    integration with the (optional) OP_CONST_LONG
 - compiler: quickfix emitConstant()
 - vm: add a global ValueMap
2019-06-02 22:52:19 -03:00
aa94396e51 compiler: add nicer error handling
- vm: fix pop opcode handler
2019-06-02 18:11:23 -03:00
06df2d37ee add expression statements 2019-06-02 18:04:36 -03:00
bea6e34365 add print statement
- remove opcode return's use as debug
2019-06-02 17:28:54 -03:00
3f2a8f3801 add object list cleaning on VM.deinit 2019-06-02 15:39:04 -03:00
cf53b6fc86 add basics of virtual machine object list
- object: move functions to accept VM pointer, not Allocator
2019-06-02 14:52:19 -03:00
38715af200 vm: add string concatenation
- compiler: fix string creation
2019-06-02 14:18:01 -03:00
9ac5fccc2b add objects and string comparison 2019-06-02 14:01:54 -03:00
44c27f43b7 move new_scanner.zig to scanner.zig 2019-06-02 13:17:32 -03:00
71dba5c77d vm: add greater and less 2019-06-02 00:23:50 -03:00
15c58a2216 comparison operators, part 1 2019-06-02 00:16:33 -03:00
39e28f01ac chunk: add printing of OP_NOT 2019-06-02 00:03:54 -03:00
c5d704a34f add not operator 2019-06-02 00:02:37 -03:00
1d774c6011 add support for nil, true and false literal values 2019-06-01 23:44:59 -03:00
ec652b29d9 remove print debug 2019-06-01 23:35:13 -03:00
589413488c move towards dynamically typed values
- remove InterpretResult as an enum, replace by error.
 - scanner: fix peekNext()
 - vm: add runtime errors, add VM.peek()
2019-06-01 23:33:53 -03:00
0f8e19adf1 compiler: finish parser (for math expressions) 2019-06-01 21:32:25 -03:00
230fef20b5 add other bytecode emitters 2019-06-01 20:48:26 -03:00
2736bee8d8 scanner: remove debug print 2019-06-01 20:34:09 -03:00
e1d0e3ec0b add basic compiler code 2019-06-01 20:33:43 -03:00
7d7aabbdd7 scanner: add keyword handling (copied off the old scanner)
as with most things, lol
2019-06-01 17:20:50 -03:00
d62c58a195 scanner: add identifiers 2019-06-01 17:15:35 -03:00
566d8313f3 add number tokens 2019-06-01 17:13:50 -03:00
9d1d253c94 scanner: fix peekNext 2019-06-01 17:12:00 -03:00
27b04e1612 scanner: add basic error handling, strings, comments 2019-06-01 17:07:22 -03:00
f4f1fe1fbc scanner: add whitespace handling 2019-06-01 16:54:15 -03:00
2c7cf356b3 scanner: add basic tokens and matched-tokens 2019-06-01 16:51:02 -03:00
6b9cc575d9 add draft scanner, remove messy Token union, replace by struct 2019-06-01 16:37:24 -03:00
63045e4df5 add src/new_scanner.zig 2019-06-01 16:21:36 -03:00
b80cd52c50 main: readd runPrompt and runFile
- main: make run() use the VM struct instance
2019-06-01 16:17:28 -03:00
a9dca436bd remove chunk running code, add draft compiler struct 2019-06-01 16:12:39 -03:00
088674bf0b vm: add dynamically-sized growing stack 2019-06-01 15:45:30 -03:00
3377d1675c vm, chunk: add binary operators 2019-06-01 15:40:18 -03:00
2822676707 vm: add negate opcode 2019-06-01 15:27:19 -03:00
61e463713c vm: add stack 2019-06-01 15:23:23 -03:00
456bc95138 vm: add debug flags 2019-06-01 15:01:39 -03:00
dae3c259fd add basic virtual machine code 2019-06-01 14:55:11 -03:00
14fa63e1f6 finish impl for ConstantLong 2019-06-01 14:18:44 -03:00
2d33e03efb add incomplete ConstantLong instruction
- move ValueList's count to usize for ConstantLong
2019-06-01 02:06:23 -03:00
ba78b39300 add constant values to the virtual machine 2019-06-01 01:46:01 -03:00
c4401dc8cf moving to the virtual machine part of the book, pt 1
the java part would still fuck me up since it relies on OOP stuff
that zig doesn't provide, so i'm skipping towards the C part of the book
which will hopefully be more understandable from a zig perspective.
2019-06-01 01:20:06 -03:00
727a259638 rm make_exprs.py, moving to the c part 2019-06-01 00:04:04 -03:00
6ff75a0926 finish make_exprs.py script
- add src/expr.zig
2019-05-31 23:43:46 -03:00
8007df6853 add mypy_cache to gitignore 2019-05-31 23:38:21 -03:00
3b73978f40 add make_exprs.py script 2019-05-31 23:37:45 -03:00
756f85d77d add multiline block comments 2019-05-31 22:45:23 -03:00
bba969922f add reading of keywords on doIdentifier 2019-05-31 22:39:53 -03:00
9d4c1249b4 add keyword map when scanner is initialized
- add basic identifier reading
2019-05-31 22:34:10 -03:00
69aa7b493d add number tokens 2019-05-31 22:08:41 -03:00
d1db7a0bd9 add consumption of comments and strings 2019-05-31 21:46:18 -03:00
9a2c50a53e add basic tokens and a basic lexer 2019-05-31 21:23:51 -03:00
31b0fa783c add file reads and main prompt 2019-05-31 17:07:08 -03:00
b3ea9637bd move to zig 2019-05-31 16:15:27 -03:00
3d26da0144 remove examples and main parser grammar
moving to lox described in https://craftinginterpreters.com
to go with. better learn something first, then walk towards things like
a static typed lang lol

anyways if you were here for jorts as my own language thing
do leave
2019-05-31 16:04:09 -03:00
30 changed files with 1904 additions and 865 deletions

117
.gitignore vendored
@@ -1,116 +1 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
zig-cache/

@@ -1,19 +1,18 @@
# jorts
jorts programming language
a compiler for the lox language from https://craftinginterpreters.com
## installing
this is a learning project. the implementation is based heavily off the C part
of the book, but also the Java part for the scanner.
```sh
git clone https://gitdab.com/luna/jorts
cd jorts
pip install --user --editable .
```
## notes
## using
- jorts' lox bytecode is not compatible with any other implementation.
right now, it's not pretty, nor finished
## how do?
```
cat examples/hello.jt | jortsc
zig build run
```
and play around with it

15
build.zig Normal file
@@ -0,0 +1,15 @@
const Builder = @import("std").build.Builder;
pub fn build(b: *Builder) void {
const mode = b.standardReleaseOptions();
const exe = b.addExecutable("jorts", "src/main.zig");
exe.setBuildMode(mode);
const run_cmd = exe.run();
const run_step = b.step("run", "Run the app");
run_step.dependOn(&run_cmd.step);
b.default_step.dependOn(&exe.step);
b.installArtifact(exe);
}

@@ -1,23 +0,0 @@
import io
fn add (int a, int b) -> int {
a + b
}
// return type is void by default
fn main () {
// explicit types, or
int val = add(2, 2)
// type inferred from the functions' return value
val := add(2, 2)
// variables are immutable, however, you can update them with
// the value of the old one.
val = val + 1
// a shorthand is val++, same for val--.
// string interpolation is implicit
io.puts("2 plus 2 = {val}")
}

@@ -1,22 +0,0 @@
import io
fn main () {
x := 0
// since variable are immutable but updatable, x is 1 inside clojure, but
// 0 inside main()
fn inner() {
x++
}
inner()
// shows 0
io.puts("x is {x}")
// however, if you explicitly update x:
x = inner()
// shows 1
io.puts("x is {x}")
}

@@ -1,11 +0,0 @@
import io
// you can create your own types with 'type'
type T = int
fn main () {
T a = 2
// since T is int, io.puts with an int works
io.puts(a)
}

@@ -1,15 +0,0 @@
struct A {
int a,
int b
}
struct B <- A {
int c
}
fn main () {
a := A{1, 2}
b := B{1, 2, 3}
}

@@ -1,15 +0,0 @@
import io
import integer
fn my_puts(string str) {
io.puts(str)
}
fn my_puts(int my_int) {
io.puts(integer.to_str(my_int))
}
fn main () {
my_puts(2)
my_puts("aaa")
}

@@ -1,11 +0,0 @@
import io
// if a return type is not defined, it is implicitly void and so the function
// returns nil (the only instance of void)
// main can return int or void, void mains are handled by jortsc
fn main () -> int {
// todo: put it back to io.puts
ioputs("pants")
0
}

@@ -1,26 +0,0 @@
import io
// takes a function that receives two ints, returns an int
// Func is the function type keyword, to not switch it with fn (which declares
// a function)
fn function_tester (Func func ([int, int] -> int)) -> int {
func(2, 2)
}
fn add(int a, int b) -> int {
a + b
}
fn main () {
// passes the function add to function_tester
res := function_tester(add)
// you can also create functions and put them in variables. not putting a
// function name on the fn block makes it return a Func instance to be put
// in a variable
anonymous := (fn () {})
// anonymous has type Func ([] -> void)
io.puts("res = {res}")
}

@@ -1,10 +0,0 @@
import socket
import io
fn main () {
sock := socket.tcp_connect("example.com", 80)
sock.send("HTTP/1.1\n")
frame := sock.recv(1024)
sock.close()
io.puts(frame)
}

@@ -1,15 +0,0 @@
import io
fn main () {
s := "this is a string"
io.puts(s)
s := "this is {s}"
io.puts(s)
s := s + 2 // invalid
// this however, is valid, there is an io.puts that handles int,
// more on function overload in a bit
io.puts(2)
}

@@ -1,60 +0,0 @@
import io
struct A {
int val1,
int val2
}
// self is injected and represents the struct A
// from the functions' definition
fn A:sum_fields() -> int {
self.val1 + self.val2
}
// type of sum_fields is:
// Func ([A] -> int)
// the mut keyword signals that self is a "reference"
// to self, instead of a copy
// however, what actually happens is that an instance of
// A is returned from the function implicitly
fn mut A:incr_both_fields() {
self.val1++
self.val2++
}
// and so, the type becomes:
// Func ([A] -> A)
fn mut A:incr_and_sum () {
self.val1++
self.val2++
self.val1 + self.val2
}
// type is:
// Func ([A] -> (A, int))
fn main () {
a := A{0, 0}
a.incr_both_fields()
/*
translates to:
a := incr_both_fields(a)
*/
sum := a.sum_fields()
io.puts(sum)
val = a.incr_and_sum()
/*
translates to:
a, val := incr_and_sum(a)
*/
}

@@ -1,14 +0,0 @@
import io
struct MyStruct {
int var1,
int var2,
int var3
}
fn main () {
st = MyStruct{1, 2, 3}
// TODO: define a way for printable things
io.puts(st)
}

@@ -1,3 +0,0 @@
from .main import main
__all__ = ['main']

@@ -1,27 +0,0 @@
#!/usr/bin/python3
import sys
import pprint
import logging
from jortsc.parser.lexer import lex_jorts
from jortsc.parser.syntatic import syntatic
logging.basicConfig(level=logging.DEBUG)
def main():
"""main entry point"""
try:
in_data = sys.stdin.read()
except EOFError:
pass
tokens = lex_jorts(in_data)
pprint.pprint(tokens)
tree = syntatic(tokens)
print(tree)
if __name__ == '__main__':
main()

@@ -1,45 +0,0 @@
from dataclasses import dataclass
@dataclass
class TypedVar:
type_: str
name: str
@dataclass
class ReturnType:
type_: str
@dataclass
class Function:
name: str
arguments: str
ret_type: ReturnType
block: list
@dataclass
class Identifier:
name: str
@dataclass
class Import:
module: str
@dataclass
class String:
value: str
@dataclass
class Number:
value: str
@dataclass
class FunctionCall:
function: str
args: list

@@ -1,112 +0,0 @@
import re
from dataclasses import dataclass
from enum import Enum, auto
class TokenType(Enum):
"""Defines the type of a token"""
reserved = auto()
identifier = auto()
comment = auto()
comment_start = auto()
comment_end = auto()
whitespace = auto()
number = auto()
string = auto()
@dataclass
class Token:
value: str
type_: TokenType
class LexerError(Exception):
"""Lexer error."""
pass
TOKENS = [
(r'[ \n\t]+', TokenType.whitespace),
# single line comments and multiline comments
(r'//[^\n]*', TokenType.comment),
# TODO: shouldnt this be /* <anything> */ instead of
# only tokenizing on the start and end?
(r'/\*', TokenType.comment_start),
(r'\*/', TokenType.comment_end),
(r'fn', TokenType.reserved),
(r'if', TokenType.reserved),
(r'import', TokenType.reserved),
(r'\(', TokenType.reserved),
(r'\)', TokenType.reserved),
(r'\{', TokenType.reserved),
(r'\}', TokenType.reserved),
(r'\-\>', TokenType.reserved),
(r'\.', TokenType.reserved),
(r'\"[^\n]*\"', TokenType.string),
# basic math ops
(r'[\+\-\/\*]', TokenType.reserved),
(r'[0-9]+', TokenType.number),
(r'[A-Za-z][A-Za-z0-9_]*', TokenType.identifier)
]
def lex(string: str, token_defs: list) -> list:
"""Generate tokens out of the given string."""
pos = 0
strlen = len(string)
tokens = []
# generate a dict for compiled regexes out of the token defs
# instead of compiling on each token definition per token.
compiled = {pattern: re.compile(pattern)
for pattern, _ in token_defs}
# we use this instead of for pos in range(len(string)) because we
# need to increment pos to a whole token length's, and that wouldn't
# be easy on a for .. in range(..)
while pos < strlen:
valid = False
for definition in token_defs:
pattern, tok_type = definition
regex = compiled[pattern]
match = regex.match(string, pos)
if not match:
continue
text = match.group(0)
# update pos to the end of the token
pos = match.end(0)
valid = True
tokens.append(Token(text, tok_type))
# go to next token instead of checking other
# definitions for tokens, e.g if its a reserved token
# we shouldn't go down the path of an identifier.
break
if not valid:
print(f'context: {pos} {len(string)} {string[pos-1:pos+20]!r}')
raise LexerError(f'Invalid character: {string[pos]!r}')
return tokens
def lex_jorts(string: str) -> list:
"""Lex with the jorts token definitions"""
return lex(string, TOKENS)

@@ -1,44 +0,0 @@
from lark import Lark
GRAMMAR = """
FN: "fn"
IMPORT: "import"
COMMA: ","
DOT: "."
SINGLE_COMMENT: "//"
NEWLINE: /(\\r?\\n)+\\s*/
ANY: /.+/
WHITESPACE: " "
INTEGER: /[0-9]+/
ARROW: "->"
COM_START: "/*"
COM_END: "*/"
QUOTE: "\\""
identifier: WHITESPACE* ANY WHITESPACE*
single_comment: SINGLE_COMMENT ANY* NEWLINE
multi_comment: COM_START ANY* COM_END
import_stmt: IMPORT identifier NEWLINE
fn_arg: identifier identifier
parameters: fn_arg (COMMA fn_arg)
fn_stmt: FN identifier? "(" parameters? ")" [ARROW identifier] "{" NEWLINE? [stmt NEWLINE]* "}"
sign_int: "+" | "-"
string: QUOTE ANY* QUOTE
value: (sign_int* INTEGER) | string
call_stmt: [identifier DOT] identifier "(" [value COMMA]* ")"
stmt: value | import_stmt | fn_stmt | call_stmt
start: (NEWLINE | stmt)*
"""
def parse(string: str):
"""Parse using Lark"""
parser = Lark(GRAMMAR, parser='lalr', debug=True)
return parser.parse(string)

@@ -1,272 +0,0 @@
from typing import Optional, Any, List
from jortsc.parser.lexer import Token, TokenType
from jortsc.parser.ast_nodes import (
Function, TypedVar, Identifier, Import, ReturnType, String, Number,
FunctionCall
)
class ParseError(Exception):
"""Represents a parse error."""
pass
class Reader:
"""Main reader class"""
def __init__(self, tokens: List[Token]):
self.tokens = tokens
self.cur = 0
def __repr__(self):
return (f'<Reader cur={self.cur} tot={len(self.tokens)} '
f'cur_tok={self.peek()}>')
def peek(self) -> Optional[Token]:
"""Peek at the current token."""
try:
token = self.tokens[self.cur]
return token
except IndexError:
return None
def next(self) -> Optional[Token]:
"""Fetch the current token then skip to the next one."""
token = self.peek()
self.cur += 1
return token
def expect(self, token_type: TokenType) -> Token:
"""Check for a specific token type and error if it fails"""
token = self.next()
if token.type_ != token_type:
raise ParseError(f'Expected {token_type}, got '
f'{token.type_} {token.value!r}')
return token
def expect_val(self, value: str) -> Token:
"""Check the next token to see if it matches against a given value,
instead of a type."""
token = self.next()
if token.value != value:
raise ParseError(f'Expected {value!r}, got '
f'{token.type_} {token.value!r}')
return token
def next_safe(self) -> Token:
"""'Safe' version of next().
Raises an 'Unexpected EOF' error if next() returns None.
"""
token = self.next()
if token is None:
raise ParseError('Unexpected EOF')
return token
def ignore(self, token_type: TokenType):
"""Only increase self.cur if token_type is the upcoming token."""
try:
assert self.tokens[self.cur].type_ == token_type
self.cur += 1
except AssertionError:
pass
def _fn_read_args(reader: Reader, cur: List = None) -> List:
"""Recursively read the arguments of the function."""
if cur is None:
cur = []
# it can be an identifier for the arguments' type, OR a RPAREN
# if it is rparen, we stop
# if it isnt, we keep going until that happens
token = reader.next_safe()
if token.value == ')':
return cur
argtype = token
reader.expect(TokenType.whitespace)
argname = reader.next_safe()
cur.append(TypedVar(argtype.value, argname.value))
return _fn_read_args(reader, cur)
def _fn_ret_type(reader: Reader) -> ReturnType:
"""Fetch the return type of a function. Defaults to void."""
try:
reader.expect_val('->')
except ParseError:
return ReturnType('void')
reader.ignore(TokenType.whitespace)
token = reader.expect(TokenType.identifier)
return ReturnType(token.value)
def read_function(reader: Reader):
"""Read a function block."""
reader.expect(TokenType.whitespace)
token = reader.next()
fn_name = '_anonymous'
fn_args = []
print('function token', token)
if token.type_ == TokenType.identifier:
fn_name = token.value
reader.expect(TokenType.whitespace)
reader.expect_val('(')
fn_args = _fn_read_args(reader)
reader.expect(TokenType.whitespace)
fn_ret_type = _fn_ret_type(reader)
# only skip whitespace if we see it
reader.ignore(TokenType.whitespace)
block = read_start(reader)
elif token.value == '(':
fn_args = _fn_read_args(reader)
fn_ret_type = _fn_ret_type(reader)
block = read_start(reader)
print('final function', fn_name, fn_args, fn_ret_type, block)
return Function(fn_name, fn_args, fn_ret_type, block)
def read_import(reader):
"""Read an import"""
reader.expect(TokenType.whitespace)
module = reader.next_safe()
return Import(module.value)
HANDLERS = {
'fn': read_function,
'import': read_import,
}
def read_reserved(token: Token, reader: Reader):
"""Read reserved statements."""
try:
handler = HANDLERS[token.value]
except KeyError:
raise ParseError(f'Unexpected reserved word {token.value!r}')
return handler(reader)
def read_value(token: Token, _reader: Reader):
"""Read a given value"""
if token.type_ == TokenType.string:
return String(token.value)
elif token.type_ == TokenType.number:
return Number(token.value)
def read_statement(token: Token, reader: Reader):
"""Read a statement"""
# token is an identifier, so first check for a function call
# TODO: handle more things than a function call
call_fn_name = token.value
token = reader.expect_val('(')
res = []
while True:
token = reader.next_safe()
if token.value == ')':
break
res.append(read_value(token, reader))
return FunctionCall(call_fn_name, res)
def read_start(reader: Reader):
"""Read the start of a program."""
print('reader', reader)
token = reader.next()
if token is None:
print('eof!')
return None
ast = []
res = []
# handle blocks
if token.value == '{':
# next can be a whitespace, or a }
token = reader.next()
print('block start!, next:', token)
if token.type_ == TokenType.whitespace:
# keep going on reading
while True:
token = reader.peek()
print('block append', token)
if token.value == '}':
print('block end')
reader.next()
break
res.extend(read_start(reader))
elif token.value == '}':
res = []
# import, fn, etc
elif token.type_ == TokenType.reserved:
res = read_reserved(token, reader)
elif token.type_ == TokenType.comment:
return []
elif token.type_ == TokenType.identifier:
res = read_statement(token, reader)
else:
res = read_value(token, reader)
ast.append(res)
return ast
def read_loop(reader: Reader):
"""Read the AST."""
final_ast = []
while True:
ast = read_start(reader)
# break when eof
if ast is None:
break
# TODO: better ast cleanup
final_ast.append(ast)
return final_ast
def syntatic(tokens: List[Token]):
"""Create an AST out of the tokens."""
return read_loop(Reader(tokens))

@@ -1,14 +0,0 @@
from setuptools import setup
setup(
name='jortsc',
version='0.1',
py_modules=['jortsc'],
install_requires=[
'lark-parser==0.6.7'
],
entry_points='''
[console_scripts]
jortsc=jortsc:main
'''
)

286
src/chunk.zig Normal file
@@ -0,0 +1,286 @@
const std = @import("std");
const value = @import("value.zig");
const Allocator = std.mem.Allocator;
// hack. ugly hack. this should be an enum, but the enum version crashed
// the zig compiler at the time, so it's a struct of u8 fields with a
// single instance (OpCode) below.
const AllOpcodes = struct {
Return: u8 = 0,
Constant: u8 = 1,
ConstantLong: u8 = 2,
Add: u8 = 3,
Subtract: u8 = 4,
Multiply: u8 = 5,
Divide: u8 = 6,
Negate: u8 = 7,
// basic type op codes
Nil: u8 = 8,
True: u8 = 9,
False: u8 = 10,
Not: u8 = 11,
// comparison op codes!
Equal: u8 = 12,
Greater: u8 = 13,
Less: u8 = 14,
Print: u8 = 15,
Pop: u8 = 16,
DefineGlobal: u8 = 17,
DefineGlobalLong: u8 = 18,
GetGlobal: u8 = 19,
GetGlobalLong: u8 = 20,
SetGlobal: u8 = 21,
SetGlobalLong: u8 = 22,
GetLocal: u8 = 23,
SetLocal: u8 = 24,
};
pub const OpCode = AllOpcodes{};
fn simpleInstruction(
stdout: var,
comptime name: []const u8,
index: usize,
) !usize {
try stdout.print("{}\n", name);
return index + 1;
}
fn constantInstruction(
stdout: var,
comptime name: []const u8,
chunk: *Chunk,
index: usize,
) !usize {
// get the constant's index in constants slice
var idx = chunk.code[index + 1];
try stdout.print("\t{}\t{} '", name, idx);
try value.printValue(stdout, chunk.constants.values[idx]);
try stdout.print("'\n");
return index + 2;
}
fn constantLongInstruction(
stdout: var,
comptime name: []const u8,
chunk: *Chunk,
offset: usize,
) !usize {
// ConstantLong uses three u8s that encode a u24 as the
// constant's index.
var v3: u8 = chunk.code[offset + 1];
var v2: u8 = chunk.code[offset + 2];
var v1: u8 = chunk.code[offset + 3];
var idx: u24 = (@intCast(u24, v3) << 16) | (@intCast(u24, v2) << 8) | v1;
try stdout.print("\t{}\t{} '", name, idx);
try value.printValue(stdout, chunk.constants.values[idx]);
try stdout.print("'\n");
return offset + 4;
}
fn byteInstruction(
stdout: var,
name: []const u8,
chunk: *Chunk,
index: usize,
) !usize {
var slot: u8 = chunk.code[index + 1];
try stdout.print("{} {}", name, slot);
return index + 2;
}
pub const ConstantIndexTag = enum {
Small,
Long,
};
pub const ConstantIndex = union(ConstantIndexTag) {
Small: u8,
Long: [3]u8,
};
pub const Chunk = struct {
count: usize,
lines: []usize,
code: []u8,
allocator: *Allocator,
constants: value.ValueList,
pub fn init(allocator: *Allocator) !Chunk {
return Chunk{
.count = 0,
.allocator = allocator,
.code = try allocator.alloc(u8, 0),
.lines = try allocator.alloc(usize, 0),
.constants = try value.ValueList.init(allocator),
};
}
pub fn write(self: *Chunk, byte: u8, line: usize) !void {
if (self.code.len < self.count + 1) {
self.code = try self.allocator.realloc(
self.code,
self.count + 1,
);
self.lines = try self.allocator.realloc(
self.lines,
self.count + 1,
);
}
self.code[self.count] = byte;
self.lines[self.count] = line;
self.count += 1;
}
pub fn addConstant(self: *Chunk, val: value.Value) !u8 {
try self.constants.write(val);
return @intCast(u8, self.constants.count - 1);
}
pub fn writeConstantRaw(
self: *Chunk,
val: value.Value,
line: usize,
) !ConstantIndex {
try self.constants.write(val);
var constant_idx = self.constants.count - 1;
if (constant_idx < 256) {
var idx_small = @intCast(u8, constant_idx);
return ConstantIndex{ .Small = idx_small };
} else {
var idx_u24: u24 = @intCast(u24, constant_idx);
const mask = @intCast(u24, 0xff);
const v1: u8 = @intCast(u8, idx_u24 & mask);
const v2: u8 = @intCast(u8, (idx_u24 >> 8) & mask);
const v3: u8 = @intCast(u8, (idx_u24 >> 16) & mask);
return ConstantIndex{ .Long = [_]u8{ v3, v2, v1 } };
}
}
pub fn writeConstant(
self: *Chunk,
val: value.Value,
line: usize,
) !ConstantIndex {
var idx = try self.writeConstantRaw(val, line);
switch (idx) {
.Small => |idx_small| blk: {
try self.write(OpCode.Constant, line);
try self.write(idx_small, line);
break :blk;
},
.Long => |long_u8| blk: {
try self.write(OpCode.ConstantLong, line);
try self.write(long_u8[0], line);
try self.write(long_u8[1], line);
try self.write(long_u8[2], line);
},
else => unreachable,
}
return idx;
}
pub fn disassembleInstruction(
self: *Chunk,
stdout: var,
index: usize,
) !usize {
try stdout.print("{} ", index);
if (index > 0 and self.lines[index] == self.lines[index - 1]) {
try stdout.print(" | ");
} else {
try stdout.print("{} ", self.lines[index]);
}
var instruction = self.code[index];
if (instruction == OpCode.Return) {
return try simpleInstruction(stdout, "OP_RETURN", index);
} else if (instruction == OpCode.Constant) {
return try constantInstruction(stdout, "OP_CONSTANT", self, index);
} else if (instruction == OpCode.ConstantLong) {
return try constantLongInstruction(
stdout,
"OP_CONSTANT_LONG",
self,
index,
);
} else if (instruction == OpCode.Negate) {
return try simpleInstruction(stdout, "OP_NEGATE", index);
} else if (instruction == OpCode.Add) {
return try simpleInstruction(stdout, "OP_ADD", index);
} else if (instruction == OpCode.Subtract) {
return try simpleInstruction(stdout, "OP_SUBTRACT", index);
} else if (instruction == OpCode.Multiply) {
return try simpleInstruction(stdout, "OP_MULTIPLY", index);
} else if (instruction == OpCode.Divide) {
return try simpleInstruction(stdout, "OP_DIVIDE", index);
} else if (instruction == OpCode.Nil) {
return try simpleInstruction(stdout, "OP_NIL", index);
} else if (instruction == OpCode.True) {
return try simpleInstruction(stdout, "OP_TRUE", index);
} else if (instruction == OpCode.False) {
return try simpleInstruction(stdout, "OP_FALSE", index);
} else if (instruction == OpCode.Not) {
return try simpleInstruction(stdout, "OP_NOT", index);
} else if (instruction == OpCode.Equal) {
return try simpleInstruction(stdout, "OP_EQUAL", index);
} else if (instruction == OpCode.Greater) {
return try simpleInstruction(stdout, "OP_GREATER", index);
} else if (instruction == OpCode.Less) {
return try simpleInstruction(stdout, "OP_LESS", index);
} else if (instruction == OpCode.Print) {
return try simpleInstruction(stdout, "OP_PRINT", index);
} else if (instruction == OpCode.Pop) {
return try simpleInstruction(stdout, "OP_POP", index);
} else if (instruction == OpCode.DefineGlobal) {
return try constantInstruction(stdout, "OP_DEFGLOBAL", self, index);
} else if (instruction == OpCode.DefineGlobalLong) {
return try constantLongInstruction(stdout, "OP_DEFGLOBAL_LONG", self, index);
} else if (instruction == OpCode.GetGlobal) {
return try constantInstruction(stdout, "OP_GETGLOBAL", self, index);
} else if (instruction == OpCode.GetGlobalLong) {
return try constantLongInstruction(stdout, "OP_GETGLOBAL_LONG", self, index);
} else if (instruction == OpCode.SetGlobal) {
return try constantInstruction(stdout, "OP_SETGLOBAL", self, index);
} else if (instruction == OpCode.SetGlobalLong) {
return try constantLongInstruction(stdout, "OP_SETGLOBAL_LONG", self, index);
} else if (instruction == OpCode.GetLocal) {
return try byteInstruction(stdout, "OP_GETLOCAL", self, index);
} else if (instruction == OpCode.SetLocal) {
return try byteInstruction(stdout, "OP_GETLOCAL", self, index);
} else {
try stdout.print("Unknown opcode: {}\n", instruction);
return index + 1;
}
}
pub fn disassemble(self: *Chunk, stdout: var, name: []const u8) !void {
try stdout.print("== {} ==\n", name);
var i: usize = 0;
while (i < self.count) {
i = try self.disassembleInstruction(stdout, i);
}
}
};

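A note on the OP_CONSTANT_LONG encoding above: writeConstantRaw packs a u24 constant index into three bytes, high byte first, and constantLongInstruction (plus the VM's readConstLong) reassemble it with shifts. The sketch below round-trips that packing; it is illustrative only, not part of the repository, and uses the same 2019-era Zig builtins as the diff.

```zig
const std = @import("std");

test "u24 constant index round-trip (sketch)" {
    const idx: u24 = 300; // 0x00012C, too big for the single-byte OP_CONSTANT form
    // split exactly like Chunk.writeConstantRaw: v3 carries the high byte
    const v3 = @intCast(u8, (idx >> 16) & 0xff);
    const v2 = @intCast(u8, (idx >> 8) & 0xff);
    const v1 = @intCast(u8, idx & 0xff);
    // reassemble exactly like constantLongInstruction / VM.readConstLong
    const back = (@intCast(u24, v3) << 16) | (@intCast(u24, v2) << 8) | @intCast(u24, v1);
    std.testing.expect(back == idx);
}
```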
587
src/compiler.zig Normal file
@@ -0,0 +1,587 @@
const std = @import("std");
const scanner = @import("scanner.zig");
const vm = @import("vm.zig");
const chunks = @import("chunk.zig");
const tokens = @import("token.zig");
const values = @import("value.zig");
const objects = @import("object.zig");
const Allocator = std.mem.Allocator;
const Scanner = scanner.Scanner;
const Chunk = chunks.Chunk;
const Token = tokens.Token;
const TokenType = tokens.TokenType;
const Value = values.Value;
const OpCode = chunks.OpCode;
/// Holds parser state for the compiler.
const Parser = struct {
previous: Token = undefined,
current: Token = undefined,
// TODO are those needed
hadError: bool = false,
panicMode: bool = false,
};
/// Represents the order of operations in the parser.
const Precedence = enum(u5) {
None,
Assignment, // =
Or, // or
And, // and
Equality, // == !=
Comparison, // < > <= >=
Term, // + -
Factor, // * /
Unary, // ! -
Call, // . () []
Primary,
};
const ParseFn = fn (*Compiler, bool) anyerror!void;
const ParseRule = struct {
prefix: ?ParseFn = null,
infix: ?ParseFn = null,
precedence: Precedence = Precedence.None,
};
/// For each token, this defines a parse rule for it.
var rules = [_]ParseRule{
// for LEFT_PAREN, we determine it as a call precedence
// plus a prefix parse function of grouping
ParseRule{ .prefix = Compiler.grouping, .precedence = .Call },
ParseRule{},
ParseRule{},
ParseRule{},
ParseRule{},
// dot token, means a call too, for things like a.b
ParseRule{ .precedence = .Call },
// specific to -, since it can be a unary operator when it appears as a
// prefix, or a binary one when it appears as an infix.
ParseRule{
.prefix = Compiler.unary,
.infix = Compiler.binary,
.precedence = .Term,
},
ParseRule{ .infix = Compiler.binary, .precedence = .Term },
ParseRule{},
// slash is a binary operator, as well as star.
ParseRule{ .infix = Compiler.binary, .precedence = .Factor },
ParseRule{ .infix = Compiler.binary, .precedence = .Factor },
// as the token enum says, those are 1/2 char tokens.
ParseRule{ .prefix = Compiler.unary },
// this is specifically for the != operator
ParseRule{ .infix = Compiler.binary, .precedence = .Equality },
ParseRule{},
// this is specifically for the == operator
ParseRule{ .infix = Compiler.binary, .precedence = .Equality },
// all the comparison ones
ParseRule{ .infix = Compiler.binary, .precedence = .Comparison },
ParseRule{ .infix = Compiler.binary, .precedence = .Comparison },
ParseRule{ .infix = Compiler.binary, .precedence = .Comparison },
ParseRule{ .infix = Compiler.binary, .precedence = .Comparison },
ParseRule{ .prefix = Compiler.variable },
ParseRule{ .prefix = Compiler.string },
ParseRule{ .prefix = Compiler.number },
ParseRule{ .precedence = .And },
ParseRule{},
ParseRule{},
// false
ParseRule{ .prefix = Compiler.literal },
ParseRule{},
ParseRule{},
ParseRule{},
ParseRule{ .prefix = Compiler.literal },
ParseRule{ .precedence = .Or },
ParseRule{},
ParseRule{},
ParseRule{},
ParseRule{},
ParseRule{ .prefix = Compiler.literal },
ParseRule{},
ParseRule{},
ParseRule{},
};
pub const Local = struct {
name: tokens.Token,
depth: i32,
};
pub const Compiler = struct {
src: []const u8,
stdout: vm.StdOut,
allocator: *Allocator,
parser: Parser,
scanr: Scanner = undefined,
chunk: *chunks.Chunk,
debug_flag: bool = false,
vmach: *vm.VM,
locals: [256]Local,
localCount: i32 = 0,
scopeDepth: i32 = 0,
pub fn init(
allocator: *Allocator,
chunk: *chunks.Chunk,
stdout: vm.StdOut,
source: []const u8,
debug_flag: bool,
vmach: *vm.VM,
) Compiler {
return Compiler{
.src = source,
.chunk = chunk,
.allocator = allocator,
.stdout = stdout,
.parser = Parser{},
.debug_flag = debug_flag,
.vmach = vmach,
// local variable resolution
.locals = [_]Local{Local{
.name = Token{},
.depth = -1,
}} ** 256,
};
}
fn errorAt(self: *Compiler, token: Token, msg: []const u8) void {
if (self.parser.panicMode) return;
self.parser.panicMode = true;
std.debug.warn("[line {}] Error", token.line);
if (token.ttype == TokenType.EOF) {
std.debug.warn(" at end");
} else {
std.debug.warn(" at '{}'", token.lexeme);
}
std.debug.warn(": {}\n", msg);
self.parser.hadError = true;
}
fn errorCurrent(self: *Compiler, msg: []const u8) void {
self.errorAt(self.parser.current, msg);
}
fn errorPrevious(self: *Compiler, msg: []const u8) void {
self.errorAt(self.parser.previous, msg);
}
fn advance(self: *Compiler) !void {
self.parser.previous = self.parser.current;
while (true) {
var token_opt = try self.scanr.scanToken();
if (token_opt) |token| {
self.parser.current = token;
break;
}
}
}
fn consume(self: *Compiler, ttype: TokenType, msg: []const u8) !void {
if (self.parser.current.ttype == ttype) {
try self.advance();
return;
}
self.errorCurrent(msg);
}
fn check(self: *Compiler, ttype: TokenType) bool {
return self.parser.current.ttype == ttype;
}
fn match(self: *Compiler, ttype: TokenType) !bool {
if (!(self.check(ttype))) return false;
try self.advance();
return true;
}
fn currentChunk(self: *Compiler) *chunks.Chunk {
return self.chunk;
}
fn emitByte(self: *Compiler, byte: u8) !void {
try self.currentChunk().write(byte, self.parser.previous.line);
}
fn emitBytes(self: *Compiler, byte1: u8, byte2: u8) !void {
try self.emitByte(byte1);
try self.emitByte(byte2);
}
fn emitReturn(self: *Compiler) !void {
try self.emitByte(OpCode.Return);
}
fn emitConstant(self: *Compiler, value: Value) !void {
_ = try self.currentChunk().writeConstant(
value,
self.parser.previous.line,
);
}
fn end(self: *Compiler) !void {
try self.emitReturn();
if (self.debug_flag and !self.parser.hadError) {
try self.currentChunk().disassemble(self.stdout, "code");
}
}
fn beginScope(self: *Compiler) void {
self.scopeDepth += 1;
}
fn endScope(self: *Compiler) !void {
self.scopeDepth -= 1;
// clear the current scope in the stack
while (self.localCount > 0 and self.locals[@intCast(usize, self.localCount - 1)].depth > self.scopeDepth) {
try self.emitByte(chunks.OpCode.Pop);
self.localCount -= 1;
}
}
fn grouping(self: *Compiler, canAssign: bool) !void {
try self.expression();
try self.consume(.RIGHT_PAREN, "Expect ')' after expression.");
}
/// Emits bytecode for a number being loaded into the code.
fn number(self: *Compiler, canAssign: bool) !void {
var value: f64 = try std.fmt.parseFloat(
f64,
self.parser.previous.lexeme,
);
try self.emitConstant(values.NumberVal(value));
}
fn string(self: *Compiler, canAssign: bool) !void {
const lexeme_len = self.parser.previous.lexeme.len;
try self.emitConstant(values.ObjVal(try objects.copyString(
self.vmach,
self.parser.previous.lexeme[1 .. lexeme_len - 1],
)));
}
fn resolveLocal(self: *Compiler, name: *Token) i32 {
var i = self.localCount - 1;
while (i >= 0) : (i -= 1) {
var idx = @intCast(usize, i);
var local = &self.locals[idx];
if (std.mem.eql(u8, name.lexeme, local.name.lexeme)) {
if (local.depth == -1) {
self.errorCurrent("Cannot read local variable in its own initializer.");
}
return i;
}
}
return -1;
}
fn namedVariable(self: *Compiler, tok: *Token, canAssign: bool) !void {
// writeConstant always emits OP_CONSTANT, which may not be what we
// want here, so pick the get/set opcodes explicitly.
var getOp: u8 = undefined;
var setOp: u8 = undefined;
// we try to resolve the local. depending if it gets resolved
// or not, we select the necessary get/set op codes.
var arg: i32 = self.resolveLocal(tok);
if (arg != -1) {
getOp = chunks.OpCode.GetLocal;
setOp = chunks.OpCode.SetLocal;
} else {
arg = (try self.identifierConstant(tok)).Small;
getOp = chunks.OpCode.GetGlobal;
setOp = chunks.OpCode.SetGlobal;
}
var idx: u8 = @intCast(u8, arg);
if (canAssign and try self.match(.EQUAL)) {
try self.expression();
try self.emitBytes(setOp, idx);
} else {
try self.emitBytes(getOp, idx);
}
}
fn variable(self: *Compiler, canAssign: bool) !void {
try self.namedVariable(&self.parser.previous, canAssign);
}
/// Emits bytecode for a given unary.
fn unary(self: *Compiler, canAssign: bool) !void {
var ttype = self.parser.previous.ttype;
try self.parsePrecedence(.Unary);
switch (ttype) {
.MINUS => try self.emitByte(OpCode.Negate),
.BANG => try self.emitByte(OpCode.Not),
else => unreachable,
}
}
fn binary(self: *Compiler, canAssign: bool) !void {
var op_type = self.parser.previous.ttype;
var rule: *ParseRule = self.getRule(op_type);
try self.parsePrecedence(@intToEnum(Precedence, @enumToInt(rule.precedence) + 1));
switch (op_type) {
.PLUS => try self.emitByte(OpCode.Add),
.MINUS => try self.emitByte(OpCode.Subtract),
.STAR => try self.emitByte(OpCode.Multiply),
.SLASH => try self.emitByte(OpCode.Divide),
.EQUAL_EQUAL => try self.emitByte(OpCode.Equal),
.GREATER => try self.emitByte(OpCode.Greater),
.LESS => try self.emitByte(OpCode.Less),
.BANG_EQUAL => try self.emitBytes(OpCode.Equal, OpCode.Not),
.GREATER_EQUAL => try self.emitBytes(OpCode.Less, OpCode.Not),
.LESS_EQUAL => try self.emitBytes(OpCode.Greater, OpCode.Not),
else => unreachable,
}
}
fn literal(self: *Compiler, canAssign: bool) !void {
switch (self.parser.previous.ttype) {
.FALSE => try self.emitByte(OpCode.False),
.NIL => try self.emitByte(OpCode.Nil),
.TRUE => try self.emitByte(OpCode.True),
else => unreachable,
}
}
fn parsePrecedence(self: *Compiler, precedence: Precedence) anyerror!void {
try self.advance();
var as_int = @enumToInt(precedence);
var prefix_rule_opt = self.getRule(self.parser.previous.ttype).prefix;
if (prefix_rule_opt) |prefix_rule| {
var canAssign: bool = as_int <= @enumToInt(Precedence.Assignment);
try prefix_rule(self, canAssign);
while (as_int <= @enumToInt(self.getRule(self.parser.current.ttype).precedence)) {
try self.advance();
var infix_rule_opt = self.getRule(self.parser.previous.ttype).infix;
if (infix_rule_opt) |infix_rule| {
try infix_rule(self, canAssign);
}
}
if (canAssign and try self.match(.EQUAL)) {
self.errorPrevious("Invalid assignment target.");
try self.expression();
}
} else {
self.errorPrevious("Expect expression.");
return;
}
}
fn getRule(self: *Compiler, ttype: TokenType) *ParseRule {
return &rules[@enumToInt(ttype)];
}
fn expression(self: *Compiler) anyerror!void {
try self.parsePrecedence(.Assignment);
}
fn printStmt(self: *Compiler) !void {
try self.expression();
try self.consume(.SEMICOLON, "Expect ';' after value.");
try self.emitByte(OpCode.Print);
}
fn exprStmt(self: *Compiler) !void {
try self.expression();
try self.consume(.SEMICOLON, "Expect ';' after expression.");
try self.emitByte(OpCode.Pop);
}
fn synchronize(self: *Compiler) !void {
self.parser.panicMode = false;
while (self.parser.current.ttype != .EOF) {
if (self.parser.previous.ttype == .SEMICOLON) return;
switch (self.parser.current.ttype) {
.CLASS, .FUN, .VAR, .FOR, .IF, .WHILE, .PRINT, .RETURN => return,
else => {},
}
try self.advance();
}
}
/// Write an identifier constant to the bytecode.
fn identifierConstant(
self: *Compiler,
token: *Token,
) !chunks.ConstantIndex {
return try self.currentChunk().writeConstantRaw(values.ObjVal(try objects.copyString(
self.vmach,
token.lexeme,
)), token.line);
}
fn addLocal(self: *Compiler, name: Token) void {
if (self.localCount == 256) {
self.errorCurrent("Too many variables in function.");
return;
}
// take the next free slot first, then bump the count, so the newest
// local ends up at locals[localCount - 1] as resolveLocal/endScope expect
var local: *Local = &self.locals[@intCast(usize, self.localCount)];
self.localCount += 1;
local.name = name;
//local.depth = self.scopeDepth;
local.depth = -1;
}
fn declareVariable(self: *Compiler) void {
if (self.scopeDepth == 0) return;
var name: *Token = &self.parser.previous;
// check if we're redeclaring an existing variable
// in the *CURRENT* scope.
// walk from the innermost declared local outwards
var i = self.localCount - 1;
while (i >= 0) : (i -= 1) {
var local = self.locals[@intCast(usize, i)];
// stop once we reach a local that belongs to an enclosing scope
if (local.depth != -1 and local.depth < self.scopeDepth) break;
if (std.mem.eql(u8, name.lexeme, local.name.lexeme)) {
self.errorCurrent("Variable with this name already declared in this scope.");
}
}
self.addLocal(name.*);
}
fn parseVariable(self: *Compiler, msg: []const u8) !chunks.ConstantIndex {
try self.consume(.IDENTIFIER, msg);
self.declareVariable();
if (self.scopeDepth > 0) return chunks.ConstantIndex{ .Small = 0 };
return try self.identifierConstant(&self.parser.previous);
}
fn emitConstWithIndex(
self: *Compiler,
op_short: u8,
op_long: u8,
idx: chunks.ConstantIndex,
) !void {
switch (idx) {
.Small => |val| try self.emitBytes(op_short, val),
.Long => |val| blk: {
try self.emitByte(op_long);
try self.emitByte(val[0]);
try self.emitByte(val[1]);
try self.emitByte(val[2]);
},
else => unreachable,
}
}
fn markInitialized(self: *Compiler) void {
if (self.scopeDepth == 0) return;
// the most recently added local sits at locals[localCount - 1]
var idx = @intCast(usize, self.localCount - 1);
self.locals[idx].depth = self.scopeDepth;
}
fn defineVariable(self: *Compiler, global: chunks.ConstantIndex) !void {
if (self.scopeDepth > 0) {
self.markInitialized();
return;
}
try self.emitConstWithIndex(
chunks.OpCode.DefineGlobal,
chunks.OpCode.DefineGlobalLong,
global,
);
}
fn varDecl(self: *Compiler) !void {
var global = try self.parseVariable("Expect variable name.");
if (try self.match(.EQUAL)) {
try self.expression();
} else {
try self.emitByte(chunks.OpCode.Nil);
}
// check scopeDepth here
try self.consume(.SEMICOLON, "Expect ';' after variable declaration.");
try self.defineVariable(global);
}
fn declaration(self: *Compiler) anyerror!void {
if (try self.match(.VAR)) {
try self.varDecl();
} else {
try self.statement();
}
if (self.parser.panicMode) try self.synchronize();
}
fn block(self: *Compiler) anyerror!void {
while (!self.check(.RIGHT_BRACE) and !self.check(.EOF)) {
try self.declaration();
}
try self.consume(.RIGHT_BRACE, "Expect '}' after block.");
}
fn statement(self: *Compiler) !void {
if (try self.match(.PRINT)) {
try self.printStmt();
} else if (try self.match(.LEFT_BRACE)) {
self.beginScope();
try self.block();
try self.endScope();
} else {
try self.exprStmt();
}
}
/// Compile the source given when initializing the compiler
/// into the given chunk.
pub fn compile(self: *Compiler, chunk: *Chunk) !bool {
self.scanr = try scanner.Scanner.init(self.allocator, self.src);
try self.advance();
while (!(try self.match(.EOF))) {
try self.declaration();
}
// try self.expression();
// try self.consume(.EOF, "Expect end of expression.");
try self.end();
return !self.parser.hadError;
}
};

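Two details in compiler.zig above are easy to miss: the rules table is indexed by @enumToInt(TokenType), so its entries must stay in exactly the same order as token.zig, and binary() parses its right-hand operand at one precedence level above the operator's own rule, which is what makes binary operators left-associative. A small illustrative sketch of that one-level climb (not from the repository, written against the same 2019-era builtins):

```zig
const std = @import("std");

// same ordering as compiler.zig's Precedence enum
const Precedence = enum(u5) {
    None,
    Assignment,
    Or,
    And,
    Equality,
    Comparison,
    Term,
    Factor,
    Unary,
    Call,
    Primary,
};

test "binary() climbs one precedence level (sketch)" {
    // for '-' the rule precedence is .Term; binary() then calls
    // parsePrecedence(Term + 1) == .Factor, so the right operand of
    // "1 - 2 - 3" stops before the second '-', grouping as (1 - 2) - 3.
    const next = @intToEnum(Precedence, @enumToInt(Precedence.Term) + 1);
    std.testing.expect(next == Precedence.Factor);
}
```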
128
src/main.zig Normal file
@@ -0,0 +1,128 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
// const Scanner = @import("scanner.zig").Scanner;
const chunk = @import("chunk.zig");
const vm = @import("vm.zig");
const InterpretResult = vm.InterpretResult;
//const Compiler = @import("compiler.zig").Compiler;
pub var hadError = false;
fn run(allocator: *Allocator, data: []u8) !void {
var stdout_file = try std.io.getStdOut();
const stdout = &stdout_file.outStream().stream;
var vmach = try vm.VM.init(allocator, stdout, true);
defer vmach.deinit();
try vmach.interpret(data);
}
fn runWithVM(vmach: *vm.VM, data: []u8) !void {
var stdout_file = try std.io.getStdOut();
const stdout = &stdout_file.outStream().stream;
// note: the VM is owned by the caller (the REPL loop); deinit-ing it here
// would tear down the long-running instance after every line.
try vmach.interpret(data);
}
pub fn doError(line: usize, message: []const u8) !void {
try errorReport(line, "", message);
}
pub fn errorReport(line: usize, where: []const u8, message: []const u8) !void {
var stdout_file = try std.io.getStdOut();
const stdout = &stdout_file.outStream().stream;
try stdout.print("[line {}] Error {}: {}\n", line, where, message);
hadError = true;
}
fn runFile(allocator: *Allocator, path: []const u8) !void {
var lox_file = try std.fs.File.openRead(path);
defer lox_file.close();
const total_bytes = try lox_file.getEndPos();
var slice = try allocator.alloc(u8, total_bytes);
_ = try lox_file.read(slice);
run(allocator, slice) catch |err| {
switch (err) {
InterpretResult.Ok => {},
InterpretResult.CompileError => std.os.exit(65),
InterpretResult.RuntimeError => std.os.exit(70),
else => return err,
}
};
}
fn runPrompt(allocator: *Allocator) !void {
var stdout_file = try std.io.getStdOut();
const stdout = &stdout_file.outStream().stream;
var vmach = try vm.VM.init(allocator, stdout, true);
defer vmach.deinit();
while (true) {
try stdout.print(">");
var buffer = try std.Buffer.init(allocator, ""[0..]);
var line = std.io.readLine(&buffer) catch |err| {
if (err == error.EndOfStream) return;
return err;
};
runWithVM(&vmach, line) catch |err| {
switch (err) {
InterpretResult.Ok => {},
InterpretResult.CompileError => blk: {
try stdout.print("compile error.\n");
},
InterpretResult.RuntimeError => blk: {
try stdout.print("runtime error.\n");
},
else => return err,
}
};
vmach.resetStack();
}
}
pub fn main() anyerror!void {
var arena = std.heap.ArenaAllocator.init(std.heap.direct_allocator);
defer arena.deinit();
var allocator = &arena.allocator;
var args_it = std.process.args();
var jorts_arg0 = try (args_it.next(allocator) orelse {
// if you ever reach this, tell me what is your os lmao
unreachable;
});
var lox_path = try (args_it.next(allocator) orelse {
try runPrompt(allocator);
return;
});
try runFile(allocator, lox_path);
}
pub fn oldMain() !void {
var arena = std.heap.ArenaAllocator.init(std.heap.direct_allocator);
defer arena.deinit();
var allocator = &arena.allocator;
var stdout_file = try std.io.getStdOut();
var stdout = &stdout_file.outStream().stream;
// this crashes zig??? lol
// var chk = try chunk.Chunk.init(allocator);
//var opcode_byte: u8 = @enumToInt(chunk.OpCode.Return);
//try chk.write(chunk.OpCode.Return);
}

54
src/object.zig Normal file
@@ -0,0 +1,54 @@
const std = @import("std");
const vm = @import("vm.zig");
const Allocator = std.mem.Allocator;
pub const ObjType = enum {
String,
};
pub const ObjValue = struct {
String: []u8,
};
pub const Object = struct {
otype: ObjType,
value: ObjValue,
next: ?*Object = null,
};
pub fn allocateObject(
vmach: *vm.VM,
otype: ObjType,
value: ObjValue,
) !*Object {
var obj = try vmach.allocator.create(Object);
obj.otype = otype;
obj.value = value;
obj.next = vmach.objs;
vmach.objs = obj;
return obj;
}
fn createString(vmach: *vm.VM, data: []u8) !*Object {
return allocateObject(vmach, ObjType.String, ObjValue{ .String = data });
}
pub fn copyString(vmach: *vm.VM, data: []const u8) !*Object {
var str = try vmach.allocator.alloc(u8, data.len);
std.mem.copy(u8, str, data);
return try createString(vmach, str);
}
/// Assumes it can take ownership of the given data.
pub fn takeString(vmach: *vm.VM, data: []u8) !*Object {
return try createString(vmach, data);
}
pub fn printObject(stdout: var, obj: Object) !void {
switch (obj.otype) {
.String => try stdout.print("{}", obj.value.String),
else => unreachable,
}
}

276
src/scanner.zig Normal file
@@ -0,0 +1,276 @@
const std = @import("std");
const tokens = @import("token.zig");
const Token = tokens.Token;
const TokenType = tokens.TokenType;
const Allocator = std.mem.Allocator;
pub const TokenError = error{
Unexpected,
Unterminated,
};
fn isDigit(char: u8) bool {
return char >= '0' and char <= '9';
}
fn isAlpha(c: u8) bool {
return (c >= 'a' and c <= 'z') or
(c >= 'A' and c <= 'Z') or
c == '_';
}
fn isAlphaNumeric(char: u8) bool {
return isAlpha(char) or isDigit(char);
}
pub const KeywordMap = std.StringHashMap(u6);
/// The book does say that C doesn't have hashmaps. but Zig does. and I can
/// use it here.
fn initKeywordMap(allocator: *std.mem.Allocator) !KeywordMap {
var map = KeywordMap.init(allocator);
const keywords = [_][]const u8{
"and"[0..],
"class"[0..],
"else"[0..],
"false"[0..],
"for"[0..],
"fun"[0..],
"if"[0..],
"nil"[0..],
"or"[0..],
"print"[0..],
"return"[0..],
"super"[0..],
"this"[0..],
"true"[0..],
"var"[0..],
"while"[0..],
};
const tags = [_]TokenType{
TokenType.AND,
TokenType.CLASS,
TokenType.ELSE,
TokenType.FALSE,
TokenType.FOR,
TokenType.FUN,
TokenType.IF,
TokenType.NIL,
TokenType.OR,
TokenType.PRINT,
TokenType.RETURN,
TokenType.SUPER,
TokenType.THIS,
TokenType.TRUE,
TokenType.VAR,
TokenType.WHILE,
};
for (keywords) |keyword, idx| {
var tag = @enumToInt(tags[idx]);
_ = try map.put(keyword, tag);
}
return map;
}
pub const Scanner = struct {
source: []const u8,
keywords: KeywordMap,
start: usize = 0,
current: usize = 0,
line: usize = 1,
allocator: *Allocator,
pub fn init(allocator: *Allocator, data: []const u8) !Scanner {
return Scanner{
.source = data,
.keywords = try initKeywordMap(allocator),
.allocator = allocator,
};
}
fn isAtEnd(self: *Scanner) bool {
return self.current >= self.source.len;
}
fn advance(self: *Scanner) u8 {
self.current += 1;
return self.source[self.current - 1];
}
pub fn currentLexeme(self: *Scanner) []const u8 {
return self.source[self.start..self.current];
}
fn makeToken(self: *Scanner, ttype: TokenType) Token {
return Token{
.ttype = ttype,
.lexeme = self.currentLexeme(),
.line = self.line,
};
}
/// Check if the next character matches what is expected.
fn match(self: *Scanner, expected: u8) bool {
if (self.isAtEnd()) return false;
if (self.source[self.current] != expected) return false;
self.current += 1;
return true;
}
/// Make a Token of type_match if the next character is `expected`,
/// otherwise a Token of type_nomatch.
fn makeMatchToken(
self: *Scanner,
expected: u8,
type_match: TokenType,
type_nomatch: TokenType,
) Token {
if (self.match(expected)) {
return self.makeToken(type_match);
} else {
return self.makeToken(type_nomatch);
}
}
fn peek(self: *Scanner) u8 {
if (self.isAtEnd()) return 0;
return self.source[self.current];
}
fn peekNext(self: *Scanner) u8 {
if (self.current + 1 >= self.source.len) return 0;
return self.source[self.current + 1];
}
fn skipWhitespace(self: *Scanner) void {
while (true) {
var c = self.peek();
switch (c) {
' ', '\r', '\t' => blk: {
_ = self.advance();
},
'\n' => blk: {
self.line += 1;
_ = self.advance();
},
else => return,
}
}
}
fn doString(self: *Scanner) !Token {
// consume entire string
while (self.peek() != '"' and !self.isAtEnd()) {
if (self.peek() == '\n') self.line += 1;
_ = self.advance();
}
// unterminated string.
if (self.isAtEnd()) {
return TokenError.Unterminated;
}
// the closing ".
_ = self.advance();
// note: the lexeme still includes the surrounding quotes; the compiler
// trims them when it interns the string.
return self.makeToken(.STRING);
}
/// Consume a number
fn doNumber(self: *Scanner) Token {
while (isDigit(self.peek())) {
_ = self.advance();
}
// check if it's a number like 12.34, where the '.' character
// exists and the one next to it is a digit.
if (self.peek() == '.' and isDigit(self.peekNext())) {
_ = self.advance();
while (isDigit(self.peek())) {
_ = self.advance();
}
}
return self.makeToken(.NUMBER);
}
/// Either a keyword or an identifier come out of this.
fn doIdentifier(self: *Scanner) Token {
while (isAlphaNumeric(self.peek())) {
_ = self.advance();
}
// after reading the identifier, we check
// if it is any of our keywords, if it is, then we add
// the specified keyword type. if not, just .IDENTIFIER
var text = self.source[self.start..self.current];
var type_opt = self.keywords.get(text);
var toktype: TokenType = undefined;
if (type_opt) |kv| {
toktype = @intToEnum(TokenType, kv.value);
} else {
toktype = TokenType.IDENTIFIER;
}
return self.makeToken(toktype);
}
pub fn scanToken(self: *Scanner) !?Token {
self.skipWhitespace();
self.start = self.current;
if (self.isAtEnd()) return self.makeToken(TokenType.EOF);
var c = self.advance();
if (isAlpha(c)) return self.doIdentifier();
if (isDigit(c)) return self.doNumber();
var token = switch (c) {
'(' => self.makeToken(.LEFT_PAREN),
')' => self.makeToken(.RIGHT_PAREN),
'{' => self.makeToken(.LEFT_BRACE),
'}' => self.makeToken(.RIGHT_BRACE),
',' => self.makeToken(.COMMA),
'.' => self.makeToken(.DOT),
'-' => self.makeToken(.MINUS),
'+' => self.makeToken(.PLUS),
';' => self.makeToken(.SEMICOLON),
'*' => self.makeToken(.STAR),
'!' => self.makeMatchToken('=', .BANG_EQUAL, .BANG),
'=' => self.makeMatchToken('=', .EQUAL_EQUAL, .EQUAL),
'<' => self.makeMatchToken('=', .LESS_EQUAL, .LESS),
'>' => self.makeMatchToken('=', .GREATER_EQUAL, .GREATER),
'/' => blk: {
if (self.peekNext() == '/') {
while (self.peek() != '\n' and !self.isAtEnd()) {
_ = self.advance();
}
break :blk null;
} else {
break :blk self.makeToken(.SLASH);
}
},
'"' => try self.doString(),
else => return TokenError.Unexpected,
};
return token;
}
};

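To make the scanner's contract concrete: scanToken() returns one Token per call, returns null only after it has swallowed a // comment, and ends the stream with an EOF token. Below is a hypothetical test of that behaviour (the input string and test are mine, not from the repository); it borrows std.heap.direct_allocator the same way main.zig in this diff does.

```zig
const std = @import("std");
const scanner = @import("scanner.zig");
const tokens = @import("token.zig");

test "scanning a small declaration (sketch)" {
    var arena = std.heap.ArenaAllocator.init(std.heap.direct_allocator);
    defer arena.deinit();

    var scanr = try scanner.Scanner.init(&arena.allocator, "var answer = 42;");
    const expected = [_]tokens.TokenType{
        tokens.TokenType.VAR,       tokens.TokenType.IDENTIFIER,
        tokens.TokenType.EQUAL,     tokens.TokenType.NUMBER,
        tokens.TokenType.SEMICOLON, tokens.TokenType.EOF,
    };
    for (expected) |ttype| {
        // null only happens after a '//' comment, which this input has none of
        const tok = (try scanr.scanToken()) orelse unreachable;
        std.testing.expect(tok.ttype == ttype);
    }
}
```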
57
src/token.zig Normal file
@@ -0,0 +1,57 @@
const std = @import("std");
pub const TokenType = enum(u6) {
// Single-character tokens.
LEFT_PAREN,
RIGHT_PAREN,
LEFT_BRACE,
RIGHT_BRACE,
COMMA,
DOT,
MINUS,
PLUS,
SEMICOLON,
SLASH,
STAR,
// One or two character tokens.
BANG,
BANG_EQUAL,
EQUAL,
EQUAL_EQUAL,
GREATER,
GREATER_EQUAL,
LESS,
LESS_EQUAL,
// Literals.
IDENTIFIER,
STRING,
NUMBER,
// Keywords.
AND,
CLASS,
ELSE,
FALSE,
FUN,
FOR,
IF,
NIL,
OR,
PRINT,
RETURN,
SUPER,
THIS,
TRUE,
VAR,
WHILE,
EOF,
};
pub const Token = struct {
ttype: TokenType = TokenType.EOF,
lexeme: []const u8 = ""[0..],
line: usize = 0,
};

80
src/value.zig Normal file
@@ -0,0 +1,80 @@
const std = @import("std");
const objects = @import("object.zig");
const Allocator = std.mem.Allocator;
pub const ValueType = enum(u8) {
Bool,
Nil,
Number,
Object,
};
pub const ValueValue = union(ValueType) {
Bool: bool,
Nil: void,
Number: f64,
Object: *objects.Object,
};
pub const Value = struct {
vtype: ValueType,
as: ValueValue,
};
// helper functions
pub fn BoolVal(val: bool) Value {
return Value{ .vtype = .Bool, .as = ValueValue{ .Bool = val } };
}
pub fn NilVal() Value {
return Value{ .vtype = .Nil, .as = ValueValue{ .Nil = {} } };
}
pub fn NumberVal(val: f64) Value {
return Value{ .vtype = .Number, .as = ValueValue{ .Number = val } };
}
pub fn ObjVal(val: *objects.Object) Value {
return Value{ .vtype = .Object, .as = ValueValue{ .Object = val } };
}
pub fn isObjType(val: Value, otype: objects.ObjType) bool {
return val.vtype == .Object and val.as.Object.otype == otype;
}
pub fn printValue(stdout: var, value: Value) !void {
switch (value.as) {
.Nil => try stdout.print("nil"),
.Bool => try stdout.print("{}", value.as.Bool),
.Number => try stdout.print("{}", value.as.Number),
.Object => try objects.printObject(stdout, value.as.Object.*),
else => unreachable,
}
}
pub const ValueList = struct {
count: usize,
values: []Value,
allocator: *Allocator,
pub fn init(allocator: *Allocator) !ValueList {
return ValueList{
.count = 0,
.allocator = allocator,
.values = try allocator.alloc(Value, 0),
};
}
pub fn write(self: *ValueList, value: Value) !void {
if (self.values.len < self.count + 1) {
self.values = try self.allocator.realloc(
self.values,
self.count + 1,
);
}
self.values[self.count] = value;
self.count += 1;
}
};

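Since Value carries its own vtype tag alongside the as union, the helper constructors above are how the rest of the code is meant to build values. A tiny illustrative sketch (the test itself is mine, not from the repository):

```zig
const std = @import("std");
const values = @import("value.zig");
const objects = @import("object.zig");

test "tagged value helpers (sketch)" {
    const n = values.NumberVal(2.5);
    std.testing.expect(n.vtype == values.ValueType.Number);
    std.testing.expect(n.as.Number == 2.5);

    const nothing = values.NilVal();
    std.testing.expect(nothing.vtype == values.ValueType.Nil);

    // isObjType guards the string-only paths (e.g. VM.concatenateStrings);
    // a number is never an object, so the check short-circuits to false.
    std.testing.expect(!values.isObjType(n, objects.ObjType.String));
}
```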
411
src/vm.zig Normal file
@@ -0,0 +1,411 @@
const std = @import("std");
const chunk = @import("chunk.zig");
const value = @import("value.zig");
const values = value;
const compiler = @import("compiler.zig");
const objects = @import("object.zig");
const Chunk = chunk.Chunk;
const Value = value.Value;
const Compiler = compiler.Compiler;
pub const StdOut = *std.io.OutStream(std.fs.File.WriteError);
pub const InterpretResult = error{
Ok,
CompileError,
RuntimeError,
};
fn isFalsey(val: value.Value) bool {
return val.vtype == .Nil or (val.vtype == .Bool and !val.as.Bool);
}
fn valuesEqual(a: value.Value, b: value.Value) bool {
if (a.vtype != b.vtype) return false;
switch (a.vtype) {
.Nil => return true,
.Bool => return a.as.Bool == b.as.Bool,
.Number => return a.as.Number == b.as.Number,
.Object => blk: {
var aStr = a.as.Object.value.String;
var bStr = b.as.Object.value.String;
return std.mem.compare(u8, aStr, bStr) == .Equal;
},
}
}
pub const ValueMap = std.StringHashMap(values.Value);
pub const VM = struct {
chk: *Chunk = undefined,
ip: usize = 0,
stack: []Value,
stackTop: usize = 0,
stdout: StdOut,
debug_flag: bool,
allocator: *std.mem.Allocator,
objs: ?*objects.Object = null,
globals: ValueMap,
fn resetStack(self: *VM) void {
self.stackTop = 0;
}
pub fn init(
allocator: *std.mem.Allocator,
stdout: StdOut,
debug_flag: bool,
) !VM {
var self = VM{
.stack = try allocator.alloc(Value, 256),
.stdout = stdout,
.debug_flag = debug_flag,
.allocator = allocator,
.globals = ValueMap.init(allocator),
};
self.resetStack();
return self;
}
fn deinitObject(self: *VM, obj: *objects.Object) void {
switch (obj.otype) {
.String => blk: {
self.allocator.free(obj.value.String);
self.allocator.destroy(obj);
break :blk;
},
else => unreachable,
}
}
fn deinitObjects(self: *VM) void {
var obj_opt: ?*objects.Object = self.objs;
// a while (obj != null) walk over the object list, expressed with optionals
while (true) {
if (obj_opt) |obj| {
var next = obj.next;
self.deinitObject(obj);
obj_opt = next;
} else {
break;
}
}
}
pub fn deinit(self: *VM) void {
self.globals.deinit();
self.deinitObjects();
}
pub fn debug(self: *VM, comptime fmt: []const u8, args: ...) void {
if (self.debug_flag) {
std.debug.warn(fmt, args);
}
}
fn readByte(self: *VM) u8 {
var byte: u8 = self.chk.code[self.ip];
self.ip += 1;
return byte;
}
fn readConst(self: *VM) Value {
return self.chk.constants.values[self.readByte()];
}
fn readConstLong(self: *VM) Value {
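// the "long" constant form stores a 24-bit index as three bytes,
// most significant byte first.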
const v3 = self.readByte();
const v2 = self.readByte();
const v1 = self.readByte();
const const_idx = (@intCast(u24, v3) << 16) |
(@intCast(u24, v2) << 8) |
v1;
return self.chk.constants.values[const_idx];
}
fn debugStack(self: *VM) !void {
try self.stdout.print(" ");
for (self.stack) |val, idx| {
if (idx >= self.stackTop) break;
try self.stdout.print("[ ");
try value.printValue(self.stdout, val);
try self.stdout.print(" ]");
}
try self.stdout.print("\n");
}
/// pops the top of the stack and returns it as an f64; reports a runtime
/// error if the value is not a number.
fn popNum(self: *VM) !f64 {
var val: Value = self.pop();
switch (val.vtype) {
.Number => return val.as.Number,
else => |vtype| blk: {
self.runtimeError("Expected number, got {x}", vtype);
return InterpretResult.RuntimeError;
},
}
}
fn concatenateStrings(self: *VM) !void {
var b = self.pop().as.Object.value.String;
var a = self.pop().as.Object.value.String;
var res_str = try std.mem.join(
self.allocator,
"",
[_][]u8{ a, b },
);
var val = values.ObjVal(try objects.takeString(self, res_str));
try self.push(val);
}
fn doAdd(self: *VM) !void {
if (values.isObjType(self.peek(0), .String) and
values.isObjType(self.peek(1), .String))
{
return try self.concatenateStrings();
}
var b = try self.popNum();
var a = try self.popNum();
try self.push(values.NumberVal(a + b));
}
fn doSub(self: *VM) !void {
var b = try self.popNum();
var a = try self.popNum();
try self.push(values.NumberVal(a - b));
}
fn doMul(self: *VM) !void {
var b = try self.popNum();
var a = try self.popNum();
try self.push(values.NumberVal(a * b));
}
fn doDiv(self: *VM) !void {
var b = try self.popNum();
var a = try self.popNum();
try self.push(values.NumberVal(a / b));
}
fn doGreater(self: *VM) !void {
var b = try self.popNum();
var a = try self.popNum();
try self.push(values.BoolVal(a > b));
}
fn doLess(self: *VM) !void {
var b = try self.popNum();
var a = try self.popNum();
try self.push(values.BoolVal(a < b));
}
fn runtimeError(self: *VM, comptime fmt: []const u8, args: ...) void {
std.debug.warn(fmt, args);
std.debug.warn("\n[line {}] in script\n", self.chk.lines[self.ip]);
self.resetStack();
}
fn defGlobal(self: *VM, name: []const u8) !void {
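// the value is read with peek() and only popped after the table insert,
// following clox's defineGlobal ordering.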
_ = try self.globals.put(name, self.peek(0));
_ = self.pop();
}
fn readString(self: *VM) []u8 {
return self.readConst().as.Object.value.String;
}
fn readStringLong(self: *VM) []u8 {
return self.readConstLong().as.Object.value.String;
}
fn doGetGlobal(self: *VM, name: []u8) !void {
var kv_opt = self.globals.get(name);
if (kv_opt) |kv| {
try self.push(kv.value);
} else {
self.runtimeError("Undefined variable '{}'.", name);
return InterpretResult.RuntimeError;
}
}
fn doSetGlobal(self: *VM, name: []u8) !void {
var res = try self.globals.getOrPut(name);
if (res.found_existing) {
res.kv.value = self.peek(0);
} else {
// getOrPut inserted a fresh entry for the name; remove it again so an
// assignment to an undefined variable leaves the globals table untouched.
_ = self.globals.remove(name);
self.runtimeError("Undefined variable '{}'.", name);
return InterpretResult.RuntimeError;
}
}
fn run(self: *VM) !void {
while (true) {
if (self.debug_flag) {
try self.debugStack();
_ = try self.chk.disassembleInstruction(self.stdout, self.ip);
}
var instruction = self.readByte();
switch (instruction) {
chunk.OpCode.Constant => blk: {
var constant = self.readConst();
try self.push(constant);
break :blk;
},
chunk.OpCode.ConstantLong => blk: {
var constant = self.readConstLong();
try self.push(constant);
break :blk;
},
chunk.OpCode.Print => blk: {
try value.printValue(self.stdout, self.pop());
try self.stdout.print("\n");
break :blk;
},
chunk.OpCode.Return => blk: {
// Exit VM
return InterpretResult.Ok;
},
chunk.OpCode.Nil => try self.push(values.NilVal()),
chunk.OpCode.True => try self.push(values.BoolVal(true)),
chunk.OpCode.False => try self.push(values.BoolVal(false)),
chunk.OpCode.Pop => blk: {
_ = self.pop();
},
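// locals live directly on the value stack; the byte operand is the
// stack slot the local occupies.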
chunk.OpCode.GetLocal => blk: {
var slot = self.readByte();
try self.push(self.stack[slot]);
},
chunk.OpCode.SetLocal => blk: {
var slot = self.readByte();
self.stack[slot] = self.peek(0);
},
chunk.OpCode.GetGlobal => blk: {
try self.doGetGlobal(self.readString());
},
chunk.OpCode.SetGlobal => blk: {
try self.doSetGlobal(self.readString());
break :blk;
},
// extracting the name differs between the two op codes: DefineGlobal
// reads a single-byte constant index, while DefineGlobalLong reads a
// three-byte (u24) index.
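// e.g. a DefineGlobalLong whose name sits in constant slot 300 is followed
// by the bytes 0x00 0x01 0x2C, given how readConstLong() decodes them
// (most significant byte first).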
chunk.OpCode.DefineGlobal => blk: {
try self.defGlobal(self.readString());
break :blk;
},
chunk.OpCode.DefineGlobalLong => blk: {
try self.defGlobal(self.readStringLong());
break :blk;
},
chunk.OpCode.Equal => blk: {
var a = self.pop();
var b = self.pop();
try self.push(values.BoolVal(valuesEqual(a, b)));
},
chunk.OpCode.Greater => try self.doGreater(),
chunk.OpCode.Less => try self.doLess(),
chunk.OpCode.Add => try self.doAdd(),
chunk.OpCode.Subtract => try self.doSub(),
chunk.OpCode.Multiply => try self.doMul(),
chunk.OpCode.Divide => try self.doDiv(),
chunk.OpCode.Not => blk: {
try self.push(values.BoolVal(isFalsey(self.pop())));
},
chunk.OpCode.Negate => blk: {
var val = self.peek(0);
if (val.vtype != .Number) {
self.runtimeError("Operand must be a number.");
return InterpretResult.RuntimeError;
}
val = self.pop();
switch (val.as) {
.Number => |num| {
try self.push(values.NumberVal(-num));
},
else => unreachable,
}
},
else => blk: {
std.debug.warn("Unknown instruction: {x}\n", instruction);
return InterpretResult.RuntimeError;
},
}
}
}
pub fn interpret(self: *VM, src: []const u8) !void {
//self.ip = 0;
//self.debug("VM start\n");
//var res = try self.run();
//self.debug("VM end\n");
//return res;
var chk = try Chunk.init(self.allocator);
var cmpr = Compiler.init(
self.allocator,
&chk,
self.stdout,
src,
self.debug_flag,
self,
);
if (!try cmpr.compile(&chk)) {
return InterpretResult.CompileError;
}
self.chk = &chk;
self.ip = 0;
return try self.run();
}
pub fn push(self: *VM, val: Value) !void {
// grow the stack when it is full so the write at stackTop stays in bounds.
if (self.stackTop >= self.stack.len) {
self.stack = try self.allocator.realloc(self.stack, self.stack.len * 2);
}
self.stack[self.stackTop] = val;
self.stackTop += 1;
}
pub fn pop(self: *VM) Value {
self.stackTop -= 1;
return self.stack[self.stackTop];
}
pub fn peek(self: *VM, distance: usize) Value {
return self.stack[self.stackTop - 1 - distance];
}
};