move to zig

parent 3d26da0144
commit b3ea9637bd

11 changed files with 23 additions and 603 deletions
.gitignore (vendored, 117 changes)
@@ -1,116 +1 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-pip-wheel-metadata/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# PyInstaller
-# Usually these files are written by a python script from a template
-# before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-.hypothesis/
-.pytest_cache/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
-.python-version
-
-# celery beat schedule file
-celerybeat-schedule
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
+zig-cache/
README.md (18 changes)
@@ -1,19 +1,5 @@
 # jorts
 
 jorts programming language
 an interpreter for the lox language from https://craftinginterpreters.com
-
-## installing
-
-```sh
-git clone https://gitdab.com/luna/jorts
-cd jorts
-pip install --user --editable .
-```
-
-## using
-
-right now, its not pretty, nor finished
-
-```
-cat examples/hello.jt | jortsc
-```
 this is a learning project.
build.zig (new file, 15 lines)
@@ -0,0 +1,15 @@
+const Builder = @import("std").build.Builder;
+
+pub fn build(b: *Builder) void {
+    const mode = b.standardReleaseOptions();
+    const exe = b.addExecutable("jorts", "src/main.zig");
+    exe.setBuildMode(mode);
+
+    const run_cmd = exe.run();
+
+    const run_step = b.step("run", "Run the app");
+    run_step.dependOn(&run_cmd.step);
+
+    b.default_step.dependOn(&exe.step);
+    b.installArtifact(exe);
+}
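For reference, assuming a Zig toolchain contemporary with this build API (standardReleaseOptions and the two-argument addExecutable are from the pre-0.6 era), the script above is driven with:

```sh
zig build        # compile jorts and install the artifact
zig build run    # invoke the "run" step declared in build.zig
```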
jortsc/__init__.py (deleted file; path inferred from the package imports)
@@ -1,3 +0,0 @@
-from .main import main
-
-__all__ = ['main']
jortsc/main.py (deleted file; path inferred from the package imports)
@@ -1,25 +0,0 @@
-#!/usr/bin/python3
-
-import sys
-import pprint
-import logging
-
-from jortsc.parser.lexer import lex_jorts
-# from jortsc.parser.parser import parse
-
-logging.basicConfig(level=logging.DEBUG)
-
-def main():
-    """main entry point"""
-    try:
-        in_data = sys.stdin.read().strip()
-    except EOFError:
-        pass
-
-    print(repr(in_data))
-    tokens = lex_jorts(in_data)
-    pprint.pprint(tokens)
-
-
-if __name__ == '__main__':
-    main()
jortsc/parser/ast_nodes.py (deleted file; path inferred from the package imports)
@@ -1,45 +0,0 @@
-from dataclasses import dataclass
-
-@dataclass
-class TypedVar:
-    type_: str
-    name: str
-
-
-@dataclass
-class ReturnType:
-    type_: str
-
-
-@dataclass
-class Function:
-    name: str
-    arguments: str
-    ret_type: ReturnType
-    block: list
-
-
-@dataclass
-class Identifier:
-    name: str
-
-
-@dataclass
-class Import:
-    module: str
-
-
-@dataclass
-class String:
-    value: str
-
-
-@dataclass
-class Number:
-    value: str
-
-
-@dataclass
-class FunctionCall:
-    function: str
-    args: list
@ -1,112 +0,0 @@
|
|||
import re
|
||||
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum, auto
|
||||
|
||||
|
||||
class TokenType(Enum):
|
||||
"""Defines the type of a token"""
|
||||
reserved = auto()
|
||||
identifier = auto()
|
||||
comment = auto()
|
||||
comment_start = auto()
|
||||
comment_end = auto()
|
||||
whitespace = auto()
|
||||
number = auto()
|
||||
string = auto()
|
||||
|
||||
|
||||
@dataclass
|
||||
class Token:
|
||||
value: str
|
||||
type_: TokenType
|
||||
|
||||
|
||||
class LexerError(Exception):
|
||||
"""Lexer error."""
|
||||
pass
|
||||
|
||||
|
||||
TOKENS = [
|
||||
(r'[ \n\t]+', TokenType.whitespace),
|
||||
|
||||
# single line comments and multiline comments
|
||||
(r'//[^\n]*', TokenType.comment),
|
||||
|
||||
# TODO: shouldnt this be /* <anything> */ instead of
|
||||
# only tokenizing on the start and end?
|
||||
(r'/\*', TokenType.comment_start),
|
||||
(r'\*/', TokenType.comment_end),
|
||||
|
||||
(r'fn', TokenType.reserved),
|
||||
(r'if', TokenType.reserved),
|
||||
(r'import', TokenType.reserved),
|
||||
|
||||
(r'\(', TokenType.reserved),
|
||||
(r'\)', TokenType.reserved),
|
||||
|
||||
(r'\{', TokenType.reserved),
|
||||
(r'\}', TokenType.reserved),
|
||||
|
||||
(r'\-\>', TokenType.reserved),
|
||||
(r'\.', TokenType.reserved),
|
||||
|
||||
(r'\"[^\n]*\"', TokenType.string),
|
||||
|
||||
# basic math ops
|
||||
(r'[\+\-\/\*]', TokenType.reserved),
|
||||
|
||||
(r'[0-9]+', TokenType.number),
|
||||
(r'[A-Za-z][A-Za-z0-9_]*', TokenType.identifier)
|
||||
]
|
||||
|
||||
|
||||
def lex(string: str, token_defs: list) -> list:
|
||||
"""Generate tokens out of the given string."""
|
||||
pos = 0
|
||||
strlen = len(string)
|
||||
tokens = []
|
||||
|
||||
# generate a dict for compiled regexes out of the token defs
|
||||
# instead of compiling on each token definition per token.
|
||||
compiled = {pattern: re.compile(pattern)
|
||||
for pattern, _ in token_defs}
|
||||
|
||||
# we use this instead of for pos in range(len(string)) because we
|
||||
# need to increment pos to a whole token length's, and that wouldn't
|
||||
# be easy on a for .. in range(..)
|
||||
while pos < strlen:
|
||||
valid = False
|
||||
|
||||
for definition in token_defs:
|
||||
pattern, tok_type = definition
|
||||
regex = compiled[pattern]
|
||||
|
||||
match = regex.match(string, pos)
|
||||
|
||||
if not match:
|
||||
continue
|
||||
|
||||
text = match.group(0)
|
||||
|
||||
# update pos to the end of the token
|
||||
pos = match.end(0)
|
||||
|
||||
valid = True
|
||||
tokens.append(Token(text, tok_type))
|
||||
|
||||
# go to next token instead of checking other
|
||||
# definitions for tokens, e.g if its a reserved token
|
||||
# we shouldn't go down the path of an identifier.
|
||||
break
|
||||
|
||||
if not valid:
|
||||
print(f'context: {pos} {len(string)} {string[pos-1:pos+20]!r}')
|
||||
raise LexerError(f'Invalid character: {string[pos]!r}')
|
||||
|
||||
return tokens
|
||||
|
||||
|
||||
def lex_jorts(string: str) -> list:
|
||||
"""Lex with the jorts token definitions"""
|
||||
return lex(string, TOKENS)
|
|
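For reference, a quick sketch of what this (now deleted) lexer produces, assuming the module layout above. Because lex() takes the first definition that matches at pos, keyword patterns win over the identifier pattern even mid-word:

```python
from jortsc.parser.lexer import lex_jorts

# keywords are tried before the identifier rule, so 'fn' lexes as
# a reserved token and 'main' as an identifier, roughly:
# [Token('fn', reserved), Token(' ', whitespace), Token('main', identifier), ...]
print(lex_jorts('fn main () {}'))

# a caveat of first-match ordering: 'fnord' lexes as 'fn' + 'ord',
# not as a single identifier
print(lex_jorts('fnord'))
```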
@ -1,272 +0,0 @@
|
|||
from typing import Optional, Any, List
|
||||
|
||||
from jortsc.parser.lexer import Token, TokenType
|
||||
from jortsc.parser.ast_nodes import (
|
||||
Function, TypedVar, Identifier, Import, ReturnType, String, Number,
|
||||
FunctionCall
|
||||
)
|
||||
|
||||
|
||||
class ParseError(Exception):
|
||||
"""Represents a parse error."""
|
||||
pass
|
||||
|
||||
|
||||
class Reader:
|
||||
"""Main reader class"""
|
||||
def __init__(self, tokens: List[Token]):
|
||||
self.tokens = tokens
|
||||
self.cur = 0
|
||||
|
||||
def __repr__(self):
|
||||
return (f'<Reader cur={self.cur} tot={len(self.tokens)} '
|
||||
f'cur_tok={self.peek()}>')
|
||||
|
||||
def peek(self) -> Optional[Token]:
|
||||
"""Peek at the current token."""
|
||||
try:
|
||||
token = self.tokens[self.cur]
|
||||
return token
|
||||
except IndexError:
|
||||
return None
|
||||
|
||||
def next(self) -> Optional[Token]:
|
||||
"""Fetch the current token then skip to the next one."""
|
||||
token = self.peek()
|
||||
self.cur += 1
|
||||
return token
|
||||
|
||||
def expect(self, token_type: TokenType) -> Token:
|
||||
"""Check for a specific token type and error if it fails"""
|
||||
token = self.next()
|
||||
|
||||
if token.type_ != token_type:
|
||||
raise ParseError(f'Expected {token_type}, got '
|
||||
f'{token.type_} {token.value!r}')
|
||||
|
||||
return token
|
||||
|
||||
def expect_val(self, value: str) -> Token:
|
||||
"""Check the next token to see if it matches against a given value,
|
||||
instead of a type."""
|
||||
token = self.next()
|
||||
|
||||
if token.value != value:
|
||||
raise ParseError(f'Expected {value!r}, got '
|
||||
f'{token.type_} {token.value!r}')
|
||||
|
||||
return token
|
||||
|
||||
|
||||
def next_safe(self) -> Token:
|
||||
"""'Safe' version of next().
|
||||
|
||||
Raises an 'Unexpected EOF' error if next() returns None.
|
||||
"""
|
||||
token = self.next()
|
||||
|
||||
if token is None:
|
||||
raise ParseError('Unexpected EOF')
|
||||
|
||||
return token
|
||||
|
||||
def ignore(self, token_type: TokenType):
|
||||
"""Only increase self.cur if token_type is the upcoming token."""
|
||||
try:
|
||||
assert self.tokens[self.cur].type_ == token_type
|
||||
self.cur += 1
|
||||
except AssertionError:
|
||||
pass
|
||||
|
||||
|
||||
def _fn_read_args(reader: Reader, cur: List = None) -> List:
|
||||
"""Recursively read the arguments of the function."""
|
||||
if cur is None:
|
||||
cur = []
|
||||
|
||||
# it can be an identifier for the arguments' type, OR a RPAREN
|
||||
# if it is rparen, we stop
|
||||
# if it isnt, we keep going until that happens
|
||||
token = reader.next_safe()
|
||||
|
||||
if token.value == ')':
|
||||
return cur
|
||||
|
||||
argtype = token
|
||||
reader.expect(TokenType.whitespace)
|
||||
argname = reader.next_safe()
|
||||
|
||||
cur.append(TypedVar(argtype.value, argname.value))
|
||||
return _fn_read_args(reader, cur)
|
||||
|
||||
|
||||
def _fn_ret_type(reader: Reader) -> ReturnType:
|
||||
"""Fetch the return type of a function. Defaults to void."""
|
||||
try:
|
||||
reader.expect_val('->')
|
||||
except ParseError:
|
||||
return ReturnType('void')
|
||||
|
||||
reader.ignore(TokenType.whitespace)
|
||||
token = reader.expect(TokenType.identifier)
|
||||
return ReturnType(token.value)
|
||||
|
||||
|
||||
def read_function(reader: Reader):
|
||||
"""Read a function block."""
|
||||
reader.expect(TokenType.whitespace)
|
||||
|
||||
token = reader.next()
|
||||
|
||||
fn_name = '_anonymous'
|
||||
fn_args = []
|
||||
|
||||
print('function token', token)
|
||||
|
||||
if token.type_ == TokenType.identifier:
|
||||
fn_name = token.value
|
||||
|
||||
reader.expect(TokenType.whitespace)
|
||||
reader.expect_val('(')
|
||||
|
||||
fn_args = _fn_read_args(reader)
|
||||
|
||||
reader.expect(TokenType.whitespace)
|
||||
fn_ret_type = _fn_ret_type(reader)
|
||||
|
||||
# only skip whitespace if we see it
|
||||
reader.ignore(TokenType.whitespace)
|
||||
block = read_start(reader)
|
||||
elif token.value == '(':
|
||||
fn_args = _fn_read_args(reader)
|
||||
fn_ret_type = _fn_ret_type(reader)
|
||||
block = read_start(reader)
|
||||
|
||||
print('final function', fn_name, fn_args, fn_ret_type, block)
|
||||
|
||||
return Function(fn_name, fn_args, fn_ret_type, block)
|
||||
|
||||
|
||||
def read_import(reader):
|
||||
"""Read an import"""
|
||||
reader.expect(TokenType.whitespace)
|
||||
module = reader.next_safe()
|
||||
return Import(module.value)
|
||||
|
||||
|
||||
HANDLERS = {
|
||||
'fn': read_function,
|
||||
'import': read_import,
|
||||
}
|
||||
|
||||
|
||||
def read_reserved(token: Token, reader: Reader):
|
||||
"""Read reserved statements."""
|
||||
try:
|
||||
handler = HANDLERS[token.value]
|
||||
except KeyError:
|
||||
raise ParseError(f'Unexpected reserved word {token.value!r}')
|
||||
|
||||
return handler(reader)
|
||||
|
||||
|
||||
def read_value(token: Token, _reader: Reader):
|
||||
"""Read a given value"""
|
||||
if token.type_ == TokenType.string:
|
||||
return String(token.value)
|
||||
elif token.type_ == TokenType.number:
|
||||
return Number(token.value)
|
||||
|
||||
|
||||
def read_statement(token: Token, reader: Reader):
|
||||
"""Read a statement"""
|
||||
# token is an identifier, so first check for a function call
|
||||
|
||||
# TODO: handle more things than a function call
|
||||
call_fn_name = token.value
|
||||
token = reader.expect_val('(')
|
||||
res = []
|
||||
|
||||
while True:
|
||||
token = reader.next_safe()
|
||||
|
||||
if token.value == ')':
|
||||
break
|
||||
|
||||
res.append(read_value(token, reader))
|
||||
|
||||
return FunctionCall(call_fn_name, res)
|
||||
|
||||
|
||||
def read_start(reader: Reader):
|
||||
"""Read the start of a program."""
|
||||
print('reader', reader)
|
||||
|
||||
token = reader.next()
|
||||
|
||||
if token is None:
|
||||
print('eof!')
|
||||
return None
|
||||
|
||||
ast = []
|
||||
res = []
|
||||
|
||||
# handle blocks
|
||||
if token.value == '{':
|
||||
# next can be a whitespace, or a }
|
||||
token = reader.next()
|
||||
|
||||
print('block start!, next:', token)
|
||||
|
||||
if token.type_ == TokenType.whitespace:
|
||||
# keep going on reading
|
||||
while True:
|
||||
token = reader.peek()
|
||||
print('block append', token)
|
||||
|
||||
if token.value == '}':
|
||||
print('block end')
|
||||
reader.next()
|
||||
break
|
||||
|
||||
res.extend(read_start(reader))
|
||||
elif token.value == '}':
|
||||
res = []
|
||||
|
||||
# import, fn, etc
|
||||
elif token.type_ == TokenType.reserved:
|
||||
res = read_reserved(token, reader)
|
||||
|
||||
elif token.type_ == TokenType.comment:
|
||||
return []
|
||||
|
||||
elif token.type_ == TokenType.identifier:
|
||||
res = read_statement(token, reader)
|
||||
else:
|
||||
res = read_value(token, reader)
|
||||
|
||||
ast.append(res)
|
||||
return ast
|
||||
|
||||
|
||||
def read_loop(reader: Reader):
|
||||
"""Read the AST."""
|
||||
final_ast = []
|
||||
|
||||
while True:
|
||||
ast = read_start(reader)
|
||||
|
||||
# break when eof
|
||||
if ast is None:
|
||||
break
|
||||
|
||||
# TODO: better ast cleanup
|
||||
|
||||
final_ast.append(ast)
|
||||
|
||||
return final_ast
|
||||
|
||||
|
||||
def syntatic(tokens: List[Token]):
|
||||
"""Create an AST out of the tokens."""
|
||||
return read_loop(Reader(tokens))
|
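For reference, a minimal sketch of the deleted pipeline end to end, assuming the module layout above; the sample program and the resulting AST shape follow from read_import and read_loop:

```python
from jortsc.parser.lexer import lex_jorts
from jortsc.parser.parser import syntatic

# 'import' is dispatched through HANDLERS to read_import, which
# consumes the whitespace token and takes the next token as the module
tokens = lex_jorts('import io')
print(syntatic(tokens))
# expected shape: [[Import(module='io')]]
```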
setup.py (14 changes)
@@ -1,14 +0,0 @@
-from setuptools import setup
-
-setup(
-    name='jortsc',
-    version='0.1',
-    py_modules=['jortsc'],
-    install_requires=[
-        'lark-parser==0.6.7'
-    ],
-    entry_points='''
-    [console_scripts]
-    jortsc=jortsc:main
-    '''
-)
src/main.zig (new file, 5 lines)
@@ -0,0 +1,5 @@
+const std = @import("std");
+
+pub fn main() anyerror!void {
+    std.debug.warn("All your base are belong to us.\n");
+}