move to zig

This commit is contained in:
Luna 2019-05-31 16:15:27 -03:00
parent 3d26da0144
commit b3ea9637bd
11 changed files with 23 additions and 603 deletions

117
.gitignore vendored
View file

@ -1,116 +1 @@
# Byte-compiled / optimized / DLL files zig-cache/
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/

View file

@ -1,19 +1,5 @@
# jorts # jorts
jorts programming language an interpreter for the lox language from https://craftinginterpreters.com
## installing this is a learning project.
```sh
git clone https://gitdab.com/luna/jorts
cd jorts
pip install --user --editable .
```
## using
right now, it's not pretty, nor finished
```
cat examples/hello.jt | jortsc
```

15
build.zig Normal file
View file

@ -0,0 +1,15 @@
const Builder = @import("std").build.Builder;

// Build script: compiles the `jorts` executable from src/main.zig and
// wires up a `zig build run` convenience step.
pub fn build(builder: *Builder) void {
    const release_mode = builder.standardReleaseOptions();

    const exe = builder.addExecutable("jorts", "src/main.zig");
    exe.setBuildMode(release_mode);

    // `zig build run` builds and then executes the binary
    const run_command = exe.run();
    const run = builder.step("run", "Run the app");
    run.dependOn(&run_command.step);

    // default step builds and installs the executable
    builder.default_step.dependOn(&exe.step);
    builder.installArtifact(exe);
}

View file

@ -1,3 +0,0 @@
from .main import main
__all__ = ['main']

View file

@ -1,25 +0,0 @@
#!/usr/bin/python3
import sys
import pprint
import logging
from jortsc.parser.lexer import lex_jorts
# from jortsc.parser.parser import parse
logging.basicConfig(level=logging.DEBUG)
def main():
    """Main entry point: read jorts source from stdin, lex it, and
    pretty-print the resulting token list.

    Returns early (no output) when stdin raises EOFError.
    """
    try:
        in_data = sys.stdin.read().strip()
    except EOFError:
        # Previously this fell through with `pass`, leaving `in_data`
        # unbound and crashing with NameError on the next line.
        return

    print(repr(in_data))

    tokens = lex_jorts(in_data)
    pprint.pprint(tokens)


if __name__ == '__main__':
    main()

View file

@ -1,45 +0,0 @@
from dataclasses import dataclass
@dataclass
class TypedVar:
    """A typed variable declaration, e.g. a function argument."""
    type_: str  # type name as written in the source
    name: str   # variable name
@dataclass
class ReturnType:
    """The declared return type of a function."""
    type_: str  # type name; the parser defaults this to 'void' when omitted
@dataclass
class Function:
    """A function definition."""
    name: str             # '_anonymous' for anonymous functions
    arguments: list       # list of TypedVar (was mis-annotated as str)
    ret_type: ReturnType  # declared or defaulted return type
    block: list           # parsed nodes making up the function body
@dataclass
class Identifier:
    """A bare identifier reference."""
    name: str  # identifier text as written in the source
@dataclass
class Import:
    """An `import <module>` statement."""
    module: str  # imported module name
@dataclass
class String:
    """A string literal.

    NOTE(review): the lexer's string pattern matches the surrounding
    double quotes, so `value` appears to include them — confirm.
    """
    value: str  # raw matched text of the literal
@dataclass
class Number:
    """A numeric literal."""
    value: str  # raw digit string; not converted to int by the parser
@dataclass
class FunctionCall:
    """A call of a function by name."""
    function: str  # callee name
    args: list     # parsed argument value nodes (String / Number)

View file

@ -1,112 +0,0 @@
import re
from dataclasses import dataclass
from enum import Enum, auto
class TokenType(Enum):
    """Defines the type of a token."""
    reserved = auto()       # keywords and punctuation: fn, if, import, parens, braces, ->, ., math ops
    identifier = auto()     # names matching [A-Za-z][A-Za-z0-9_]*
    comment = auto()        # single-line // comment
    comment_start = auto()  # /* opening a multiline comment
    comment_end = auto()    # */ closing a multiline comment
    whitespace = auto()     # runs of spaces, tabs, newlines
    number = auto()         # integer literal
    string = auto()         # double-quoted string literal
@dataclass
class Token:
    """A single lexed token: the matched source text plus its classification."""
    value: str        # exact text matched by the token's regex
    type_: TokenType  # which TOKENS definition matched
class LexerError(Exception):
    """Raised when no token definition matches the current input position."""
# Token definitions, tried in order: the first matching pattern wins, so
# reserved words must appear before the identifier pattern.
TOKENS = [
    (r'[ \n\t]+', TokenType.whitespace),

    # single line comments and multiline comments
    (r'//[^\n]*', TokenType.comment),

    # TODO: shouldnt this be /* <anything> */ instead of
    # only tokenizing on the start and end?
    (r'/\*', TokenType.comment_start),
    (r'\*/', TokenType.comment_end),

    # reserved words and punctuation
    (r'fn', TokenType.reserved),
    (r'if', TokenType.reserved),
    (r'import', TokenType.reserved),
    (r'\(', TokenType.reserved),
    (r'\)', TokenType.reserved),
    (r'\{', TokenType.reserved),
    (r'\}', TokenType.reserved),
    (r'\-\>', TokenType.reserved),
    (r'\.', TokenType.reserved),

    # string literal: excluding '"' from the character class (previously
    # the greedy [^\n]* would merge two strings on one line into a
    # single token, e.g. `"a" "b"`)
    (r'\"[^"\n]*\"', TokenType.string),

    # basic math ops
    (r'[\+\-\/\*]', TokenType.reserved),

    (r'[0-9]+', TokenType.number),
    (r'[A-Za-z][A-Za-z0-9_]*', TokenType.identifier)
]
def lex(string: str, token_defs: list) -> list:
    """Generate tokens out of the given string.

    Definitions are tried in order at the current position; the first
    pattern that matches wins and the cursor advances past the match.

    Raises LexerError when no definition matches.
    """
    # compile every pattern exactly once, instead of per position
    compiled = {pattern: re.compile(pattern)
                for pattern, _ in token_defs}

    tokens = []
    pos = 0
    end = len(string)

    # manual cursor: each step advances by a whole token's length, which
    # a `for pos in range(...)` loop cannot express
    while pos < end:
        for pattern, tok_type in token_defs:
            match = compiled[pattern].match(string, pos)
            if match is None:
                continue
            tokens.append(Token(match.group(0), tok_type))
            # jump past the matched token
            pos = match.end(0)
            # first match wins: a reserved word must not also be
            # re-lexed as an identifier
            break
        else:
            # no definition matched at this position
            print(f'context: {pos} {len(string)} {string[pos-1:pos+20]!r}')
            raise LexerError(f'Invalid character: {string[pos]!r}')

    return tokens
def lex_jorts(string: str) -> list:
    """Tokenize jorts source using the default TOKENS definitions."""
    jorts_tokens = lex(string, TOKENS)
    return jorts_tokens

View file

@ -1,272 +0,0 @@
from typing import Optional, Any, List
from jortsc.parser.lexer import Token, TokenType
from jortsc.parser.ast_nodes import (
Function, TypedVar, Identifier, Import, ReturnType, String, Number,
FunctionCall
)
class ParseError(Exception):
    """Raised when the token stream does not match the grammar."""
class Reader:
    """Cursor over a token list with the primitives used by the parser.

    Wraps the tokens with a current-position index and provides
    peek/next/expect helpers. All mismatches and unexpected EOFs are
    reported as ParseError (previously `expect`/`expect_val` crashed
    with AttributeError at EOF, and `ignore` with IndexError).
    """

    def __init__(self, tokens: List["Token"]):
        self.tokens = tokens
        self.cur = 0  # index of the next token to consume

    def __repr__(self):
        return (f'<Reader cur={self.cur} tot={len(self.tokens)} '
                f'cur_tok={self.peek()}>')

    def peek(self) -> Optional["Token"]:
        """Return the current token without consuming it, or None at EOF."""
        try:
            return self.tokens[self.cur]
        except IndexError:
            return None

    def next(self) -> Optional["Token"]:
        """Consume and return the current token, or None at EOF."""
        token = self.peek()
        self.cur += 1
        return token

    def expect(self, token_type: "TokenType") -> "Token":
        """Consume the next token; raise ParseError unless it has the
        given type."""
        token = self.next()
        if token is None:
            raise ParseError(f'Expected {token_type}, got EOF')
        if token.type_ != token_type:
            raise ParseError(f'Expected {token_type}, got '
                             f'{token.type_} {token.value!r}')
        return token

    def expect_val(self, value: str) -> "Token":
        """Consume the next token; raise ParseError unless its value
        matches `value` (type is not checked)."""
        token = self.next()
        if token is None:
            raise ParseError(f'Expected {value!r}, got EOF')
        if token.value != value:
            raise ParseError(f'Expected {value!r}, got '
                             f'{token.type_} {token.value!r}')
        return token

    def next_safe(self) -> "Token":
        """'Safe' version of next(): raise ParseError('Unexpected EOF')
        instead of returning None."""
        token = self.next()
        if token is None:
            raise ParseError('Unexpected EOF')
        return token

    def ignore(self, token_type: "TokenType"):
        """Skip the upcoming token only if it has the given type.

        A no-op at EOF. (Previously implemented with `assert`, which is
        stripped under `python -O`, and raised IndexError at EOF.)
        """
        token = self.peek()
        if token is not None and token.type_ == token_type:
            self.cur += 1
def _fn_read_args(reader: Reader, cur: List = None) -> List:
    """Collect the typed arguments of a function until ')' is reached."""
    if cur is None:
        cur = []

    # each argument is "<type> <name>"; a lone ')' ends the list
    while True:
        tok = reader.next_safe()
        if tok.value == ')':
            return cur

        reader.expect(TokenType.whitespace)
        name_tok = reader.next_safe()
        cur.append(TypedVar(tok.value, name_tok.value))
def _fn_ret_type(reader: Reader) -> ReturnType:
    """Parse an optional '-> <type>' annotation.

    When the arrow is missing the return type defaults to void.
    """
    try:
        reader.expect_val('->')
    except ParseError:
        # no arrow: implicit void return
        return ReturnType('void')

    reader.ignore(TokenType.whitespace)
    return ReturnType(reader.expect(TokenType.identifier).value)
def read_function(reader: Reader):
    """Read a function definition.

    Handles both named functions (`fn name (args) -> ret { ... }`) and
    anonymous ones (`fn (args) -> ret { ... }`).

    Raises ParseError at EOF or when the token after `fn ` starts
    neither form (previously those paths crashed with AttributeError /
    NameError on the unbound `fn_ret_type`/`block` locals).
    """
    reader.expect(TokenType.whitespace)
    token = reader.next_safe()

    fn_name = '_anonymous'
    fn_args = []

    print('function token', token)

    if token.type_ == TokenType.identifier:
        # named function: name, then '(', args, return type, body
        fn_name = token.value
        reader.expect(TokenType.whitespace)
        reader.expect_val('(')
        fn_args = _fn_read_args(reader)
        reader.expect(TokenType.whitespace)
        fn_ret_type = _fn_ret_type(reader)

        # only skip whitespace if we see it
        reader.ignore(TokenType.whitespace)
        block = read_start(reader)
    elif token.value == '(':
        # anonymous function: argument list follows immediately
        fn_args = _fn_read_args(reader)
        fn_ret_type = _fn_ret_type(reader)
        block = read_start(reader)
    else:
        raise ParseError(f'Unexpected token after fn: '
                         f'{token.type_} {token.value!r}')

    print('final function', fn_name, fn_args, fn_ret_type, block)
    return Function(fn_name, fn_args, fn_ret_type, block)
def read_import(reader):
    """Parse the module name of an `import` statement."""
    reader.expect(TokenType.whitespace)
    module_tok = reader.next_safe()
    return Import(module_tok.value)
# Dispatch table mapping a reserved word to the function that parses
# the construct it introduces.
HANDLERS = {
    'fn': read_function,
    'import': read_import,
}
def read_reserved(token: Token, reader: Reader):
    """Dispatch a reserved-word statement to its handler.

    Raises ParseError for reserved words without a registered handler
    (e.g. punctuation tokens appearing where a statement was expected).
    """
    try:
        handler = HANDLERS[token.value]
    except KeyError:
        # suppress the KeyError context: the missing key is an expected
        # condition here, and chaining it only adds traceback noise
        raise ParseError(f'Unexpected reserved word {token.value!r}') from None

    return handler(reader)
def read_value(token: Token, _reader: Reader):
    """Wrap a literal token in its AST node.

    Returns None for token types that are not value literals.
    """
    if token.type_ == TokenType.string:
        return String(token.value)
    if token.type_ == TokenType.number:
        return Number(token.value)
    return None
def read_statement(token: Token, reader: Reader):
    """Parse an identifier statement.

    Currently an identifier can only start a function call.
    """
    # TODO: handle more things than a function call
    name = token.value
    reader.expect_val('(')

    args = []
    while True:
        arg_tok = reader.next_safe()
        if arg_tok.value == ')':
            return FunctionCall(name, args)
        args.append(read_value(arg_tok, reader))
def read_start(reader: Reader):
    """Read the start of a program.

    Dispatches on the next token: `{ ... }` blocks (read recursively),
    reserved words (via read_reserved), comments, identifier statements
    (via read_statement), and literal values (via read_value).

    Returns None at end of input, otherwise a list of parsed nodes.
    """
    print('reader', reader)
    token = reader.next()
    if token is None:
        # end of the token stream
        print('eof!')
        return None

    ast = []
    res = []

    # handle blocks
    if token.value == '{':
        # next can be a whitespace, or a }
        token = reader.next()
        print('block start!, next:', token)

        if token.type_ == TokenType.whitespace:
            # keep going on reading
            while True:
                token = reader.peek()
                print('block append', token)
                if token.value == '}':
                    # closing brace: consume it and stop
                    print('block end')
                    reader.next()
                    break
                res.extend(read_start(reader))
        elif token.value == '}':
            # empty block
            res = []

    # import, fn, etc
    elif token.type_ == TokenType.reserved:
        res = read_reserved(token, reader)
    elif token.type_ == TokenType.comment:
        # comments produce no AST nodes
        return []
    elif token.type_ == TokenType.identifier:
        res = read_statement(token, reader)
    else:
        res = read_value(token, reader)

    ast.append(res)
    return ast
def read_loop(reader: Reader):
    """Read top-level constructs until end of input, returning the AST."""
    final_ast = []

    node = read_start(reader)
    # read_start signals EOF by returning None
    while node is not None:
        # TODO: better ast cleanup
        final_ast.append(node)
        node = read_start(reader)

    return final_ast
def syntatic(tokens: List[Token]):
    """Create an AST out of the given token list."""
    reader = Reader(tokens)
    return read_loop(reader)

View file

@ -1,14 +0,0 @@
from setuptools import setup
setup(
name='jortsc',
version='0.1',
py_modules=['jortsc'],
install_requires=[
'lark-parser==0.6.7'
],
entry_points='''
[console_scripts]
jortsc=jortsc:main
'''
)

5
src/main.zig Normal file
View file

@ -0,0 +1,5 @@
const std = @import("std");

/// Entry point: prints a placeholder message via std.debug.warn.
pub fn main() anyerror!void {
    std.debug.warn("All your base are belong to us.\n");
}