diff --git a/.gitignore b/.gitignore
index 0447b8b..3cef7be 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,116 +1 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-pip-wheel-metadata/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# PyInstaller
-# Usually these files are written by a python script from a template
-# before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-.hypothesis/
-.pytest_cache/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
-.python-version
-
-# celery beat schedule file
-celerybeat-schedule
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
+zig-cache/
diff --git a/README.md b/README.md
index 9b3bc4e..2168721 100644
--- a/README.md
+++ b/README.md
@@ -1,19 +1,5 @@
 # jorts
 
-jorts programming language
+an interpreter for the lox language from https://craftinginterpreters.com
 
-## installing
-
-```sh
-git clone https://gitdab.com/luna/jorts
-cd jorts
-pip install --user --editable .
-```
-
-## using
-
-right now, its not pretty, nor finished
-
-```
-cat examples/hello.jt | jortsc
-```
+this is a learning project.
diff --git a/build.zig b/build.zig
new file mode 100644
index 0000000..371246c
--- /dev/null
+++ b/build.zig
@@ -0,0 +1,15 @@
+const Builder = @import("std").build.Builder;
+
+pub fn build(b: *Builder) void {
+    const mode = b.standardReleaseOptions();
+    const exe = b.addExecutable("jorts", "src/main.zig");
+    exe.setBuildMode(mode);
+
+    const run_cmd = exe.run();
+
+    const run_step = b.step("run", "Run the app");
+    run_step.dependOn(&run_cmd.step);
+
+    b.default_step.dependOn(&exe.step);
+    b.installArtifact(exe);
+}
diff --git a/jortsc/__init__.py b/jortsc/__init__.py
deleted file mode 100644
index da3f54a..0000000
--- a/jortsc/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .main import main
-
-__all__ = ['main']
diff --git a/jortsc/main.py b/jortsc/main.py
deleted file mode 100644
index a1b1820..0000000
--- a/jortsc/main.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/python3
-
-import sys
-import pprint
-import logging
-
-from jortsc.parser.lexer import lex_jorts
-# from jortsc.parser.parser import parse
-
-logging.basicConfig(level=logging.DEBUG)
-
-def main():
-    """main entry point"""
-    try:
-        in_data = sys.stdin.read().strip()
-    except EOFError:
-        pass
-
-    print(repr(in_data))
-    tokens = lex_jorts(in_data)
-    pprint.pprint(tokens)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/jortsc/parser/__init__.py b/jortsc/parser/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/jortsc/parser/ast_nodes.py b/jortsc/parser/ast_nodes.py
deleted file mode 100644
index bff1c05..0000000
--- a/jortsc/parser/ast_nodes.py
+++ /dev/null
@@ -1,45 +0,0 @@
-from dataclasses import dataclass
-
-@dataclass
-class TypedVar:
-    type_: str
-    name: str
-
-
-@dataclass
-class ReturnType:
-    type_: str
-
-
-@dataclass
-class Function:
-    name: str
-    arguments: str
-    ret_type: ReturnType
-    block: list
-
-
-@dataclass
-class Identifier:
-    name: str
-
-
-@dataclass
-class Import:
-    module: str
-
-
-@dataclass
-class String:
-    value: str
-
-
-@dataclass
-class Number:
-    value: str
-
-
-@dataclass
-class FunctionCall:
-    function: str
-    args: list
diff --git a/jortsc/parser/lexer.py b/jortsc/parser/lexer.py
deleted file mode 100644
index 7d61ff7..0000000
--- a/jortsc/parser/lexer.py
+++ /dev/null
@@ -1,112 +0,0 @@
-import re
-
-from dataclasses import dataclass
-from enum import Enum, auto
-
-
-class TokenType(Enum):
-    """Defines the type of a token"""
-    reserved = auto()
-    identifier = auto()
-    comment = auto()
-    comment_start = auto()
-    comment_end = auto()
-    whitespace = auto()
-    number = auto()
-    string = auto()
-
-
-@dataclass
-class Token:
-    value: str
-    type_: TokenType
-
-
-class LexerError(Exception):
-    """Lexer error."""
-    pass
-
-
-TOKENS = [
-    (r'[ \n\t]+', TokenType.whitespace),
-
-    # single line comments and multiline comments
-    (r'//[^\n]*', TokenType.comment),
-
-    # TODO: shouldnt this be /* */ instead of
-    # only tokenizing on the start and end?
-    (r'/\*', TokenType.comment_start),
-    (r'\*/', TokenType.comment_end),
-
-    (r'fn', TokenType.reserved),
-    (r'if', TokenType.reserved),
-    (r'import', TokenType.reserved),
-
-    (r'\(', TokenType.reserved),
-    (r'\)', TokenType.reserved),
-
-    (r'\{', TokenType.reserved),
-    (r'\}', TokenType.reserved),
-
-    (r'\-\>', TokenType.reserved),
-    (r'\.', TokenType.reserved),
-
-    (r'\"[^\n]*\"', TokenType.string),
-
-    # basic math ops
-    (r'[\+\-\/\*]', TokenType.reserved),
-
-    (r'[0-9]+', TokenType.number),
-    (r'[A-Za-z][A-Za-z0-9_]*', TokenType.identifier)
-]
-
-
-def lex(string: str, token_defs: list) -> list:
-    """Generate tokens out of the given string."""
-    pos = 0
-    strlen = len(string)
-    tokens = []
-
-    # generate a dict for compiled regexes out of the token defs
-    # instead of compiling on each token definition per token.
-    compiled = {pattern: re.compile(pattern)
-                for pattern, _ in token_defs}
-
-    # we use this instead of for pos in range(len(string)) because we
-    # need to increment pos to a whole token length's, and that wouldn't
-    # be easy on a for .. in range(..)
-    while pos < strlen:
-        valid = False
-
-        for definition in token_defs:
-            pattern, tok_type = definition
-            regex = compiled[pattern]
-
-            match = regex.match(string, pos)
-
-            if not match:
-                continue
-
-            text = match.group(0)
-
-            # update pos to the end of the token
-            pos = match.end(0)
-
-            valid = True
-            tokens.append(Token(text, tok_type))
-
-            # go to next token instead of checking other
-            # definitions for tokens, e.g if its a reserved token
-            # we shouldn't go down the path of an identifier.
-            break
-
-        if not valid:
-            print(f'context: {pos} {len(string)} {string[pos-1:pos+20]!r}')
-            raise LexerError(f'Invalid character: {string[pos]!r}')

-    return tokens
-
-
-def lex_jorts(string: str) -> list:
-    """Lex with the jorts token definitions"""
-    return lex(string, TOKENS)
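The deleted lexer works by trying each pattern at the current position, in definition order, and taking the first match; the cursor then jumps past the matched text, and whitespace is kept as a token rather than skipped. A distilled, self-contained sketch of that first-match-wins loop (standalone names, not the deleted module's API):

```python
import re

# Ordered token definitions: earlier patterns win, which is why the
# reserved word 'fn' must come before the identifier pattern.
DEFS = [
    (r'[ \n\t]+', 'whitespace'),
    (r'fn', 'reserved'),
    (r'[0-9]+', 'number'),
    (r'[A-Za-z][A-Za-z0-9_]*', 'identifier'),
]
COMPILED = [(re.compile(pattern), kind) for pattern, kind in DEFS]

def scan(src: str) -> list:
    """Tokenize src; at each position the first matching pattern wins."""
    pos, out = 0, []
    while pos < len(src):
        for regex, kind in COMPILED:
            match = regex.match(src, pos)
            if match:
                out.append((match.group(0), kind))
                pos = match.end(0)  # jump past the matched token
                break
        else:  # no definition matched at this position
            raise ValueError(f'invalid character: {src[pos]!r}')
    return out

print(scan('fn main 42'))
# [('fn', 'reserved'), (' ', 'whitespace'), ('main', 'identifier'),
#  (' ', 'whitespace'), ('42', 'number')]
```

Note that first-match-wins with this ordering means an identifier that merely starts with a reserved word (say `fnord`) lexes as `fn` plus `ord`; the deleted TOKENS table has the same property.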
diff --git a/jortsc/parser/syntatic.py b/jortsc/parser/syntatic.py
deleted file mode 100644
index 7e9bc62..0000000
--- a/jortsc/parser/syntatic.py
+++ /dev/null
@@ -1,272 +0,0 @@
-from typing import Optional, Any, List
-
-from jortsc.parser.lexer import Token, TokenType
-from jortsc.parser.ast_nodes import (
-    Function, TypedVar, Identifier, Import, ReturnType, String, Number,
-    FunctionCall
-)
-
-
-class ParseError(Exception):
-    """Represents a parse error."""
-    pass
-
-
-class Reader:
-    """Main reader class"""
-    def __init__(self, tokens: List[Token]):
-        self.tokens = tokens
-        self.cur = 0
-
-    def __repr__(self):
-        return (f'')
-
-    def peek(self) -> Optional[Token]:
-        """Peek at the current token."""
-        try:
-            token = self.tokens[self.cur]
-            return token
-        except IndexError:
-            return None
-
-    def next(self) -> Optional[Token]:
-        """Fetch the current token then skip to the next one."""
-        token = self.peek()
-        self.cur += 1
-        return token
-
-    def expect(self, token_type: TokenType) -> Token:
-        """Check for a specific token type and error if it fails"""
-        token = self.next()
-
-        if token.type_ != token_type:
-            raise ParseError(f'Expected {token_type}, got '
-                             f'{token.type_} {token.value!r}')
-
-        return token
-
-    def expect_val(self, value: str) -> Token:
-        """Check the next token to see if it matches against a given value,
-        instead of a type."""
-        token = self.next()
-
-        if token.value != value:
-            raise ParseError(f'Expected {value!r}, got '
-                             f'{token.type_} {token.value!r}')
-
-        return token
-
-
-    def next_safe(self) -> Token:
-        """'Safe' version of next().
-
-        Raises an 'Unexpected EOF' error if next() returns None.
-        """
-        token = self.next()
-
-        if token is None:
-            raise ParseError('Unexpected EOF')
-
-        return token
-
-    def ignore(self, token_type: TokenType):
-        """Only increase self.cur if token_type is the upcoming token."""
-        try:
-            assert self.tokens[self.cur].type_ == token_type
-            self.cur += 1
-        except AssertionError:
-            pass
-
-
-def _fn_read_args(reader: Reader, cur: List = None) -> List:
-    """Recursively read the arguments of the function."""
-    if cur is None:
-        cur = []
-
-    # it can be an identifier for the arguments' type, OR a RPAREN
-    # if it is rparen, we stop
-    # if it isnt, we keep going until that happens
-    token = reader.next_safe()
-
-    if token.value == ')':
-        return cur
-
-    argtype = token
-    reader.expect(TokenType.whitespace)
-    argname = reader.next_safe()
-
-    cur.append(TypedVar(argtype.value, argname.value))
-    return _fn_read_args(reader, cur)
-
-
-def _fn_ret_type(reader: Reader) -> ReturnType:
-    """Fetch the return type of a function. Defaults to void."""
-    try:
-        reader.expect_val('->')
-    except ParseError:
-        return ReturnType('void')
-
-    reader.ignore(TokenType.whitespace)
-    token = reader.expect(TokenType.identifier)
-    return ReturnType(token.value)
-
-
-def read_function(reader: Reader):
-    """Read a function block."""
-    reader.expect(TokenType.whitespace)
-
-    token = reader.next()
-
-    fn_name = '_anonymous'
-    fn_args = []
-
-    print('function token', token)
-
-    if token.type_ == TokenType.identifier:
-        fn_name = token.value
-
-        reader.expect(TokenType.whitespace)
-        reader.expect_val('(')
-
-        fn_args = _fn_read_args(reader)
-
-        reader.expect(TokenType.whitespace)
-        fn_ret_type = _fn_ret_type(reader)
-
-        # only skip whitespace if we see it
-        reader.ignore(TokenType.whitespace)
-        block = read_start(reader)
-    elif token.value == '(':
-        fn_args = _fn_read_args(reader)
-        fn_ret_type = _fn_ret_type(reader)
-        block = read_start(reader)
-
-    print('final function', fn_name, fn_args, fn_ret_type, block)
-
-    return Function(fn_name, fn_args, fn_ret_type, block)
-
-
-def read_import(reader):
-    """Read an import"""
-    reader.expect(TokenType.whitespace)
-    module = reader.next_safe()
-    return Import(module.value)
-
-
-HANDLERS = {
-    'fn': read_function,
-    'import': read_import,
-}
-
-
-def read_reserved(token: Token, reader: Reader):
-    """Read reserved statements."""
-    try:
-        handler = HANDLERS[token.value]
-    except KeyError:
-        raise ParseError(f'Unexpected reserved word {token.value!r}')
-
-    return handler(reader)
-
-
-def read_value(token: Token, _reader: Reader):
-    """Read a given value"""
-    if token.type_ == TokenType.string:
-        return String(token.value)
-    elif token.type_ == TokenType.number:
-        return Number(token.value)
-
-
-def read_statement(token: Token, reader: Reader):
-    """Read a statement"""
-    # token is an identifier, so first check for a function call
-
-    # TODO: handle more things than a function call
-    call_fn_name = token.value
-    token = reader.expect_val('(')
-    res = []
-
-    while True:
-        token = reader.next_safe()
-
-        if token.value == ')':
-            break
-
-        res.append(read_value(token, reader))
-
-    return FunctionCall(call_fn_name, res)
-
-
-def read_start(reader: Reader):
-    """Read the start of a program."""
-    print('reader', reader)
-
-    token = reader.next()
-
-    if token is None:
-        print('eof!')
-        return None
-
-    ast = []
-    res = []
-
-    # handle blocks
-    if token.value == '{':
-        # next can be a whitespace, or a }
-        token = reader.next()
-
-        print('block start!, next:', token)
-
-        if token.type_ == TokenType.whitespace:
-            # keep going on reading
-            while True:
-                token = reader.peek()
-                print('block append', token)
-
-                if token.value == '}':
-                    print('block end')
-                    reader.next()
-                    break
-
-                res.extend(read_start(reader))
-        elif token.value == '}':
-            res = []
-
-    # import, fn, etc
-    elif token.type_ == TokenType.reserved:
-        res = read_reserved(token, reader)
-
-    elif token.type_ == TokenType.comment:
-        return []
-
-    elif token.type_ == TokenType.identifier:
-        res = read_statement(token, reader)
-    else:
-        res = read_value(token, reader)
-
-    ast.append(res)
-    return ast
-
-
-def read_loop(reader: Reader):
-    """Read the AST."""
-    final_ast = []
-
-    while True:
-        ast = read_start(reader)
-
-        # break when eof
-        if ast is None:
-            break
-
-        # TODO: better ast cleanup
-
-        final_ast.append(ast)
-
-    return final_ast
-
-
-def syntatic(tokens: List[Token]):
-    """Create an AST out of the tokens."""
-    return read_loop(Reader(tokens))
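The deleted parser is a hand-rolled recursive descent reader over that token list: `peek()` inspects the current token, `next()` consumes it, and `expect()`/`expect_val()` consume while validating type or value, raising `ParseError` on a mismatch. Its cursor discipline, reduced to the essentials (a hypothetical `Cursor` over bare strings instead of `Token` objects):

```python
from typing import List, Optional

class Cursor:
    """Minimal token cursor: look ahead, consume, consume-with-check."""
    def __init__(self, tokens: List[str]):
        self.tokens = tokens
        self.cur = 0

    def peek(self) -> Optional[str]:
        """Return the current token without consuming it."""
        return self.tokens[self.cur] if self.cur < len(self.tokens) else None

    def next(self) -> Optional[str]:
        """Return the current token and advance the cursor."""
        token = self.peek()
        self.cur += 1
        return token

    def expect_val(self, value: str) -> str:
        """Consume one token and require it to equal value."""
        token = self.next()
        if token != value:
            raise SyntaxError(f'expected {value!r}, got {token!r}')
        return token

cursor = Cursor(['fn', 'main', '(', ')'])
assert cursor.next() == 'fn'
assert cursor.next() == 'main'
cursor.expect_val('(')
cursor.expect_val(')')
assert cursor.peek() is None  # end of input
```

The grammar functions above (`read_function`, `_fn_read_args`, `read_start`) only ever move this kind of cursor forward, so a `ParseError` points at exactly one token and the recursive calls mirror the nesting of the source.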
- """ - token = self.next() - - if token is None: - raise ParseError('Unexpected EOF') - - return token - - def ignore(self, token_type: TokenType): - """Only increase self.cur if token_type is the upcoming token.""" - try: - assert self.tokens[self.cur].type_ == token_type - self.cur += 1 - except AssertionError: - pass - - -def _fn_read_args(reader: Reader, cur: List = None) -> List: - """Recursively read the arguments of the function.""" - if cur is None: - cur = [] - - # it can be an identifier for the arguments' type, OR a RPAREN - # if it is rparen, we stop - # if it isnt, we keep going until that happens - token = reader.next_safe() - - if token.value == ')': - return cur - - argtype = token - reader.expect(TokenType.whitespace) - argname = reader.next_safe() - - cur.append(TypedVar(argtype.value, argname.value)) - return _fn_read_args(reader, cur) - - -def _fn_ret_type(reader: Reader) -> ReturnType: - """Fetch the return type of a function. Defaults to void.""" - try: - reader.expect_val('->') - except ParseError: - return ReturnType('void') - - reader.ignore(TokenType.whitespace) - token = reader.expect(TokenType.identifier) - return ReturnType(token.value) - - -def read_function(reader: Reader): - """Read a function block.""" - reader.expect(TokenType.whitespace) - - token = reader.next() - - fn_name = '_anonymous' - fn_args = [] - - print('function token', token) - - if token.type_ == TokenType.identifier: - fn_name = token.value - - reader.expect(TokenType.whitespace) - reader.expect_val('(') - - fn_args = _fn_read_args(reader) - - reader.expect(TokenType.whitespace) - fn_ret_type = _fn_ret_type(reader) - - # only skip whitespace if we see it - reader.ignore(TokenType.whitespace) - block = read_start(reader) - elif token.value == '(': - fn_args = _fn_read_args(reader) - fn_ret_type = _fn_ret_type(reader) - block = read_start(reader) - - print('final function', fn_name, fn_args, fn_ret_type, block) - - return Function(fn_name, fn_args, fn_ret_type, block) - - -def read_import(reader): - """Read an import""" - reader.expect(TokenType.whitespace) - module = reader.next_safe() - return Import(module.value) - - -HANDLERS = { - 'fn': read_function, - 'import': read_import, -} - - -def read_reserved(token: Token, reader: Reader): - """Read reserved statements.""" - try: - handler = HANDLERS[token.value] - except KeyError: - raise ParseError(f'Unexpected reserved word {token.value!r}') - - return handler(reader) - - -def read_value(token: Token, _reader: Reader): - """Read a given value""" - if token.type_ == TokenType.string: - return String(token.value) - elif token.type_ == TokenType.number: - return Number(token.value) - - -def read_statement(token: Token, reader: Reader): - """Read a statement""" - # token is an identifier, so first check for a function call - - # TODO: handle more things than a function call - call_fn_name = token.value - token = reader.expect_val('(') - res = [] - - while True: - token = reader.next_safe() - - if token.value == ')': - break - - res.append(read_value(token, reader)) - - return FunctionCall(call_fn_name, res) - - -def read_start(reader: Reader): - """Read the start of a program.""" - print('reader', reader) - - token = reader.next() - - if token is None: - print('eof!') - return None - - ast = [] - res = [] - - # handle blocks - if token.value == '{': - # next can be a whitespace, or a } - token = reader.next() - - print('block start!, next:', token) - - if token.type_ == TokenType.whitespace: - # keep going on reading - while True: - token = 
diff --git a/src/main.zig b/src/main.zig
new file mode 100644
index 0000000..128820d
--- /dev/null
+++ b/src/main.zig
@@ -0,0 +1,5 @@
+const std = @import("std");
+
+pub fn main() anyerror!void {
+    std.debug.warn("All your base are belong to us.\n");
+}
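Two notes on the new Zig side: `build.zig` registers a `run` step, so the stub should build and launch with `zig build` and `zig build run`. Both `b.standardReleaseOptions()` and the single-argument `std.debug.warn(...)` call belong to the pre-0.6 `std` API, so as written this pins the project to a Zig toolchain of its era; later compilers expect a format-arguments tuple and, eventually, `std.debug.print`.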