From 7ce0565de703ef36020f59108923da5c76d72a1e Mon Sep 17 00:00:00 2001
From: Luna
Date: Sun, 10 Mar 2019 01:55:12 -0300
Subject: [PATCH] add basics of a handwritten parser

hell yeah i'm going down that path

lark made confusing stuff, i'll probably get more confused with a
handwritten one, but oh well, such is life
---
 jortsc/main.py            |  6 +--
 jortsc/parser/lexer.py    | 10 ++++-
 jortsc/parser/parser.py   |  4 +-
 jortsc/parser/syntatic.py | 91 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 105 insertions(+), 6 deletions(-)
 create mode 100644 jortsc/parser/syntatic.py

diff --git a/jortsc/main.py b/jortsc/main.py
index 46587fa..16d1ec2 100644
--- a/jortsc/main.py
+++ b/jortsc/main.py
@@ -5,7 +5,7 @@ import pprint
 import logging
 
 from jortsc.parser.lexer import lex_jorts
-from jortsc.parser.parser import parse
+from jortsc.parser.syntatic import syntatic
 
 logging.basicConfig(level=logging.DEBUG)
 
@@ -18,10 +18,10 @@ def main():
 
     tokens = lex_jorts(in_data)
     pprint.pprint(tokens)
-    print([t[0] for t in tokens])
 
-    tree = parse(in_data)
+    tree = syntatic(tokens)
     print(tree)
 
+
 if __name__ == '__main__':
     main()
diff --git a/jortsc/parser/lexer.py b/jortsc/parser/lexer.py
index 3a35a59..9405713 100644
--- a/jortsc/parser/lexer.py
+++ b/jortsc/parser/lexer.py
@@ -1,4 +1,6 @@
 import re
+
+from dataclasses import dataclass
 from enum import Enum, auto
 
 
@@ -13,6 +15,12 @@ class TokenType(Enum):
     number = auto()
 
 
+@dataclass
+class Token:
+    value: str
+    type_: TokenType
+
+
 class LexerError(Exception):
     """Lexer error."""
     pass
@@ -84,7 +92,7 @@ def lex(string: str, token_defs: list) -> list:
             pos = match.end(0)
             valid = True
 
-            tokens.append((text, tok_type))
+            tokens.append(Token(text, tok_type))
 
             # go to next token instead of checking other
             # definitions for tokens, e.g if its a reserved token
diff --git a/jortsc/parser/parser.py b/jortsc/parser/parser.py
index 47410a2..7333bd6 100644
--- a/jortsc/parser/parser.py
+++ b/jortsc/parser/parser.py
@@ -7,7 +7,7 @@ IMPORT: "import"
 COMMA: ","
 DOT: "."
 SINGLE_COMMENT: "//"
-NEWLINE: /[ \\n\\t]+/
+NEWLINE: /(\\r?\\n)+\\s*/
 ANY: /.+/
 WHITESPACE: " "
 INTEGER: /[0-9]+/
@@ -39,6 +39,6 @@ start: (NEWLINE | stmt)*
 """
 
 def parse(string: str):
-    """Parse"""
+    """Parse using Lark"""
     parser = Lark(GRAMMAR, parser='lalr', debug=True)
     return parser.parse(string)
diff --git a/jortsc/parser/syntatic.py b/jortsc/parser/syntatic.py
new file mode 100644
index 0000000..f7a3858
--- /dev/null
+++ b/jortsc/parser/syntatic.py
@@ -0,0 +1,91 @@
+from typing import Optional, Any, List
+
+from jortsc.parser.lexer import Token, TokenType
+
+
+class ParseError(Exception):
+    """Represents a parse error."""
+    pass
+
+
+class Reader:
+    """Main reader class"""
+    def __init__(self, tokens: List[Token]):
+        self.tokens = tokens
+        self.cur = 0
+
+    def next(self) -> Optional[Token]:
+        """Fetch the current token then skip to the next one."""
+        try:
+            token = self.tokens[self.cur]
+        except IndexError:
+            return None
+
+        self.cur += 1
+        return token
+
+
+def _fn_read_args(reader: Reader, cur: List = None) -> List:
+    """Recursively read the arguments of the function."""
+    if cur is None:
+        cur = []
+
+    token = reader.next()
+
+    if token.value == ')':
+        return cur
+
+    argtype, argname = reader.next(), reader.next()
+    cur.append((argtype, argname))
+    return _fn_read_args(reader, cur)
+
+
+def read_function(reader: Reader):
+    """Read a function block."""
+    token = reader.next()
+
+    if token.type_ != TokenType.whitespace:
+        raise ParseError('Expected whitespace')
+
+    token = reader.next()
+
+    fn_name = '_anonymous'
+    fn_args = []
+
+    if token.type_ == TokenType.identifier:
+        fn_name = token.value
+        fn_args = _fn_read_args(reader)
+        block = read_start(reader)
+    elif token.value == '(':
+        fn_args = _fn_read_args(reader)
+        block = read_start(reader)
+    else:
+        raise ParseError('Expected identifier or ( after fn')
+
+    return (fn_name, fn_args, block)
+
+
+def read_reserved(token: Token, reader: Reader):
+    """Read reserved statements."""
+    if token.value == 'fn':
+        return read_function(reader)
+    raise ParseError(f'Unexpected reserved word: {token.value}')
+
+
+def read_start(reader: Reader):
+    """Read the start of a program."""
+    token = reader.next()
+    ast, res = [], []
+
+    print('cur', token)
+
+    if token is not None and token.type_ == TokenType.reserved:
+        res = read_reserved(token, reader)
+
+    ast.extend(res)
+    return ast
+
+
+def syntatic(tokens: List[Token]):
+    """Create an AST out of the tokens."""
+    return read_start(Reader(tokens))
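
quick sketch of how this is supposed to get driven end to end, going by the
main.py change above. the `fn main () {}` input is just a guess at the jorts
surface syntax (the lexer's token definitions aren't in this diff), and the
exact shape of the returned tree isn't nailed down yet, so the comments are
assumptions, not promises:

    # hypothetical usage, assuming jortsc is on the import path
    from jortsc.parser.lexer import lex_jorts
    from jortsc.parser.syntatic import syntatic

    source = 'fn main () {}'    # made-up snippet, not pinned down by this commit
    tokens = lex_jorts(source)  # lexer now yields Token dataclasses, not tuples
    tree = syntatic(tokens)     # read_start walks the token list recursively
    print(tree)                 # whatever read_start managed to build so far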