add basics of a handwritten parser
hell yeah, i'm going down that path. lark made for confusing stuff; i'll probably get even more confused with a handwritten one, but oh well, such is life
parent 9fda0b31c3
commit 7ce0565de7
4 changed files with 105 additions and 6 deletions
@@ -5,7 +5,7 @@ import pprint
 import logging

 from jortsc.parser.lexer import lex_jorts
-from jortsc.parser.parser import parse
+from jortsc.parser.syntatic import syntatic

 logging.basicConfig(level=logging.DEBUG)


@@ -18,10 +18,10 @@ def main():

     tokens = lex_jorts(in_data)
     pprint.pprint(tokens)
     print([t[0] for t in tokens])

-    tree = parse(in_data)
+    tree = syntatic(tokens)
     print(tree)

 if __name__ == '__main__':
     main()
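The entry point now runs the handwritten pass over the lexer's output instead of handing the raw source to Lark. A minimal sketch of that flow, with a made-up jorts snippet as input (the real syntax and the rest of main() may differ):

from jortsc.parser.lexer import lex_jorts
from jortsc.parser.syntatic import syntatic

source = 'fn main () {}'    # hypothetical example input, not taken from the repo
tokens = lex_jorts(source)  # lexer output: a list of Token dataclass instances
tree = syntatic(tokens)     # handwritten pass builds the AST from the tokens
print(tree)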
jortsc/parser/lexer.py

@@ -1,4 +1,6 @@
 import re

+from dataclasses import dataclass
+
 from enum import Enum, auto


@@ -13,6 +15,12 @@ class TokenType(Enum):
     number = auto()


+@dataclass
+class Token:
+    value: str
+    type_: TokenType
+
+
 class LexerError(Exception):
     """Lexer error."""
     pass

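Tokens are now proper dataclass instances rather than plain (text, type) tuples, so later stages can work with named fields. A quick illustration (the reserved member of TokenType is assumed from how syntatic.py checks it below):

tok = Token('fn', TokenType.reserved)
tok.value   # 'fn'
tok.type_   # TokenType.reserved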
@@ -84,7 +92,7 @@ def lex(string: str, token_defs: list) -> list:
             pos = match.end(0)

             valid = True
-            tokens.append((text, tok_type))
+            tokens.append(Token(text, tok_type))

             # go to next token instead of checking other
             # definitions for tokens, e.g if its a reserved token

jortsc/parser/parser.py

@@ -7,7 +7,7 @@ IMPORT: "import"
 COMMA: ","
 DOT: "."
 SINGLE_COMMENT: "//"
-NEWLINE: /[ \\n\\t]+/
+NEWLINE: /(\\r?\\n)+\\s*/
 ANY: /.+/
 WHITESPACE: " "
 INTEGER: /[0-9]+/
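The reworked NEWLINE terminal only fires when at least one real line break is present, instead of swallowing any run of spaces and tabs. A rough comparison of the two patterns with plain re, outside Lark, just to show the behavioural difference:

import re

old_newline = re.compile(r'[ \n\t]+')
new_newline = re.compile(r'(\r?\n)+\s*')

bool(old_newline.fullmatch('   '))       # True: spaces alone counted as NEWLINE
bool(new_newline.fullmatch('   '))       # False: a line break is now required
bool(new_newline.fullmatch('\r\n\n  '))  # True: line breaks plus trailing indent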
@@ -39,6 +39,6 @@ start: (NEWLINE | stmt)*
 """

 def parse(string: str):
-    """Parse"""
+    """Parse using Lark"""
     parser = Lark(GRAMMAR, parser='lalr', debug=True)
     return parser.parse(string)
jortsc/parser/syntatic.py (new file, 91 lines)

@@ -0,0 +1,91 @@
from typing import Optional, Any, List

from jortsc.parser.lexer import Token, TokenType


class ParseError(Exception):
    """Represents a parse error."""
    pass


class Reader:
    """Main reader class"""
    def __init__(self, tokens: List[Token]):
        self.tokens = tokens
        self.cur = 0

    def next(self) -> Optional[Token]:
        """Fetch the current token then skip to the next one."""
        try:
            token = self.tokens[self.cur]
        except IndexError:
            return None

        self.cur += 1
        return token


def _fn_read_args(reader: Reader, cur: List = None) -> List:
    """Recursively read the arguments of the function."""
    if cur is None:
        cur = []

    token = reader.next()

    if token.value == ')':
        return cur

    argtype, argname = reader.next(), reader.next()
    cur.append((argtype, argname))
    return _fn_read_args(reader, cur)


def read_function(reader: Reader):
    """Read a function block."""
    token = reader.next()

    if token.type_ == TokenType.whitespace:
        pass
    else:
        raise ParseError('Expected whitespace')

    token = reader.next()

    fn_name = '_anonymous'
    fn_args = []

    if token.type_ == TokenType.identifier:
        fn_name = token.value
        fn_args = _fn_read_args(reader)
        block = read_start(reader)
    elif token.value == '(':
        fn_args = _fn_read_args(reader)
        block = read_start(reader)

    return (fn_name, fn_args, block)


def read_reserved(token: Token, reader: Reader):
    """Read reserved statements."""
    if token.value == 'fn':
        return read_function(reader)


def read_start(reader: Reader):
    """Read the start of a program."""
    token = reader.next()
    ast = []
    res = []

    print('cur', token)

    if token.type_ == TokenType.reserved:
        res = read_reserved(token, reader)

    ast.extend(res)
    return ast


def syntatic(tokens: List[Token]):
    """Create an AST out of the tokens."""
    return read_start(Reader(tokens))
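Reader is just a cursor over the token list, handing out tokens one at a time and returning None once it runs out; read_start then dispatches on the first token's type and recurses from there. A standalone illustration of the cursor behaviour, with hand-built tokens rather than lexer output:

from jortsc.parser.lexer import Token, TokenType
from jortsc.parser.syntatic import Reader

reader = Reader([Token('fn', TokenType.reserved), Token(' ', TokenType.whitespace)])
reader.next()   # the 'fn' token
reader.next()   # the whitespace token
reader.next()   # None: the reader has run out of tokens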