import logging
from typing import Optional, Any, List

from jortsc.parser.lexer import Token, TokenType
from jortsc.parser.ast_nodes import (
    Function, TypedVar, Identifier, Import, ReturnType, String, Number,
    FunctionCall
)

log = logging.getLogger(__name__)


class ParseError(Exception):
    """Represents a parse error."""
    pass


class Reader:
    """Cursor over a list of tokens.

    ``self.cur`` is the index of the next token to be returned by
    :meth:`peek` / :meth:`next`.
    """

    def __init__(self, tokens: List[Token]):
        self.tokens = tokens
        self.cur = 0

    def __repr__(self):
        return (f'<Reader cur={self.cur} total={len(self.tokens)} '
                f'cur_tok={self.peek()!r}>')

    def peek(self) -> Optional[Token]:
        """Return the current token without consuming it, or None at EOF."""
        try:
            return self.tokens[self.cur]
        except IndexError:
            return None

    def next(self) -> Optional[Token]:
        """Return the current token (None at EOF) and advance the cursor.

        The cursor is advanced even when there is no token left.
        """
        token = self.peek()
        self.cur += 1
        return token

    def next_safe(self) -> Token:
        """'Safe' version of next().

        Raises:
            ParseError: 'Unexpected EOF' if there is no token left.
        """
        token = self.next()

        if token is None:
            raise ParseError('Unexpected EOF')

        return token

    def expect(self, token_type: TokenType) -> Token:
        """Consume the next token and require it to have the given type.

        Raises:
            ParseError: on EOF, or if the token's type differs.
        """
        # next_safe() turns EOF into a ParseError instead of letting the
        # attribute access below fail on None.
        token = self.next_safe()

        if token.type_ != token_type:
            raise ParseError(f'Expected {token_type}, got '
                             f'{token.type_} {token.value!r}')

        return token

    def expect_val(self, value: str) -> Token:
        """Consume the next token and require it to have the given value.

        Like expect(), but matches against the token's value instead of
        its type. The token is consumed even when it does not match.

        Raises:
            ParseError: on EOF, or if the token's value differs.
        """
        token = self.next_safe()

        if token.value != value:
            raise ParseError(f'Expected {value!r}, got '
                             f'{token.type_} {token.value!r}')

        return token

    def ignore(self, token_type: TokenType):
        """Only increase self.cur if token_type is the upcoming token.

        Does nothing at EOF.
        """
        # Explicit comparison rather than assert: asserts are stripped
        # under ``python -O``, which would make this skip unconditionally.
        token = self.peek()

        if token is not None and token.type_ == token_type:
            self.cur += 1


def _fn_read_args(reader: Reader, cur: Optional[List] = None) -> List:
    """Read the arguments of a function, up to and including the ')'.

    Each argument is read as ``<type> <whitespace> <name>`` and appended
    to ``cur`` (a fresh list when not given) as a TypedVar.

    Raises:
        ParseError: on EOF or when the whitespace between type and name
            is missing.
    """
    if cur is None:
        cur = []

    while True:
        # it can be an identifier for the argument's type, OR a RPAREN.
        # if it is rparen, we stop; otherwise keep reading arguments.
        argtype = reader.next_safe()

        if argtype.value == ')':
            return cur

        reader.expect(TokenType.whitespace)
        argname = reader.next_safe()

        cur.append(TypedVar(argtype.value, argname.value))


def _fn_ret_type(reader: Reader) -> ReturnType:
    """Fetch the return type of a function. Defaults to void.

    When the upcoming token is not '->', nothing is consumed, so the
    caller can go on to read the function body.

    Raises:
        ParseError: if '->' is not followed by an identifier.
    """
    token = reader.peek()

    # Peek instead of expect_val(): expect_val() consumes the token even
    # on mismatch, which would swallow the '{' that opens the body.
    if token is None or token.value != '->':
        return ReturnType('void')

    reader.next()
    reader.ignore(TokenType.whitespace)
    token = reader.expect(TokenType.identifier)
    return ReturnType(token.value)


def read_function(reader: Reader):
    """Read a function block.

    Called after the 'fn' reserved word has been consumed. Handles both
    named functions (``fn name (args) [-> type] block``) and anonymous
    ones (``fn (args) [-> type] block``), which get the name
    '_anonymous'.

    Raises:
        ParseError: on EOF or on an unexpected token.
    """
    reader.expect(TokenType.whitespace)

    token = reader.next_safe()
    log.debug('function token %s', token)

    fn_name = '_anonymous'

    if token.type_ == TokenType.identifier:
        fn_name = token.value

        reader.expect(TokenType.whitespace)
        reader.expect_val('(')

        fn_args = _fn_read_args(reader)
        reader.expect(TokenType.whitespace)
    elif token.value == '(':
        fn_args = _fn_read_args(reader)

        # only skip whitespace if we see it
        reader.ignore(TokenType.whitespace)
    else:
        # Without this branch the return type and block would be unbound.
        raise ParseError(f"Expected function name or '(', got "
                         f'{token.type_} {token.value!r}')

    fn_ret_type = _fn_ret_type(reader)

    # only skip whitespace if we see it
    reader.ignore(TokenType.whitespace)
    block = read_start(reader)

    log.debug('final function %s %s %s %s',
              fn_name, fn_args, fn_ret_type, block)

    return Function(fn_name, fn_args, fn_ret_type, block)


def read_import(reader):
    """Read an import: whitespace followed by the module token."""
    reader.expect(TokenType.whitespace)
    module = reader.next_safe()
    return Import(module.value)


# Maps a reserved word to the function that parses what follows it.
HANDLERS = {
    'fn': read_function,
    'import': read_import,
}


def read_reserved(token: Token, reader: Reader):
    """Read reserved statements by dispatching on the token's value.

    Raises:
        ParseError: if the reserved word has no handler in HANDLERS.
    """
    handler = HANDLERS.get(token.value)

    if handler is None:
        raise ParseError(f'Unexpected reserved word {token.value!r}')

    return handler(reader)


def read_value(token: Token, _reader: Reader):
    """Read a given value.

    Returns a String or Number node for string / number tokens, and
    None for any other token type.
    """
    if token.type_ == TokenType.string:
        return String(token.value)

    if token.type_ == TokenType.number:
        return Number(token.value)

    return None


def read_statement(token: Token, reader: Reader):
    """Read a statement that starts with the identifier ``token``.

    Raises:
        ParseError: on EOF, or if the identifier is not followed by '('.
    """
    # token is an identifier, so first check for a function call

    # TODO: handle more things than a function call
    call_fn_name = token.value
    reader.expect_val('(')

    call_args = []

    # every token up to the closing ')' is read as a value
    while True:
        token = reader.next_safe()

        if token.value == ')':
            break

        call_args.append(read_value(token, reader))

    return FunctionCall(call_fn_name, call_args)


def _read_block(reader: Reader) -> List:
    """Read the statements of a block whose '{' was already consumed.

    Consumes everything up to and including the matching '}'.

    Raises:
        ParseError: 'Unexpected EOF' if the block is never closed.
    """
    statements = []

    # optional whitespace right after the opening brace
    reader.ignore(TokenType.whitespace)

    while True:
        token = reader.peek()
        log.debug('block append %s', token)

        if token is None:
            raise ParseError('Unexpected EOF')

        if token.value == '}':
            log.debug('block end')
            reader.next()
            return statements

        # peek() saw a token, so read_start() cannot return None here
        statements.extend(read_start(reader))


def read_start(reader: Reader):
    """Read one top-level element: a block, reserved statement, or value.

    Returns:
        None at EOF, an empty list for a comment token, otherwise a
        one-element list holding the parsed result.
    """
    log.debug('reader %r', reader)

    token = reader.next()

    if token is None:
        log.debug('eof!')
        return None

    # handle blocks
    if token.value == '{':
        log.debug('block start!, next: %s', reader.peek())
        res = _read_block(reader)

    # import, fn, etc
    elif token.type_ == TokenType.reserved:
        res = read_reserved(token, reader)

    elif token.type_ == TokenType.comment:
        return []

    elif token.type_ == TokenType.identifier:
        res = read_statement(token, reader)

    else:
        res = read_value(token, reader)

    return [res]


def read_loop(reader: Reader):
    """Read the AST: collect read_start() results until EOF."""
    final_ast = []

    while True:
        ast = read_start(reader)

        # break when eof
        if ast is None:
            break

        # TODO: better ast cleanup

        final_ast.append(ast)

    return final_ast


def syntatic(tokens: List[Token]):
    """Create an AST out of the tokens."""
    return read_loop(Reader(tokens))