From 588b63fabeea9f4f8b05fac8d97ce02474252ea0 Mon Sep 17 00:00:00 2001 From: Luna Date: Fri, 8 Mar 2019 23:43:17 -0300 Subject: [PATCH] add basic lexer also a draft grammar that i wont use probably --- .gitignore | 158 ++++++++++++++++++++++++++------------ jortsc/__init__.py | 0 jortsc/grammar | 10 +++ jortsc/main.py | 4 +- jortsc/parser/__init__.py | 0 jortsc/parser/lexer.py | 61 +++++++++++++++ 6 files changed, 183 insertions(+), 50 deletions(-) create mode 100644 jortsc/__init__.py create mode 100644 jortsc/grammar create mode 100644 jortsc/parser/__init__.py create mode 100644 jortsc/parser/lexer.py diff --git a/.gitignore b/.gitignore index cd531cf..0447b8b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,54 +1,116 @@ -# ---> C -# Prerequisites -*.d +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class -# Object files -*.o -*.ko -*.obj -*.elf - -# Linker output -*.ilk -*.map -*.exp - -# Precompiled Headers -*.gch -*.pch - -# Libraries -*.lib -*.a -*.la -*.lo - -# Shared objects (inc. Windows DLLs) -*.dll +# C extensions *.so -*.so.* -*.dylib -# Executables -*.exe -*.out -*.app -*.i*86 -*.x86_64 -*.hex +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST -# Debug files -*.dSYM/ -*.su -*.idb -*.pdb +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec -# Kernel Module Compile Results -*.mod* -*.cmd -.tmp_versions/ -modules.order -Module.symvers -Mkfile.old -dkms.conf +# Installer logs +pip-log.txt +pip-delete-this-directory.txt +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/jortsc/__init__.py b/jortsc/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/jortsc/grammar b/jortsc/grammar new file mode 100644 index 0000000..43938ba --- /dev/null +++ b/jortsc/grammar @@ -0,0 +1,10 @@ +digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; +integer = ['-' | '+'] "0" digit {digit} ; + +hex_letters = "a" | "b" | "c" | "d" | "e" | "f" +hex_integer = "0x", {hex_letters | digit} ; + +oct_digits = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" ; +octal_integer = "0o", {oct_digits} ; + +program = diff --git a/jortsc/main.py b/jortsc/main.py index e0c4e74..4e4b64f 100644 --- a/jortsc/main.py +++ b/jortsc/main.py @@ -1,6 +1,7 @@ #!/usr/bin/python3 import sys +from parser.lexer import lex_jorts def main(): """main entry point""" @@ -9,8 +10,7 @@ def main(): except EOFError: pass - # TODO: lol - print(in_data) + print(lex_jorts(in_data)) if __name__ == 
"""Regex-driven lexer for jorts source text (jortsc/parser/lexer.py)."""
import re
import enum


class TokenType(enum.Enum):
    """Defines the type of a token"""
    RESERVED = enum.auto()
    IDENTIFIER = enum.auto()


class LexerError(Exception):
    """Raised by lex() when no token definition consumes the input."""


# Token definitions as (regex pattern, token type) pairs.
# lex() tries them in list order at each position; the first hit wins.
TOKENS = [
    (r'fn', TokenType.RESERVED),
]


def lex(string: str, token_defs: list) -> list:
    """Generate tokens out of the given string.

    Args:
        string: Text to tokenize.
        token_defs: List of ``(pattern, token_type)`` pairs. Patterns are
            tried in list order at the current position; the first one
            that consumes at least one character produces the token.

    Returns:
        A list of ``(text, token_type)`` tuples in source order. An empty
        input string yields an empty list.

    Raises:
        LexerError: If no definition consumes input at some position.
    """
    # Compile every pattern once up front instead of once per token.
    matchers = [(re.compile(pattern), tok_type)
                for pattern, tok_type in token_defs]

    tokens = []
    pos = 0
    strlen = len(string)

    while pos < strlen:
        for regex, tok_type in matchers:
            match = regex.match(string, pos)

            # A zero-length match consumes nothing: accepting it would
            # leave pos unchanged and spin forever, so treat it exactly
            # like a failed match and try the next definition.
            if match is None or match.end() == pos:
                continue

            tokens.append((match.group(0), tok_type))
            pos = match.end()

            # restart from the first definition for the next token
            break
        else:
            # no definition advanced the position
            raise LexerError(f'Invalid character: {string[pos]}')

    return tokens


def lex_jorts(string: str) -> list:
    """Lex with the jorts token definitions"""
    return lex(string, TOKENS)