add basic lexer

also a draft grammar that i probably won't use
Luna 2019-03-08 23:43:17 -03:00
parent 3c983b004c
commit 588b63fabe
6 changed files with 183 additions and 50 deletions

158
.gitignore vendored

@@ -1,54 +1,116 @@
-# ---> C
-
-# Prerequisites
-*.d
-
-# Object files
-*.o
-*.ko
-*.obj
-*.elf
-
-# Linker output
-*.ilk
-*.map
-*.exp
-
-# Precompiled Headers
-*.gch
-*.pch
-
-# Libraries
-*.lib
-*.a
-*.la
-*.lo
-
-# Shared objects (inc. Windows DLLs)
-*.dll
-*.so
-*.so.*
-*.dylib
-
-# Executables
-*.exe
-*.out
-*.app
-*.i*86
-*.x86_64
-*.hex
-
-# Debug files
-*.dSYM/
-*.su
-*.idb
-*.pdb
-
-# Kernel Module Compile Results
-*.mod*
-*.cmd
-.tmp_versions/
-modules.order
-Module.symvers
-Mkfile.old
-dkms.conf
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/

0
jortsc/__init__.py Normal file

10
jortsc/grammar Normal file

@@ -0,0 +1,10 @@
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
integer = ["-" | "+"], digit, {digit} ;
hex_letters = "a" | "b" | "c" | "d" | "e" | "f" ;
hex_integer = "0x", {hex_letters | digit} ;
oct_digits = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" ;
octal_integer = "0o", {oct_digits} ;
program =
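
Read as token rules, the numeric literals above map almost one-to-one onto regular expressions (in EBNF, {x} means zero or more repetitions). A minimal sketch of hypothetical regex counterparts, using names that are not part of this commit:

import re

# hypothetical regex equivalents of the draft grammar rules
INTEGER = re.compile(r'[+-]?[0-9]+')       # integer = ["-" | "+"], digit, {digit}
HEX_INTEGER = re.compile(r'0x[0-9a-f]*')   # hex_integer = "0x", {hex_letters | digit}
OCTAL_INTEGER = re.compile(r'0o[0-7]*')    # octal_integer = "0o", {oct_digits}

assert INTEGER.fullmatch('-42')
assert HEX_INTEGER.fullmatch('0xff')
assert OCTAL_INTEGER.fullmatch('0o755')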


@ -1,6 +1,7 @@
#!/usr/bin/python3 #!/usr/bin/python3
import sys import sys
from parser.lexer import lex_jorts
def main(): def main():
"""main entry point""" """main entry point"""
@ -9,8 +10,7 @@ def main():
except EOFError: except EOFError:
pass pass
# TODO: lol print(lex_jorts(in_data))
print(in_data)
if __name__ == '__main__': if __name__ == '__main__':
main() main()
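
Since lex_jorts returns (text, TokenType) tuples, the script now prints a token list instead of echoing the raw input. A sketch of what that looks like from a REPL inside the jortsc/ directory (assuming enum.auto() numbered RESERVED as 1):

>>> from parser.lexer import lex_jorts
>>> lex_jorts('fn')
[('fn', <TokenType.RESERVED: 1>)]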


61
jortsc/parser/lexer.py Normal file

@@ -0,0 +1,61 @@
import re
import enum


class TokenType(enum.Enum):
    """Defines the type of a token"""
    RESERVED = enum.auto()
    IDENTIFIER = enum.auto()


class LexerError(Exception):
    """Lexer error."""
    pass


TOKENS = [
    (r'fn', TokenType.RESERVED),
]


def lex(string: str, token_defs: list) -> list:
    """Generate tokens out of the given string."""
    pos = 0
    strlen = len(string)
    tokens = []

    # compile each token definition's regex once, up front,
    # instead of recompiling it at every position in the input
    compiled = {pattern: re.compile(pattern)
                for pattern, _ in token_defs}

    while pos < strlen:
        valid = False

        for definition in token_defs:
            pattern, tok_type = definition
            regex = compiled[pattern]

            match = regex.match(string, pos)

            if not match:
                continue

            text = match.group(0)
            pos = match.end(0)
            valid = True
            tokens.append((text, tok_type))

            # first matching definition wins; move on to the next position
            break

        if not valid:
            raise LexerError(f'Invalid character: {string[pos]}')

    return tokens


def lex_jorts(string: str) -> list:
    """Lex with the jorts token definitions"""
    return lex(string, TOKENS)
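
lex() commits to the first definition that matches at the current position, so ordering in TOKENS matters: specific patterns must sit above general ones. A hypothetical extension (only the 'fn' rule exists in this commit) illustrating that constraint:

TOKENS = [
    # reserved words first: lex() takes the first match, so the broader
    # identifier pattern below would otherwise consume 'fn' as an IDENTIFIER
    (r'fn', TokenType.RESERVED),
    (r'[a-zA-Z_][a-zA-Z0-9_]*', TokenType.IDENTIFIER),
]

Two caveats follow from the first-match rule: anything no pattern covers raises LexerError, so even a space trips the lexer until a whitespace rule is added; and first-match is not longest-match, so 'fnord' would lex as 'fn' + 'ord' unless the reserved pattern is anchored with a word boundary (r'fn\b').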