add basic lexer

also a draft grammar that i probably won't use
Luna 2019-03-08 23:43:17 -03:00
parent 3c983b004c
commit 588b63fabe
6 changed files with 183 additions and 50 deletions

158
.gitignore vendored

@@ -1,54 +1,116 @@
-# ---> C
-
-# Prerequisites
-*.d
-
-# Object files
-*.o
-*.ko
-*.obj
-*.elf
-
-# Linker output
-*.ilk
-*.map
-*.exp
-
-# Precompiled Headers
-*.gch
-*.pch
-
-# Libraries
-*.lib
-*.a
-*.la
-*.lo
-
-# Shared objects (inc. Windows DLLs)
-*.dll
-*.so
-*.so.*
-*.dylib
-
-# Executables
-*.exe
-*.out
-*.app
-*.i*86
-*.x86_64
-*.hex
-
-# Debug files
-*.dSYM/
-*.su
-*.idb
-*.pdb
-
-# Kernel Module Compile Results
-*.mod*
-*.cmd
-.tmp_versions/
-modules.order
-Module.symvers
-Mkfile.old
-dkms.conf
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/

0
jortsc/__init__.py Normal file

10
jortsc/grammar Normal file

@@ -0,0 +1,10 @@
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
integer = ["-" | "+"], digit, {digit} ;
hex_letters = "a" | "b" | "c" | "d" | "e" | "f" ;
hex_integer = "0x", {hex_letters | digit} ;
oct_digits = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" ;
octal_integer = "0o", {oct_digits} ;
program =
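
Read as token rules, the numeric literals above map almost one-to-one onto regular expressions (in EBNF, {x} means zero or more repetitions). A minimal sketch of hypothetical regex counterparts, using names that are not part of this commit:

import re

# hypothetical regex equivalents of the draft grammar rules
INTEGER = re.compile(r'[+-]?[0-9]+')       # integer = ["-" | "+"], digit, {digit}
HEX_INTEGER = re.compile(r'0x[0-9a-f]*')   # hex_integer = "0x", {hex_letters | digit}
OCTAL_INTEGER = re.compile(r'0o[0-7]*')    # octal_integer = "0o", {oct_digits}

assert INTEGER.fullmatch('-42')
assert HEX_INTEGER.fullmatch('0xff')
assert OCTAL_INTEGER.fullmatch('0o755')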


@ -1,6 +1,7 @@
#!/usr/bin/python3 #!/usr/bin/python3
import sys import sys
from parser.lexer import lex_jorts
def main(): def main():
"""main entry point""" """main entry point"""
@ -9,8 +10,7 @@ def main():
except EOFError: except EOFError:
pass pass
# TODO: lol print(lex_jorts(in_data))
print(in_data)
if __name__ == '__main__': if __name__ == '__main__':
main() main()
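
Since lex_jorts returns (text, TokenType) tuples, the script now prints a token list instead of echoing the raw input. A sketch of what that looks like from a REPL inside the jortsc/ directory (assuming enum.auto() numbered RESERVED as 1):

>>> from parser.lexer import lex_jorts
>>> lex_jorts('fn')
[('fn', <TokenType.RESERVED: 1>)]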


61
jortsc/parser/lexer.py Normal file

@@ -0,0 +1,61 @@
import re
import enum


class TokenType(enum.Enum):
    """Defines the type of a token"""
    RESERVED = enum.auto()
    IDENTIFIER = enum.auto()


class LexerError(Exception):
    """Lexer error."""
    pass


TOKENS = [
    (r'fn', TokenType.RESERVED),
]


def lex(string: str, token_defs: list) -> list:
    """Generate tokens out of the given string."""
    pos = 0
    strlen = len(string)
    tokens = []

    # compile each token definition's regex once, up front,
    # instead of recompiling it at every position in the input
    compiled = {pattern: re.compile(pattern)
                for pattern, _ in token_defs}

    while pos < strlen:
        valid = False

        for definition in token_defs:
            pattern, tok_type = definition
            regex = compiled[pattern]

            match = regex.match(string, pos)

            if not match:
                continue

            text = match.group(0)
            pos = match.end(0)
            valid = True
            tokens.append((text, tok_type))

            # first matching definition wins; move on to the next position
            break

        if not valid:
            raise LexerError(f'Invalid character: {string[pos]}')

    return tokens


def lex_jorts(string: str) -> list:
    """Lex with the jorts token definitions"""
    return lex(string, TOKENS)
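
lex() commits to the first definition that matches at the current position, so ordering in TOKENS matters: specific patterns must sit above general ones. A hypothetical extension (only the 'fn' rule exists in this commit) illustrating that constraint:

TOKENS = [
    # reserved words first: lex() takes the first match, so the broader
    # identifier pattern below would otherwise consume 'fn' as an IDENTIFIER
    (r'fn', TokenType.RESERVED),
    (r'[a-zA-Z_][a-zA-Z0-9_]*', TokenType.IDENTIFIER),
]

Two caveats follow from the first-match rule: anything no pattern covers raises LexerError, so even a space trips the lexer until a whitespace rule is added; and first-match is not longest-match, so 'fnord' would lex as 'fn' + 'ord' unless the reserved pattern is anchored with a word boundary (r'fn\b').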