add basic lexer
also a draft grammar that I probably won't use
This commit is contained in:
parent
3c983b004c
commit
588b63fabe
6 changed files with 183 additions and 50 deletions
158
.gitignore
vendored
158
.gitignore
vendored
|
@ -1,54 +1,116 @@
|
|||
# ---> C
|
||||
# Prerequisites
|
||||
*.d
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# Object files
|
||||
*.o
|
||||
*.ko
|
||||
*.obj
|
||||
*.elf
|
||||
|
||||
# Linker output
|
||||
*.ilk
|
||||
*.map
|
||||
*.exp
|
||||
|
||||
# Precompiled Headers
|
||||
*.gch
|
||||
*.pch
|
||||
|
||||
# Libraries
|
||||
*.lib
|
||||
*.a
|
||||
*.la
|
||||
*.lo
|
||||
|
||||
# Shared objects (inc. Windows DLLs)
|
||||
*.dll
|
||||
# C extensions
|
||||
*.so
|
||||
*.so.*
|
||||
*.dylib
|
||||
|
||||
# Executables
|
||||
*.exe
|
||||
*.out
|
||||
*.app
|
||||
*.i*86
|
||||
*.x86_64
|
||||
*.hex
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
pip-wheel-metadata/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# Debug files
|
||||
*.dSYM/
|
||||
*.su
|
||||
*.idb
|
||||
*.pdb
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Kernel Module Compile Results
|
||||
*.mod*
|
||||
*.cmd
|
||||
.tmp_versions/
|
||||
modules.order
|
||||
Module.symvers
|
||||
Mkfile.old
|
||||
dkms.conf
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
.python-version
|
||||
|
||||
# celery beat schedule file
|
||||
celerybeat-schedule
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
|
0
jortsc/__init__.py
Normal file
0
jortsc/__init__.py
Normal file
10
jortsc/grammar
Normal file
10
jortsc/grammar
Normal file
|
@ -0,0 +1,10 @@
|
|||
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
|
||||
integer = ['-' | '+'] "0" digit {digit} ;
|
||||
|
||||
hex_letters = "a" | "b" | "c" | "d" | "e" | "f" ;
|
||||
hex_integer = "0x", {hex_letters | digit} ;
|
||||
|
||||
oct_digits = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" ;
|
||||
octal_integer = "0o", {oct_digits} ;
|
||||
|
||||
program =
|
|
@ -1,6 +1,7 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
import sys
|
||||
from parser.lexer import lex_jorts
|
||||
|
||||
def main():
|
||||
"""main entry point"""
|
||||
|
@ -9,8 +10,7 @@ def main():
|
|||
except EOFError:
|
||||
pass
|
||||
|
||||
# TODO: lol
|
||||
print(in_data)
|
||||
print(lex_jorts(in_data))
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
0
jortsc/parser/__init__.py
Normal file
0
jortsc/parser/__init__.py
Normal file
61
jortsc/parser/lexer.py
Normal file
61
jortsc/parser/lexer.py
Normal file
|
@ -0,0 +1,61 @@
|
|||
import re
|
||||
import enum
|
||||
|
||||
|
||||
class TokenType(enum.Enum):
    """Category attached to each token emitted by the lexer."""

    # Used by TOKENS for the ``fn`` keyword.
    RESERVED = enum.auto()

    # Presumably for user-defined names; no token definition uses it yet.
    IDENTIFIER = enum.auto()
|
||||
|
||||
|
||||
class LexerError(Exception):
    """Signals input that the lexer is unable to tokenize."""
|
||||
|
||||
|
||||
# Token definitions for jorts, as (regex pattern, token type) pairs.
TOKENS = [(r'fn', TokenType.RESERVED)]
|
||||
|
||||
|
||||
def lex(string: str, token_defs: list) -> list:
    """Split ``string`` into a list of ``(text, token_type)`` tuples.

    Scanning starts at position 0. At each position the definitions in
    ``token_defs`` are tried in order, and the first pattern that matches
    a non-empty span there wins; scanning then resumes right after it.

    Args:
        string: The source text to tokenize.
        token_defs: Ordered ``(pattern, token_type)`` pairs, where
            ``pattern`` is a regular expression source string.

    Returns:
        The matched ``(text, token_type)`` tuples in source order. An
        empty ``string`` yields an empty list.

    Raises:
        LexerError: If no definition matches at the current position.
    """
    # Compile every pattern once up front rather than once per attempt.
    matchers = [(re.compile(pattern), tok_type)
                for pattern, tok_type in token_defs]

    tokens = []
    pos = 0
    strlen = len(string)

    while pos < strlen:
        for regex, tok_type in matchers:
            match = regex.match(string, pos)

            # A zero-width match would leave ``pos`` unchanged and make
            # this loop spin forever, so treat it the same as no match.
            if not match or match.end(0) == pos:
                continue

            tokens.append((match.group(0), tok_type))
            pos = match.end(0)

            # go to next token instead
            break
        else:
            # No definition consumed any input at this position.
            raise LexerError(f'Invalid character: {string[pos]}')

    return tokens
|
||||
|
||||
|
||||
def lex_jorts(string: str) -> list:
    """Tokenize ``string`` using the module-level ``TOKENS`` definitions."""
    return lex(string, token_defs=TOKENS)
|
Loading…
Reference in a new issue