add basic lexer

also a draft grammar that i won't use, probably
This commit is contained in:
Luna 2019-03-08 23:43:17 -03:00
parent 3c983b004c
commit 588b63fabe
6 changed files with 183 additions and 50 deletions

.gitignore

@@ -1,54 +1,116 @@
# ---> C
# Prerequisites
*.d
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# Object files
*.o
*.ko
*.obj
*.elf
# Linker output
*.ilk
*.map
*.exp
# Precompiled Headers
*.gch
*.pch
# Libraries
*.lib
*.a
*.la
*.lo
# Shared objects (inc. Windows DLLs)
*.dll
# C extensions
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# Debug files
*.dSYM/
*.su
*.idb
*.pdb
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Kernel Module Compile Results
*.mod*
*.cmd
.tmp_versions/
modules.order
Module.symvers
Mkfile.old
dkms.conf
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/

jortsc/__init__.py Normal file

jortsc/grammar Normal file

@@ -0,0 +1,10 @@
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
integer = ["-" | "+"], digit, {digit} ;

hex_letters = "a" | "b" | "c" | "d" | "e" | "f" ;
hex_integer = "0x", {hex_letters | digit} ;

oct_digits = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" ;
octal_integer = "0o", {oct_digits} ;

program =
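As a quick sanity check, the three literal rules above map directly onto regular expressions. A minimal Python sketch (pattern names and test strings are illustrative, not part of the commit; note that {} in EBNF means zero-or-more):

import re

# illustrative regexes mirroring the draft EBNF rules above
INTEGER = re.compile(r'[-+]?[0-9]+')      # integer
HEX_INTEGER = re.compile(r'0x[0-9a-f]*')  # hex_integer
OCTAL_INTEGER = re.compile(r'0o[0-7]*')   # octal_integer

assert INTEGER.fullmatch('-42')
assert HEX_INTEGER.fullmatch('0xff')
assert OCTAL_INTEGER.fullmatch('0o755')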

@@ -1,6 +1,7 @@
 #!/usr/bin/python3
 import sys
+from parser.lexer import lex_jorts


 def main():
     """main entry point"""
@@ -9,8 +10,7 @@ def main():
     except EOFError:
         pass

-    # TODO: lol
-    print(in_data)
+    print(lex_jorts(in_data))

 if __name__ == '__main__':
     main()
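With the import wired in, the script now prints the token stream instead of echoing stdin. An illustrative interpreter session, assuming the single-entry TOKENS table from lexer.py below and running from the package directory:

>>> from parser.lexer import lex_jorts
>>> lex_jorts('fn')   # what main() now prints for the input 'fn'
[('fn', <TokenType.RESERVED: 1>)]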


jortsc/parser/lexer.py Normal file

@@ -0,0 +1,61 @@
import re
import enum


class TokenType(enum.Enum):
    """Defines the type of a token"""
    RESERVED = enum.auto()
    IDENTIFIER = enum.auto()


class LexerError(Exception):
    """Lexer error."""
    pass


TOKENS = [
    (r'fn', TokenType.RESERVED),
]


def lex(string: str, token_defs: list) -> list:
    """Generate tokens out of the given string."""
    pos = 0
    strlen = len(string)
    tokens = []

    # compile every pattern once up front instead of
    # recompiling it for each token we try to match
    compiled = {pattern: re.compile(pattern)
                for pattern, _ in token_defs}

    while pos < strlen:
        valid = False

        for definition in token_defs:
            pattern, tok_type = definition
            regex = compiled[pattern]

            match = regex.match(string, pos)
            if not match:
                continue

            text = match.group(0)
            pos = match.end(0)
            valid = True
            tokens.append((text, tok_type))

            # a definition matched; restart the scan
            # from the new position
            break

        if not valid:
            raise LexerError(f'Invalid character: {string[pos]}')

    return tokens


def lex_jorts(string: str) -> list:
    """Lex with the jorts token definitions"""
    return lex(string, TOKENS)
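TokenType.IDENTIFIER is declared but nothing emits it yet. A hedged sketch of how the table might grow (these patterns are assumptions, not part of the commit): a bare r'fn' also matches the start of an identifier like fnord, so the keyword rule would want a word boundary once identifiers exist.

from parser.lexer import TokenType

# hypothetical future TOKENS table; not part of this commit
TOKENS = [
    (r'fn\b', TokenType.RESERVED),  # \b keeps 'fn' from eating 'fnord'
    (r'[A-Za-z_][A-Za-z0-9_]*', TokenType.IDENTIFIER),
]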