2017-11-04 14:43:40 +00:00
|
|
|
require "compiler/crystal/syntax/*"
|
|
|
|
|
|
|
|
module Ameba
  # Represents Crystal syntax tokenizer based on `Crystal::Lexer`.
  #
  # ```
  # source = Ameba::Source.new code, path
  # tokenizer = Ameba::Tokenizer.new(source)
  # tokenizer.run do |token|
  #   puts token
  # end
  # ```
  class Tokenizer
    # Instantiates Tokenizer using a `source`.
    #
    # The underlying lexer is configured to report whitespace, comments
    # and raw token text, so rules can inspect the source verbatim.
    #
    # ```
    # source = Ameba::Source.new code, path
    # Ameba::Tokenizer.new(source)
    # ```
    def initialize(source)
      @lexer = Crystal::Lexer.new source.code
      @lexer.count_whitespace = true
      @lexer.comments_enabled = true
      @lexer.wants_raw = true
      @lexer.filename = source.path
    end

    # Instantiates Tokenizer using a pre-configured `lexer`.
    #
    # ```
    # lexer = Crystal::Lexer.new(code)
    # Ameba::Tokenizer.new(lexer)
    # ```
    def initialize(@lexer : Crystal::Lexer)
    end

    # Runs the tokenizer and yields each token as a block argument.
    #
    # Returns `true` when the whole source was tokenized, `false` when a
    # syntax error interrupted tokenization.
    #
    # ```
    # Ameba::Tokenizer.new(source).run do |token|
    #   puts token
    # end
    # ```
    def run(&block : Crystal::Token -> _)
      run_normal_state @lexer, &block
      true
    rescue Crystal::SyntaxException
      # Invalid Crystal source: deliberately swallow the error and signal
      # failure through the return value instead of raising.
      false
    end

    # Yields tokens for the lexer's normal state, dispatching into the
    # specialized states (quoted strings, string/symbol array literals) when
    # their start tokens appear. When `break_on_rcurly` is true the loop
    # additionally stops at `}`, which terminates a string interpolation.
    private def run_normal_state(lexer, break_on_rcurly = false, &block : Crystal::Token -> _)
      loop do
        token = lexer.next_token
        block.call token

        case token.type
        when .delimiter_start?
          run_delimiter_state lexer, token, &block
        when .string_array_start?, .symbol_array_start?
          run_array_state lexer, token, &block
        when .eof?
          break
        when .op_rcurly?
          break if break_on_rcurly
        end
      end
    end

    # Yields tokens inside a delimited literal (string, regex, heredoc, ...)
    # until its closing delimiter or EOF, recursing back into the normal
    # state for `#{...}` interpolations.
    private def run_delimiter_state(lexer, token, &block : Crystal::Token -> _)
      loop do
        token = lexer.next_string_token(token.delimiter_state)
        block.call token

        case token.type
        when .delimiter_end?
          break
        when .interpolation_start?
          # `}` closes the interpolation, so the nested normal state must
          # stop there rather than consume the rest of the source.
          run_normal_state lexer, break_on_rcurly: true, &block
        when .eof?
          break
        end
      end
    end

    # Yields tokens inside a string/symbol array literal (`%w(...)`,
    # `%i(...)`) until its end token or EOF.
    #
    # NOTE(review): the result of `next_string_array_token` is discarded;
    # this presumably relies on `token` aliasing the lexer's current token
    # object, which the call mutates in place — confirm against
    # `Crystal::Lexer` before refactoring.
    private def run_array_state(lexer, token, &block : Crystal::Token -> _)
      loop do
        lexer.next_string_array_token
        block.call token

        case token.type
        when .string_array_end?, .eof?
          break
        end
      end
    end
  end
end
|