Large numbers rule + tokenizer (#10)

This commit is contained in:
V. Elenhaupt 2017-11-04 16:43:40 +02:00 committed by GitHub
parent 3f2bcc56b9
commit 2f9ba27811
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 323 additions and 18 deletions

View file

@ -0,0 +1,118 @@
require "../../spec_helper"
# Defines a spec example which checks that the `LargeNumbers` rule rejects
# the literal *number* and that the first reported error message contains
# the *expected* underscored form.
private def it_transforms(number, expected)
  it "transforms large number #{number}" do
    source = Ameba::Source.new number
    rule = Ameba::Rules::LargeNumbers.new

    rule.catch(source).should_not be_valid

    first_message = source.errors.first.message
    first_message.should contain expected
  end
end
# Specs for the `LargeNumbers` rule.
module Ameba::Rules
# Rule instance shared by the examples below.
subject = LargeNumbers.new
describe LargeNumbers do
# Every literal in this fixture is expected to produce no error: short
# numbers, already-underscored integers and floats, binary/octal/hex
# literals, and literals carrying a type suffix or an exponent.
it "passes if large number does not require underscore" do
s = Source.new %q(
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
16 17 18 19 20 30 40 50 60 70 80 90
100
1_000
10_000
100_000
200_000
300_000
400_000
500_000
600_000
700_000
800_000
900_000
1_000_000
-9_223_372_036_854_775_808
9_223_372_036_854_775_807
141_592_654
141_592_654.0
141_592_654.001
141_592_654.001_2
141_592_654.001_23
141_592_654.001_234
141_592_654.001_234_5
0b1101
0o123
0xFE012D
0xfe012d
0xfe012dd11
1_i8
12_i16
123_i32
1_234_i64
12_u8
123_u16
1_234_u32
9_223_372_036_854_775_808_u64
9_223_372_036_854_775_808.000_123_456_789_f64
+100_u32
-900_000_i32
1_234.5e-7
11_234e10_f32
+1.123
-0.000_5
)
subject.catch(s).should be_valid
end
# Plain and signed integers.
it_transforms "10000", "10_000"
it_transforms "+10000", "+10_000"
it_transforms "-10000", "-10_000"
it_transforms "9223372036854775808", "9_223_372_036_854_775_808"
it_transforms "-9223372036854775808", "-9_223_372_036_854_775_808"
it_transforms "+9223372036854775808", "+9_223_372_036_854_775_808"
# Misplaced underscores are regrouped.
it_transforms "1_00000", "100_000"
# Integer type suffixes are preserved.
it_transforms "1_23_i8", "123_i8"
it_transforms "1000_i16", "1_000_i16"
it_transforms "1000_i32", "1_000_i32"
it_transforms "1000_i64", "1_000_i64"
it_transforms "1_23_u8", "123_u8"
it_transforms "1000_u16", "1_000_u16"
it_transforms "1000_u32", "1_000_u32"
it_transforms "1000_u64", "1_000_u64"
# Float type suffixes and exponents are preserved.
it_transforms "123456_f32", "123_456_f32"
it_transforms "123456_f64", "123_456_f64"
it_transforms "123456.5e-7_f32", "123_456.5e-7_f32"
it_transforms "123456e10_f64", "123_456e10_f64"
it_transforms "123456.5e-7", "123_456.5e-7"
it_transforms "123456e10", "123_456e10"
# Fractional digits are grouped by three starting right after the dot.
it_transforms "3.00_1", "3.001"
it_transforms "3.0012", "3.001_2"
it_transforms "3.00123", "3.001_23"
it_transforms "3.001234", "3.001_234"
it_transforms "3.0012345", "3.001_234_5"
# The fixture starts with a newline, so the number sits on line 2 of the
# source; the reported position is expected to match that line.
it "reports rule, pos and message" do
s = Source.new %q(
1200000
)
subject.catch(s).should_not be_valid
error = s.errors.first
error.rule.should_not be_nil
error.pos.should eq 2
error.message.should match /1_200_000/
end
end
end

View file

@ -21,15 +21,5 @@ module Ameba
s.errors.first.message.should eq "Error!"
end
end
describe "#ast" do
it "returns ast nodes" do
s = Source.new %(
class A; end
class B; end
)
s.ast.to_s.should eq "class A\nend\nclass B\nend\n"
end
end
end
end

View file

@ -0,0 +1,42 @@
require "../spec_helper"
# Defines a spec example which tokenizes *str* and checks that
# `Tokenizer#run` returns true and that the yielded token types equal
# *expected*, in order.
private def it_tokenizes(str, expected)
  it "tokenizes #{str}" do
    token_types = [] of Symbol
    tokenizer = Ameba::Tokenizer.new(Ameba::Source.new str)

    completed = tokenizer.run do |token|
      token_types << token.type
    end

    completed.should be_true
    token_types.should eq expected
  end
end
# Specs for `Ameba::Tokenizer`.
module Ameba
describe Tokenizer do
describe "#run" do
# Simple literals map to a single token.
it_tokenizes %("string"), %i(STRING)
it_tokenizes %(100), %i(NUMBER)
it_tokenizes %('a'), %i(CHAR)
it_tokenizes %([]), %i([])
# Whitespace between tokens is reported as SPACE.
it_tokenizes %([] of String), %i([] SPACE IDENT SPACE CONST)
# Tokens inside a string interpolation are yielded too; the string
# delimiters and the interpolation braces themselves are not.
it_tokenizes %q("str #{3}"), %i(STRING NUMBER)
# Array literals yield their start token, one STRING per element and
# the STRING_ARRAY_END token.
it_tokenizes %(%w(1 2)),
%i(STRING_ARRAY_START STRING STRING STRING_ARRAY_END)
it_tokenizes %(%i(one two)),
%i(SYMBOL_ARRAY_START STRING STRING STRING_ARRAY_END)
# Multi-line source: NEWLINE and SPACE tokens are part of the expected
# stream, so the whitespace inside the literal below is significant.
it_tokenizes %(
class A
def method
puts "hello"
end
end
), [
:NEWLINE, :SPACE, :IDENT, :SPACE, :CONST, :NEWLINE, :SPACE, :IDENT,
:SPACE, :IDENT, :NEWLINE, :SPACE, :IDENT, :SPACE, :STRING, :NEWLINE,
:SPACE, :IDENT, :NEWLINE, :SPACE, :IDENT, :NEWLINE, :SPACE,
]
end
end
end

View file

@ -16,7 +16,9 @@ module Ameba::AST
@source : Source
def initialize(@rule, @source)
@source.ast.accept self
parser = Crystal::Parser.new(@source.content)
parser.filename = @source.path
parser.parse.accept self
end
def visit(node : Crystal::ASTNode)

View file

@ -0,0 +1,86 @@
module Ameba::Rules
  # A rule that disallows large decimal numbers written without underscores.
  # Underscores do not affect the value of a number, but they make large
  # numbers easier to read.
  #
  # For example, these are considered invalid:
  #
  # ```
  # 10000
  # 141592654
  # 5.12345
  # ```
  #
  # And have to be rewritten as follows:
  #
  # ```
  # 10_000
  # 141_592_654
  # 5.123_45
  # ```
  #
  # Binary, octal and hexadecimal literals are not checked.
  struct LargeNumbers < Rule
    # Reports an error for every decimal number token of *source* whose raw
    # text differs from its underscored form.
    def test(source)
      Tokenizer.new(source).run do |token|
        next unless token.type == :NUMBER && decimal?(token.raw)

        if (expected = underscored token.raw) != token.raw
          source.error self, token.line_number,
            "Large numbers should be written with underscores: #{expected}"
        end
      end
    end

    # Returns true unless *value* is a binary (`0b`), octal (`0o`) or
    # hexadecimal (`0x`) literal. A leading sign is part of the raw token
    # text, so it has to be skipped before looking at the prefix.
    private def decimal?(value)
      value !~ /^[+-]?0(x|b|o)/
    end

    # Builds the expected spelling of *raw_number*: the integer part is
    # grouped by three digits counting from the right, the fractional part
    # by three digits counting from the left; sign and suffix are kept as is.
    private def underscored(raw_number)
      sign, value, fraction, suffix = parse_number raw_number
      # Reverse twice so that the grouping starts at the least significant digit.
      value = slice_digits(value.reverse).reverse
      fraction = "." + slice_digits(fraction) if fraction

      "#{sign}#{value}#{fraction}#{suffix}"
    end

    # Drops existing underscores from *value* and joins its characters in
    # groups of *by*, separated by an underscore.
    private def slice_digits(value, by = 3)
      groups = [] of String
      value.chars.reject(&.== '_').each_slice(by) do |slice|
        groups << slice.join
      end
      groups.join("_")
    end

    # Splits *value* into `{sign, integer part, fraction, suffix}`.
    # Every element except the integer part may be nil.
    private def parse_number(value)
      value, sign = parse_sign(value)
      value, suffix = parse_suffix(value)
      value, fraction = parse_fraction(value)

      {sign, value, fraction, suffix}
    end

    # Strips a leading `+` or `-` and returns `{rest, sign}`.
    private def parse_sign(value)
      if "+-".includes?(value[0])
        sign = value[0]
        value = value[1..-1]
      end
      {value, sign}
    end

    # Strips the exponent (`e...` / `E...`) and/or the type suffix
    # (`_i8`, `_u32`, `_f64`, ...) and returns `{rest, suffix}`.
    # The exponent is searched first, so an exponent followed by a type
    # suffix ends up in the suffix as a whole.
    private def parse_suffix(value)
      if pos = (value =~ /e/i || value =~ /_(i|u|f)/)
        suffix = value[pos..-1]
        value = value[0...pos]
      end
      {value, suffix}
    end

    # Splits off everything after the decimal point and returns
    # `{integer part, fraction}`.
    private def parse_fraction(value)
      if dot = value.index('.')
        fraction = value[dot + 1..-1]
        value = value[0...dot]
      end
      {value, fraction}
    end
  end
end

View file

@ -15,7 +15,6 @@ module Ameba
getter errors = [] of Error
getter path : String?
getter content : String
getter ast : Crystal::ASTNode?
def initialize(@content : String, @path = nil)
end
@ -31,11 +30,5 @@ module Ameba
def lines
@lines ||= @content.split("\n")
end
def ast
@ast ||= Crystal::Parser.new(@content)
.tap { |p| p.filename = @path }
.parse
end
end
end

74
src/ameba/tokenizer.cr Normal file
View file

@ -0,0 +1,74 @@
require "compiler/crystal/syntax/*"
module Ameba
  # Runs the Crystal lexer over the content of a source and yields the
  # resulting tokens to a block.
  #
  # String literals and string/symbol array literals need dedicated lexer
  # entry points, so the tokenizer switches between a "normal", a
  # "delimiter" and an "array" state while it walks the source.
  class Tokenizer
    # Creates a tokenizer for *source*. The lexer is configured to report
    # whitespace and comments and to keep the raw text of each token.
    def initialize(source)
      @lexer = Crystal::Lexer.new source.content
      @lexer.count_whitespace = true
      @lexer.comments_enabled = true
      @lexer.wants_raw = true
      @lexer.filename = source.path
    end

    # Yields every token to *block*.
    #
    # Returns true when the end of the source was reached, false when the
    # lexer raised a `Crystal::SyntaxException`.
    def run(&block : Crystal::Token -> _)
      run_normal_state @lexer, &block
      true
    rescue Crystal::SyntaxException
      false
    end

    # Regular lexing. Delimiter start tokens are not yielded; array start
    # tokens are. When *break_on_rcurly* is set, the first `}` token ends
    # this state without being yielded (used for string interpolation).
    private def run_normal_state(lexer, break_on_rcurly = false,
                                 &block : Crystal::Token -> _)
      loop do
        token = lexer.next_token
        case token.type
        when :DELIMITER_START
          run_delimiter_state lexer, token, &block
        when :STRING_ARRAY_START, :SYMBOL_ARRAY_START
          block.call token
          run_array_state lexer, token, &block
        when :EOF
          break
        when :"}"
          break if break_on_rcurly
          block.call token
        else
          block.call token
        end
      end
    end

    # Lexing inside a delimited literal. The closing delimiter and the
    # interpolation start are not yielded; an interpolation is lexed in
    # normal state up to its closing `}`.
    private def run_delimiter_state(lexer, token, &block : Crystal::Token -> _)
      loop do
        token = lexer.next_string_token(token.delimiter_state)
        case token.type
        when :DELIMITER_END
          break
        when :INTERPOLATION_START
          run_normal_state lexer, break_on_rcurly: true, &block
        when :EOF
          break
        else
          block.call token
        end
      end
    end

    # Lexing inside a string or symbol array literal. Every token up to and
    # including the array end token is yielded.
    private def run_array_state(lexer, token, &block : Crystal::Token -> _)
      loop do
        # Use the token returned by the lexer instead of relying on the
        # lexer updating the token object that was passed in.
        token = lexer.next_string_array_token
        case token.type
        when :STRING_ARRAY_END
          block.call token
          break
        when :EOF
          # NOTE(review): this is a plain exception, not a
          # `Crystal::SyntaxException`, so `#run` does not rescue it.
          raise "Unterminated symbol array literal"
        else
          block.call token
        end
      end
    end
  end
end