From 2f9ba278112d3aef601103eba7dd42c54600e491 Mon Sep 17 00:00:00 2001
From: "V. Elenhaupt" <3624712+veelenga@users.noreply.github.com>
Date: Sat, 4 Nov 2017 16:43:40 +0200
Subject: [PATCH] Large numbers rule + tokenizer (#10)

---
 spec/ameba/rules/large_numbers_spec.cr | 118 +++++++++++++++++++++++++
 spec/ameba/source_spec.cr              |  10 ---
 spec/ameba/tokenizer_spec.cr           |  42 +++++++++
 src/ameba/ast/traverse.cr              |   4 +-
 src/ameba/rules/large_numbers.cr       |  86 ++++++++++++++++++
 src/ameba/source.cr                    |   7 --
 src/ameba/tokenizer.cr                 |  74 ++++++++++++++++
 7 files changed, 323 insertions(+), 18 deletions(-)
 create mode 100644 spec/ameba/rules/large_numbers_spec.cr
 create mode 100644 spec/ameba/tokenizer_spec.cr
 create mode 100644 src/ameba/rules/large_numbers.cr
 create mode 100644 src/ameba/tokenizer.cr

diff --git a/spec/ameba/rules/large_numbers_spec.cr b/spec/ameba/rules/large_numbers_spec.cr
new file mode 100644
index 00000000..38686efd
--- /dev/null
+++ b/spec/ameba/rules/large_numbers_spec.cr
@@ -0,0 +1,118 @@
+require "../../spec_helper"
+
+private def it_transforms(number, expected) # one example: the rule must reject `number` and its message must contain `expected`
+  it "transforms large number #{number}" do
+    s = Ameba::Source.new number
+    Ameba::Rules::LargeNumbers.new.catch(s).should_not be_valid
+    s.errors.first.message.should contain expected
+  end
+end
+
+module Ameba::Rules
+  subject = LargeNumbers.new # rule instance shared by the examples below
+
+  describe LargeNumbers do
+    it "passes if large number does not require underscore" do # every literal in the fixture must be accepted as written
+      s = Source.new %q(
+        1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+        16 17 18 19 20 30 40 50 60 70 80 90
+        100
+        1_000
+        10_000
+        100_000
+        200_000
+        300_000
+        400_000
+        500_000
+        600_000
+        700_000
+        800_000
+        900_000
+        1_000_000
+
+        -9_223_372_036_854_775_808
+        9_223_372_036_854_775_807
+
+        141_592_654
+        141_592_654.0
+        141_592_654.001
+        141_592_654.001_2
+        141_592_654.001_23
+        141_592_654.001_234
+        141_592_654.001_234_5
+
+        0b1101
+        0o123
+        0xFE012D
+        0xfe012d
+        0xfe012dd11
+
+        1_i8
+        12_i16
+        123_i32
+        1_234_i64
+
+        12_u8
+        123_u16
+        1_234_u32
+        9_223_372_036_854_775_808_u64
+        9_223_372_036_854_775_808.000_123_456_789_f64
+
+        +100_u32
+        -900_000_i32
+
+        1_234.5e-7
+        11_234e10_f32
+        +1.123
+        -0.000_5
+      )
+      subject.catch(s).should be_valid
+    end
+
+    it_transforms "10000", "10_000"
+    it_transforms "+10000", "+10_000" # the sign is part of the number token
+    it_transforms "-10000", "-10_000"
+
+    it_transforms "9223372036854775808", "9_223_372_036_854_775_808"
+    it_transforms "-9223372036854775808", "-9_223_372_036_854_775_808"
+    it_transforms "+9223372036854775808", "+9_223_372_036_854_775_808"
+
+    it_transforms "1_00000", "100_000" # misplaced underscores are regrouped
+
+    it_transforms "1_23_i8", "123_i8"
+    it_transforms "1000_i16", "1_000_i16"
+    it_transforms "1000_i32", "1_000_i32"
+    it_transforms "1000_i64", "1_000_i64"
+
+    it_transforms "1_23_u8", "123_u8"
+    it_transforms "1000_u16", "1_000_u16"
+    it_transforms "1000_u32", "1_000_u32"
+    it_transforms "1000_u64", "1_000_u64"
+
+    it_transforms "123456_f32", "123_456_f32"
+    it_transforms "123456_f64", "123_456_f64"
+
+    it_transforms "123456.5e-7_f32", "123_456.5e-7_f32"
+    it_transforms "123456e10_f64", "123_456e10_f64"
+
+    it_transforms "123456.5e-7", "123_456.5e-7"
+    it_transforms "123456e10", "123_456e10"
+
+    it_transforms "3.00_1", "3.001" # fraction digits are grouped from the left
+    it_transforms "3.0012", "3.001_2"
+    it_transforms "3.00123", "3.001_23"
+    it_transforms "3.001234", "3.001_234"
+    it_transforms "3.0012345", "3.001_234_5"
+
+    it "reports rule, pos and message" do
+      s = Source.new %q(
+        1200000
+      )
+      subject.catch(s).should_not be_valid
+      error = s.errors.first
+      error.rule.should_not be_nil
+      error.pos.should eq 2 # the literal sits on the second line of the fixture
+      error.message.should match /1_200_000/
+    end
+  end
+end
diff --git a/spec/ameba/source_spec.cr b/spec/ameba/source_spec.cr
index 1a726068..762bcf91 100644
--- a/spec/ameba/source_spec.cr
+++ b/spec/ameba/source_spec.cr
@@ -21,15 +21,5 @@ module Ameba
         s.errors.first.message.should eq "Error!"
       end
     end
-
-    describe "#ast" do
-      it "returns ast nodes" do
-        s = Source.new %(
-          class A; end
-          class B; end
-        )
-        s.ast.to_s.should eq "class A\nend\nclass B\nend\n"
-      end
-    end
   end
 end
diff --git a/spec/ameba/tokenizer_spec.cr b/spec/ameba/tokenizer_spec.cr
new file mode 100644
index 00000000..cdf332cd
--- /dev/null
+++ b/spec/ameba/tokenizer_spec.cr
@@ -0,0 +1,42 @@
+require "../spec_helper"
+
+private def it_tokenizes(str, expected) # one example: tokenizing `str` must succeed and yield exactly the `expected` token types
+  it "tokenizes #{str}" do
+    ([] of Symbol).tap do |token_types|
+      Ameba::Tokenizer.new(Ameba::Source.new str).run do |token|
+        token_types << token.type
+      end.should be_true
+    end.should eq expected
+  end
+end
+
+module Ameba
+  describe Tokenizer do
+    describe "#run" do
+      it_tokenizes %("string"), %i(STRING) # delimiter start/end tokens are not expected in the output
+      it_tokenizes %(100), %i(NUMBER)
+      it_tokenizes %('a'), %i(CHAR)
+      it_tokenizes %([]), %i([])
+      it_tokenizes %([] of String), %i([] SPACE IDENT SPACE CONST)
+      it_tokenizes %q("str #{3}"), %i(STRING NUMBER) # %q keeps `#{3}` literal, so the tokenizer sees the interpolation
+
+      it_tokenizes %(%w(1 2)),
+        %i(STRING_ARRAY_START STRING STRING STRING_ARRAY_END)
+
+      it_tokenizes %(%i(one two)),
+        %i(SYMBOL_ARRAY_START STRING STRING STRING_ARRAY_END)
+
+      it_tokenizes %(
+        class A
+          def method
+            puts "hello"
+          end
+        end
+      ), [
+        :NEWLINE, :SPACE, :IDENT, :SPACE, :CONST, :NEWLINE, :SPACE, :IDENT,
+        :SPACE, :IDENT, :NEWLINE, :SPACE, :IDENT, :SPACE, :STRING, :NEWLINE,
+        :SPACE, :IDENT, :NEWLINE, :SPACE, :IDENT, :NEWLINE, :SPACE,
+      ]
+    end
+  end
+end
diff --git a/src/ameba/ast/traverse.cr b/src/ameba/ast/traverse.cr
index 293a3477..be71e3ab 100644
--- a/src/ameba/ast/traverse.cr
+++ b/src/ameba/ast/traverse.cr
@@ -16,7 +16,9 @@ module Ameba::AST
       @source : Source
 
       def initialize(@rule, @source)
-        @source.ast.accept self
+        parser = Crystal::Parser.new(@source.content) # parsed here; this patch removes Source#ast
+        parser.filename = @source.path
+        parser.parse.accept self
       end
 
       def visit(node : Crystal::ASTNode)
diff --git a/src/ameba/rules/large_numbers.cr b/src/ameba/rules/large_numbers.cr
new file mode 100644
index 00000000..93ee4ac2
--- /dev/null
+++ 
b/src/ameba/rules/large_numbers.cr
@@ -0,0 +1,105 @@
+module Ameba::Rules
+  # A rule that disallows usage of large numbers without underscore.
+  # These do not affect the value of the number, but can help read
+  # large numbers more easily.
+  #
+  # For example, these are considered invalid:
+  #
+  # ```
+  # 10000
+  # 141592654
+  # 5.12345
+  # ```
+  #
+  # And has to be rewritten as the following:
+  #
+  # ```
+  # 10_000
+  # 141_592_654
+  # 5.123_45
+  # ```
+  #
+  # Only decimal literals are checked: binary (`0b`), octal (`0o`) and
+  # hexadecimal (`0x`) literals are skipped, whether or not they are signed.
+  #
+  struct LargeNumbers < Rule
+    # Reports every decimal number token of *source* whose raw text differs
+    # from its underscore-grouped form.
+    def test(source)
+      Tokenizer.new(source).run do |token|
+        next unless token.type == :NUMBER && decimal?(token.raw)
+
+        if (expected = underscored token.raw) != token.raw
+          source.error self, token.line_number,
+            "Large numbers should be written with underscores: #{expected}"
+        end
+      end
+    end
+
+    # Returns true unless *value* carries a `0x`, `0b` or `0o` prefix.
+    # The sign is part of the raw token, so it is allowed before the prefix.
+    private def decimal?(value)
+      value !~ /^[+-]?0(x|b|o)/
+    end
+
+    # Rebuilds *raw_number* with its digits in groups of three: integer
+    # digits are grouped from the right, fraction digits from the left.
+    # Sign, exponent and type suffix are kept as written.
+    private def underscored(raw_number)
+      sign, value, fraction, suffix = parse_number raw_number
+      value = slice_digits(value.reverse).reverse
+      fraction = "." + slice_digits(fraction) if fraction
+
+      "#{sign}#{value}#{fraction}#{suffix}"
+    end
+
+    # Drops the underscores of *value* and joins the remaining characters
+    # in groups of *by*, separated by an underscore.
+    private def slice_digits(value, by = 3)
+      groups = [] of String
+      value.chars.reject(&.== '_').each_slice(by) do |slice|
+        groups << slice.join
+      end
+      groups.join("_")
+    end
+
+    # Splits *value* into `{sign, integer part, fraction, suffix}`.
+    # Sign, fraction and suffix are nil when the literal has none.
+    private def parse_number(value)
+      value, sign = parse_sign(value)
+      value, suffix = parse_suffix(value)
+      value, fraction = parse_fraction(value)
+
+      {sign, value, fraction, suffix}
+    end
+
+    # Splits a leading `+` or `-` off *value*.
+    private def parse_sign(value)
+      if "+-".includes?(value[0])
+        sign = value[0]
+        value = value[1..-1]
+      end
+      {value, sign}
+    end
+
+    # Splits off everything from the exponent marker or the type suffix on.
+    # The exponent marker may be `e` or `E`; the type suffix may be written
+    # with or without a leading underscore (`1_000_i64`, `1000i64`).
+    private def parse_suffix(value)
+      if pos = (value =~ /[eE]|_?[iuf]/)
+        suffix = value[pos..-1]
+        value = value[0...pos]
+      end
+      {value, suffix}
+    end
+
+    # Splits the digits after the decimal point off *value*.
+    private def parse_fraction(value)
+      if dot = value.index('.')
+        fraction = value[dot + 1..-1]
+        value = value[0...dot]
+      end
+      {value, fraction}
+    end
+  end
+end
diff --git a/src/ameba/source.cr b/src/ameba/source.cr
index ba3dcc4b..7f92803b 100644
--- a/src/ameba/source.cr
+++ b/src/ameba/source.cr
@@ -15,7 +15,6 @@ module Ameba
     getter errors = [] of Error
     getter path : String?
     getter content : String
-    getter ast : Crystal::ASTNode?
 
def initialize(@content : String, @path = nil)
     end
@@ -31,11 +30,5 @@ module Ameba
     def lines
       @lines ||= @content.split("\n")
     end
-
-    def ast
-      @ast ||= Crystal::Parser.new(@content)
-                              .tap { |p| p.filename = @path }
-                              .parse
-    end
   end
 end
diff --git a/src/ameba/tokenizer.cr b/src/ameba/tokenizer.cr
new file mode 100644
index 00000000..c7479a65
--- /dev/null
+++ b/src/ameba/tokenizer.cr
@@ -0,0 +1,107 @@
+require "compiler/crystal/syntax/*"
+
+module Ameba
+  # Feeds the tokens of a source to a block, using the Crystal lexer.
+  #
+  # ```
+  # Tokenizer.new(source).run do |token|
+  #   puts token.type
+  # end
+  # ```
+  class Tokenizer
+    # Builds a lexer over `source.content` with whitespace counting,
+    # comments and raw token text enabled.
+    def initialize(source)
+      @lexer = Crystal::Lexer.new source.content
+      @lexer.count_whitespace = true
+      @lexer.comments_enabled = true
+      @lexer.wants_raw = true
+      @lexer.filename = source.path
+    end
+
+    # Yields every token of the source to the block. Returns `true` when
+    # the whole source was tokenized and `false` when the lexer raised a
+    # `Crystal::SyntaxException`.
+    def run(&block : Crystal::Token -> _)
+      run_normal_state @lexer, &block
+      true
+    rescue Crystal::SyntaxException
+      false
+    end
+
+    # Regular code: yields tokens until EOF or, when *break_on_rcurly* is
+    # set (used for string interpolations), until the first `}`.
+    # Delimiter start tokens are not yielded; the literal they open is
+    # handled by `run_delimiter_state`.
+    #
+    # NOTE(review): a `}` closing a nested `{ ... }` inside an
+    # interpolation also ends this state - confirm whether brace depth
+    # has to be tracked.
+    private def run_normal_state(lexer, break_on_rcurly = false,
+                                 &block : Crystal::Token -> _)
+      loop do
+        token = lexer.next_token
+        case token.type
+        when :DELIMITER_START
+          run_delimiter_state lexer, token, &block
+        when :STRING_ARRAY_START, :SYMBOL_ARRAY_START
+          block.call token
+          run_array_state lexer, token, &block
+        when :EOF
+          break
+        when :"}"
+          break if break_on_rcurly
+          block.call token
+        else
+          block.call token
+        end
+      end
+    end
+
+    # Inside a delimited literal: yields its pieces up to the closing
+    # delimiter and tokenizes interpolations as regular code.
+    private def run_delimiter_state(lexer, token, &block : Crystal::Token -> _)
+      # Keep a copy of the state: tokenizing an interpolation asks the
+      # lexer for more tokens, and a literal nested in it could otherwise
+      # replace the state of the enclosing one.
+      delimiter_state = token.delimiter_state
+
+      loop do
+        token.delimiter_state = delimiter_state
+        token = lexer.next_string_token(delimiter_state)
+        delimiter_state = token.delimiter_state
+
+        case token.type
+        when :DELIMITER_END
+          break
+        when :INTERPOLATION_START
+          run_normal_state lexer, break_on_rcurly: true, &block
+        when :EOF
+          break
+        else
+          block.call token
+        end
+      end
+    end
+
+    # Inside a `%w(...)` or `%i(...)` literal: yields every element and
+    # the closing token.
+    private def run_array_state(lexer, token, &block : Crystal::Token -> _)
+      loop do
+        # Use the token the lexer returns rather than relying on the
+        # lexer updating the start token in place.
+        token = lexer.next_string_array_token
+
+        case token.type
+        when :STRING_ARRAY_END
+          block.call token
+          break
+        when :EOF
+          raise "Unterminated symbol array literal"
+        else
+          block.call token
+        end
+      end
+    end
+  end
+end