Merge pull request #20 from luislavena/improve-unicode-support

Improves support for non-ascii keys in a tree
2024-08-15 00:43:21 +00:00 · 2017-03-12 12:59:16 -03:00 · 2017-03-12 12:59:16 -03:00 · 7a398d463c
commit 7a398d463c
parent 7460033db3 97ef407aec
3 changed files with 55 additions and 9 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -7,6 +7,7 @@ so please check *Changed* and *Removed* notes before upgrading.
 ## [Unreleased]
 ### Fixed
 - Correct lookup issue caused by incorrect comparison of shared key [#21](https://github.com/luislavena/radix/issues/21)
+- Improve support for non-ASCII keys in a tree.

 ## [0.3.7] - 2017-02-04
 ### Fixed
--- a/spec/radix/tree_spec.cr
+++ b/spec/radix/tree_spec.cr
@ -172,6 +172,38 @@ module Radix
        end
      end

+      context "dealing with unicode" do
+        it "inserts properly adjacent parent nodes" do
+          tree = Tree(Symbol).new
+          tree.add "/", :root
+          tree.add "/日本語", :japanese
+          tree.add "/素晴らしい", :amazing
+
+          # /          (:root)
+          # +-素晴らしい    (:amazing)
+          # \-日本語      (:japanese)
+          tree.root.children.size.should eq(2)
+          tree.root.children[0].key.should eq("素晴らしい")
+          tree.root.children[1].key.should eq("日本語")
+        end
+
+        it "inserts nodes with shared parent" do
+          tree = Tree(Symbol).new
+          tree.add "/", :root
+          tree.add "/日本語", :japanese
+          tree.add "/日本は難しい", :japanese_is_difficult
+
+          # /                (:root)
+          # \-日本
+          #     +-は難しい      (:japanese_is_difficult)
+          #     \-語          (:japanese)
+          tree.root.children.size.should eq(1)
+          tree.root.children[0].key.should eq("日本")
+          tree.root.children[0].children.size.should eq(2)
+          tree.root.children[0].children[0].key.should eq("は難しい")
+          tree.root.children[0].children[1].key.should eq("語")
+        end
+      end
+
      context "dealing with duplicates" do
        it "does not allow same path be defined twice" do
          tree = Tree(Symbol).new
@ -349,6 +381,19 @@ module Radix
        end
      end

+      context "unicode nodes with shared parent" do
+        it "finds matching path" do
+          tree = Tree(Symbol).new
+          tree.add "/", :root
+          tree.add "/日本語", :japanese
+          tree.add "/日本日本語は難しい", :japanese_is_difficult
+
+          result = tree.find("/日本日本語は難しい/")
+          result.found?.should be_true
+          result.key.should eq("/日本日本語は難しい")
+        end
+      end
+
      context "dealing with catch all" do
        it "finds matching path" do
          tree = Tree(Symbol).new
--- a/src/radix/tree.cr
+++ b/src/radix/tree.cr
@ -125,7 +125,7 @@ module Radix
      # determine split point difference between path and key
      # compare if path is larger than key
      if path_reader.pos == 0 ||
-         (path_reader.pos < path.size && path_reader.pos >= node.key.size)
+         (path_reader.pos < path.bytesize && path_reader.pos >= node.key.bytesize)
        # determine if a child of this node contains the remaining part
        # of the path
        added = false
@ -156,7 +156,7 @@ module Radix

        # adjust priorities
        node.sort!
-      elsif path_reader.pos == path.size && path_reader.pos == node.key.size
+      elsif path_reader.pos == path.bytesize && path_reader.pos == node.key.bytesize
        # determine if path matches key and potentially be a duplicate
        # and raise if is the case

@ -166,7 +166,7 @@ module Radix
          # assign payload since this is an empty node
          node.payload = payload
        end
-      elsif path_reader.pos > 0 && path_reader.pos < node.key.size
+      elsif path_reader.pos > 0 && path_reader.pos < node.key.bytesize
        # determine if current node key needs to be split to accommodate new
        # children nodes

@ -187,7 +187,7 @@ module Radix
        node.sort!

        # determine if path still continues
-        if path_reader.pos < path.size
+        if path_reader.pos < path.bytesize
          new_key = path.byte_slice(path_reader.pos)
          node.children << Node(T).new(new_key, payload)
          node.sort!
@ -237,7 +237,7 @@ module Radix
      # special consideration when comparing the first node vs. others
      # in case of node key and path being the same, return the node
      # instead of walking character by character
-      if first && (path.size == node.key.size && path == node.key) && node.payload?
+      if first && (path.bytesize == node.key.bytesize && path == node.key) && node.payload?
        result.use node
        return
      end
@ -303,8 +303,8 @@ module Radix
      # nodes
      if path_reader.has_next?
        # using trailing slash?
-        if node.key.size > 0 &&
-           path_reader.pos + 1 == path.size &&
+        if node.key.bytesize > 0 &&
+           path_reader.pos + 1 == path.bytesize &&
           path_reader.current_char == '/'
          result.use node
          return
@ -329,14 +329,14 @@ module Radix
      # key still contains characters to walk
      if key_reader.has_next?
        # determine if there is just a trailing slash?
-        if key_reader.pos + 1 == node.key.size &&
+        if key_reader.pos + 1 == node.key.bytesize &&
           key_reader.current_char == '/'
          result.use node
          return
        end

        # check if remaining part is catch all
-        if key_reader.pos < node.key.size &&
+        if key_reader.pos < node.key.bytesize &&
           ((key_reader.current_char == '/' && key_reader.peek_next_char == '*') ||
           key_reader.current_char == '*')
          # skip to '*' only if necessary