mirror of
https://gitea.invidious.io/iv-org/shard-radix.git
synced 2024-08-15 00:43:21 +00:00
Improves support for non-ascii keys in a tree
Properly recognize and organize non-ascii keys into nodes, allowing usage with entries in other languages.

With this change, it is possible to use 2- or 3-byte-wide characters (Unicode) without issues:

    tree = Radix::Tree(Symbol).new
    tree.add "/", :root
    tree.add "/日本語", :japanese
    tree.add "/日本は難しい", :japanese_is_difficult

Which produces the following node hierarchy:

    # ( 1) / (:root)
    # ( 6)   日本
    # (12)     は難しい (:japanese_is_difficult)
    # ( 3)     語 (:japanese)

And lookup works as expected:

    result = tree.find "/日本は難しい"
    puts result.found? # => true
This commit is contained in:
parent
7460033db3
commit
97ef407aec
3 changed files with 55 additions and 9 deletions
|
@ -7,6 +7,7 @@ so please check *Changed* and *Removed* notes before upgrading.
|
|||
## [Unreleased]
|
||||
### Fixed
|
||||
- Correct lookup issue caused by incorrect comparison of shared key [#21](https://github.com/luislavena/radix/issues/21)
|
||||
- Improve support for non-ascii keys in a tree.
|
||||
|
||||
## [0.3.7] - 2017-02-04
|
||||
### Fixed
|
||||
|
|
|
@ -172,6 +172,38 @@ module Radix
|
|||
end
|
||||
end
|
||||
|
||||
context "dealing with unicode" do
|
||||
it "inserts properly adjacent parent nodes" do
|
||||
tree = Tree(Symbol).new
|
||||
tree.add "/", :root
|
||||
tree.add "/日本語", :japanese
|
||||
tree.add "/素晴らしい", :amazing
|
||||
|
||||
# / (:root)
|
||||
# +-素晴らしい (:amazing)
|
||||
# \-日本語 (:japanese)
|
||||
tree.root.children.size.should eq(2)
|
||||
tree.root.children[0].key.should eq("素晴らしい")
|
||||
tree.root.children[1].key.should eq("日本語")
|
||||
end
|
||||
|
||||
it "inserts nodes with shared parent" do
|
||||
tree = Tree(Symbol).new
|
||||
tree.add "/", :root
|
||||
tree.add "/日本語", :japanese
|
||||
tree.add "/日本は難しい", :japanese_is_difficult
|
||||
|
||||
# / (:root)
|
||||
# \-日本
|
||||
#   +-は難しい (:japanese_is_difficult)
#   \-語 (:japanese)
|
||||
tree.root.children.size.should eq(1)
|
||||
tree.root.children[0].key.should eq("日本")
|
||||
tree.root.children[0].children.size.should eq(2)
|
||||
tree.root.children[0].children[0].key.should eq("は難しい")
|
||||
tree.root.children[0].children[1].key.should eq("語")
|
||||
end
|
||||
end
|
||||
|
||||
context "dealing with duplicates" do
|
||||
it "does not allow same path be defined twice" do
|
||||
tree = Tree(Symbol).new
|
||||
|
@ -349,6 +381,19 @@ module Radix
|
|||
end
|
||||
end
|
||||
|
||||
context "unicode nodes with shared parent" do
|
||||
it "finds matching path" do
|
||||
tree = Tree(Symbol).new
|
||||
tree.add "/", :root
|
||||
tree.add "/日本語", :japanese
|
||||
tree.add "/日本日本語は難しい", :japanese_is_difficult
|
||||
|
||||
result = tree.find("/日本日本語は難しい/")
|
||||
result.found?.should be_true
|
||||
result.key.should eq("/日本日本語は難しい")
|
||||
end
|
||||
end
|
||||
|
||||
context "dealing with catch all" do
|
||||
it "finds matching path" do
|
||||
tree = Tree(Symbol).new
|
||||
|
|
|
@ -125,7 +125,7 @@ module Radix
|
|||
# determine split point difference between path and key
|
||||
# compare if path is larger than key
|
||||
if path_reader.pos == 0 ||
|
||||
(path_reader.pos < path.size && path_reader.pos >= node.key.size)
|
||||
(path_reader.pos < path.bytesize && path_reader.pos >= node.key.bytesize)
|
||||
# determine if a child of this node contains the remaining part
|
||||
# of the path
|
||||
added = false
|
||||
|
@ -156,7 +156,7 @@ module Radix
|
|||
|
||||
# adjust priorities
|
||||
node.sort!
|
||||
elsif path_reader.pos == path.size && path_reader.pos == node.key.size
|
||||
elsif path_reader.pos == path.bytesize && path_reader.pos == node.key.bytesize
|
||||
# determine if path matches key and potentially be a duplicate
|
||||
# and raise if is the case
|
||||
|
||||
|
@ -166,7 +166,7 @@ module Radix
|
|||
# assign payload since this is an empty node
|
||||
node.payload = payload
|
||||
end
|
||||
elsif path_reader.pos > 0 && path_reader.pos < node.key.size
|
||||
elsif path_reader.pos > 0 && path_reader.pos < node.key.bytesize
|
||||
# determine if current node key needs to be split to accommodate new
|
||||
# children nodes
|
||||
|
||||
|
@ -187,7 +187,7 @@ module Radix
|
|||
node.sort!
|
||||
|
||||
# determine if path still continues
|
||||
if path_reader.pos < path.size
|
||||
if path_reader.pos < path.bytesize
|
||||
new_key = path.byte_slice(path_reader.pos)
|
||||
node.children << Node(T).new(new_key, payload)
|
||||
node.sort!
|
||||
|
@ -237,7 +237,7 @@ module Radix
|
|||
# special consideration when comparing the first node vs. others
|
||||
# in case of node key and path being the same, return the node
|
||||
# instead of walking character by character
|
||||
if first && (path.size == node.key.size && path == node.key) && node.payload?
|
||||
if first && (path.bytesize == node.key.bytesize && path == node.key) && node.payload?
|
||||
result.use node
|
||||
return
|
||||
end
|
||||
|
@ -303,8 +303,8 @@ module Radix
|
|||
# nodes
|
||||
if path_reader.has_next?
|
||||
# using trailing slash?
|
||||
if node.key.size > 0 &&
|
||||
path_reader.pos + 1 == path.size &&
|
||||
if node.key.bytesize > 0 &&
|
||||
path_reader.pos + 1 == path.bytesize &&
|
||||
path_reader.current_char == '/'
|
||||
result.use node
|
||||
return
|
||||
|
@ -329,14 +329,14 @@ module Radix
|
|||
# key still contains characters to walk
|
||||
if key_reader.has_next?
|
||||
# determine if there is just a trailing slash?
|
||||
if key_reader.pos + 1 == node.key.size &&
|
||||
if key_reader.pos + 1 == node.key.bytesize &&
|
||||
key_reader.current_char == '/'
|
||||
result.use node
|
||||
return
|
||||
end
|
||||
|
||||
# check if remaining part is catch all
|
||||
if key_reader.pos < node.key.size &&
|
||||
if key_reader.pos < node.key.bytesize &&
|
||||
((key_reader.current_char == '/' && key_reader.peek_next_char == '*') ||
|
||||
key_reader.current_char == '*')
|
||||
# skip to '*' only if necessary
|
||||
|
|
Loading…
Reference in a new issue