mirror of
https://gitea.invidious.io/iv-org/shard-radix.git
synced 2024-08-15 00:43:21 +00:00
Improves support for non-ascii keys in a tree
Properly recognize and organize non-ASCII keys into nodes, allowing usage with entries in other languages.

With this change, it is possible to use 2- or 3-byte-wide characters (Unicode) without issues:

    tree = Radix::Tree(Symbol).new
    tree.add "/", :root
    tree.add "/日本語", :japanese
    tree.add "/日本は難しい", :japanese_is_difficult

Which produces the following node hierarchy:

    # ( 1) / (:root)
    # ( 6)   日本
    # (12)     は難しい (:japanese_is_difficult)
    # ( 3)     語 (:japanese)

And lookup works as expected:

    result = tree.find "/日本は難しい"
    puts result.found? # => true
This commit is contained in:
parent
7460033db3
commit
97ef407aec
3 changed files with 55 additions and 9 deletions
|
@ -7,6 +7,7 @@ so please check *Changed* and *Removed* notes before upgrading.
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
### Fixed
|
### Fixed
|
||||||
- Correct lookup issue caused by incorrect comparison of shared key [#21](https://github.com/luislavena/radix/issues/21)
|
- Correct lookup issue caused by incorrect comparison of shared key [#21](https://github.com/luislavena/radix/issues/21)
|
||||||
|
- Improve support for non-ascii keys in a tree.
|
||||||
|
|
||||||
## [0.3.7] - 2017-02-04
|
## [0.3.7] - 2017-02-04
|
||||||
### Fixed
|
### Fixed
|
||||||
|
|
|
@ -172,6 +172,38 @@ module Radix
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
context "dealing with unicode" do
|
||||||
|
it "inserts properly adjacent parent nodes" do
|
||||||
|
tree = Tree(Symbol).new
|
||||||
|
tree.add "/", :root
|
||||||
|
tree.add "/日本語", :japanese
|
||||||
|
tree.add "/素晴らしい", :amazing
|
||||||
|
|
||||||
|
# / (:root)
|
||||||
|
# +-素晴らしい (:amazing)
|
||||||
|
# \-日本語 (:japanese)
|
||||||
|
tree.root.children.size.should eq(2)
|
||||||
|
tree.root.children[0].key.should eq("素晴らしい")
|
||||||
|
tree.root.children[1].key.should eq("日本語")
|
||||||
|
end
|
||||||
|
|
||||||
|
it "inserts nodes with shared parent" do
|
||||||
|
tree = Tree(Symbol).new
|
||||||
|
tree.add "/", :root
|
||||||
|
tree.add "/日本語", :japanese
|
||||||
|
tree.add "/日本は難しい", :japanese_is_difficult
|
||||||
|
|
||||||
|
# / (:root)
|
||||||
|
# \-日本語 (:japanese)
|
||||||
|
# \-日本は難しい (:japanese_is_difficult)
|
||||||
|
tree.root.children.size.should eq(1)
|
||||||
|
tree.root.children[0].key.should eq("日本")
|
||||||
|
tree.root.children[0].children.size.should eq(2)
|
||||||
|
tree.root.children[0].children[0].key.should eq("は難しい")
|
||||||
|
tree.root.children[0].children[1].key.should eq("語")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
context "dealing with duplicates" do
|
context "dealing with duplicates" do
|
||||||
it "does not allow same path be defined twice" do
|
it "does not allow same path be defined twice" do
|
||||||
tree = Tree(Symbol).new
|
tree = Tree(Symbol).new
|
||||||
|
@ -349,6 +381,19 @@ module Radix
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
context "unicode nodes with shared parent" do
|
||||||
|
it "finds matching path" do
|
||||||
|
tree = Tree(Symbol).new
|
||||||
|
tree.add "/", :root
|
||||||
|
tree.add "/日本語", :japanese
|
||||||
|
tree.add "/日本日本語は難しい", :japanese_is_difficult
|
||||||
|
|
||||||
|
result = tree.find("/日本日本語は難しい/")
|
||||||
|
result.found?.should be_true
|
||||||
|
result.key.should eq("/日本日本語は難しい")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
context "dealing with catch all" do
|
context "dealing with catch all" do
|
||||||
it "finds matching path" do
|
it "finds matching path" do
|
||||||
tree = Tree(Symbol).new
|
tree = Tree(Symbol).new
|
||||||
|
|
|
@ -125,7 +125,7 @@ module Radix
|
||||||
# determine split point difference between path and key
|
# determine split point difference between path and key
|
||||||
# compare if path is larger than key
|
# compare if path is larger than key
|
||||||
if path_reader.pos == 0 ||
|
if path_reader.pos == 0 ||
|
||||||
(path_reader.pos < path.size && path_reader.pos >= node.key.size)
|
(path_reader.pos < path.bytesize && path_reader.pos >= node.key.bytesize)
|
||||||
# determine if a child of this node contains the remaining part
|
# determine if a child of this node contains the remaining part
|
||||||
# of the path
|
# of the path
|
||||||
added = false
|
added = false
|
||||||
|
@ -156,7 +156,7 @@ module Radix
|
||||||
|
|
||||||
# adjust priorities
|
# adjust priorities
|
||||||
node.sort!
|
node.sort!
|
||||||
elsif path_reader.pos == path.size && path_reader.pos == node.key.size
|
elsif path_reader.pos == path.bytesize && path_reader.pos == node.key.bytesize
|
||||||
# determine if path matches key and potentially be a duplicate
|
# determine if path matches key and potentially be a duplicate
|
||||||
# and raise if is the case
|
# and raise if is the case
|
||||||
|
|
||||||
|
@ -166,7 +166,7 @@ module Radix
|
||||||
# assign payload since this is an empty node
|
# assign payload since this is an empty node
|
||||||
node.payload = payload
|
node.payload = payload
|
||||||
end
|
end
|
||||||
elsif path_reader.pos > 0 && path_reader.pos < node.key.size
|
elsif path_reader.pos > 0 && path_reader.pos < node.key.bytesize
|
||||||
# determine if current node key needs to be split to accommodate new
|
# determine if current node key needs to be split to accommodate new
|
||||||
# children nodes
|
# children nodes
|
||||||
|
|
||||||
|
@ -187,7 +187,7 @@ module Radix
|
||||||
node.sort!
|
node.sort!
|
||||||
|
|
||||||
# determine if path still continues
|
# determine if path still continues
|
||||||
if path_reader.pos < path.size
|
if path_reader.pos < path.bytesize
|
||||||
new_key = path.byte_slice(path_reader.pos)
|
new_key = path.byte_slice(path_reader.pos)
|
||||||
node.children << Node(T).new(new_key, payload)
|
node.children << Node(T).new(new_key, payload)
|
||||||
node.sort!
|
node.sort!
|
||||||
|
@ -237,7 +237,7 @@ module Radix
|
||||||
# special consideration when comparing the first node vs. others
|
# special consideration when comparing the first node vs. others
|
||||||
# in case of node key and path being the same, return the node
|
# in case of node key and path being the same, return the node
|
||||||
# instead of walking character by character
|
# instead of walking character by character
|
||||||
if first && (path.size == node.key.size && path == node.key) && node.payload?
|
if first && (path.bytesize == node.key.bytesize && path == node.key) && node.payload?
|
||||||
result.use node
|
result.use node
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
|
@ -303,8 +303,8 @@ module Radix
|
||||||
# nodes
|
# nodes
|
||||||
if path_reader.has_next?
|
if path_reader.has_next?
|
||||||
# using trailing slash?
|
# using trailing slash?
|
||||||
if node.key.size > 0 &&
|
if node.key.bytesize > 0 &&
|
||||||
path_reader.pos + 1 == path.size &&
|
path_reader.pos + 1 == path.bytesize &&
|
||||||
path_reader.current_char == '/'
|
path_reader.current_char == '/'
|
||||||
result.use node
|
result.use node
|
||||||
return
|
return
|
||||||
|
@ -329,14 +329,14 @@ module Radix
|
||||||
# key still contains characters to walk
|
# key still contains characters to walk
|
||||||
if key_reader.has_next?
|
if key_reader.has_next?
|
||||||
# determine if there is just a trailing slash?
|
# determine if there is just a trailing slash?
|
||||||
if key_reader.pos + 1 == node.key.size &&
|
if key_reader.pos + 1 == node.key.bytesize &&
|
||||||
key_reader.current_char == '/'
|
key_reader.current_char == '/'
|
||||||
result.use node
|
result.use node
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
|
|
||||||
# check if remaining part is catch all
|
# check if remaining part is catch all
|
||||||
if key_reader.pos < node.key.size &&
|
if key_reader.pos < node.key.bytesize &&
|
||||||
((key_reader.current_char == '/' && key_reader.peek_next_char == '*') ||
|
((key_reader.current_char == '/' && key_reader.peek_next_char == '*') ||
|
||||||
key_reader.current_char == '*')
|
key_reader.current_char == '*')
|
||||||
# skip to '*' only if necessary
|
# skip to '*' only if necessary
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue