Skip to content

Commit

Permalink
Improved HAMT Hash.
Browse files Browse the repository at this point in the history
The base "table" has been replaced by a regular Trie. Rubinius has deprecated
support for 32-bit and that support is being removed, so the branching factor
is currently 62 (actually 60 = 6 * 10), but that will be extended to 64 when
1. instructions are added for computing with non-mananged objects (ie not
being limited to Fixnum); and 2. managed objects with non-managed fields are
able to be processed by managed code instructions.

A linear scan list is still used for hash values that collide along (6 * 10 =
60 bits), but this could be enhanced to be a binary/red-black/etc tree
instead.
brixen committed Jul 14, 2016
1 parent 98c69b0 commit 7bf377b
Showing 4 changed files with 190 additions and 169 deletions.
306 changes: 169 additions & 137 deletions core/hash.rb
Original file line number Diff line number Diff line change
@@ -42,14 +42,13 @@ def match?(this_key, this_hash, other_key, other_hash)
end

class Item
attr_accessor :state
attr_accessor :previous
attr_accessor :next
attr_accessor :key
attr_accessor :key_hash
attr_accessor :value

def initialize(key, key_hash, value, state)
def initialize(state, key, key_hash, value)
if key.kind_of?(String) and !key.frozen? and !state.compare_by_identity?
key = key.dup
key.freeze
@@ -58,7 +57,6 @@ def initialize(key, key_hash, value, state)
@key = key
@key_hash = key_hash
@value = value
@state = state

if tail = state.tail
@previous = tail
@@ -71,49 +69,51 @@ def initialize(key, key_hash, value, state)
end

def empty?
not @state
not @key_hash
end

def lookup(key, key_hash)
return self if @state.match? @key, @key_hash, key, key_hash
def lookup(state, key, key_hash)
return self if state.match? @key, @key_hash, key, key_hash
end

def insert(key, key_hash, value)
return unless @state.match? @key, @key_hash, key, key_hash
def insert(state, key, key_hash, value)
return unless state.match? @key, @key_hash, key, key_hash
@value = value
self
end

def delete(key, key_hash)
if @state.match? @key, @key_hash, key, key_hash
def delete(state, key, key_hash)
if state.match? @key, @key_hash, key, key_hash

if @previous
@previous.next = @next
else
@state.head = @next
state.head = @next
end

if @next
@next.previous = @previous
else
@state.tail = @previous
state.tail = @previous
end

@state.size -= 1
@state = nil
state.size -= 1
@key_hash = nil

self
end
end

def inspect
"\#<#{self.class}:0x#{object_id.to_s(16)} @key=#{@key} @key_hash=#{@key_hash} @value=#{@value} @previous=\#<#{@previous.class}:0x#{@previous.object_id.to_s(16)}> @next=\#<#{@next.class}:0x#{@next.object_id.to_s(16)}>>"
end
end

class List
attr_accessor :state
attr_accessor :key_hash
attr_accessor :entries

def initialize(state, key_hash)
@state = state
def initialize(key_hash)
@key_hash = key_hash
@entries = Vector.new 2
end
@@ -122,28 +122,28 @@ def empty?
not @entries
end

def lookup(key, key_hash)
@entries.each { |e| return e if e.lookup(key, key_hash) }
def lookup(state, key, key_hash)
@entries.each { |e| return e if e.lookup(state, key, key_hash) }
nil
end

def insert(key, key_hash, value)
@entries.each { |e| return e if e.insert(key, key_hash, value) }
def insert(state, key, key_hash, value)
@entries.each { |e| return e if e.insert(state, key, key_hash, value) }

item = Item.new key, key_hash, value, @state
item = Item.new state, key, key_hash, value
@entries = @entries.insert_at_index @entries.size, item

item
end

def add(item, key, key_hash, value)
def add(state, item, key, key_hash, value)
@entries[0] = item
@entries[1] = Item.new key, key_hash, value, @state
@entries[1] = Item.new state, key, key_hash, value
end

def delete(key, key_hash)
def delete(state, key, key_hash)
@entries.each_with_index do |e, i|
if e.delete key, key_hash
if e.delete state, key, key_hash
@entries = @entries.delete_at_index i
return e
end
@@ -154,13 +154,37 @@ def delete(key, key_hash)

# An Array Mapped Trie
class Trie
MAX_LEVELS = 10

attr_accessor :bmp
attr_accessor :entries
attr_accessor :level
attr_accessor :state

def initialize(state, level)
@state = state
def self.from_key(state, key, key_hash, value)
trie = new 0
trie.add_key state, key, key_hash, value
trie
end

def self.from_item(state, level, item, key, key_hash, value)
if level < MAX_LEVELS
trie = new level + 1
trie.add_item state, item, key, key_hash, value
trie
else
list = List.new key_hash
list.add state, item, key, key_hash, value
list
end
end

def self.from_none
trie = new 0
trie.entries = Vector[]
trie
end

def initialize(level)
@level = level
@bmp = 0
@entries = nil
@@ -170,58 +194,62 @@ def empty?
not @entries
end

def lookup(key, key_hash)
def lookup(state, key, key_hash)
return unless index = item_index(key_hash)
@entries[index].lookup key, key_hash
@entries[index].lookup state, key, key_hash
end

def insert(key, key_hash, value)
def insert(state, key, key_hash, value)
if index = item_index(key_hash)
item = @entries[index]
unless item.insert key, key_hash, value
trie = Trie.new @state, @level + 1
trie.add item, key, key_hash, value
@entries[index] = trie
return trie
else
if item.insert state, key, key_hash, value
return item
else
obj = Trie.from_item state, @level, item, key, key_hash, value
@entries[index] = obj
return obj
end
else
@bmp = set_bitmap key_hash
index = item_index key_hash
item = Item.new key, key_hash, value, @state
item = Item.new state, key, key_hash, value
@entries = @entries.insert_at_index index, item
return item
end
end

def add(item, key, key_hash, value)
def add_key(state, key, key_hash, value)
@bmp = set_bitmap key_hash
item = Item.new state, key, key_hash, value
@entries = Vector[item]
end

def add_item(state, item, key, key_hash, value)
item_hash = item.key_hash
bmp = @bmp = set_bitmap(item_hash)
@bmp = set_bitmap key_hash

if bmp == @bmp
if item_hash == key_hash
list = List.new @state, key_hash
list.add item, key, key_hash, value
list = List.new key_hash
list.add state, item, key, key_hash, value
@entries = Vector[list]
else
trie = Trie.new @state, @level + 1
trie.add item, key, key_hash, value
@entries = Vector[trie]
obj = Trie.from_item state, @level, item, key, key_hash, value
@entries = Vector[obj]
end
else
@entries = Vector.new 2
@entries[item_index(item_hash)] = item
@entries[item_index(key_hash)] = Item.new key, key_hash, value, @state
@entries[item_index(key_hash)] = Item.new state, key, key_hash, value
end
end

def delete(key, key_hash)
def delete(state, key, key_hash)
return unless index = item_index(key_hash)

item = @entries[index]
if de = item.delete(key, key_hash)
if de = item.delete(state, key, key_hash)
if item.empty?
@bmp = unset_bitmap key_hash

@@ -232,6 +260,16 @@ def delete(key, key_hash)
end
end

def inspect
s = "\#<#{self.class}:0x#{object_id.to_s(16)} @bmp=#{@bmp} @level=#{@level} @entries="
if @entries.size > 0
s << "[ "
s << @entries.map { |e| "\#<#{e.class}:0x#{e.object_id.to_s(16)}>" }.join(", ")
s << " ]"
end
s << ">"
end

def item_index(key_hash)
Rubinius.invoke_primitive :vm_hash_trie_item_index, key_hash, @level, @bmp
end
@@ -245,52 +283,6 @@ def unset_bitmap(key_hash)
end
end

class Table
attr_accessor :entries
attr_accessor :state

def initialize(state)
@state = state
@entries = Vector.new 64
end

def item_index(key_hash)
key_hash & 0b111111
end

def lookup(key, key_hash)
if item = @entries[item_index(key_hash)]
item.lookup key, key_hash
end
end

def insert(key, key_hash, value)
index = item_index key_hash
if item = @entries[index]
unless item.insert key, key_hash, value
trie = Trie.new @state, 0
trie.add item, key, key_hash, value
@entries[index] = trie
end
else
item = Item.new key, key_hash, value, @state
@entries[index] = item
end
end

def delete(key, key_hash)
index = item_index key_hash
return unless item = @entries[index]

if de = item.delete(key, key_hash)
if item.empty?
@entries[index] = nil
end
return de
end
end
end

# An external iterator that returns entries in insertion order. While
# somewhat following the API of Enumerator, it is named Iterator because it
# does not provide <code>#each</code> and should not conflict with
@@ -400,9 +392,11 @@ def initialize_copy(other)
private :initialize_copy

def [](key)
Rubinius.privately { key_hash = key.hash }
if @table and item = @table.lookup(key, key_hash)
return item.value
if @trie
Rubinius.privately { key_hash = key.hash }
if item = @trie.lookup(@state, key, key_hash)
return item.value
end
end

default key
@@ -411,14 +405,15 @@ def [](key)
def []=(key, value)
Rubinius.check_frozen

unless @table
Rubinius.privately { key_hash = key.hash }

if @trie
@trie.insert @state, key, key_hash, value
else
@state = State.new unless @state
@table = Table.new @state
@trie = Trie.from_key @state, key, key_hash, value
end

Rubinius.privately { key_hash = key.hash }
@table.insert key, key_hash, value

value
end

@@ -462,7 +457,7 @@ def assoc(key)

def clear
@state = State.from @state
@table = nil
@trie = nil
self
end

@@ -513,9 +508,10 @@ def default_proc=(prc)
def delete(key)
Rubinius.check_frozen

if @table
if @trie
Rubinius.privately { key_hash = key.hash }
if item = @table.delete(key, key_hash)
if item = @trie.delete(@state, key, key_hash)
@trie = nil if @trie.empty?
return item.value
end
end
@@ -604,7 +600,7 @@ def eql?(other)
other.each_item do |e|

Rubinius.privately { key_hash = e.key.hash }
return false unless item = @table.lookup(e.key, key_hash)
return false unless item = @trie.lookup(@state, e.key, key_hash)

# Order of the comparison matters! We must compare our value with
# the other Hash's value and not the other way around.
@@ -629,7 +625,7 @@ def ==(other)
other.each_item do |e|

Rubinius.privately { key_hash = e.key.hash }
item = @table.lookup(e.key, key_hash)
item = @trie.lookup(@state, e.key, key_hash)

return false unless item

@@ -646,9 +642,9 @@ def ==(other)
end

def fetch(key, default=undefined)
unless empty?
if @trie
Rubinius.privately { key_hash = key.hash }
if item = @table.lookup(key, key_hash)
if item = @trie.lookup(@state, key, key_hash)
return item.value
end
end
@@ -665,9 +661,9 @@ def fetch_values(*keys, &block)
# Searches for an item matching +key+. Returns the item
# if found. Otherwise returns +nil+. Called from the C-API.
def find_item(key)
unless empty?
if @trie
Rubinius.privately { key_hash = key.hash }
if item = @table.lookup(key, key_hash)
if item = @trie.lookup(@state, key, key_hash)
return item
end
end
@@ -727,21 +723,34 @@ def key(value)
alias_method :index, :key

def key?(key)
Rubinius.privately { key_hash = key.hash }
if @table and @table.lookup(key, key_hash)
true
else
false
if @trie
Rubinius.privately { key_hash = key.hash }
if @trie.lookup(@state, key, key_hash)
return true
end
end

false
end

alias_method :has_key?, :key?
alias_method :include?, :key?
alias_method :member?, :key?

def keys
ary = []
each_item { |e| ary << e.key }
return [] unless @state

ary = Array.new @state.size

i = 0
item = @state.head

while item
ary[i] = item.key
i += 1
item = item.next
end

ary
end

@@ -756,26 +765,27 @@ def merge!(other)

return self if other.empty?

unless @table
unless @trie
@state = State.new unless @state
@table = Table.new @state
@trie = Trie.from_none
end

if block_given?
other.each_item do |e|
key = e.key
Rubinius.privately { key_hash = key.hash }
if item = @table.lookup(key, key_hash)

if item = @trie.lookup(@state, key, key_hash)
item.value = yield(key, item.value, e.value)
else
@table.insert key, key_hash, e.value
@trie.insert @state, key, key_hash, e.value
end
end
else
other.each_item do |e|
key = e.key
Rubinius.privately { key_hash = key.hash }
@table.insert key, key_hash, e.value
@trie.insert @state, key, key_hash, e.value
end
end

@@ -796,16 +806,14 @@ def rehash

item = @state.head
@state = State.from @state
table = Table.new @state
@trie = Trie.from_none

while item
Rubinius.privately { key_hash = item.key.hash }
table.insert item.key, key_hash, item.value
@trie.insert @state, item.key, key_hash, item.value
item = item.next
end

@table = table

self
end

@@ -841,11 +849,11 @@ def replace(other)

@state = State.from @state
@state.compare_by_identity if other.compare_by_identity?
@table = Table.new @state
@trie = Trie.from_none

other.each_item do |e|
Rubinius.privately { key_hash = e.key.hash }
@table.insert e.key, key_hash, e.value
@trie.insert @state, e.key, key_hash, e.value
end

@default = other.default
@@ -873,11 +881,11 @@ def select!

Rubinius.check_frozen

return nil if empty?
return if empty?

size = @state.size
each_item { |e| delete e.key unless yield(e.key, e.value) }
return nil if size == @state.size
return if size == @state.size

self
end
@@ -889,7 +897,8 @@ def shift

item = @state.head
Rubinius.privately { key_hash = item.key.hash }
@table.delete item.key, key_hash
@trie.delete @state, item.key, key_hash
@trie = nil if @trie.empty?

return item.key, item.value
end
@@ -901,8 +910,20 @@ def size
alias_method :length, :size

def to_a
ary = []
each_item { |e| ary << [e.key, e.value] }
if @state
ary = Array.new size

i = 0
item = @state.head

while item
ary[i] = [item.key, item.value]
i += 1
item = item.next
end
else
ary = []
end

Rubinius::Type.infect ary, self
ary
@@ -937,8 +958,19 @@ def value?(value)
alias_method :has_value?, :value?

def values
ary = []
each_item { |e| ary << e.value }
return [] unless @state

ary = Array.new size

i = 0
item = @state.head

while item
ary[i] = item.value
i += 1
item = item.next
end

ary
end

@@ -948,7 +980,7 @@ def values_at(*args)
else
args.map do |key|
Rubinius.privately { key_hash = key.hash }
if item = @table.lookup(key, key_hash)
if item = @trie.lookup(@state, key, key_hash)
item.value
else
default key
5 changes: 3 additions & 2 deletions core/marshal.rb
Original file line number Diff line number Diff line change
@@ -307,8 +307,9 @@ def __marshal__(ms)
raise TypeError, "can't dump hash with default proc" if default_proc

excluded_ivars = %w[
@capacity @mask @max_entries @size @entries @default_proc @default
@state @compare_by_identity @head @tail @table
@head @tail @size @previous @next @key @key_hash @value
@key_hash @entries @bmp @entries @level @state @trie
@compare_by_identity @default @default_proc
].map { |a| a.to_sym }

out = ms.serialize_instance_variables_prefix(self, excluded_ivars)
40 changes: 10 additions & 30 deletions machine/builtin/system.cpp
Original file line number Diff line number Diff line change
@@ -1769,60 +1769,40 @@ namespace rubinius {
return code->machine_code()->execute_as_script(state, code);
}

#define HASH_TRIE_BASE_SHIFT 6

#if RBX_SIZEOF_LONG == 8
#define HASH_TRIE_BIT_WIDTH 6
#define HASH_TRIE_BIT_MASK 0x3f
#else
#define HASH_TRIE_BIT_WIDTH 5
#define HASH_TRIE_BIT_MASK 0x1f
#endif

static inline size_t hash_trie_bit(Fixnum* hash, Fixnum* level) {
native_int h = hash->to_native();
native_int l = level->to_native();

size_t width = HASH_TRIE_BIT_WIDTH;
size_t mask = HASH_TRIE_BIT_MASK;
size_t base = HASH_TRIE_BASE_SHIFT;
size_t result = 1;

return result << ((h >> (l * width + base)) & mask);
return result << ((h >> (l * width)) & mask);
}

static inline int hash_trie_index(size_t m) {
#if RBX_SIZEOF_LONG == 8
native_int sk5 = 0x5555555555555555;
native_int sk3 = 0x3333333333333333;
native_int skf0 = 0x0F0F0F0F0F0F0F0F;
uint64_t sk5 = 0x5555555555555555;
uint64_t sk3 = 0x3333333333333333;
uint64_t skf0 = 0x0F0F0F0F0F0F0F0F;

m -= (m >> 1) & sk5;
m = (m & sk3) + ((m >> 2) & sk3);
m = (m & skf0) + ((m >> 4) & skf0);
m += m >> 8;
m += m >> 16;
m = (m + (m >> 32)) & 0xFF;
#else
native_int sk5 = 0x55555555;
native_int sk3 = 0x33333333;
native_int skf0 = 0xF0F0F0F;

m -= (m >> 1) & sk5;
m = (m & sk3) + ((m >> 2) & sk3);
m = (m & skf0) + ((m >> 4) & skf0);
m += m >> 8;
m = (m + (m >> 16)) & 0x3F;
#endif

return m;
}

Fixnum* System::vm_hash_trie_item_index(STATE, Fixnum* hash,
Fixnum* level, Integer* map)
{
size_t m = map->to_ulong();
size_t b = hash_trie_bit(hash, level);
uint64_t m = map->to_ulong();
uint64_t b = hash_trie_bit(hash, level);

if(m & b) {
return Fixnum::from(hash_trie_index((b - 1) & m));
@@ -1834,17 +1814,17 @@ namespace rubinius {
Integer* System::vm_hash_trie_set_bitmap(STATE, Fixnum* hash,
Fixnum* level, Integer* map)
{
size_t m = map->to_ulong();
size_t b = hash_trie_bit(hash, level);
uint64_t m = map->to_ulong();
uint64_t b = hash_trie_bit(hash, level);

return Integer::from(state, m | b);
}

Integer* System::vm_hash_trie_unset_bitmap(STATE, Fixnum* hash,
Fixnum* level, Integer* map)
{
size_t m = map->to_ulong();
size_t b = hash_trie_bit(hash, level);
uint64_t m = map->to_ulong();
uint64_t b = hash_trie_bit(hash, level);

return Integer::from(state, m & ~b);
}
8 changes: 8 additions & 0 deletions spec/ruby/core/hash/shift_spec.rb
Original file line number Diff line number Diff line change
@@ -26,6 +26,14 @@
h.shift.should == [h, nil]
end

it "preserves Hash invariants when removing the last item" do
h = new_hash(:a => 1, :b => 2)
h.shift.should == [:a, 1]
h.shift.should == [:b, 2]
h[:c] = 3
h.should == {:c => 3}
end

it "raises a RuntimeError if called on a frozen instance" do
lambda { HashSpecs.frozen_hash.shift }.should raise_error(RuntimeError)
lambda { HashSpecs.empty_frozen_hash.shift }.should raise_error(RuntimeError)

0 comments on commit 7bf377b

Please sign in to comment.