Skip to content

Commit

Permalink
Secure hash function (#5146)
Browse files Browse the repository at this point in the history
* add specialization for string(Bytes)

* use multiplicative HashDos proof function

it is multiplicative function resilent to HashDos given following
restriction is satisfied:
- neither seed nor hash-value is exposed to atacker.

* hasher: fix for tail reading

* hasher: rename `string(value: Bytes)` to `bytes(value)`

* Optimize Bytes#hash and keep slow Slice(T)#hash

* hasher: lets crystal+llvm decide about inlining.

* hasher: description of algorithm

* hasher: add primitive sanity spec for hasher

* hasher: fix a bit algorithm notes

* hasher: improve algorithm

After testing with SMHasher, and benchmarking, improve algorithm's quality:
- improve finalizer's quality. It becomes 2 cycles slower, but rare bias
  were fixed. In practice bias didn't matter much, because with random seed
  it appears occasionally. But it were detectable with zero seed.
- improve performance on short strings a bit.

* hasher: tool format

* hasher: fix naming

* hasher: micro sanity improvement.

* hasher: fix naming

* hasher: Add comments for `#bytes(value)` and `Slice(UInt8)#hash(hasher)`

* hasher: remove `bytes` comment back

* hasher: change test to deterministic. Separate algorithm implementaton.

* some formatting

* hasher: revert to monolith Crystal::Hasher

* hasher: hide internal state.

Just to prevent occasional disclosure of state, hide it in `def inspect`.
It will not prevent intentional disclosure, though, because state can be
easily inspected, and `to_s` could be redefined.

* hasher: s/THasher/TestHasher/
funny-falcon authored and RX14 committed Oct 30, 2017
1 parent 748afe5 commit fbab3a3
Showing 4 changed files with 358 additions and 12 deletions.
232 changes: 232 additions & 0 deletions spec/std/crystal/hasher_spec.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
require "spec"
require "bit_array"
require "random/secure"

struct Crystal::Hasher
def self.for_test
new(1_u64, 1_u64)
end
end

enum TestHasherEnum
A
B
end

alias TestHasher = Crystal::Hasher

describe "Crystal::Hasher" do
context "behavior" do
it "#nil should change hasher state" do
hasher = TestHasher.for_test
hasher1 = nil.hash(hasher)
hasher2 = nil.hash(hasher1)
hasher1.result.should_not eq(hasher.result)
hasher2.result.should_not eq(hasher.result)
hasher2.result.should_not eq(hasher1.result)
end

it "#bool should change state and differ" do
hasher = TestHasher.for_test
hasher_true = true.hash(hasher)
hasher_false = false.hash(hasher)
hasher.result.should_not eq(hasher_true.result)
hasher.result.should_not eq(hasher_false.result)
hasher_true.result.should_not eq(hasher_false.result)
end

it "#int should change state and differ" do
hasher = TestHasher.for_test
hasher1 = 1.hash(hasher)
hasher2 = 2.hash(hasher)
hasher12 = 2.hash(hasher1)
[hasher, hasher1, hasher2, hasher12]
.map(&.result)
.combinations(2)
.each { |(a, b)| a.should_not eq(b) }
end

it "#int should be equal for different types" do
1.hash.should eq(1_u64.hash)
2.hash.should eq(2_u64.hash)
end

it "#float should change state and differ" do
hasher = TestHasher.for_test
hasher1 = 1.0.hash(hasher)
hasher2 = 2.0.hash(hasher)
hasher12 = 2.0.hash(hasher1)
[hasher, hasher1, hasher2, hasher12]
.map(&.result)
.combinations(2)
.each { |(a, b)| a.should_not eq(b) }
end

it "#char should change state and differ" do
hasher = TestHasher.for_test
hasher1 = 'a'.hash(hasher)
hasher2 = 'b'.hash(hasher)
hasher12 = 'b'.hash(hasher1)
[hasher, hasher1, hasher2, hasher12]
.map(&.result)
.combinations(2)
.each { |(a, b)| a.should_not eq(b) }
end

it "#enum should change state and differ" do
hasher = TestHasher.for_test
hasher1 = TestHasherEnum::A.hash(hasher)
hasher2 = TestHasherEnum::B.hash(hasher)
hasher12 = TestHasherEnum::B.hash(hasher1)
[hasher, hasher1, hasher2, hasher12]
.map(&.result)
.combinations(2)
.each { |(a, b)| a.should_not eq(b) }
end

it "#symbol should change state and differ" do
hasher = TestHasher.for_test
hasher1 = :A.hash(hasher)
hasher2 = :B.hash(hasher)
hasher12 = :B.hash(hasher1)
[hasher, hasher1, hasher2, hasher12]
.map(&.result)
.combinations(2)
.each { |(a, b)| a.should_not eq(b) }
end

it "#reference should change state and differ" do
hasher = TestHasher.for_test
a, b = Reference.new, Reference.new
hasher1 = a.hash(hasher)
hasher2 = b.hash(hasher)
hasher12 = b.hash(hasher1)
[hasher, hasher1, hasher2, hasher12]
.map(&.result)
.combinations(2)
.each { |(a, b)| a.should_not eq(b) }
end

it "#string should change state and differ" do
hasher = TestHasher.for_test
hasher1 = "a".hash(hasher)
hasher2 = "b".hash(hasher)
hasher12 = "b".hash(hasher1)
[hasher, hasher1, hasher2, hasher12]
.map(&.result)
.combinations(2)
.each { |(a, b)| a.should_not eq(b) }
end

it "#class should change state and differ" do
hasher = TestHasher.for_test
hasher1 = TestHasher.hash(hasher)
hasher2 = TestHasherEnum.hash(hasher)
hasher12 = TestHasherEnum.hash(hasher1)
[hasher, hasher1, hasher2, hasher12]
.map(&.result)
.combinations(2)
.each { |(a, b)| a.should_not eq(b) }
end

it "#bytes should change state and differ" do
hasher = TestHasher.for_test
a = Bytes[1, 2, 3]
b = Bytes[2, 3, 4]
hasher1 = a.hash(hasher)
hasher2 = b.hash(hasher)
hasher12 = b.hash(hasher1)
[hasher, hasher1, hasher2, hasher12]
.map(&.result)
.combinations(2)
.each { |(a, b)| a.should_not eq(b) }
end
end

context "funny_hash" do
it "result should work" do
hasher = TestHasher.new(1_u64, 1_u64)
typeof(hasher.result).should eq(UInt64)
hasher.result.should eq(0x162c591a100060e5_u64)

hasher = TestHasher.new(1_u64, 2_u64)
hasher.result.should eq(0x7f8304f0947082d1_u64)

hasher = TestHasher.new(2_u64, 1_u64)
hasher.result.should eq(0xc302065c9b909fdf_u64)

hasher = TestHasher.new(0x123456789abcdef0_u64, 0x016fcd2b89e745a3_u64)
hasher.result.should eq(0x54258afe17b6a4bb_u64)

# "bad seed"
hasher = TestHasher.new(0_u64, 0_u64)
hasher.result.should eq(0_u64)
end

it "#nil should match test vectors" do
hasher = TestHasher.for_test
hasher1 = nil.hash(hasher)
hasher2 = nil.hash(hasher1)
hasher1.result.should eq(0x2c58b2332000c1cb_u64)
hasher2.result.should eq(0xef5ab89129b8991b_u64)
end

it "#bool should match test vectors" do
hasher = TestHasher.for_test
hasher_true = true.hash(hasher)
hasher_false = false.hash(hasher)
hasher_true.result.should eq(0x94e4534c12903881_u64)
hasher_false.result.should eq(0x15cf71e56618745b_u64)
end

it "#int should match test vectors" do
hasher = TestHasher.for_test
hasher1 = 1.hash(hasher)
hasher2 = 2.hash(hasher)
hasher1.result.should eq(0x94e4534c12903881_u64)
hasher2.result.should eq(0xaf42909720d981e7_u64)
end

it "#float should match test vectors" do
hasher = TestHasher.for_test
hasher1 = 1.0.hash(hasher)
hasher2 = 2.0.hash(hasher)
hasher1.result.should eq(0xecfbe7798e8f67f2_u64)
hasher2.result.should eq(0x72847386c9572c30_u64)
end

it "#string should match test vectors" do
hasher = TestHasher.for_test
hasher0 = "".hash(hasher)
hasher1 = "1".hash(hasher)
hasher2 = "2.0".hash(hasher)
hasher0.result.should eq(0x15cf71e56618745b_u64)
hasher1.result.should eq(0x70ef623beff8c213_u64)
hasher2.result.should eq(0x2908fdd2bb81fbed_u64)
end
end

describe "to_s" do
it "should not expose internal data" do
hasher = TestHasher.new(1_u64, 2_u64)
hasher.to_s.should_not contain('1')
hasher.to_s.should_not contain(hasher.@a.to_s)
hasher.to_s.should_not contain(hasher.@a.to_s(16))
hasher.to_s.should_not contain('2')
hasher.to_s.should_not contain(hasher.@b.to_s)
hasher.to_s.should_not contain(hasher.@b.to_s(16))
end
end

describe "inspect" do
it "should not expose internal data" do
hasher = TestHasher.new(1_u64, 2_u64)
hasher.inspect.should_not contain('1')
hasher.inspect.should_not contain(hasher.@a.to_s)
hasher.inspect.should_not contain(hasher.@a.to_s(16))
hasher.inspect.should_not contain('2')
hasher.inspect.should_not contain(hasher.@b.to_s)
hasher.inspect.should_not contain(hasher.@b.to_s(16))
end
end
end
8 changes: 8 additions & 0 deletions spec/std/slice_spec.cr
Original file line number Diff line number Diff line change
@@ -404,6 +404,14 @@ describe "Slice" do
slice = Bytes[1, 2, 3, read_only: true]
expect_raises(Exception, "Can't write to read-only Slice") { slice[0] = 0_u8 }
end

it "hashes each item in collection" do
Slice[1, 2, 3].hash.should eq(Slice[1_u64, 2_u64, 3_u64].hash)
end

it "optimizes hash for Bytes" do
Bytes[1, 2, 3].hash.should_not eq(Slice[1, 2, 3].hash)
end
end

private def itself(*args)
121 changes: 109 additions & 12 deletions src/crystal/hasher.cr
Original file line number Diff line number Diff line change
@@ -6,15 +6,68 @@ struct Crystal::Hasher
# value for primitive and basic Crystal objects. All other
# hashes are computed based on these.
#
# TODO: the implementation is naive and should probably use
# another algorithm like SipHash or FNV.
# The algorithm bases on https://github.com/funny-falcon/funny_hash
#
# It is two multiply-rotate 64bit hash functions, combined
# within finalizer.
#
# Both hash functions combines previous state with a block value
# before multiplication. One function multiplies new state
# as is (and then rotates state), other multiplies new state
# already rotated by 32 bits.
#
# This way algorithm ensures that every block bit affects at
# least 1 bit of every state, and certainly many bits of some
# state. So effect of this bit could not be easily canceled
# with following blocks. (Cause next blocks have to cancel
# bits on non-intersecting positions in both states).
# Rotation by 32bit with multiplication also provides good
# inter-block avalanche.
#
# Finalizer performs murmur-like finalization on both functions,
# and then combines them with addition. It greatly reduce
# possibility of state deduction.
#
# Note, it provides good protection from HashDos if and only if:
# - seed is securely random and not exposed to attacker,
# - hash result is also not exposed to attacker in a way other
# than effect of using it in Hash implementation.
# Do not output calculated hash value to user's console/form/
# html/api response, etc. Use some from digest package instead.

@@seed = uninitialized UInt64[2]
Random::Secure.random_bytes(Slice.new(pointerof(@@seed).as(UInt8*), sizeof(typeof(@@seed))))

def initialize(@a : UInt64 = @@seed[0], @b : UInt64 = @@seed[1])
end

private C1 = 0xacd5ad43274593b9_u64
private C2 = 0x6956abd6ed268a3d_u64

@@seed = uninitialized UInt64
Random::Secure.random_bytes(Slice.new(pointerof(@@seed).as(UInt8*), 8))
private def rotl32(v : UInt64)
(v << 32) | (v >> 32)
end

property result : UInt64 = @@seed
private def permute(v : UInt64)
@a = rotl32(@a ^ v) * C1
@b = (rotl32(@b) ^ v) * C2
self
end

def result
a, b = @a, @b
a ^= (a >> 23) ^ (a >> 40)
b ^= (b >> 23) ^ (b >> 40)
a *= C1
b *= C2
a ^= a >> 32
b ^= b >> 32
a + b
end

def nil
@a += @b
@b += 1
self
end

@@ -23,13 +76,11 @@ struct Crystal::Hasher
end

def int(value)
@result = @result * 31 + value.to_u64
self
permute(value.to_u64)
end

def float(value)
@result = @result * 31 + value.to_f64.unsafe_as(UInt64)
self
permute(value.to_f64.unsafe_as(UInt64))
end

def char(value)
@@ -45,15 +96,61 @@ struct Crystal::Hasher
end

def reference(value)
@result = @result * 31 + value.object_id.to_u64
self
permute(value.object_id.to_u64)
end

def string(value)
value.to_slice.hash(self)
bytes(value.to_slice)
end

private def read_u24(ptr, rest)
ptr[0].to_u64 | (ptr[rest/2].to_u64 << 8) | (ptr[rest - 1].to_u64 << 16)
end

private def read_u32(ptr)
# force correct unaligned read
t4 = uninitialized UInt32
pointerof(t4).as(UInt8*).copy_from(ptr, 4)
t4.to_u64
end

private def read_u64(ptr)
# force correct unaligned read
t8 = uninitialized UInt64
pointerof(t8).as(UInt8*).copy_from(ptr, 8)
t8
end

def bytes(value : Bytes)
size = value.size
ptr = value.to_unsafe
if size <= 0
last = 0_u64
elsif size <= 3
last = read_u24(ptr, size)
elsif size <= 7
last = read_u32(ptr)
last |= read_u32(ptr + (size & 3)) << 32
else
while size >= 8
permute(read_u64(ptr))
ptr += 8
size -= 8
end
last = read_u64(ptr - (8 - size))
end
@a ^= size
@b ^= size
permute(last)
self
end

def class(value)
value.crystal_type_id.hash(self)
end

def inspect(io)
io << "#{self.class}(hidden_state)"
nil
end
end
9 changes: 9 additions & 0 deletions src/slice.cr
Original file line number Diff line number Diff line change
@@ -533,6 +533,15 @@ struct Slice(T)
nil
end

# See `Object#hash(hasher)`
def hash(hasher)
{% if T == UInt8 %}
hasher.bytes(self)
{% else %}
super hasher
{% end %}
end

protected def check_writable
raise "Can't write to read-only Slice" if @read_only
end

0 comments on commit fbab3a3

Please sign in to comment.