Skip to content

Commit fbab3a3

Browse files
funny-falconRX14
authored andcommittedOct 30, 2017
Secure hash function (#5146)
* add specialization for string(Bytes) * use multiplicative HashDos proof function it is multiplicative function resilent to HashDos given following restriction is satisfied: - neither seed nor hash-value is exposed to atacker. * hasher: fix for tail reading * hasher: rename `string(value: Bytes)` to `bytes(value)` * Optimize Bytes#hash and keep slow Slice(T)#hash * hasher: lets crystal+llvm decide about inlining. * hasher: description of algorithm * hasher: add primitive sanity spec for hasher * hasher: fix a bit algorithm notes * hasher: improve algorithm After testing with SMHasher, and benchmarking, improve algorithm's quality: - improve finalizer's quality. It becomes 2 cycles slower, but rare bias were fixed. In practice bias didn't matter much, because with random seed it appears occasionally. But it were detectable with zero seed. - improve performance on short strings a bit. * hasher: tool format * hasher: fix naming * hasher: micro sanity improvement. * hasher: fix naming * hasher: Add comments for `#bytes(value)` and `Slice(UInt8)#hash(hasher)` * hasher: remove `bytes` comment back * hasher: change test to deterministic. Separate algorithm implementaton. * some formatting * hasher: revert to monolith Crystal::Hasher * hasher: hide internal state. Just to prevent occasional disclosure of state, hide it in `def inspect`. It will not prevent intentional disclosure, though, because state can be easily inspected, and `to_s` could be redefined. * hasher: s/THasher/TestHasher/
1 parent 748afe5 commit fbab3a3

File tree

4 files changed

+358
-12
lines changed

4 files changed

+358
-12
lines changed
 

Diff for: ‎spec/std/crystal/hasher_spec.cr

+232
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
require "spec"
2+
require "bit_array"
3+
require "random/secure"
4+
5+
struct Crystal::Hasher
6+
def self.for_test
7+
new(1_u64, 1_u64)
8+
end
9+
end
10+
11+
enum TestHasherEnum
12+
A
13+
B
14+
end
15+
16+
alias TestHasher = Crystal::Hasher
17+
18+
describe "Crystal::Hasher" do
19+
context "behavior" do
20+
it "#nil should change hasher state" do
21+
hasher = TestHasher.for_test
22+
hasher1 = nil.hash(hasher)
23+
hasher2 = nil.hash(hasher1)
24+
hasher1.result.should_not eq(hasher.result)
25+
hasher2.result.should_not eq(hasher.result)
26+
hasher2.result.should_not eq(hasher1.result)
27+
end
28+
29+
it "#bool should change state and differ" do
30+
hasher = TestHasher.for_test
31+
hasher_true = true.hash(hasher)
32+
hasher_false = false.hash(hasher)
33+
hasher.result.should_not eq(hasher_true.result)
34+
hasher.result.should_not eq(hasher_false.result)
35+
hasher_true.result.should_not eq(hasher_false.result)
36+
end
37+
38+
it "#int should change state and differ" do
39+
hasher = TestHasher.for_test
40+
hasher1 = 1.hash(hasher)
41+
hasher2 = 2.hash(hasher)
42+
hasher12 = 2.hash(hasher1)
43+
[hasher, hasher1, hasher2, hasher12]
44+
.map(&.result)
45+
.combinations(2)
46+
.each { |(a, b)| a.should_not eq(b) }
47+
end
48+
49+
it "#int should be equal for different types" do
50+
1.hash.should eq(1_u64.hash)
51+
2.hash.should eq(2_u64.hash)
52+
end
53+
54+
it "#float should change state and differ" do
55+
hasher = TestHasher.for_test
56+
hasher1 = 1.0.hash(hasher)
57+
hasher2 = 2.0.hash(hasher)
58+
hasher12 = 2.0.hash(hasher1)
59+
[hasher, hasher1, hasher2, hasher12]
60+
.map(&.result)
61+
.combinations(2)
62+
.each { |(a, b)| a.should_not eq(b) }
63+
end
64+
65+
it "#char should change state and differ" do
66+
hasher = TestHasher.for_test
67+
hasher1 = 'a'.hash(hasher)
68+
hasher2 = 'b'.hash(hasher)
69+
hasher12 = 'b'.hash(hasher1)
70+
[hasher, hasher1, hasher2, hasher12]
71+
.map(&.result)
72+
.combinations(2)
73+
.each { |(a, b)| a.should_not eq(b) }
74+
end
75+
76+
it "#enum should change state and differ" do
77+
hasher = TestHasher.for_test
78+
hasher1 = TestHasherEnum::A.hash(hasher)
79+
hasher2 = TestHasherEnum::B.hash(hasher)
80+
hasher12 = TestHasherEnum::B.hash(hasher1)
81+
[hasher, hasher1, hasher2, hasher12]
82+
.map(&.result)
83+
.combinations(2)
84+
.each { |(a, b)| a.should_not eq(b) }
85+
end
86+
87+
it "#symbol should change state and differ" do
88+
hasher = TestHasher.for_test
89+
hasher1 = :A.hash(hasher)
90+
hasher2 = :B.hash(hasher)
91+
hasher12 = :B.hash(hasher1)
92+
[hasher, hasher1, hasher2, hasher12]
93+
.map(&.result)
94+
.combinations(2)
95+
.each { |(a, b)| a.should_not eq(b) }
96+
end
97+
98+
it "#reference should change state and differ" do
99+
hasher = TestHasher.for_test
100+
a, b = Reference.new, Reference.new
101+
hasher1 = a.hash(hasher)
102+
hasher2 = b.hash(hasher)
103+
hasher12 = b.hash(hasher1)
104+
[hasher, hasher1, hasher2, hasher12]
105+
.map(&.result)
106+
.combinations(2)
107+
.each { |(a, b)| a.should_not eq(b) }
108+
end
109+
110+
it "#string should change state and differ" do
111+
hasher = TestHasher.for_test
112+
hasher1 = "a".hash(hasher)
113+
hasher2 = "b".hash(hasher)
114+
hasher12 = "b".hash(hasher1)
115+
[hasher, hasher1, hasher2, hasher12]
116+
.map(&.result)
117+
.combinations(2)
118+
.each { |(a, b)| a.should_not eq(b) }
119+
end
120+
121+
it "#class should change state and differ" do
122+
hasher = TestHasher.for_test
123+
hasher1 = TestHasher.hash(hasher)
124+
hasher2 = TestHasherEnum.hash(hasher)
125+
hasher12 = TestHasherEnum.hash(hasher1)
126+
[hasher, hasher1, hasher2, hasher12]
127+
.map(&.result)
128+
.combinations(2)
129+
.each { |(a, b)| a.should_not eq(b) }
130+
end
131+
132+
it "#bytes should change state and differ" do
133+
hasher = TestHasher.for_test
134+
a = Bytes[1, 2, 3]
135+
b = Bytes[2, 3, 4]
136+
hasher1 = a.hash(hasher)
137+
hasher2 = b.hash(hasher)
138+
hasher12 = b.hash(hasher1)
139+
[hasher, hasher1, hasher2, hasher12]
140+
.map(&.result)
141+
.combinations(2)
142+
.each { |(a, b)| a.should_not eq(b) }
143+
end
144+
end
145+
146+
context "funny_hash" do
147+
it "result should work" do
148+
hasher = TestHasher.new(1_u64, 1_u64)
149+
typeof(hasher.result).should eq(UInt64)
150+
hasher.result.should eq(0x162c591a100060e5_u64)
151+
152+
hasher = TestHasher.new(1_u64, 2_u64)
153+
hasher.result.should eq(0x7f8304f0947082d1_u64)
154+
155+
hasher = TestHasher.new(2_u64, 1_u64)
156+
hasher.result.should eq(0xc302065c9b909fdf_u64)
157+
158+
hasher = TestHasher.new(0x123456789abcdef0_u64, 0x016fcd2b89e745a3_u64)
159+
hasher.result.should eq(0x54258afe17b6a4bb_u64)
160+
161+
# "bad seed"
162+
hasher = TestHasher.new(0_u64, 0_u64)
163+
hasher.result.should eq(0_u64)
164+
end
165+
166+
it "#nil should match test vectors" do
167+
hasher = TestHasher.for_test
168+
hasher1 = nil.hash(hasher)
169+
hasher2 = nil.hash(hasher1)
170+
hasher1.result.should eq(0x2c58b2332000c1cb_u64)
171+
hasher2.result.should eq(0xef5ab89129b8991b_u64)
172+
end
173+
174+
it "#bool should match test vectors" do
175+
hasher = TestHasher.for_test
176+
hasher_true = true.hash(hasher)
177+
hasher_false = false.hash(hasher)
178+
hasher_true.result.should eq(0x94e4534c12903881_u64)
179+
hasher_false.result.should eq(0x15cf71e56618745b_u64)
180+
end
181+
182+
it "#int should match test vectors" do
183+
hasher = TestHasher.for_test
184+
hasher1 = 1.hash(hasher)
185+
hasher2 = 2.hash(hasher)
186+
hasher1.result.should eq(0x94e4534c12903881_u64)
187+
hasher2.result.should eq(0xaf42909720d981e7_u64)
188+
end
189+
190+
it "#float should match test vectors" do
191+
hasher = TestHasher.for_test
192+
hasher1 = 1.0.hash(hasher)
193+
hasher2 = 2.0.hash(hasher)
194+
hasher1.result.should eq(0xecfbe7798e8f67f2_u64)
195+
hasher2.result.should eq(0x72847386c9572c30_u64)
196+
end
197+
198+
it "#string should match test vectors" do
199+
hasher = TestHasher.for_test
200+
hasher0 = "".hash(hasher)
201+
hasher1 = "1".hash(hasher)
202+
hasher2 = "2.0".hash(hasher)
203+
hasher0.result.should eq(0x15cf71e56618745b_u64)
204+
hasher1.result.should eq(0x70ef623beff8c213_u64)
205+
hasher2.result.should eq(0x2908fdd2bb81fbed_u64)
206+
end
207+
end
208+
209+
describe "to_s" do
210+
it "should not expose internal data" do
211+
hasher = TestHasher.new(1_u64, 2_u64)
212+
hasher.to_s.should_not contain('1')
213+
hasher.to_s.should_not contain(hasher.@a.to_s)
214+
hasher.to_s.should_not contain(hasher.@a.to_s(16))
215+
hasher.to_s.should_not contain('2')
216+
hasher.to_s.should_not contain(hasher.@b.to_s)
217+
hasher.to_s.should_not contain(hasher.@b.to_s(16))
218+
end
219+
end
220+
221+
describe "inspect" do
222+
it "should not expose internal data" do
223+
hasher = TestHasher.new(1_u64, 2_u64)
224+
hasher.inspect.should_not contain('1')
225+
hasher.inspect.should_not contain(hasher.@a.to_s)
226+
hasher.inspect.should_not contain(hasher.@a.to_s(16))
227+
hasher.inspect.should_not contain('2')
228+
hasher.inspect.should_not contain(hasher.@b.to_s)
229+
hasher.inspect.should_not contain(hasher.@b.to_s(16))
230+
end
231+
end
232+
end

Diff for: ‎spec/std/slice_spec.cr

+8
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,14 @@ describe "Slice" do
404404
slice = Bytes[1, 2, 3, read_only: true]
405405
expect_raises(Exception, "Can't write to read-only Slice") { slice[0] = 0_u8 }
406406
end
407+
408+
it "hashes each item in collection" do
409+
Slice[1, 2, 3].hash.should eq(Slice[1_u64, 2_u64, 3_u64].hash)
410+
end
411+
412+
it "optimizes hash for Bytes" do
413+
Bytes[1, 2, 3].hash.should_not eq(Slice[1, 2, 3].hash)
414+
end
407415
end
408416

409417
private def itself(*args)

Diff for: ‎src/crystal/hasher.cr

+109-12
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,68 @@ struct Crystal::Hasher
66
# value for primitive and basic Crystal objects. All other
77
# hashes are computed based on these.
88
#
9-
# TODO: the implementation is naive and should probably use
10-
# another algorithm like SipHash or FNV.
9+
# The algorithm bases on https://github.com/funny-falcon/funny_hash
10+
#
11+
# It is two multiply-rotate 64bit hash functions, combined
12+
# within finalizer.
13+
#
14+
# Both hash functions combines previous state with a block value
15+
# before multiplication. One function multiplies new state
16+
# as is (and then rotates state), other multiplies new state
17+
# already rotated by 32 bits.
18+
#
19+
# This way algorithm ensures that every block bit affects at
20+
# least 1 bit of every state, and certainly many bits of some
21+
# state. So effect of this bit could not be easily canceled
22+
# with following blocks. (Cause next blocks have to cancel
23+
# bits on non-intersecting positions in both states).
24+
# Rotation by 32bit with multiplication also provides good
25+
# inter-block avalanche.
26+
#
27+
# Finalizer performs murmur-like finalization on both functions,
28+
# and then combines them with addition. It greatly reduce
29+
# possibility of state deduction.
30+
#
31+
# Note, it provides good protection from HashDos if and only if:
32+
# - seed is securely random and not exposed to attacker,
33+
# - hash result is also not exposed to attacker in a way other
34+
# than effect of using it in Hash implementation.
35+
# Do not output calculated hash value to user's console/form/
36+
# html/api response, etc. Use some from digest package instead.
37+
38+
@@seed = uninitialized UInt64[2]
39+
Random::Secure.random_bytes(Slice.new(pointerof(@@seed).as(UInt8*), sizeof(typeof(@@seed))))
40+
41+
def initialize(@a : UInt64 = @@seed[0], @b : UInt64 = @@seed[1])
42+
end
43+
44+
private C1 = 0xacd5ad43274593b9_u64
45+
private C2 = 0x6956abd6ed268a3d_u64
1146

12-
@@seed = uninitialized UInt64
13-
Random::Secure.random_bytes(Slice.new(pointerof(@@seed).as(UInt8*), 8))
47+
private def rotl32(v : UInt64)
48+
(v << 32) | (v >> 32)
49+
end
1450

15-
property result : UInt64 = @@seed
51+
private def permute(v : UInt64)
52+
@a = rotl32(@a ^ v) * C1
53+
@b = (rotl32(@b) ^ v) * C2
54+
self
55+
end
56+
57+
def result
58+
a, b = @a, @b
59+
a ^= (a >> 23) ^ (a >> 40)
60+
b ^= (b >> 23) ^ (b >> 40)
61+
a *= C1
62+
b *= C2
63+
a ^= a >> 32
64+
b ^= b >> 32
65+
a + b
66+
end
1667

1768
def nil
69+
@a += @b
70+
@b += 1
1871
self
1972
end
2073

@@ -23,13 +76,11 @@ struct Crystal::Hasher
2376
end
2477

2578
def int(value)
26-
@result = @result * 31 + value.to_u64
27-
self
79+
permute(value.to_u64)
2880
end
2981

3082
def float(value)
31-
@result = @result * 31 + value.to_f64.unsafe_as(UInt64)
32-
self
83+
permute(value.to_f64.unsafe_as(UInt64))
3384
end
3485

3586
def char(value)
@@ -45,15 +96,61 @@ struct Crystal::Hasher
4596
end
4697

4798
def reference(value)
48-
@result = @result * 31 + value.object_id.to_u64
49-
self
99+
permute(value.object_id.to_u64)
50100
end
51101

52102
def string(value)
53-
value.to_slice.hash(self)
103+
bytes(value.to_slice)
104+
end
105+
106+
private def read_u24(ptr, rest)
107+
ptr[0].to_u64 | (ptr[rest/2].to_u64 << 8) | (ptr[rest - 1].to_u64 << 16)
108+
end
109+
110+
private def read_u32(ptr)
111+
# force correct unaligned read
112+
t4 = uninitialized UInt32
113+
pointerof(t4).as(UInt8*).copy_from(ptr, 4)
114+
t4.to_u64
115+
end
116+
117+
private def read_u64(ptr)
118+
# force correct unaligned read
119+
t8 = uninitialized UInt64
120+
pointerof(t8).as(UInt8*).copy_from(ptr, 8)
121+
t8
122+
end
123+
124+
def bytes(value : Bytes)
125+
size = value.size
126+
ptr = value.to_unsafe
127+
if size <= 0
128+
last = 0_u64
129+
elsif size <= 3
130+
last = read_u24(ptr, size)
131+
elsif size <= 7
132+
last = read_u32(ptr)
133+
last |= read_u32(ptr + (size & 3)) << 32
134+
else
135+
while size >= 8
136+
permute(read_u64(ptr))
137+
ptr += 8
138+
size -= 8
139+
end
140+
last = read_u64(ptr - (8 - size))
141+
end
142+
@a ^= size
143+
@b ^= size
144+
permute(last)
145+
self
54146
end
55147

56148
def class(value)
57149
value.crystal_type_id.hash(self)
58150
end
151+
152+
def inspect(io)
153+
io << "#{self.class}(hidden_state)"
154+
nil
155+
end
59156
end

Diff for: ‎src/slice.cr

+9
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,15 @@ struct Slice(T)
533533
nil
534534
end
535535

536+
# See `Object#hash(hasher)`
537+
def hash(hasher)
538+
{% if T == UInt8 %}
539+
hasher.bytes(self)
540+
{% else %}
541+
super hasher
542+
{% end %}
543+
end
544+
536545
protected def check_writable
537546
raise "Can't write to read-only Slice" if @read_only
538547
end

0 commit comments

Comments
 (0)
Please sign in to comment.