Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #900 from zig-lang/hash-and-checksums
Add common hash/checksum functions
- Loading branch information
Showing
7 changed files
with
701 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
// Adler32 checksum. | ||
// | ||
// https://tools.ietf.org/html/rfc1950#section-9 | ||
// https://github.com/madler/zlib/blob/master/adler32.c | ||
|
||
const std = @import("../index.zig"); | ||
const debug = std.debug; | ||
|
||
pub const Adler32 = struct { | ||
const base = 65521; | ||
const nmax = 5552; | ||
|
||
adler: u32, | ||
|
||
pub fn init() Adler32 { | ||
return Adler32 { | ||
.adler = 1, | ||
}; | ||
} | ||
|
||
// This fast variant is taken from zlib. It reduces the required modulos and unrolls longer | ||
// buffer inputs and should be much quicker. | ||
pub fn update(self: &Adler32, input: []const u8) void { | ||
var s1 = self.adler & 0xffff; | ||
var s2 = (self.adler >> 16) & 0xffff; | ||
|
||
if (input.len == 1) { | ||
s1 +%= input[0]; | ||
if (s1 >= base) { | ||
s1 -= base; | ||
} | ||
s2 +%= s1; | ||
if (s2 >= base) { | ||
s2 -= base; | ||
} | ||
} | ||
else if (input.len < 16) { | ||
for (input) |b| { | ||
s1 +%= b; | ||
s2 +%= s1; | ||
} | ||
if (s1 >= base) { | ||
s1 -= base; | ||
} | ||
|
||
s2 %= base; | ||
} | ||
else { | ||
var i: usize = 0; | ||
while (i + nmax <= input.len) : (i += nmax) { | ||
const n = nmax / 16; // note: 16 | nmax | ||
|
||
var rounds: usize = 0; | ||
while (rounds < n) : (rounds += 1) { | ||
comptime var j: usize = 0; | ||
inline while (j < 16) : (j += 1) { | ||
s1 +%= input[i + n * j]; | ||
s2 +%= s1; | ||
} | ||
} | ||
} | ||
|
||
if (i < input.len) { | ||
while (i + 16 <= input.len) : (i += 16) { | ||
comptime var j: usize = 0; | ||
inline while (j < 16) : (j += 1) { | ||
s1 +%= input[i + j]; | ||
s2 +%= s1; | ||
} | ||
} | ||
while (i < input.len) : (i += 1) { | ||
s1 +%= input[i]; | ||
s2 +%= s1; | ||
} | ||
|
||
s1 %= base; | ||
s2 %= base; | ||
} | ||
} | ||
|
||
self.adler = s1 | (s2 << 16); | ||
} | ||
|
||
pub fn final(self: &Adler32) u32 { | ||
return self.adler; | ||
} | ||
|
||
pub fn hash(input: []const u8) u32 { | ||
var c = Adler32.init(); | ||
c.update(input); | ||
return c.final(); | ||
} | ||
}; | ||
|
||
test "adler32 sanity" { | ||
debug.assert(Adler32.hash("a") == 0x620062); | ||
debug.assert(Adler32.hash("example") == 0xbc002ed); | ||
} | ||
|
||
test "adler32 long" { | ||
const long1 = []u8 {1} ** 1024; | ||
debug.assert(Adler32.hash(long1[0..]) == 0x06780401); | ||
|
||
const long2 = []u8 {1} ** 1025; | ||
debug.assert(Adler32.hash(long2[0..]) == 0x0a7a0402); | ||
} | ||
|
||
test "adler32 very long" { | ||
const long = []u8 {1} ** 5553; | ||
debug.assert(Adler32.hash(long[0..]) == 0x707f15b2); | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
// There are two implementations of CRC32 implemented with the following key characteristics: | ||
// | ||
// - Crc32WithPoly uses 8Kb of tables but is ~10x faster than the small method. | ||
// | ||
// - Crc32SmallWithPoly uses only 64 bytes of memory but is slower. Be aware that this is | ||
// still moderately fast just slow relative to the slicing approach. | ||
|
||
const std = @import("../index.zig"); | ||
const debug = std.debug; | ||
|
||
pub const Polynomial = struct { | ||
const IEEE = 0xedb88320; | ||
const Castagnoli = 0x82f63b78; | ||
const Koopman = 0xeb31d82e; | ||
}; | ||
|
||
// IEEE is by far the most common CRC and so is aliased by default. | ||
pub const Crc32 = Crc32WithPoly(Polynomial.IEEE); | ||
|
||
// slicing-by-8 crc32 implementation. | ||
pub fn Crc32WithPoly(comptime poly: u32) type { | ||
return struct { | ||
const Self = this; | ||
const lookup_tables = comptime block: { | ||
@setEvalBranchQuota(20000); | ||
var tables: [8][256]u32 = undefined; | ||
|
||
for (tables[0]) |*e, i| { | ||
var crc = u32(i); | ||
var j: usize = 0; while (j < 8) : (j += 1) { | ||
if (crc & 1 == 1) { | ||
crc = (crc >> 1) ^ poly; | ||
} else { | ||
crc = (crc >> 1); | ||
} | ||
} | ||
*e = crc; | ||
} | ||
|
||
var i: usize = 0; | ||
while (i < 256) : (i += 1) { | ||
var crc = tables[0][i]; | ||
var j: usize = 1; while (j < 8) : (j += 1) { | ||
const index = @truncate(u8, crc); | ||
crc = tables[0][index] ^ (crc >> 8); | ||
tables[j][i] = crc; | ||
} | ||
} | ||
|
||
break :block tables; | ||
}; | ||
|
||
crc: u32, | ||
|
||
pub fn init() Self { | ||
return Self { | ||
.crc = 0xffffffff, | ||
}; | ||
} | ||
|
||
pub fn update(self: &Self, input: []const u8) void { | ||
var i: usize = 0; | ||
while (i + 8 <= input.len) : (i += 8) { | ||
const p = input[i..i+8]; | ||
|
||
// Unrolling this way gives ~50Mb/s increase | ||
self.crc ^= (u32(p[0]) << 0); | ||
self.crc ^= (u32(p[1]) << 8); | ||
self.crc ^= (u32(p[2]) << 16); | ||
self.crc ^= (u32(p[3]) << 24); | ||
|
||
self.crc = | ||
lookup_tables[0][p[7]] ^ | ||
lookup_tables[1][p[6]] ^ | ||
lookup_tables[2][p[5]] ^ | ||
lookup_tables[3][p[4]] ^ | ||
lookup_tables[4][@truncate(u8, self.crc >> 24)] ^ | ||
lookup_tables[5][@truncate(u8, self.crc >> 16)] ^ | ||
lookup_tables[6][@truncate(u8, self.crc >> 8)] ^ | ||
lookup_tables[7][@truncate(u8, self.crc >> 0)]; | ||
} | ||
|
||
while (i < input.len) : (i += 1) { | ||
const index = @truncate(u8, self.crc) ^ input[i]; | ||
self.crc = (self.crc >> 8) ^ lookup_tables[0][index]; | ||
} | ||
} | ||
|
||
pub fn final(self: &Self) u32 { | ||
return ~self.crc; | ||
} | ||
|
||
pub fn hash(input: []const u8) u32 { | ||
var c = Self.init(); | ||
c.update(input); | ||
return c.final(); | ||
} | ||
}; | ||
} | ||
|
||
test "crc32 ieee" { | ||
const Crc32Ieee = Crc32WithPoly(Polynomial.IEEE); | ||
|
||
debug.assert(Crc32Ieee.hash("") == 0x00000000); | ||
debug.assert(Crc32Ieee.hash("a") == 0xe8b7be43); | ||
debug.assert(Crc32Ieee.hash("abc") == 0x352441c2); | ||
} | ||
|
||
test "crc32 castagnoli" { | ||
const Crc32Castagnoli = Crc32WithPoly(Polynomial.Castagnoli); | ||
|
||
debug.assert(Crc32Castagnoli.hash("") == 0x00000000); | ||
debug.assert(Crc32Castagnoli.hash("a") == 0xc1d04330); | ||
debug.assert(Crc32Castagnoli.hash("abc") == 0x364b3fb7); | ||
} | ||
|
||
// half-byte lookup table implementation. | ||
pub fn Crc32SmallWithPoly(comptime poly: u32) type { | ||
return struct { | ||
const Self = this; | ||
const lookup_table = comptime block: { | ||
var table: [16]u32 = undefined; | ||
|
||
for (table) |*e, i| { | ||
var crc = u32(i * 16); | ||
var j: usize = 0; while (j < 8) : (j += 1) { | ||
if (crc & 1 == 1) { | ||
crc = (crc >> 1) ^ poly; | ||
} else { | ||
crc = (crc >> 1); | ||
} | ||
} | ||
*e = crc; | ||
} | ||
|
||
break :block table; | ||
}; | ||
|
||
crc: u32, | ||
|
||
pub fn init() Self { | ||
return Self { | ||
.crc = 0xffffffff, | ||
}; | ||
} | ||
|
||
pub fn update(self: &Self, input: []const u8) void { | ||
for (input) |b| { | ||
self.crc = lookup_table[@truncate(u4, self.crc ^ (b >> 0))] ^ (self.crc >> 4); | ||
self.crc = lookup_table[@truncate(u4, self.crc ^ (b >> 4))] ^ (self.crc >> 4); | ||
} | ||
} | ||
|
||
pub fn final(self: &Self) u32 { | ||
return ~self.crc; | ||
} | ||
|
||
pub fn hash(input: []const u8) u32 { | ||
var c = Self.init(); | ||
c.update(input); | ||
return c.final(); | ||
} | ||
}; | ||
} | ||
|
||
test "small crc32 ieee" { | ||
const Crc32Ieee = Crc32SmallWithPoly(Polynomial.IEEE); | ||
|
||
debug.assert(Crc32Ieee.hash("") == 0x00000000); | ||
debug.assert(Crc32Ieee.hash("a") == 0xe8b7be43); | ||
debug.assert(Crc32Ieee.hash("abc") == 0x352441c2); | ||
} | ||
|
||
test "small crc32 castagnoli" { | ||
const Crc32Castagnoli = Crc32SmallWithPoly(Polynomial.Castagnoli); | ||
|
||
debug.assert(Crc32Castagnoli.hash("") == 0x00000000); | ||
debug.assert(Crc32Castagnoli.hash("a") == 0xc1d04330); | ||
debug.assert(Crc32Castagnoli.hash("abc") == 0x364b3fb7); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
// FNV1a - Fowler-Noll-Vo hash function | ||
// | ||
// FNV1a is a fast, non-cryptographic hash function with fairly good distribution properties. | ||
// | ||
// https://tools.ietf.org/html/draft-eastlake-fnv-14 | ||
|
||
const std = @import("../index.zig"); | ||
const debug = std.debug; | ||
|
||
pub const Fnv1a_32 = Fnv1a(u32, 0x01000193 , 0x811c9dc5); | ||
pub const Fnv1a_64 = Fnv1a(u64, 0x100000001b3, 0xcbf29ce484222325); | ||
pub const Fnv1a_128 = Fnv1a(u128, 0x1000000000000000000013b, 0x6c62272e07bb014262b821756295c58d); | ||
|
||
fn Fnv1a(comptime T: type, comptime prime: T, comptime offset: T) type { | ||
return struct { | ||
const Self = this; | ||
|
||
value: T, | ||
|
||
pub fn init() Self { | ||
return Self { | ||
.value = offset, | ||
}; | ||
} | ||
|
||
pub fn update(self: &Self, input: []const u8) void { | ||
for (input) |b| { | ||
self.value ^= b; | ||
self.value *%= prime; | ||
} | ||
} | ||
|
||
pub fn final(self: &Self) T { | ||
return self.value; | ||
} | ||
|
||
pub fn hash(input: []const u8) T { | ||
var c = Self.init(); | ||
c.update(input); | ||
return c.final(); | ||
} | ||
}; | ||
} | ||
|
||
test "fnv1a-32" { | ||
debug.assert(Fnv1a_32.hash("") == 0x811c9dc5); | ||
debug.assert(Fnv1a_32.hash("a") == 0xe40c292c); | ||
debug.assert(Fnv1a_32.hash("foobar") == 0xbf9cf968); | ||
} | ||
|
||
test "fnv1a-64" { | ||
debug.assert(Fnv1a_64.hash("") == 0xcbf29ce484222325); | ||
debug.assert(Fnv1a_64.hash("a") == 0xaf63dc4c8601ec8c); | ||
debug.assert(Fnv1a_64.hash("foobar") == 0x85944171f73967e8); | ||
} | ||
|
||
test "fnv1a-128" { | ||
debug.assert(Fnv1a_128.hash("") == 0x6c62272e07bb014262b821756295c58d); | ||
debug.assert(Fnv1a_128.hash("a") == 0xd228cb696f1a8caf78912b704e4a8964); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
const adler = @import("adler.zig"); | ||
pub const Adler32 = adler.Adler32; | ||
|
||
// pub for polynomials + generic crc32 construction | ||
pub const crc = @import("crc.zig"); | ||
pub const Crc32 = crc.Crc32; | ||
|
||
const fnv = @import("fnv.zig"); | ||
pub const Fnv1a_32 = fnv.Fnv1a_32; | ||
pub const Fnv1a_64 = fnv.Fnv1a_64; | ||
pub const Fnv1a_128 = fnv.Fnv1a_128; | ||
|
||
const siphash = @import("siphash.zig"); | ||
pub const SipHash64 = siphash.SipHash64; | ||
pub const SipHash128 = siphash.SipHash128; | ||
|
||
test "hash" { | ||
_ = @import("adler.zig"); | ||
_ = @import("crc.zig"); | ||
_ = @import("fnv.zig"); | ||
_ = @import("siphash.zig"); | ||
} |
Oops, something went wrong.