Skip to content

Commit

Permalink
Merge pull request #900 from zig-lang/hash-and-checksums
Browse files Browse the repository at this point in the history
Add common hash/checksum functions
  • Loading branch information
andrewrk committed Apr 6, 2018
2 parents 873641c + c34ce2c commit 7186e92
Show file tree
Hide file tree
Showing 7 changed files with 701 additions and 0 deletions.
5 changes: 5 additions & 0 deletions CMakeLists.txt
Expand Up @@ -438,6 +438,11 @@ set(ZIG_STD_FILES
"fmt/errol/lookup.zig"
"fmt/index.zig"
"hash_map.zig"
"hash/index.zig"
"hash/adler.zig"
"hash/crc.zig"
"hash/fnv.zig"
"hash/siphash.zig"
"heap.zig"
"index.zig"
"io.zig"
Expand Down
112 changes: 112 additions & 0 deletions std/hash/adler.zig
@@ -0,0 +1,112 @@
// Adler32 checksum.
//
// https://tools.ietf.org/html/rfc1950#section-9
// https://github.com/madler/zlib/blob/master/adler32.c

const std = @import("../index.zig");
const debug = std.debug;

pub const Adler32 = struct {
const base = 65521;
const nmax = 5552;

adler: u32,

pub fn init() Adler32 {
return Adler32 {
.adler = 1,
};
}

// This fast variant is taken from zlib. It reduces the required modulos and unrolls longer
// buffer inputs and should be much quicker.
pub fn update(self: &Adler32, input: []const u8) void {
var s1 = self.adler & 0xffff;
var s2 = (self.adler >> 16) & 0xffff;

if (input.len == 1) {
s1 +%= input[0];
if (s1 >= base) {
s1 -= base;
}
s2 +%= s1;
if (s2 >= base) {
s2 -= base;
}
}
else if (input.len < 16) {
for (input) |b| {
s1 +%= b;
s2 +%= s1;
}
if (s1 >= base) {
s1 -= base;
}

s2 %= base;
}
else {
var i: usize = 0;
while (i + nmax <= input.len) : (i += nmax) {
const n = nmax / 16; // note: 16 | nmax

var rounds: usize = 0;
while (rounds < n) : (rounds += 1) {
comptime var j: usize = 0;
inline while (j < 16) : (j += 1) {
s1 +%= input[i + n * j];
s2 +%= s1;
}
}
}

if (i < input.len) {
while (i + 16 <= input.len) : (i += 16) {
comptime var j: usize = 0;
inline while (j < 16) : (j += 1) {
s1 +%= input[i + j];
s2 +%= s1;
}
}
while (i < input.len) : (i += 1) {
s1 +%= input[i];
s2 +%= s1;
}

s1 %= base;
s2 %= base;
}
}

self.adler = s1 | (s2 << 16);
}

pub fn final(self: &Adler32) u32 {
return self.adler;
}

pub fn hash(input: []const u8) u32 {
var c = Adler32.init();
c.update(input);
return c.final();
}
};

test "adler32 sanity" {
debug.assert(Adler32.hash("a") == 0x620062);
debug.assert(Adler32.hash("example") == 0xbc002ed);
}

test "adler32 long" {
const long1 = []u8 {1} ** 1024;
debug.assert(Adler32.hash(long1[0..]) == 0x06780401);

const long2 = []u8 {1} ** 1025;
debug.assert(Adler32.hash(long2[0..]) == 0x0a7a0402);
}

test "adler32 very long" {
const long = []u8 {1} ** 5553;
debug.assert(Adler32.hash(long[0..]) == 0x707f15b2);
}

180 changes: 180 additions & 0 deletions std/hash/crc.zig
@@ -0,0 +1,180 @@
// There are two implementations of CRC32 implemented with the following key characteristics:
//
// - Crc32WithPoly uses 8Kb of tables but is ~10x faster than the small method.
//
// - Crc32SmallWithPoly uses only 64 bytes of memory but is slower. Be aware that this is
// still moderately fast just slow relative to the slicing approach.

const std = @import("../index.zig");
const debug = std.debug;

pub const Polynomial = struct {
const IEEE = 0xedb88320;
const Castagnoli = 0x82f63b78;
const Koopman = 0xeb31d82e;
};

// IEEE is by far the most common CRC and so is aliased by default.
pub const Crc32 = Crc32WithPoly(Polynomial.IEEE);

// slicing-by-8 crc32 implementation.
pub fn Crc32WithPoly(comptime poly: u32) type {
return struct {
const Self = this;
const lookup_tables = comptime block: {
@setEvalBranchQuota(20000);
var tables: [8][256]u32 = undefined;

for (tables[0]) |*e, i| {
var crc = u32(i);
var j: usize = 0; while (j < 8) : (j += 1) {
if (crc & 1 == 1) {
crc = (crc >> 1) ^ poly;
} else {
crc = (crc >> 1);
}
}
*e = crc;
}

var i: usize = 0;
while (i < 256) : (i += 1) {
var crc = tables[0][i];
var j: usize = 1; while (j < 8) : (j += 1) {
const index = @truncate(u8, crc);
crc = tables[0][index] ^ (crc >> 8);
tables[j][i] = crc;
}
}

break :block tables;
};

crc: u32,

pub fn init() Self {
return Self {
.crc = 0xffffffff,
};
}

pub fn update(self: &Self, input: []const u8) void {
var i: usize = 0;
while (i + 8 <= input.len) : (i += 8) {
const p = input[i..i+8];

// Unrolling this way gives ~50Mb/s increase
self.crc ^= (u32(p[0]) << 0);
self.crc ^= (u32(p[1]) << 8);
self.crc ^= (u32(p[2]) << 16);
self.crc ^= (u32(p[3]) << 24);

self.crc =
lookup_tables[0][p[7]] ^
lookup_tables[1][p[6]] ^
lookup_tables[2][p[5]] ^
lookup_tables[3][p[4]] ^
lookup_tables[4][@truncate(u8, self.crc >> 24)] ^
lookup_tables[5][@truncate(u8, self.crc >> 16)] ^
lookup_tables[6][@truncate(u8, self.crc >> 8)] ^
lookup_tables[7][@truncate(u8, self.crc >> 0)];
}

while (i < input.len) : (i += 1) {
const index = @truncate(u8, self.crc) ^ input[i];
self.crc = (self.crc >> 8) ^ lookup_tables[0][index];
}
}

pub fn final(self: &Self) u32 {
return ~self.crc;
}

pub fn hash(input: []const u8) u32 {
var c = Self.init();
c.update(input);
return c.final();
}
};
}

test "crc32 ieee" {
const Crc32Ieee = Crc32WithPoly(Polynomial.IEEE);

debug.assert(Crc32Ieee.hash("") == 0x00000000);
debug.assert(Crc32Ieee.hash("a") == 0xe8b7be43);
debug.assert(Crc32Ieee.hash("abc") == 0x352441c2);
}

test "crc32 castagnoli" {
const Crc32Castagnoli = Crc32WithPoly(Polynomial.Castagnoli);

debug.assert(Crc32Castagnoli.hash("") == 0x00000000);
debug.assert(Crc32Castagnoli.hash("a") == 0xc1d04330);
debug.assert(Crc32Castagnoli.hash("abc") == 0x364b3fb7);
}

// half-byte lookup table implementation.
pub fn Crc32SmallWithPoly(comptime poly: u32) type {
return struct {
const Self = this;
const lookup_table = comptime block: {
var table: [16]u32 = undefined;

for (table) |*e, i| {
var crc = u32(i * 16);
var j: usize = 0; while (j < 8) : (j += 1) {
if (crc & 1 == 1) {
crc = (crc >> 1) ^ poly;
} else {
crc = (crc >> 1);
}
}
*e = crc;
}

break :block table;
};

crc: u32,

pub fn init() Self {
return Self {
.crc = 0xffffffff,
};
}

pub fn update(self: &Self, input: []const u8) void {
for (input) |b| {
self.crc = lookup_table[@truncate(u4, self.crc ^ (b >> 0))] ^ (self.crc >> 4);
self.crc = lookup_table[@truncate(u4, self.crc ^ (b >> 4))] ^ (self.crc >> 4);
}
}

pub fn final(self: &Self) u32 {
return ~self.crc;
}

pub fn hash(input: []const u8) u32 {
var c = Self.init();
c.update(input);
return c.final();
}
};
}

test "small crc32 ieee" {
const Crc32Ieee = Crc32SmallWithPoly(Polynomial.IEEE);

debug.assert(Crc32Ieee.hash("") == 0x00000000);
debug.assert(Crc32Ieee.hash("a") == 0xe8b7be43);
debug.assert(Crc32Ieee.hash("abc") == 0x352441c2);
}

test "small crc32 castagnoli" {
const Crc32Castagnoli = Crc32SmallWithPoly(Polynomial.Castagnoli);

debug.assert(Crc32Castagnoli.hash("") == 0x00000000);
debug.assert(Crc32Castagnoli.hash("a") == 0xc1d04330);
debug.assert(Crc32Castagnoli.hash("abc") == 0x364b3fb7);
}
60 changes: 60 additions & 0 deletions std/hash/fnv.zig
@@ -0,0 +1,60 @@
// FNV1a - Fowler-Noll-Vo hash function
//
// FNV1a is a fast, non-cryptographic hash function with fairly good distribution properties.
//
// https://tools.ietf.org/html/draft-eastlake-fnv-14

const std = @import("../index.zig");
const debug = std.debug;

pub const Fnv1a_32 = Fnv1a(u32, 0x01000193 , 0x811c9dc5);
pub const Fnv1a_64 = Fnv1a(u64, 0x100000001b3, 0xcbf29ce484222325);
pub const Fnv1a_128 = Fnv1a(u128, 0x1000000000000000000013b, 0x6c62272e07bb014262b821756295c58d);

fn Fnv1a(comptime T: type, comptime prime: T, comptime offset: T) type {
return struct {
const Self = this;

value: T,

pub fn init() Self {
return Self {
.value = offset,
};
}

pub fn update(self: &Self, input: []const u8) void {
for (input) |b| {
self.value ^= b;
self.value *%= prime;
}
}

pub fn final(self: &Self) T {
return self.value;
}

pub fn hash(input: []const u8) T {
var c = Self.init();
c.update(input);
return c.final();
}
};
}

test "fnv1a-32" {
debug.assert(Fnv1a_32.hash("") == 0x811c9dc5);
debug.assert(Fnv1a_32.hash("a") == 0xe40c292c);
debug.assert(Fnv1a_32.hash("foobar") == 0xbf9cf968);
}

test "fnv1a-64" {
debug.assert(Fnv1a_64.hash("") == 0xcbf29ce484222325);
debug.assert(Fnv1a_64.hash("a") == 0xaf63dc4c8601ec8c);
debug.assert(Fnv1a_64.hash("foobar") == 0x85944171f73967e8);
}

test "fnv1a-128" {
debug.assert(Fnv1a_128.hash("") == 0x6c62272e07bb014262b821756295c58d);
debug.assert(Fnv1a_128.hash("a") == 0xd228cb696f1a8caf78912b704e4a8964);
}
22 changes: 22 additions & 0 deletions std/hash/index.zig
@@ -0,0 +1,22 @@
const adler = @import("adler.zig");
pub const Adler32 = adler.Adler32;

// pub for polynomials + generic crc32 construction
pub const crc = @import("crc.zig");
pub const Crc32 = crc.Crc32;

const fnv = @import("fnv.zig");
pub const Fnv1a_32 = fnv.Fnv1a_32;
pub const Fnv1a_64 = fnv.Fnv1a_64;
pub const Fnv1a_128 = fnv.Fnv1a_128;

const siphash = @import("siphash.zig");
pub const SipHash64 = siphash.SipHash64;
pub const SipHash128 = siphash.SipHash128;

test "hash" {
_ = @import("adler.zig");
_ = @import("crc.zig");
_ = @import("fnv.zig");
_ = @import("siphash.zig");
}

0 comments on commit 7186e92

Please sign in to comment.