Skip to content

Commit

Permalink
Unroll Sha3 inner loop
Browse files Browse the repository at this point in the history
Issue #699 has since been fixed. Nearly a 3x perf improvement.

Using --release-fast.

Sha3_256 (before): 96 Mb/s
Sha3_256  (after): 267 Mb/s

Sha3_512 (before): 53 Mb/s
Sha3_512  (after): 142 Mb/s

No real gains from unrolling the other initialization loops in crypto
functions, so they have been left as is.
  • Loading branch information
tiehuis committed Mar 9, 2018
1 parent 5a7a0e8 commit 7a89369
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 14 deletions.
1 change: 0 additions & 1 deletion std/crypto/md5.zig
Expand Up @@ -108,7 +108,6 @@ pub const Md5 = struct {

var s: [16]u32 = undefined;

// ERROR: cannot unroll this at comptime
var i: usize = 0;
while (i < 16) : (i += 1) {
// NOTE: Performing or's separately improves perf by ~10%
Expand Down
2 changes: 0 additions & 2 deletions std/crypto/sha2.zig
Expand Up @@ -156,7 +156,6 @@ fn Sha2_32(comptime params: Sha2Params32) type { return struct {

var s: [64]u32 = undefined;

// ERROR: Cannot unroll at compile-time.
var i: usize = 0;
while (i < 16) : (i += 1) {
s[i] = 0;
Expand Down Expand Up @@ -472,7 +471,6 @@ fn Sha2_64(comptime params: Sha2Params64) type { return struct {

var s: [80]u64 = undefined;

// ERROR: Cannot unroll at compile-time.
var i: usize = 0;
while (i < 16) : (i += 1) {
s[i] = 0;
Expand Down
21 changes: 10 additions & 11 deletions std/crypto/sha3.zig
Expand Up @@ -123,35 +123,34 @@ fn keccak_f(comptime F: usize, d: []u8) void {
*r = mem.readIntLE(u64, d[8*i .. 8*i + 8]);
}

var x: usize = 0;
var y: usize = 0;
// TODO: Cannot unroll all loops here due to comptime differences.
inline for (RC[0..no_rounds]) |round| {
comptime var x: usize = 0;
comptime var y: usize = 0;
for (RC[0..no_rounds]) |round| {
// theta
x = 0; while (x < 5) : (x += 1) {
x = 0; inline while (x < 5) : (x += 1) {
c[x] = s[x] ^ s[x+5] ^ s[x+10] ^ s[x+15] ^ s[x+20];
}
x = 0; while (x < 5) : (x += 1) {
x = 0; inline while (x < 5) : (x += 1) {
t[0] = c[M5[x+4]] ^ math.rotl(u64, c[M5[x+1]], usize(1));
y = 0; while (y < 5) : (y += 1) {
y = 0; inline while (y < 5) : (y += 1) {
s[x + y*5] ^= t[0];
}
}

// rho+pi
t[0] = s[1];
x = 0; while (x < 24) : (x += 1) {
x = 0; inline while (x < 24) : (x += 1) {
c[0] = s[PIL[x]];
s[PIL[x]] = math.rotl(u64, t[0], ROTC[x]);
t[0] = c[0];
}

// chi
y = 0; while (y < 5) : (y += 1) {
x = 0; while (x < 5) : (x += 1) {
y = 0; inline while (y < 5) : (y += 1) {
x = 0; inline while (x < 5) : (x += 1) {
c[x] = s[x + y*5];
}
x = 0; while (x < 5) : (x += 1) {
x = 0; inline while (x < 5) : (x += 1) {
s[x + y*5] = c[x] ^ (~c[M5[x+1]] & c[M5[x+2]]);
}
}
Expand Down

1 comment on commit 7a89369

@andrewrk
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow!

Please sign in to comment.