From 289c8596754031866dd036c8c38743551cb968de Mon Sep 17 00:00:00 2001 From: "Frederick F. Kautz IV" Date: Mon, 27 Apr 2015 13:48:56 -0700 Subject: [PATCH] Adding software fallback --- pkg/utils/crypto/sha256/sha256_linux.go | 2 + pkg/utils/crypto/sha256/sha256block_linux.go | 125 +++++++++++++++- pkg/utils/crypto/sha512/sha512_linux.go | 2 + pkg/utils/crypto/sha512/sha512block_linux.go | 143 ++++++++++++++++++- 4 files changed, 270 insertions(+), 2 deletions(-) diff --git a/pkg/utils/crypto/sha256/sha256_linux.go b/pkg/utils/crypto/sha256/sha256_linux.go index fd7cc3766..8a08df6ff 100644 --- a/pkg/utils/crypto/sha256/sha256_linux.go +++ b/pkg/utils/crypto/sha256/sha256_linux.go @@ -65,6 +65,8 @@ func block(dig *digest, p []byte) { blockAVX(dig, p) case cpu.HasSSE41() == true: blockSSE(dig, p) + default: + blockSoftware(dig, p) } } diff --git a/pkg/utils/crypto/sha256/sha256block_linux.go b/pkg/utils/crypto/sha256/sha256block_linux.go index 2ec326d62..027677f06 100644 --- a/pkg/utils/crypto/sha256/sha256block_linux.go +++ b/pkg/utils/crypto/sha256/sha256block_linux.go @@ -14,7 +14,13 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// + +// Software block transform are provided by The Go Authors: +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file of +// Golang project: +// https://github.com/golang/go/blob/master/LICENSE package sha256 @@ -37,3 +43,120 @@ func blockAVX(dig *digest, p []byte) { func blockAVX2(dig *digest, p []byte) { C.sha256_transform_rorx((*C.char)(unsafe.Pointer(&p[0])), (*C.uint32_t)(unsafe.Pointer(&dig.h[0])), (C.ulong)(len(p)/64)) } + +func blockSoftware(dig *digest, p []byte) { + var w [64]uint32 + h0, h1, h2, h3, h4, h5, h6, h7 := dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] + for len(p) >= chunk { + // Can interlace the computation of w with the + // rounds below if needed for speed. + for i := 0; i < 16; i++ { + j := i * 4 + w[i] = uint32(p[j])<<24 | uint32(p[j+1])<<16 | uint32(p[j+2])<<8 | uint32(p[j+3]) + } + for i := 16; i < 64; i++ { + v1 := w[i-2] + t1 := (v1>>17 | v1<<(32-17)) ^ (v1>>19 | v1<<(32-19)) ^ (v1 >> 10) + v2 := w[i-15] + t2 := (v2>>7 | v2<<(32-7)) ^ (v2>>18 | v2<<(32-18)) ^ (v2 >> 3) + w[i] = t1 + w[i-7] + t2 + w[i-16] + } + + a, b, c, d, e, f, g, h := h0, h1, h2, h3, h4, h5, h6, h7 + + for i := 0; i < 64; i++ { + t1 := h + ((e>>6 | e<<(32-6)) ^ (e>>11 | e<<(32-11)) ^ (e>>25 | e<<(32-25))) + ((e & f) ^ (^e & g)) + _K[i] + w[i] + + t2 := ((a>>2 | a<<(32-2)) ^ (a>>13 | a<<(32-13)) ^ (a>>22 | a<<(32-22))) + ((a & b) ^ (a & c) ^ (b & c)) + + h = g + g = f + f = e + e = d + t1 + d = c + c = b + b = a + a = t1 + t2 + } + + h0 += a + h1 += b + h2 += c + h3 += d + h4 += e + h5 += f + h6 += g + h7 += h + + p = p[chunk:] + } + + dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] = h0, h1, h2, h3, h4, h5, h6, h7 +} + +var _K = []uint32{ + 0x428a2f98, + 0x71374491, + 0xb5c0fbcf, + 0xe9b5dba5, + 0x3956c25b, + 0x59f111f1, + 0x923f82a4, + 0xab1c5ed5, + 0xd807aa98, + 0x12835b01, + 0x243185be, + 0x550c7dc3, + 0x72be5d74, + 0x80deb1fe, + 0x9bdc06a7, + 0xc19bf174, + 0xe49b69c1, + 0xefbe4786, + 0x0fc19dc6, + 0x240ca1cc, + 0x2de92c6f, + 0x4a7484aa, + 0x5cb0a9dc, + 0x76f988da, + 0x983e5152, + 0xa831c66d, + 0xb00327c8, + 0xbf597fc7, + 0xc6e00bf3, + 0xd5a79147, + 0x06ca6351, + 0x14292967, + 0x27b70a85, + 0x2e1b2138, + 0x4d2c6dfc, + 0x53380d13, + 0x650a7354, + 0x766a0abb, + 0x81c2c92e, + 0x92722c85, + 0xa2bfe8a1, + 0xa81a664b, + 0xc24b8b70, + 0xc76c51a3, + 0xd192e819, + 0xd6990624, + 0xf40e3585, + 0x106aa070, + 0x19a4c116, + 0x1e376c08, + 0x2748774c, + 0x34b0bcb5, + 0x391c0cb3, + 0x4ed8aa4a, + 0x5b9cca4f, + 0x682e6ff3, + 0x748f82ee, + 0x78a5636f, + 0x84c87814, + 0x8cc70208, + 0x90befffa, + 0xa4506ceb, + 0xbef9a3f7, + 0xc67178f2, +} diff --git a/pkg/utils/crypto/sha512/sha512_linux.go b/pkg/utils/crypto/sha512/sha512_linux.go index ef4cb2520..83ff22068 100644 --- a/pkg/utils/crypto/sha512/sha512_linux.go +++ b/pkg/utils/crypto/sha512/sha512_linux.go @@ -52,6 +52,8 @@ func block(dig *digest, p []byte) { blockAVX(dig, p) case cpu.HasSSE41() == true: blockSSE(dig, p) + default: + blockSoftware(dig, p) } } diff --git a/pkg/utils/crypto/sha512/sha512block_linux.go b/pkg/utils/crypto/sha512/sha512block_linux.go index 85333557b..a7825a0c2 100644 --- a/pkg/utils/crypto/sha512/sha512block_linux.go +++ b/pkg/utils/crypto/sha512/sha512block_linux.go @@ -15,6 +15,12 @@ // See the License for the specific language governing permissions and // limitations under the License. // +// Software block transform are provided by The Go Authors: +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file of +// Golang project: +// https://github.com/golang/go/blob/master/LICENSE package sha512 @@ -24,7 +30,9 @@ package sha512 // void sha512_transform_avx (const void* M, void* D, uint64_t L); // void sha512_transform_rorx (const void* M, void* D, uint64_t L); import "C" -import "unsafe" +import ( + "unsafe" +) func blockSSE(dig *digest, p []byte) { C.sha512_transform_ssse3(unsafe.Pointer(&p[0]), unsafe.Pointer(&dig.h[0]), (C.uint64_t)(len(p)/chunk)) @@ -37,3 +45,136 @@ func blockAVX(dig *digest, p []byte) { func blockAVX2(dig *digest, p []byte) { C.sha512_transform_rorx(unsafe.Pointer(&p[0]), unsafe.Pointer(&dig.h[0]), (C.uint64_t)(len(p)/chunk)) } + +func blockSoftware(dig *digest, p []byte) { + var w [80]uint64 + h0, h1, h2, h3, h4, h5, h6, h7 := dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] + for len(p) >= chunk { + for i := 0; i < 16; i++ { + j := i * 8 + w[i] = uint64(p[j])<<56 | uint64(p[j+1])<<48 | uint64(p[j+2])<<40 | uint64(p[j+3])<<32 | + uint64(p[j+4])<<24 | uint64(p[j+5])<<16 | uint64(p[j+6])<<8 | uint64(p[j+7]) + } + for i := 16; i < 80; i++ { + v1 := w[i-2] + t1 := (v1>>19 | v1<<(64-19)) ^ (v1>>61 | v1<<(64-61)) ^ (v1 >> 6) + v2 := w[i-15] + t2 := (v2>>1 | v2<<(64-1)) ^ (v2>>8 | v2<<(64-8)) ^ (v2 >> 7) + + w[i] = t1 + w[i-7] + t2 + w[i-16] + } + + a, b, c, d, e, f, g, h := h0, h1, h2, h3, h4, h5, h6, h7 + + for i := 0; i < 80; i++ { + t1 := h + ((e>>14 | e<<(64-14)) ^ (e>>18 | e<<(64-18)) ^ (e>>41 | e<<(64-41))) + ((e & f) ^ (^e & g)) + _K[i] + w[i] + + t2 := ((a>>28 | a<<(64-28)) ^ (a>>34 | a<<(64-34)) ^ (a>>39 | a<<(64-39))) + ((a & b) ^ (a & c) ^ (b & c)) + + h = g + g = f + f = e + e = d + t1 + d = c + c = b + b = a + a = t1 + t2 + } + + h0 += a + h1 += b + h2 += c + h3 += d + h4 += e + h5 += f + h6 += g + h7 += h + + p = p[chunk:] + } + + dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] = h0, h1, h2, h3, h4, h5, h6, h7 +} + +var _K = []uint64{ + 0x428a2f98d728ae22, + 0x7137449123ef65cd, + 0xb5c0fbcfec4d3b2f, + 0xe9b5dba58189dbbc, + 0x3956c25bf348b538, + 0x59f111f1b605d019, + 0x923f82a4af194f9b, + 0xab1c5ed5da6d8118, + 0xd807aa98a3030242, + 0x12835b0145706fbe, + 0x243185be4ee4b28c, + 0x550c7dc3d5ffb4e2, + 0x72be5d74f27b896f, + 0x80deb1fe3b1696b1, + 0x9bdc06a725c71235, + 0xc19bf174cf692694, + 0xe49b69c19ef14ad2, + 0xefbe4786384f25e3, + 0x0fc19dc68b8cd5b5, + 0x240ca1cc77ac9c65, + 0x2de92c6f592b0275, + 0x4a7484aa6ea6e483, + 0x5cb0a9dcbd41fbd4, + 0x76f988da831153b5, + 0x983e5152ee66dfab, + 0xa831c66d2db43210, + 0xb00327c898fb213f, + 0xbf597fc7beef0ee4, + 0xc6e00bf33da88fc2, + 0xd5a79147930aa725, + 0x06ca6351e003826f, + 0x142929670a0e6e70, + 0x27b70a8546d22ffc, + 0x2e1b21385c26c926, + 0x4d2c6dfc5ac42aed, + 0x53380d139d95b3df, + 0x650a73548baf63de, + 0x766a0abb3c77b2a8, + 0x81c2c92e47edaee6, + 0x92722c851482353b, + 0xa2bfe8a14cf10364, + 0xa81a664bbc423001, + 0xc24b8b70d0f89791, + 0xc76c51a30654be30, + 0xd192e819d6ef5218, + 0xd69906245565a910, + 0xf40e35855771202a, + 0x106aa07032bbd1b8, + 0x19a4c116b8d2d0c8, + 0x1e376c085141ab53, + 0x2748774cdf8eeb99, + 0x34b0bcb5e19b48a8, + 0x391c0cb3c5c95a63, + 0x4ed8aa4ae3418acb, + 0x5b9cca4f7763e373, + 0x682e6ff3d6b2b8a3, + 0x748f82ee5defb2fc, + 0x78a5636f43172f60, + 0x84c87814a1f0ab72, + 0x8cc702081a6439ec, + 0x90befffa23631e28, + 0xa4506cebde82bde9, + 0xbef9a3f7b2c67915, + 0xc67178f2e372532b, + 0xca273eceea26619c, + 0xd186b8c721c0c207, + 0xeada7dd6cde0eb1e, + 0xf57d4f7fee6ed178, + 0x06f067aa72176fba, + 0x0a637dc5a2c898a6, + 0x113f9804bef90dae, + 0x1b710b35131c471b, + 0x28db77f523047d84, + 0x32caab7b40c72493, + 0x3c9ebe0a15c9bebc, + 0x431d67c49c100d4c, + 0x4cc5d4becb3e42b6, + 0x597f299cfc657e2a, + 0x5fcb6fab3ad6faec, + 0x6c44198c4a475817, +}