Merge pull request #513 from fkautz/pr_out_adding_software_fallback

This commit is contained in:
Frederick F. Kautz IV 2015-04-27 14:41:41 -07:00
commit fc2537eef9
4 changed files with 270 additions and 2 deletions

View File

@ -65,6 +65,8 @@ func block(dig *digest, p []byte) {
blockAVX(dig, p) blockAVX(dig, p)
case cpu.HasSSE41() == true: case cpu.HasSSE41() == true:
blockSSE(dig, p) blockSSE(dig, p)
default:
blockSoftware(dig, p)
} }
} }

View File

@ -14,7 +14,13 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
//
// Software block transform are provided by The Go Authors:
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file of
// Golang project:
// https://github.com/golang/go/blob/master/LICENSE
package sha256 package sha256
@ -37,3 +43,120 @@ func blockAVX(dig *digest, p []byte) {
func blockAVX2(dig *digest, p []byte) { func blockAVX2(dig *digest, p []byte) {
C.sha256_transform_rorx((*C.char)(unsafe.Pointer(&p[0])), (*C.uint32_t)(unsafe.Pointer(&dig.h[0])), (C.ulong)(len(p)/64)) C.sha256_transform_rorx((*C.char)(unsafe.Pointer(&p[0])), (*C.uint32_t)(unsafe.Pointer(&dig.h[0])), (C.ulong)(len(p)/64))
} }
func blockSoftware(dig *digest, p []byte) {
var w [64]uint32
h0, h1, h2, h3, h4, h5, h6, h7 := dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7]
for len(p) >= chunk {
// Can interlace the computation of w with the
// rounds below if needed for speed.
for i := 0; i < 16; i++ {
j := i * 4
w[i] = uint32(p[j])<<24 | uint32(p[j+1])<<16 | uint32(p[j+2])<<8 | uint32(p[j+3])
}
for i := 16; i < 64; i++ {
v1 := w[i-2]
t1 := (v1>>17 | v1<<(32-17)) ^ (v1>>19 | v1<<(32-19)) ^ (v1 >> 10)
v2 := w[i-15]
t2 := (v2>>7 | v2<<(32-7)) ^ (v2>>18 | v2<<(32-18)) ^ (v2 >> 3)
w[i] = t1 + w[i-7] + t2 + w[i-16]
}
a, b, c, d, e, f, g, h := h0, h1, h2, h3, h4, h5, h6, h7
for i := 0; i < 64; i++ {
t1 := h + ((e>>6 | e<<(32-6)) ^ (e>>11 | e<<(32-11)) ^ (e>>25 | e<<(32-25))) + ((e & f) ^ (^e & g)) + _K[i] + w[i]
t2 := ((a>>2 | a<<(32-2)) ^ (a>>13 | a<<(32-13)) ^ (a>>22 | a<<(32-22))) + ((a & b) ^ (a & c) ^ (b & c))
h = g
g = f
f = e
e = d + t1
d = c
c = b
b = a
a = t1 + t2
}
h0 += a
h1 += b
h2 += c
h3 += d
h4 += e
h5 += f
h6 += g
h7 += h
p = p[chunk:]
}
dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] = h0, h1, h2, h3, h4, h5, h6, h7
}
var _K = []uint32{
0x428a2f98,
0x71374491,
0xb5c0fbcf,
0xe9b5dba5,
0x3956c25b,
0x59f111f1,
0x923f82a4,
0xab1c5ed5,
0xd807aa98,
0x12835b01,
0x243185be,
0x550c7dc3,
0x72be5d74,
0x80deb1fe,
0x9bdc06a7,
0xc19bf174,
0xe49b69c1,
0xefbe4786,
0x0fc19dc6,
0x240ca1cc,
0x2de92c6f,
0x4a7484aa,
0x5cb0a9dc,
0x76f988da,
0x983e5152,
0xa831c66d,
0xb00327c8,
0xbf597fc7,
0xc6e00bf3,
0xd5a79147,
0x06ca6351,
0x14292967,
0x27b70a85,
0x2e1b2138,
0x4d2c6dfc,
0x53380d13,
0x650a7354,
0x766a0abb,
0x81c2c92e,
0x92722c85,
0xa2bfe8a1,
0xa81a664b,
0xc24b8b70,
0xc76c51a3,
0xd192e819,
0xd6990624,
0xf40e3585,
0x106aa070,
0x19a4c116,
0x1e376c08,
0x2748774c,
0x34b0bcb5,
0x391c0cb3,
0x4ed8aa4a,
0x5b9cca4f,
0x682e6ff3,
0x748f82ee,
0x78a5636f,
0x84c87814,
0x8cc70208,
0x90befffa,
0xa4506ceb,
0xbef9a3f7,
0xc67178f2,
}

View File

@ -52,6 +52,8 @@ func block(dig *digest, p []byte) {
blockAVX(dig, p) blockAVX(dig, p)
case cpu.HasSSE41() == true: case cpu.HasSSE41() == true:
blockSSE(dig, p) blockSSE(dig, p)
default:
blockSoftware(dig, p)
} }
} }

View File

@ -15,6 +15,12 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
// //
// Software block transform are provided by The Go Authors:
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file of
// Golang project:
// https://github.com/golang/go/blob/master/LICENSE
package sha512 package sha512
@ -24,7 +30,9 @@ package sha512
// void sha512_transform_avx (const void* M, void* D, uint64_t L); // void sha512_transform_avx (const void* M, void* D, uint64_t L);
// void sha512_transform_rorx (const void* M, void* D, uint64_t L); // void sha512_transform_rorx (const void* M, void* D, uint64_t L);
import "C" import "C"
import "unsafe" import (
"unsafe"
)
func blockSSE(dig *digest, p []byte) { func blockSSE(dig *digest, p []byte) {
C.sha512_transform_ssse3(unsafe.Pointer(&p[0]), unsafe.Pointer(&dig.h[0]), (C.uint64_t)(len(p)/chunk)) C.sha512_transform_ssse3(unsafe.Pointer(&p[0]), unsafe.Pointer(&dig.h[0]), (C.uint64_t)(len(p)/chunk))
@ -37,3 +45,136 @@ func blockAVX(dig *digest, p []byte) {
func blockAVX2(dig *digest, p []byte) { func blockAVX2(dig *digest, p []byte) {
C.sha512_transform_rorx(unsafe.Pointer(&p[0]), unsafe.Pointer(&dig.h[0]), (C.uint64_t)(len(p)/chunk)) C.sha512_transform_rorx(unsafe.Pointer(&p[0]), unsafe.Pointer(&dig.h[0]), (C.uint64_t)(len(p)/chunk))
} }
func blockSoftware(dig *digest, p []byte) {
var w [80]uint64
h0, h1, h2, h3, h4, h5, h6, h7 := dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7]
for len(p) >= chunk {
for i := 0; i < 16; i++ {
j := i * 8
w[i] = uint64(p[j])<<56 | uint64(p[j+1])<<48 | uint64(p[j+2])<<40 | uint64(p[j+3])<<32 |
uint64(p[j+4])<<24 | uint64(p[j+5])<<16 | uint64(p[j+6])<<8 | uint64(p[j+7])
}
for i := 16; i < 80; i++ {
v1 := w[i-2]
t1 := (v1>>19 | v1<<(64-19)) ^ (v1>>61 | v1<<(64-61)) ^ (v1 >> 6)
v2 := w[i-15]
t2 := (v2>>1 | v2<<(64-1)) ^ (v2>>8 | v2<<(64-8)) ^ (v2 >> 7)
w[i] = t1 + w[i-7] + t2 + w[i-16]
}
a, b, c, d, e, f, g, h := h0, h1, h2, h3, h4, h5, h6, h7
for i := 0; i < 80; i++ {
t1 := h + ((e>>14 | e<<(64-14)) ^ (e>>18 | e<<(64-18)) ^ (e>>41 | e<<(64-41))) + ((e & f) ^ (^e & g)) + _K[i] + w[i]
t2 := ((a>>28 | a<<(64-28)) ^ (a>>34 | a<<(64-34)) ^ (a>>39 | a<<(64-39))) + ((a & b) ^ (a & c) ^ (b & c))
h = g
g = f
f = e
e = d + t1
d = c
c = b
b = a
a = t1 + t2
}
h0 += a
h1 += b
h2 += c
h3 += d
h4 += e
h5 += f
h6 += g
h7 += h
p = p[chunk:]
}
dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] = h0, h1, h2, h3, h4, h5, h6, h7
}
var _K = []uint64{
0x428a2f98d728ae22,
0x7137449123ef65cd,
0xb5c0fbcfec4d3b2f,
0xe9b5dba58189dbbc,
0x3956c25bf348b538,
0x59f111f1b605d019,
0x923f82a4af194f9b,
0xab1c5ed5da6d8118,
0xd807aa98a3030242,
0x12835b0145706fbe,
0x243185be4ee4b28c,
0x550c7dc3d5ffb4e2,
0x72be5d74f27b896f,
0x80deb1fe3b1696b1,
0x9bdc06a725c71235,
0xc19bf174cf692694,
0xe49b69c19ef14ad2,
0xefbe4786384f25e3,
0x0fc19dc68b8cd5b5,
0x240ca1cc77ac9c65,
0x2de92c6f592b0275,
0x4a7484aa6ea6e483,
0x5cb0a9dcbd41fbd4,
0x76f988da831153b5,
0x983e5152ee66dfab,
0xa831c66d2db43210,
0xb00327c898fb213f,
0xbf597fc7beef0ee4,
0xc6e00bf33da88fc2,
0xd5a79147930aa725,
0x06ca6351e003826f,
0x142929670a0e6e70,
0x27b70a8546d22ffc,
0x2e1b21385c26c926,
0x4d2c6dfc5ac42aed,
0x53380d139d95b3df,
0x650a73548baf63de,
0x766a0abb3c77b2a8,
0x81c2c92e47edaee6,
0x92722c851482353b,
0xa2bfe8a14cf10364,
0xa81a664bbc423001,
0xc24b8b70d0f89791,
0xc76c51a30654be30,
0xd192e819d6ef5218,
0xd69906245565a910,
0xf40e35855771202a,
0x106aa07032bbd1b8,
0x19a4c116b8d2d0c8,
0x1e376c085141ab53,
0x2748774cdf8eeb99,
0x34b0bcb5e19b48a8,
0x391c0cb3c5c95a63,
0x4ed8aa4ae3418acb,
0x5b9cca4f7763e373,
0x682e6ff3d6b2b8a3,
0x748f82ee5defb2fc,
0x78a5636f43172f60,
0x84c87814a1f0ab72,
0x8cc702081a6439ec,
0x90befffa23631e28,
0xa4506cebde82bde9,
0xbef9a3f7b2c67915,
0xc67178f2e372532b,
0xca273eceea26619c,
0xd186b8c721c0c207,
0xeada7dd6cde0eb1e,
0xf57d4f7fee6ed178,
0x06f067aa72176fba,
0x0a637dc5a2c898a6,
0x113f9804bef90dae,
0x1b710b35131c471b,
0x28db77f523047d84,
0x32caab7b40c72493,
0x3c9ebe0a15c9bebc,
0x431d67c49c100d4c,
0x4cc5d4becb3e42b6,
0x597f299cfc657e2a,
0x5fcb6fab3ad6faec,
0x6c44198c4a475817,
}