From d41dcb784b4a40639525c27841b9250c123dcb56 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Tue, 24 Jan 2017 18:07:31 -0800 Subject: [PATCH] Move to blake2b-simd due to perf problems in golang.org/x/crypto Ref https://github.com/golang/go/issues/18563 --- cmd/erasure-utils.go | 6 +- .../github.com/minio/blake2b-simd/blake2b.go | 301 ++++ .../minio/blake2b-simd/compressAvx2_amd64.go | 47 + .../minio/blake2b-simd/compressAvx2_amd64.s | 671 ++++++++ .../minio/blake2b-simd/compressAvx_amd64.go | 41 + .../minio/blake2b-simd/compressAvx_amd64.s | 682 ++++++++ .../minio/blake2b-simd/compressSse_amd64.go | 41 + .../minio/blake2b-simd/compressSse_amd64.s | 770 +++++++++ .../minio/blake2b-simd/compress_amd64.go | 30 + .../minio/blake2b-simd/compress_generic.go | 1419 +++++++++++++++++ .../minio/blake2b-simd/compress_noasm.go | 23 + vendor/github.com/minio/blake2b-simd/cpuid.go | 60 + .../github.com/minio/blake2b-simd/cpuid_386.s | 33 + .../minio/blake2b-simd/cpuid_amd64.s | 34 + vendor/golang.org/x/crypto/blake2b/blake2b.go | 194 --- .../x/crypto/blake2b/blake2bAVX2_amd64.go | 43 - .../x/crypto/blake2b/blake2bAVX2_amd64.s | 502 ------ .../x/crypto/blake2b/blake2b_amd64.go | 25 - .../x/crypto/blake2b/blake2b_amd64.s | 290 ---- .../x/crypto/blake2b/blake2b_generic.go | 179 --- .../x/crypto/blake2b/blake2b_ref.go | 11 - .../x/crypto/blake2b/blake2b_test.go | 448 ------ vendor/vendor.json | 6 + 23 files changed, 4161 insertions(+), 1695 deletions(-) create mode 100644 vendor/github.com/minio/blake2b-simd/blake2b.go create mode 100644 vendor/github.com/minio/blake2b-simd/compressAvx2_amd64.go create mode 100644 vendor/github.com/minio/blake2b-simd/compressAvx2_amd64.s create mode 100644 vendor/github.com/minio/blake2b-simd/compressAvx_amd64.go create mode 100644 vendor/github.com/minio/blake2b-simd/compressAvx_amd64.s create mode 100644 vendor/github.com/minio/blake2b-simd/compressSse_amd64.go create mode 100644 vendor/github.com/minio/blake2b-simd/compressSse_amd64.s create mode 100644 vendor/github.com/minio/blake2b-simd/compress_amd64.go create mode 100644 vendor/github.com/minio/blake2b-simd/compress_generic.go create mode 100644 vendor/github.com/minio/blake2b-simd/compress_noasm.go create mode 100644 vendor/github.com/minio/blake2b-simd/cpuid.go create mode 100644 vendor/github.com/minio/blake2b-simd/cpuid_386.s create mode 100644 vendor/github.com/minio/blake2b-simd/cpuid_amd64.s delete mode 100644 vendor/golang.org/x/crypto/blake2b/blake2b.go delete mode 100644 vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.go delete mode 100644 vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s delete mode 100644 vendor/golang.org/x/crypto/blake2b/blake2b_amd64.go delete mode 100644 vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s delete mode 100644 vendor/golang.org/x/crypto/blake2b/blake2b_generic.go delete mode 100644 vendor/golang.org/x/crypto/blake2b/blake2b_ref.go delete mode 100644 vendor/golang.org/x/crypto/blake2b/blake2b_test.go diff --git a/cmd/erasure-utils.go b/cmd/erasure-utils.go index 814df1c47..d01ea731a 100644 --- a/cmd/erasure-utils.go +++ b/cmd/erasure-utils.go @@ -24,8 +24,8 @@ import ( "sync" "github.com/klauspost/reedsolomon" + "github.com/minio/blake2b-simd" "github.com/minio/sha256-simd" - "golang.org/x/crypto/blake2b" ) // newHashWriters - inititialize a slice of hashes for the disk count. @@ -48,14 +48,14 @@ func newHash(algo string) (h hash.Hash) { // ignore the error, because New512 without a key never fails // New512 only returns a non-nil error, if the length of the passed // key > 64 bytes - but we use blake2b as hash function (no key) - h, _ = blake2b.New512(nil) + h = blake2b.New512() // Add new hashes here. default: // Default to blake2b. // ignore the error, because New512 without a key never fails // New512 only returns a non-nil error, if the length of the passed // key > 64 bytes - but we use blake2b as hash function (no key) - h, _ = blake2b.New512(nil) + h = blake2b.New512() } return h } diff --git a/vendor/github.com/minio/blake2b-simd/blake2b.go b/vendor/github.com/minio/blake2b-simd/blake2b.go new file mode 100644 index 000000000..538466a1a --- /dev/null +++ b/vendor/github.com/minio/blake2b-simd/blake2b.go @@ -0,0 +1,301 @@ +// Written in 2012 by Dmitry Chestnykh. +// +// To the extent possible under law, the author have dedicated all copyright +// and related and neighboring rights to this software to the public domain +// worldwide. This software is distributed without any warranty. +// http://creativecommons.org/publicdomain/zero/1.0/ + +// Package blake2b implements BLAKE2b cryptographic hash function. +package blake2b + +import ( + "encoding/binary" + "errors" + "hash" +) + +const ( + BlockSize = 128 // block size of algorithm + Size = 64 // maximum digest size + SaltSize = 16 // maximum salt size + PersonSize = 16 // maximum personalization string size + KeySize = 64 // maximum size of key +) + +type digest struct { + h [8]uint64 // current chain value + t [2]uint64 // message bytes counter + f [2]uint64 // finalization flags + x [BlockSize]byte // buffer for data not yet compressed + nx int // number of bytes in buffer + + ih [8]uint64 // initial chain value (after config) + paddedKey [BlockSize]byte // copy of key, padded with zeros + isKeyed bool // indicates whether hash was keyed + size uint8 // digest size in bytes + isLastNode bool // indicates processing of the last node in tree hashing +} + +// Initialization values. +var iv = [8]uint64{ + 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, + 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, + 0x510e527fade682d1, 0x9b05688c2b3e6c1f, + 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179, +} + +// Config is used to configure hash function parameters and keying. +// All parameters are optional. +type Config struct { + Size uint8 // digest size (if zero, default size of 64 bytes is used) + Key []byte // key for prefix-MAC + Salt []byte // salt (if < 16 bytes, padded with zeros) + Person []byte // personalization (if < 16 bytes, padded with zeros) + Tree *Tree // parameters for tree hashing +} + +// Tree represents parameters for tree hashing. +type Tree struct { + Fanout uint8 // fanout + MaxDepth uint8 // maximal depth + LeafSize uint32 // leaf maximal byte length (0 for unlimited) + NodeOffset uint64 // node offset (0 for first, leftmost or leaf) + NodeDepth uint8 // node depth (0 for leaves) + InnerHashSize uint8 // inner hash byte length + IsLastNode bool // indicates processing of the last node of layer +} + +var ( + defaultConfig = &Config{Size: Size} + config256 = &Config{Size: 32} +) + +func verifyConfig(c *Config) error { + if c.Size > Size { + return errors.New("digest size is too large") + } + if len(c.Key) > KeySize { + return errors.New("key is too large") + } + if len(c.Salt) > SaltSize { + // Smaller salt is okay: it will be padded with zeros. + return errors.New("salt is too large") + } + if len(c.Person) > PersonSize { + // Smaller personalization is okay: it will be padded with zeros. + return errors.New("personalization is too large") + } + if c.Tree != nil { + if c.Tree.Fanout == 1 { + return errors.New("fanout of 1 is not allowed in tree mode") + } + if c.Tree.MaxDepth < 2 { + return errors.New("incorrect tree depth") + } + if c.Tree.InnerHashSize < 1 || c.Tree.InnerHashSize > Size { + return errors.New("incorrect tree inner hash size") + } + } + return nil +} + +// New returns a new hash.Hash configured with the given Config. +// Config can be nil, in which case the default one is used, calculating 64-byte digest. +// Returns non-nil error if Config contains invalid parameters. +func New(c *Config) (hash.Hash, error) { + if c == nil { + c = defaultConfig + } else { + if c.Size == 0 { + // Set default size if it's zero. + c.Size = Size + } + if err := verifyConfig(c); err != nil { + return nil, err + } + } + d := new(digest) + d.initialize(c) + return d, nil +} + +// initialize initializes digest with the given +// config, which must be non-nil and verified. +func (d *digest) initialize(c *Config) { + // Create parameter block. + var p [BlockSize]byte + p[0] = c.Size + p[1] = uint8(len(c.Key)) + if c.Salt != nil { + copy(p[32:], c.Salt) + } + if c.Person != nil { + copy(p[48:], c.Person) + } + if c.Tree != nil { + p[2] = c.Tree.Fanout + p[3] = c.Tree.MaxDepth + binary.LittleEndian.PutUint32(p[4:], c.Tree.LeafSize) + binary.LittleEndian.PutUint64(p[8:], c.Tree.NodeOffset) + p[16] = c.Tree.NodeDepth + p[17] = c.Tree.InnerHashSize + } else { + p[2] = 1 + p[3] = 1 + } + + // Initialize. + d.size = c.Size + for i := 0; i < 8; i++ { + d.h[i] = iv[i] ^ binary.LittleEndian.Uint64(p[i*8:]) + } + if c.Tree != nil && c.Tree.IsLastNode { + d.isLastNode = true + } + + // Process key. + if c.Key != nil { + copy(d.paddedKey[:], c.Key) + d.Write(d.paddedKey[:]) + d.isKeyed = true + } + // Save a copy of initialized state. + copy(d.ih[:], d.h[:]) +} + +// New512 returns a new hash.Hash computing the BLAKE2b 64-byte checksum. +func New512() hash.Hash { + d := new(digest) + d.initialize(defaultConfig) + return d +} + +// New256 returns a new hash.Hash computing the BLAKE2b 32-byte checksum. +func New256() hash.Hash { + d := new(digest) + d.initialize(config256) + return d +} + +// NewMAC returns a new hash.Hash computing BLAKE2b prefix- +// Message Authentication Code of the given size in bytes +// (up to 64) with the given key (up to 64 bytes in length). +func NewMAC(outBytes uint8, key []byte) hash.Hash { + d, err := New(&Config{Size: outBytes, Key: key}) + if err != nil { + panic(err.Error()) + } + return d +} + +// Reset resets the state of digest to the initial state +// after configuration and keying. +func (d *digest) Reset() { + copy(d.h[:], d.ih[:]) + d.t[0] = 0 + d.t[1] = 0 + d.f[0] = 0 + d.f[1] = 0 + d.nx = 0 + if d.isKeyed { + d.Write(d.paddedKey[:]) + } +} + +// Size returns the digest size in bytes. +func (d *digest) Size() int { return int(d.size) } + +// BlockSize returns the algorithm block size in bytes. +func (d *digest) BlockSize() int { return BlockSize } + +func (d *digest) Write(p []byte) (nn int, err error) { + nn = len(p) + left := BlockSize - d.nx + if len(p) > left { + // Process buffer. + copy(d.x[d.nx:], p[:left]) + p = p[left:] + compress(d, d.x[:]) + d.nx = 0 + } + // Process full blocks except for the last one. + if len(p) > BlockSize { + n := len(p) &^ (BlockSize - 1) + if n == len(p) { + n -= BlockSize + } + compress(d, p[:n]) + p = p[n:] + } + // Fill buffer. + d.nx += copy(d.x[d.nx:], p) + return +} + +// Sum returns the calculated checksum. +func (d *digest) Sum(in []byte) []byte { + // Make a copy of d so that caller can keep writing and summing. + d0 := *d + hash := d0.checkSum() + return append(in, hash[:d0.size]...) +} + +func (d *digest) checkSum() [Size]byte { + // Do not create unnecessary copies of the key. + if d.isKeyed { + for i := 0; i < len(d.paddedKey); i++ { + d.paddedKey[i] = 0 + } + } + + dec := BlockSize - uint64(d.nx) + if d.t[0] < dec { + d.t[1]-- + } + d.t[0] -= dec + + // Pad buffer with zeros. + for i := d.nx; i < len(d.x); i++ { + d.x[i] = 0 + } + // Set last block flag. + d.f[0] = 0xffffffffffffffff + if d.isLastNode { + d.f[1] = 0xffffffffffffffff + } + // Compress last block. + compress(d, d.x[:]) + + var out [Size]byte + j := 0 + for _, s := range d.h[:(d.size-1)/8+1] { + out[j+0] = byte(s >> 0) + out[j+1] = byte(s >> 8) + out[j+2] = byte(s >> 16) + out[j+3] = byte(s >> 24) + out[j+4] = byte(s >> 32) + out[j+5] = byte(s >> 40) + out[j+6] = byte(s >> 48) + out[j+7] = byte(s >> 56) + j += 8 + } + return out +} + +// Sum512 returns a 64-byte BLAKE2b hash of data. +func Sum512(data []byte) [64]byte { + var d digest + d.initialize(defaultConfig) + d.Write(data) + return d.checkSum() +} + +// Sum256 returns a 32-byte BLAKE2b hash of data. +func Sum256(data []byte) (out [32]byte) { + var d digest + d.initialize(config256) + d.Write(data) + sum := d.checkSum() + copy(out[:], sum[:32]) + return +} diff --git a/vendor/github.com/minio/blake2b-simd/compressAvx2_amd64.go b/vendor/github.com/minio/blake2b-simd/compressAvx2_amd64.go new file mode 100644 index 000000000..ec53599f8 --- /dev/null +++ b/vendor/github.com/minio/blake2b-simd/compressAvx2_amd64.go @@ -0,0 +1,47 @@ +//+build !noasm +//+build !appengine + +/* + * Minio Cloud Storage, (C) 2016 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package blake2b + +//go:noescape +func compressAVX2Loop(p []uint8, in, iv, t, f, shffle, out []uint64) + +func compressAVX2(d *digest, p []uint8) { + var ( + in [8]uint64 + out [8]uint64 + shffle [8]uint64 + ) + + // vector for PSHUFB instruction + shffle[0] = 0x0201000706050403 + shffle[1] = 0x0a09080f0e0d0c0b + shffle[2] = 0x0201000706050403 + shffle[3] = 0x0a09080f0e0d0c0b + shffle[4] = 0x0100070605040302 + shffle[5] = 0x09080f0e0d0c0b0a + shffle[6] = 0x0100070605040302 + shffle[7] = 0x09080f0e0d0c0b0a + + in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7] = d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] + + compressAVX2Loop(p, in[:], iv[:], d.t[:], d.f[:], shffle[:], out[:]) + + d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] = out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7] +} diff --git a/vendor/github.com/minio/blake2b-simd/compressAvx2_amd64.s b/vendor/github.com/minio/blake2b-simd/compressAvx2_amd64.s new file mode 100644 index 000000000..24df234b5 --- /dev/null +++ b/vendor/github.com/minio/blake2b-simd/compressAvx2_amd64.s @@ -0,0 +1,671 @@ +//+build !noasm !appengine + +// +// Minio Cloud Storage, (C) 2016 Minio, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// +// Based on AVX2 implementation from https://github.com/sneves/blake2-avx2/blob/master/blake2b-common.h +// +// Use github.com/fwessels/asm2plan9s on this file to assemble instructions to their Plan9 equivalent +// +// Assembly code below essentially follows the ROUND macro (see blake2b-round.h) which is defined as: +// #define ROUND(r) \ +// LOAD_MSG_ ##r ##_1(b0, b1); \ +// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ +// LOAD_MSG_ ##r ##_2(b0, b1); \ +// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ +// DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ +// LOAD_MSG_ ##r ##_3(b0, b1); \ +// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ +// LOAD_MSG_ ##r ##_4(b0, b1); \ +// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ +// UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); +// +// as well as the go equivalent in https://github.com/dchest/blake2b/blob/master/block.go +// +// As in the macro, G1/G2 in the 1st and 2nd half are identical (so literal copy of assembly) +// +// Rounds are also the same, except for the loading of the message (and rounds 1 & 11 and +// rounds 2 & 12 are identical) +// + +#define G1 \ + \ // G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); + BYTE $0xc5; BYTE $0xfd; BYTE $0xd4; BYTE $0xc4 \ // VPADDQ YMM0,YMM0,YMM4 /* v0 += m[0], v1 += m[2], v2 += m[4], v3 += m[6] */ + BYTE $0xc5; BYTE $0xfd; BYTE $0xd4; BYTE $0xc1 \ // VPADDQ YMM0,YMM0,YMM1 /* v0 += v4, v1 += v5, v2 += v6, v3 += v7 */ + BYTE $0xc5; BYTE $0xe5; BYTE $0xef; BYTE $0xd8 \ // VPXOR YMM3,YMM3,YMM0 /* v12 ^= v0, v13 ^= v1, v14 ^= v2, v15 ^= v3 */ + BYTE $0xc5; BYTE $0xfd; BYTE $0x70; BYTE $0xdb; BYTE $0xb1 \ // VPSHUFD YMM3,YMM3,0xb1 /* v12 = v12<<(64-32) | v12>>32, v13 = */ + BYTE $0xc5; BYTE $0xed; BYTE $0xd4; BYTE $0xd3 \ // VPADDQ YMM2,YMM2,YMM3 /* v8 += v12, v9 += v13, v10 += v14, v11 += v15 */ + BYTE $0xc5; BYTE $0xf5; BYTE $0xef; BYTE $0xca \ // VPXOR YMM1,YMM1,YMM2 /* v4 ^= v8, v5 ^= v9, v6 ^= v10, v7 ^= v11 */ + BYTE $0xc4; BYTE $0xe2; BYTE $0x75; BYTE $0x00; BYTE $0xce // VPSHUFB YMM1,YMM1,YMM6 /* v4 = v4<<(64-24) | v4>>24, ..., ..., v7 = v7<<(64-24) | v7>>24 */ + +#define G2 \ + BYTE $0xc5; BYTE $0xfd; BYTE $0xd4; BYTE $0xc5 \ // VPADDQ YMM0,YMM0,YMM5 /* v0 += m[1], v1 += m[3], v2 += m[5], v3 += m[7] */ + BYTE $0xc5; BYTE $0xfd; BYTE $0xd4; BYTE $0xc1 \ // VPADDQ YMM0,YMM0,YMM1 /* v0 += v4, v1 += v5, v2 += v6, v3 += v7 */ + BYTE $0xc5; BYTE $0xe5; BYTE $0xef; BYTE $0xd8 \ // VPXOR YMM3,YMM3,YMM0 /* v12 ^= v0, v13 ^= v1, v14 ^= v2, v15 ^= v3 */ + BYTE $0xc4; BYTE $0xe2; BYTE $0x65; BYTE $0x00; BYTE $0xdf \ // VPSHUFB YMM3,YMM3,YMM7 /* v12 = v12<<(64-16) | v12>>16, ..., ..., v15 = v15<<(64-16) | v15>>16 */ + BYTE $0xc5; BYTE $0xed; BYTE $0xd4; BYTE $0xd3 \ // VPADDQ YMM2,YMM2,YMM3 /* v8 += v12, v9 += v13, v10 += v14, v11 += v15 */ + BYTE $0xc5; BYTE $0xf5; BYTE $0xef; BYTE $0xca \ // VPXOR YMM1,YMM1,YMM2 /* v4 ^= v8, v5 ^= v9, v6 ^= v10, v7 ^= v11 */ + BYTE $0xc5; BYTE $0x75; BYTE $0xd4; BYTE $0xf9 \ // VPADDQ YMM15,YMM1,YMM1 /* temp reg = reg*2 */ + BYTE $0xc5; BYTE $0xf5; BYTE $0x73; BYTE $0xd1; BYTE $0x3f \ // VPSRLQ YMM1,YMM1,0x3f /* reg = reg>>63 */ + BYTE $0xc4; BYTE $0xc1; BYTE $0x75; BYTE $0xef; BYTE $0xcf // VPXOR YMM1,YMM1,YMM15 /* ORed together: v4 = v4<<(64-63) | v4>>63, v5 = v5<<(64-63) | v5>>63 */ + +#define DIAGONALIZE \ + BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb \ // VPERMQ YMM3, YMM3, 0x93 + BYTE $0x93 \ + BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2 \ // VPERMQ YMM2, YMM2, 0x4e + BYTE $0x4e \ + BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 \ // VPERMQ YMM1, YMM1, 0x39 + BYTE $0x39 \ + // DO NOT DELETE -- macro delimiter (previous line extended) + +#define UNDIAGONALIZE \ + BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb \ // VPERMQ YMM3, YMM3, 0x39 + BYTE $0x39 \ + BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2 \ // VPERMQ YMM2, YMM2, 0x4e + BYTE $0x4e \ + BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 \ // VPERMQ YMM1, YMM1, 0x93 + BYTE $0x93 \ + // DO NOT DELETE -- macro delimiter (previous line extended) + +#define LOAD_SHUFFLE \ + MOVQ shffle+120(FP), SI \ // SI: &shuffle + BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x36 \ // VMOVDQU YMM6, [rsi] + BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x7e; BYTE $0x20 // VMOVDQU YMM7, 32[rsi] + +// func compressAVX2Loop(compressSSE(p []uint8, in, iv, t, f, shffle, out []uint64) +TEXT ·compressAVX2Loop(SB), 7, $0 + + // REGISTER USE + // Y0 - Y3: v0 - v15 + // Y4 - Y5: m[0] - m[7] + // Y6 - Y7: shuffle value + // Y8 - Y9: temp registers + // Y10 -Y13: copy of full message + // Y15: temp register + + // Load digest + MOVQ in+24(FP), SI // SI: &in + BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x06 // VMOVDQU YMM0, [rsi] + BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x4e; BYTE $0x20 // VMOVDQU YMM1, 32[rsi] + + // Already store digest into &out (so we can reload it later generically) + MOVQ out+144(FP), SI // SI: &out + BYTE $0xc5; BYTE $0xfe; BYTE $0x7f; BYTE $0x06 // VMOVDQU [rsi], YMM0 + BYTE $0xc5; BYTE $0xfe; BYTE $0x7f; BYTE $0x4e; BYTE $0x20 // VMOVDQU 32[rsi], YMM1 + + // Initialize message pointer and loop counter + MOVQ message+0(FP), DX // DX: &p (message) + MOVQ message_len+8(FP), R8 // R8: len(message) + SHRQ $7, R8 // len(message) / 128 + CMPQ R8, $0 + JEQ complete + +loop: + // Increment counter + MOVQ t+72(FP), SI // SI: &t + MOVQ 0(SI), R9 // + ADDQ $128, R9 // /* d.t[0] += BlockSize */ + MOVQ R9, 0(SI) // + CMPQ R9, $128 // /* if d.t[0] < BlockSize { */ + JGE noincr // + MOVQ 8(SI), R9 // + ADDQ $1, R9 // /* d.t[1]++ */ + MOVQ R9, 8(SI) // +noincr: // /* } */ + + // Load initialization vector + MOVQ iv+48(FP), SI // SI: &iv + BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x16 // VMOVDQU YMM2, [rsi] + BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x5e; BYTE $0x20 // VMOVDQU YMM3, 32[rsi] + MOVQ t+72(FP), SI // SI: &t + BYTE $0xc4; BYTE $0x63; BYTE $0x3d; BYTE $0x38; BYTE $0x06 // VINSERTI128 YMM8, YMM8, [rsi], 0 /* Y8 = t[0]+t[1] */ + BYTE $0x00 + MOVQ t+96(FP), SI // SI: &f + BYTE $0xc4; BYTE $0x63; BYTE $0x3d; BYTE $0x38; BYTE $0x06 // VINSERTI128 YMM8, YMM8, [rsi], 1 /* Y8 = t[0]+t[1]+f[0]+f[1] */ + BYTE $0x01 + BYTE $0xc4; BYTE $0xc1; BYTE $0x65; BYTE $0xef; BYTE $0xd8 // VPXOR YMM3,YMM3,YMM8 /* Y3 = Y3 ^ Y8 */ + + BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x12 // VMOVDQU YMM10, [rdx] /* Y10 = m[0]+ m[1]+ m[2]+ m[3] */ + BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x5a; BYTE $0x20 // VMOVDQU YMM11, 32[rdx] /* Y11 = m[4]+ m[5]+ m[6]+ m[7] */ + BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x62; BYTE $0x40 // VMOVDQU YMM12, 64[rdx] /* Y12 = m[8]+ m[9]+m[10]+m[11] */ + BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x6a; BYTE $0x60 // VMOVDQU YMM13, 96[rdx] /* Y13 = m[12]+m[13]+m[14]+m[15] */ + + LOAD_SHUFFLE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 1 + /////////////////////////////////////////////////////////////////////////// + + BYTE $0xc4; BYTE $0xc1; BYTE $0x2d; BYTE $0x6c; BYTE $0xe3 // VPUNPCKLQDQ YMM4, YMM10, YMM11 /* m[0], m[4], m[2], m[6] */ + BYTE $0xc4; BYTE $0xc1; BYTE $0x2d; BYTE $0x6d; BYTE $0xeb // VPUNPCKHQDQ YMM5, YMM10, YMM11 /* m[1], m[5], m[3], m[7] */ + BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xe4 // VPERMQ YMM4, YMM4, 0xd8 /* 0x1101 1000 = 0xd8 */ + BYTE $0xd8 + BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xed // VPERMQ YMM5, YMM5, 0xd8 /* 0x1101 1000 = 0xd8 */ + BYTE $0xd8 + + G1 + G2 + + DIAGONALIZE + + BYTE $0xc4; BYTE $0xc1; BYTE $0x1d; BYTE $0x6c; BYTE $0xe5 // VPUNPCKLQDQ YMM4, YMM12, YMM13 /* m[8], m[12], m[10], m[14] */ + BYTE $0xc4; BYTE $0xc1; BYTE $0x1d; BYTE $0x6d; BYTE $0xed // VPUNPCKHQDQ YMM5, YMM12, YMM13 /* m[9], m[13], m[11], m[15] */ + BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xe4 // VPERMQ YMM4, YMM4, 0xd8 /* 0x1101 1000 = 0xd8 */ + BYTE $0xd8 + BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xed // VPERMQ YMM5, YMM5, 0xd8 /* 0x1101 1000 = 0xd8 */ + BYTE $0xd8 + + G1 + G2 + + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 2 + /////////////////////////////////////////////////////////////////////////// + + BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc5 // VPUNPCKLQDQ YMM8, YMM11, YMM13 /* m[4], ____, ____, m[14] */ + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x03 /* m[14], m[4], ____, ____ */ /* xxxx 0011 = 0x03 */ + BYTE $0x03 + BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xcd // VPUNPCKHQDQ YMM9, YMM12, YMM13 /* m[9], m[13], ____, ____ */ + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 /* m[9], m[13], ____, ____ */ /* 0010 0000 = 0x20 */ + BYTE $0x20 + + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc4 // VPERMQ YMM8, YMM12, 0x02 /* m[10], m[8], ____, ____ */ /* xxxx 0010 = 0x02 */ + BYTE $0x02 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0x30 /* ____, ____, m[15], ____ */ /* xx11 xxxx = 0x30 */ + BYTE $0x30 + BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6c; BYTE $0xcb // VPUNPCKLQDQ YMM9, YMM9, YMM11 /* ____, ____, m[15], m[6] */ + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 /* m[9], m[13], m[15], m[6] */ /* 0011 0000 = 0x30 */ + BYTE $0x30 + + G1 + G2 + + DIAGONALIZE + + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc2 // VPERMQ YMM8, YMM10, 0x01 /* m[1], m[0], ____, ____ */ /* xxxx 0001 = 0x01 */ + BYTE $0x01 + BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xcc // VPUNPCKHQDQ YMM9, YMM11, YMM12 /* m[5], ____, ____, m[11] */ + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x03 /* m[11], m[5], ____, ____ */ /* xxxx 0011 = 0x03 */ + BYTE $0x03 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 /* m[1], m[0], m[11], m[5] */ /* 0010 0000 = 0x20 */ + BYTE $0x20 + + BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc5 // VPUNPCKLQDQ YMM8, YMM10, YMM13 /* ___, m[12], m[2], ____ */ + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x09 /* m[12], m[2], ____, ____ */ /* xxxx 1001 = 0x09 */ + BYTE $0x09 + BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xca // VPUNPCKHQDQ YMM9, YMM11, YMM10 /* ____, ____, m[7], m[3] */ + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 /* m[9], m[13], m[15], m[6] */ /* 0011 0000 = 0x30 */ + BYTE $0x30 + + G1 + G2 + + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 3 + /////////////////////////////////////////////////////////////////////////// + + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc5 // VPERMQ YMM8, YMM13, 0x00 + BYTE $0x00 + BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc0 // VPUNPCKHQDQ YMM8, YMM12, YMM8 + BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xcd // VPUNPCKHQDQ YMM9, YMM11, YMM13 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x0c + BYTE $0x0c + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x21 + BYTE $0x21 + + BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6c; BYTE $0xc2 // VPUNPCKLQDQ YMM8, YMM12, YMM10 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0x55 + BYTE $0x55 + BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM10, YMM9 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 + BYTE $0x30 + + G1 + G2 + + DIAGONALIZE + + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc2 // VPERMQ YMM8, YMM10, 0xff + BYTE $0xff + BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM12, YMM8 + BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xcc // VPUNPCKHQDQ YMM9, YMM11, YMM12 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x60 + BYTE $0x60 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x31 + BYTE $0x31 + + BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc3 // VPUNPCKLQDQ YMM8, YMM13, YMM11 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcb // VPERMQ YMM9, YMM11, 0x00 + BYTE $0x00 + BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xc9 // VPUNPCKHQDQ YMM9, YMM10, YMM9 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x21 + BYTE $0x21 + + G1 + G2 + + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 4 + /////////////////////////////////////////////////////////////////////////// + + BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ YMM8, YMM11, YMM10 + BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xcc // VPUNPCKHQDQ YMM9, YMM13, YMM12 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x0c + BYTE $0x0c + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x21 + BYTE $0x21 + + BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ YMM8, YMM12, YMM10 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0x08 + BYTE $0x08 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x20 + BYTE $0x20 + + G1 + G2 + + DIAGONALIZE + + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc3 // VPERMQ YMM8, YMM11, 0x55 + BYTE $0x55 + BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM10, YMM8 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0xff + BYTE $0xff + BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM11, YMM9 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x21 + BYTE $0x21 + + BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc4 // VPUNPCKLQDQ YMM8, YMM11, YMM12 + BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xcc // VPUNPCKLQDQ YMM9, YMM10, YMM12 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x21 + BYTE $0x21 + + G1 + G2 + + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 5 + /////////////////////////////////////////////////////////////////////////// + + BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc3 // VPUNPCKHQDQ YMM8, YMM12, YMM11 + BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xcc // VPUNPCKLQDQ YMM9, YMM10, YMM12 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x30 + BYTE $0x30 + + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc3 // VPERMQ YMM8, YMM11, 0xff + BYTE $0xff + BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM10, YMM8 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0xff + BYTE $0xff + BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM11, YMM9 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x20 + BYTE $0x20 + + G1 + G2 + + DIAGONALIZE + + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc4 // VPERMQ YMM8, YMM12, 0xff + BYTE $0xff + BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM13, YMM8 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xca // VPERMQ YMM9, YMM10, 0xff + BYTE $0xff + BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM11, YMM9 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x31 + BYTE $0x31 + + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc5 // VPERMQ YMM8, YMM13, 0x00 + BYTE $0x00 + BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xc0 // VPUNPCKHQDQ YMM8, YMM10, YMM8 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0x55 + BYTE $0x55 + BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM12, YMM9 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x20 + BYTE $0x20 + + G1 + G2 + + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 6 + /////////////////////////////////////////////////////////////////////////// + + BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc3 // VPUNPCKLQDQ YMM8, YMM10, YMM11 + BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xcc // VPUNPCKLQDQ YMM9, YMM10, YMM12 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x21 + BYTE $0x21 + + BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc4 // VPUNPCKLQDQ YMM8, YMM13, YMM12 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x0c + BYTE $0x0c + BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xca // VPUNPCKHQDQ YMM9, YMM12, YMM10 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 + BYTE $0x30 + + G1 + G2 + + DIAGONALIZE + + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc3 // VPERMQ YMM8, YMM11, 0x0c + BYTE $0x0c + BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xca // VPUNPCKHQDQ YMM9, YMM13, YMM10 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x60 + BYTE $0x60 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x30 + BYTE $0x30 + + BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xc3 // VPUNPCKHQDQ YMM8, YMM13, YMM11 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0x55 + BYTE $0x55 + BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM13, YMM9 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 + BYTE $0x30 + + G1 + G2 + + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 7 + /////////////////////////////////////////////////////////////////////////// + + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc2 // VPERMQ YMM8, YMM10, 0x55 + BYTE $0x55 + BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM13, YMM8 + BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xcb // VPUNPCKLQDQ YMM9, YMM13, YMM11 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x60 + BYTE $0x60 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x30 + BYTE $0x30 + + BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xc5 // VPUNPCKHQDQ YMM8, YMM11, YMM13 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x0c + BYTE $0x0c + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0xaa + BYTE $0xaa + BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xc9 // VPUNPCKHQDQ YMM9, YMM13, YMM9 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x20 + BYTE $0x20 + + G1 + G2 + + DIAGONALIZE + + BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc3 // VPUNPCKLQDQ YMM8, YMM10, YMM11 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x0c + BYTE $0x0c + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0x01 + BYTE $0x01 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 + BYTE $0x20 + + BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ YMM8, YMM11, YMM10 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0xff + BYTE $0xff + BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM10, YMM9 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x31 + BYTE $0x31 + + G1 + G2 + + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 8 + /////////////////////////////////////////////////////////////////////////// + + BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xc3 // VPUNPCKHQDQ YMM8, YMM13, YMM11 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x0c + BYTE $0x0c + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xca // VPERMQ YMM9, YMM10, 0xff + BYTE $0xff + BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM13, YMM9 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 + BYTE $0x20 + + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc5 // VPERMQ YMM8, YMM13, 0xaa + BYTE $0xaa + BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc0 // VPUNPCKHQDQ YMM8, YMM12, YMM8 + BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xcc // VPUNPCKHQDQ YMM9, YMM10, YMM12 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x21 + BYTE $0x21 + + G1 + G2 + + DIAGONALIZE + + BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xc5 // VPUNPCKHQDQ YMM8, YMM11, YMM13 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x0c + BYTE $0x0c + BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6c; BYTE $0xca // VPUNPCKLQDQ YMM9, YMM12, YMM10 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x0c + BYTE $0x0c + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 + BYTE $0x20 + + BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc3 // VPUNPCKLQDQ YMM8, YMM10, YMM11 + BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xcc // VPUNPCKLQDQ YMM9, YMM11, YMM12 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 + BYTE $0x30 + + G1 + G2 + + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 9 + /////////////////////////////////////////////////////////////////////////// + + BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc5 // VPUNPCKLQDQ YMM8, YMM11, YMM13 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xca // VPERMQ YMM9, YMM10, 0x00 + BYTE $0x00 + BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc9 // VPUNPCKHQDQ YMM9, YMM12, YMM9 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x31 + BYTE $0x31 + + BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xc4 // VPUNPCKHQDQ YMM8, YMM13, YMM12 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x60 + BYTE $0x60 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0x00 + BYTE $0x00 + BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xc9 // VPUNPCKHQDQ YMM9, YMM10, YMM9 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x31 + BYTE $0x31 + + G1 + G2 + + DIAGONALIZE + + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0xaa + BYTE $0xaa + BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xc9 // VPUNPCKHQDQ YMM9, YMM10, YMM9 + BYTE $0xc4; BYTE $0xc3; BYTE $0x15; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM13, YMM9, 0x20 + BYTE $0x20 + + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc3 // VPERMQ YMM8, YMM11, 0xff + BYTE $0xff + BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM10, YMM8 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcb // VPERMQ YMM9, YMM11, 0x04 + BYTE $0x04 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x21 + BYTE $0x21 + + G1 + G2 + + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 10 + /////////////////////////////////////////////////////////////////////////// + + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc4 // VPERMQ YMM8, YMM12, 0x20 + BYTE $0x20 + BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xca // VPUNPCKHQDQ YMM9, YMM11, YMM10 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x60 + BYTE $0x60 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x31 + BYTE $0x31 + + BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc3 // VPUNPCKLQDQ YMM8, YMM10, YMM11 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x60 + BYTE $0x60 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcb // VPERMQ YMM9, YMM11, 0x60 + BYTE $0x60 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x31 + BYTE $0x31 + + G1 + G2 + + DIAGONALIZE + + BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xc4 // VPUNPCKHQDQ YMM8, YMM13, YMM12 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x60 + BYTE $0x60 + BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xcd // VPUNPCKHQDQ YMM9, YMM10, YMM13 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x60 + BYTE $0x60 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x31 + BYTE $0x31 + + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc5 // VPERMQ YMM8, YMM13, 0xaa + BYTE $0xaa + BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc0 // VPUNPCKHQDQ YMM8, YMM12, YMM8 + BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xca // VPUNPCKLQDQ YMM9, YMM13, YMM10 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x21 + BYTE $0x21 + + G1 + G2 + + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 1 1 + /////////////////////////////////////////////////////////////////////////// + + BYTE $0xc4; BYTE $0xc1; BYTE $0x2d; BYTE $0x6c; BYTE $0xe3 // VPUNPCKLQDQ YMM4, YMM10, YMM11 /* m[0], m[4], m[2], m[6] */ + BYTE $0xc4; BYTE $0xc1; BYTE $0x2d; BYTE $0x6d; BYTE $0xeb // VPUNPCKHQDQ YMM5, YMM10, YMM11 /* m[1], m[5], m[3], m[7] */ + BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xe4 // VPERMQ YMM4, YMM4, 0xd8 /* 0x1101 1000 = 0xd8 */ + BYTE $0xd8 + BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xed // VPERMQ YMM5, YMM5, 0xd8 /* 0x1101 1000 = 0xd8 */ + BYTE $0xd8 + + G1 + G2 + + DIAGONALIZE + + BYTE $0xc4; BYTE $0xc1; BYTE $0x1d; BYTE $0x6c; BYTE $0xe5 // VPUNPCKLQDQ YMM4, YMM12, YMM13 /* m[8], m[12], m[10], m[14] */ + BYTE $0xc4; BYTE $0xc1; BYTE $0x1d; BYTE $0x6d; BYTE $0xed // VPUNPCKHQDQ YMM5, YMM12, YMM13 /* m[9], m[13], m[11], m[15] */ + BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xe4 // VPERMQ YMM4, YMM4, 0xd8 /* 0x1101 1000 = 0xd8 */ + BYTE $0xd8 + BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xed // VPERMQ YMM5, YMM5, 0xd8 /* 0x1101 1000 = 0xd8 */ + BYTE $0xd8 + + G1 + G2 + + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 1 2 + /////////////////////////////////////////////////////////////////////////// + + BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc5 // VPUNPCKLQDQ YMM8, YMM11, YMM13 /* m[4], ____, ____, m[14] */ + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x03 /* m[14], m[4], ____, ____ */ /* xxxx 0011 = 0x03 */ + BYTE $0x03 + BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xcd // VPUNPCKHQDQ YMM9, YMM12, YMM13 /* m[9], m[13], ____, ____ */ + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 /* m[9], m[13], ____, ____ */ /* 0010 0000 = 0x20 */ + BYTE $0x20 + + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc4 // VPERMQ YMM8, YMM12, 0x02 /* m[10], m[8], ____, ____ */ /* xxxx 0010 = 0x02 */ + BYTE $0x02 + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0x30 /* ____, ____, m[15], ____ */ /* xx11 xxxx = 0x30 */ + BYTE $0x30 + BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6c; BYTE $0xcb // VPUNPCKLQDQ YMM9, YMM9, YMM11 /* ____, ____, m[15], m[6] */ + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 /* m[9], m[13], m[15], m[6] */ /* 0011 0000 = 0x30 */ + BYTE $0x30 + + G1 + G2 + + DIAGONALIZE + + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc2 // VPERMQ YMM8, YMM10, 0x01 /* m[1], m[0], ____, ____ */ /* xxxx 0001 = 0x01 */ + BYTE $0x01 + BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xcc // VPUNPCKHQDQ YMM9, YMM11, YMM12 /* m[5], ____, ____, m[11] */ + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x03 /* m[11], m[5], ____, ____ */ /* xxxx 0011 = 0x03 */ + BYTE $0x03 + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 /* m[1], m[0], m[11], m[5] */ /* 0010 0000 = 0x20 */ + BYTE $0x20 + + BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc5 // VPUNPCKLQDQ YMM8, YMM10, YMM13 /* ___, m[12], m[2], ____ */ + BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x09 /* m[12], m[2], ____, ____ */ /* xxxx 1001 = 0x09 */ + BYTE $0x09 + BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xca // VPUNPCKHQDQ YMM9, YMM11, YMM10 /* ____, ____, m[7], m[3] */ + BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 /* m[9], m[13], m[15], m[6] */ /* 0011 0000 = 0x30 */ + BYTE $0x30 + + G1 + G2 + + UNDIAGONALIZE + + // Reload digest (most current value store in &out) + MOVQ out+144(FP), SI // SI: &in + BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x26 // VMOVDQU YMM12, [rsi] + BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x6e; BYTE $0x20 // VMOVDQU YMM13, 32[rsi] + + BYTE $0xc5; BYTE $0xfd; BYTE $0xef; BYTE $0xc2 // VPXOR YMM0,YMM0,YMM2 /* X0 = X0 ^ X4, X1 = X1 ^ X5 */ + BYTE $0xc4; BYTE $0xc1; BYTE $0x7d; BYTE $0xef; BYTE $0xc4 // VPXOR YMM0,YMM0,YMM12 /* X0 = X0 ^ X12, X1 = X1 ^ X13 */ + BYTE $0xc5; BYTE $0xf5; BYTE $0xef; BYTE $0xcb // VPXOR YMM1,YMM1,YMM3 /* X2 = X2 ^ X6, X3 = X3 ^ X7 */ + BYTE $0xc4; BYTE $0xc1; BYTE $0x75; BYTE $0xef; BYTE $0xcd // VPXOR YMM1,YMM1,YMM13 /* X2 = X2 ^ X14, X3 = X3 ^ X15 */ + + // Store digest into &out + MOVQ out+144(FP), SI // SI: &out + BYTE $0xc5; BYTE $0xfe; BYTE $0x7f; BYTE $0x06 // VMOVDQU [rsi], YMM0 + BYTE $0xc5; BYTE $0xfe; BYTE $0x7f; BYTE $0x4e; BYTE $0x20 // VMOVDQU 32[rsi], YMM1 + + // Increment message pointer and check if there's more to do + ADDQ $128, DX // message += 128 + SUBQ $1, R8 + JNZ loop + +complete: + BYTE $0xc5; BYTE $0xf8; BYTE $0x77 // VZEROUPPER /* Prevent further context switches */ + RET + diff --git a/vendor/github.com/minio/blake2b-simd/compressAvx_amd64.go b/vendor/github.com/minio/blake2b-simd/compressAvx_amd64.go new file mode 100644 index 000000000..cfa12c04f --- /dev/null +++ b/vendor/github.com/minio/blake2b-simd/compressAvx_amd64.go @@ -0,0 +1,41 @@ +//+build !noasm +//+build !appengine + +/* + * Minio Cloud Storage, (C) 2016 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package blake2b + +//go:noescape +func blockAVXLoop(p []uint8, in, iv, t, f, shffle, out []uint64) + +func compressAVX(d *digest, p []uint8) { + var ( + in [8]uint64 + out [8]uint64 + shffle [2]uint64 + ) + + // vector for PSHUFB instruction + shffle[0] = 0x0201000706050403 + shffle[1] = 0x0a09080f0e0d0c0b + + in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7] = d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] + + blockAVXLoop(p, in[:], iv[:], d.t[:], d.f[:], shffle[:], out[:]) + + d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] = out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7] +} diff --git a/vendor/github.com/minio/blake2b-simd/compressAvx_amd64.s b/vendor/github.com/minio/blake2b-simd/compressAvx_amd64.s new file mode 100644 index 000000000..f68e17392 --- /dev/null +++ b/vendor/github.com/minio/blake2b-simd/compressAvx_amd64.s @@ -0,0 +1,682 @@ +//+build !noasm !appengine + +// +// Minio Cloud Storage, (C) 2016 Minio, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// +// Based on SSE implementation from https://github.com/BLAKE2/BLAKE2/blob/master/sse/blake2b.c +// +// Use github.com/fwessels/asm2plan9s on this file to assemble instructions to their Plan9 equivalent +// +// Assembly code below essentially follows the ROUND macro (see blake2b-round.h) which is defined as: +// #define ROUND(r) \ +// LOAD_MSG_ ##r ##_1(b0, b1); \ +// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ +// LOAD_MSG_ ##r ##_2(b0, b1); \ +// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ +// DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ +// LOAD_MSG_ ##r ##_3(b0, b1); \ +// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ +// LOAD_MSG_ ##r ##_4(b0, b1); \ +// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ +// UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); +// +// as well as the go equivalent in https://github.com/dchest/blake2b/blob/master/block.go +// +// As in the macro, G1/G2 in the 1st and 2nd half are identical (so literal copy of assembly) +// +// Rounds are also the same, except for the loading of the message (and rounds 1 & 11 and +// rounds 2 & 12 are identical) +// + +#define G1 \ + \ // G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); + LONG $0xd479c1c4; BYTE $0xc0 \ // VPADDQ XMM0,XMM0,XMM8 /* v0 += m[0], v1 += m[2] */ + LONG $0xd471c1c4; BYTE $0xc9 \ // VPADDQ XMM1,XMM1,XMM9 /* v2 += m[4], v3 += m[6] */ + LONG $0xc2d4f9c5 \ // VPADDQ XMM0,XMM0,XMM2 /* v0 += v4, v1 += v5 */ + LONG $0xcbd4f1c5 \ // VPADDQ XMM1,XMM1,XMM3 /* v2 += v6, v3 += v7 */ + LONG $0xf0efc9c5 \ // VPXOR XMM6,XMM6,XMM0 /* v12 ^= v0, v13 ^= v1 */ + LONG $0xf9efc1c5 \ // VPXOR XMM7,XMM7,XMM1 /* v14 ^= v2, v15 ^= v3 */ + LONG $0xf670f9c5; BYTE $0xb1 \ // VPSHUFD XMM6,XMM6,0xb1 /* v12 = v12<<(64-32) | v12>>32, v13 = v13<<(64-32) | v13>>32 */ + LONG $0xff70f9c5; BYTE $0xb1 \ // VPSHUFD XMM7,XMM7,0xb1 /* v14 = v14<<(64-32) | v14>>32, v15 = v15<<(64-32) | v15>>32 */ + LONG $0xe6d4d9c5 \ // VPADDQ XMM4,XMM4,XMM6 /* v8 += v12, v9 += v13 */ + LONG $0xefd4d1c5 \ // VPADDQ XMM5,XMM5,XMM7 /* v10 += v14, v11 += v15 */ + LONG $0xd4efe9c5 \ // VPXOR XMM2,XMM2,XMM4 /* v4 ^= v8, v5 ^= v9 */ + LONG $0xddefe1c5 \ // VPXOR XMM3,XMM3,XMM5 /* v6 ^= v10, v7 ^= v11 */ + LONG $0x0069c2c4; BYTE $0xd4 \ // VPSHUFB XMM2,XMM2,XMM12 /* v4 = v4<<(64-24) | v4>>24, v5 = v5<<(64-24) | v5>>24 */ + LONG $0x0061c2c4; BYTE $0xdc // VPSHUFB XMM3,XMM3,XMM12 /* v6 = v6<<(64-24) | v6>>24, v7 = v7<<(64-24) | v7>>24 */ + +#define G2 \ + \ // G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); + LONG $0xd479c1c4; BYTE $0xc2 \ // VPADDQ XMM0,XMM0,XMM10 /* v0 += m[1], v1 += m[3] */ + LONG $0xd471c1c4; BYTE $0xcb \ // VPADDQ XMM1,XMM1,XMM11 /* v2 += m[5], v3 += m[7] */ + LONG $0xc2d4f9c5 \ // VPADDQ XMM0,XMM0,XMM2 /* v0 += v4, v1 += v5 */ + LONG $0xcbd4f1c5 \ // VPADDQ XMM1,XMM1,XMM3 /* v2 += v6, v3 += v7 */ + LONG $0xf0efc9c5 \ // VPXOR XMM6,XMM6,XMM0 /* v12 ^= v0, v13 ^= v1 */ + LONG $0xf9efc1c5 \ // VPXOR XMM7,XMM7,XMM1 /* v14 ^= v2, v15 ^= v3 */ + LONG $0xf670fbc5; BYTE $0x39 \ // VPSHUFLW XMM6,XMM6,0x39 /* combined with next ... */ + LONG $0xf670fac5; BYTE $0x39 \ // VPSHUFHW XMM6,XMM6,0x39 /* v12 = v12<<(64-16) | v12>>16, v13 = v13<<(64-16) | v13>>16 */ + LONG $0xff70fbc5; BYTE $0x39 \ // VPSHUFLW XMM7,XMM7,0x39 /* combined with next ... */ + LONG $0xff70fac5; BYTE $0x39 \ // VPSHUFHW XMM7,XMM7,0x39 /* v14 = v14<<(64-16) | v14>>16, v15 = v15<<(64-16) | v15>>16 */ + LONG $0xe6d4d9c5 \ // VPADDQ XMM4,XMM4,XMM6 /* v8 += v12, v9 += v13 */ + LONG $0xefd4d1c5 \ // VPADDQ XMM5,XMM5,XMM7 /* v10 += v14, v11 += v15 */ + LONG $0xd4efe9c5 \ // VPXOR XMM2,XMM2,XMM4 /* v4 ^= v8, v5 ^= v9 */ + LONG $0xddefe1c5 \ // VPXOR XMM3,XMM3,XMM5 /* v6 ^= v10, v7 ^= v11 */ + LONG $0xfad469c5 \ // VPADDQ XMM15,XMM2,XMM2 /* temp reg = reg*2 */ + LONG $0xd273e9c5; BYTE $0x3f \ // VPSRLQ XMM2,XMM2,0x3f /* reg = reg>>63 */ + LONG $0xef69c1c4; BYTE $0xd7 \ // VPXOR XMM2,XMM2,XMM15 /* ORed together: v4 = v4<<(64-63) | v4>>63, v5 = v5<<(64-63) | v5>>63 */ + LONG $0xfbd461c5 \ // VPADDQ XMM15,XMM3,XMM3 /* temp reg = reg*2 */ + LONG $0xd373e1c5; BYTE $0x3f \ // VPSRLQ XMM3,XMM3,0x3f /* reg = reg>>63 */ + LONG $0xef61c1c4; BYTE $0xdf // VPXOR XMM3,XMM3,XMM15 /* ORed together: v6 = v6<<(64-63) | v6>>63, v7 = v7<<(64-63) | v7>>63 */ + +#define DIAGONALIZE \ + \ // DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); + MOVOU X6, X13 \ /* t0 = row4l;\ */ + MOVOU X2, X14 \ /* t1 = row2l;\ */ + MOVOU X4, X6 \ /* row4l = row3l;\ */ + MOVOU X5, X4 \ /* row3l = row3h;\ */ + MOVOU X6, X5 \ /* row3h = row4l;\ */ + LONG $0x6c1141c4; BYTE $0xfd \ // VPUNPCKLQDQ XMM15, XMM13, XMM13 /* _mm_unpacklo_epi64(t0, t0) */ + LONG $0x6d41c1c4; BYTE $0xf7 \ // VPUNPCKHQDQ XMM6, XMM7, XMM15 /* row4l = _mm_unpackhi_epi64(row4h, ); \ */ + LONG $0xff6c41c5 \ // VPUNPCKLQDQ XMM15, XMM7, XMM7 /* _mm_unpacklo_epi64(row4h, row4h) */ + LONG $0x6d11c1c4; BYTE $0xff \ // VPUNPCKHQDQ XMM7, XMM13, XMM15 /* row4h = _mm_unpackhi_epi64(t0, ); \ */ + LONG $0xfb6c61c5 \ // VPUNPCKLQDQ XMM15, XMM3, XMM3 /* _mm_unpacklo_epi64(row2h, row2h) */ + LONG $0x6d69c1c4; BYTE $0xd7 \ // VPUNPCKHQDQ XMM2, XMM2, XMM15 /* row2l = _mm_unpackhi_epi64(row2l, ); \ */ + LONG $0x6c0941c4; BYTE $0xfe \ // VPUNPCKLQDQ XMM15, XMM14, XMM14 /* _mm_unpacklo_epi64(t1, t1) */ + LONG $0x6d61c1c4; BYTE $0xdf // VPUNPCKHQDQ XMM3, XMM3, XMM15 /* row2h = _mm_unpackhi_epi64(row2h, ) */ + +#define UNDIAGONALIZE \ + \ // UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); + MOVOU X4, X13 \ /* t0 = row3l;\ */ + MOVOU X5, X4 \ /* row3l = row3h;\ */ + MOVOU X13, X5 \ /* row3h = t0;\ */ + MOVOU X2, X13 \ /* t0 = row2l;\ */ + MOVOU X6, X14 \ /* t1 = row4l;\ */ + LONG $0xfa6c69c5 \ // VPUNPCKLQDQ XMM15, XMM2, XMM2 /* _mm_unpacklo_epi64(row2l, row2l) */ + LONG $0x6d61c1c4; BYTE $0xd7 \ // VPUNPCKHQDQ XMM2, XMM3, XMM15 /* row2l = _mm_unpackhi_epi64(row2h, ); \ */ + LONG $0xfb6c61c5 \ // VPUNPCKLQDQ XMM15, XMM3, XMM3 /* _mm_unpacklo_epi64(row2h, row2h) */ + LONG $0x6d11c1c4; BYTE $0xdf \ // VPUNPCKHQDQ XMM3, XMM13, XMM15 /* row2h = _mm_unpackhi_epi64(t0, ); \ */ + LONG $0xff6c41c5 \ // VPUNPCKLQDQ XMM15, XMM7, XMM7 /* _mm_unpacklo_epi64(row4h, row4h) */ + LONG $0x6d49c1c4; BYTE $0xf7 \ // VPUNPCKHQDQ XMM6, XMM6, XMM15 /* row4l = _mm_unpackhi_epi64(row4l, ); \ */ + LONG $0x6c0941c4; BYTE $0xfe \ // VPUNPCKLQDQ XMM15, XMM14, XMM14 /* _mm_unpacklo_epi64(t1, t1) */ + LONG $0x6d41c1c4; BYTE $0xff // VPUNPCKHQDQ XMM7, XMM7, XMM15 /* row4h = _mm_unpackhi_epi64(row4h, ) */ + +#define LOAD_SHUFFLE \ + \ // Load shuffle value + MOVQ shffle+120(FP), SI \ // SI: &shuffle + MOVOU 0(SI), X12 // X12 = 03040506 07000102 0b0c0d0e 0f08090a + +// func blockAVXLoop(p []uint8, in, iv, t, f, shffle, out []uint64) +TEXT ·blockAVXLoop(SB), 7, $0 + // REGISTER USE + // R8: loop counter + // DX: message pointer + // SI: temp pointer for loading + // X0 - X7: v0 - v15 + // X8 - X11: m[0] - m[7] + // X12: shuffle value + // X13 - X15: temp registers + + // Load digest + MOVQ in+24(FP), SI // SI: &in + MOVOU 0(SI), X0 // X0 = in[0]+in[1] /* row1l = LOAD( &S->h[0] ); */ + MOVOU 16(SI), X1 // X1 = in[2]+in[3] /* row1h = LOAD( &S->h[2] ); */ + MOVOU 32(SI), X2 // X2 = in[4]+in[5] /* row2l = LOAD( &S->h[4] ); */ + MOVOU 48(SI), X3 // X3 = in[6]+in[7] /* row2h = LOAD( &S->h[6] ); */ + + // Already store digest into &out (so we can reload it later generically) + MOVQ out+144(FP), SI // SI: &out + MOVOU X0, 0(SI) // out[0]+out[1] = X0 + MOVOU X1, 16(SI) // out[2]+out[3] = X1 + MOVOU X2, 32(SI) // out[4]+out[5] = X2 + MOVOU X3, 48(SI) // out[6]+out[7] = X3 + + // Initialize message pointer and loop counter + MOVQ message+0(FP), DX // DX: &p (message) + MOVQ message_len+8(FP), R8 // R8: len(message) + SHRQ $7, R8 // len(message) / 128 + CMPQ R8, $0 + JEQ complete + +loop: + // Increment counter + MOVQ t+72(FP), SI // SI: &t + MOVQ 0(SI), R9 + ADDQ $128, R9 // /* d.t[0] += BlockSize */ + MOVQ R9, 0(SI) + CMPQ R9, $128 // /* if d.t[0] < BlockSize { */ + JGE noincr + MOVQ 8(SI), R9 + ADDQ $1, R9 // /* d.t[1]++ */ + MOVQ R9, 8(SI) +noincr: // /* } */ + + // Load initialization vector + MOVQ iv+48(FP), SI // SI: &iv + MOVOU 0(SI), X4 // X4 = iv[0]+iv[1] /* row3l = LOAD( &blake2b_IV[0] ); */ + MOVOU 16(SI), X5 // X5 = iv[2]+iv[3] /* row3h = LOAD( &blake2b_IV[2] ); */ + MOVOU 32(SI), X6 // X6 = iv[4]+iv[5] /* LOAD( &blake2b_IV[4] ) */ + MOVOU 48(SI), X7 // X7 = iv[6]+iv[7] /* LOAD( &blake2b_IV[6] ) */ + MOVQ t+72(FP), SI // SI: &t + MOVOU 0(SI), X8 // X8 = t[0]+t[1] /* LOAD( &S->t[0] ) */ + PXOR X8, X6 // X6 = X6 ^ X8 /* row4l = _mm_xor_si128( , ); */ + MOVQ t+96(FP), SI // SI: &f + MOVOU 0(SI), X8 // X8 = f[0]+f[1] /* LOAD( &S->f[0] ) */ + PXOR X8, X7 // X7 = X7 ^ X8 /* row4h = _mm_xor_si128( , ); */ + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 1 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+m[1] + MOVOU 16(DX), X13 // X13 = m[2]+m[3] + MOVOU 32(DX), X14 // X14 = m[4]+m[5] + MOVOU 48(DX), X15 // X15 = m[6]+m[7] + LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[0], m[2] */ + LONG $0x6c0941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM14, XMM15 /* m[4], m[6] */ + LONG $0x6d1941c4; BYTE $0xd5 // VPUNPCKHQDQ XMM10, XMM12, XMM13 /* m[1], m[3] */ + LONG $0x6d0941c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM14, XMM15 /* m[5], m[7] */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 64(DX), X12 // X12 = m[8]+ m[9] + MOVOU 80(DX), X13 // X13 = m[10]+m[11] + MOVOU 96(DX), X14 // X14 = m[12]+m[13] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[8],m[10] */ + LONG $0x6c0941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM14, XMM15 /* m[12],m[14] */ + LONG $0x6d1941c4; BYTE $0xd5 // VPUNPCKHQDQ XMM10, XMM12, XMM13 /* m[9],m[11] */ + LONG $0x6d0941c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM14, XMM15 /* m[13],m[15] */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 2 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 112(DX), X12 // X12 = m[14]+m[15] + MOVOU 32(DX), X13 // X13 = m[4]+ m[5] + MOVOU 64(DX), X14 // X14 = m[8]+ m[9] + MOVOU 96(DX), X15 // X15 = m[12]+m[13] + LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[14], m[4] */ + LONG $0x6d0941c4; BYTE $0xcf // VPUNPCKHQDQ XMM9, XMM14, XMM15 /* m[9], m[13] */ + MOVOU 80(DX), X13 // X13 = m[10]+m[11] + MOVOU 48(DX), X15 // X15 = m[6]+ m[7] + LONG $0x6c1141c4; BYTE $0xd6 // VPUNPCKLQDQ XMM10, XMM13, XMM14 /* m[10], m[8] */ + LONG $0x0f0143c4; WORD $0x08dc // VPALIGNR XMM11, XMM15, XMM12, 0x8 /* m[15], m[6] */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 32(DX), X13 // X13 = m[4]+ m[5] + MOVOU 80(DX), X14 // X14 = m[10]+m[11] + LONG $0x0f1943c4; WORD $0x08c4 // VPALIGNR XMM8, XMM12, XMM12, 0x8 /* m[1], m[0] */ + LONG $0x6d0941c4; BYTE $0xcd // VPUNPCKHQDQ XMM9, XMM14, XMM13 /* m[11], m[5] */ + MOVOU 16(DX), X12 // X12 = m[2]+ m[3] + MOVOU 48(DX), X13 // X13 = m[6]+ m[7] + MOVOU 96(DX), X14 // X14 = m[12]+m[13] + LONG $0x6c0941c4; BYTE $0xd4 // VPUNPCKLQDQ XMM10, XMM14, XMM12 /* m[12], m[2] */ + LONG $0x6d1141c4; BYTE $0xdc // VPUNPCKHQDQ XMM11, XMM13, XMM12 /* m[7], m[3] */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 3 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 32(DX), X12 // X12 = m[4]+ m[5] + MOVOU 80(DX), X13 // X13 = m[10]+m[11] + MOVOU 96(DX), X14 // X14 = m[12]+m[13] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + LONG $0x0f0943c4; WORD $0x08c5 // VPALIGNR XMM8, XMM14, XMM13, 0x8 /* m[11], m[12] */ + LONG $0x6d1941c4; BYTE $0xcf // VPUNPCKHQDQ XMM9, XMM12, XMM15 /* m[5], m[15] */ + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 16(DX), X13 // X13 = m[2]+ m[3] + MOVOU 64(DX), X15 // X15 = m[8]+ m[9] + LONG $0x6c0141c4; BYTE $0xd4 // VPUNPCKLQDQ XMM10, XMM15, XMM12 /* m[8], m[0] */ + LONG $0x6d0941c4; BYTE $0xde // VPUNPCKHQDQ XMM11, XMM14, XMM14 /* ___, m[13] */ + LONG $0x6c1141c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM13, XMM11 /* m[2], ___ */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 16(DX), X12 // X12 = m[2]+ m[3] + MOVOU 48(DX), X13 // X13 = m[6]+ m[7] + MOVOU 64(DX), X14 // X14 = m[8]+ m[9] + MOVOU 80(DX), X15 // X15 = m[10]+m[11] + LONG $0x6d1941c4; BYTE $0xc4 // VPUNPCKHQDQ XMM8, XMM12, XMM12 /* ___, m[3] */ + LONG $0x6c0141c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM15, XMM8 /* m[10], ___ */ + LONG $0x6d1141c4; BYTE $0xce // VPUNPCKHQDQ XMM9, XMM13, XMM14 /* m[7], m[9] */ + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 32(DX), X14 // X14 = m[4]+ m[5] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + LONG $0x6c0141c4; BYTE $0xd5 // VPUNPCKLQDQ XMM10, XMM15, XMM13 /* m[14], m[6] */ + LONG $0x0f0943c4; WORD $0x08dc // VPALIGNR XMM11, XMM14, XMM12, 0x8 /* m[1], m[4] */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 4 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 16(DX), X12 // X12 = m[2]+ m[3] + MOVOU 48(DX), X13 // X13 = m[6]+ m[7] + MOVOU 80(DX), X14 // X14 = m[10]+m[11] + MOVOU 96(DX), X15 // X15 = m[12]+m[13] + LONG $0x6d1141c4; BYTE $0xc4 // VPUNPCKHQDQ XMM8, XMM13, XMM12 /* m[7], m[3] */ + LONG $0x6d0141c4; BYTE $0xce // VPUNPCKHQDQ XMM9, XMM15, XMM14 /* m[13], m[11] */ + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 64(DX), X13 // X13 = m[8]+ m[9] + MOVOU 112(DX), X14 // X14 = m[14]+m[15] + LONG $0x6d1141c4; BYTE $0xd4 // VPUNPCKHQDQ XMM10, XMM13, XMM12 /* m[9], m[1] */ + LONG $0x6c0141c4; BYTE $0xde // VPUNPCKLQDQ XMM11, XMM15, XMM14 /* m[12], m[14] */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 16(DX), X12 // X12 = m[2]+ m[3] + MOVOU 32(DX), X13 // X13 = m[4]+ m[5] + MOVOU 80(DX), X14 // X14 = m[10]+m[11] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + LONG $0x6d1141c4; BYTE $0xc5 // VPUNPCKHQDQ XMM8, XMM13, XMM13 /* ___, m[5] */ + LONG $0x6c1941c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM12, XMM8 /* m[2], ____ */ + LONG $0x6d0141c4; BYTE $0xcf // VPUNPCKHQDQ XMM9, XMM15, XMM15 /* ___, m[15] */ + LONG $0x6c1141c4; BYTE $0xc9 // VPUNPCKLQDQ XMM9, XMM13, XMM9 /* m[4], ____ */ + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 48(DX), X13 // X13 = m[6]+ m[7] + MOVOU 64(DX), X15 // X15 = m[8]+ m[9] + LONG $0x6c1141c4; BYTE $0xd6 // VPUNPCKLQDQ XMM10, XMM13, XMM14 /* m[6], m[10] */ + LONG $0x6c1941c4; BYTE $0xdf // VPUNPCKLQDQ XMM11, XMM12, XMM15 /* m[0], m[8] */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 5 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 16(DX), X12 // X12 = m[2]+ m[3] + MOVOU 32(DX), X13 // X13 = m[4]+ m[5] + MOVOU 64(DX), X14 // X14 = m[8]+ m[9] + MOVOU 80(DX), X15 // X15 = m[10]+m[11] + LONG $0x6d0941c4; BYTE $0xc5 // VPUNPCKHQDQ XMM8, XMM14, XMM13 /* m[9], m[5] */ + LONG $0x6c1941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM12, XMM15 /* m[2], m[10] */ + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 48(DX), X14 // X14 = m[6]+ m[7] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + LONG $0x6d0941c4; BYTE $0xd6 // VPUNPCKHQDQ XMM10, XMM14, XMM14 /* ___, m[7] */ + LONG $0x6c1941c4; BYTE $0xd2 // VPUNPCKLQDQ XMM10, XMM12, XMM10 /* m[0], ____ */ + LONG $0x6d0141c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM15, XMM15 /* ___, m[15] */ + LONG $0x6c1141c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM13, XMM11 /* m[4], ____ */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 16(DX), X12 // X12 = m[2]+ m[3] + MOVOU 48(DX), X13 // X13 = m[6]+ m[7] + MOVOU 80(DX), X14 // X14 = m[10]+m[11] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + LONG $0x6d0941c4; BYTE $0xc6 // VPUNPCKHQDQ XMM8, XMM14, XMM14 /* ___, m[11] */ + LONG $0x6c0141c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM15, XMM8 /* m[14], ____ */ + LONG $0x6d1941c4; BYTE $0xcc // VPUNPCKHQDQ XMM9, XMM12, XMM12 /* ___, m[3] */ + LONG $0x6c1141c4; BYTE $0xc9 // VPUNPCKLQDQ XMM9, XMM13, XMM9 /* m[6], ____ */ + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 64(DX), X13 // X13 = m[8]+ m[9] + MOVOU 96(DX), X14 // X14 = m[12]+m[13] + LONG $0x0f0943c4; WORD $0x08d4 // VPALIGNR XMM10, XMM14, XMM12, 0x8 /* m[1], m[12] */ + LONG $0x6d0941c4; BYTE $0xde // VPUNPCKHQDQ XMM11, XMM14, XMM14 /* ___, m[13] */ + LONG $0x6c1141c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM13, XMM11 /* m[8], ____ */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 6 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 16(DX), X13 // X13 = m[2]+ m[3] + MOVOU 48(DX), X14 // X14 = m[6]+ m[7] + MOVOU 64(DX), X15 // X15 = m[8]+ m[9] + LONG $0x6c1141c4; BYTE $0xc6 // VPUNPCKLQDQ XMM8, XMM13, XMM14 /* m[2], m[6] */ + LONG $0x6c1941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM12, XMM15 /* m[0], m[8] */ + MOVOU 80(DX), X12 // X12 = m[10]+m[11] + MOVOU 96(DX), X14 // X14 = m[12]+m[13] + LONG $0x6c0941c4; BYTE $0xd4 // VPUNPCKLQDQ XMM10, XMM14, XMM12 /* m[12], m[10] */ + LONG $0x6d1941c4; BYTE $0xdd // VPUNPCKHQDQ XMM11, XMM12, XMM13 /* m[11], m[3] */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 32(DX), X13 // X13 = m[4]+ m[5] + MOVOU 48(DX), X14 // X14 = m[6]+ m[7] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + LONG $0x6d0941c4; BYTE $0xc6 // VPUNPCKHQDQ XMM8, XMM14, XMM14 /* ___, m[7] */ + LONG $0x6c1141c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM13, XMM8 /* m[4], ____ */ + LONG $0x6d0141c4; BYTE $0xcc // VPUNPCKHQDQ XMM9, XMM15, XMM12 /* m[15], m[1] */ + MOVOU 64(DX), X12 // X12 = m[8]+ m[9] + MOVOU 96(DX), X14 // X14 = m[12]+m[13] + LONG $0x6d0941c4; BYTE $0xd5 // VPUNPCKHQDQ XMM10, XMM14, XMM13 /* m[13], m[5] */ + LONG $0x6d1941c4; BYTE $0xdc // VPUNPCKHQDQ XMM11, XMM12, XMM12 /* ___, m[9] */ + LONG $0x6c0141c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM15, XMM11 /* m[14], ____ */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 7 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 32(DX), X13 // X13 = m[4]+ m[5] + MOVOU 96(DX), X14 // X14 = m[12]+m[13] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + LONG $0x6d1941c4; BYTE $0xc4 // VPUNPCKHQDQ XMM8, XMM12, XMM12 /* ___, m[1] */ + LONG $0x6c0941c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM14, XMM8 /* m[12], ____ */ + LONG $0x6c0141c4; BYTE $0xcd // VPUNPCKLQDQ XMM9, XMM15, XMM13 /* m[14], m[4] */ + MOVOU 80(DX), X12 // X12 = m[10]+m[11] + LONG $0x6d1141c4; BYTE $0xd7 // VPUNPCKHQDQ XMM10, XMM13, XMM15 /* m[5], m[15] */ + LONG $0x0f1943c4; WORD $0x08de // VPALIGNR XMM11, XMM12, XMM14, 0x8 /* m[13], m[10] */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 48(DX), X13 // X13 = m[6]+ m[7] + MOVOU 64(DX), X14 // X14 = m[8]+ m[9] + MOVOU 80(DX), X15 // X15 = m[10]+m[11] + LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[0], m[6] */ + LONG $0x0f0943c4; WORD $0x08ce // VPALIGNR XMM9, XMM14, XMM14, 0x8 /* m[9], m[8] */ + MOVOU 16(DX), X14 // X14 = m[2]+ m[3] + LONG $0x6d1141c4; BYTE $0xd6 // VPUNPCKHQDQ XMM10, XMM13, XMM14 /* m[7], m[3] */ + LONG $0x6d0141c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM15, XMM15 /* ___, m[11] */ + LONG $0x6c0941c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM14, XMM11 /* m[2], ____ */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 8 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 16(DX), X12 // X12 = m[2]+ m[3] + MOVOU 48(DX), X13 // X13 = m[6]+ m[7] + MOVOU 96(DX), X14 // X14 = m[12]+m[13] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + LONG $0x6d0941c4; BYTE $0xc5 // VPUNPCKHQDQ XMM8, XMM14, XMM13 /* m[13], m[7] */ + LONG $0x6d1941c4; BYTE $0xcc // VPUNPCKHQDQ XMM9, XMM12, XMM12 /* ___, m[3] */ + LONG $0x6c0941c4; BYTE $0xc9 // VPUNPCKLQDQ XMM9, XMM14, XMM9 /* m[12], ____ */ + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 64(DX), X13 // X13 = m[8]+ m[9] + MOVOU 80(DX), X14 // X14 = m[10]+m[11] + LONG $0x0f0143c4; WORD $0x08d6 // VPALIGNR XMM10, XMM15, XMM14, 0x8 /* m[11], m[14] */ + LONG $0x6d1941c4; BYTE $0xdd // VPUNPCKHQDQ XMM11, XMM12, XMM13 /* m[1], m[9] */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 16(DX), X12 // X12 = m[2]+ m[3] + MOVOU 32(DX), X13 // X13 = m[4]+ m[5] + MOVOU 64(DX), X14 // X14 = m[8]+ m[9] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + LONG $0x6d1141c4; BYTE $0xc7 // VPUNPCKHQDQ XMM8, XMM13, XMM15 /* m[5], m[15] */ + LONG $0x6c0941c4; BYTE $0xcc // VPUNPCKLQDQ XMM9, XMM14, XMM12 /* m[8], m[2] */ + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 48(DX), X14 // X14 = m[6]+ m[7] + MOVOU 80(DX), X15 // X15 = m[10]+m[11] + LONG $0x6c1941c4; BYTE $0xd5 // VPUNPCKLQDQ XMM10, XMM12, XMM13 /* m[0], m[4] */ + LONG $0x6c0941c4; BYTE $0xdf // VPUNPCKLQDQ XMM11, XMM14, XMM15 /* m[6], m[10] */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 9 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 48(DX), X13 // X13 = m[6]+ m[7] + MOVOU 80(DX), X14 // X14 = m[10]+m[11] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + LONG $0x6c1141c4; BYTE $0xc7 // VPUNPCKLQDQ XMM8, XMM13, XMM15 /* m[6], m[14] */ + LONG $0x0f1943c4; WORD $0x08ce // VPALIGNR XMM9, XMM12, XMM14, 0x8 /* m[11], m[0] */ + MOVOU 16(DX), X13 // X13 = m[2]+ m[3] + MOVOU 64(DX), X14 // X14 = m[8]+ m[9] + LONG $0x6d0141c4; BYTE $0xd6 // VPUNPCKHQDQ XMM10, XMM15, XMM14 /* m[15], m[9] */ + LONG $0x0f0943c4; WORD $0x08dd // VPALIGNR XMM11, XMM14, XMM13, 0x8 /* m[3], m[8] */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 16(DX), X13 // X13 = m[2]+ m[3] + MOVOU 80(DX), X14 // X14 = m[10]+m[11] + MOVOU 96(DX), X15 // X15 = m[12]+m[13] + LONG $0x6d0141c4; BYTE $0xc7 // VPUNPCKHQDQ XMM8, XMM15, XMM15 /* ___, m[13] */ + LONG $0x6c0141c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM15, XMM8 /* m[12], ____ */ + LONG $0x0f0943c4; WORD $0x08cc // VPALIGNR XMM9, XMM14, XMM12, 0x8 /* m[1], m[10] */ + MOVOU 32(DX), X12 // X12 = m[4]+ m[5] + MOVOU 48(DX), X15 // X15 = m[6]+ m[7] + LONG $0x6d0141c4; BYTE $0xd7 // VPUNPCKHQDQ XMM10, XMM15, XMM15 /* ___, m[7] */ + LONG $0x6c1141c4; BYTE $0xd2 // VPUNPCKLQDQ XMM10, XMM13, XMM10 /* m[2], ____ */ + LONG $0x6d1941c4; BYTE $0xdc // VPUNPCKHQDQ XMM11, XMM12, XMM12 /* ___, m[5] */ + LONG $0x6c1941c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM12, XMM11 /* m[4], ____ */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 1 0 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 48(DX), X13 // X13 = m[6]+ m[7] + MOVOU 64(DX), X14 // X14 = m[8]+ m[9] + MOVOU 80(DX), X15 // X15 = m[10]+m[11] + LONG $0x6c0141c4; BYTE $0xc6 // VPUNPCKLQDQ XMM8, XMM15, XMM14 /* m[10], m[8] */ + LONG $0x6d1141c4; BYTE $0xcc // VPUNPCKHQDQ XMM9, XMM13, XMM12 /* m[7], m[1] */ + MOVOU 16(DX), X12 // X12 = m[2]+ m[3] + MOVOU 32(DX), X14 // X14 = m[4]+ m[5] + LONG $0x6c1941c4; BYTE $0xd6 // VPUNPCKLQDQ XMM10, XMM12, XMM14 /* m[2], m[4] */ + LONG $0x6d0941c4; BYTE $0xde // VPUNPCKHQDQ XMM11, XMM14, XMM14 /* ___, m[5] */ + LONG $0x6c1141c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM13, XMM11 /* m[6], ____ */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 16(DX), X12 // X12 = m[2]+ m[3] + MOVOU 64(DX), X13 // X13 = m[8]+ m[9] + MOVOU 96(DX), X14 // X14 = m[12]+m[13] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + LONG $0x6d0141c4; BYTE $0xc5 // VPUNPCKHQDQ XMM8, XMM15, XMM13 /* m[15], m[9] */ + LONG $0x6d1941c4; BYTE $0xce // VPUNPCKHQDQ XMM9, XMM12, XMM14 /* m[3], m[13] */ + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 80(DX), X13 // X13 = m[10]+m[11] + LONG $0x0f0143c4; WORD $0x08d5 // VPALIGNR XMM10, XMM15, XMM13, 0x8 /* m[11], m[14] */ + LONG $0x6c0941c4; BYTE $0xdc // VPUNPCKLQDQ XMM11, XMM14, XMM12 /* m[12], m[0] */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 1 1 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+m[1] + MOVOU 16(DX), X13 // X13 = m[2]+m[3] + MOVOU 32(DX), X14 // X14 = m[4]+m[5] + MOVOU 48(DX), X15 // X15 = m[6]+m[7] + LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[0], m[2] */ + LONG $0x6c0941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM14, XMM15 /* m[4], m[6] */ + LONG $0x6d1941c4; BYTE $0xd5 // VPUNPCKHQDQ XMM10, XMM12, XMM13 /* m[1], m[3] */ + LONG $0x6d0941c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM14, XMM15 /* m[5], m[7] */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 64(DX), X12 // X12 = m[8]+ m[9] + MOVOU 80(DX), X13 // X13 = m[10]+m[11] + MOVOU 96(DX), X14 // X14 = m[12]+m[13] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[8],m[10] */ + LONG $0x6c0941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM14, XMM15 /* m[12],m[14] */ + LONG $0x6d1941c4; BYTE $0xd5 // VPUNPCKHQDQ XMM10, XMM12, XMM13 /* m[9],m[11] */ + LONG $0x6d0941c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM14, XMM15 /* m[13],m[15] */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 1 2 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 112(DX), X12 // X12 = m[14]+m[15] + MOVOU 32(DX), X13 // X13 = m[4]+ m[5] + MOVOU 64(DX), X14 // X14 = m[8]+ m[9] + MOVOU 96(DX), X15 // X15 = m[12]+m[13] + LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[14], m[4] */ + LONG $0x6d0941c4; BYTE $0xcf // VPUNPCKHQDQ XMM9, XMM14, XMM15 /* m[9], m[13] */ + MOVOU 80(DX), X13 // X13 = m[10]+m[11] + MOVOU 48(DX), X15 // X15 = m[6]+ m[7] + LONG $0x6c1141c4; BYTE $0xd6 // VPUNPCKLQDQ XMM10, XMM13, XMM14 /* m[10], m[8] */ + LONG $0x0f0143c4; WORD $0x08dc // VPALIGNR XMM11, XMM15, XMM12, 0x8 /* m[15], m[6] */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 32(DX), X13 // X13 = m[4]+ m[5] + MOVOU 80(DX), X14 // X14 = m[10]+m[11] + LONG $0x0f1943c4; WORD $0x08c4 // VPALIGNR XMM8, XMM12, XMM12, 0x8 /* m[1], m[0] */ + LONG $0x6d0941c4; BYTE $0xcd // VPUNPCKHQDQ XMM9, XMM14, XMM13 /* m[11], m[5] */ + MOVOU 16(DX), X12 // X12 = m[2]+ m[3] + MOVOU 48(DX), X13 // X13 = m[6]+ m[7] + MOVOU 96(DX), X14 // X14 = m[12]+m[13] + LONG $0x6c0941c4; BYTE $0xd4 // VPUNPCKLQDQ XMM10, XMM14, XMM12 /* m[12], m[2] */ + LONG $0x6d1141c4; BYTE $0xdc // VPUNPCKHQDQ XMM11, XMM13, XMM12 /* m[7], m[3] */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + // Reload digest (most current value store in &out) + MOVQ out+144(FP), SI // SI: &in + MOVOU 0(SI), X12 // X12 = in[0]+in[1] /* row1l = LOAD( &S->h[0] ); */ + MOVOU 16(SI), X13 // X13 = in[2]+in[3] /* row1h = LOAD( &S->h[2] ); */ + MOVOU 32(SI), X14 // X14 = in[4]+in[5] /* row2l = LOAD( &S->h[4] ); */ + MOVOU 48(SI), X15 // X15 = in[6]+in[7] /* row2h = LOAD( &S->h[6] ); */ + + // Final computations and prepare for storing + PXOR X4, X0 // X0 = X0 ^ X4 /* row1l = _mm_xor_si128( row3l, row1l ); */ + PXOR X5, X1 // X1 = X1 ^ X5 /* row1h = _mm_xor_si128( row3h, row1h ); */ + PXOR X12, X0 // X0 = X0 ^ X12 /* STORE( &S->h[0], _mm_xor_si128( LOAD( &S->h[0] ), row1l ) ); */ + PXOR X13, X1 // X1 = X1 ^ X13 /* STORE( &S->h[2], _mm_xor_si128( LOAD( &S->h[2] ), row1h ) ); */ + PXOR X6, X2 // X2 = X2 ^ X6 /* row2l = _mm_xor_si128( row4l, row2l ); */ + PXOR X7, X3 // X3 = X3 ^ X7 /* row2h = _mm_xor_si128( row4h, row2h ); */ + PXOR X14, X2 // X2 = X2 ^ X14 /* STORE( &S->h[4], _mm_xor_si128( LOAD( &S->h[4] ), row2l ) ); */ + PXOR X15, X3 // X3 = X3 ^ X15 /* STORE( &S->h[6], _mm_xor_si128( LOAD( &S->h[6] ), row2h ) ); */ + + // Store digest into &out + MOVQ out+144(FP), SI // SI: &out + MOVOU X0, 0(SI) // out[0]+out[1] = X0 + MOVOU X1, 16(SI) // out[2]+out[3] = X1 + MOVOU X2, 32(SI) // out[4]+out[5] = X2 + MOVOU X3, 48(SI) // out[6]+out[7] = X3 + + // Increment message pointer and check if there's more to do + ADDQ $128, DX // message += 128 + SUBQ $1, R8 + JNZ loop + +complete: + RET diff --git a/vendor/github.com/minio/blake2b-simd/compressSse_amd64.go b/vendor/github.com/minio/blake2b-simd/compressSse_amd64.go new file mode 100644 index 000000000..d539a7ade --- /dev/null +++ b/vendor/github.com/minio/blake2b-simd/compressSse_amd64.go @@ -0,0 +1,41 @@ +//+build !noasm +//+build !appengine + +/* + * Minio Cloud Storage, (C) 2016 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package blake2b + +//go:noescape +func blockSSELoop(p []uint8, in, iv, t, f, shffle, out []uint64) + +func compressSSE(d *digest, p []uint8) { + var ( + in [8]uint64 + out [8]uint64 + shffle [2]uint64 + ) + + // vector for PSHUFB instruction + shffle[0] = 0x0201000706050403 + shffle[1] = 0x0a09080f0e0d0c0b + + in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7] = d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] + + blockSSELoop(p, in[:], iv[:], d.t[:], d.f[:], shffle[:], out[:]) + + d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] = out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7] +} diff --git a/vendor/github.com/minio/blake2b-simd/compressSse_amd64.s b/vendor/github.com/minio/blake2b-simd/compressSse_amd64.s new file mode 100644 index 000000000..6f31c949e --- /dev/null +++ b/vendor/github.com/minio/blake2b-simd/compressSse_amd64.s @@ -0,0 +1,770 @@ +//+build !noasm !appengine + +// +// Minio Cloud Storage, (C) 2016 Minio, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// +// Based on SSE implementation from https://github.com/BLAKE2/BLAKE2/blob/master/sse/blake2b.c +// +// Use github.com/fwessels/asm2plan9s on this file to assemble instructions to their Plan9 equivalent +// +// Assembly code below essentially follows the ROUND macro (see blake2b-round.h) which is defined as: +// #define ROUND(r) \ +// LOAD_MSG_ ##r ##_1(b0, b1); \ +// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ +// LOAD_MSG_ ##r ##_2(b0, b1); \ +// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ +// DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ +// LOAD_MSG_ ##r ##_3(b0, b1); \ +// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ +// LOAD_MSG_ ##r ##_4(b0, b1); \ +// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ +// UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); +// +// as well as the go equivalent in https://github.com/dchest/blake2b/blob/master/block.go +// +// As in the macro, G1/G2 in the 1st and 2nd half are identical (so literal copy of assembly) +// +// Rounds are also the same, except for the loading of the message (and rounds 1 & 11 and +// rounds 2 & 12 are identical) +// + +#define G1 \ + \ // G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); + LONG $0xd40f4166; BYTE $0xc0 \ // PADDQ XMM0,XMM8 /* v0 += m[0], v1 += m[2] */ + LONG $0xd40f4166; BYTE $0xc9 \ // PADDQ XMM1,XMM9 /* v2 += m[4], v3 += m[6] */ + LONG $0xc2d40f66 \ // PADDQ XMM0,XMM2 /* v0 += v4, v1 += v5 */ + LONG $0xcbd40f66 \ // PADDQ XMM1,XMM3 /* v2 += v6, v3 += v7 */ + LONG $0xf0ef0f66 \ // PXOR XMM6,XMM0 /* v12 ^= v0, v13 ^= v1 */ + LONG $0xf9ef0f66 \ // PXOR XMM7,XMM1 /* v14 ^= v2, v15 ^= v3 */ + LONG $0xf6700f66; BYTE $0xb1 \ // PSHUFD XMM6,XMM6,0xb1 /* v12 = v12<<(64-32) | v12>>32, v13 = v13<<(64-32) | v13>>32 */ + LONG $0xff700f66; BYTE $0xb1 \ // PSHUFD XMM7,XMM7,0xb1 /* v14 = v14<<(64-32) | v14>>32, v15 = v15<<(64-32) | v15>>32 */ + LONG $0xe6d40f66 \ // PADDQ XMM4,XMM6 /* v8 += v12, v9 += v13 */ + LONG $0xefd40f66 \ // PADDQ XMM5,XMM7 /* v10 += v14, v11 += v15 */ + LONG $0xd4ef0f66 \ // PXOR XMM2,XMM4 /* v4 ^= v8, v5 ^= v9 */ + LONG $0xddef0f66 \ // PXOR XMM3,XMM5 /* v6 ^= v10, v7 ^= v11 */ + LONG $0x380f4166; WORD $0xd400 \ // PSHUFB XMM2,XMM12 /* v4 = v4<<(64-24) | v4>>24, v5 = v5<<(64-24) | v5>>24 */ + LONG $0x380f4166; WORD $0xdc00 // PSHUFB XMM3,XMM12 /* v6 = v6<<(64-24) | v6>>24, v7 = v7<<(64-24) | v7>>24 */ + +#define G2 \ + \ // G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); + LONG $0xd40f4166; BYTE $0xc2 \ // PADDQ XMM0,XMM10 /* v0 += m[1], v1 += m[3] */ + LONG $0xd40f4166; BYTE $0xcb \ // PADDQ XMM1,XMM11 /* v2 += m[5], v3 += m[7] */ + LONG $0xc2d40f66 \ // PADDQ XMM0,XMM2 /* v0 += v4, v1 += v5 */ + LONG $0xcbd40f66 \ // PADDQ XMM1,XMM3 /* v2 += v6, v3 += v7 */ + LONG $0xf0ef0f66 \ // PXOR XMM6,XMM0 /* v12 ^= v0, v13 ^= v1 */ + LONG $0xf9ef0f66 \ // PXOR XMM7,XMM1 /* v14 ^= v2, v15 ^= v3 */ + LONG $0xf6700ff2; BYTE $0x39 \ // PSHUFLW XMM6,XMM6,0x39 /* combined with next ... */ + LONG $0xf6700ff3; BYTE $0x39 \ // PSHUFHW XMM6,XMM6,0x39 /* v12 = v12<<(64-16) | v12>>16, v13 = v13<<(64-16) | v13>>16 */ + LONG $0xff700ff2; BYTE $0x39 \ // PSHUFLW XMM7,XMM7,0x39 /* combined with next ... */ + LONG $0xff700ff3; BYTE $0x39 \ // PSHUFHW XMM7,XMM7,0x39 /* v14 = v14<<(64-16) | v14>>16, v15 = v15<<(64-16) | v15>>16 */ + LONG $0xe6d40f66 \ // PADDQ XMM4,XMM6 /* v8 += v12, v9 += v13 */ + LONG $0xefd40f66 \ // PADDQ XMM5,XMM7 /* v10 += v14, v11 += v15 */ + LONG $0xd4ef0f66 \ // PXOR XMM2,XMM4 /* v4 ^= v8, v5 ^= v9 */ + LONG $0xddef0f66 \ // PXOR XMM3,XMM5 /* v6 ^= v10, v7 ^= v11 */ + MOVOU X2, X15 \ + LONG $0xd40f4466; BYTE $0xfa \ // PADDQ XMM15,XMM2 /* temp reg = reg*2 */ + LONG $0xd2730f66; BYTE $0x3f \ // PSRLQ XMM2,0x3f /* reg = reg>>63 */ + LONG $0xef0f4166; BYTE $0xd7 \ // PXOR XMM2,XMM15 /* ORed together: v4 = v4<<(64-63) | v4>>63, v5 = v5<<(64-63) | v5>>63 */ + MOVOU X3, X15 \ + LONG $0xd40f4466; BYTE $0xfb \ // PADDQ XMM15,XMM3 /* temp reg = reg*2 */ + LONG $0xd3730f66; BYTE $0x3f \ // PSRLQ XMM3,0x3f /* reg = reg>>63 */ + LONG $0xef0f4166; BYTE $0xdf // PXOR XMM3,XMM15 /* ORed together: v6 = v6<<(64-63) | v6>>63, v7 = v7<<(64-63) | v7>>63 */ + +#define DIAGONALIZE \ + \ // DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); + MOVOU X6, X13 \ /* t0 = row4l;\ */ + MOVOU X2, X14 \ /* t1 = row2l;\ */ + MOVOU X4, X6 \ /* row4l = row3l;\ */ + MOVOU X5, X4 \ /* row3l = row3h;\ */ + MOVOU X6, X5 \ /* row3h = row4l;\ */ + LONG $0x6c0f4566; BYTE $0xfd \ // PUNPCKLQDQ XMM15, XMM13 /* _mm_unpacklo_epi64(t0, t0) */ + MOVOU X7, X6 \ + LONG $0x6d0f4166; BYTE $0xf7 \ // PUNPCKHQDQ XMM6, XMM15 /* row4l = _mm_unpackhi_epi64(row4h, ); \ */ + LONG $0x6c0f4466; BYTE $0xff \ // PUNPCKLQDQ XMM15, XMM7 /* _mm_unpacklo_epi64(row4h, row4h) */ + MOVOU X13, X7 \ + LONG $0x6d0f4166; BYTE $0xff \ // PUNPCKHQDQ XMM7, XMM15 /* row4h = _mm_unpackhi_epi64(t0, ); \ */ + LONG $0x6c0f4466; BYTE $0xfb \ // PUNPCKLQDQ XMM15, XMM3 /* _mm_unpacklo_epi64(row2h, row2h) */ + LONG $0x6d0f4166; BYTE $0xd7 \ // PUNPCKHQDQ XMM2, XMM15 /* row2l = _mm_unpackhi_epi64(row2l, ); \ */ + LONG $0x6c0f4566; BYTE $0xfe \ // PUNPCKLQDQ XMM15, XMM14 /* _mm_unpacklo_epi64(t1, t1) */ + LONG $0x6d0f4166; BYTE $0xdf // PUNPCKHQDQ XMM3, XMM15 /* row2h = _mm_unpackhi_epi64(row2h, ) */ + +#define UNDIAGONALIZE \ + \ // UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); + MOVOU X4, X13 \ /* t0 = row3l;\ */ + MOVOU X5, X4 \ /* row3l = row3h;\ */ + MOVOU X13, X5 \ /* row3h = t0;\ */ + MOVOU X2, X13 \ /* t0 = row2l;\ */ + MOVOU X6, X14 \ /* t1 = row4l;\ */ + LONG $0x6c0f4466; BYTE $0xfa \ // PUNPCKLQDQ XMM15, XMM2 /* _mm_unpacklo_epi64(row2l, row2l) */ + MOVOU X3, X2 \ + LONG $0x6d0f4166; BYTE $0xd7 \ // PUNPCKHQDQ XMM2, XMM15 /* row2l = _mm_unpackhi_epi64(row2h, ); \ */ + LONG $0x6c0f4466; BYTE $0xfb \ // PUNPCKLQDQ XMM15, XMM3 /* _mm_unpacklo_epi64(row2h, row2h) */ + MOVOU X13, X3 \ + LONG $0x6d0f4166; BYTE $0xdf \ // PUNPCKHQDQ XMM3, XMM15 /* row2h = _mm_unpackhi_epi64(t0, ); \ */ + LONG $0x6c0f4466; BYTE $0xff \ // PUNPCKLQDQ XMM15, XMM7 /* _mm_unpacklo_epi64(row4h, row4h) */ + LONG $0x6d0f4166; BYTE $0xf7 \ // PUNPCKHQDQ XMM6, XMM15 /* row4l = _mm_unpackhi_epi64(row4l, ); \ */ + LONG $0x6c0f4566; BYTE $0xfe \ // PUNPCKLQDQ XMM15, XMM14 /* _mm_unpacklo_epi64(t1, t1) */ + LONG $0x6d0f4166; BYTE $0xff // PUNPCKHQDQ XMM7, XMM15 /* row4h = _mm_unpackhi_epi64(row4h, ) */ + +#define LOAD_SHUFFLE \ + \ // Load shuffle value + MOVQ shffle+120(FP), SI \ // SI: &shuffle + MOVOU 0(SI), X12 // X12 = 03040506 07000102 0b0c0d0e 0f08090a + +// func blockSSELoop(p []uint8, in, iv, t, f, shffle, out []uint64) +TEXT ·blockSSELoop(SB), 7, $0 + // REGISTER USE + // R8: loop counter + // DX: message pointer + // SI: temp pointer for loading + // X0 - X7: v0 - v15 + // X8 - X11: m[0] - m[7] + // X12: shuffle value + // X13 - X15: temp registers + + // Load digest + MOVQ in+24(FP), SI // SI: &in + MOVOU 0(SI), X0 // X0 = in[0]+in[1] /* row1l = LOAD( &S->h[0] ); */ + MOVOU 16(SI), X1 // X1 = in[2]+in[3] /* row1h = LOAD( &S->h[2] ); */ + MOVOU 32(SI), X2 // X2 = in[4]+in[5] /* row2l = LOAD( &S->h[4] ); */ + MOVOU 48(SI), X3 // X3 = in[6]+in[7] /* row2h = LOAD( &S->h[6] ); */ + + // Already store digest into &out (so we can reload it later generically) + MOVQ out+144(FP), SI // SI: &out + MOVOU X0, 0(SI) // out[0]+out[1] = X0 + MOVOU X1, 16(SI) // out[2]+out[3] = X1 + MOVOU X2, 32(SI) // out[4]+out[5] = X2 + MOVOU X3, 48(SI) // out[6]+out[7] = X3 + + // Initialize message pointer and loop counter + MOVQ message+0(FP), DX // DX: &p (message) + MOVQ message_len+8(FP), R8 // R8: len(message) + SHRQ $7, R8 // len(message) / 128 + CMPQ R8, $0 + JEQ complete + +loop: + // Increment counter + MOVQ t+72(FP), SI // SI: &t + MOVQ 0(SI), R9 + ADDQ $128, R9 // /* d.t[0] += BlockSize */ + MOVQ R9, 0(SI) + CMPQ R9, $128 // /* if d.t[0] < BlockSize { */ + JGE noincr + MOVQ 8(SI), R9 + ADDQ $1, R9 // /* d.t[1]++ */ + MOVQ R9, 8(SI) + +noincr: // /* } */ + + // Load initialization vector + MOVQ iv+48(FP), SI // SI: &iv + MOVOU 0(SI), X4 // X4 = iv[0]+iv[1] /* row3l = LOAD( &blake2b_IV[0] ); */ + MOVOU 16(SI), X5 // X5 = iv[2]+iv[3] /* row3h = LOAD( &blake2b_IV[2] ); */ + MOVOU 32(SI), X6 // X6 = iv[4]+iv[5] /* LOAD( &blake2b_IV[4] ) */ + MOVOU 48(SI), X7 // X7 = iv[6]+iv[7] /* LOAD( &blake2b_IV[6] ) */ + MOVQ t+72(FP), SI // SI: &t + MOVOU 0(SI), X8 // X8 = t[0]+t[1] /* LOAD( &S->t[0] ) */ + PXOR X8, X6 // X6 = X6 ^ X8 /* row4l = _mm_xor_si128( , ); */ + MOVQ t+96(FP), SI // SI: &f + MOVOU 0(SI), X8 // X8 = f[0]+f[1] /* LOAD( &S->f[0] ) */ + PXOR X8, X7 // X7 = X7 ^ X8 /* row4h = _mm_xor_si128( , ); */ + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 1 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+m[1] + MOVOU 16(DX), X13 // X13 = m[2]+m[3] + MOVOU 32(DX), X14 // X14 = m[4]+m[5] + MOVOU 48(DX), X15 // X15 = m[6]+m[7] + MOVOU X12, X8 + LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[0], m[2] */ + MOVOU X14, X9 + LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[4], m[6] */ + MOVOU X12, X10 + LONG $0x6d0f4566; BYTE $0xd5 // PUNPCKHQDQ XMM10, XMM13 /* m[1], m[3] */ + MOVOU X14, X11 + LONG $0x6d0f4566; BYTE $0xdf // PUNPCKHQDQ XMM11, XMM15 /* m[5], m[7] */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 64(DX), X12 // X12 = m[8]+ m[9] + MOVOU 80(DX), X13 // X13 = m[10]+m[11] + MOVOU 96(DX), X14 // X14 = m[12]+m[13] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + MOVOU X12, X8 + LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[8],m[10] */ + MOVOU X14, X9 + LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[12],m[14] */ + MOVOU X12, X10 + LONG $0x6d0f4566; BYTE $0xd5 // PUNPCKHQDQ XMM10, XMM13 /* m[9],m[11] */ + MOVOU X14, X11 + LONG $0x6d0f4566; BYTE $0xdf // PUNPCKHQDQ XMM11, XMM15 /* m[13],m[15] */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 2 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 112(DX), X12 // X12 = m[14]+m[15] + MOVOU 32(DX), X13 // X13 = m[4]+ m[5] + MOVOU 64(DX), X14 // X14 = m[8]+ m[9] + MOVOU 96(DX), X15 // X15 = m[12]+m[13] + MOVOU X12, X8 + LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[14], m[4] */ + MOVOU X14, X9 + LONG $0x6d0f4566; BYTE $0xcf // PUNPCKHQDQ XMM9, XMM15 /* m[9], m[13] */ + MOVOU 80(DX), X10 // X10 = m[10]+m[11] + MOVOU 48(DX), X11 // X11 = m[6]+ m[7] + LONG $0x6c0f4566; BYTE $0xd6 // PUNPCKLQDQ XMM10, XMM14 /* m[10], m[8] */ + LONG $0x3a0f4566; WORD $0xdc0f; BYTE $0x08 // PALIGNR XMM11, XMM12, 0x8 /* m[15], m[6] */; ; ; ; ; + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 32(DX), X13 // X13 = m[4]+ m[5] + MOVOU 80(DX), X14 // X14 = m[10]+m[11] + MOVOU X12, X8 + LONG $0x3a0f4566; WORD $0xc40f; BYTE $0x08 // PALIGNR XMM8, XMM12, 0x8 /* m[1], m[0] */ + MOVOU X14, X9 + LONG $0x6d0f4566; BYTE $0xcd // PUNPCKHQDQ XMM9, XMM13 /* m[11], m[5] */ + MOVOU 16(DX), X12 // X12 = m[2]+ m[3] + MOVOU 48(DX), X11 // X11 = m[6]+ m[7] + MOVOU 96(DX), X10 // X10 = m[12]+m[13] + LONG $0x6c0f4566; BYTE $0xd4 // PUNPCKLQDQ XMM10, XMM12 /* m[12], m[2] */ + LONG $0x6d0f4566; BYTE $0xdc // PUNPCKHQDQ XMM11, XMM12 /* m[7], m[3] */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 3 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 32(DX), X12 // X12 = m[4]+ m[5] + MOVOU 80(DX), X13 // X13 = m[10]+m[11] + MOVOU 96(DX), X14 // X14 = m[12]+m[13] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + MOVOU X14, X8 + LONG $0x3a0f4566; WORD $0xc50f; BYTE $0x08 // PALIGNR XMM8, XMM13, 0x8 /* m[11], m[12] */ + MOVOU X12, X9 + LONG $0x6d0f4566; BYTE $0xcf // PUNPCKHQDQ XMM9, XMM15 /* m[5], m[15] */ + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 16(DX), X13 // X13 = m[2]+ m[3] + MOVOU 64(DX), X10 // X10 = m[8]+ m[9] + LONG $0x6c0f4566; BYTE $0xd4 // PUNPCKLQDQ XMM10, XMM12 /* m[8], m[0] */ + LONG $0x6d0f4566; BYTE $0xf6 // PUNPCKHQDQ XMM14, XMM14 /* ___, m[13] */ + MOVOU X13, X11 + LONG $0x6c0f4566; BYTE $0xde // PUNPCKLQDQ XMM11, XMM14 /* m[2], ___ */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 16(DX), X12 // X12 = m[2]+ m[3] + MOVOU 48(DX), X13 // X13 = m[6]+ m[7] + MOVOU 64(DX), X14 // X14 = m[8]+ m[9] + MOVOU 80(DX), X15 // X15 = m[10]+m[11] + MOVOU X12, X9 + LONG $0x6d0f4566; BYTE $0xcc // PUNPCKHQDQ XMM9, XMM12 /* ___, m[3] */ + MOVOU X15, X8 + LONG $0x6c0f4566; BYTE $0xc1 // PUNPCKLQDQ XMM8, XMM9 /* m[10], ___ */ + MOVOU X13, X9 + LONG $0x6d0f4566; BYTE $0xce // PUNPCKHQDQ XMM9, XMM14 /* m[7], m[9] */ + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 32(DX), X11 // X11 = m[4]+ m[5] + MOVOU 112(DX), X10 // X10 = m[14]+m[15] + LONG $0x6c0f4566; BYTE $0xd5 // PUNPCKLQDQ XMM10, XMM13 /* m[14], m[6] */ + LONG $0x3a0f4566; WORD $0xdc0f; BYTE $0x08 // PALIGNR XMM11, XMM12, 0x8 /* m[1], m[4] */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 4 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 16(DX), X12 // X12 = m[2]+ m[3] + MOVOU 48(DX), X13 // X13 = m[6]+ m[7] + MOVOU 80(DX), X14 // X14 = m[10]+m[11] + MOVOU 96(DX), X15 // X15 = m[12]+m[13] + MOVOU X13, X8 + LONG $0x6d0f4566; BYTE $0xc4 // PUNPCKHQDQ XMM8, XMM12 /* m[7], m[3] */ + MOVOU X15, X9 + LONG $0x6d0f4566; BYTE $0xce // PUNPCKHQDQ XMM9, XMM14 /* m[13], m[11] */ + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 64(DX), X10 // X10 = m[8]+ m[9] + MOVOU 112(DX), X14 // X14 = m[14]+m[15] + LONG $0x6d0f4566; BYTE $0xd4 // PUNPCKHQDQ XMM10, XMM12 /* m[9], m[1] */ + MOVOU X15, X11 + LONG $0x6c0f4566; BYTE $0xde // PUNPCKLQDQ XMM11, XMM14 /* m[12], m[14] */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 16(DX), X12 // X12 = m[2]+ m[3] + MOVOU 32(DX), X13 // X13 = m[4]+ m[5] + MOVOU 80(DX), X14 // X14 = m[10]+m[11] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + MOVOU X13, X9 + LONG $0x6d0f4566; BYTE $0xcd // PUNPCKHQDQ XMM9, XMM13 /* ___, m[5] */ + MOVOU X12, X8 + LONG $0x6c0f4566; BYTE $0xc1 // PUNPCKLQDQ XMM8, XMM9 /* m[2], ____ */ + MOVOU X15, X10 + LONG $0x6d0f4566; BYTE $0xd7 // PUNPCKHQDQ XMM10, XMM15 /* ___, m[15] */ + MOVOU X13, X9 + LONG $0x6c0f4566; BYTE $0xca // PUNPCKLQDQ XMM9, XMM10 /* m[4], ____ */ + MOVOU 0(DX), X11 // X11 = m[0]+ m[1] + MOVOU 48(DX), X10 // X10 = m[6]+ m[7] + MOVOU 64(DX), X15 // X15 = m[8]+ m[9] + LONG $0x6c0f4566; BYTE $0xd6 // PUNPCKLQDQ XMM10, XMM14 /* m[6], m[10] */ + LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[0], m[8] */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 5 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 16(DX), X12 // X12 = m[2]+ m[3] + MOVOU 32(DX), X13 // X13 = m[4]+ m[5] + MOVOU 64(DX), X14 // X14 = m[8]+ m[9] + MOVOU 80(DX), X15 // X15 = m[10]+m[11] + MOVOU X14, X8 + LONG $0x6d0f4566; BYTE $0xc5 // PUNPCKHQDQ XMM8, XMM13 /* m[9], m[5] */ + MOVOU X12, X9 + LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[2], m[10] */ + MOVOU 0(DX), X10 // X10 = m[0]+ m[1] + MOVOU 48(DX), X14 // X14 = m[6]+ m[7] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + LONG $0x6d0f4566; BYTE $0xf6 // PUNPCKHQDQ XMM14, XMM14 /* ___, m[7] */ + LONG $0x6c0f4566; BYTE $0xd6 // PUNPCKLQDQ XMM10, XMM14 /* m[0], ____ */ + LONG $0x6d0f4566; BYTE $0xff // PUNPCKHQDQ XMM15, XMM15 /* ___, m[15] */ + MOVOU X13, X11 + LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[4], ____ */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 16(DX), X12 // X12 = m[2]+ m[3] + MOVOU 48(DX), X13 // X13 = m[6]+ m[7] + MOVOU 80(DX), X14 // X14 = m[10]+m[11] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + LONG $0x6d0f4566; BYTE $0xf6 // PUNPCKHQDQ XMM14, XMM14 /* ___, m[11] */ + MOVOU X15, X8 + LONG $0x6c0f4566; BYTE $0xc6 // PUNPCKLQDQ XMM8, XMM14 /* m[14], ____ */ + LONG $0x6d0f4566; BYTE $0xe4 // PUNPCKHQDQ XMM12, XMM12 /* ___, m[3] */ + MOVOU X13, X9 + LONG $0x6c0f4566; BYTE $0xcc // PUNPCKLQDQ XMM9, XMM12 /* m[6], ____ */ + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 64(DX), X11 // X11 = m[8]+ m[9] + MOVOU 96(DX), X14 // X14 = m[12]+m[13] + MOVOU X14, X10 + LONG $0x3a0f4566; WORD $0xd40f; BYTE $0x08 // PALIGNR XMM10, XMM12, 0x8 /* m[1], m[12] */ + LONG $0x6d0f4566; BYTE $0xf6 // PUNPCKHQDQ XMM14, XMM14 /* ___, m[13] */ + LONG $0x6c0f4566; BYTE $0xde // PUNPCKLQDQ XMM11, XMM14 /* m[8], ____ */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 6 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 16(DX), X13 // X13 = m[2]+ m[3] + MOVOU 48(DX), X14 // X14 = m[6]+ m[7] + MOVOU 64(DX), X15 // X15 = m[8]+ m[9] + MOVOU X13, X8 + LONG $0x6c0f4566; BYTE $0xc6 // PUNPCKLQDQ XMM8, XMM14 /* m[2], m[6] */ + MOVOU X12, X9 + LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[0], m[8] */ + MOVOU 80(DX), X12 // X12 = m[10]+m[11] + MOVOU 96(DX), X10 // X10 = m[12]+m[13] + LONG $0x6c0f4566; BYTE $0xd4 // PUNPCKLQDQ XMM10, XMM12 /* m[12], m[10] */ + MOVOU X12, X11 + LONG $0x6d0f4566; BYTE $0xdd // PUNPCKHQDQ XMM11, XMM13 /* m[11], m[3] */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 32(DX), X13 // X13 = m[4]+ m[5] + MOVOU 48(DX), X14 // X14 = m[6]+ m[7] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + MOVOU X14, X9 + LONG $0x6d0f4566; BYTE $0xce // PUNPCKHQDQ XMM9, XMM14 /* ___, m[7] */ + MOVOU X13, X8 + LONG $0x6c0f4566; BYTE $0xc1 // PUNPCKLQDQ XMM8, XMM9 /* m[4], ____ */ + MOVOU X15, X9 + LONG $0x6d0f4566; BYTE $0xcc // PUNPCKHQDQ XMM9, XMM12 /* m[15], m[1] */ + MOVOU 64(DX), X12 // X12 = m[8]+ m[9] + MOVOU 96(DX), X10 // X10 = m[12]+m[13] + LONG $0x6d0f4566; BYTE $0xd5 // PUNPCKHQDQ XMM10, XMM13 /* m[13], m[5] */ + LONG $0x6d0f4566; BYTE $0xe4 // PUNPCKHQDQ XMM12, XMM12 /* ___, m[9] */ + MOVOU X15, X11 + LONG $0x6c0f4566; BYTE $0xdc // PUNPCKLQDQ XMM11, XMM12 /* m[14], ____ */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 7 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 32(DX), X13 // X13 = m[4]+ m[5] + MOVOU 96(DX), X14 // X14 = m[12]+m[13] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + MOVOU X12, X9 + LONG $0x6d0f4566; BYTE $0xcc // PUNPCKHQDQ XMM9, XMM12 /* ___, m[1] */ + MOVOU X14, X8 + LONG $0x6c0f4566; BYTE $0xc1 // PUNPCKLQDQ XMM8, XMM9 /* m[12], ____ */ + MOVOU X15, X9 + LONG $0x6c0f4566; BYTE $0xcd // PUNPCKLQDQ XMM9, XMM13 /* m[14], m[4] */ + MOVOU 80(DX), X11 // X11 = m[10]+m[11] + MOVOU X13, X10 + LONG $0x6d0f4566; BYTE $0xd7 // PUNPCKHQDQ XMM10, XMM15 /* m[5], m[15] */ + LONG $0x3a0f4566; WORD $0xde0f; BYTE $0x08 // PALIGNR XMM11, XMM14, 0x8 /* m[13], m[10] */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 48(DX), X13 // X13 = m[6]+ m[7] + MOVOU 64(DX), X14 // X14 = m[8]+ m[9] + MOVOU 80(DX), X15 // X15 = m[10]+m[11] + MOVOU X12, X8 + LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[0], m[6] */ + MOVOU X14, X9 + LONG $0x3a0f4566; WORD $0xce0f; BYTE $0x08 // PALIGNR XMM9, XMM14, 0x8 /* m[9], m[8] */ + MOVOU 16(DX), X11 // X14 = m[2]+ m[3] + MOVOU X13, X10 + LONG $0x6d0f4566; BYTE $0xd3 // PUNPCKHQDQ XMM10, XMM11 /* m[7], m[3] */ + LONG $0x6d0f4566; BYTE $0xff // PUNPCKHQDQ XMM15, XMM15 /* ___, m[11] */ + LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[2], ____ */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 8 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 16(DX), X12 // X12 = m[2]+ m[3] + MOVOU 48(DX), X13 // X13 = m[6]+ m[7] + MOVOU 96(DX), X14 // X14 = m[12]+m[13] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + MOVOU X14, X8 + LONG $0x6d0f4566; BYTE $0xc5 // PUNPCKHQDQ XMM8, XMM13 /* m[13], m[7] */ + MOVOU X12, X10 + LONG $0x6d0f4566; BYTE $0xd4 // PUNPCKHQDQ XMM10, XMM12 /* ___, m[3] */ + MOVOU X14, X9 + LONG $0x6c0f4566; BYTE $0xca // PUNPCKLQDQ XMM9, XMM10 /* m[12], ____ */ + MOVOU 0(DX), X11 // X11 = m[0]+ m[1] + MOVOU 64(DX), X13 // X13 = m[8]+ m[9] + MOVOU 80(DX), X14 // X14 = m[10]+m[11] + MOVOU X15, X10 + LONG $0x3a0f4566; WORD $0xd60f; BYTE $0x08 // PALIGNR XMM10, XMM14, 0x8 /* m[11], m[14] */ + LONG $0x6d0f4566; BYTE $0xdd // PUNPCKHQDQ XMM11, XMM13 /* m[1], m[9] */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 16(DX), X12 // X12 = m[2]+ m[3] + MOVOU 32(DX), X13 // X13 = m[4]+ m[5] + MOVOU 64(DX), X14 // X14 = m[8]+ m[9] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + MOVOU X13, X8 + LONG $0x6d0f4566; BYTE $0xc7 // PUNPCKHQDQ XMM8, XMM15 /* m[5], m[15] */ + MOVOU X14, X9 + LONG $0x6c0f4566; BYTE $0xcc // PUNPCKLQDQ XMM9, XMM12 /* m[8], m[2] */ + MOVOU 0(DX), X10 // X10 = m[0]+ m[1] + MOVOU 48(DX), X11 // X11 = m[6]+ m[7] + MOVOU 80(DX), X15 // X15 = m[10]+m[11] + LONG $0x6c0f4566; BYTE $0xd5 // PUNPCKLQDQ XMM10, XMM13 /* m[0], m[4] */ + LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[6], m[10] */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 9 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 48(DX), X13 // X13 = m[6]+ m[7] + MOVOU 80(DX), X14 // X14 = m[10]+m[11] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + MOVOU X13, X8 + LONG $0x6c0f4566; BYTE $0xc7 // PUNPCKLQDQ XMM8, XMM15 /* m[6], m[14] */ + MOVOU X12, X9 + LONG $0x3a0f4566; WORD $0xce0f; BYTE $0x08 // PALIGNR XMM9, XMM14, 0x8 /* m[11], m[0] */ + MOVOU 16(DX), X13 // X13 = m[2]+ m[3] + MOVOU 64(DX), X11 // X11 = m[8]+ m[9] + MOVOU X15, X10 + LONG $0x6d0f4566; BYTE $0xd3 // PUNPCKHQDQ XMM10, XMM11 /* m[15], m[9] */ + LONG $0x3a0f4566; WORD $0xdd0f; BYTE $0x08 // PALIGNR XMM11, XMM13, 0x8 /* m[3], m[8] */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 16(DX), X13 // X13 = m[2]+ m[3] + MOVOU 80(DX), X14 // X14 = m[10]+m[11] + MOVOU 96(DX), X15 // X15 = m[12]+m[13] + MOVOU X15, X9 + LONG $0x6d0f4566; BYTE $0xcf // PUNPCKHQDQ XMM9, XMM15 /* ___, m[13] */ + MOVOU X15, X8 + LONG $0x6c0f4566; BYTE $0xc1 // PUNPCKLQDQ XMM8, XMM9 /* m[12], ____ */ + MOVOU X14, X9 + LONG $0x3a0f4566; WORD $0xcc0f; BYTE $0x08 // PALIGNR XMM9, XMM12, 0x8 /* m[1], m[10] */ + MOVOU 32(DX), X12 // X12 = m[4]+ m[5] + MOVOU 48(DX), X15 // X15 = m[6]+ m[7] + MOVOU X15, X11 + LONG $0x6d0f4566; BYTE $0xdf // PUNPCKHQDQ XMM11, XMM15 /* ___, m[7] */ + MOVOU X13, X10 + LONG $0x6c0f4566; BYTE $0xd3 // PUNPCKLQDQ XMM10, XMM11 /* m[2], ____ */ + MOVOU X12, X15 + LONG $0x6d0f4566; BYTE $0xfc // PUNPCKHQDQ XMM15, XMM12 /* ___, m[5] */ + MOVOU X12, X11 + LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[4], ____ */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 1 0 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 48(DX), X13 // X13 = m[6]+ m[7] + MOVOU 64(DX), X14 // X14 = m[8]+ m[9] + MOVOU 80(DX), X15 // X15 = m[10]+m[11] + MOVOU X15, X8 + LONG $0x6c0f4566; BYTE $0xc6 // PUNPCKLQDQ XMM8, XMM14 /* m[10], m[8] */ + MOVOU X13, X9 + LONG $0x6d0f4566; BYTE $0xcc // PUNPCKHQDQ XMM9, XMM12 /* m[7], m[1] */ + MOVOU 16(DX), X10 // X10 = m[2]+ m[3] + MOVOU 32(DX), X14 // X14 = m[4]+ m[5] + LONG $0x6c0f4566; BYTE $0xd6 // PUNPCKLQDQ XMM10, XMM14 /* m[2], m[4] */ + MOVOU X14, X15 + LONG $0x6d0f4566; BYTE $0xfe // PUNPCKHQDQ XMM15, XMM14 /* ___, m[5] */ + MOVOU X13, X11 + LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[6], ____ */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 16(DX), X12 // X12 = m[2]+ m[3] + MOVOU 64(DX), X13 // X13 = m[8]+ m[9] + MOVOU 96(DX), X14 // X14 = m[12]+m[13] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + MOVOU X15, X8 + LONG $0x6d0f4566; BYTE $0xc5 // PUNPCKHQDQ XMM8, XMM13 /* m[15], m[9] */ + MOVOU X12, X9 + LONG $0x6d0f4566; BYTE $0xce // PUNPCKHQDQ XMM9, XMM14 /* m[3], m[13] */ + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 80(DX), X13 // X13 = m[10]+m[11] + MOVOU X15, X10 + LONG $0x3a0f4566; WORD $0xd50f; BYTE $0x08 // PALIGNR XMM10, XMM13, 0x8 /* m[11], m[14] */ + MOVOU X14, X11 + LONG $0x6c0f4566; BYTE $0xdc // PUNPCKLQDQ XMM11, XMM12 /* m[12], m[0] */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 1 1 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+m[1] + MOVOU 16(DX), X13 // X13 = m[2]+m[3] + MOVOU 32(DX), X14 // X14 = m[4]+m[5] + MOVOU 48(DX), X15 // X15 = m[6]+m[7] + MOVOU X12, X8 + LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[0], m[2] */ + MOVOU X14, X9 + LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[4], m[6] */ + MOVOU X12, X10 + LONG $0x6d0f4566; BYTE $0xd5 // PUNPCKHQDQ XMM10, XMM13 /* m[1], m[3] */ + MOVOU X14, X11 + LONG $0x6d0f4566; BYTE $0xdf // PUNPCKHQDQ XMM11, XMM15 /* m[5], m[7] */ + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 64(DX), X12 // X12 = m[8]+ m[9] + MOVOU 80(DX), X13 // X13 = m[10]+m[11] + MOVOU 96(DX), X14 // X14 = m[12]+m[13] + MOVOU 112(DX), X15 // X15 = m[14]+m[15] + MOVOU X12, X8 + LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[8],m[10] */ + MOVOU X14, X9 + LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[12],m[14] */ + MOVOU X12, X10 + LONG $0x6d0f4566; BYTE $0xd5 // PUNPCKHQDQ XMM10, XMM13 /* m[9],m[11] */ + MOVOU X14, X11 + LONG $0x6d0f4566; BYTE $0xdf // PUNPCKHQDQ XMM11, XMM15 /* m[13],m[15] */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + /////////////////////////////////////////////////////////////////////////// + // R O U N D 1 2 + /////////////////////////////////////////////////////////////////////////// + + // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) + MOVOU 112(DX), X12 // X12 = m[14]+m[15] + MOVOU 32(DX), X13 // X13 = m[4]+ m[5] + MOVOU 64(DX), X14 // X14 = m[8]+ m[9] + MOVOU 96(DX), X15 // X15 = m[12]+m[13] + MOVOU X12, X8 + LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[14], m[4] */ + MOVOU X14, X9 + LONG $0x6d0f4566; BYTE $0xcf // PUNPCKHQDQ XMM9, XMM15 /* m[9], m[13] */ + MOVOU 80(DX), X10 // X10 = m[10]+m[11] + MOVOU 48(DX), X11 // X11 = m[6]+ m[7] + LONG $0x6c0f4566; BYTE $0xd6 // PUNPCKLQDQ XMM10, XMM14 /* m[10], m[8] */ + LONG $0x3a0f4566; WORD $0xdc0f; BYTE $0x08 // PALIGNR XMM11, XMM12, 0x8 /* m[15], m[6] */; ; ; ; ; + + LOAD_SHUFFLE + G1 + G2 + DIAGONALIZE + + // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) + MOVOU 0(DX), X12 // X12 = m[0]+ m[1] + MOVOU 32(DX), X13 // X13 = m[4]+ m[5] + MOVOU 80(DX), X14 // X14 = m[10]+m[11] + MOVOU X12, X8 + LONG $0x3a0f4566; WORD $0xc40f; BYTE $0x08 // PALIGNR XMM8, XMM12, 0x8 /* m[1], m[0] */ + MOVOU X14, X9 + LONG $0x6d0f4566; BYTE $0xcd // PUNPCKHQDQ XMM9, XMM13 /* m[11], m[5] */ + MOVOU 16(DX), X12 // X12 = m[2]+ m[3] + MOVOU 48(DX), X11 // X11 = m[6]+ m[7] + MOVOU 96(DX), X10 // X10 = m[12]+m[13] + LONG $0x6c0f4566; BYTE $0xd4 // PUNPCKLQDQ XMM10, XMM12 /* m[12], m[2] */ + LONG $0x6d0f4566; BYTE $0xdc // PUNPCKHQDQ XMM11, XMM12 /* m[7], m[3] */ + + LOAD_SHUFFLE + G1 + G2 + UNDIAGONALIZE + + // Reload digest (most current value store in &out) + MOVQ out+144(FP), SI // SI: &in + MOVOU 0(SI), X12 // X12 = in[0]+in[1] /* row1l = LOAD( &S->h[0] ); */ + MOVOU 16(SI), X13 // X13 = in[2]+in[3] /* row1h = LOAD( &S->h[2] ); */ + MOVOU 32(SI), X14 // X14 = in[4]+in[5] /* row2l = LOAD( &S->h[4] ); */ + MOVOU 48(SI), X15 // X15 = in[6]+in[7] /* row2h = LOAD( &S->h[6] ); */ + + // Final computations and prepare for storing + PXOR X4, X0 // X0 = X0 ^ X4 /* row1l = _mm_xor_si128( row3l, row1l ); */ + PXOR X5, X1 // X1 = X1 ^ X5 /* row1h = _mm_xor_si128( row3h, row1h ); */ + PXOR X12, X0 // X0 = X0 ^ X12 /* STORE( &S->h[0], _mm_xor_si128( LOAD( &S->h[0] ), row1l ) ); */ + PXOR X13, X1 // X1 = X1 ^ X13 /* STORE( &S->h[2], _mm_xor_si128( LOAD( &S->h[2] ), row1h ) ); */ + PXOR X6, X2 // X2 = X2 ^ X6 /* row2l = _mm_xor_si128( row4l, row2l ); */ + PXOR X7, X3 // X3 = X3 ^ X7 /* row2h = _mm_xor_si128( row4h, row2h ); */ + PXOR X14, X2 // X2 = X2 ^ X14 /* STORE( &S->h[4], _mm_xor_si128( LOAD( &S->h[4] ), row2l ) ); */ + PXOR X15, X3 // X3 = X3 ^ X15 /* STORE( &S->h[6], _mm_xor_si128( LOAD( &S->h[6] ), row2h ) ); */ + + // Store digest into &out + MOVQ out+144(FP), SI // SI: &out + MOVOU X0, 0(SI) // out[0]+out[1] = X0 + MOVOU X1, 16(SI) // out[2]+out[3] = X1 + MOVOU X2, 32(SI) // out[4]+out[5] = X2 + MOVOU X3, 48(SI) // out[6]+out[7] = X3 + + // Increment message pointer and check if there's more to do + ADDQ $128, DX // message += 128 + SUBQ $1, R8 + JNZ loop + +complete: + RET diff --git a/vendor/github.com/minio/blake2b-simd/compress_amd64.go b/vendor/github.com/minio/blake2b-simd/compress_amd64.go new file mode 100644 index 000000000..4fc5e388c --- /dev/null +++ b/vendor/github.com/minio/blake2b-simd/compress_amd64.go @@ -0,0 +1,30 @@ +/* + * Minio Cloud Storage, (C) 2016 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package blake2b + +func compress(d *digest, p []uint8) { + // Verifies if AVX2 or AVX is available, use optimized code path. + if avx2 { + compressAVX2(d, p) + } else if avx { + compressAVX(d, p) + } else if ssse3 { + compressSSE(d, p) + } else { + compressGeneric(d, p) + } +} diff --git a/vendor/github.com/minio/blake2b-simd/compress_generic.go b/vendor/github.com/minio/blake2b-simd/compress_generic.go new file mode 100644 index 000000000..e9e16e8b9 --- /dev/null +++ b/vendor/github.com/minio/blake2b-simd/compress_generic.go @@ -0,0 +1,1419 @@ +// Written in 2012 by Dmitry Chestnykh. +// +// To the extent possible under law, the author have dedicated all copyright +// and related and neighboring rights to this software to the public domain +// worldwide. This software is distributed without any warranty. +// http://creativecommons.org/publicdomain/zero/1.0/ + +package blake2b + +func compressGeneric(d *digest, p []uint8) { + h0, h1, h2, h3, h4, h5, h6, h7 := d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] + + for len(p) >= BlockSize { + // Increment counter. + d.t[0] += BlockSize + if d.t[0] < BlockSize { + d.t[1]++ + } + // Initialize compression function. + v0, v1, v2, v3, v4, v5, v6, v7 := h0, h1, h2, h3, h4, h5, h6, h7 + v8 := iv[0] + v9 := iv[1] + v10 := iv[2] + v11 := iv[3] + v12 := iv[4] ^ d.t[0] + v13 := iv[5] ^ d.t[1] + v14 := iv[6] ^ d.f[0] + v15 := iv[7] ^ d.f[1] + + j := 0 + var m [16]uint64 + for i := range m { + m[i] = uint64(p[j]) | uint64(p[j+1])<<8 | uint64(p[j+2])<<16 | + uint64(p[j+3])<<24 | uint64(p[j+4])<<32 | uint64(p[j+5])<<40 | + uint64(p[j+6])<<48 | uint64(p[j+7])<<56 + j += 8 + } + + // Round 1. + v0 += m[0] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-32) | v12>>32 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-24) | v4>>24 + v1 += m[2] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-32) | v13>>32 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-24) | v5>>24 + v2 += m[4] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-32) | v14>>32 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-24) | v6>>24 + v3 += m[6] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-32) | v15>>32 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-24) | v7>>24 + v2 += m[5] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-16) | v14>>16 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-63) | v6>>63 + v3 += m[7] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-16) | v15>>16 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-63) | v7>>63 + v1 += m[3] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-16) | v13>>16 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-63) | v5>>63 + v0 += m[1] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-16) | v12>>16 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-63) | v4>>63 + v0 += m[8] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-32) | v15>>32 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-24) | v5>>24 + v1 += m[10] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-32) | v12>>32 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-24) | v6>>24 + v2 += m[12] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-32) | v13>>32 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-24) | v7>>24 + v3 += m[14] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-32) | v14>>32 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-24) | v4>>24 + v2 += m[13] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-16) | v13>>16 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-63) | v7>>63 + v3 += m[15] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-16) | v14>>16 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-63) | v4>>63 + v1 += m[11] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-16) | v12>>16 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-63) | v6>>63 + v0 += m[9] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-16) | v15>>16 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-63) | v5>>63 + + // Round 2. + v0 += m[14] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-32) | v12>>32 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-24) | v4>>24 + v1 += m[4] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-32) | v13>>32 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-24) | v5>>24 + v2 += m[9] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-32) | v14>>32 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-24) | v6>>24 + v3 += m[13] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-32) | v15>>32 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-24) | v7>>24 + v2 += m[15] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-16) | v14>>16 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-63) | v6>>63 + v3 += m[6] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-16) | v15>>16 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-63) | v7>>63 + v1 += m[8] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-16) | v13>>16 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-63) | v5>>63 + v0 += m[10] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-16) | v12>>16 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-63) | v4>>63 + v0 += m[1] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-32) | v15>>32 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-24) | v5>>24 + v1 += m[0] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-32) | v12>>32 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-24) | v6>>24 + v2 += m[11] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-32) | v13>>32 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-24) | v7>>24 + v3 += m[5] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-32) | v14>>32 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-24) | v4>>24 + v2 += m[7] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-16) | v13>>16 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-63) | v7>>63 + v3 += m[3] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-16) | v14>>16 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-63) | v4>>63 + v1 += m[2] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-16) | v12>>16 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-63) | v6>>63 + v0 += m[12] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-16) | v15>>16 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-63) | v5>>63 + + // Round 3. + v0 += m[11] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-32) | v12>>32 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-24) | v4>>24 + v1 += m[12] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-32) | v13>>32 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-24) | v5>>24 + v2 += m[5] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-32) | v14>>32 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-24) | v6>>24 + v3 += m[15] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-32) | v15>>32 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-24) | v7>>24 + v2 += m[2] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-16) | v14>>16 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-63) | v6>>63 + v3 += m[13] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-16) | v15>>16 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-63) | v7>>63 + v1 += m[0] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-16) | v13>>16 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-63) | v5>>63 + v0 += m[8] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-16) | v12>>16 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-63) | v4>>63 + v0 += m[10] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-32) | v15>>32 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-24) | v5>>24 + v1 += m[3] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-32) | v12>>32 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-24) | v6>>24 + v2 += m[7] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-32) | v13>>32 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-24) | v7>>24 + v3 += m[9] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-32) | v14>>32 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-24) | v4>>24 + v2 += m[1] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-16) | v13>>16 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-63) | v7>>63 + v3 += m[4] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-16) | v14>>16 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-63) | v4>>63 + v1 += m[6] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-16) | v12>>16 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-63) | v6>>63 + v0 += m[14] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-16) | v15>>16 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-63) | v5>>63 + + // Round 4. + v0 += m[7] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-32) | v12>>32 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-24) | v4>>24 + v1 += m[3] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-32) | v13>>32 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-24) | v5>>24 + v2 += m[13] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-32) | v14>>32 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-24) | v6>>24 + v3 += m[11] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-32) | v15>>32 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-24) | v7>>24 + v2 += m[12] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-16) | v14>>16 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-63) | v6>>63 + v3 += m[14] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-16) | v15>>16 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-63) | v7>>63 + v1 += m[1] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-16) | v13>>16 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-63) | v5>>63 + v0 += m[9] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-16) | v12>>16 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-63) | v4>>63 + v0 += m[2] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-32) | v15>>32 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-24) | v5>>24 + v1 += m[5] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-32) | v12>>32 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-24) | v6>>24 + v2 += m[4] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-32) | v13>>32 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-24) | v7>>24 + v3 += m[15] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-32) | v14>>32 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-24) | v4>>24 + v2 += m[0] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-16) | v13>>16 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-63) | v7>>63 + v3 += m[8] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-16) | v14>>16 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-63) | v4>>63 + v1 += m[10] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-16) | v12>>16 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-63) | v6>>63 + v0 += m[6] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-16) | v15>>16 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-63) | v5>>63 + + // Round 5. + v0 += m[9] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-32) | v12>>32 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-24) | v4>>24 + v1 += m[5] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-32) | v13>>32 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-24) | v5>>24 + v2 += m[2] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-32) | v14>>32 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-24) | v6>>24 + v3 += m[10] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-32) | v15>>32 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-24) | v7>>24 + v2 += m[4] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-16) | v14>>16 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-63) | v6>>63 + v3 += m[15] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-16) | v15>>16 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-63) | v7>>63 + v1 += m[7] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-16) | v13>>16 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-63) | v5>>63 + v0 += m[0] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-16) | v12>>16 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-63) | v4>>63 + v0 += m[14] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-32) | v15>>32 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-24) | v5>>24 + v1 += m[11] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-32) | v12>>32 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-24) | v6>>24 + v2 += m[6] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-32) | v13>>32 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-24) | v7>>24 + v3 += m[3] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-32) | v14>>32 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-24) | v4>>24 + v2 += m[8] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-16) | v13>>16 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-63) | v7>>63 + v3 += m[13] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-16) | v14>>16 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-63) | v4>>63 + v1 += m[12] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-16) | v12>>16 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-63) | v6>>63 + v0 += m[1] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-16) | v15>>16 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-63) | v5>>63 + + // Round 6. + v0 += m[2] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-32) | v12>>32 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-24) | v4>>24 + v1 += m[6] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-32) | v13>>32 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-24) | v5>>24 + v2 += m[0] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-32) | v14>>32 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-24) | v6>>24 + v3 += m[8] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-32) | v15>>32 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-24) | v7>>24 + v2 += m[11] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-16) | v14>>16 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-63) | v6>>63 + v3 += m[3] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-16) | v15>>16 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-63) | v7>>63 + v1 += m[10] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-16) | v13>>16 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-63) | v5>>63 + v0 += m[12] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-16) | v12>>16 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-63) | v4>>63 + v0 += m[4] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-32) | v15>>32 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-24) | v5>>24 + v1 += m[7] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-32) | v12>>32 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-24) | v6>>24 + v2 += m[15] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-32) | v13>>32 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-24) | v7>>24 + v3 += m[1] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-32) | v14>>32 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-24) | v4>>24 + v2 += m[14] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-16) | v13>>16 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-63) | v7>>63 + v3 += m[9] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-16) | v14>>16 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-63) | v4>>63 + v1 += m[5] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-16) | v12>>16 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-63) | v6>>63 + v0 += m[13] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-16) | v15>>16 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-63) | v5>>63 + + // Round 7. + v0 += m[12] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-32) | v12>>32 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-24) | v4>>24 + v1 += m[1] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-32) | v13>>32 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-24) | v5>>24 + v2 += m[14] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-32) | v14>>32 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-24) | v6>>24 + v3 += m[4] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-32) | v15>>32 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-24) | v7>>24 + v2 += m[13] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-16) | v14>>16 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-63) | v6>>63 + v3 += m[10] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-16) | v15>>16 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-63) | v7>>63 + v1 += m[15] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-16) | v13>>16 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-63) | v5>>63 + v0 += m[5] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-16) | v12>>16 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-63) | v4>>63 + v0 += m[0] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-32) | v15>>32 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-24) | v5>>24 + v1 += m[6] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-32) | v12>>32 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-24) | v6>>24 + v2 += m[9] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-32) | v13>>32 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-24) | v7>>24 + v3 += m[8] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-32) | v14>>32 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-24) | v4>>24 + v2 += m[2] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-16) | v13>>16 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-63) | v7>>63 + v3 += m[11] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-16) | v14>>16 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-63) | v4>>63 + v1 += m[3] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-16) | v12>>16 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-63) | v6>>63 + v0 += m[7] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-16) | v15>>16 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-63) | v5>>63 + + // Round 8. + v0 += m[13] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-32) | v12>>32 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-24) | v4>>24 + v1 += m[7] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-32) | v13>>32 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-24) | v5>>24 + v2 += m[12] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-32) | v14>>32 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-24) | v6>>24 + v3 += m[3] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-32) | v15>>32 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-24) | v7>>24 + v2 += m[1] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-16) | v14>>16 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-63) | v6>>63 + v3 += m[9] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-16) | v15>>16 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-63) | v7>>63 + v1 += m[14] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-16) | v13>>16 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-63) | v5>>63 + v0 += m[11] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-16) | v12>>16 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-63) | v4>>63 + v0 += m[5] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-32) | v15>>32 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-24) | v5>>24 + v1 += m[15] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-32) | v12>>32 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-24) | v6>>24 + v2 += m[8] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-32) | v13>>32 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-24) | v7>>24 + v3 += m[2] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-32) | v14>>32 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-24) | v4>>24 + v2 += m[6] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-16) | v13>>16 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-63) | v7>>63 + v3 += m[10] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-16) | v14>>16 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-63) | v4>>63 + v1 += m[4] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-16) | v12>>16 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-63) | v6>>63 + v0 += m[0] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-16) | v15>>16 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-63) | v5>>63 + + // Round 9. + v0 += m[6] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-32) | v12>>32 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-24) | v4>>24 + v1 += m[14] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-32) | v13>>32 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-24) | v5>>24 + v2 += m[11] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-32) | v14>>32 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-24) | v6>>24 + v3 += m[0] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-32) | v15>>32 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-24) | v7>>24 + v2 += m[3] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-16) | v14>>16 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-63) | v6>>63 + v3 += m[8] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-16) | v15>>16 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-63) | v7>>63 + v1 += m[9] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-16) | v13>>16 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-63) | v5>>63 + v0 += m[15] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-16) | v12>>16 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-63) | v4>>63 + v0 += m[12] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-32) | v15>>32 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-24) | v5>>24 + v1 += m[13] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-32) | v12>>32 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-24) | v6>>24 + v2 += m[1] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-32) | v13>>32 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-24) | v7>>24 + v3 += m[10] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-32) | v14>>32 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-24) | v4>>24 + v2 += m[4] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-16) | v13>>16 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-63) | v7>>63 + v3 += m[5] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-16) | v14>>16 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-63) | v4>>63 + v1 += m[7] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-16) | v12>>16 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-63) | v6>>63 + v0 += m[2] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-16) | v15>>16 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-63) | v5>>63 + + // Round 10. + v0 += m[10] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-32) | v12>>32 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-24) | v4>>24 + v1 += m[8] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-32) | v13>>32 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-24) | v5>>24 + v2 += m[7] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-32) | v14>>32 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-24) | v6>>24 + v3 += m[1] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-32) | v15>>32 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-24) | v7>>24 + v2 += m[6] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-16) | v14>>16 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-63) | v6>>63 + v3 += m[5] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-16) | v15>>16 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-63) | v7>>63 + v1 += m[4] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-16) | v13>>16 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-63) | v5>>63 + v0 += m[2] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-16) | v12>>16 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-63) | v4>>63 + v0 += m[15] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-32) | v15>>32 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-24) | v5>>24 + v1 += m[9] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-32) | v12>>32 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-24) | v6>>24 + v2 += m[3] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-32) | v13>>32 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-24) | v7>>24 + v3 += m[13] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-32) | v14>>32 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-24) | v4>>24 + v2 += m[12] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-16) | v13>>16 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-63) | v7>>63 + v3 += m[0] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-16) | v14>>16 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-63) | v4>>63 + v1 += m[14] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-16) | v12>>16 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-63) | v6>>63 + v0 += m[11] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-16) | v15>>16 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-63) | v5>>63 + + // Round 11. + v0 += m[0] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-32) | v12>>32 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-24) | v4>>24 + v1 += m[2] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-32) | v13>>32 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-24) | v5>>24 + v2 += m[4] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-32) | v14>>32 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-24) | v6>>24 + v3 += m[6] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-32) | v15>>32 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-24) | v7>>24 + v2 += m[5] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-16) | v14>>16 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-63) | v6>>63 + v3 += m[7] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-16) | v15>>16 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-63) | v7>>63 + v1 += m[3] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-16) | v13>>16 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-63) | v5>>63 + v0 += m[1] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-16) | v12>>16 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-63) | v4>>63 + v0 += m[8] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-32) | v15>>32 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-24) | v5>>24 + v1 += m[10] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-32) | v12>>32 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-24) | v6>>24 + v2 += m[12] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-32) | v13>>32 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-24) | v7>>24 + v3 += m[14] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-32) | v14>>32 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-24) | v4>>24 + v2 += m[13] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-16) | v13>>16 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-63) | v7>>63 + v3 += m[15] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-16) | v14>>16 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-63) | v4>>63 + v1 += m[11] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-16) | v12>>16 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-63) | v6>>63 + v0 += m[9] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-16) | v15>>16 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-63) | v5>>63 + + // Round 12. + v0 += m[14] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-32) | v12>>32 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-24) | v4>>24 + v1 += m[4] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-32) | v13>>32 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-24) | v5>>24 + v2 += m[9] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-32) | v14>>32 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-24) | v6>>24 + v3 += m[13] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-32) | v15>>32 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-24) | v7>>24 + v2 += m[15] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-16) | v14>>16 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-63) | v6>>63 + v3 += m[6] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-16) | v15>>16 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-63) | v7>>63 + v1 += m[8] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-16) | v13>>16 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-63) | v5>>63 + v0 += m[10] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-16) | v12>>16 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-63) | v4>>63 + v0 += m[1] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-32) | v15>>32 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-24) | v5>>24 + v1 += m[0] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-32) | v12>>32 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-24) | v6>>24 + v2 += m[11] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-32) | v13>>32 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-24) | v7>>24 + v3 += m[5] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-32) | v14>>32 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-24) | v4>>24 + v2 += m[7] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-16) | v13>>16 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-63) | v7>>63 + v3 += m[3] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-16) | v14>>16 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-63) | v4>>63 + v1 += m[2] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-16) | v12>>16 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-63) | v6>>63 + v0 += m[12] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-16) | v15>>16 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-63) | v5>>63 + + h0 ^= v0 ^ v8 + h1 ^= v1 ^ v9 + h2 ^= v2 ^ v10 + h3 ^= v3 ^ v11 + h4 ^= v4 ^ v12 + h5 ^= v5 ^ v13 + h6 ^= v6 ^ v14 + h7 ^= v7 ^ v15 + + p = p[BlockSize:] + } + d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] = h0, h1, h2, h3, h4, h5, h6, h7 +} diff --git a/vendor/github.com/minio/blake2b-simd/compress_noasm.go b/vendor/github.com/minio/blake2b-simd/compress_noasm.go new file mode 100644 index 000000000..d3c675847 --- /dev/null +++ b/vendor/github.com/minio/blake2b-simd/compress_noasm.go @@ -0,0 +1,23 @@ +//+build !amd64 noasm appengine + +/* + * Minio Cloud Storage, (C) 2016 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package blake2b + +func compress(d *digest, p []uint8) { + compressGeneric(d, p) +} diff --git a/vendor/github.com/minio/blake2b-simd/cpuid.go b/vendor/github.com/minio/blake2b-simd/cpuid.go new file mode 100644 index 000000000..a9f95508e --- /dev/null +++ b/vendor/github.com/minio/blake2b-simd/cpuid.go @@ -0,0 +1,60 @@ +// +build 386,!gccgo amd64,!gccgo + +// Copyright 2016 Frank Wessels +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package blake2b + +func cpuid(op uint32) (eax, ebx, ecx, edx uint32) +func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) +func xgetbv(index uint32) (eax, edx uint32) + +// True when SIMD instructions are available. +var avx2 = haveAVX2() +var avx = haveAVX() +var ssse3 = haveSSSE3() + +// haveAVX returns true when there is AVX support +func haveAVX() bool { + _, _, c, _ := cpuid(1) + + // Check XGETBV, OXSAVE and AVX bits + if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 { + // Check for OS support + eax, _ := xgetbv(0) + return (eax & 0x6) == 0x6 + } + return false +} + +// haveAVX2 returns true when there is AVX2 support +func haveAVX2() bool { + mfi, _, _, _ := cpuid(0) + + // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. + if mfi >= 7 && haveAVX() { + _, ebx, _, _ := cpuidex(7, 0) + return (ebx & 0x00000020) != 0 + } + return false +} + +// haveSSSE3 returns true when there is SSSE3 support +func haveSSSE3() bool { + + _, _, c, _ := cpuid(1) + + return (c & 0x00000200) != 0 +} diff --git a/vendor/github.com/minio/blake2b-simd/cpuid_386.s b/vendor/github.com/minio/blake2b-simd/cpuid_386.s new file mode 100644 index 000000000..fa38814ec --- /dev/null +++ b/vendor/github.com/minio/blake2b-simd/cpuid_386.s @@ -0,0 +1,33 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. + +// +build 386,!gccgo + +// func cpuid(op uint32) (eax, ebx, ecx, edx uint32) +TEXT ·cpuid(SB), 7, $0 + XORL CX, CX + MOVL op+0(FP), AX + CPUID + MOVL AX, eax+4(FP) + MOVL BX, ebx+8(FP) + MOVL CX, ecx+12(FP) + MOVL DX, edx+16(FP) + RET + +// func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) +TEXT ·cpuidex(SB), 7, $0 + MOVL op+0(FP), AX + MOVL op2+4(FP), CX + CPUID + MOVL AX, eax+8(FP) + MOVL BX, ebx+12(FP) + MOVL CX, ecx+16(FP) + MOVL DX, edx+20(FP) + RET + +// func xgetbv(index uint32) (eax, edx uint32) +TEXT ·xgetbv(SB), 7, $0 + MOVL index+0(FP), CX + BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV + MOVL AX, eax+4(FP) + MOVL DX, edx+8(FP) + RET diff --git a/vendor/github.com/minio/blake2b-simd/cpuid_amd64.s b/vendor/github.com/minio/blake2b-simd/cpuid_amd64.s new file mode 100644 index 000000000..fb45a6560 --- /dev/null +++ b/vendor/github.com/minio/blake2b-simd/cpuid_amd64.s @@ -0,0 +1,34 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. + +// +build amd64,!gccgo + +// func cpuid(op uint32) (eax, ebx, ecx, edx uint32) +TEXT ·cpuid(SB), 7, $0 + XORQ CX, CX + MOVL op+0(FP), AX + CPUID + MOVL AX, eax+8(FP) + MOVL BX, ebx+12(FP) + MOVL CX, ecx+16(FP) + MOVL DX, edx+20(FP) + RET + + +// func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) +TEXT ·cpuidex(SB), 7, $0 + MOVL op+0(FP), AX + MOVL op2+4(FP), CX + CPUID + MOVL AX, eax+8(FP) + MOVL BX, ebx+12(FP) + MOVL CX, ecx+16(FP) + MOVL DX, edx+20(FP) + RET + +// func xgetbv(index uint32) (eax, edx uint32) +TEXT ·xgetbv(SB), 7, $0 + MOVL index+0(FP), CX + BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV + MOVL AX, eax+8(FP) + MOVL DX, edx+12(FP) + RET diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b.go b/vendor/golang.org/x/crypto/blake2b/blake2b.go deleted file mode 100644 index fa9e48e31..000000000 --- a/vendor/golang.org/x/crypto/blake2b/blake2b.go +++ /dev/null @@ -1,194 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package blake2b implements the BLAKE2b hash algorithm as -// defined in RFC 7693. -package blake2b - -import ( - "encoding/binary" - "errors" - "hash" -) - -const ( - // The blocksize of BLAKE2b in bytes. - BlockSize = 128 - // The hash size of BLAKE2b-512 in bytes. - Size = 64 - // The hash size of BLAKE2b-384 in bytes. - Size384 = 48 - // The hash size of BLAKE2b-256 in bytes. - Size256 = 32 -) - -var ( - useAVX2 bool - useAVX bool - useSSE4 bool -) - -var errKeySize = errors.New("blake2b: invalid key size") - -var iv = [8]uint64{ - 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, - 0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179, -} - -// Sum512 returns the BLAKE2b-512 checksum of the data. -func Sum512(data []byte) [Size]byte { - var sum [Size]byte - checkSum(&sum, Size, data) - return sum -} - -// Sum384 returns the BLAKE2b-384 checksum of the data. -func Sum384(data []byte) [Size384]byte { - var sum [Size]byte - var sum384 [Size384]byte - checkSum(&sum, Size384, data) - copy(sum384[:], sum[:Size384]) - return sum384 -} - -// Sum256 returns the BLAKE2b-256 checksum of the data. -func Sum256(data []byte) [Size256]byte { - var sum [Size]byte - var sum256 [Size256]byte - checkSum(&sum, Size256, data) - copy(sum256[:], sum[:Size256]) - return sum256 -} - -// New512 returns a new hash.Hash computing the BLAKE2b-512 checksum. A non-nil -// key turns the hash into a MAC. The key must between zero and 64 bytes long. -func New512(key []byte) (hash.Hash, error) { return newDigest(Size, key) } - -// New384 returns a new hash.Hash computing the BLAKE2b-384 checksum. A non-nil -// key turns the hash into a MAC. The key must between zero and 64 bytes long. -func New384(key []byte) (hash.Hash, error) { return newDigest(Size384, key) } - -// New256 returns a new hash.Hash computing the BLAKE2b-256 checksum. A non-nil -// key turns the hash into a MAC. The key must between zero and 64 bytes long. -func New256(key []byte) (hash.Hash, error) { return newDigest(Size256, key) } - -func newDigest(hashSize int, key []byte) (*digest, error) { - if len(key) > Size { - return nil, errKeySize - } - d := &digest{ - size: hashSize, - keyLen: len(key), - } - copy(d.key[:], key) - d.Reset() - return d, nil -} - -func checkSum(sum *[Size]byte, hashSize int, data []byte) { - h := iv - h[0] ^= uint64(hashSize) | (1 << 16) | (1 << 24) - var c [2]uint64 - - if length := len(data); length > BlockSize { - n := length &^ (BlockSize - 1) - if length == n { - n -= BlockSize - } - hashBlocks(&h, &c, 0, data[:n]) - data = data[n:] - } - - var block [BlockSize]byte - offset := copy(block[:], data) - remaining := uint64(BlockSize - offset) - if c[0] < remaining { - c[1]-- - } - c[0] -= remaining - - hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:]) - - for i, v := range h[:(hashSize+7)/8] { - binary.LittleEndian.PutUint64(sum[8*i:], v) - } -} - -type digest struct { - h [8]uint64 - c [2]uint64 - size int - block [BlockSize]byte - offset int - - key [BlockSize]byte - keyLen int -} - -func (d *digest) BlockSize() int { return BlockSize } - -func (d *digest) Size() int { return d.size } - -func (d *digest) Reset() { - d.h = iv - d.h[0] ^= uint64(d.size) | (uint64(d.keyLen) << 8) | (1 << 16) | (1 << 24) - d.offset, d.c[0], d.c[1] = 0, 0, 0 - if d.keyLen > 0 { - d.block = d.key - d.offset = BlockSize - } -} - -func (d *digest) Write(p []byte) (n int, err error) { - n = len(p) - - if d.offset > 0 { - remaining := BlockSize - d.offset - if n <= remaining { - d.offset += copy(d.block[d.offset:], p) - return - } - copy(d.block[d.offset:], p[:remaining]) - hashBlocks(&d.h, &d.c, 0, d.block[:]) - d.offset = 0 - p = p[remaining:] - } - - if length := len(p); length > BlockSize { - nn := length &^ (BlockSize - 1) - if length == nn { - nn -= BlockSize - } - hashBlocks(&d.h, &d.c, 0, p[:nn]) - p = p[nn:] - } - - if len(p) > 0 { - d.offset += copy(d.block[:], p) - } - - return -} - -func (d *digest) Sum(b []byte) []byte { - var block [BlockSize]byte - copy(block[:], d.block[:d.offset]) - remaining := uint64(BlockSize - d.offset) - - c := d.c - if c[0] < remaining { - c[1]-- - } - c[0] -= remaining - - h := d.h - hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:]) - - var sum [Size]byte - for i, v := range h[:(d.size+7)/8] { - binary.LittleEndian.PutUint64(sum[8*i:], v) - } - - return append(b, sum[:d.size]...) -} diff --git a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.go b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.go deleted file mode 100644 index 8c41cf6c7..000000000 --- a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.go +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build go1.7,amd64,!gccgo,!appengine - -package blake2b - -func init() { - useAVX2 = supportsAVX2() - useAVX = supportsAVX() - useSSE4 = supportsSSE4() -} - -//go:noescape -func supportsSSE4() bool - -//go:noescape -func supportsAVX() bool - -//go:noescape -func supportsAVX2() bool - -//go:noescape -func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) - -//go:noescape -func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) - -//go:noescape -func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) - -func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) { - if useAVX2 { - hashBlocksAVX2(h, c, flag, blocks) - } else if useAVX { - hashBlocksAVX(h, c, flag, blocks) - } else if useSSE4 { - hashBlocksSSE4(h, c, flag, blocks) - } else { - hashBlocksGeneric(h, c, flag, blocks) - } -} diff --git a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s deleted file mode 100644 index 96a51d524..000000000 --- a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s +++ /dev/null @@ -1,502 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build go1.7,amd64,!gccgo,!appengine - -#include "textflag.h" - -DATA ·AVX2_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 -DATA ·AVX2_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b -DATA ·AVX2_iv0<>+0x10(SB)/8, $0x3c6ef372fe94f82b -DATA ·AVX2_iv0<>+0x18(SB)/8, $0xa54ff53a5f1d36f1 -GLOBL ·AVX2_iv0<>(SB), (NOPTR+RODATA), $32 - -DATA ·AVX2_iv1<>+0x00(SB)/8, $0x510e527fade682d1 -DATA ·AVX2_iv1<>+0x08(SB)/8, $0x9b05688c2b3e6c1f -DATA ·AVX2_iv1<>+0x10(SB)/8, $0x1f83d9abfb41bd6b -DATA ·AVX2_iv1<>+0x18(SB)/8, $0x5be0cd19137e2179 -GLOBL ·AVX2_iv1<>(SB), (NOPTR+RODATA), $32 - -DATA ·AVX2_c40<>+0x00(SB)/8, $0x0201000706050403 -DATA ·AVX2_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b -DATA ·AVX2_c40<>+0x10(SB)/8, $0x0201000706050403 -DATA ·AVX2_c40<>+0x18(SB)/8, $0x0a09080f0e0d0c0b -GLOBL ·AVX2_c40<>(SB), (NOPTR+RODATA), $32 - -DATA ·AVX2_c48<>+0x00(SB)/8, $0x0100070605040302 -DATA ·AVX2_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a -DATA ·AVX2_c48<>+0x10(SB)/8, $0x0100070605040302 -DATA ·AVX2_c48<>+0x18(SB)/8, $0x09080f0e0d0c0b0a -GLOBL ·AVX2_c48<>(SB), (NOPTR+RODATA), $32 - -DATA ·AVX_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 -DATA ·AVX_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b -GLOBL ·AVX_iv0<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b -DATA ·AVX_iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1 -GLOBL ·AVX_iv1<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_iv2<>+0x00(SB)/8, $0x510e527fade682d1 -DATA ·AVX_iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f -GLOBL ·AVX_iv2<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b -DATA ·AVX_iv3<>+0x08(SB)/8, $0x5be0cd19137e2179 -GLOBL ·AVX_iv3<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_c40<>+0x00(SB)/8, $0x0201000706050403 -DATA ·AVX_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b -GLOBL ·AVX_c40<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_c48<>+0x00(SB)/8, $0x0100070605040302 -DATA ·AVX_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a -GLOBL ·AVX_c48<>(SB), (NOPTR+RODATA), $16 - -// unfortunately the BYTE representation of VPERMQ must be used -#define ROUND_AVX2(m0, m1, m2, m3, t, c40, c48) \ - VPADDQ m0, Y0, Y0; \ - VPADDQ Y1, Y0, Y0; \ - VPXOR Y0, Y3, Y3; \ - VPSHUFD $-79, Y3, Y3; \ - VPADDQ Y3, Y2, Y2; \ - VPXOR Y2, Y1, Y1; \ - VPSHUFB c40, Y1, Y1; \ - VPADDQ m1, Y0, Y0; \ - VPADDQ Y1, Y0, Y0; \ - VPXOR Y0, Y3, Y3; \ - VPSHUFB c48, Y3, Y3; \ - VPADDQ Y3, Y2, Y2; \ - VPXOR Y2, Y1, Y1; \ - VPADDQ Y1, Y1, t; \ - VPSRLQ $63, Y1, Y1; \ - VPXOR t, Y1, Y1; \ - BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x39 \ // VPERMQ 0x39, Y1, Y1 - BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2; BYTE $0x4e \ // VPERMQ 0x4e, Y2, Y2 - BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x93 \ // VPERMQ 0x93, Y3, Y3 - VPADDQ m2, Y0, Y0; \ - VPADDQ Y1, Y0, Y0; \ - VPXOR Y0, Y3, Y3; \ - VPSHUFD $-79, Y3, Y3; \ - VPADDQ Y3, Y2, Y2; \ - VPXOR Y2, Y1, Y1; \ - VPSHUFB c40, Y1, Y1; \ - VPADDQ m3, Y0, Y0; \ - VPADDQ Y1, Y0, Y0; \ - VPXOR Y0, Y3, Y3; \ - VPSHUFB c48, Y3, Y3; \ - VPADDQ Y3, Y2, Y2; \ - VPXOR Y2, Y1, Y1; \ - VPADDQ Y1, Y1, t; \ - VPSRLQ $63, Y1, Y1; \ - VPXOR t, Y1, Y1; \ - BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x39 \ // VPERMQ 0x39, Y3, Y3 - BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2; BYTE $0x4e \ // VPERMQ 0x4e, Y2, Y2 - BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x93 \ // VPERMQ 0x93, Y1, Y1 - -// load msg into Y12, Y13, Y14, Y15 -#define LOAD_MSG_AVX2(src, i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15) \ - MOVQ i0*8(src), X12; \ - PINSRQ $1, i1*8(src), X12; \ - MOVQ i2*8(src), X11; \ - PINSRQ $1, i3*8(src), X11; \ - VINSERTI128 $1, X11, Y12, Y12; \ - MOVQ i4*8(src), X13; \ - PINSRQ $1, i5*8(src), X13; \ - MOVQ i6*8(src), X11; \ - PINSRQ $1, i7*8(src), X11; \ - VINSERTI128 $1, X11, Y13, Y13; \ - MOVQ i8*8(src), X14; \ - PINSRQ $1, i9*8(src), X14; \ - MOVQ i10*8(src), X11; \ - PINSRQ $1, i11*8(src), X11; \ - VINSERTI128 $1, X11, Y14, Y14; \ - MOVQ i12*8(src), X15; \ - PINSRQ $1, i13*8(src), X15; \ - MOVQ i14*8(src), X11; \ - PINSRQ $1, i15*8(src), X11; \ - VINSERTI128 $1, X11, Y15, Y15 - -// func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) -TEXT ·hashBlocksAVX2(SB), 4, $320-48 // frame size = 288 + 32 byte alignment - MOVQ h+0(FP), AX - MOVQ c+8(FP), BX - MOVQ flag+16(FP), CX - MOVQ blocks_base+24(FP), SI - MOVQ blocks_len+32(FP), DI - - MOVQ SP, DX - MOVQ SP, R9 - ADDQ $31, R9 - ANDQ $~31, R9 - MOVQ R9, SP - - MOVQ CX, 16(SP) - XORQ CX, CX - MOVQ CX, 24(SP) - - VMOVDQU ·AVX2_c40<>(SB), Y4 - VMOVDQU ·AVX2_c48<>(SB), Y5 - - VMOVDQU 0(AX), Y8 - VMOVDQU 32(AX), Y9 - VMOVDQU ·AVX2_iv0<>(SB), Y6 - VMOVDQU ·AVX2_iv1<>(SB), Y7 - - MOVQ 0(BX), R8 - MOVQ 8(BX), R9 - MOVQ R9, 8(SP) - -loop: - ADDQ $128, R8 - MOVQ R8, 0(SP) - CMPQ R8, $128 - JGE noinc - INCQ R9 - MOVQ R9, 8(SP) - -noinc: - VMOVDQA Y8, Y0 - VMOVDQA Y9, Y1 - VMOVDQA Y6, Y2 - VPXOR 0(SP), Y7, Y3 - - LOAD_MSG_AVX2(SI, 0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15) - VMOVDQA Y12, 32(SP) - VMOVDQA Y13, 64(SP) - VMOVDQA Y14, 96(SP) - VMOVDQA Y15, 128(SP) - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2(SI, 14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3) - VMOVDQA Y12, 160(SP) - VMOVDQA Y13, 192(SP) - VMOVDQA Y14, 224(SP) - VMOVDQA Y15, 256(SP) - - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2(SI, 11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4) - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2(SI, 7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8) - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2(SI, 9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13) - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2(SI, 2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9) - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2(SI, 12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11) - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2(SI, 13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10) - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2(SI, 6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5) - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2(SI, 10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0) - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - - ROUND_AVX2(32(SP), 64(SP), 96(SP), 128(SP), Y10, Y4, Y5) - ROUND_AVX2(160(SP), 192(SP), 224(SP), 256(SP), Y10, Y4, Y5) - - VPXOR Y0, Y8, Y8 - VPXOR Y1, Y9, Y9 - VPXOR Y2, Y8, Y8 - VPXOR Y3, Y9, Y9 - - LEAQ 128(SI), SI - SUBQ $128, DI - JNE loop - - MOVQ R8, 0(BX) - MOVQ R9, 8(BX) - - VMOVDQU Y8, 0(AX) - VMOVDQU Y9, 32(AX) - - MOVQ DX, SP - RET - -// unfortunately the BYTE representation of VPUNPCKLQDQ and VPUNPCKHQDQ must be used -#define VPUNPCKLQDQ_X8_X8_X10 BYTE $0xC4; BYTE $0x41; BYTE $0x39; BYTE $0x6C; BYTE $0xD0 -#define VPUNPCKHQDQ_X7_X10_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xF2 -#define VPUNPCKLQDQ_X7_X7_X10 BYTE $0xC5; BYTE $0x41; BYTE $0x6C; BYTE $0xD7 -#define VPUNPCKHQDQ_X8_X10_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x39; BYTE $0x6D; BYTE $0xFA -#define VPUNPCKLQDQ_X3_X3_X10 BYTE $0xC5; BYTE $0x61; BYTE $0x6C; BYTE $0xD3 -#define VPUNPCKHQDQ_X2_X10_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x69; BYTE $0x6D; BYTE $0xD2 -#define VPUNPCKLQDQ_X9_X9_X10 BYTE $0xC4; BYTE $0x41; BYTE $0x31; BYTE $0x6C; BYTE $0xD1 -#define VPUNPCKHQDQ_X3_X10_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xDA -#define VPUNPCKLQDQ_X2_X2_X10 BYTE $0xC5; BYTE $0x69; BYTE $0x6C; BYTE $0xD2 -#define VPUNPCKHQDQ_X3_X10_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xD2 -#define VPUNPCKHQDQ_X8_X10_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x39; BYTE $0x6D; BYTE $0xDA -#define VPUNPCKHQDQ_X6_X10_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x49; BYTE $0x6D; BYTE $0xF2 -#define VPUNPCKHQDQ_X7_X10_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xFA - -// shuffle X2 and X6 using the temp registers X8, X9, X10 -#define SHUFFLE_AVX() \ - VMOVDQA X4, X9; \ - VMOVDQA X5, X4; \ - VMOVDQA X9, X5; \ - VMOVDQA X6, X8; \ - VPUNPCKLQDQ_X8_X8_X10; \ - VPUNPCKHQDQ_X7_X10_X6; \ - VPUNPCKLQDQ_X7_X7_X10; \ - VPUNPCKHQDQ_X8_X10_X7; \ - VPUNPCKLQDQ_X3_X3_X10; \ - VMOVDQA X2, X9; \ - VPUNPCKHQDQ_X2_X10_X2; \ - VPUNPCKLQDQ_X9_X9_X10; \ - VPUNPCKHQDQ_X3_X10_X3; \ - -// inverse shuffle X2 and X6 using the temp registers X8, X9, X10 -#define SHUFFLE_AVX_INV() \ - VMOVDQA X4, X9; \ - VMOVDQA X5, X4; \ - VMOVDQA X9, X5; \ - VMOVDQA X2, X8; \ - VPUNPCKLQDQ_X2_X2_X10; \ - VPUNPCKHQDQ_X3_X10_X2; \ - VPUNPCKLQDQ_X3_X3_X10; \ - VPUNPCKHQDQ_X8_X10_X3; \ - VPUNPCKLQDQ_X7_X7_X10; \ - VMOVDQA X6, X9; \ - VPUNPCKHQDQ_X6_X10_X6; \ - VPUNPCKLQDQ_X9_X9_X10; \ - VPUNPCKHQDQ_X7_X10_X7; \ - -#define HALF_ROUND_AVX(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \ - VPADDQ m0, v0, v0; \ - VPADDQ v2, v0, v0; \ - VPADDQ m1, v1, v1; \ - VPADDQ v3, v1, v1; \ - VPXOR v0, v6, v6; \ - VPXOR v1, v7, v7; \ - VPSHUFD $-79, v6, v6; \ - VPSHUFD $-79, v7, v7; \ - VPADDQ v6, v4, v4; \ - VPADDQ v7, v5, v5; \ - VPXOR v4, v2, v2; \ - VPXOR v5, v3, v3; \ - VPSHUFB c40, v2, v2; \ - VPSHUFB c40, v3, v3; \ - VPADDQ m2, v0, v0; \ - VPADDQ v2, v0, v0; \ - VPADDQ m3, v1, v1; \ - VPADDQ v3, v1, v1; \ - VPXOR v0, v6, v6; \ - VPXOR v1, v7, v7; \ - VPSHUFB c48, v6, v6; \ - VPSHUFB c48, v7, v7; \ - VPADDQ v6, v4, v4; \ - VPADDQ v7, v5, v5; \ - VPXOR v4, v2, v2; \ - VPXOR v5, v3, v3; \ - VPADDQ v2, v2, t0; \ - VPSRLQ $63, v2, v2; \ - VPXOR t0, v2, v2; \ - VPADDQ v3, v3, t0; \ - VPSRLQ $63, v3, v3; \ - VPXOR t0, v3, v3 - -// unfortunately the BYTE representation of VPINSRQ must be used -#define VPINSRQ_1_R10_X8_X8 BYTE $0xC4; BYTE $0x43; BYTE $0xB9; BYTE $0x22; BYTE $0xC2; BYTE $0x01 -#define VPINSRQ_1_R11_X9_X9 BYTE $0xC4; BYTE $0x43; BYTE $0xB1; BYTE $0x22; BYTE $0xCB; BYTE $0x01 -#define VPINSRQ_1_R12_X10_X10 BYTE $0xC4; BYTE $0x43; BYTE $0xA9; BYTE $0x22; BYTE $0xD4; BYTE $0x01 -#define VPINSRQ_1_R13_X11_X11 BYTE $0xC4; BYTE $0x43; BYTE $0xA1; BYTE $0x22; BYTE $0xDD; BYTE $0x01 - -#define VPINSRQ_1_R9_X8_X8 BYTE $0xC4; BYTE $0x43; BYTE $0xB9; BYTE $0x22; BYTE $0xC1; BYTE $0x01 - -// load src into X8, X9, X10 and X11 using R10, R11, R12 and R13 for temp registers -#define LOAD_MSG_AVX(src, i0, i1, i2, i3, i4, i5, i6, i7) \ - MOVQ i0*8(src), X8; \ - MOVQ i1*8(src), R10; \ - MOVQ i2*8(src), X9; \ - MOVQ i3*8(src), R11; \ - MOVQ i4*8(src), X10; \ - MOVQ i5*8(src), R12; \ - MOVQ i6*8(src), X11; \ - MOVQ i7*8(src), R13; \ - VPINSRQ_1_R10_X8_X8; \ - VPINSRQ_1_R11_X9_X9; \ - VPINSRQ_1_R12_X10_X10; \ - VPINSRQ_1_R13_X11_X11 - -// func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) -TEXT ·hashBlocksAVX(SB), 4, $288-48 // frame size = 272 + 16 byte alignment - MOVQ h+0(FP), AX - MOVQ c+8(FP), BX - MOVQ flag+16(FP), CX - MOVQ blocks_base+24(FP), SI - MOVQ blocks_len+32(FP), DI - - MOVQ SP, BP - MOVQ SP, R9 - ADDQ $15, R9 - ANDQ $~15, R9 - MOVQ R9, SP - - MOVOU ·AVX_c40<>(SB), X13 - MOVOU ·AVX_c48<>(SB), X14 - - VMOVDQU ·AVX_iv3<>(SB), X0 - VMOVDQA X0, 0(SP) - XORQ CX, 0(SP) // 0(SP) = ·AVX_iv3 ^ (CX || 0) - - VMOVDQU 0(AX), X12 - VMOVDQU 16(AX), X15 - VMOVDQU 32(AX), X2 - VMOVDQU 48(AX), X3 - - MOVQ 0(BX), R8 - MOVQ 8(BX), R9 - -loop: - ADDQ $128, R8 - CMPQ R8, $128 - JGE noinc - INCQ R9 - -noinc: - MOVQ R8, X8 - VPINSRQ_1_R9_X8_X8 - - VMOVDQA X12, X0 - VMOVDQA X15, X1 - VMOVDQU ·AVX_iv0<>(SB), X4 - VMOVDQU ·AVX_iv1<>(SB), X5 - VMOVDQU ·AVX_iv2<>(SB), X6 - - VPXOR X8, X6, X6 - VMOVDQA 0(SP), X7 - - LOAD_MSG_AVX(SI, 0, 2, 4, 6, 1, 3, 5, 7) - VMOVDQA X8, 16(SP) - VMOVDQA X9, 32(SP) - VMOVDQA X10, 48(SP) - VMOVDQA X11, 64(SP) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_AVX() - LOAD_MSG_AVX(SI, 8, 10, 12, 14, 9, 11, 13, 15) - VMOVDQA X8, 80(SP) - VMOVDQA X9, 96(SP) - VMOVDQA X10, 112(SP) - VMOVDQA X11, 128(SP) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(SI, 14, 4, 9, 13, 10, 8, 15, 6) - VMOVDQA X8, 144(SP) - VMOVDQA X9, 160(SP) - VMOVDQA X10, 176(SP) - VMOVDQA X11, 192(SP) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_AVX() - LOAD_MSG_AVX(SI, 1, 0, 11, 5, 12, 2, 7, 3) - VMOVDQA X8, 208(SP) - VMOVDQA X9, 224(SP) - VMOVDQA X10, 240(SP) - VMOVDQA X11, 256(SP) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(SI, 11, 12, 5, 15, 8, 0, 2, 13) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_AVX() - LOAD_MSG_AVX(SI, 10, 3, 7, 9, 14, 6, 1, 4) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(SI, 7, 3, 13, 11, 9, 1, 12, 14) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_AVX() - LOAD_MSG_AVX(SI, 2, 5, 4, 15, 6, 10, 0, 8) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(SI, 9, 5, 2, 10, 0, 7, 4, 15) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_AVX() - LOAD_MSG_AVX(SI, 14, 11, 6, 3, 1, 12, 8, 13) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(SI, 2, 6, 0, 8, 12, 10, 11, 3) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_AVX() - LOAD_MSG_AVX(SI, 4, 7, 15, 1, 13, 5, 14, 9) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(SI, 12, 1, 14, 4, 5, 15, 13, 10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_AVX() - LOAD_MSG_AVX(SI, 0, 6, 9, 8, 7, 3, 2, 11) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(SI, 13, 7, 12, 3, 11, 14, 1, 9) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_AVX() - LOAD_MSG_AVX(SI, 5, 15, 8, 2, 0, 4, 6, 10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(SI, 6, 14, 11, 0, 15, 9, 3, 8) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_AVX() - LOAD_MSG_AVX(SI, 12, 13, 1, 10, 2, 7, 4, 5) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(SI, 10, 8, 7, 1, 2, 4, 6, 5) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_AVX() - LOAD_MSG_AVX(SI, 15, 9, 3, 13, 11, 14, 12, 0) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_AVX_INV() - - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 16(SP), 32(SP), 48(SP), 64(SP), X11, X13, X14) - SHUFFLE_AVX() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 80(SP), 96(SP), 112(SP), 128(SP), X11, X13, X14) - SHUFFLE_AVX_INV() - - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 144(SP), 160(SP), 176(SP), 192(SP), X11, X13, X14) - SHUFFLE_AVX() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 208(SP), 224(SP), 240(SP), 256(SP), X11, X13, X14) - SHUFFLE_AVX_INV() - - VMOVDQU 32(AX), X10 - VMOVDQU 48(AX), X11 - VPXOR X0, X12, X12 - VPXOR X1, X15, X15 - VPXOR X2, X10, X10 - VPXOR X3, X11, X11 - VPXOR X4, X12, X12 - VPXOR X5, X15, X15 - VPXOR X6, X10, X2 - VPXOR X7, X11, X3 - VMOVDQU X2, 32(AX) - VMOVDQU X3, 48(AX) - - LEAQ 128(SI), SI - SUBQ $128, DI - JNE loop - - VMOVDQU X12, 0(AX) - VMOVDQU X15, 16(AX) - - MOVQ R8, 0(BX) - MOVQ R9, 8(BX) - - VZEROUPPER - - MOVQ BP, SP - RET - -// func supportsAVX2() bool -TEXT ·supportsAVX2(SB), 4, $0-1 - MOVQ runtime·support_avx2(SB), AX - MOVB AX, ret+0(FP) - RET - -// func supportsAVX() bool -TEXT ·supportsAVX(SB), 4, $0-1 - MOVQ runtime·support_avx(SB), AX - MOVB AX, ret+0(FP) - RET diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.go b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.go deleted file mode 100644 index 2ab7c30fc..000000000 --- a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.go +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !go1.7,amd64,!gccgo,!appengine - -package blake2b - -func init() { - useSSE4 = supportsSSE4() -} - -//go:noescape -func supportsSSE4() bool - -//go:noescape -func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) - -func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) { - if useSSE4 { - hashBlocksSSE4(h, c, flag, blocks) - } else { - hashBlocksGeneric(h, c, flag, blocks) - } -} diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s deleted file mode 100644 index 64530740b..000000000 --- a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s +++ /dev/null @@ -1,290 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build amd64,!gccgo,!appengine - -#include "textflag.h" - -DATA ·iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 -DATA ·iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b -GLOBL ·iv0<>(SB), (NOPTR+RODATA), $16 - -DATA ·iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b -DATA ·iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1 -GLOBL ·iv1<>(SB), (NOPTR+RODATA), $16 - -DATA ·iv2<>+0x00(SB)/8, $0x510e527fade682d1 -DATA ·iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f -GLOBL ·iv2<>(SB), (NOPTR+RODATA), $16 - -DATA ·iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b -DATA ·iv3<>+0x08(SB)/8, $0x5be0cd19137e2179 -GLOBL ·iv3<>(SB), (NOPTR+RODATA), $16 - -DATA ·c40<>+0x00(SB)/8, $0x0201000706050403 -DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b -GLOBL ·c40<>(SB), (NOPTR+RODATA), $16 - -DATA ·c48<>+0x00(SB)/8, $0x0100070605040302 -DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a -GLOBL ·c48<>(SB), (NOPTR+RODATA), $16 - -#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \ - MOVO v4, t1; \ - MOVO v5, v4; \ - MOVO t1, v5; \ - MOVO v6, t1; \ - PUNPCKLQDQ v6, t2; \ - PUNPCKHQDQ v7, v6; \ - PUNPCKHQDQ t2, v6; \ - PUNPCKLQDQ v7, t2; \ - MOVO t1, v7; \ - MOVO v2, t1; \ - PUNPCKHQDQ t2, v7; \ - PUNPCKLQDQ v3, t2; \ - PUNPCKHQDQ t2, v2; \ - PUNPCKLQDQ t1, t2; \ - PUNPCKHQDQ t2, v3 - -#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \ - MOVO v4, t1; \ - MOVO v5, v4; \ - MOVO t1, v5; \ - MOVO v2, t1; \ - PUNPCKLQDQ v2, t2; \ - PUNPCKHQDQ v3, v2; \ - PUNPCKHQDQ t2, v2; \ - PUNPCKLQDQ v3, t2; \ - MOVO t1, v3; \ - MOVO v6, t1; \ - PUNPCKHQDQ t2, v3; \ - PUNPCKLQDQ v7, t2; \ - PUNPCKHQDQ t2, v6; \ - PUNPCKLQDQ t1, t2; \ - PUNPCKHQDQ t2, v7 - -#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \ - PADDQ m0, v0; \ - PADDQ m1, v1; \ - PADDQ v2, v0; \ - PADDQ v3, v1; \ - PXOR v0, v6; \ - PXOR v1, v7; \ - PSHUFD $0xB1, v6, v6; \ - PSHUFD $0xB1, v7, v7; \ - PADDQ v6, v4; \ - PADDQ v7, v5; \ - PXOR v4, v2; \ - PXOR v5, v3; \ - PSHUFB c40, v2; \ - PSHUFB c40, v3; \ - PADDQ m2, v0; \ - PADDQ m3, v1; \ - PADDQ v2, v0; \ - PADDQ v3, v1; \ - PXOR v0, v6; \ - PXOR v1, v7; \ - PSHUFB c48, v6; \ - PSHUFB c48, v7; \ - PADDQ v6, v4; \ - PADDQ v7, v5; \ - PXOR v4, v2; \ - PXOR v5, v3; \ - MOVOU v2, t0; \ - PADDQ v2, t0; \ - PSRLQ $63, v2; \ - PXOR t0, v2; \ - MOVOU v3, t0; \ - PADDQ v3, t0; \ - PSRLQ $63, v3; \ - PXOR t0, v3 - -#define LOAD_MSG(m0, m1, m2, m3, src, i0, i1, i2, i3, i4, i5, i6, i7) \ - MOVQ i0*8(src), m0; \ - PINSRQ $1, i1*8(src), m0; \ - MOVQ i2*8(src), m1; \ - PINSRQ $1, i3*8(src), m1; \ - MOVQ i4*8(src), m2; \ - PINSRQ $1, i5*8(src), m2; \ - MOVQ i6*8(src), m3; \ - PINSRQ $1, i7*8(src), m3 - -// func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) -TEXT ·hashBlocksSSE4(SB), 4, $288-48 // frame size = 272 + 16 byte alignment - MOVQ h+0(FP), AX - MOVQ c+8(FP), BX - MOVQ flag+16(FP), CX - MOVQ blocks_base+24(FP), SI - MOVQ blocks_len+32(FP), DI - - MOVQ SP, BP - MOVQ SP, R9 - ADDQ $15, R9 - ANDQ $~15, R9 - MOVQ R9, SP - - MOVOU ·iv3<>(SB), X0 - MOVO X0, 0(SP) - XORQ CX, 0(SP) // 0(SP) = ·iv3 ^ (CX || 0) - - MOVOU ·c40<>(SB), X13 - MOVOU ·c48<>(SB), X14 - - MOVOU 0(AX), X12 - MOVOU 16(AX), X15 - - MOVQ 0(BX), R8 - MOVQ 8(BX), R9 - -loop: - ADDQ $128, R8 - CMPQ R8, $128 - JGE noinc - INCQ R9 - -noinc: - MOVQ R8, X8 - PINSRQ $1, R9, X8 - - MOVO X12, X0 - MOVO X15, X1 - MOVOU 32(AX), X2 - MOVOU 48(AX), X3 - MOVOU ·iv0<>(SB), X4 - MOVOU ·iv1<>(SB), X5 - MOVOU ·iv2<>(SB), X6 - - PXOR X8, X6 - MOVO 0(SP), X7 - - LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7) - MOVO X8, 16(SP) - MOVO X9, 32(SP) - MOVO X10, 48(SP) - MOVO X11, 64(SP) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15) - MOVO X8, 80(SP) - MOVO X9, 96(SP) - MOVO X10, 112(SP) - MOVO X11, 128(SP) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6) - MOVO X8, 144(SP) - MOVO X9, 160(SP) - MOVO X10, 176(SP) - MOVO X11, 192(SP) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3) - MOVO X8, 208(SP) - MOVO X9, 224(SP) - MOVO X10, 240(SP) - MOVO X11, 256(SP) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 10, 3, 7, 9, 14, 6, 1, 4) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 2, 5, 4, 15, 6, 10, 0, 8) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 14, 11, 6, 3, 1, 12, 8, 13) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 4, 7, 15, 1, 13, 5, 14, 9) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 0, 6, 9, 8, 7, 3, 2, 11) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 5, 15, 8, 2, 0, 4, 6, 10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 12, 13, 1, 10, 2, 7, 4, 5) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 15, 9, 3, 13, 11, 14, 12, 0) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(SP), 32(SP), 48(SP), 64(SP), X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(SP), 96(SP), 112(SP), 128(SP), X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(SP), 160(SP), 176(SP), 192(SP), X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(SP), 224(SP), 240(SP), 256(SP), X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - MOVOU 32(AX), X10 - MOVOU 48(AX), X11 - PXOR X0, X12 - PXOR X1, X15 - PXOR X2, X10 - PXOR X3, X11 - PXOR X4, X12 - PXOR X5, X15 - PXOR X6, X10 - PXOR X7, X11 - MOVOU X10, 32(AX) - MOVOU X11, 48(AX) - - LEAQ 128(SI), SI - SUBQ $128, DI - JNE loop - - MOVOU X12, 0(AX) - MOVOU X15, 16(AX) - - MOVQ R8, 0(BX) - MOVQ R9, 8(BX) - - MOVQ BP, SP - RET - -// func supportsSSE4() bool -TEXT ·supportsSSE4(SB), 4, $0-1 - MOVL $1, AX - CPUID - SHRL $19, CX // Bit 19 indicates SSE4 support - ANDL $1, CX // CX != 0 if support SSE4 - MOVB CX, ret+0(FP) - RET diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_generic.go b/vendor/golang.org/x/crypto/blake2b/blake2b_generic.go deleted file mode 100644 index 4bd2abc91..000000000 --- a/vendor/golang.org/x/crypto/blake2b/blake2b_generic.go +++ /dev/null @@ -1,179 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package blake2b - -import "encoding/binary" - -// the precomputed values for BLAKE2b -// there are 12 16-byte arrays - one for each round -// the entries are calculated from the sigma constants. -var precomputed = [12][16]byte{ - {0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15}, - {14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3}, - {11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4}, - {7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8}, - {9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13}, - {2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9}, - {12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11}, - {13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10}, - {6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5}, - {10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0}, - {0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15}, // equal to the first - {14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3}, // equal to the second -} - -func hashBlocksGeneric(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) { - var m [16]uint64 - c0, c1 := c[0], c[1] - - for i := 0; i < len(blocks); { - c0 += BlockSize - if c0 < BlockSize { - c1++ - } - - v0, v1, v2, v3, v4, v5, v6, v7 := h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7] - v8, v9, v10, v11, v12, v13, v14, v15 := iv[0], iv[1], iv[2], iv[3], iv[4], iv[5], iv[6], iv[7] - v12 ^= c0 - v13 ^= c1 - v14 ^= flag - - for j := range m { - m[j] = binary.LittleEndian.Uint64(blocks[i:]) - i += 8 - } - - for j := range precomputed { - s := &(precomputed[j]) - - v0 += m[s[0]] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-32) | v12>>32 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-24) | v4>>24 - v1 += m[s[1]] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-32) | v13>>32 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-24) | v5>>24 - v2 += m[s[2]] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-32) | v14>>32 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-24) | v6>>24 - v3 += m[s[3]] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-32) | v15>>32 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-24) | v7>>24 - - v0 += m[s[4]] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-16) | v12>>16 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-63) | v4>>63 - v1 += m[s[5]] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-16) | v13>>16 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-63) | v5>>63 - v2 += m[s[6]] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-16) | v14>>16 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-63) | v6>>63 - v3 += m[s[7]] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-16) | v15>>16 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-63) | v7>>63 - - v0 += m[s[8]] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-32) | v15>>32 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-24) | v5>>24 - v1 += m[s[9]] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-32) | v12>>32 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-24) | v6>>24 - v2 += m[s[10]] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-32) | v13>>32 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-24) | v7>>24 - v3 += m[s[11]] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-32) | v14>>32 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-24) | v4>>24 - - v0 += m[s[12]] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-16) | v15>>16 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-63) | v5>>63 - v1 += m[s[13]] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-16) | v12>>16 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-63) | v6>>63 - v2 += m[s[14]] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-16) | v13>>16 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-63) | v7>>63 - v3 += m[s[15]] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-16) | v14>>16 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-63) | v4>>63 - - } - - h[0] ^= v0 ^ v8 - h[1] ^= v1 ^ v9 - h[2] ^= v2 ^ v10 - h[3] ^= v3 ^ v11 - h[4] ^= v4 ^ v12 - h[5] ^= v5 ^ v13 - h[6] ^= v6 ^ v14 - h[7] ^= v7 ^ v15 - } - c[0], c[1] = c0, c1 -} diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_ref.go b/vendor/golang.org/x/crypto/blake2b/blake2b_ref.go deleted file mode 100644 index da156a1ba..000000000 --- a/vendor/golang.org/x/crypto/blake2b/blake2b_ref.go +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !amd64 appengine gccgo - -package blake2b - -func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) { - hashBlocksGeneric(h, c, flag, blocks) -} diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_test.go b/vendor/golang.org/x/crypto/blake2b/blake2b_test.go deleted file mode 100644 index a38fceb20..000000000 --- a/vendor/golang.org/x/crypto/blake2b/blake2b_test.go +++ /dev/null @@ -1,448 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package blake2b - -import ( - "bytes" - "encoding/hex" - "fmt" - "hash" - "testing" -) - -func fromHex(s string) []byte { - b, err := hex.DecodeString(s) - if err != nil { - panic(err) - } - return b -} - -func TestHashes(t *testing.T) { - defer func(sse4, avx, avx2 bool) { - useSSE4, useAVX, useAVX2 = sse4, useAVX, avx2 - }(useSSE4, useAVX, useAVX2) - - if useAVX2 { - t.Log("AVX2 version") - testHashes(t) - useAVX2 = false - } - if useAVX { - t.Log("AVX version") - testHashes(t) - useAVX = false - } - if useSSE4 { - t.Log("SSE4 version") - testHashes(t) - useSSE4 = false - } - t.Log("generic version") - testHashes(t) -} - -func testHashes(t *testing.T) { - key, _ := hex.DecodeString("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f") - - input := make([]byte, 255) - for i := range input { - input[i] = byte(i) - } - - for i, expectedHex := range hashes { - h, err := New512(key) - if err != nil { - t.Fatalf("#%d: error from New512: %v", i, err) - } - - h.Write(input[:i]) - sum := h.Sum(nil) - - if gotHex := fmt.Sprintf("%x", sum); gotHex != expectedHex { - t.Fatalf("#%d (single write): got %s, wanted %s", i, gotHex, expectedHex) - } - - h.Reset() - for j := 0; j < i; j++ { - h.Write(input[j : j+1]) - } - - sum = h.Sum(sum[:0]) - if gotHex := fmt.Sprintf("%x", sum); gotHex != expectedHex { - t.Fatalf("#%d (byte-by-byte): got %s, wanted %s", i, gotHex, expectedHex) - } - } -} - -func generateSequence(out []byte, seed uint32) { - a := 0xDEAD4BAD * seed // prime - b := uint32(1) - - for i := range out { // fill the buf - a, b = b, a+b - out[i] = byte(b >> 24) - } -} - -func computeMAC(msg []byte, hashSize int, key []byte) (sum []byte) { - var h hash.Hash - switch hashSize { - case Size: - h, _ = New512(key) - case Size384: - h, _ = New384(key) - case Size256: - h, _ = New256(key) - case 20: - h, _ = newDigest(20, key) - default: - panic("unexpected hashSize") - } - - h.Write(msg) - return h.Sum(sum) -} - -func computeHash(msg []byte, hashSize int) (sum []byte) { - switch hashSize { - case Size: - hash := Sum512(msg) - return hash[:] - case Size384: - hash := Sum384(msg) - return hash[:] - case Size256: - hash := Sum256(msg) - return hash[:] - case 20: - var hash [64]byte - checkSum(&hash, 20, msg) - return hash[:20] - default: - panic("unexpected hashSize") - } -} - -// Test function from RFC 7693. -func TestSelfTest(t *testing.T) { - hashLens := [4]int{20, 32, 48, 64} - msgLens := [6]int{0, 3, 128, 129, 255, 1024} - - msg := make([]byte, 1024) - key := make([]byte, 64) - - h, _ := New256(nil) - for _, hashSize := range hashLens { - for _, msgLength := range msgLens { - generateSequence(msg[:msgLength], uint32(msgLength)) // unkeyed hash - - md := computeHash(msg[:msgLength], hashSize) - h.Write(md) - - generateSequence(key[:], uint32(hashSize)) // keyed hash - md = computeMAC(msg[:msgLength], hashSize, key[:hashSize]) - h.Write(md) - } - } - - sum := h.Sum(nil) - expected := [32]byte{ - 0xc2, 0x3a, 0x78, 0x00, 0xd9, 0x81, 0x23, 0xbd, - 0x10, 0xf5, 0x06, 0xc6, 0x1e, 0x29, 0xda, 0x56, - 0x03, 0xd7, 0x63, 0xb8, 0xbb, 0xad, 0x2e, 0x73, - 0x7f, 0x5e, 0x76, 0x5a, 0x7b, 0xcc, 0xd4, 0x75, - } - if !bytes.Equal(sum, expected[:]) { - t.Fatalf("got %x, wanted %x", sum, expected) - } -} - -// Benchmarks - -func benchmarkSum(b *testing.B, size int) { - data := make([]byte, size) - b.SetBytes(int64(size)) - b.ResetTimer() - for i := 0; i < b.N; i++ { - Sum512(data) - } -} - -func benchmarkWrite(b *testing.B, size int) { - data := make([]byte, size) - h, _ := New512(nil) - b.SetBytes(int64(size)) - b.ResetTimer() - for i := 0; i < b.N; i++ { - h.Write(data) - } -} - -func BenchmarkWrite128(b *testing.B) { benchmarkWrite(b, 128) } -func BenchmarkWrite1K(b *testing.B) { benchmarkWrite(b, 1024) } - -func BenchmarkSum128(b *testing.B) { benchmarkSum(b, 128) } -func BenchmarkSum1K(b *testing.B) { benchmarkSum(b, 1024) } - -// These values were taken from https://blake2.net/blake2b-test.txt. -var hashes = []string{ - "10ebb67700b1868efb4417987acf4690ae9d972fb7a590c2f02871799aaa4786b5e996e8f0f4eb981fc214b005f42d2ff4233499391653df7aefcbc13fc51568", - "961f6dd1e4dd30f63901690c512e78e4b45e4742ed197c3c5e45c549fd25f2e4187b0bc9fe30492b16b0d0bc4ef9b0f34c7003fac09a5ef1532e69430234cebd", - "da2cfbe2d8409a0f38026113884f84b50156371ae304c4430173d08a99d9fb1b983164a3770706d537f49e0c916d9f32b95cc37a95b99d857436f0232c88a965", - "33d0825dddf7ada99b0e7e307104ad07ca9cfd9692214f1561356315e784f3e5a17e364ae9dbb14cb2036df932b77f4b292761365fb328de7afdc6d8998f5fc1", - "beaa5a3d08f3807143cf621d95cd690514d0b49efff9c91d24b59241ec0eefa5f60196d407048bba8d2146828ebcb0488d8842fd56bb4f6df8e19c4b4daab8ac", - "098084b51fd13deae5f4320de94a688ee07baea2800486689a8636117b46c1f4c1f6af7f74ae7c857600456a58a3af251dc4723a64cc7c0a5ab6d9cac91c20bb", - "6044540d560853eb1c57df0077dd381094781cdb9073e5b1b3d3f6c7829e12066bbaca96d989a690de72ca3133a83652ba284a6d62942b271ffa2620c9e75b1f", - "7a8cfe9b90f75f7ecb3acc053aaed6193112b6f6a4aeeb3f65d3de541942deb9e2228152a3c4bbbe72fc3b12629528cfbb09fe630f0474339f54abf453e2ed52", - "380beaf6ea7cc9365e270ef0e6f3a64fb902acae51dd5512f84259ad2c91f4bc4108db73192a5bbfb0cbcf71e46c3e21aee1c5e860dc96e8eb0b7b8426e6abe9", - "60fe3c4535e1b59d9a61ea8500bfac41a69dffb1ceadd9aca323e9a625b64da5763bad7226da02b9c8c4f1a5de140ac5a6c1124e4f718ce0b28ea47393aa6637", - "4fe181f54ad63a2983feaaf77d1e7235c2beb17fa328b6d9505bda327df19fc37f02c4b6f0368ce23147313a8e5738b5fa2a95b29de1c7f8264eb77b69f585cd", - "f228773ce3f3a42b5f144d63237a72d99693adb8837d0e112a8a0f8ffff2c362857ac49c11ec740d1500749dac9b1f4548108bf3155794dcc9e4082849e2b85b", - "962452a8455cc56c8511317e3b1f3b2c37df75f588e94325fdd77070359cf63a9ae6e930936fdf8e1e08ffca440cfb72c28f06d89a2151d1c46cd5b268ef8563", - "43d44bfa18768c59896bf7ed1765cb2d14af8c260266039099b25a603e4ddc5039d6ef3a91847d1088d401c0c7e847781a8a590d33a3c6cb4df0fab1c2f22355", - "dcffa9d58c2a4ca2cdbb0c7aa4c4c1d45165190089f4e983bb1c2cab4aaeff1fa2b5ee516fecd780540240bf37e56c8bcca7fab980e1e61c9400d8a9a5b14ac6", - "6fbf31b45ab0c0b8dad1c0f5f4061379912dde5aa922099a030b725c73346c524291adef89d2f6fd8dfcda6d07dad811a9314536c2915ed45da34947e83de34e", - "a0c65bddde8adef57282b04b11e7bc8aab105b99231b750c021f4a735cb1bcfab87553bba3abb0c3e64a0b6955285185a0bd35fb8cfde557329bebb1f629ee93", - "f99d815550558e81eca2f96718aed10d86f3f1cfb675cce06b0eff02f617c5a42c5aa760270f2679da2677c5aeb94f1142277f21c7f79f3c4f0cce4ed8ee62b1", - "95391da8fc7b917a2044b3d6f5374e1ca072b41454d572c7356c05fd4bc1e0f40b8bb8b4a9f6bce9be2c4623c399b0dca0dab05cb7281b71a21b0ebcd9e55670", - "04b9cd3d20d221c09ac86913d3dc63041989a9a1e694f1e639a3ba7e451840f750c2fc191d56ad61f2e7936bc0ac8e094b60caeed878c18799045402d61ceaf9", - "ec0e0ef707e4ed6c0c66f9e089e4954b058030d2dd86398fe84059631f9ee591d9d77375355149178c0cf8f8e7c49ed2a5e4f95488a2247067c208510fadc44c", - "9a37cce273b79c09913677510eaf7688e89b3314d3532fd2764c39de022a2945b5710d13517af8ddc0316624e73bec1ce67df15228302036f330ab0cb4d218dd", - "4cf9bb8fb3d4de8b38b2f262d3c40f46dfe747e8fc0a414c193d9fcf753106ce47a18f172f12e8a2f1c26726545358e5ee28c9e2213a8787aafbc516d2343152", - "64e0c63af9c808fd893137129867fd91939d53f2af04be4fa268006100069b2d69daa5c5d8ed7fddcb2a70eeecdf2b105dd46a1e3b7311728f639ab489326bc9", - "5e9c93158d659b2def06b0c3c7565045542662d6eee8a96a89b78ade09fe8b3dcc096d4fe48815d88d8f82620156602af541955e1f6ca30dce14e254c326b88f", - "7775dff889458dd11aef417276853e21335eb88e4dec9cfb4e9edb49820088551a2ca60339f12066101169f0dfe84b098fddb148d9da6b3d613df263889ad64b", - "f0d2805afbb91f743951351a6d024f9353a23c7ce1fc2b051b3a8b968c233f46f50f806ecb1568ffaa0b60661e334b21dde04f8fa155ac740eeb42e20b60d764", - "86a2af316e7d7754201b942e275364ac12ea8962ab5bd8d7fb276dc5fbffc8f9a28cae4e4867df6780d9b72524160927c855da5b6078e0b554aa91e31cb9ca1d", - "10bdf0caa0802705e706369baf8a3f79d72c0a03a80675a7bbb00be3a45e516424d1ee88efb56f6d5777545ae6e27765c3a8f5e493fc308915638933a1dfee55", - "b01781092b1748459e2e4ec178696627bf4ebafebba774ecf018b79a68aeb84917bf0b84bb79d17b743151144cd66b7b33a4b9e52c76c4e112050ff5385b7f0b", - "c6dbc61dec6eaeac81e3d5f755203c8e220551534a0b2fd105a91889945a638550204f44093dd998c076205dffad703a0e5cd3c7f438a7e634cd59fededb539e", - "eba51acffb4cea31db4b8d87e9bf7dd48fe97b0253ae67aa580f9ac4a9d941f2bea518ee286818cc9f633f2a3b9fb68e594b48cdd6d515bf1d52ba6c85a203a7", - "86221f3ada52037b72224f105d7999231c5e5534d03da9d9c0a12acb68460cd375daf8e24386286f9668f72326dbf99ba094392437d398e95bb8161d717f8991", - "5595e05c13a7ec4dc8f41fb70cb50a71bce17c024ff6de7af618d0cc4e9c32d9570d6d3ea45b86525491030c0d8f2b1836d5778c1ce735c17707df364d054347", - "ce0f4f6aca89590a37fe034dd74dd5fa65eb1cbd0a41508aaddc09351a3cea6d18cb2189c54b700c009f4cbf0521c7ea01be61c5ae09cb54f27bc1b44d658c82", - "7ee80b06a215a3bca970c77cda8761822bc103d44fa4b33f4d07dcb997e36d55298bceae12241b3fa07fa63be5576068da387b8d5859aeab701369848b176d42", - "940a84b6a84d109aab208c024c6ce9647676ba0aaa11f86dbb7018f9fd2220a6d901a9027f9abcf935372727cbf09ebd61a2a2eeb87653e8ecad1bab85dc8327", - "2020b78264a82d9f4151141adba8d44bf20c5ec062eee9b595a11f9e84901bf148f298e0c9f8777dcdbc7cc4670aac356cc2ad8ccb1629f16f6a76bcefbee760", - "d1b897b0e075ba68ab572adf9d9c436663e43eb3d8e62d92fc49c9be214e6f27873fe215a65170e6bea902408a25b49506f47babd07cecf7113ec10c5dd31252", - "b14d0c62abfa469a357177e594c10c194243ed2025ab8aa5ad2fa41ad318e0ff48cd5e60bec07b13634a711d2326e488a985f31e31153399e73088efc86a5c55", - "4169c5cc808d2697dc2a82430dc23e3cd356dc70a94566810502b8d655b39abf9e7f902fe717e0389219859e1945df1af6ada42e4ccda55a197b7100a30c30a1", - "258a4edb113d66c839c8b1c91f15f35ade609f11cd7f8681a4045b9fef7b0b24c82cda06a5f2067b368825e3914e53d6948ede92efd6e8387fa2e537239b5bee", - "79d2d8696d30f30fb34657761171a11e6c3f1e64cbe7bebee159cb95bfaf812b4f411e2f26d9c421dc2c284a3342d823ec293849e42d1e46b0a4ac1e3c86abaa", - "8b9436010dc5dee992ae38aea97f2cd63b946d94fedd2ec9671dcde3bd4ce9564d555c66c15bb2b900df72edb6b891ebcadfeff63c9ea4036a998be7973981e7", - "c8f68e696ed28242bf997f5b3b34959508e42d613810f1e2a435c96ed2ff560c7022f361a9234b9837feee90bf47922ee0fd5f8ddf823718d86d1e16c6090071", - "b02d3eee4860d5868b2c39ce39bfe81011290564dd678c85e8783f29302dfc1399ba95b6b53cd9ebbf400cca1db0ab67e19a325f2d115812d25d00978ad1bca4", - "7693ea73af3ac4dad21ca0d8da85b3118a7d1c6024cfaf557699868217bc0c2f44a199bc6c0edd519798ba05bd5b1b4484346a47c2cadf6bf30b785cc88b2baf", - "a0e5c1c0031c02e48b7f09a5e896ee9aef2f17fc9e18e997d7f6cac7ae316422c2b1e77984e5f3a73cb45deed5d3f84600105e6ee38f2d090c7d0442ea34c46d", - "41daa6adcfdb69f1440c37b596440165c15ada596813e2e22f060fcd551f24dee8e04ba6890387886ceec4a7a0d7fc6b44506392ec3822c0d8c1acfc7d5aebe8", - "14d4d40d5984d84c5cf7523b7798b254e275a3a8cc0a1bd06ebc0bee726856acc3cbf516ff667cda2058ad5c3412254460a82c92187041363cc77a4dc215e487", - "d0e7a1e2b9a447fee83e2277e9ff8010c2f375ae12fa7aaa8ca5a6317868a26a367a0b69fbc1cf32a55d34eb370663016f3d2110230eba754028a56f54acf57c", - "e771aa8db5a3e043e8178f39a0857ba04a3f18e4aa05743cf8d222b0b095825350ba422f63382a23d92e4149074e816a36c1cd28284d146267940b31f8818ea2", - "feb4fd6f9e87a56bef398b3284d2bda5b5b0e166583a66b61e538457ff0584872c21a32962b9928ffab58de4af2edd4e15d8b35570523207ff4e2a5aa7754caa", - "462f17bf005fb1c1b9e671779f665209ec2873e3e411f98dabf240a1d5ec3f95ce6796b6fc23fe171903b502023467dec7273ff74879b92967a2a43a5a183d33", - "d3338193b64553dbd38d144bea71c5915bb110e2d88180dbc5db364fd6171df317fc7268831b5aef75e4342b2fad8797ba39eddcef80e6ec08159350b1ad696d", - "e1590d585a3d39f7cb599abd479070966409a6846d4377acf4471d065d5db94129cc9be92573b05ed226be1e9b7cb0cabe87918589f80dadd4ef5ef25a93d28e", - "f8f3726ac5a26cc80132493a6fedcb0e60760c09cfc84cad178175986819665e76842d7b9fedf76dddebf5d3f56faaad4477587af21606d396ae570d8e719af2", - "30186055c07949948183c850e9a756cc09937e247d9d928e869e20bafc3cd9721719d34e04a0899b92c736084550186886efba2e790d8be6ebf040b209c439a4", - "f3c4276cb863637712c241c444c5cc1e3554e0fddb174d035819dd83eb700b4ce88df3ab3841ba02085e1a99b4e17310c5341075c0458ba376c95a6818fbb3e2", - "0aa007c4dd9d5832393040a1583c930bca7dc5e77ea53add7e2b3f7c8e231368043520d4a3ef53c969b6bbfd025946f632bd7f765d53c21003b8f983f75e2a6a", - "08e9464720533b23a04ec24f7ae8c103145f765387d738777d3d343477fd1c58db052142cab754ea674378e18766c53542f71970171cc4f81694246b717d7564", - "d37ff7ad297993e7ec21e0f1b4b5ae719cdc83c5db687527f27516cbffa822888a6810ee5c1ca7bfe3321119be1ab7bfa0a502671c8329494df7ad6f522d440f", - "dd9042f6e464dcf86b1262f6accfafbd8cfd902ed3ed89abf78ffa482dbdeeb6969842394c9a1168ae3d481a017842f660002d42447c6b22f7b72f21aae021c9", - "bd965bf31e87d70327536f2a341cebc4768eca275fa05ef98f7f1b71a0351298de006fba73fe6733ed01d75801b4a928e54231b38e38c562b2e33ea1284992fa", - "65676d800617972fbd87e4b9514e1c67402b7a331096d3bfac22f1abb95374abc942f16e9ab0ead33b87c91968a6e509e119ff07787b3ef483e1dcdccf6e3022", - "939fa189699c5d2c81ddd1ffc1fa207c970b6a3685bb29ce1d3e99d42f2f7442da53e95a72907314f4588399a3ff5b0a92beb3f6be2694f9f86ecf2952d5b41c", - "c516541701863f91005f314108ceece3c643e04fc8c42fd2ff556220e616aaa6a48aeb97a84bad74782e8dff96a1a2fa949339d722edcaa32b57067041df88cc", - "987fd6e0d6857c553eaebb3d34970a2c2f6e89a3548f492521722b80a1c21a153892346d2cba6444212d56da9a26e324dccbc0dcde85d4d2ee4399eec5a64e8f", - "ae56deb1c2328d9c4017706bce6e99d41349053ba9d336d677c4c27d9fd50ae6aee17e853154e1f4fe7672346da2eaa31eea53fcf24a22804f11d03da6abfc2b", - "49d6a608c9bde4491870498572ac31aac3fa40938b38a7818f72383eb040ad39532bc06571e13d767e6945ab77c0bdc3b0284253343f9f6c1244ebf2ff0df866", - "da582ad8c5370b4469af862aa6467a2293b2b28bd80ae0e91f425ad3d47249fdf98825cc86f14028c3308c9804c78bfeeeee461444ce243687e1a50522456a1d", - "d5266aa3331194aef852eed86d7b5b2633a0af1c735906f2e13279f14931a9fc3b0eac5ce9245273bd1aa92905abe16278ef7efd47694789a7283b77da3c70f8", - "2962734c28252186a9a1111c732ad4de4506d4b4480916303eb7991d659ccda07a9911914bc75c418ab7a4541757ad054796e26797feaf36e9f6ad43f14b35a4", - "e8b79ec5d06e111bdfafd71e9f5760f00ac8ac5d8bf768f9ff6f08b8f026096b1cc3a4c973333019f1e3553e77da3f98cb9f542e0a90e5f8a940cc58e59844b3", - "dfb320c44f9d41d1efdcc015f08dd5539e526e39c87d509ae6812a969e5431bf4fa7d91ffd03b981e0d544cf72d7b1c0374f8801482e6dea2ef903877eba675e", - "d88675118fdb55a5fb365ac2af1d217bf526ce1ee9c94b2f0090b2c58a06ca58187d7fe57c7bed9d26fca067b4110eefcd9a0a345de872abe20de368001b0745", - "b893f2fc41f7b0dd6e2f6aa2e0370c0cff7df09e3acfcc0e920b6e6fad0ef747c40668417d342b80d2351e8c175f20897a062e9765e6c67b539b6ba8b9170545", - "6c67ec5697accd235c59b486d7b70baeedcbd4aa64ebd4eef3c7eac189561a726250aec4d48cadcafbbe2ce3c16ce2d691a8cce06e8879556d4483ed7165c063", - "f1aa2b044f8f0c638a3f362e677b5d891d6fd2ab0765f6ee1e4987de057ead357883d9b405b9d609eea1b869d97fb16d9b51017c553f3b93c0a1e0f1296fedcd", - "cbaa259572d4aebfc1917acddc582b9f8dfaa928a198ca7acd0f2aa76a134a90252e6298a65b08186a350d5b7626699f8cb721a3ea5921b753ae3a2dce24ba3a", - "fa1549c9796cd4d303dcf452c1fbd5744fd9b9b47003d920b92de34839d07ef2a29ded68f6fc9e6c45e071a2e48bd50c5084e96b657dd0404045a1ddefe282ed", - "5cf2ac897ab444dcb5c8d87c495dbdb34e1838b6b629427caa51702ad0f9688525f13bec503a3c3a2c80a65e0b5715e8afab00ffa56ec455a49a1ad30aa24fcd", - "9aaf80207bace17bb7ab145757d5696bde32406ef22b44292ef65d4519c3bb2ad41a59b62cc3e94b6fa96d32a7faadae28af7d35097219aa3fd8cda31e40c275", - "af88b163402c86745cb650c2988fb95211b94b03ef290eed9662034241fd51cf398f8073e369354c43eae1052f9b63b08191caa138aa54fea889cc7024236897", - "48fa7d64e1ceee27b9864db5ada4b53d00c9bc7626555813d3cd6730ab3cc06ff342d727905e33171bde6e8476e77fb1720861e94b73a2c538d254746285f430", - "0e6fd97a85e904f87bfe85bbeb34f69e1f18105cf4ed4f87aec36c6e8b5f68bd2a6f3dc8a9ecb2b61db4eedb6b2ea10bf9cb0251fb0f8b344abf7f366b6de5ab", - "06622da5787176287fdc8fed440bad187d830099c94e6d04c8e9c954cda70c8bb9e1fc4a6d0baa831b9b78ef6648681a4867a11da93ee36e5e6a37d87fc63f6f", - "1da6772b58fabf9c61f68d412c82f182c0236d7d575ef0b58dd22458d643cd1dfc93b03871c316d8430d312995d4197f0874c99172ba004a01ee295abac24e46", - "3cd2d9320b7b1d5fb9aab951a76023fa667be14a9124e394513918a3f44096ae4904ba0ffc150b63bc7ab1eeb9a6e257e5c8f000a70394a5afd842715de15f29", - "04cdc14f7434e0b4be70cb41db4c779a88eaef6accebcb41f2d42fffe7f32a8e281b5c103a27021d0d08362250753cdf70292195a53a48728ceb5844c2d98bab", - "9071b7a8a075d0095b8fb3ae5113785735ab98e2b52faf91d5b89e44aac5b5d4ebbf91223b0ff4c71905da55342e64655d6ef8c89a4768c3f93a6dc0366b5bc8", - "ebb30240dd96c7bc8d0abe49aa4edcbb4afdc51ff9aaf720d3f9e7fbb0f9c6d6571350501769fc4ebd0b2141247ff400d4fd4be414edf37757bb90a32ac5c65a", - "8532c58bf3c8015d9d1cbe00eef1f5082f8f3632fbe9f1ed4f9dfb1fa79e8283066d77c44c4af943d76b300364aecbd0648c8a8939bd204123f4b56260422dec", - "fe9846d64f7c7708696f840e2d76cb4408b6595c2f81ec6a28a7f2f20cb88cfe6ac0b9e9b8244f08bd7095c350c1d0842f64fb01bb7f532dfcd47371b0aeeb79", - "28f17ea6fb6c42092dc264257e29746321fb5bdaea9873c2a7fa9d8f53818e899e161bc77dfe8090afd82bf2266c5c1bc930a8d1547624439e662ef695f26f24", - "ec6b7d7f030d4850acae3cb615c21dd25206d63e84d1db8d957370737ba0e98467ea0ce274c66199901eaec18a08525715f53bfdb0aacb613d342ebdceeddc3b", - "b403d3691c03b0d3418df327d5860d34bbfcc4519bfbce36bf33b208385fadb9186bc78a76c489d89fd57e7dc75412d23bcd1dae8470ce9274754bb8585b13c5", - "31fc79738b8772b3f55cd8178813b3b52d0db5a419d30ba9495c4b9da0219fac6df8e7c23a811551a62b827f256ecdb8124ac8a6792ccfecc3b3012722e94463", - "bb2039ec287091bcc9642fc90049e73732e02e577e2862b32216ae9bedcd730c4c284ef3968c368b7d37584f97bd4b4dc6ef6127acfe2e6ae2509124e66c8af4", - "f53d68d13f45edfcb9bd415e2831e938350d5380d3432278fc1c0c381fcb7c65c82dafe051d8c8b0d44e0974a0e59ec7bf7ed0459f86e96f329fc79752510fd3", - "8d568c7984f0ecdf7640fbc483b5d8c9f86634f6f43291841b309a350ab9c1137d24066b09da9944bac54d5bb6580d836047aac74ab724b887ebf93d4b32eca9", - "c0b65ce5a96ff774c456cac3b5f2c4cd359b4ff53ef93a3da0778be4900d1e8da1601e769e8f1b02d2a2f8c5b9fa10b44f1c186985468feeb008730283a6657d", - "4900bba6f5fb103ece8ec96ada13a5c3c85488e05551da6b6b33d988e611ec0fe2e3c2aa48ea6ae8986a3a231b223c5d27cec2eadde91ce07981ee652862d1e4", - "c7f5c37c7285f927f76443414d4357ff789647d7a005a5a787e03c346b57f49f21b64fa9cf4b7e45573e23049017567121a9c3d4b2b73ec5e9413577525db45a", - "ec7096330736fdb2d64b5653e7475da746c23a4613a82687a28062d3236364284ac01720ffb406cfe265c0df626a188c9e5963ace5d3d5bb363e32c38c2190a6", - "82e744c75f4649ec52b80771a77d475a3bc091989556960e276a5f9ead92a03f718742cdcfeaee5cb85c44af198adc43a4a428f5f0c2ddb0be36059f06d7df73", - "2834b7a7170f1f5b68559ab78c1050ec21c919740b784a9072f6e5d69f828d70c919c5039fb148e39e2c8a52118378b064ca8d5001cd10a5478387b966715ed6", - "16b4ada883f72f853bb7ef253efcab0c3e2161687ad61543a0d2824f91c1f81347d86be709b16996e17f2dd486927b0288ad38d13063c4a9672c39397d3789b6", - "78d048f3a69d8b54ae0ed63a573ae350d89f7c6cf1f3688930de899afa037697629b314e5cd303aa62feea72a25bf42b304b6c6bcb27fae21c16d925e1fbdac3", - "0f746a48749287ada77a82961f05a4da4abdb7d77b1220f836d09ec814359c0ec0239b8c7b9ff9e02f569d1b301ef67c4612d1de4f730f81c12c40cc063c5caa", - "f0fc859d3bd195fbdc2d591e4cdac15179ec0f1dc821c11df1f0c1d26e6260aaa65b79fafacafd7d3ad61e600f250905f5878c87452897647a35b995bcadc3a3", - "2620f687e8625f6a412460b42e2cef67634208ce10a0cbd4dff7044a41b7880077e9f8dc3b8d1216d3376a21e015b58fb279b521d83f9388c7382c8505590b9b", - "227e3aed8d2cb10b918fcb04f9de3e6d0a57e08476d93759cd7b2ed54a1cbf0239c528fb04bbf288253e601d3bc38b21794afef90b17094a182cac557745e75f", - "1a929901b09c25f27d6b35be7b2f1c4745131fdebca7f3e2451926720434e0db6e74fd693ad29b777dc3355c592a361c4873b01133a57c2e3b7075cbdb86f4fc", - "5fd7968bc2fe34f220b5e3dc5af9571742d73b7d60819f2888b629072b96a9d8ab2d91b82d0a9aaba61bbd39958132fcc4257023d1eca591b3054e2dc81c8200", - "dfcce8cf32870cc6a503eadafc87fd6f78918b9b4d0737db6810be996b5497e7e5cc80e312f61e71ff3e9624436073156403f735f56b0b01845c18f6caf772e6", - "02f7ef3a9ce0fff960f67032b296efca3061f4934d690749f2d01c35c81c14f39a67fa350bc8a0359bf1724bffc3bca6d7c7bba4791fd522a3ad353c02ec5aa8", - "64be5c6aba65d594844ae78bb022e5bebe127fd6b6ffa5a13703855ab63b624dcd1a363f99203f632ec386f3ea767fc992e8ed9686586aa27555a8599d5b808f", - "f78585505c4eaa54a8b5be70a61e735e0ff97af944ddb3001e35d86c4e2199d976104b6ae31750a36a726ed285064f5981b503889fef822fcdc2898dddb7889a", - "e4b5566033869572edfd87479a5bb73c80e8759b91232879d96b1dda36c012076ee5a2ed7ae2de63ef8406a06aea82c188031b560beafb583fb3de9e57952a7e", - "e1b3e7ed867f6c9484a2a97f7715f25e25294e992e41f6a7c161ffc2adc6daaeb7113102d5e6090287fe6ad94ce5d6b739c6ca240b05c76fb73f25dd024bf935", - "85fd085fdc12a080983df07bd7012b0d402a0f4043fcb2775adf0bad174f9b08d1676e476985785c0a5dcc41dbff6d95ef4d66a3fbdc4a74b82ba52da0512b74", - "aed8fa764b0fbff821e05233d2f7b0900ec44d826f95e93c343c1bc3ba5a24374b1d616e7e7aba453a0ada5e4fab5382409e0d42ce9c2bc7fb39a99c340c20f0", - "7ba3b2e297233522eeb343bd3ebcfd835a04007735e87f0ca300cbee6d416565162171581e4020ff4cf176450f1291ea2285cb9ebffe4c56660627685145051c", - "de748bcf89ec88084721e16b85f30adb1a6134d664b5843569babc5bbd1a15ca9b61803c901a4fef32965a1749c9f3a4e243e173939dc5a8dc495c671ab52145", - "aaf4d2bdf200a919706d9842dce16c98140d34bc433df320aba9bd429e549aa7a3397652a4d768277786cf993cde2338673ed2e6b66c961fefb82cd20c93338f", - "c408218968b788bf864f0997e6bc4c3dba68b276e2125a4843296052ff93bf5767b8cdce7131f0876430c1165fec6c4f47adaa4fd8bcfacef463b5d3d0fa61a0", - "76d2d819c92bce55fa8e092ab1bf9b9eab237a25267986cacf2b8ee14d214d730dc9a5aa2d7b596e86a1fd8fa0804c77402d2fcd45083688b218b1cdfa0dcbcb", - "72065ee4dd91c2d8509fa1fc28a37c7fc9fa7d5b3f8ad3d0d7a25626b57b1b44788d4caf806290425f9890a3a2a35a905ab4b37acfd0da6e4517b2525c9651e4", - "64475dfe7600d7171bea0b394e27c9b00d8e74dd1e416a79473682ad3dfdbb706631558055cfc8a40e07bd015a4540dcdea15883cbbf31412df1de1cd4152b91", - "12cd1674a4488a5d7c2b3160d2e2c4b58371bedad793418d6f19c6ee385d70b3e06739369d4df910edb0b0a54cbff43d54544cd37ab3a06cfa0a3ddac8b66c89", - "60756966479dedc6dd4bcff8ea7d1d4ce4d4af2e7b097e32e3763518441147cc12b3c0ee6d2ecabf1198cec92e86a3616fba4f4e872f5825330adbb4c1dee444", - "a7803bcb71bc1d0f4383dde1e0612e04f872b715ad30815c2249cf34abb8b024915cb2fc9f4e7cc4c8cfd45be2d5a91eab0941c7d270e2da4ca4a9f7ac68663a", - "b84ef6a7229a34a750d9a98ee2529871816b87fbe3bc45b45fa5ae82d5141540211165c3c5d7a7476ba5a4aa06d66476f0d9dc49a3f1ee72c3acabd498967414", - "fae4b6d8efc3f8c8e64d001dabec3a21f544e82714745251b2b4b393f2f43e0da3d403c64db95a2cb6e23ebb7b9e94cdd5ddac54f07c4a61bd3cb10aa6f93b49", - "34f7286605a122369540141ded79b8957255da2d4155abbf5a8dbb89c8eb7ede8eeef1daa46dc29d751d045dc3b1d658bb64b80ff8589eddb3824b13da235a6b", - "3b3b48434be27b9eababba43bf6b35f14b30f6a88dc2e750c358470d6b3aa3c18e47db4017fa55106d8252f016371a00f5f8b070b74ba5f23cffc5511c9f09f0", - "ba289ebd6562c48c3e10a8ad6ce02e73433d1e93d7c9279d4d60a7e879ee11f441a000f48ed9f7c4ed87a45136d7dccdca482109c78a51062b3ba4044ada2469", - "022939e2386c5a37049856c850a2bb10a13dfea4212b4c732a8840a9ffa5faf54875c5448816b2785a007da8a8d2bc7d71a54e4e6571f10b600cbdb25d13ede3", - "e6fec19d89ce8717b1a087024670fe026f6c7cbda11caef959bb2d351bf856f8055d1c0ebdaaa9d1b17886fc2c562b5e99642fc064710c0d3488a02b5ed7f6fd", - "94c96f02a8f576aca32ba61c2b206f907285d9299b83ac175c209a8d43d53bfe683dd1d83e7549cb906c28f59ab7c46f8751366a28c39dd5fe2693c9019666c8", - "31a0cd215ebd2cb61de5b9edc91e6195e31c59a5648d5c9f737e125b2605708f2e325ab3381c8dce1a3e958886f1ecdc60318f882cfe20a24191352e617b0f21", - "91ab504a522dce78779f4c6c6ba2e6b6db5565c76d3e7e7c920caf7f757ef9db7c8fcf10e57f03379ea9bf75eb59895d96e149800b6aae01db778bb90afbc989", - "d85cabc6bd5b1a01a5afd8c6734740da9fd1c1acc6db29bfc8a2e5b668b028b6b3154bfb8703fa3180251d589ad38040ceb707c4bad1b5343cb426b61eaa49c1", - "d62efbec2ca9c1f8bd66ce8b3f6a898cb3f7566ba6568c618ad1feb2b65b76c3ce1dd20f7395372faf28427f61c9278049cf0140df434f5633048c86b81e0399", - "7c8fdc6175439e2c3db15bafa7fb06143a6a23bc90f449e79deef73c3d492a671715c193b6fea9f036050b946069856b897e08c00768f5ee5ddcf70b7cd6d0e0", - "58602ee7468e6bc9df21bd51b23c005f72d6cb013f0a1b48cbec5eca299299f97f09f54a9a01483eaeb315a6478bad37ba47ca1347c7c8fc9e6695592c91d723", - "27f5b79ed256b050993d793496edf4807c1d85a7b0a67c9c4fa99860750b0ae66989670a8ffd7856d7ce411599e58c4d77b232a62bef64d15275be46a68235ff", - "3957a976b9f1887bf004a8dca942c92d2b37ea52600f25e0c9bc5707d0279c00c6e85a839b0d2d8eb59c51d94788ebe62474a791cadf52cccf20f5070b6573fc", - "eaa2376d55380bf772ecca9cb0aa4668c95c707162fa86d518c8ce0ca9bf7362b9f2a0adc3ff59922df921b94567e81e452f6c1a07fc817cebe99604b3505d38", - "c1e2c78b6b2734e2480ec550434cb5d613111adcc21d475545c3b1b7e6ff12444476e5c055132e2229dc0f807044bb919b1a5662dd38a9ee65e243a3911aed1a", - "8ab48713389dd0fcf9f965d3ce66b1e559a1f8c58741d67683cd971354f452e62d0207a65e436c5d5d8f8ee71c6abfe50e669004c302b31a7ea8311d4a916051", - "24ce0addaa4c65038bd1b1c0f1452a0b128777aabc94a29df2fd6c7e2f85f8ab9ac7eff516b0e0a825c84a24cfe492eaad0a6308e46dd42fe8333ab971bb30ca", - "5154f929ee03045b6b0c0004fa778edee1d139893267cc84825ad7b36c63de32798e4a166d24686561354f63b00709a1364b3c241de3febf0754045897467cd4", - "e74e907920fd87bd5ad636dd11085e50ee70459c443e1ce5809af2bc2eba39f9e6d7128e0e3712c316da06f4705d78a4838e28121d4344a2c79c5e0db307a677", - "bf91a22334bac20f3fd80663b3cd06c4e8802f30e6b59f90d3035cc9798a217ed5a31abbda7fa6842827bdf2a7a1c21f6fcfccbb54c6c52926f32da816269be1", - "d9d5c74be5121b0bd742f26bffb8c89f89171f3f934913492b0903c271bbe2b3395ef259669bef43b57f7fcc3027db01823f6baee66e4f9fead4d6726c741fce", - "50c8b8cf34cd879f80e2faab3230b0c0e1cc3e9dcadeb1b9d97ab923415dd9a1fe38addd5c11756c67990b256e95ad6d8f9fedce10bf1c90679cde0ecf1be347", - "0a386e7cd5dd9b77a035e09fe6fee2c8ce61b5383c87ea43205059c5e4cd4f4408319bb0a82360f6a58e6c9ce3f487c446063bf813bc6ba535e17fc1826cfc91", - "1f1459cb6b61cbac5f0efe8fc487538f42548987fcd56221cfa7beb22504769e792c45adfb1d6b3d60d7b749c8a75b0bdf14e8ea721b95dca538ca6e25711209", - "e58b3836b7d8fedbb50ca5725c6571e74c0785e97821dab8b6298c10e4c079d4a6cdf22f0fedb55032925c16748115f01a105e77e00cee3d07924dc0d8f90659", - "b929cc6505f020158672deda56d0db081a2ee34c00c1100029bdf8ea98034fa4bf3e8655ec697fe36f40553c5bb46801644a627d3342f4fc92b61f03290fb381", - "72d353994b49d3e03153929a1e4d4f188ee58ab9e72ee8e512f29bc773913819ce057ddd7002c0433ee0a16114e3d156dd2c4a7e80ee53378b8670f23e33ef56", - "c70ef9bfd775d408176737a0736d68517ce1aaad7e81a93c8c1ed967ea214f56c8a377b1763e676615b60f3988241eae6eab9685a5124929d28188f29eab06f7", - "c230f0802679cb33822ef8b3b21bf7a9a28942092901d7dac3760300831026cf354c9232df3e084d9903130c601f63c1f4a4a4b8106e468cd443bbe5a734f45f", - "6f43094cafb5ebf1f7a4937ec50f56a4c9da303cbb55ac1f27f1f1976cd96beda9464f0e7b9c54620b8a9fba983164b8be3578425a024f5fe199c36356b88972", - "3745273f4c38225db2337381871a0c6aafd3af9b018c88aa02025850a5dc3a42a1a3e03e56cbf1b0876d63a441f1d2856a39b8801eb5af325201c415d65e97fe", - "c50c44cca3ec3edaae779a7e179450ebdda2f97067c690aa6c5a4ac7c30139bb27c0df4db3220e63cb110d64f37ffe078db72653e2daacf93ae3f0a2d1a7eb2e", - "8aef263e385cbc61e19b28914243262af5afe8726af3ce39a79c27028cf3ecd3f8d2dfd9cfc9ad91b58f6f20778fd5f02894a3d91c7d57d1e4b866a7f364b6be", - "28696141de6e2d9bcb3235578a66166c1448d3e905a1b482d423be4bc5369bc8c74dae0acc9cc123e1d8ddce9f97917e8c019c552da32d39d2219b9abf0fa8c8", - "2fb9eb2085830181903a9dafe3db428ee15be7662224efd643371fb25646aee716e531eca69b2bdc8233f1a8081fa43da1500302975a77f42fa592136710e9dc", - "66f9a7143f7a3314a669bf2e24bbb35014261d639f495b6c9c1f104fe8e320aca60d4550d69d52edbd5a3cdeb4014ae65b1d87aa770b69ae5c15f4330b0b0ad8", - "f4c4dd1d594c3565e3e25ca43dad82f62abea4835ed4cd811bcd975e46279828d44d4c62c3679f1b7f7b9dd4571d7b49557347b8c5460cbdc1bef690fb2a08c0", - "8f1dc9649c3a84551f8f6e91cac68242a43b1f8f328ee92280257387fa7559aa6db12e4aeadc2d26099178749c6864b357f3f83b2fb3efa8d2a8db056bed6bcc", - "3139c1a7f97afd1675d460ebbc07f2728aa150df849624511ee04b743ba0a833092f18c12dc91b4dd243f333402f59fe28abdbbbae301e7b659c7a26d5c0f979", - "06f94a2996158a819fe34c40de3cf0379fd9fb85b3e363ba3926a0e7d960e3f4c2e0c70c7ce0ccb2a64fc29869f6e7ab12bd4d3f14fce943279027e785fb5c29", - "c29c399ef3eee8961e87565c1ce263925fc3d0ce267d13e48dd9e732ee67b0f69fad56401b0f10fcaac119201046cca28c5b14abdea3212ae65562f7f138db3d", - "4cec4c9df52eef05c3f6faaa9791bc7445937183224ecc37a1e58d0132d35617531d7e795f52af7b1eb9d147de1292d345fe341823f8e6bc1e5badca5c656108", - "898bfbae93b3e18d00697eab7d9704fa36ec339d076131cefdf30edbe8d9cc81c3a80b129659b163a323bab9793d4feed92d54dae966c77529764a09be88db45", - "ee9bd0469d3aaf4f14035be48a2c3b84d9b4b1fff1d945e1f1c1d38980a951be197b25fe22c731f20aeacc930ba9c4a1f4762227617ad350fdabb4e80273a0f4", - "3d4d3113300581cd96acbf091c3d0f3c310138cd6979e6026cde623e2dd1b24d4a8638bed1073344783ad0649cc6305ccec04beb49f31c633088a99b65130267", - "95c0591ad91f921ac7be6d9ce37e0663ed8011c1cfd6d0162a5572e94368bac02024485e6a39854aa46fe38e97d6c6b1947cd272d86b06bb5b2f78b9b68d559d", - "227b79ded368153bf46c0a3ca978bfdbef31f3024a5665842468490b0ff748ae04e7832ed4c9f49de9b1706709d623e5c8c15e3caecae8d5e433430ff72f20eb", - "5d34f3952f0105eef88ae8b64c6ce95ebfade0e02c69b08762a8712d2e4911ad3f941fc4034dc9b2e479fdbcd279b902faf5d838bb2e0c6495d372b5b7029813", - "7f939bf8353abce49e77f14f3750af20b7b03902e1a1e7fb6aaf76d0259cd401a83190f15640e74f3e6c5a90e839c7821f6474757f75c7bf9002084ddc7a62dc", - "062b61a2f9a33a71d7d0a06119644c70b0716a504de7e5e1be49bd7b86e7ed6817714f9f0fc313d06129597e9a2235ec8521de36f7290a90ccfc1ffa6d0aee29", - "f29e01eeae64311eb7f1c6422f946bf7bea36379523e7b2bbaba7d1d34a22d5ea5f1c5a09d5ce1fe682cced9a4798d1a05b46cd72dff5c1b355440b2a2d476bc", - "ec38cd3bbab3ef35d7cb6d5c914298351d8a9dc97fcee051a8a02f58e3ed6184d0b7810a5615411ab1b95209c3c810114fdeb22452084e77f3f847c6dbaafe16", - "c2aef5e0ca43e82641565b8cb943aa8ba53550caef793b6532fafad94b816082f0113a3ea2f63608ab40437ecc0f0229cb8fa224dcf1c478a67d9b64162b92d1", - "15f534efff7105cd1c254d074e27d5898b89313b7d366dc2d7d87113fa7d53aae13f6dba487ad8103d5e854c91fdb6e1e74b2ef6d1431769c30767dde067a35c", - "89acbca0b169897a0a2714c2df8c95b5b79cb69390142b7d6018bb3e3076b099b79a964152a9d912b1b86412b7e372e9cecad7f25d4cbab8a317be36492a67d7", - "e3c0739190ed849c9c962fd9dbb55e207e624fcac1eb417691515499eea8d8267b7e8f1287a63633af5011fde8c4ddf55bfdf722edf88831414f2cfaed59cb9a", - "8d6cf87c08380d2d1506eee46fd4222d21d8c04e585fbfd08269c98f702833a156326a0724656400ee09351d57b440175e2a5de93cc5f80db6daf83576cf75fa", - "da24bede383666d563eeed37f6319baf20d5c75d1635a6ba5ef4cfa1ac95487e96f8c08af600aab87c986ebad49fc70a58b4890b9c876e091016daf49e1d322e", - "f9d1d1b1e87ea7ae753a029750cc1cf3d0157d41805e245c5617bb934e732f0ae3180b78e05bfe76c7c3051e3e3ac78b9b50c05142657e1e03215d6ec7bfd0fc", - "11b7bc1668032048aa43343de476395e814bbbc223678db951a1b03a021efac948cfbe215f97fe9a72a2f6bc039e3956bfa417c1a9f10d6d7ba5d3d32ff323e5", - "b8d9000e4fc2b066edb91afee8e7eb0f24e3a201db8b6793c0608581e628ed0bcc4e5aa6787992a4bcc44e288093e63ee83abd0bc3ec6d0934a674a4da13838a", - "ce325e294f9b6719d6b61278276ae06a2564c03bb0b783fafe785bdf89c7d5acd83e78756d301b445699024eaeb77b54d477336ec2a4f332f2b3f88765ddb0c3", - "29acc30e9603ae2fccf90bf97e6cc463ebe28c1b2f9b4b765e70537c25c702a29dcbfbf14c99c54345ba2b51f17b77b5f15db92bbad8fa95c471f5d070a137cc", - "3379cbaae562a87b4c0425550ffdd6bfe1203f0d666cc7ea095be407a5dfe61ee91441cd5154b3e53b4f5fb31ad4c7a9ad5c7af4ae679aa51a54003a54ca6b2d", - "3095a349d245708c7cf550118703d7302c27b60af5d4e67fc978f8a4e60953c7a04f92fcf41aee64321ccb707a895851552b1e37b00bc5e6b72fa5bcef9e3fff", - "07262d738b09321f4dbccec4bb26f48cb0f0ed246ce0b31b9a6e7bc683049f1f3e5545f28ce932dd985c5ab0f43bd6de0770560af329065ed2e49d34624c2cbb", - "b6405eca8ee3316c87061cc6ec18dba53e6c250c63ba1f3bae9e55dd3498036af08cd272aa24d713c6020d77ab2f3919af1a32f307420618ab97e73953994fb4", - "7ee682f63148ee45f6e5315da81e5c6e557c2c34641fc509c7a5701088c38a74756168e2cd8d351e88fd1a451f360a01f5b2580f9b5a2e8cfc138f3dd59a3ffc", - "1d263c179d6b268f6fa016f3a4f29e943891125ed8593c81256059f5a7b44af2dcb2030d175c00e62ecaf7ee96682aa07ab20a611024a28532b1c25b86657902", - "106d132cbdb4cd2597812846e2bc1bf732fec5f0a5f65dbb39ec4e6dc64ab2ce6d24630d0f15a805c3540025d84afa98e36703c3dbee713e72dde8465bc1be7e", - "0e79968226650667a8d862ea8da4891af56a4e3a8b6d1750e394f0dea76d640d85077bcec2cc86886e506751b4f6a5838f7f0b5fef765d9dc90dcdcbaf079f08", - "521156a82ab0c4e566e5844d5e31ad9aaf144bbd5a464fdca34dbd5717e8ff711d3ffebbfa085d67fe996a34f6d3e4e60b1396bf4b1610c263bdbb834d560816", - "1aba88befc55bc25efbce02db8b9933e46f57661baeabeb21cc2574d2a518a3cba5dc5a38e49713440b25f9c744e75f6b85c9d8f4681f676160f6105357b8406", - "5a9949fcb2c473cda968ac1b5d08566dc2d816d960f57e63b898fa701cf8ebd3f59b124d95bfbbedc5f1cf0e17d5eaed0c02c50b69d8a402cabcca4433b51fd4", - "b0cead09807c672af2eb2b0f06dde46cf5370e15a4096b1a7d7cbb36ec31c205fbefca00b7a4162fa89fb4fb3eb78d79770c23f44e7206664ce3cd931c291e5d", - "bb6664931ec97044e45b2ae420ae1c551a8874bc937d08e969399c3964ebdba8346cdd5d09caafe4c28ba7ec788191ceca65ddd6f95f18583e040d0f30d0364d", - "65bc770a5faa3792369803683e844b0be7ee96f29f6d6a35568006bd5590f9a4ef639b7a8061c7b0424b66b60ac34af3119905f33a9d8c3ae18382ca9b689900", - "ea9b4dca333336aaf839a45c6eaa48b8cb4c7ddabffea4f643d6357ea6628a480a5b45f2b052c1b07d1fedca918b6f1139d80f74c24510dcbaa4be70eacc1b06", - "e6342fb4a780ad975d0e24bce149989b91d360557e87994f6b457b895575cc02d0c15bad3ce7577f4c63927ff13f3e381ff7e72bdbe745324844a9d27e3f1c01", - "3e209c9b33e8e461178ab46b1c64b49a07fb745f1c8bc95fbfb94c6b87c69516651b264ef980937fad41238b91ddc011a5dd777c7efd4494b4b6ecd3a9c22ac0", - "fd6a3d5b1875d80486d6e69694a56dbb04a99a4d051f15db2689776ba1c4882e6d462a603b7015dc9f4b7450f05394303b8652cfb404a266962c41bae6e18a94", - "951e27517e6bad9e4195fc8671dee3e7e9be69cee1422cb9fecfce0dba875f7b310b93ee3a3d558f941f635f668ff832d2c1d033c5e2f0997e4c66f147344e02", - "8eba2f874f1ae84041903c7c4253c82292530fc8509550bfdc34c95c7e2889d5650b0ad8cb988e5c4894cb87fbfbb19612ea93ccc4c5cad17158b9763464b492", - "16f712eaa1b7c6354719a8e7dbdfaf55e4063a4d277d947550019b38dfb564830911057d50506136e2394c3b28945cc964967d54e3000c2181626cfb9b73efd2", - "c39639e7d5c7fb8cdd0fd3e6a52096039437122f21c78f1679cea9d78a734c56ecbeb28654b4f18e342c331f6f7229ec4b4bc281b2d80a6eb50043f31796c88c", - "72d081af99f8a173dcc9a0ac4eb3557405639a29084b54a40172912a2f8a395129d5536f0918e902f9e8fa6000995f4168ddc5f893011be6a0dbc9b8a1a3f5bb", - "c11aa81e5efd24d5fc27ee586cfd8847fbb0e27601ccece5ecca0198e3c7765393bb74457c7e7a27eb9170350e1fb53857177506be3e762cc0f14d8c3afe9077", - "c28f2150b452e6c0c424bcde6f8d72007f9310fed7f2f87de0dbb64f4479d6c1441ba66f44b2accee61609177ed340128b407ecec7c64bbe50d63d22d8627727", - "f63d88122877ec30b8c8b00d22e89000a966426112bd44166e2f525b769ccbe9b286d437a0129130dde1a86c43e04bedb594e671d98283afe64ce331de9828fd", - "348b0532880b88a6614a8d7408c3f913357fbb60e995c60205be9139e74998aede7f4581e42f6b52698f7fa1219708c14498067fd1e09502de83a77dd281150c", - "5133dc8bef725359dff59792d85eaf75b7e1dcd1978b01c35b1b85fcebc63388ad99a17b6346a217dc1a9622ebd122ecf6913c4d31a6b52a695b86af00d741a0", - "2753c4c0e98ecad806e88780ec27fccd0f5c1ab547f9e4bf1659d192c23aa2cc971b58b6802580baef8adc3b776ef7086b2545c2987f348ee3719cdef258c403", - "b1663573ce4b9d8caefc865012f3e39714b9898a5da6ce17c25a6a47931a9ddb9bbe98adaa553beed436e89578455416c2a52a525cf2862b8d1d49a2531b7391", - "64f58bd6bfc856f5e873b2a2956ea0eda0d6db0da39c8c7fc67c9f9feefcff3072cdf9e6ea37f69a44f0c61aa0da3693c2db5b54960c0281a088151db42b11e8", - "0764c7be28125d9065c4b98a69d60aede703547c66a12e17e1c618994132f5ef82482c1e3fe3146cc65376cc109f0138ed9a80e49f1f3c7d610d2f2432f20605", - "f748784398a2ff03ebeb07e155e66116a839741a336e32da71ec696001f0ad1b25cd48c69cfca7265eca1dd71904a0ce748ac4124f3571076dfa7116a9cf00e9", - "3f0dbc0186bceb6b785ba78d2a2a013c910be157bdaffae81bb6663b1a73722f7f1228795f3ecada87cf6ef0078474af73f31eca0cc200ed975b6893f761cb6d", - "d4762cd4599876ca75b2b8fe249944dbd27ace741fdab93616cbc6e425460feb51d4e7adcc38180e7fc47c89024a7f56191adb878dfde4ead62223f5a2610efe", - "cd36b3d5b4c91b90fcbba79513cfee1907d8645a162afd0cd4cf4192d4a5f4c892183a8eacdb2b6b6a9d9aa8c11ac1b261b380dbee24ca468f1bfd043c58eefe", - "98593452281661a53c48a9d8cd790826c1a1ce567738053d0bee4a91a3d5bd92eefdbabebe3204f2031ca5f781bda99ef5d8ae56e5b04a9e1ecd21b0eb05d3e1", - "771f57dd2775ccdab55921d3e8e30ccf484d61fe1c1b9c2ae819d0fb2a12fab9be70c4a7a138da84e8280435daade5bbe66af0836a154f817fb17f3397e725a3", - "c60897c6f828e21f16fbb5f15b323f87b6c8955eabf1d38061f707f608abdd993fac3070633e286cf8339ce295dd352df4b4b40b2f29da1dd50b3a05d079e6bb", - "8210cd2c2d3b135c2cf07fa0d1433cd771f325d075c6469d9c7f1ba0943cd4ab09808cabf4acb9ce5bb88b498929b4b847f681ad2c490d042db2aec94214b06b", - "1d4edfffd8fd80f7e4107840fa3aa31e32598491e4af7013c197a65b7f36dd3ac4b478456111cd4309d9243510782fa31b7c4c95fa951520d020eb7e5c36e4ef", - "af8e6e91fab46ce4873e1a50a8ef448cc29121f7f74deef34a71ef89cc00d9274bc6c2454bbb3230d8b2ec94c62b1dec85f3593bfa30ea6f7a44d7c09465a253", - "29fd384ed4906f2d13aa9fe7af905990938bed807f1832454a372ab412eea1f5625a1fcc9ac8343b7c67c5aba6e0b1cc4644654913692c6b39eb9187ceacd3ec", - "a268c7885d9874a51c44dffed8ea53e94f78456e0b2ed99ff5a3924760813826d960a15edbedbb5de5226ba4b074e71b05c55b9756bb79e55c02754c2c7b6c8a", - "0cf8545488d56a86817cd7ecb10f7116b7ea530a45b6ea497b6c72c997e09e3d0da8698f46bb006fc977c2cd3d1177463ac9057fdd1662c85d0c126443c10473", - "b39614268fdd8781515e2cfebf89b4d5402bab10c226e6344e6b9ae000fb0d6c79cb2f3ec80e80eaeb1980d2f8698916bd2e9f747236655116649cd3ca23a837", - "74bef092fc6f1e5dba3663a3fb003b2a5ba257496536d99f62b9d73f8f9eb3ce9ff3eec709eb883655ec9eb896b9128f2afc89cf7d1ab58a72f4a3bf034d2b4a", - "3a988d38d75611f3ef38b8774980b33e573b6c57bee0469ba5eed9b44f29945e7347967fba2c162e1c3be7f310f2f75ee2381e7bfd6b3f0baea8d95dfb1dafb1", - "58aedfce6f67ddc85a28c992f1c0bd0969f041e66f1ee88020a125cbfcfebcd61709c9c4eba192c15e69f020d462486019fa8dea0cd7a42921a19d2fe546d43d", - "9347bd291473e6b4e368437b8e561e065f649a6d8ada479ad09b1999a8f26b91cf6120fd3bfe014e83f23acfa4c0ad7b3712b2c3c0733270663112ccd9285cd9", - "b32163e7c5dbb5f51fdc11d2eac875efbbcb7e7699090a7e7ff8a8d50795af5d74d9ff98543ef8cdf89ac13d0485278756e0ef00c817745661e1d59fe38e7537", - "1085d78307b1c4b008c57a2e7e5b234658a0a82e4ff1e4aaac72b312fda0fe27d233bc5b10e9cc17fdc7697b540c7d95eb215a19a1a0e20e1abfa126efd568c7", - "4e5c734c7dde011d83eac2b7347b373594f92d7091b9ca34cb9c6f39bdf5a8d2f134379e16d822f6522170ccf2ddd55c84b9e6c64fc927ac4cf8dfb2a17701f2", - "695d83bd990a1117b3d0ce06cc888027d12a054c2677fd82f0d4fbfc93575523e7991a5e35a3752e9b70ce62992e268a877744cdd435f5f130869c9a2074b338", - "a6213743568e3b3158b9184301f3690847554c68457cb40fc9a4b8cfd8d4a118c301a07737aeda0f929c68913c5f51c80394f53bff1c3e83b2e40ca97eba9e15", - "d444bfa2362a96df213d070e33fa841f51334e4e76866b8139e8af3bb3398be2dfaddcbc56b9146de9f68118dc5829e74b0c28d7711907b121f9161cb92b69a9", - "142709d62e28fcccd0af97fad0f8465b971e82201dc51070faa0372aa43e92484be1c1e73ba10906d5d1853db6a4106e0a7bf9800d373d6dee2d46d62ef2a461", -} diff --git a/vendor/vendor.json b/vendor/vendor.json index 8b55ad8eb..87783717c 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -154,6 +154,12 @@ "revision": "56b76bdf51f7708750eac80fa38b952bb9f32639", "revisionTime": "2015-12-11T09:06:21+09:00" }, + { + "checksumSHA1": "4gphCNVXIjp5ytz3+S3SD3Dp948=", + "path": "github.com/minio/blake2b-simd", + "revision": "3f5f724cb5b182a5c278d6d3d55b40e7f8c2efb4", + "revisionTime": "2016-07-23T06:10:19Z" + }, { "path": "github.com/minio/cli", "revision": "c4a07c7b68db77ccd119183fb1d01dd5972434ab",