mirror of
https://github.com/minio/minio.git
synced 2024-12-24 22:25:54 -05:00
Update reedsolomon/highwayhash to start using ppc64le support (#7003)
Thanks to @fwessels for the upstream work on reedsolomon and highwayhash which has resulted in 10x performance improvement on ppc64 architecture.
This commit is contained in:
parent
bc67410548
commit
def04f01cf
18
vendor/github.com/klauspost/reedsolomon/README.md
generated
vendored
18
vendor/github.com/klauspost/reedsolomon/README.md
generated
vendored
@ -24,6 +24,10 @@ go get -u github.com/klauspost/reedsolomon
|
||||
|
||||
# Changes
|
||||
|
||||
## December 18, 2018
|
||||
|
||||
Assembly code for ppc64le has been contributed; this boosts performance by about 10x on this platform.
|
||||
|
||||
## November 18, 2017
|
||||
|
||||
Added [WithAutoGoroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithAutoGoroutines) which will attempt to calculate the optimal number of goroutines to use based on your expected shard size and detected CPU.
|
||||
@ -259,6 +263,18 @@ By exploiting NEON instructions the performance for ARM has been accelerated. Be
|
||||
| 10 | 2 | 20% | 188 | 1738 | 925% |
|
||||
| 10 | 4 | 40% | 96 | 839 | 877% |
|
||||
|
||||
# Performance on ppc64le
|
||||
|
||||
The performance for ppc64le has been accelerated. This gives roughly a 10x performance improvement on this architecture, as can be seen below:
|
||||
|
||||
```
|
||||
benchmark old MB/s new MB/s speedup
|
||||
BenchmarkGalois128K-160 948.87 8878.85 9.36x
|
||||
BenchmarkGalois1M-160 968.85 9041.92 9.33x
|
||||
BenchmarkGaloisXor128K-160 862.02 7905.00 9.17x
|
||||
BenchmarkGaloisXor1M-160 784.60 6296.65 8.03x
|
||||
```
|
||||
|
||||
# asm2plan9s
|
||||
|
||||
[asm2plan9s](https://github.com/fwessels/asm2plan9s) is used for assembling the AVX2 instructions into their BYTE/WORD/LONG equivalents.
|
||||
@ -266,8 +282,10 @@ By exploiting NEON instructions the performance for ARM has been accelerated. Be
|
||||
# Links
|
||||
* [Backblaze Open Sources Reed-Solomon Erasure Coding Source Code](https://www.backblaze.com/blog/reed-solomon/).
|
||||
* [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon). Compatible java library by Backblaze.
|
||||
* [ocaml-reed-solomon-erasure](https://gitlab.com/darrenldl/ocaml-reed-solomon-erasure). Compatible OCaml implementation.
|
||||
* [reedsolomon-c](https://github.com/jannson/reedsolomon-c). C version, compatible with output from this package.
|
||||
* [Reed-Solomon Erasure Coding in Haskell](https://github.com/NicolasT/reedsolomon). Haskell port of the package with similar performance.
|
||||
* [reed-solomon-erasure](https://github.com/darrenldl/reed-solomon-erasure). Compatible Rust implementation.
|
||||
* [go-erasure](https://github.com/somethingnew2-0/go-erasure). A similar library using cgo, slower in my tests.
|
||||
* [rsraid](https://github.com/goayame/rsraid). A similar library written in Go. Slower, but supports more shards.
|
||||
* [Screaming Fast Galois Field Arithmetic](http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf). Basis for SSE3 optimizations.
|
||||
|
1
vendor/github.com/klauspost/reedsolomon/galois_amd64.go
generated
vendored
1
vendor/github.com/klauspost/reedsolomon/galois_amd64.go
generated
vendored
@ -1,5 +1,6 @@
|
||||
//+build !noasm
|
||||
//+build !appengine
|
||||
//+build !gccgo
|
||||
|
||||
// Copyright 2015, Klaus Post, see LICENSE for details.
|
||||
|
||||
|
2
vendor/github.com/klauspost/reedsolomon/galois_amd64.s
generated
vendored
2
vendor/github.com/klauspost/reedsolomon/galois_amd64.s
generated
vendored
@ -1,4 +1,4 @@
|
||||
//+build !noasm !appengine
|
||||
//+build !noasm !appengine !gccgo
|
||||
|
||||
// Copyright 2015, Klaus Post, see LICENSE for details.
|
||||
|
||||
|
1
vendor/github.com/klauspost/reedsolomon/galois_arm64.go
generated
vendored
1
vendor/github.com/klauspost/reedsolomon/galois_arm64.go
generated
vendored
@ -1,5 +1,6 @@
|
||||
//+build !noasm
|
||||
//+build !appengine
|
||||
//+build !gccgo
|
||||
|
||||
// Copyright 2015, Klaus Post, see LICENSE for details.
|
||||
// Copyright 2017, Minio, Inc.
|
||||
|
2
vendor/github.com/klauspost/reedsolomon/galois_arm64.s
generated
vendored
2
vendor/github.com/klauspost/reedsolomon/galois_arm64.s
generated
vendored
@ -1,4 +1,4 @@
|
||||
//+build !noasm !appengine
|
||||
//+build !noasm !appengine !gccgo
|
||||
|
||||
// Copyright 2015, Klaus Post, see LICENSE for details.
|
||||
// Copyright 2017, Minio, Inc.
|
||||
|
5
vendor/github.com/klauspost/reedsolomon/galois_noasm.go
generated
vendored
5
vendor/github.com/klauspost/reedsolomon/galois_noasm.go
generated
vendored
@ -1,5 +1,6 @@
|
||||
//+build !amd64 noasm appengine
|
||||
//+build !arm64 noasm appengine
|
||||
//+build !amd64 noasm appengine gccgo
|
||||
//+build !arm64 noasm appengine gccgo
|
||||
//+build !ppc64le noasm appengine gccgo
|
||||
|
||||
// Copyright 2015, Klaus Post, see LICENSE for details.
|
||||
|
||||
|
67
vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go
generated
vendored
Normal file
67
vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go
generated
vendored
Normal file
@ -0,0 +1,67 @@
|
||||
//+build !noasm
|
||||
//+build !appengine
|
||||
//+build !gccgo
|
||||
|
||||
// Copyright 2015, Klaus Post, see LICENSE for details.
|
||||
// Copyright 2018, Minio, Inc.
|
||||
|
||||
package reedsolomon
|
||||
|
||||
// galMulPpc is implemented in assembly (galois_ppc64le.s). Following the
// reference implementation given in the comment further down this file, it
// computes out[n] = low[in[n]&0xf] ^ high[in[n]>>4], working in blocks of
// 16 bytes.
//go:noescape
|
||||
func galMulPpc(low, high, in, out []byte)
|
||||
|
||||
// galMulPpcXor is implemented in assembly (galois_ppc64le.s). Following the
// reference implementation given in the comment further down this file, it
// computes out[n] ^= low[in[n]&0xf] ^ high[in[n]>>4], working in blocks of
// 16 bytes.
//go:noescape
|
||||
func galMulPpcXor(low, high, in, out []byte)
|
||||
|
||||
// This is what the assembler routines do in blocks of 16 bytes:
|
||||
/*
|
||||
func galMulPpc(low, high, in, out []byte) {
|
||||
for n, input := range in {
|
||||
l := input & 0xf
|
||||
h := input >> 4
|
||||
out[n] = low[l] ^ high[h]
|
||||
}
|
||||
}
|
||||
func galMulPpcXor(low, high, in, out []byte) {
|
||||
for n, input := range in {
|
||||
l := input & 0xf
|
||||
h := input >> 4
|
||||
out[n] ^= low[l] ^ high[h]
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
func galMulSlice(c byte, in, out []byte, ssse3, avx2 bool) {
|
||||
done := (len(in) >> 4) << 4
|
||||
if done > 0 {
|
||||
galMulPpc(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out)
|
||||
}
|
||||
remain := len(in) - done
|
||||
if remain > 0 {
|
||||
mt := mulTable[c]
|
||||
for i := done; i < len(in); i++ {
|
||||
out[i] = mt[in[i]]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func galMulSliceXor(c byte, in, out []byte, ssse3, avx2 bool) {
|
||||
done := (len(in) >> 4) << 4
|
||||
if done > 0 {
|
||||
galMulPpcXor(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out)
|
||||
}
|
||||
remain := len(in) - done
|
||||
if remain > 0 {
|
||||
mt := mulTable[c]
|
||||
for i := done; i < len(in); i++ {
|
||||
out[i] ^= mt[in[i]]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// sliceXor XORs each byte of in into the byte at the same index of out
// (out[i] ^= in[i]). out must be at least as long as in. The sse2 flag is
// not used by this implementation.
func sliceXor(in, out []byte, sse2 bool) {
	for i := 0; i < len(in); i++ {
		out[i] ^= in[i]
	}
}
|
126
vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s
generated
vendored
Normal file
126
vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s
generated
vendored
Normal file
@ -0,0 +1,126 @@
|
||||
//+build !noasm !appengine !gccgo
|
||||
|
||||
// Copyright 2015, Klaus Post, see LICENSE for details.
|
||||
// Copyright 2018, Minio, Inc.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define LOW R3
|
||||
#define HIGH R4
|
||||
#define IN R5
|
||||
#define LEN R6
|
||||
#define OUT R7
|
||||
#define CONSTANTS R8
|
||||
#define OFFSET R9
|
||||
#define OFFSET1 R10
|
||||
#define OFFSET2 R11
|
||||
|
||||
#define X6 VS34
|
||||
#define X6_ V2
|
||||
#define X7 VS35
|
||||
#define X7_ V3
|
||||
#define MSG VS36
|
||||
#define MSG_ V4
|
||||
#define MSG_HI VS37
|
||||
#define MSG_HI_ V5
|
||||
#define RESULT VS38
|
||||
#define RESULT_ V6
|
||||
#define ROTATE VS39
|
||||
#define ROTATE_ V7
|
||||
#define MASK VS40
|
||||
#define MASK_ V8
|
||||
#define FLIP VS41
|
||||
#define FLIP_ V9
|
||||
|
||||
|
||||
// func galMulPpc(low, high, in, out []byte)
//
// Arguments are four slices (24 bytes each on the frame, 96 bytes in total);
// only the base pointers of low, high and out and the base pointer and length
// of in are read. For every 16-byte block of in the routine computes
//     out[n] = low[in[n] & 0xf] ^ high[in[n] >> 4]
// as described by the Go reference implementation in galois_ppc64le.go.
//
// The loop body runs once before the length is tested, so the caller must
// pass a non-empty in; the length of out is never checked here.
//
// NOTE(review): V31 is never initialised in this routine. It only appears as
// the second VPERM source, and every selector byte used here is in 0..15
// (FLIP constants, MASK 0x0f, shift right by 4), so presumably its contents
// never reach the result - confirm against the VPERM definition.
|
||||
TEXT ·galMulPpc(SB), NOFRAME|NOSPLIT, $0-96
|
||||
MOVD low+0(FP), LOW
|
||||
MOVD high+24(FP), HIGH
|
||||
MOVD in+48(FP), IN
|
||||
MOVD in_len+56(FP), LEN
|
||||
MOVD out+72(FP), OUT
|
||||
|
||||
// Byte offsets of the second and third 16-byte entries in ·constants.
MOVD $16, OFFSET1
|
||||
MOVD $32, OFFSET2
|
||||
|
||||
// ROTATE = 0x04 in every byte, MASK = 0x0f in every byte, FLIP = bytes 0x00..0x0f.
MOVD $·constants(SB), CONSTANTS
|
||||
LXVD2X (CONSTANTS)(R0), ROTATE
|
||||
LXVD2X (CONSTANTS)(OFFSET1), MASK
|
||||
LXVD2X (CONSTANTS)(OFFSET2), FLIP
|
||||
|
||||
// Load the 16-entry low/high lookup tables and permute them with FLIP.
// NOTE(review): presumably this fixes up the byte order produced by LXVD2X - confirm.
LXVD2X (LOW)(R0), X6
|
||||
LXVD2X (HIGH)(R0), X7
|
||||
VPERM X6_, V31, FLIP_, X6_
|
||||
VPERM X7_, V31, FLIP_, X7_
|
||||
|
||||
MOVD $0, OFFSET
|
||||
|
||||
loop:
|
||||
LXVD2X (IN)(OFFSET), MSG
|
||||
|
||||
// MSG_HI_ corresponds to `input >> 4`, MSG_ to `input & 0xf` in the Go reference.
VSRB MSG_, ROTATE_, MSG_HI_
|
||||
VAND MSG_, MASK_, MSG_
|
||||
// Table lookups: low[l] and high[h] for all 16 bytes at once.
VPERM X6_, V31, MSG_, MSG_
|
||||
VPERM X7_, V31, MSG_HI_, MSG_HI_
|
||||
|
||||
VXOR MSG_, MSG_HI_, MSG_
|
||||
|
||||
STXVD2X MSG, (OUT)(OFFSET)
|
||||
|
||||
// Advance by one 16-byte block and continue while LEN > OFFSET.
ADD $16, OFFSET, OFFSET
|
||||
CMP LEN, OFFSET
|
||||
BGT loop
|
||||
RET
|
||||
|
||||
|
||||
// func galMulPpcXor(low, high, in, out []byte)
//
// Same as galMulPpc, except that the looked-up value is XORed into the
// existing contents of out instead of overwriting them. For every 16-byte
// block of in the routine computes
//     out[n] ^= low[in[n] & 0xf] ^ high[in[n] >> 4]
// as described by the Go reference implementation in galois_ppc64le.go.
//
// The loop body runs once before the length is tested, so the caller must
// pass a non-empty in; the length of out is never checked here.
//
// NOTE(review): V31 is never initialised in this routine. It only appears as
// the second VPERM source, and every selector byte used here is in 0..15
// (FLIP constants, MASK 0x0f, shift right by 4), so presumably its contents
// never reach the result - confirm against the VPERM definition.
|
||||
TEXT ·galMulPpcXor(SB), NOFRAME|NOSPLIT, $0-96
|
||||
MOVD low+0(FP), LOW
|
||||
MOVD high+24(FP), HIGH
|
||||
MOVD in+48(FP), IN
|
||||
MOVD in_len+56(FP), LEN
|
||||
MOVD out+72(FP), OUT
|
||||
|
||||
// Byte offsets of the second and third 16-byte entries in ·constants.
MOVD $16, OFFSET1
|
||||
MOVD $32, OFFSET2
|
||||
|
||||
// ROTATE = 0x04 in every byte, MASK = 0x0f in every byte, FLIP = bytes 0x00..0x0f.
MOVD $·constants(SB), CONSTANTS
|
||||
LXVD2X (CONSTANTS)(R0), ROTATE
|
||||
LXVD2X (CONSTANTS)(OFFSET1), MASK
|
||||
LXVD2X (CONSTANTS)(OFFSET2), FLIP
|
||||
|
||||
// Load the 16-entry low/high lookup tables and permute them with FLIP.
// NOTE(review): presumably this fixes up the byte order produced by LXVD2X - confirm.
LXVD2X (LOW)(R0), X6
|
||||
LXVD2X (HIGH)(R0), X7
|
||||
VPERM X6_, V31, FLIP_, X6_
|
||||
VPERM X7_, V31, FLIP_, X7_
|
||||
|
||||
MOVD $0, OFFSET
|
||||
|
||||
loopXor:
|
||||
// Load the next 16 input bytes and the 16 output bytes they will be XORed into.
LXVD2X (IN)(OFFSET), MSG
|
||||
LXVD2X (OUT)(OFFSET), RESULT
|
||||
|
||||
// MSG_HI_ corresponds to `input >> 4`, MSG_ to `input & 0xf` in the Go reference.
VSRB MSG_, ROTATE_, MSG_HI_
|
||||
VAND MSG_, MASK_, MSG_
|
||||
// Table lookups: low[l] and high[h] for all 16 bytes at once.
VPERM X6_, V31, MSG_, MSG_
|
||||
VPERM X7_, V31, MSG_HI_, MSG_HI_
|
||||
|
||||
VXOR MSG_, MSG_HI_, MSG_
|
||||
// Fold the product into the previous contents of out.
VXOR MSG_, RESULT_, RESULT_
|
||||
|
||||
STXVD2X RESULT, (OUT)(OFFSET)
|
||||
|
||||
// Advance by one 16-byte block and continue while LEN > OFFSET.
ADD $16, OFFSET, OFFSET
|
||||
CMP LEN, OFFSET
|
||||
BGT loopXor
|
||||
RET
|
||||
|
||||
DATA ·constants+0x0(SB)/8, $0x0404040404040404
|
||||
DATA ·constants+0x8(SB)/8, $0x0404040404040404
|
||||
DATA ·constants+0x10(SB)/8, $0x0f0f0f0f0f0f0f0f
|
||||
DATA ·constants+0x18(SB)/8, $0x0f0f0f0f0f0f0f0f
|
||||
DATA ·constants+0x20(SB)/8, $0x0706050403020100
|
||||
DATA ·constants+0x28(SB)/8, $0x0f0e0d0c0b0a0908
|
||||
|
||||
GLOBL ·constants(SB), 8, $48
|
6
vendor/github.com/klauspost/reedsolomon/reedsolomon.go
generated
vendored
6
vendor/github.com/klauspost/reedsolomon/reedsolomon.go
generated
vendored
@ -471,12 +471,12 @@ func (r reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outpu
|
||||
wg.Add(1)
|
||||
go func(start, stop int) {
|
||||
for c := 0; c < r.DataShards; c++ {
|
||||
in := inputs[c]
|
||||
in := inputs[c][start:stop]
|
||||
for iRow := 0; iRow < outputCount; iRow++ {
|
||||
if c == 0 {
|
||||
galMulSlice(matrixRows[iRow][c], in[start:stop], outputs[iRow][start:stop], r.o.useSSSE3, r.o.useAVX2)
|
||||
galMulSlice(matrixRows[iRow][c], in, outputs[iRow][start:stop], r.o.useSSSE3, r.o.useAVX2)
|
||||
} else {
|
||||
galMulSliceXor(matrixRows[iRow][c], in[start:stop], outputs[iRow][start:stop], r.o.useSSSE3, r.o.useAVX2)
|
||||
galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow][start:stop], r.o.useSSSE3, r.o.useAVX2)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
13
vendor/github.com/minio/highwayhash/README.md
generated
vendored
13
vendor/github.com/minio/highwayhash/README.md
generated
vendored
@ -7,7 +7,7 @@
|
||||
|
||||
It can be used to prevent hash-flooding attacks or authenticate short-lived messages. Additionally it can be used as a fingerprinting function. HighwayHash is not a general purpose cryptographic hash function (such as Blake2b, SHA-3 or SHA-2) and should not be used if strong collision resistance is required.
|
||||
|
||||
This repository contains a native Go version and optimized assembly implementations on both Intel and ARM platforms.
|
||||
This repository contains a native Go version and optimized assembly implementations for Intel, ARM and ppc64le architectures.
|
||||
|
||||
### High performance
|
||||
|
||||
@ -50,6 +50,17 @@ ARM64 NEON | 384 MB/s | 955 MB/s | 1053 MB/s
|
||||
|
||||
*Note: For now just the (main) update loop is implemented in assembly, so for small messages there is still considerable overhead due to initialization and finalization.*
|
||||
|
||||
### ppc64le Performance
|
||||
|
||||
The ppc64le accelerated version is roughly 10x faster compared to the non-optimized version:
|
||||
|
||||
```
|
||||
benchmark old MB/s new MB/s speedup
|
||||
BenchmarkWrite_8K 531.19 5566.41 10.48x
|
||||
BenchmarkSum64_8K 518.86 4971.88 9.58x
|
||||
BenchmarkSum256_8K 502.45 4474.20 8.90x
|
||||
```
|
||||
|
||||
### Performance compared to other hashing techniques
|
||||
|
||||
On a Skylake CPU (3.0 GHz Xeon Platinum 8124M) the table below shows how HighwayHash compares to other hashing techniques for 5 MB messages (single core performance, all Golang implementations, see [benchmark](https://github.com/fwessels/HashCompare/blob/master/benchmarks_test.go)).
|
||||
|
1
vendor/github.com/minio/highwayhash/highwayhashAVX2_amd64.go
generated
vendored
1
vendor/github.com/minio/highwayhash/highwayhashAVX2_amd64.go
generated
vendored
@ -13,6 +13,7 @@ var (
|
||||
useSSE4 = cpu.X86.HasSSE41
|
||||
useAVX2 = cpu.X86.HasAVX2
|
||||
useNEON = false
|
||||
useVMX = false
|
||||
)
|
||||
|
||||
//go:noescape
|
||||
|
1
vendor/github.com/minio/highwayhash/highwayhash_amd64.go
generated
vendored
1
vendor/github.com/minio/highwayhash/highwayhash_amd64.go
generated
vendored
@ -13,6 +13,7 @@ var (
|
||||
useSSE4 = cpu.X86.HasSSE41
|
||||
useAVX2 = false
|
||||
useNEON = false
|
||||
useVMX = false
|
||||
)
|
||||
|
||||
//go:noescape
|
||||
|
1
vendor/github.com/minio/highwayhash/highwayhash_arm64.go
generated
vendored
1
vendor/github.com/minio/highwayhash/highwayhash_arm64.go
generated
vendored
@ -10,6 +10,7 @@ var (
|
||||
useSSE4 = false
|
||||
useAVX2 = false
|
||||
useNEON = true
|
||||
useVMX = false
|
||||
)
|
||||
|
||||
//go:noescape
|
||||
|
33
vendor/github.com/minio/highwayhash/highwayhash_ppc64le.go
generated
vendored
Normal file
33
vendor/github.com/minio/highwayhash/highwayhash_ppc64le.go
generated
vendored
Normal file
@ -0,0 +1,33 @@
|
||||
//+build !noasm
|
||||
|
||||
// Copyright (c) 2017 Minio Inc. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
package highwayhash
|
||||
|
||||
var (
|
||||
useSSE4 = false
|
||||
useAVX2 = false
|
||||
useNEON = false
|
||||
useVMX = true
|
||||
)
|
||||
|
||||
// updatePpc64Le is implemented in assembly (highwayhash_ppc64le.s). It folds
// msg into state in 32-byte blocks and returns without touching state when
// msg is shorter than 32 bytes.
//go:noescape
|
||||
func updatePpc64Le(state *[16]uint64, msg []byte)
|
||||
|
||||
func initialize(state *[16]uint64, key []byte) {
|
||||
initializeGeneric(state, key)
|
||||
}
|
||||
|
||||
func update(state *[16]uint64, msg []byte) {
|
||||
if useVMX {
|
||||
updatePpc64Le(state, msg)
|
||||
} else {
|
||||
updateGeneric(state, msg)
|
||||
}
|
||||
}
|
||||
|
||||
func finalize(out []byte, state *[16]uint64) {
|
||||
finalizeGeneric(out, state)
|
||||
}
|
183
vendor/github.com/minio/highwayhash/highwayhash_ppc64le.s
generated
vendored
Normal file
183
vendor/github.com/minio/highwayhash/highwayhash_ppc64le.s
generated
vendored
Normal file
@ -0,0 +1,183 @@
|
||||
//+build !noasm !appengine
|
||||
|
||||
//
|
||||
// Minio Cloud Storage, (C) 2018 Minio, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// Definition of registers
|
||||
#define V0_LO VS32
|
||||
#define V0_LO_ V0
|
||||
#define V0_HI VS33
|
||||
#define V0_HI_ V1
|
||||
#define V1_LO VS34
|
||||
#define V1_LO_ V2
|
||||
#define V1_HI VS35
|
||||
#define V1_HI_ V3
|
||||
#define MUL0_LO VS36
|
||||
#define MUL0_LO_ V4
|
||||
#define MUL0_HI VS37
|
||||
#define MUL0_HI_ V5
|
||||
#define MUL1_LO VS38
|
||||
#define MUL1_LO_ V6
|
||||
#define MUL1_HI VS39
|
||||
#define MUL1_HI_ V7
|
||||
|
||||
// Message
|
||||
#define MSG_LO VS40
|
||||
#define MSG_LO_ V8
|
||||
#define MSG_HI VS41
|
||||
|
||||
// Constants
|
||||
#define ROTATE VS42
|
||||
#define ROTATE_ V10
|
||||
#define MASK VS43
|
||||
#define MASK_ V11
|
||||
|
||||
// Temps
|
||||
#define TEMP1 VS44
|
||||
#define TEMP1_ V12
|
||||
#define TEMP2 VS45
|
||||
#define TEMP2_ V13
|
||||
#define TEMP3 VS46
|
||||
#define TEMP3_ V14
|
||||
#define TEMP4_ V15
|
||||
#define TEMP5_ V16
|
||||
#define TEMP6_ V17
|
||||
#define TEMP7_ V18
|
||||
|
||||
// Regular registers
|
||||
#define STATE R3
|
||||
#define MSG_BASE R4
|
||||
#define MSG_LEN R5
|
||||
#define CONSTANTS R6
|
||||
#define P1 R7
|
||||
#define P2 R8
|
||||
#define P3 R9
|
||||
#define P4 R10
|
||||
#define P5 R11
|
||||
#define P6 R12
|
||||
#define P7 R14 // avoid using R13
|
||||
|
||||
// func updatePpc64Le(state *[16]uint64, msg []byte)
//
// Folds msg into the 128-byte hash state, 32 bytes per loop iteration.
//   - If len(msg) <= 31 the routine returns immediately and state is untouched.
//   - The state is loaded as eight 16-byte vectors (offsets 0..112), updated
//     in registers, and stored back once the loop finishes.
//   - The loop repeats while at least 32 bytes remain; a trailing remainder
//     shorter than 32 bytes is not consumed by this routine.
//
// NOTE(review): the XXPERMDI ..., $2 instructions after each load and before
// each store presumably swap the two doublewords to compensate for the
// element order of LXVD2X/STXVD2X on little-endian - confirm against the ISA.
TEXT ·updatePpc64Le(SB), NOFRAME|NOSPLIT, $0-32
|
||||
MOVD state+0(FP), STATE
|
||||
MOVD msg_base+8(FP), MSG_BASE
|
||||
MOVD msg_len+16(FP), MSG_LEN // length of message
|
||||
|
||||
// Sanity check for length
|
||||
CMPU MSG_LEN, $31
|
||||
BLE complete
|
||||
|
||||
// Setup offsets
|
||||
MOVD $16, P1
|
||||
MOVD $32, P2
|
||||
MOVD $48, P3
|
||||
MOVD $64, P4
|
||||
MOVD $80, P5
|
||||
MOVD $96, P6
|
||||
MOVD $112, P7
|
||||
|
||||
// Load state
|
||||
LXVD2X (STATE)(R0), V0_LO
|
||||
LXVD2X (STATE)(P1), V0_HI
|
||||
LXVD2X (STATE)(P2), V1_LO
|
||||
LXVD2X (STATE)(P3), V1_HI
|
||||
LXVD2X (STATE)(P4), MUL0_LO
|
||||
LXVD2X (STATE)(P5), MUL0_HI
|
||||
LXVD2X (STATE)(P6), MUL1_LO
|
||||
LXVD2X (STATE)(P7), MUL1_HI
|
||||
XXPERMDI V0_LO, V0_LO, $2, V0_LO
|
||||
XXPERMDI V0_HI, V0_HI, $2, V0_HI
|
||||
XXPERMDI V1_LO, V1_LO, $2, V1_LO
|
||||
XXPERMDI V1_HI, V1_HI, $2, V1_HI
|
||||
XXPERMDI MUL0_LO, MUL0_LO, $2, MUL0_LO
|
||||
XXPERMDI MUL0_HI, MUL0_HI, $2, MUL0_HI
|
||||
XXPERMDI MUL1_LO, MUL1_LO, $2, MUL1_LO
|
||||
XXPERMDI MUL1_HI, MUL1_HI, $2, MUL1_HI
|
||||
|
||||
// Load constants table pointer
|
||||
MOVD $·constants(SB), CONSTANTS
|
||||
LXVD2X (CONSTANTS)(R0), ROTATE
|
||||
LXVD2X (CONSTANTS)(P1), MASK
|
||||
// NAND of MASK with itself, i.e. the bitwise complement of the loaded constant.
XXLNAND MASK, MASK, MASK
|
||||
|
||||
loop:
|
||||
// Main highwayhash update loop
|
||||
LXVD2X (MSG_BASE)(R0), MSG_LO
|
||||
VADDUDM V0_LO_, MUL1_LO_, TEMP1_
|
||||
VRLD V0_LO_, ROTATE_, TEMP2_
|
||||
VADDUDM MUL1_HI_, V0_HI_, TEMP3_
|
||||
LXVD2X (MSG_BASE)(P1), MSG_HI
|
||||
// Advance the message pointer past the 32 bytes just loaded.
ADD $32, MSG_BASE, MSG_BASE
|
||||
XXPERMDI MSG_LO, MSG_LO, $2, MSG_LO
|
||||
XXPERMDI MSG_HI, MSG_HI, $2, V0_LO
|
||||
VADDUDM MSG_LO_, MUL0_LO_, MSG_LO_
|
||||
VADDUDM V0_LO_, MUL0_HI_, V0_LO_
|
||||
VADDUDM MSG_LO_, V1_LO_, V1_LO_
|
||||
VSRD V0_HI_, ROTATE_, MSG_LO_
|
||||
VADDUDM V0_LO_, V1_HI_, V1_HI_
|
||||
VPERM V1_LO_, V1_LO_, MASK_, V0_LO_
|
||||
VMULOUW V1_LO_, TEMP2_, TEMP2_
|
||||
VPERM V1_HI_, V1_HI_, MASK_, TEMP7_
|
||||
VADDUDM V0_LO_, TEMP1_, V0_LO_
|
||||
VMULOUW V1_HI_, MSG_LO_, MSG_LO_
|
||||
VADDUDM TEMP7_, TEMP3_, V0_HI_
|
||||
VPERM V0_LO_, V0_LO_, MASK_, TEMP6_
|
||||
VRLD V1_LO_, ROTATE_, TEMP4_
|
||||
VSRD V1_HI_, ROTATE_, TEMP5_
|
||||
VPERM V0_HI_, V0_HI_, MASK_, TEMP7_
|
||||
XXLXOR MUL0_LO, TEMP2, MUL0_LO
|
||||
VMULOUW TEMP1_, TEMP4_, TEMP1_
|
||||
VMULOUW TEMP3_, TEMP5_, TEMP3_
|
||||
XXLXOR MUL0_HI, MSG_LO, MUL0_HI
|
||||
XXLXOR MUL1_LO, TEMP1, MUL1_LO
|
||||
XXLXOR MUL1_HI, TEMP3, MUL1_HI
|
||||
VADDUDM TEMP6_, V1_LO_, V1_LO_
|
||||
VADDUDM TEMP7_, V1_HI_, V1_HI_
|
||||
|
||||
// One 32-byte block consumed; loop again while at least 32 bytes remain.
SUB $32, MSG_LEN, MSG_LEN
|
||||
CMPU MSG_LEN, $32
|
||||
BGE loop
|
||||
|
||||
// Save state
|
||||
XXPERMDI V0_LO, V0_LO, $2, V0_LO
|
||||
XXPERMDI V0_HI, V0_HI, $2, V0_HI
|
||||
XXPERMDI V1_LO, V1_LO, $2, V1_LO
|
||||
XXPERMDI V1_HI, V1_HI, $2, V1_HI
|
||||
XXPERMDI MUL0_LO, MUL0_LO, $2, MUL0_LO
|
||||
XXPERMDI MUL0_HI, MUL0_HI, $2, MUL0_HI
|
||||
XXPERMDI MUL1_LO, MUL1_LO, $2, MUL1_LO
|
||||
XXPERMDI MUL1_HI, MUL1_HI, $2, MUL1_HI
|
||||
STXVD2X V0_LO, (STATE)(R0)
|
||||
STXVD2X V0_HI, (STATE)(P1)
|
||||
STXVD2X V1_LO, (STATE)(P2)
|
||||
STXVD2X V1_HI, (STATE)(P3)
|
||||
STXVD2X MUL0_LO, (STATE)(P4)
|
||||
STXVD2X MUL0_HI, (STATE)(P5)
|
||||
STXVD2X MUL1_LO, (STATE)(P6)
|
||||
STXVD2X MUL1_HI, (STATE)(P7)
|
||||
|
||||
complete:
|
||||
RET
|
||||
|
||||
|
||||
// Constants table
|
||||
DATA ·constants+0x0(SB)/8, $0x0000000000000020
|
||||
DATA ·constants+0x8(SB)/8, $0x0000000000000020
|
||||
DATA ·constants+0x10(SB)/8, $0x070806090d0a040b // zipper merge constant
|
||||
DATA ·constants+0x18(SB)/8, $0x000f010e05020c03 // zipper merge constant
|
||||
|
||||
GLOBL ·constants(SB), 8, $32
|
2
vendor/github.com/minio/highwayhash/highwayhash_ref.go
generated
vendored
2
vendor/github.com/minio/highwayhash/highwayhash_ref.go
generated
vendored
@ -4,6 +4,7 @@
|
||||
|
||||
// +build !amd64
|
||||
// +build !arm64
|
||||
// +build !ppc64le
|
||||
|
||||
package highwayhash
|
||||
|
||||
@ -11,6 +12,7 @@ var (
|
||||
useSSE4 = false
|
||||
useAVX2 = false
|
||||
useNEON = false
|
||||
useVMX = false
|
||||
)
|
||||
|
||||
func initialize(state *[16]uint64, k []byte) {
|
||||
|
12
vendor/vendor.json
vendored
12
vendor/vendor.json
vendored
@ -563,10 +563,10 @@
|
||||
"revisionTime": "2017-10-07T12:43:06Z"
|
||||
},
|
||||
{
|
||||
"checksumSHA1": "ehsrWipiGIWqa4To8TmelIx06vI=",
|
||||
"checksumSHA1": "KiQa3vguztElzJkoqeIGHlfLFJA=",
|
||||
"path": "github.com/klauspost/reedsolomon",
|
||||
"revision": "0b30fa71cc8e4e9010c9aba6d0320e2e5b163b29",
|
||||
"revisionTime": "2017-12-19T13:34:37Z"
|
||||
"revision": "8885f3a1c73882e6f11b766242c69a1eb8f44b28",
|
||||
"revisionTime": "2018-12-18T19:39:59Z"
|
||||
},
|
||||
{
|
||||
"checksumSHA1": "xxLSo5tKtXc7jGrR70yoEfza8Cw=",
|
||||
@ -634,10 +634,10 @@
|
||||
"revisionTime": "2018-01-23T12:12:34Z"
|
||||
},
|
||||
{
|
||||
"checksumSHA1": "2Fu1GmLwDo6FFdahjnlWnPkwJTE=",
|
||||
"checksumSHA1": "CD2MtlgA8h0z6hYJHURS5eOmZ1k=",
|
||||
"path": "github.com/minio/highwayhash",
|
||||
"revision": "85fc8a2dacad36a6beb2865793cd81363a496696",
|
||||
"revisionTime": "2018-05-01T08:09:13Z"
|
||||
"revision": "93ed73d641695483ab4438817457b6586ee5765c",
|
||||
"revisionTime": "2018-12-20T01:13:08Z"
|
||||
},
|
||||
{
|
||||
"checksumSHA1": "7/Hdd23/j4/yt4BXa+h0kqz1yjw=",
|
||||
|
Loading…
Reference in New Issue
Block a user