Release v0.1.0

This commit is contained in:
Manu Herrera
2019-10-01 12:22:30 -03:00
parent 41e6aad190
commit d301c63596
915 changed files with 378049 additions and 11 deletions

3
vendor/golang.org/x/crypto/AUTHORS generated vendored Normal file
View File

@@ -0,0 +1,3 @@
# This source code refers to The Go Authors for copyright purposes.
# The master list of authors is in the main Go distribution,
# visible at https://tip.golang.org/AUTHORS.

3
vendor/golang.org/x/crypto/CONTRIBUTORS generated vendored Normal file
View File

@@ -0,0 +1,3 @@
# This source code was written by the Go contributors.
# The master list of contributors is in the main Go distribution,
# visible at https://tip.golang.org/CONTRIBUTORS.

27
vendor/golang.org/x/crypto/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,27 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

22
vendor/golang.org/x/crypto/PATENTS generated vendored Normal file
View File

@@ -0,0 +1,22 @@
Additional IP Rights Grant (Patents)
"This implementation" means the copyrightable works distributed by
Google as part of the Go project.
Google hereby grants to You a perpetual, worldwide, non-exclusive,
no-charge, royalty-free, irrevocable (except as stated in this section)
patent license to make, have made, use, offer to sell, sell, import,
transfer and otherwise run, modify and propagate the contents of this
implementation of Go, where such license applies only to those patent
claims, both currently owned or controlled by Google and acquired in
the future, licensable by Google that are necessarily infringed by this
implementation of Go. This grant does not include claims that would be
infringed only as a consequence of further modification of this
implementation. If you or your agent or exclusive licensee institute or
order or agree to the institution of patent litigation against any
entity (including a cross-claim or counterclaim in a lawsuit) alleging
that this implementation of Go or any code incorporated within this
implementation of Go constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any patent
rights granted to you under this License for this implementation of Go
shall terminate as of the date such litigation is filed.

32
vendor/golang.org/x/crypto/internal/subtle/aliasing.go generated vendored Normal file
View File

@@ -0,0 +1,32 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// Package subtle implements functions that are often useful in cryptographic
// code but require careful thought to use correctly.
package subtle // import "golang.org/x/crypto/internal/subtle"
import "unsafe"
// AnyOverlap reports whether x and y share memory at any (not necessarily
// corresponding) index. The memory beyond the slice length is ignored.
func AnyOverlap(x, y []byte) bool {
return len(x) > 0 && len(y) > 0 &&
uintptr(unsafe.Pointer(&x[0])) <= uintptr(unsafe.Pointer(&y[len(y)-1])) &&
uintptr(unsafe.Pointer(&y[0])) <= uintptr(unsafe.Pointer(&x[len(x)-1]))
}
// InexactOverlap reports whether x and y share memory at any non-corresponding
// index. The memory beyond the slice length is ignored. Note that x and y can
// have different lengths and still not have any inexact overlap.
//
// InexactOverlap can be used to implement the requirements of the crypto/cipher
// AEAD, Block, BlockMode and Stream interfaces.
func InexactOverlap(x, y []byte) bool {
if len(x) == 0 || len(y) == 0 || &x[0] == &y[0] {
return false
}
return AnyOverlap(x, y)
}

View File

@@ -0,0 +1,35 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build appengine
// Package subtle implements functions that are often useful in cryptographic
// code but require careful thought to use correctly.
package subtle // import "golang.org/x/crypto/internal/subtle"
// This is the Google App Engine standard variant based on reflect
// because the unsafe package and cgo are disallowed.
import "reflect"
// AnyOverlap reports whether x and y share memory at any (not necessarily
// corresponding) index. The memory beyond the slice length is ignored.
func AnyOverlap(x, y []byte) bool {
return len(x) > 0 && len(y) > 0 &&
reflect.ValueOf(&x[0]).Pointer() <= reflect.ValueOf(&y[len(y)-1]).Pointer() &&
reflect.ValueOf(&y[0]).Pointer() <= reflect.ValueOf(&x[len(x)-1]).Pointer()
}
// InexactOverlap reports whether x and y share memory at any non-corresponding
// index. The memory beyond the slice length is ignored. Note that x and y can
// have different lengths and still not have any inexact overlap.
//
// InexactOverlap can be used to implement the requirements of the crypto/cipher
// AEAD, Block, BlockMode and Stream interfaces.
func InexactOverlap(x, y []byte) bool {
if len(x) == 0 || len(y) == 0 || &x[0] == &y[0] {
return false
}
return AnyOverlap(x, y)
}

173
vendor/golang.org/x/crypto/nacl/secretbox/secretbox.go generated vendored Normal file
View File

@@ -0,0 +1,173 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package secretbox encrypts and authenticates small messages.
Secretbox uses XSalsa20 and Poly1305 to encrypt and authenticate messages with
secret-key cryptography. The length of messages is not hidden.
It is the caller's responsibility to ensure the uniqueness of nonces—for
example, by using nonce 1 for the first message, nonce 2 for the second
message, etc. Nonces are long enough that randomly generated nonces have
negligible risk of collision.
Messages should be small because:
1. The whole message needs to be held in memory to be processed.
2. Using large messages pressures implementations on small machines to decrypt
and process plaintext before authenticating it. This is very dangerous, and
this API does not allow it, but a protocol that uses excessive message sizes
might present some implementations with no other choice.
3. Fixed overheads will be sufficiently amortised by messages as small as 8KB.
4. Performance may be improved by working with messages that fit into data caches.
Thus large amounts of data should be chunked so that each message is small.
(Each message still needs a unique nonce.) If in doubt, 16KB is a reasonable
chunk size.
This package is interoperable with NaCl: https://nacl.cr.yp.to/secretbox.html.
*/
package secretbox // import "golang.org/x/crypto/nacl/secretbox"
import (
"golang.org/x/crypto/internal/subtle"
"golang.org/x/crypto/poly1305"
"golang.org/x/crypto/salsa20/salsa"
)
// Overhead is the number of bytes of overhead when boxing a message.
const Overhead = poly1305.TagSize
// setup produces a sub-key and Salsa20 counter given a nonce and key.
func setup(subKey *[32]byte, counter *[16]byte, nonce *[24]byte, key *[32]byte) {
// We use XSalsa20 for encryption so first we need to generate a
// key and nonce with HSalsa20.
var hNonce [16]byte
copy(hNonce[:], nonce[:])
salsa.HSalsa20(subKey, &hNonce, key, &salsa.Sigma)
// The final 8 bytes of the original nonce form the new nonce.
copy(counter[:], nonce[16:])
}
// sliceForAppend takes a slice and a requested number of bytes. It returns a
// slice with the contents of the given slice followed by that many bytes and a
// second slice that aliases into it and contains only the extra bytes. If the
// original slice has sufficient capacity then no allocation is performed.
func sliceForAppend(in []byte, n int) (head, tail []byte) {
if total := len(in) + n; cap(in) >= total {
head = in[:total]
} else {
head = make([]byte, total)
copy(head, in)
}
tail = head[len(in):]
return
}
// Seal appends an encrypted and authenticated copy of message to out, which
// must not overlap message. The key and nonce pair must be unique for each
// distinct message and the output will be Overhead bytes longer than message.
func Seal(out, message []byte, nonce *[24]byte, key *[32]byte) []byte {
var subKey [32]byte
var counter [16]byte
setup(&subKey, &counter, nonce, key)
// The Poly1305 key is generated by encrypting 32 bytes of zeros. Since
// Salsa20 works with 64-byte blocks, we also generate 32 bytes of
// keystream as a side effect.
var firstBlock [64]byte
salsa.XORKeyStream(firstBlock[:], firstBlock[:], &counter, &subKey)
var poly1305Key [32]byte
copy(poly1305Key[:], firstBlock[:])
ret, out := sliceForAppend(out, len(message)+poly1305.TagSize)
if subtle.AnyOverlap(out, message) {
panic("nacl: invalid buffer overlap")
}
// We XOR up to 32 bytes of message with the keystream generated from
// the first block.
firstMessageBlock := message
if len(firstMessageBlock) > 32 {
firstMessageBlock = firstMessageBlock[:32]
}
tagOut := out
out = out[poly1305.TagSize:]
for i, x := range firstMessageBlock {
out[i] = firstBlock[32+i] ^ x
}
message = message[len(firstMessageBlock):]
ciphertext := out
out = out[len(firstMessageBlock):]
// Now encrypt the rest.
counter[8] = 1
salsa.XORKeyStream(out, message, &counter, &subKey)
var tag [poly1305.TagSize]byte
poly1305.Sum(&tag, ciphertext, &poly1305Key)
copy(tagOut, tag[:])
return ret
}
// Open authenticates and decrypts a box produced by Seal and appends the
// message to out, which must not overlap box. The output will be Overhead
// bytes smaller than box.
func Open(out, box []byte, nonce *[24]byte, key *[32]byte) ([]byte, bool) {
if len(box) < Overhead {
return nil, false
}
var subKey [32]byte
var counter [16]byte
setup(&subKey, &counter, nonce, key)
// The Poly1305 key is generated by encrypting 32 bytes of zeros. Since
// Salsa20 works with 64-byte blocks, we also generate 32 bytes of
// keystream as a side effect.
var firstBlock [64]byte
salsa.XORKeyStream(firstBlock[:], firstBlock[:], &counter, &subKey)
var poly1305Key [32]byte
copy(poly1305Key[:], firstBlock[:])
var tag [poly1305.TagSize]byte
copy(tag[:], box)
if !poly1305.Verify(&tag, box[poly1305.TagSize:], &poly1305Key) {
return nil, false
}
ret, out := sliceForAppend(out, len(box)-Overhead)
if subtle.AnyOverlap(out, box) {
panic("nacl: invalid buffer overlap")
}
// We XOR up to 32 bytes of box with the keystream generated from
// the first block.
box = box[Overhead:]
firstMessageBlock := box
if len(firstMessageBlock) > 32 {
firstMessageBlock = firstMessageBlock[:32]
}
for i, x := range firstMessageBlock {
out[i] = firstBlock[32+i] ^ x
}
box = box[len(firstMessageBlock):]
out = out[len(firstMessageBlock):]
// Now decrypt the rest.
counter[8] = 1
salsa.XORKeyStream(out, box, &counter, &subKey)
return ret, true
}

77
vendor/golang.org/x/crypto/pbkdf2/pbkdf2.go generated vendored Normal file
View File

@@ -0,0 +1,77 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package pbkdf2 implements the key derivation function PBKDF2 as defined in RFC
2898 / PKCS #5 v2.0.
A key derivation function is useful when encrypting data based on a password
or any other not-fully-random data. It uses a pseudorandom function to derive
a secure encryption key based on the password.
While v2.0 of the standard defines only one pseudorandom function to use,
HMAC-SHA1, the drafted v2.1 specification allows use of all five FIPS Approved
Hash Functions SHA-1, SHA-224, SHA-256, SHA-384 and SHA-512 for HMAC. To
choose, you can pass the `New` functions from the different SHA packages to
pbkdf2.Key.
*/
package pbkdf2 // import "golang.org/x/crypto/pbkdf2"
import (
"crypto/hmac"
"hash"
)
// Key derives a key from the password, salt and iteration count, returning a
// []byte of length keylen that can be used as cryptographic key. The key is
// derived based on the method described as PBKDF2 with the HMAC variant using
// the supplied hash function.
//
// For example, to use a HMAC-SHA-1 based PBKDF2 key derivation function, you
// can get a derived key for e.g. AES-256 (which needs a 32-byte key) by
// doing:
//
// dk := pbkdf2.Key([]byte("some password"), salt, 4096, 32, sha1.New)
//
// Remember to get a good random salt. At least 8 bytes is recommended by the
// RFC.
//
// Using a higher iteration count will increase the cost of an exhaustive
// search but will also make derivation proportionally slower.
func Key(password, salt []byte, iter, keyLen int, h func() hash.Hash) []byte {
prf := hmac.New(h, password)
hashLen := prf.Size()
numBlocks := (keyLen + hashLen - 1) / hashLen
var buf [4]byte
dk := make([]byte, 0, numBlocks*hashLen)
U := make([]byte, hashLen)
for block := 1; block <= numBlocks; block++ {
// N.B.: || means concatenation, ^ means XOR
// for each block T_i = U_1 ^ U_2 ^ ... ^ U_iter
// U_1 = PRF(password, salt || uint(i))
prf.Reset()
prf.Write(salt)
buf[0] = byte(block >> 24)
buf[1] = byte(block >> 16)
buf[2] = byte(block >> 8)
buf[3] = byte(block)
prf.Write(buf[:4])
dk = prf.Sum(dk)
T := dk[len(dk)-hashLen:]
copy(U, T)
// U_n = PRF(password, U_(n-1))
for n := 2; n <= iter; n++ {
prf.Reset()
prf.Write(U)
U = U[:0]
U = prf.Sum(U)
for x := range U {
T[x] ^= U[x]
}
}
}
return dk[:keyLen]
}

11
vendor/golang.org/x/crypto/poly1305/mac_noasm.go generated vendored Normal file
View File

@@ -0,0 +1,11 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64,!ppc64le gccgo appengine
package poly1305
type mac struct{ macGeneric }
func newMAC(key *[32]byte) mac { return mac{newMACGeneric(key)} }

83
vendor/golang.org/x/crypto/poly1305/poly1305.go generated vendored Normal file
View File

@@ -0,0 +1,83 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package poly1305 implements Poly1305 one-time message authentication code as
// specified in https://cr.yp.to/mac/poly1305-20050329.pdf.
//
// Poly1305 is a fast, one-time authentication function. It is infeasible for an
// attacker to generate an authenticator for a message without the key. However, a
// key must only be used for a single message. Authenticating two different
// messages with the same key allows an attacker to forge authenticators for other
// messages with the same key.
//
// Poly1305 was originally coupled with AES in order to make Poly1305-AES. AES was
// used with a fixed key in order to generate one-time keys from an nonce.
// However, in this package AES isn't used and the one-time key is specified
// directly.
package poly1305 // import "golang.org/x/crypto/poly1305"
import "crypto/subtle"
// TagSize is the size, in bytes, of a poly1305 authenticator.
const TagSize = 16
// Verify returns true if mac is a valid authenticator for m with the given
// key.
func Verify(mac *[16]byte, m []byte, key *[32]byte) bool {
var tmp [16]byte
Sum(&tmp, m, key)
return subtle.ConstantTimeCompare(tmp[:], mac[:]) == 1
}
// New returns a new MAC computing an authentication
// tag of all data written to it with the given key.
// This allows writing the message progressively instead
// of passing it as a single slice. Common users should use
// the Sum function instead.
//
// The key must be unique for each message, as authenticating
// two different messages with the same key allows an attacker
// to forge messages at will.
func New(key *[32]byte) *MAC {
return &MAC{
mac: newMAC(key),
finalized: false,
}
}
// MAC is an io.Writer computing an authentication tag
// of the data written to it.
//
// MAC cannot be used like common hash.Hash implementations,
// because using a poly1305 key twice breaks its security.
// Therefore writing data to a running MAC after calling
// Sum causes it to panic.
type MAC struct {
mac // platform-dependent implementation
finalized bool
}
// Size returns the number of bytes Sum will return.
func (h *MAC) Size() int { return TagSize }
// Write adds more data to the running message authentication code.
// It never returns an error.
//
// It must not be called after the first call of Sum.
func (h *MAC) Write(p []byte) (n int, err error) {
if h.finalized {
panic("poly1305: write to MAC after Sum")
}
return h.mac.Write(p)
}
// Sum computes the authenticator of all data written to the
// message authentication code.
func (h *MAC) Sum(b []byte) []byte {
var mac [TagSize]byte
h.mac.Sum(&mac)
h.finalized = true
return append(b, mac[:]...)
}

68
vendor/golang.org/x/crypto/poly1305/sum_amd64.go generated vendored Normal file
View File

@@ -0,0 +1,68 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build amd64,!gccgo,!appengine
package poly1305
//go:noescape
func initialize(state *[7]uint64, key *[32]byte)
//go:noescape
func update(state *[7]uint64, msg []byte)
//go:noescape
func finalize(tag *[TagSize]byte, state *[7]uint64)
// Sum generates an authenticator for m using a one-time key and puts the
// 16-byte result into out. Authenticating two different messages with the same
// key allows an attacker to forge messages at will.
func Sum(out *[16]byte, m []byte, key *[32]byte) {
h := newMAC(key)
h.Write(m)
h.Sum(out)
}
func newMAC(key *[32]byte) (h mac) {
initialize(&h.state, key)
return
}
type mac struct {
state [7]uint64 // := uint64{ h0, h1, h2, r0, r1, pad0, pad1 }
buffer [TagSize]byte
offset int
}
func (h *mac) Write(p []byte) (n int, err error) {
n = len(p)
if h.offset > 0 {
remaining := TagSize - h.offset
if n < remaining {
h.offset += copy(h.buffer[h.offset:], p)
return n, nil
}
copy(h.buffer[h.offset:], p[:remaining])
p = p[remaining:]
h.offset = 0
update(&h.state, h.buffer[:])
}
if nn := len(p) - (len(p) % TagSize); nn > 0 {
update(&h.state, p[:nn])
p = p[nn:]
}
if len(p) > 0 {
h.offset += copy(h.buffer[h.offset:], p)
}
return n, nil
}
func (h *mac) Sum(out *[16]byte) {
state := h.state
if h.offset > 0 {
update(&state, h.buffer[:h.offset])
}
finalize(out, &state)
}

148
vendor/golang.org/x/crypto/poly1305/sum_amd64.s generated vendored Normal file
View File

@@ -0,0 +1,148 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build amd64,!gccgo,!appengine
#include "textflag.h"
#define POLY1305_ADD(msg, h0, h1, h2) \
ADDQ 0(msg), h0; \
ADCQ 8(msg), h1; \
ADCQ $1, h2; \
LEAQ 16(msg), msg
#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
MOVQ r0, AX; \
MULQ h0; \
MOVQ AX, t0; \
MOVQ DX, t1; \
MOVQ r0, AX; \
MULQ h1; \
ADDQ AX, t1; \
ADCQ $0, DX; \
MOVQ r0, t2; \
IMULQ h2, t2; \
ADDQ DX, t2; \
\
MOVQ r1, AX; \
MULQ h0; \
ADDQ AX, t1; \
ADCQ $0, DX; \
MOVQ DX, h0; \
MOVQ r1, t3; \
IMULQ h2, t3; \
MOVQ r1, AX; \
MULQ h1; \
ADDQ AX, t2; \
ADCQ DX, t3; \
ADDQ h0, t2; \
ADCQ $0, t3; \
\
MOVQ t0, h0; \
MOVQ t1, h1; \
MOVQ t2, h2; \
ANDQ $3, h2; \
MOVQ t2, t0; \
ANDQ $0xFFFFFFFFFFFFFFFC, t0; \
ADDQ t0, h0; \
ADCQ t3, h1; \
ADCQ $0, h2; \
SHRQ $2, t3, t2; \
SHRQ $2, t3; \
ADDQ t2, h0; \
ADCQ t3, h1; \
ADCQ $0, h2
DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
GLOBL ·poly1305Mask<>(SB), RODATA, $16
// func update(state *[7]uint64, msg []byte)
TEXT ·update(SB), $0-32
MOVQ state+0(FP), DI
MOVQ msg_base+8(FP), SI
MOVQ msg_len+16(FP), R15
MOVQ 0(DI), R8 // h0
MOVQ 8(DI), R9 // h1
MOVQ 16(DI), R10 // h2
MOVQ 24(DI), R11 // r0
MOVQ 32(DI), R12 // r1
CMPQ R15, $16
JB bytes_between_0_and_15
loop:
POLY1305_ADD(SI, R8, R9, R10)
multiply:
POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
SUBQ $16, R15
CMPQ R15, $16
JAE loop
bytes_between_0_and_15:
TESTQ R15, R15
JZ done
MOVQ $1, BX
XORQ CX, CX
XORQ R13, R13
ADDQ R15, SI
flush_buffer:
SHLQ $8, BX, CX
SHLQ $8, BX
MOVB -1(SI), R13
XORQ R13, BX
DECQ SI
DECQ R15
JNZ flush_buffer
ADDQ BX, R8
ADCQ CX, R9
ADCQ $0, R10
MOVQ $16, R15
JMP multiply
done:
MOVQ R8, 0(DI)
MOVQ R9, 8(DI)
MOVQ R10, 16(DI)
RET
// func initialize(state *[7]uint64, key *[32]byte)
TEXT ·initialize(SB), $0-16
MOVQ state+0(FP), DI
MOVQ key+8(FP), SI
// state[0...7] is initialized with zero
MOVOU 0(SI), X0
MOVOU 16(SI), X1
MOVOU ·poly1305Mask<>(SB), X2
PAND X2, X0
MOVOU X0, 24(DI)
MOVOU X1, 40(DI)
RET
// func finalize(tag *[TagSize]byte, state *[7]uint64)
TEXT ·finalize(SB), $0-16
MOVQ tag+0(FP), DI
MOVQ state+8(FP), SI
MOVQ 0(SI), AX
MOVQ 8(SI), BX
MOVQ 16(SI), CX
MOVQ AX, R8
MOVQ BX, R9
SUBQ $0xFFFFFFFFFFFFFFFB, AX
SBBQ $0xFFFFFFFFFFFFFFFF, BX
SBBQ $3, CX
CMOVQCS R8, AX
CMOVQCS R9, BX
ADDQ 40(SI), AX
ADCQ 48(SI), BX
MOVQ AX, 0(DI)
MOVQ BX, 8(DI)
RET

22
vendor/golang.org/x/crypto/poly1305/sum_arm.go generated vendored Normal file
View File

@@ -0,0 +1,22 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build arm,!gccgo,!appengine,!nacl
package poly1305
// This function is implemented in sum_arm.s
//go:noescape
func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte)
// Sum generates an authenticator for m using a one-time key and puts the
// 16-byte result into out. Authenticating two different messages with the same
// key allows an attacker to forge messages at will.
func Sum(out *[16]byte, m []byte, key *[32]byte) {
var mPtr *byte
if len(m) > 0 {
mPtr = &m[0]
}
poly1305_auth_armv6(out, mPtr, uint32(len(m)), key)
}

427
vendor/golang.org/x/crypto/poly1305/sum_arm.s generated vendored Normal file
View File

@@ -0,0 +1,427 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build arm,!gccgo,!appengine,!nacl
#include "textflag.h"
// This code was translated into a form compatible with 5a from the public
// domain source by Andrew Moon: github.com/floodyberry/poly1305-opt/blob/master/app/extensions/poly1305.
DATA ·poly1305_init_constants_armv6<>+0x00(SB)/4, $0x3ffffff
DATA ·poly1305_init_constants_armv6<>+0x04(SB)/4, $0x3ffff03
DATA ·poly1305_init_constants_armv6<>+0x08(SB)/4, $0x3ffc0ff
DATA ·poly1305_init_constants_armv6<>+0x0c(SB)/4, $0x3f03fff
DATA ·poly1305_init_constants_armv6<>+0x10(SB)/4, $0x00fffff
GLOBL ·poly1305_init_constants_armv6<>(SB), 8, $20
// Warning: the linker may use R11 to synthesize certain instructions. Please
// take care and verify that no synthetic instructions use it.
TEXT poly1305_init_ext_armv6<>(SB), NOSPLIT, $0
// Needs 16 bytes of stack and 64 bytes of space pointed to by R0. (It
// might look like it's only 60 bytes of space but the final four bytes
// will be written by another function.) We need to skip over four
// bytes of stack because that's saving the value of 'g'.
ADD $4, R13, R8
MOVM.IB [R4-R7], (R8)
MOVM.IA.W (R1), [R2-R5]
MOVW $·poly1305_init_constants_armv6<>(SB), R7
MOVW R2, R8
MOVW R2>>26, R9
MOVW R3>>20, g
MOVW R4>>14, R11
MOVW R5>>8, R12
ORR R3<<6, R9, R9
ORR R4<<12, g, g
ORR R5<<18, R11, R11
MOVM.IA (R7), [R2-R6]
AND R8, R2, R2
AND R9, R3, R3
AND g, R4, R4
AND R11, R5, R5
AND R12, R6, R6
MOVM.IA.W [R2-R6], (R0)
EOR R2, R2, R2
EOR R3, R3, R3
EOR R4, R4, R4
EOR R5, R5, R5
EOR R6, R6, R6
MOVM.IA.W [R2-R6], (R0)
MOVM.IA.W (R1), [R2-R5]
MOVM.IA [R2-R6], (R0)
ADD $20, R13, R0
MOVM.DA (R0), [R4-R7]
RET
#define MOVW_UNALIGNED(Rsrc, Rdst, Rtmp, offset) \
MOVBU (offset+0)(Rsrc), Rtmp; \
MOVBU Rtmp, (offset+0)(Rdst); \
MOVBU (offset+1)(Rsrc), Rtmp; \
MOVBU Rtmp, (offset+1)(Rdst); \
MOVBU (offset+2)(Rsrc), Rtmp; \
MOVBU Rtmp, (offset+2)(Rdst); \
MOVBU (offset+3)(Rsrc), Rtmp; \
MOVBU Rtmp, (offset+3)(Rdst)
TEXT poly1305_blocks_armv6<>(SB), NOSPLIT, $0
// Needs 24 bytes of stack for saved registers and then 88 bytes of
// scratch space after that. We assume that 24 bytes at (R13) have
// already been used: four bytes for the link register saved in the
// prelude of poly1305_auth_armv6, four bytes for saving the value of g
// in that function and 16 bytes of scratch space used around
// poly1305_finish_ext_armv6_skip1.
ADD $24, R13, R12
MOVM.IB [R4-R8, R14], (R12)
MOVW R0, 88(R13)
MOVW R1, 92(R13)
MOVW R2, 96(R13)
MOVW R1, R14
MOVW R2, R12
MOVW 56(R0), R8
WORD $0xe1180008 // TST R8, R8 not working see issue 5921
EOR R6, R6, R6
MOVW.EQ $(1<<24), R6
MOVW R6, 84(R13)
ADD $116, R13, g
MOVM.IA (R0), [R0-R9]
MOVM.IA [R0-R4], (g)
CMP $16, R12
BLO poly1305_blocks_armv6_done
poly1305_blocks_armv6_mainloop:
WORD $0xe31e0003 // TST R14, #3 not working see issue 5921
BEQ poly1305_blocks_armv6_mainloop_aligned
ADD $100, R13, g
MOVW_UNALIGNED(R14, g, R0, 0)
MOVW_UNALIGNED(R14, g, R0, 4)
MOVW_UNALIGNED(R14, g, R0, 8)
MOVW_UNALIGNED(R14, g, R0, 12)
MOVM.IA (g), [R0-R3]
ADD $16, R14
B poly1305_blocks_armv6_mainloop_loaded
poly1305_blocks_armv6_mainloop_aligned:
MOVM.IA.W (R14), [R0-R3]
poly1305_blocks_armv6_mainloop_loaded:
MOVW R0>>26, g
MOVW R1>>20, R11
MOVW R2>>14, R12
MOVW R14, 92(R13)
MOVW R3>>8, R4
ORR R1<<6, g, g
ORR R2<<12, R11, R11
ORR R3<<18, R12, R12
BIC $0xfc000000, R0, R0
BIC $0xfc000000, g, g
MOVW 84(R13), R3
BIC $0xfc000000, R11, R11
BIC $0xfc000000, R12, R12
ADD R0, R5, R5
ADD g, R6, R6
ORR R3, R4, R4
ADD R11, R7, R7
ADD $116, R13, R14
ADD R12, R8, R8
ADD R4, R9, R9
MOVM.IA (R14), [R0-R4]
MULLU R4, R5, (R11, g)
MULLU R3, R5, (R14, R12)
MULALU R3, R6, (R11, g)
MULALU R2, R6, (R14, R12)
MULALU R2, R7, (R11, g)
MULALU R1, R7, (R14, R12)
ADD R4<<2, R4, R4
ADD R3<<2, R3, R3
MULALU R1, R8, (R11, g)
MULALU R0, R8, (R14, R12)
MULALU R0, R9, (R11, g)
MULALU R4, R9, (R14, R12)
MOVW g, 76(R13)
MOVW R11, 80(R13)
MOVW R12, 68(R13)
MOVW R14, 72(R13)
MULLU R2, R5, (R11, g)
MULLU R1, R5, (R14, R12)
MULALU R1, R6, (R11, g)
MULALU R0, R6, (R14, R12)
MULALU R0, R7, (R11, g)
MULALU R4, R7, (R14, R12)
ADD R2<<2, R2, R2
ADD R1<<2, R1, R1
MULALU R4, R8, (R11, g)
MULALU R3, R8, (R14, R12)
MULALU R3, R9, (R11, g)
MULALU R2, R9, (R14, R12)
MOVW g, 60(R13)
MOVW R11, 64(R13)
MOVW R12, 52(R13)
MOVW R14, 56(R13)
MULLU R0, R5, (R11, g)
MULALU R4, R6, (R11, g)
MULALU R3, R7, (R11, g)
MULALU R2, R8, (R11, g)
MULALU R1, R9, (R11, g)
ADD $52, R13, R0
MOVM.IA (R0), [R0-R7]
MOVW g>>26, R12
MOVW R4>>26, R14
ORR R11<<6, R12, R12
ORR R5<<6, R14, R14
BIC $0xfc000000, g, g
BIC $0xfc000000, R4, R4
ADD.S R12, R0, R0
ADC $0, R1, R1
ADD.S R14, R6, R6
ADC $0, R7, R7
MOVW R0>>26, R12
MOVW R6>>26, R14
ORR R1<<6, R12, R12
ORR R7<<6, R14, R14
BIC $0xfc000000, R0, R0
BIC $0xfc000000, R6, R6
ADD R14<<2, R14, R14
ADD.S R12, R2, R2
ADC $0, R3, R3
ADD R14, g, g
MOVW R2>>26, R12
MOVW g>>26, R14
ORR R3<<6, R12, R12
BIC $0xfc000000, g, R5
BIC $0xfc000000, R2, R7
ADD R12, R4, R4
ADD R14, R0, R0
MOVW R4>>26, R12
BIC $0xfc000000, R4, R8
ADD R12, R6, R9
MOVW 96(R13), R12
MOVW 92(R13), R14
MOVW R0, R6
CMP $32, R12
SUB $16, R12, R12
MOVW R12, 96(R13)
BHS poly1305_blocks_armv6_mainloop
poly1305_blocks_armv6_done:
MOVW 88(R13), R12
MOVW R5, 20(R12)
MOVW R6, 24(R12)
MOVW R7, 28(R12)
MOVW R8, 32(R12)
MOVW R9, 36(R12)
ADD $48, R13, R0
MOVM.DA (R0), [R4-R8, R14]
RET
#define MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) \
MOVBU.P 1(Rsrc), Rtmp; \
MOVBU.P Rtmp, 1(Rdst); \
MOVBU.P 1(Rsrc), Rtmp; \
MOVBU.P Rtmp, 1(Rdst)
#define MOVWP_UNALIGNED(Rsrc, Rdst, Rtmp) \
MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp); \
MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp)
// func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]key)
TEXT ·poly1305_auth_armv6(SB), $196-16
// The value 196, just above, is the sum of 64 (the size of the context
// structure) and 132 (the amount of stack needed).
//
// At this point, the stack pointer (R13) has been moved down. It
// points to the saved link register and there's 196 bytes of free
// space above it.
//
// The stack for this function looks like:
//
// +---------------------
// |
// | 64 bytes of context structure
// |
// +---------------------
// |
// | 112 bytes for poly1305_blocks_armv6
// |
// +---------------------
// | 16 bytes of final block, constructed at
// | poly1305_finish_ext_armv6_skip8
// +---------------------
// | four bytes of saved 'g'
// +---------------------
// | lr, saved by prelude <- R13 points here
// +---------------------
MOVW g, 4(R13)
MOVW out+0(FP), R4
MOVW m+4(FP), R5
MOVW mlen+8(FP), R6
MOVW key+12(FP), R7
ADD $136, R13, R0 // 136 = 4 + 4 + 16 + 112
MOVW R7, R1
// poly1305_init_ext_armv6 will write to the stack from R13+4, but
// that's ok because none of the other values have been written yet.
BL poly1305_init_ext_armv6<>(SB)
BIC.S $15, R6, R2
BEQ poly1305_auth_armv6_noblocks
ADD $136, R13, R0
MOVW R5, R1
ADD R2, R5, R5
SUB R2, R6, R6
BL poly1305_blocks_armv6<>(SB)
poly1305_auth_armv6_noblocks:
ADD $136, R13, R0
MOVW R5, R1
MOVW R6, R2
MOVW R4, R3
MOVW R0, R5
MOVW R1, R6
MOVW R2, R7
MOVW R3, R8
AND.S R2, R2, R2
BEQ poly1305_finish_ext_armv6_noremaining
EOR R0, R0
ADD $8, R13, R9 // 8 = offset to 16 byte scratch space
MOVW R0, (R9)
MOVW R0, 4(R9)
MOVW R0, 8(R9)
MOVW R0, 12(R9)
WORD $0xe3110003 // TST R1, #3 not working see issue 5921
BEQ poly1305_finish_ext_armv6_aligned
WORD $0xe3120008 // TST R2, #8 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip8
MOVWP_UNALIGNED(R1, R9, g)
MOVWP_UNALIGNED(R1, R9, g)
poly1305_finish_ext_armv6_skip8:
WORD $0xe3120004 // TST $4, R2 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip4
MOVWP_UNALIGNED(R1, R9, g)
poly1305_finish_ext_armv6_skip4:
WORD $0xe3120002 // TST $2, R2 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip2
MOVHUP_UNALIGNED(R1, R9, g)
B poly1305_finish_ext_armv6_skip2
poly1305_finish_ext_armv6_aligned:
WORD $0xe3120008 // TST R2, #8 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip8_aligned
MOVM.IA.W (R1), [g-R11]
MOVM.IA.W [g-R11], (R9)
poly1305_finish_ext_armv6_skip8_aligned:
WORD $0xe3120004 // TST $4, R2 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip4_aligned
MOVW.P 4(R1), g
MOVW.P g, 4(R9)
poly1305_finish_ext_armv6_skip4_aligned:
WORD $0xe3120002 // TST $2, R2 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip2
MOVHU.P 2(R1), g
MOVH.P g, 2(R9)
poly1305_finish_ext_armv6_skip2:
WORD $0xe3120001 // TST $1, R2 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip1
MOVBU.P 1(R1), g
MOVBU.P g, 1(R9)
poly1305_finish_ext_armv6_skip1:
MOVW $1, R11
MOVBU R11, 0(R9)
MOVW R11, 56(R5)
MOVW R5, R0
ADD $8, R13, R1
MOVW $16, R2
BL poly1305_blocks_armv6<>(SB)
poly1305_finish_ext_armv6_noremaining:
MOVW 20(R5), R0
MOVW 24(R5), R1
MOVW 28(R5), R2
MOVW 32(R5), R3
MOVW 36(R5), R4
MOVW R4>>26, R12
BIC $0xfc000000, R4, R4
ADD R12<<2, R12, R12
ADD R12, R0, R0
MOVW R0>>26, R12
BIC $0xfc000000, R0, R0
ADD R12, R1, R1
MOVW R1>>26, R12
BIC $0xfc000000, R1, R1
ADD R12, R2, R2
MOVW R2>>26, R12
BIC $0xfc000000, R2, R2
ADD R12, R3, R3
MOVW R3>>26, R12
BIC $0xfc000000, R3, R3
ADD R12, R4, R4
ADD $5, R0, R6
MOVW R6>>26, R12
BIC $0xfc000000, R6, R6
ADD R12, R1, R7
MOVW R7>>26, R12
BIC $0xfc000000, R7, R7
ADD R12, R2, g
MOVW g>>26, R12
BIC $0xfc000000, g, g
ADD R12, R3, R11
MOVW $-(1<<26), R12
ADD R11>>26, R12, R12
BIC $0xfc000000, R11, R11
ADD R12, R4, R9
MOVW R9>>31, R12
SUB $1, R12
AND R12, R6, R6
AND R12, R7, R7
AND R12, g, g
AND R12, R11, R11
AND R12, R9, R9
MVN R12, R12
AND R12, R0, R0
AND R12, R1, R1
AND R12, R2, R2
AND R12, R3, R3
AND R12, R4, R4
ORR R6, R0, R0
ORR R7, R1, R1
ORR g, R2, R2
ORR R11, R3, R3
ORR R9, R4, R4
ORR R1<<26, R0, R0
MOVW R1>>6, R1
ORR R2<<20, R1, R1
MOVW R2>>12, R2
ORR R3<<14, R2, R2
MOVW R3>>18, R3
ORR R4<<8, R3, R3
MOVW 40(R5), R6
MOVW 44(R5), R7
MOVW 48(R5), g
MOVW 52(R5), R11
ADD.S R6, R0, R0
ADC.S R7, R1, R1
ADC.S g, R2, R2
ADC.S R11, R3, R3
MOVM.IA [R0-R3], (R8)
MOVW R5, R12
EOR R0, R0, R0
EOR R1, R1, R1
EOR R2, R2, R2
EOR R3, R3, R3
EOR R4, R4, R4
EOR R5, R5, R5
EOR R6, R6, R6
EOR R7, R7, R7
MOVM.IA.W [R0-R7], (R12)
MOVM.IA [R0-R7], (R12)
MOVW 4(R13), g
RET

172
vendor/golang.org/x/crypto/poly1305/sum_generic.go generated vendored Normal file
View File

@@ -0,0 +1,172 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poly1305
import "encoding/binary"
const (
msgBlock = uint32(1 << 24)
finalBlock = uint32(0)
)
// sumGeneric generates an authenticator for msg using a one-time key and
// puts the 16-byte result into out. This is the generic implementation of
// Sum and should be called if no assembly implementation is available.
func sumGeneric(out *[TagSize]byte, msg []byte, key *[32]byte) {
h := newMACGeneric(key)
h.Write(msg)
h.Sum(out)
}
func newMACGeneric(key *[32]byte) (h macGeneric) {
h.r[0] = binary.LittleEndian.Uint32(key[0:]) & 0x3ffffff
h.r[1] = (binary.LittleEndian.Uint32(key[3:]) >> 2) & 0x3ffff03
h.r[2] = (binary.LittleEndian.Uint32(key[6:]) >> 4) & 0x3ffc0ff
h.r[3] = (binary.LittleEndian.Uint32(key[9:]) >> 6) & 0x3f03fff
h.r[4] = (binary.LittleEndian.Uint32(key[12:]) >> 8) & 0x00fffff
h.s[0] = binary.LittleEndian.Uint32(key[16:])
h.s[1] = binary.LittleEndian.Uint32(key[20:])
h.s[2] = binary.LittleEndian.Uint32(key[24:])
h.s[3] = binary.LittleEndian.Uint32(key[28:])
return
}
type macGeneric struct {
h, r [5]uint32
s [4]uint32
buffer [TagSize]byte
offset int
}
func (h *macGeneric) Write(p []byte) (n int, err error) {
n = len(p)
if h.offset > 0 {
remaining := TagSize - h.offset
if n < remaining {
h.offset += copy(h.buffer[h.offset:], p)
return n, nil
}
copy(h.buffer[h.offset:], p[:remaining])
p = p[remaining:]
h.offset = 0
updateGeneric(h.buffer[:], msgBlock, &(h.h), &(h.r))
}
if nn := len(p) - (len(p) % TagSize); nn > 0 {
updateGeneric(p, msgBlock, &(h.h), &(h.r))
p = p[nn:]
}
if len(p) > 0 {
h.offset += copy(h.buffer[h.offset:], p)
}
return n, nil
}
func (h *macGeneric) Sum(out *[16]byte) {
H, R := h.h, h.r
if h.offset > 0 {
var buffer [TagSize]byte
copy(buffer[:], h.buffer[:h.offset])
buffer[h.offset] = 1 // invariant: h.offset < TagSize
updateGeneric(buffer[:], finalBlock, &H, &R)
}
finalizeGeneric(out, &H, &(h.s))
}
func updateGeneric(msg []byte, flag uint32, h, r *[5]uint32) {
h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4]
r0, r1, r2, r3, r4 := uint64(r[0]), uint64(r[1]), uint64(r[2]), uint64(r[3]), uint64(r[4])
R1, R2, R3, R4 := r1*5, r2*5, r3*5, r4*5
for len(msg) >= TagSize {
// h += msg
h0 += binary.LittleEndian.Uint32(msg[0:]) & 0x3ffffff
h1 += (binary.LittleEndian.Uint32(msg[3:]) >> 2) & 0x3ffffff
h2 += (binary.LittleEndian.Uint32(msg[6:]) >> 4) & 0x3ffffff
h3 += (binary.LittleEndian.Uint32(msg[9:]) >> 6) & 0x3ffffff
h4 += (binary.LittleEndian.Uint32(msg[12:]) >> 8) | flag
// h *= r
d0 := (uint64(h0) * r0) + (uint64(h1) * R4) + (uint64(h2) * R3) + (uint64(h3) * R2) + (uint64(h4) * R1)
d1 := (d0 >> 26) + (uint64(h0) * r1) + (uint64(h1) * r0) + (uint64(h2) * R4) + (uint64(h3) * R3) + (uint64(h4) * R2)
d2 := (d1 >> 26) + (uint64(h0) * r2) + (uint64(h1) * r1) + (uint64(h2) * r0) + (uint64(h3) * R4) + (uint64(h4) * R3)
d3 := (d2 >> 26) + (uint64(h0) * r3) + (uint64(h1) * r2) + (uint64(h2) * r1) + (uint64(h3) * r0) + (uint64(h4) * R4)
d4 := (d3 >> 26) + (uint64(h0) * r4) + (uint64(h1) * r3) + (uint64(h2) * r2) + (uint64(h3) * r1) + (uint64(h4) * r0)
// h %= p
h0 = uint32(d0) & 0x3ffffff
h1 = uint32(d1) & 0x3ffffff
h2 = uint32(d2) & 0x3ffffff
h3 = uint32(d3) & 0x3ffffff
h4 = uint32(d4) & 0x3ffffff
h0 += uint32(d4>>26) * 5
h1 += h0 >> 26
h0 = h0 & 0x3ffffff
msg = msg[TagSize:]
}
h[0], h[1], h[2], h[3], h[4] = h0, h1, h2, h3, h4
}
func finalizeGeneric(out *[TagSize]byte, h *[5]uint32, s *[4]uint32) {
h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4]
// h %= p reduction
h2 += h1 >> 26
h1 &= 0x3ffffff
h3 += h2 >> 26
h2 &= 0x3ffffff
h4 += h3 >> 26
h3 &= 0x3ffffff
h0 += 5 * (h4 >> 26)
h4 &= 0x3ffffff
h1 += h0 >> 26
h0 &= 0x3ffffff
// h - p
t0 := h0 + 5
t1 := h1 + (t0 >> 26)
t2 := h2 + (t1 >> 26)
t3 := h3 + (t2 >> 26)
t4 := h4 + (t3 >> 26) - (1 << 26)
t0 &= 0x3ffffff
t1 &= 0x3ffffff
t2 &= 0x3ffffff
t3 &= 0x3ffffff
// select h if h < p else h - p
t_mask := (t4 >> 31) - 1
h_mask := ^t_mask
h0 = (h0 & h_mask) | (t0 & t_mask)
h1 = (h1 & h_mask) | (t1 & t_mask)
h2 = (h2 & h_mask) | (t2 & t_mask)
h3 = (h3 & h_mask) | (t3 & t_mask)
h4 = (h4 & h_mask) | (t4 & t_mask)
// h %= 2^128
h0 |= h1 << 26
h1 = ((h1 >> 6) | (h2 << 20))
h2 = ((h2 >> 12) | (h3 << 14))
h3 = ((h3 >> 18) | (h4 << 8))
// s: the s part of the key
// tag = (h + s) % (2^128)
t := uint64(h0) + uint64(s[0])
h0 = uint32(t)
t = uint64(h1) + uint64(s[1]) + (t >> 32)
h1 = uint32(t)
t = uint64(h2) + uint64(s[2]) + (t >> 32)
h2 = uint32(t)
t = uint64(h3) + uint64(s[3]) + (t >> 32)
h3 = uint32(t)
binary.LittleEndian.PutUint32(out[0:], h0)
binary.LittleEndian.PutUint32(out[4:], h1)
binary.LittleEndian.PutUint32(out[8:], h2)
binary.LittleEndian.PutUint32(out[12:], h3)
}

16
vendor/golang.org/x/crypto/poly1305/sum_noasm.go generated vendored Normal file
View File

@@ -0,0 +1,16 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build s390x,!go1.11 !arm,!amd64,!s390x,!ppc64le gccgo appengine nacl
package poly1305
// Sum generates an authenticator for msg using a one-time key and puts the
// 16-byte result into out. Authenticating two different messages with the same
// key allows an attacker to forge messages at will.
func Sum(out *[TagSize]byte, msg []byte, key *[32]byte) {
h := newMAC(key)
h.Write(msg)
h.Sum(out)
}

68
vendor/golang.org/x/crypto/poly1305/sum_ppc64le.go generated vendored Normal file
View File

@@ -0,0 +1,68 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ppc64le,!gccgo,!appengine
package poly1305
//go:noescape
func initialize(state *[7]uint64, key *[32]byte)
//go:noescape
func update(state *[7]uint64, msg []byte)
//go:noescape
func finalize(tag *[TagSize]byte, state *[7]uint64)
// Sum generates an authenticator for m using a one-time key and puts the
// 16-byte result into out. Authenticating two different messages with the same
// key allows an attacker to forge messages at will.
func Sum(out *[16]byte, m []byte, key *[32]byte) {
h := newMAC(key)
h.Write(m)
h.Sum(out)
}
func newMAC(key *[32]byte) (h mac) {
initialize(&h.state, key)
return
}
type mac struct {
state [7]uint64 // := uint64{ h0, h1, h2, r0, r1, pad0, pad1 }
buffer [TagSize]byte
offset int
}
func (h *mac) Write(p []byte) (n int, err error) {
n = len(p)
if h.offset > 0 {
remaining := TagSize - h.offset
if n < remaining {
h.offset += copy(h.buffer[h.offset:], p)
return n, nil
}
copy(h.buffer[h.offset:], p[:remaining])
p = p[remaining:]
h.offset = 0
update(&h.state, h.buffer[:])
}
if nn := len(p) - (len(p) % TagSize); nn > 0 {
update(&h.state, p[:nn])
p = p[nn:]
}
if len(p) > 0 {
h.offset += copy(h.buffer[h.offset:], p)
}
return n, nil
}
func (h *mac) Sum(out *[16]byte) {
state := h.state
if h.offset > 0 {
update(&state, h.buffer[:h.offset])
}
finalize(out, &state)
}

247
vendor/golang.org/x/crypto/poly1305/sum_ppc64le.s generated vendored Normal file
View File

@@ -0,0 +1,247 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ppc64le,!gccgo,!appengine
#include "textflag.h"
// This was ported from the amd64 implementation.
#define POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2) \
MOVD (msg), t0; \
MOVD 8(msg), t1; \
MOVD $1, t2; \
ADDC t0, h0, h0; \
ADDE t1, h1, h1; \
ADDE t2, h2; \
ADD $16, msg
#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5) \
MULLD r0, h0, t0; \
MULLD r0, h1, t4; \
MULHDU r0, h0, t1; \
MULHDU r0, h1, t5; \
ADDC t4, t1, t1; \
MULLD r0, h2, t2; \
ADDZE t5; \
MULHDU r1, h0, t4; \
MULLD r1, h0, h0; \
ADD t5, t2, t2; \
ADDC h0, t1, t1; \
MULLD h2, r1, t3; \
ADDZE t4, h0; \
MULHDU r1, h1, t5; \
MULLD r1, h1, t4; \
ADDC t4, t2, t2; \
ADDE t5, t3, t3; \
ADDC h0, t2, t2; \
MOVD $-4, t4; \
MOVD t0, h0; \
MOVD t1, h1; \
ADDZE t3; \
ANDCC $3, t2, h2; \
AND t2, t4, t0; \
ADDC t0, h0, h0; \
ADDE t3, h1, h1; \
SLD $62, t3, t4; \
SRD $2, t2; \
ADDZE h2; \
OR t4, t2, t2; \
SRD $2, t3; \
ADDC t2, h0, h0; \
ADDE t3, h1, h1; \
ADDZE h2
DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
GLOBL ·poly1305Mask<>(SB), RODATA, $16
// func update(state *[7]uint64, msg []byte)
TEXT ·update(SB), $0-32
MOVD state+0(FP), R3
MOVD msg_base+8(FP), R4
MOVD msg_len+16(FP), R5
MOVD 0(R3), R8 // h0
MOVD 8(R3), R9 // h1
MOVD 16(R3), R10 // h2
MOVD 24(R3), R11 // r0
MOVD 32(R3), R12 // r1
CMP R5, $16
BLT bytes_between_0_and_15
loop:
POLY1305_ADD(R4, R8, R9, R10, R20, R21, R22)
multiply:
POLY1305_MUL(R8, R9, R10, R11, R12, R16, R17, R18, R14, R20, R21)
ADD $-16, R5
CMP R5, $16
BGE loop
bytes_between_0_and_15:
CMP $0, R5
BEQ done
MOVD $0, R16 // h0
MOVD $0, R17 // h1
flush_buffer:
CMP R5, $8
BLE just1
MOVD $8, R21
SUB R21, R5, R21
// Greater than 8 -- load the rightmost remaining bytes in msg
// and put into R17 (h1)
MOVD (R4)(R21), R17
MOVD $16, R22
// Find the offset to those bytes
SUB R5, R22, R22
SLD $3, R22
// Shift to get only the bytes in msg
SRD R22, R17, R17
// Put 1 at high end
MOVD $1, R23
SLD $3, R21
SLD R21, R23, R23
OR R23, R17, R17
// Remainder is 8
MOVD $8, R5
just1:
CMP R5, $8
BLT less8
// Exactly 8
MOVD (R4), R16
CMP $0, R17
// Check if we've already set R17; if not
// set 1 to indicate end of msg.
BNE carry
MOVD $1, R17
BR carry
less8:
MOVD $0, R16 // h0
MOVD $0, R22 // shift count
CMP R5, $4
BLT less4
MOVWZ (R4), R16
ADD $4, R4
ADD $-4, R5
MOVD $32, R22
less4:
CMP R5, $2
BLT less2
MOVHZ (R4), R21
SLD R22, R21, R21
OR R16, R21, R16
ADD $16, R22
ADD $-2, R5
ADD $2, R4
less2:
CMP $0, R5
BEQ insert1
MOVBZ (R4), R21
SLD R22, R21, R21
OR R16, R21, R16
ADD $8, R22
insert1:
// Insert 1 at end of msg
MOVD $1, R21
SLD R22, R21, R21
OR R16, R21, R16
carry:
// Add new values to h0, h1, h2
ADDC R16, R8
ADDE R17, R9
ADDE $0, R10
MOVD $16, R5
ADD R5, R4
BR multiply
done:
// Save h0, h1, h2 in state
MOVD R8, 0(R3)
MOVD R9, 8(R3)
MOVD R10, 16(R3)
RET
// func initialize(state *[7]uint64, key *[32]byte)
TEXT ·initialize(SB), $0-16
MOVD state+0(FP), R3
MOVD key+8(FP), R4
// state[0...7] is initialized with zero
// Load key
MOVD 0(R4), R5
MOVD 8(R4), R6
MOVD 16(R4), R7
MOVD 24(R4), R8
// Address of key mask
MOVD $·poly1305Mask<>(SB), R9
// Save original key in state
MOVD R7, 40(R3)
MOVD R8, 48(R3)
// Get mask
MOVD (R9), R7
MOVD 8(R9), R8
// And with key
AND R5, R7, R5
AND R6, R8, R6
// Save masked key in state
MOVD R5, 24(R3)
MOVD R6, 32(R3)
RET
// func finalize(tag *[TagSize]byte, state *[7]uint64)
TEXT ·finalize(SB), $0-16
MOVD tag+0(FP), R3
MOVD state+8(FP), R4
// Get h0, h1, h2 from state
MOVD 0(R4), R5
MOVD 8(R4), R6
MOVD 16(R4), R7
// Save h0, h1
MOVD R5, R8
MOVD R6, R9
MOVD $3, R20
MOVD $-1, R21
SUBC $-5, R5
SUBE R21, R6
SUBE R20, R7
MOVD $0, R21
SUBZE R21
// Check for carry
CMP $0, R21
ISEL $2, R5, R8, R5
ISEL $2, R6, R9, R6
MOVD 40(R4), R8
MOVD 48(R4), R9
ADDC R8, R5
ADDE R9, R6
MOVD R5, 0(R3)
MOVD R6, 8(R3)
RET

42
vendor/golang.org/x/crypto/poly1305/sum_s390x.go generated vendored Normal file
View File

@@ -0,0 +1,42 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build s390x,go1.11,!gccgo,!appengine
package poly1305
import (
"golang.org/x/sys/cpu"
)
// poly1305vx is an assembly implementation of Poly1305 that uses vector
// instructions. It must only be called if the vector facility (vx) is
// available.
//go:noescape
func poly1305vx(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
// poly1305vmsl is an assembly implementation of Poly1305 that uses vector
// instructions, including VMSL. It must only be called if the vector facility (vx) is
// available and if VMSL is supported.
//go:noescape
func poly1305vmsl(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
// Sum generates an authenticator for m using a one-time key and puts the
// 16-byte result into out. Authenticating two different messages with the same
// key allows an attacker to forge messages at will.
func Sum(out *[16]byte, m []byte, key *[32]byte) {
if cpu.S390X.HasVX {
var mPtr *byte
if len(m) > 0 {
mPtr = &m[0]
}
if cpu.S390X.HasVXE && len(m) > 256 {
poly1305vmsl(out, mPtr, uint64(len(m)), key)
} else {
poly1305vx(out, mPtr, uint64(len(m)), key)
}
} else {
sumGeneric(out, m, key)
}
}

378
vendor/golang.org/x/crypto/poly1305/sum_s390x.s generated vendored Normal file
View File

@@ -0,0 +1,378 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build s390x,go1.11,!gccgo,!appengine
#include "textflag.h"
// Implementation of Poly1305 using the vector facility (vx).
// constants
#define MOD26 V0
#define EX0 V1
#define EX1 V2
#define EX2 V3
// temporaries
#define T_0 V4
#define T_1 V5
#define T_2 V6
#define T_3 V7
#define T_4 V8
// key (r)
#define R_0 V9
#define R_1 V10
#define R_2 V11
#define R_3 V12
#define R_4 V13
#define R5_1 V14
#define R5_2 V15
#define R5_3 V16
#define R5_4 V17
#define RSAVE_0 R5
#define RSAVE_1 R6
#define RSAVE_2 R7
#define RSAVE_3 R8
#define RSAVE_4 R9
#define R5SAVE_1 V28
#define R5SAVE_2 V29
#define R5SAVE_3 V30
#define R5SAVE_4 V31
// message block
#define F_0 V18
#define F_1 V19
#define F_2 V20
#define F_3 V21
#define F_4 V22
// accumulator
#define H_0 V23
#define H_1 V24
#define H_2 V25
#define H_3 V26
#define H_4 V27
GLOBL ·keyMask<>(SB), RODATA, $16
DATA ·keyMask<>+0(SB)/8, $0xffffff0ffcffff0f
DATA ·keyMask<>+8(SB)/8, $0xfcffff0ffcffff0f
GLOBL ·bswapMask<>(SB), RODATA, $16
DATA ·bswapMask<>+0(SB)/8, $0x0f0e0d0c0b0a0908
DATA ·bswapMask<>+8(SB)/8, $0x0706050403020100
GLOBL ·constants<>(SB), RODATA, $64
// MOD26
DATA ·constants<>+0(SB)/8, $0x3ffffff
DATA ·constants<>+8(SB)/8, $0x3ffffff
// EX0
DATA ·constants<>+16(SB)/8, $0x0006050403020100
DATA ·constants<>+24(SB)/8, $0x1016151413121110
// EX1
DATA ·constants<>+32(SB)/8, $0x060c0b0a09080706
DATA ·constants<>+40(SB)/8, $0x161c1b1a19181716
// EX2
DATA ·constants<>+48(SB)/8, $0x0d0d0d0d0d0f0e0d
DATA ·constants<>+56(SB)/8, $0x1d1d1d1d1d1f1e1d
// h = (f*g) % (2**130-5) [partial reduction]
#define MULTIPLY(f0, f1, f2, f3, f4, g0, g1, g2, g3, g4, g51, g52, g53, g54, h0, h1, h2, h3, h4) \
VMLOF f0, g0, h0 \
VMLOF f0, g1, h1 \
VMLOF f0, g2, h2 \
VMLOF f0, g3, h3 \
VMLOF f0, g4, h4 \
VMLOF f1, g54, T_0 \
VMLOF f1, g0, T_1 \
VMLOF f1, g1, T_2 \
VMLOF f1, g2, T_3 \
VMLOF f1, g3, T_4 \
VMALOF f2, g53, h0, h0 \
VMALOF f2, g54, h1, h1 \
VMALOF f2, g0, h2, h2 \
VMALOF f2, g1, h3, h3 \
VMALOF f2, g2, h4, h4 \
VMALOF f3, g52, T_0, T_0 \
VMALOF f3, g53, T_1, T_1 \
VMALOF f3, g54, T_2, T_2 \
VMALOF f3, g0, T_3, T_3 \
VMALOF f3, g1, T_4, T_4 \
VMALOF f4, g51, h0, h0 \
VMALOF f4, g52, h1, h1 \
VMALOF f4, g53, h2, h2 \
VMALOF f4, g54, h3, h3 \
VMALOF f4, g0, h4, h4 \
VAG T_0, h0, h0 \
VAG T_1, h1, h1 \
VAG T_2, h2, h2 \
VAG T_3, h3, h3 \
VAG T_4, h4, h4
// carry h0->h1 h3->h4, h1->h2 h4->h0, h0->h1 h2->h3, h3->h4
#define REDUCE(h0, h1, h2, h3, h4) \
VESRLG $26, h0, T_0 \
VESRLG $26, h3, T_1 \
VN MOD26, h0, h0 \
VN MOD26, h3, h3 \
VAG T_0, h1, h1 \
VAG T_1, h4, h4 \
VESRLG $26, h1, T_2 \
VESRLG $26, h4, T_3 \
VN MOD26, h1, h1 \
VN MOD26, h4, h4 \
VESLG $2, T_3, T_4 \
VAG T_3, T_4, T_4 \
VAG T_2, h2, h2 \
VAG T_4, h0, h0 \
VESRLG $26, h2, T_0 \
VESRLG $26, h0, T_1 \
VN MOD26, h2, h2 \
VN MOD26, h0, h0 \
VAG T_0, h3, h3 \
VAG T_1, h1, h1 \
VESRLG $26, h3, T_2 \
VN MOD26, h3, h3 \
VAG T_2, h4, h4
// expand in0 into d[0] and in1 into d[1]
#define EXPAND(in0, in1, d0, d1, d2, d3, d4) \
VGBM $0x0707, d1 \ // d1=tmp
VPERM in0, in1, EX2, d4 \
VPERM in0, in1, EX0, d0 \
VPERM in0, in1, EX1, d2 \
VN d1, d4, d4 \
VESRLG $26, d0, d1 \
VESRLG $30, d2, d3 \
VESRLG $4, d2, d2 \
VN MOD26, d0, d0 \
VN MOD26, d1, d1 \
VN MOD26, d2, d2 \
VN MOD26, d3, d3
// pack h4:h0 into h1:h0 (no carry)
#define PACK(h0, h1, h2, h3, h4) \
VESLG $26, h1, h1 \
VESLG $26, h3, h3 \
VO h0, h1, h0 \
VO h2, h3, h2 \
VESLG $4, h2, h2 \
VLEIB $7, $48, h1 \
VSLB h1, h2, h2 \
VO h0, h2, h0 \
VLEIB $7, $104, h1 \
VSLB h1, h4, h3 \
VO h3, h0, h0 \
VLEIB $7, $24, h1 \
VSRLB h1, h4, h1
// if h > 2**130-5 then h -= 2**130-5
#define MOD(h0, h1, t0, t1, t2) \
VZERO t0 \
VLEIG $1, $5, t0 \
VACCQ h0, t0, t1 \
VAQ h0, t0, t0 \
VONE t2 \
VLEIG $1, $-4, t2 \
VAQ t2, t1, t1 \
VACCQ h1, t1, t1 \
VONE t2 \
VAQ t2, t1, t1 \
VN h0, t1, t2 \
VNC t0, t1, t1 \
VO t1, t2, h0
// func poly1305vx(out *[16]byte, m *byte, mlen uint64, key *[32]key)
TEXT ·poly1305vx(SB), $0-32
// This code processes up to 2 blocks (32 bytes) per iteration
// using the algorithm described in:
// NEON crypto, Daniel J. Bernstein & Peter Schwabe
// https://cryptojedi.org/papers/neoncrypto-20120320.pdf
LMG out+0(FP), R1, R4 // R1=out, R2=m, R3=mlen, R4=key
// load MOD26, EX0, EX1 and EX2
MOVD $·constants<>(SB), R5
VLM (R5), MOD26, EX2
// setup r
VL (R4), T_0
MOVD $·keyMask<>(SB), R6
VL (R6), T_1
VN T_0, T_1, T_0
EXPAND(T_0, T_0, R_0, R_1, R_2, R_3, R_4)
// setup r*5
VLEIG $0, $5, T_0
VLEIG $1, $5, T_0
// store r (for final block)
VMLOF T_0, R_1, R5SAVE_1
VMLOF T_0, R_2, R5SAVE_2
VMLOF T_0, R_3, R5SAVE_3
VMLOF T_0, R_4, R5SAVE_4
VLGVG $0, R_0, RSAVE_0
VLGVG $0, R_1, RSAVE_1
VLGVG $0, R_2, RSAVE_2
VLGVG $0, R_3, RSAVE_3
VLGVG $0, R_4, RSAVE_4
// skip r**2 calculation
CMPBLE R3, $16, skip
// calculate r**2
MULTIPLY(R_0, R_1, R_2, R_3, R_4, R_0, R_1, R_2, R_3, R_4, R5SAVE_1, R5SAVE_2, R5SAVE_3, R5SAVE_4, H_0, H_1, H_2, H_3, H_4)
REDUCE(H_0, H_1, H_2, H_3, H_4)
VLEIG $0, $5, T_0
VLEIG $1, $5, T_0
VMLOF T_0, H_1, R5_1
VMLOF T_0, H_2, R5_2
VMLOF T_0, H_3, R5_3
VMLOF T_0, H_4, R5_4
VLR H_0, R_0
VLR H_1, R_1
VLR H_2, R_2
VLR H_3, R_3
VLR H_4, R_4
// initialize h
VZERO H_0
VZERO H_1
VZERO H_2
VZERO H_3
VZERO H_4
loop:
CMPBLE R3, $32, b2
VLM (R2), T_0, T_1
SUB $32, R3
MOVD $32(R2), R2
EXPAND(T_0, T_1, F_0, F_1, F_2, F_3, F_4)
VLEIB $4, $1, F_4
VLEIB $12, $1, F_4
multiply:
VAG H_0, F_0, F_0
VAG H_1, F_1, F_1
VAG H_2, F_2, F_2
VAG H_3, F_3, F_3
VAG H_4, F_4, F_4
MULTIPLY(F_0, F_1, F_2, F_3, F_4, R_0, R_1, R_2, R_3, R_4, R5_1, R5_2, R5_3, R5_4, H_0, H_1, H_2, H_3, H_4)
REDUCE(H_0, H_1, H_2, H_3, H_4)
CMPBNE R3, $0, loop
finish:
// sum vectors
VZERO T_0
VSUMQG H_0, T_0, H_0
VSUMQG H_1, T_0, H_1
VSUMQG H_2, T_0, H_2
VSUMQG H_3, T_0, H_3
VSUMQG H_4, T_0, H_4
// h may be >= 2*(2**130-5) so we need to reduce it again
REDUCE(H_0, H_1, H_2, H_3, H_4)
// carry h1->h4
VESRLG $26, H_1, T_1
VN MOD26, H_1, H_1
VAQ T_1, H_2, H_2
VESRLG $26, H_2, T_2
VN MOD26, H_2, H_2
VAQ T_2, H_3, H_3
VESRLG $26, H_3, T_3
VN MOD26, H_3, H_3
VAQ T_3, H_4, H_4
// h is now < 2*(2**130-5)
// pack h into h1 (hi) and h0 (lo)
PACK(H_0, H_1, H_2, H_3, H_4)
// if h > 2**130-5 then h -= 2**130-5
MOD(H_0, H_1, T_0, T_1, T_2)
// h += s
MOVD $·bswapMask<>(SB), R5
VL (R5), T_1
VL 16(R4), T_0
VPERM T_0, T_0, T_1, T_0 // reverse bytes (to big)
VAQ T_0, H_0, H_0
VPERM H_0, H_0, T_1, H_0 // reverse bytes (to little)
VST H_0, (R1)
RET
b2:
CMPBLE R3, $16, b1
// 2 blocks remaining
SUB $17, R3
VL (R2), T_0
VLL R3, 16(R2), T_1
ADD $1, R3
MOVBZ $1, R0
CMPBEQ R3, $16, 2(PC)
VLVGB R3, R0, T_1
EXPAND(T_0, T_1, F_0, F_1, F_2, F_3, F_4)
CMPBNE R3, $16, 2(PC)
VLEIB $12, $1, F_4
VLEIB $4, $1, F_4
// setup [r²,r]
VLVGG $1, RSAVE_0, R_0
VLVGG $1, RSAVE_1, R_1
VLVGG $1, RSAVE_2, R_2
VLVGG $1, RSAVE_3, R_3
VLVGG $1, RSAVE_4, R_4
VPDI $0, R5_1, R5SAVE_1, R5_1
VPDI $0, R5_2, R5SAVE_2, R5_2
VPDI $0, R5_3, R5SAVE_3, R5_3
VPDI $0, R5_4, R5SAVE_4, R5_4
MOVD $0, R3
BR multiply
skip:
VZERO H_0
VZERO H_1
VZERO H_2
VZERO H_3
VZERO H_4
CMPBEQ R3, $0, finish
b1:
// 1 block remaining
SUB $1, R3
VLL R3, (R2), T_0
ADD $1, R3
MOVBZ $1, R0
CMPBEQ R3, $16, 2(PC)
VLVGB R3, R0, T_0
VZERO T_1
EXPAND(T_0, T_1, F_0, F_1, F_2, F_3, F_4)
CMPBNE R3, $16, 2(PC)
VLEIB $4, $1, F_4
VLEIG $1, $1, R_0
VZERO R_1
VZERO R_2
VZERO R_3
VZERO R_4
VZERO R5_1
VZERO R5_2
VZERO R5_3
VZERO R5_4
// setup [r, 1]
VLVGG $0, RSAVE_0, R_0
VLVGG $0, RSAVE_1, R_1
VLVGG $0, RSAVE_2, R_2
VLVGG $0, RSAVE_3, R_3
VLVGG $0, RSAVE_4, R_4
VPDI $0, R5SAVE_1, R5_1, R5_1
VPDI $0, R5SAVE_2, R5_2, R5_2
VPDI $0, R5SAVE_3, R5_3, R5_3
VPDI $0, R5SAVE_4, R5_4, R5_4
MOVD $0, R3
BR multiply

909
vendor/golang.org/x/crypto/poly1305/sum_vmsl_s390x.s generated vendored Normal file
View File

@@ -0,0 +1,909 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build s390x,go1.11,!gccgo,!appengine
#include "textflag.h"
// Implementation of Poly1305 using the vector facility (vx) and the VMSL instruction.
// constants
#define EX0 V1
#define EX1 V2
#define EX2 V3
// temporaries
#define T_0 V4
#define T_1 V5
#define T_2 V6
#define T_3 V7
#define T_4 V8
#define T_5 V9
#define T_6 V10
#define T_7 V11
#define T_8 V12
#define T_9 V13
#define T_10 V14
// r**2 & r**4
#define R_0 V15
#define R_1 V16
#define R_2 V17
#define R5_1 V18
#define R5_2 V19
// key (r)
#define RSAVE_0 R7
#define RSAVE_1 R8
#define RSAVE_2 R9
#define R5SAVE_1 R10
#define R5SAVE_2 R11
// message block
#define M0 V20
#define M1 V21
#define M2 V22
#define M3 V23
#define M4 V24
#define M5 V25
// accumulator
#define H0_0 V26
#define H1_0 V27
#define H2_0 V28
#define H0_1 V29
#define H1_1 V30
#define H2_1 V31
GLOBL ·keyMask<>(SB), RODATA, $16
DATA ·keyMask<>+0(SB)/8, $0xffffff0ffcffff0f
DATA ·keyMask<>+8(SB)/8, $0xfcffff0ffcffff0f
GLOBL ·bswapMask<>(SB), RODATA, $16
DATA ·bswapMask<>+0(SB)/8, $0x0f0e0d0c0b0a0908
DATA ·bswapMask<>+8(SB)/8, $0x0706050403020100
GLOBL ·constants<>(SB), RODATA, $48
// EX0
DATA ·constants<>+0(SB)/8, $0x18191a1b1c1d1e1f
DATA ·constants<>+8(SB)/8, $0x0000050403020100
// EX1
DATA ·constants<>+16(SB)/8, $0x18191a1b1c1d1e1f
DATA ·constants<>+24(SB)/8, $0x00000a0908070605
// EX2
DATA ·constants<>+32(SB)/8, $0x18191a1b1c1d1e1f
DATA ·constants<>+40(SB)/8, $0x0000000f0e0d0c0b
GLOBL ·c<>(SB), RODATA, $48
// EX0
DATA ·c<>+0(SB)/8, $0x0000050403020100
DATA ·c<>+8(SB)/8, $0x0000151413121110
// EX1
DATA ·c<>+16(SB)/8, $0x00000a0908070605
DATA ·c<>+24(SB)/8, $0x00001a1918171615
// EX2
DATA ·c<>+32(SB)/8, $0x0000000f0e0d0c0b
DATA ·c<>+40(SB)/8, $0x0000001f1e1d1c1b
GLOBL ·reduce<>(SB), RODATA, $32
// 44 bit
DATA ·reduce<>+0(SB)/8, $0x0
DATA ·reduce<>+8(SB)/8, $0xfffffffffff
// 42 bit
DATA ·reduce<>+16(SB)/8, $0x0
DATA ·reduce<>+24(SB)/8, $0x3ffffffffff
// h = (f*g) % (2**130-5) [partial reduction]
// uses T_0...T_9 temporary registers
// input: m02_0, m02_1, m02_2, m13_0, m13_1, m13_2, r_0, r_1, r_2, r5_1, r5_2, m4_0, m4_1, m4_2, m5_0, m5_1, m5_2
// temp: t0, t1, t2, t3, t4, t5, t6, t7, t8, t9
// output: m02_0, m02_1, m02_2, m13_0, m13_1, m13_2
#define MULTIPLY(m02_0, m02_1, m02_2, m13_0, m13_1, m13_2, r_0, r_1, r_2, r5_1, r5_2, m4_0, m4_1, m4_2, m5_0, m5_1, m5_2, t0, t1, t2, t3, t4, t5, t6, t7, t8, t9) \
\ // Eliminate the dependency for the last 2 VMSLs
VMSLG m02_0, r_2, m4_2, m4_2 \
VMSLG m13_0, r_2, m5_2, m5_2 \ // 8 VMSLs pipelined
VMSLG m02_0, r_0, m4_0, m4_0 \
VMSLG m02_1, r5_2, V0, T_0 \
VMSLG m02_0, r_1, m4_1, m4_1 \
VMSLG m02_1, r_0, V0, T_1 \
VMSLG m02_1, r_1, V0, T_2 \
VMSLG m02_2, r5_1, V0, T_3 \
VMSLG m02_2, r5_2, V0, T_4 \
VMSLG m13_0, r_0, m5_0, m5_0 \
VMSLG m13_1, r5_2, V0, T_5 \
VMSLG m13_0, r_1, m5_1, m5_1 \
VMSLG m13_1, r_0, V0, T_6 \
VMSLG m13_1, r_1, V0, T_7 \
VMSLG m13_2, r5_1, V0, T_8 \
VMSLG m13_2, r5_2, V0, T_9 \
VMSLG m02_2, r_0, m4_2, m4_2 \
VMSLG m13_2, r_0, m5_2, m5_2 \
VAQ m4_0, T_0, m02_0 \
VAQ m4_1, T_1, m02_1 \
VAQ m5_0, T_5, m13_0 \
VAQ m5_1, T_6, m13_1 \
VAQ m02_0, T_3, m02_0 \
VAQ m02_1, T_4, m02_1 \
VAQ m13_0, T_8, m13_0 \
VAQ m13_1, T_9, m13_1 \
VAQ m4_2, T_2, m02_2 \
VAQ m5_2, T_7, m13_2 \
// SQUARE uses three limbs of r and r_2*5 to output square of r
// uses T_1, T_5 and T_7 temporary registers
// input: r_0, r_1, r_2, r5_2
// temp: TEMP0, TEMP1, TEMP2
// output: p0, p1, p2
#define SQUARE(r_0, r_1, r_2, r5_2, p0, p1, p2, TEMP0, TEMP1, TEMP2) \
VMSLG r_0, r_0, p0, p0 \
VMSLG r_1, r5_2, V0, TEMP0 \
VMSLG r_2, r5_2, p1, p1 \
VMSLG r_0, r_1, V0, TEMP1 \
VMSLG r_1, r_1, p2, p2 \
VMSLG r_0, r_2, V0, TEMP2 \
VAQ TEMP0, p0, p0 \
VAQ TEMP1, p1, p1 \
VAQ TEMP2, p2, p2 \
VAQ TEMP0, p0, p0 \
VAQ TEMP1, p1, p1 \
VAQ TEMP2, p2, p2 \
// carry h0->h1->h2->h0 || h3->h4->h5->h3
// uses T_2, T_4, T_5, T_7, T_8, T_9
// t6, t7, t8, t9, t10, t11
// input: h0, h1, h2, h3, h4, h5
// temp: t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11
// output: h0, h1, h2, h3, h4, h5
#define REDUCE(h0, h1, h2, h3, h4, h5, t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) \
VLM (R12), t6, t7 \ // 44 and 42 bit clear mask
VLEIB $7, $0x28, t10 \ // 5 byte shift mask
VREPIB $4, t8 \ // 4 bit shift mask
VREPIB $2, t11 \ // 2 bit shift mask
VSRLB t10, h0, t0 \ // h0 byte shift
VSRLB t10, h1, t1 \ // h1 byte shift
VSRLB t10, h2, t2 \ // h2 byte shift
VSRLB t10, h3, t3 \ // h3 byte shift
VSRLB t10, h4, t4 \ // h4 byte shift
VSRLB t10, h5, t5 \ // h5 byte shift
VSRL t8, t0, t0 \ // h0 bit shift
VSRL t8, t1, t1 \ // h2 bit shift
VSRL t11, t2, t2 \ // h2 bit shift
VSRL t8, t3, t3 \ // h3 bit shift
VSRL t8, t4, t4 \ // h4 bit shift
VESLG $2, t2, t9 \ // h2 carry x5
VSRL t11, t5, t5 \ // h5 bit shift
VN t6, h0, h0 \ // h0 clear carry
VAQ t2, t9, t2 \ // h2 carry x5
VESLG $2, t5, t9 \ // h5 carry x5
VN t6, h1, h1 \ // h1 clear carry
VN t7, h2, h2 \ // h2 clear carry
VAQ t5, t9, t5 \ // h5 carry x5
VN t6, h3, h3 \ // h3 clear carry
VN t6, h4, h4 \ // h4 clear carry
VN t7, h5, h5 \ // h5 clear carry
VAQ t0, h1, h1 \ // h0->h1
VAQ t3, h4, h4 \ // h3->h4
VAQ t1, h2, h2 \ // h1->h2
VAQ t4, h5, h5 \ // h4->h5
VAQ t2, h0, h0 \ // h2->h0
VAQ t5, h3, h3 \ // h5->h3
VREPG $1, t6, t6 \ // 44 and 42 bit masks across both halves
VREPG $1, t7, t7 \
VSLDB $8, h0, h0, h0 \ // set up [h0/1/2, h3/4/5]
VSLDB $8, h1, h1, h1 \
VSLDB $8, h2, h2, h2 \
VO h0, h3, h3 \
VO h1, h4, h4 \
VO h2, h5, h5 \
VESRLG $44, h3, t0 \ // 44 bit shift right
VESRLG $44, h4, t1 \
VESRLG $42, h5, t2 \
VN t6, h3, h3 \ // clear carry bits
VN t6, h4, h4 \
VN t7, h5, h5 \
VESLG $2, t2, t9 \ // multiply carry by 5
VAQ t9, t2, t2 \
VAQ t0, h4, h4 \
VAQ t1, h5, h5 \
VAQ t2, h3, h3 \
// carry h0->h1->h2->h0
// input: h0, h1, h2
// temp: t0, t1, t2, t3, t4, t5, t6, t7, t8
// output: h0, h1, h2
#define REDUCE2(h0, h1, h2, t0, t1, t2, t3, t4, t5, t6, t7, t8) \
VLEIB $7, $0x28, t3 \ // 5 byte shift mask
VREPIB $4, t4 \ // 4 bit shift mask
VREPIB $2, t7 \ // 2 bit shift mask
VGBM $0x003F, t5 \ // mask to clear carry bits
VSRLB t3, h0, t0 \
VSRLB t3, h1, t1 \
VSRLB t3, h2, t2 \
VESRLG $4, t5, t5 \ // 44 bit clear mask
VSRL t4, t0, t0 \
VSRL t4, t1, t1 \
VSRL t7, t2, t2 \
VESRLG $2, t5, t6 \ // 42 bit clear mask
VESLG $2, t2, t8 \
VAQ t8, t2, t2 \
VN t5, h0, h0 \
VN t5, h1, h1 \
VN t6, h2, h2 \
VAQ t0, h1, h1 \
VAQ t1, h2, h2 \
VAQ t2, h0, h0 \
VSRLB t3, h0, t0 \
VSRLB t3, h1, t1 \
VSRLB t3, h2, t2 \
VSRL t4, t0, t0 \
VSRL t4, t1, t1 \
VSRL t7, t2, t2 \
VN t5, h0, h0 \
VN t5, h1, h1 \
VESLG $2, t2, t8 \
VN t6, h2, h2 \
VAQ t0, h1, h1 \
VAQ t8, t2, t2 \
VAQ t1, h2, h2 \
VAQ t2, h0, h0 \
// expands two message blocks into the lower halfs of the d registers
// moves the contents of the d registers into upper halfs
// input: in1, in2, d0, d1, d2, d3, d4, d5
// temp: TEMP0, TEMP1, TEMP2, TEMP3
// output: d0, d1, d2, d3, d4, d5
#define EXPACC(in1, in2, d0, d1, d2, d3, d4, d5, TEMP0, TEMP1, TEMP2, TEMP3) \
VGBM $0xff3f, TEMP0 \
VGBM $0xff1f, TEMP1 \
VESLG $4, d1, TEMP2 \
VESLG $4, d4, TEMP3 \
VESRLG $4, TEMP0, TEMP0 \
VPERM in1, d0, EX0, d0 \
VPERM in2, d3, EX0, d3 \
VPERM in1, d2, EX2, d2 \
VPERM in2, d5, EX2, d5 \
VPERM in1, TEMP2, EX1, d1 \
VPERM in2, TEMP3, EX1, d4 \
VN TEMP0, d0, d0 \
VN TEMP0, d3, d3 \
VESRLG $4, d1, d1 \
VESRLG $4, d4, d4 \
VN TEMP1, d2, d2 \
VN TEMP1, d5, d5 \
VN TEMP0, d1, d1 \
VN TEMP0, d4, d4 \
// expands one message block into the lower halfs of the d registers
// moves the contents of the d registers into upper halfs
// input: in, d0, d1, d2
// temp: TEMP0, TEMP1, TEMP2
// output: d0, d1, d2
#define EXPACC2(in, d0, d1, d2, TEMP0, TEMP1, TEMP2) \
VGBM $0xff3f, TEMP0 \
VESLG $4, d1, TEMP2 \
VGBM $0xff1f, TEMP1 \
VPERM in, d0, EX0, d0 \
VESRLG $4, TEMP0, TEMP0 \
VPERM in, d2, EX2, d2 \
VPERM in, TEMP2, EX1, d1 \
VN TEMP0, d0, d0 \
VN TEMP1, d2, d2 \
VESRLG $4, d1, d1 \
VN TEMP0, d1, d1 \
// pack h2:h0 into h1:h0 (no carry)
// input: h0, h1, h2
// output: h0, h1, h2
#define PACK(h0, h1, h2) \
VMRLG h1, h2, h2 \ // copy h1 to upper half h2
VESLG $44, h1, h1 \ // shift limb 1 44 bits, leaving 20
VO h0, h1, h0 \ // combine h0 with 20 bits from limb 1
VESRLG $20, h2, h1 \ // put top 24 bits of limb 1 into h1
VLEIG $1, $0, h1 \ // clear h2 stuff from lower half of h1
VO h0, h1, h0 \ // h0 now has 88 bits (limb 0 and 1)
VLEIG $0, $0, h2 \ // clear upper half of h2
VESRLG $40, h2, h1 \ // h1 now has upper two bits of result
VLEIB $7, $88, h1 \ // for byte shift (11 bytes)
VSLB h1, h2, h2 \ // shift h2 11 bytes to the left
VO h0, h2, h0 \ // combine h0 with 20 bits from limb 1
VLEIG $0, $0, h1 \ // clear upper half of h1
// if h > 2**130-5 then h -= 2**130-5
// input: h0, h1
// temp: t0, t1, t2
// output: h0
#define MOD(h0, h1, t0, t1, t2) \
VZERO t0 \
VLEIG $1, $5, t0 \
VACCQ h0, t0, t1 \
VAQ h0, t0, t0 \
VONE t2 \
VLEIG $1, $-4, t2 \
VAQ t2, t1, t1 \
VACCQ h1, t1, t1 \
VONE t2 \
VAQ t2, t1, t1 \
VN h0, t1, t2 \
VNC t0, t1, t1 \
VO t1, t2, h0 \
// func poly1305vmsl(out *[16]byte, m *byte, mlen uint64, key *[32]key)
TEXT ·poly1305vmsl(SB), $0-32
// This code processes 6 + up to 4 blocks (32 bytes) per iteration
// using the algorithm described in:
// NEON crypto, Daniel J. Bernstein & Peter Schwabe
// https://cryptojedi.org/papers/neoncrypto-20120320.pdf
// And as moddified for VMSL as described in
// Accelerating Poly1305 Cryptographic Message Authentication on the z14
// O'Farrell et al, CASCON 2017, p48-55
// https://ibm.ent.box.com/s/jf9gedj0e9d2vjctfyh186shaztavnht
LMG out+0(FP), R1, R4 // R1=out, R2=m, R3=mlen, R4=key
VZERO V0 // c
// load EX0, EX1 and EX2
MOVD $·constants<>(SB), R5
VLM (R5), EX0, EX2 // c
// setup r
VL (R4), T_0
MOVD $·keyMask<>(SB), R6
VL (R6), T_1
VN T_0, T_1, T_0
VZERO T_2 // limbs for r
VZERO T_3
VZERO T_4
EXPACC2(T_0, T_2, T_3, T_4, T_1, T_5, T_7)
// T_2, T_3, T_4: [0, r]
// setup r*20
VLEIG $0, $0, T_0
VLEIG $1, $20, T_0 // T_0: [0, 20]
VZERO T_5
VZERO T_6
VMSLG T_0, T_3, T_5, T_5
VMSLG T_0, T_4, T_6, T_6
// store r for final block in GR
VLGVG $1, T_2, RSAVE_0 // c
VLGVG $1, T_3, RSAVE_1 // c
VLGVG $1, T_4, RSAVE_2 // c
VLGVG $1, T_5, R5SAVE_1 // c
VLGVG $1, T_6, R5SAVE_2 // c
// initialize h
VZERO H0_0
VZERO H1_0
VZERO H2_0
VZERO H0_1
VZERO H1_1
VZERO H2_1
// initialize pointer for reduce constants
MOVD $·reduce<>(SB), R12
// calculate r**2 and 20*(r**2)
VZERO R_0
VZERO R_1
VZERO R_2
SQUARE(T_2, T_3, T_4, T_6, R_0, R_1, R_2, T_1, T_5, T_7)
REDUCE2(R_0, R_1, R_2, M0, M1, M2, M3, M4, R5_1, R5_2, M5, T_1)
VZERO R5_1
VZERO R5_2
VMSLG T_0, R_1, R5_1, R5_1
VMSLG T_0, R_2, R5_2, R5_2
// skip r**4 calculation if 3 blocks or less
CMPBLE R3, $48, b4
// calculate r**4 and 20*(r**4)
VZERO T_8
VZERO T_9
VZERO T_10
SQUARE(R_0, R_1, R_2, R5_2, T_8, T_9, T_10, T_1, T_5, T_7)
REDUCE2(T_8, T_9, T_10, M0, M1, M2, M3, M4, T_2, T_3, M5, T_1)
VZERO T_2
VZERO T_3
VMSLG T_0, T_9, T_2, T_2
VMSLG T_0, T_10, T_3, T_3
// put r**2 to the right and r**4 to the left of R_0, R_1, R_2
VSLDB $8, T_8, T_8, T_8
VSLDB $8, T_9, T_9, T_9
VSLDB $8, T_10, T_10, T_10
VSLDB $8, T_2, T_2, T_2
VSLDB $8, T_3, T_3, T_3
VO T_8, R_0, R_0
VO T_9, R_1, R_1
VO T_10, R_2, R_2
VO T_2, R5_1, R5_1
VO T_3, R5_2, R5_2
CMPBLE R3, $80, load // less than or equal to 5 blocks in message
// 6(or 5+1) blocks
SUB $81, R3
VLM (R2), M0, M4
VLL R3, 80(R2), M5
ADD $1, R3
MOVBZ $1, R0
CMPBGE R3, $16, 2(PC)
VLVGB R3, R0, M5
MOVD $96(R2), R2
EXPACC(M0, M1, H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_0, T_1, T_2, T_3)
EXPACC(M2, M3, H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_0, T_1, T_2, T_3)
VLEIB $2, $1, H2_0
VLEIB $2, $1, H2_1
VLEIB $10, $1, H2_0
VLEIB $10, $1, H2_1
VZERO M0
VZERO M1
VZERO M2
VZERO M3
VZERO T_4
VZERO T_10
EXPACC(M4, M5, M0, M1, M2, M3, T_4, T_10, T_0, T_1, T_2, T_3)
VLR T_4, M4
VLEIB $10, $1, M2
CMPBLT R3, $16, 2(PC)
VLEIB $10, $1, T_10
MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, T_10, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, M2, M3, M4, T_4, T_5, T_2, T_7, T_8, T_9)
VMRHG V0, H0_1, H0_0
VMRHG V0, H1_1, H1_0
VMRHG V0, H2_1, H2_0
VMRLG V0, H0_1, H0_1
VMRLG V0, H1_1, H1_1
VMRLG V0, H2_1, H2_1
SUB $16, R3
CMPBLE R3, $0, square
load:
// load EX0, EX1 and EX2
MOVD $·c<>(SB), R5
VLM (R5), EX0, EX2
loop:
CMPBLE R3, $64, add // b4 // last 4 or less blocks left
// next 4 full blocks
VLM (R2), M2, M5
SUB $64, R3
MOVD $64(R2), R2
REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, T_0, T_1, T_3, T_4, T_5, T_2, T_7, T_8, T_9)
// expacc in-lined to create [m2, m3] limbs
VGBM $0x3f3f, T_0 // 44 bit clear mask
VGBM $0x1f1f, T_1 // 40 bit clear mask
VPERM M2, M3, EX0, T_3
VESRLG $4, T_0, T_0 // 44 bit clear mask ready
VPERM M2, M3, EX1, T_4
VPERM M2, M3, EX2, T_5
VN T_0, T_3, T_3
VESRLG $4, T_4, T_4
VN T_1, T_5, T_5
VN T_0, T_4, T_4
VMRHG H0_1, T_3, H0_0
VMRHG H1_1, T_4, H1_0
VMRHG H2_1, T_5, H2_0
VMRLG H0_1, T_3, H0_1
VMRLG H1_1, T_4, H1_1
VMRLG H2_1, T_5, H2_1
VLEIB $10, $1, H2_0
VLEIB $10, $1, H2_1
VPERM M4, M5, EX0, T_3
VPERM M4, M5, EX1, T_4
VPERM M4, M5, EX2, T_5
VN T_0, T_3, T_3
VESRLG $4, T_4, T_4
VN T_1, T_5, T_5
VN T_0, T_4, T_4
VMRHG V0, T_3, M0
VMRHG V0, T_4, M1
VMRHG V0, T_5, M2
VMRLG V0, T_3, M3
VMRLG V0, T_4, M4
VMRLG V0, T_5, M5
VLEIB $10, $1, M2
VLEIB $10, $1, M5
MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
CMPBNE R3, $0, loop
REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, M3, M4, M5, T_4, T_5, T_2, T_7, T_8, T_9)
VMRHG V0, H0_1, H0_0
VMRHG V0, H1_1, H1_0
VMRHG V0, H2_1, H2_0
VMRLG V0, H0_1, H0_1
VMRLG V0, H1_1, H1_1
VMRLG V0, H2_1, H2_1
// load EX0, EX1, EX2
MOVD $·constants<>(SB), R5
VLM (R5), EX0, EX2
// sum vectors
VAQ H0_0, H0_1, H0_0
VAQ H1_0, H1_1, H1_0
VAQ H2_0, H2_1, H2_0
// h may be >= 2*(2**130-5) so we need to reduce it again
// M0...M4 are used as temps here
REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, T_9, T_10, H0_1, M5)
next: // carry h1->h2
VLEIB $7, $0x28, T_1
VREPIB $4, T_2
VGBM $0x003F, T_3
VESRLG $4, T_3
// byte shift
VSRLB T_1, H1_0, T_4
// bit shift
VSRL T_2, T_4, T_4
// clear h1 carry bits
VN T_3, H1_0, H1_0
// add carry
VAQ T_4, H2_0, H2_0
// h is now < 2*(2**130-5)
// pack h into h1 (hi) and h0 (lo)
PACK(H0_0, H1_0, H2_0)
// if h > 2**130-5 then h -= 2**130-5
MOD(H0_0, H1_0, T_0, T_1, T_2)
// h += s
MOVD $·bswapMask<>(SB), R5
VL (R5), T_1
VL 16(R4), T_0
VPERM T_0, T_0, T_1, T_0 // reverse bytes (to big)
VAQ T_0, H0_0, H0_0
VPERM H0_0, H0_0, T_1, H0_0 // reverse bytes (to little)
VST H0_0, (R1)
RET
add:
// load EX0, EX1, EX2
MOVD $·constants<>(SB), R5
VLM (R5), EX0, EX2
REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, M3, M4, M5, T_4, T_5, T_2, T_7, T_8, T_9)
VMRHG V0, H0_1, H0_0
VMRHG V0, H1_1, H1_0
VMRHG V0, H2_1, H2_0
VMRLG V0, H0_1, H0_1
VMRLG V0, H1_1, H1_1
VMRLG V0, H2_1, H2_1
CMPBLE R3, $64, b4
b4:
CMPBLE R3, $48, b3 // 3 blocks or less
// 4(3+1) blocks remaining
SUB $49, R3
VLM (R2), M0, M2
VLL R3, 48(R2), M3
ADD $1, R3
MOVBZ $1, R0
CMPBEQ R3, $16, 2(PC)
VLVGB R3, R0, M3
MOVD $64(R2), R2
EXPACC(M0, M1, H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_0, T_1, T_2, T_3)
VLEIB $10, $1, H2_0
VLEIB $10, $1, H2_1
VZERO M0
VZERO M1
VZERO M4
VZERO M5
VZERO T_4
VZERO T_10
EXPACC(M2, M3, M0, M1, M4, M5, T_4, T_10, T_0, T_1, T_2, T_3)
VLR T_4, M2
VLEIB $10, $1, M4
CMPBNE R3, $16, 2(PC)
VLEIB $10, $1, T_10
MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M4, M5, M2, T_10, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, M3, M4, M5, T_4, T_5, T_2, T_7, T_8, T_9)
VMRHG V0, H0_1, H0_0
VMRHG V0, H1_1, H1_0
VMRHG V0, H2_1, H2_0
VMRLG V0, H0_1, H0_1
VMRLG V0, H1_1, H1_1
VMRLG V0, H2_1, H2_1
SUB $16, R3
CMPBLE R3, $0, square // this condition must always hold true!
b3:
CMPBLE R3, $32, b2
// 3 blocks remaining
// setup [r²,r]
VSLDB $8, R_0, R_0, R_0
VSLDB $8, R_1, R_1, R_1
VSLDB $8, R_2, R_2, R_2
VSLDB $8, R5_1, R5_1, R5_1
VSLDB $8, R5_2, R5_2, R5_2
VLVGG $1, RSAVE_0, R_0
VLVGG $1, RSAVE_1, R_1
VLVGG $1, RSAVE_2, R_2
VLVGG $1, R5SAVE_1, R5_1
VLVGG $1, R5SAVE_2, R5_2
// setup [h0, h1]
VSLDB $8, H0_0, H0_0, H0_0
VSLDB $8, H1_0, H1_0, H1_0
VSLDB $8, H2_0, H2_0, H2_0
VO H0_1, H0_0, H0_0
VO H1_1, H1_0, H1_0
VO H2_1, H2_0, H2_0
VZERO H0_1
VZERO H1_1
VZERO H2_1
VZERO M0
VZERO M1
VZERO M2
VZERO M3
VZERO M4
VZERO M5
// H*[r**2, r]
MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, H0_1, H1_1, T_10, M5)
SUB $33, R3
VLM (R2), M0, M1
VLL R3, 32(R2), M2
ADD $1, R3
MOVBZ $1, R0
CMPBEQ R3, $16, 2(PC)
VLVGB R3, R0, M2
// H += m0
VZERO T_1
VZERO T_2
VZERO T_3
EXPACC2(M0, T_1, T_2, T_3, T_4, T_5, T_6)
VLEIB $10, $1, T_3
VAG H0_0, T_1, H0_0
VAG H1_0, T_2, H1_0
VAG H2_0, T_3, H2_0
VZERO M0
VZERO M3
VZERO M4
VZERO M5
VZERO T_10
// (H+m0)*r
MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M3, M4, M5, V0, T_10, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
REDUCE2(H0_0, H1_0, H2_0, M0, M3, M4, M5, T_10, H0_1, H1_1, H2_1, T_9)
// H += m1
VZERO V0
VZERO T_1
VZERO T_2
VZERO T_3
EXPACC2(M1, T_1, T_2, T_3, T_4, T_5, T_6)
VLEIB $10, $1, T_3
VAQ H0_0, T_1, H0_0
VAQ H1_0, T_2, H1_0
VAQ H2_0, T_3, H2_0
REDUCE2(H0_0, H1_0, H2_0, M0, M3, M4, M5, T_9, H0_1, H1_1, H2_1, T_10)
// [H, m2] * [r**2, r]
EXPACC2(M2, H0_0, H1_0, H2_0, T_1, T_2, T_3)
CMPBNE R3, $16, 2(PC)
VLEIB $10, $1, H2_0
VZERO M0
VZERO M1
VZERO M2
VZERO M3
VZERO M4
VZERO M5
MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, H0_1, H1_1, M5, T_10)
SUB $16, R3
CMPBLE R3, $0, next // this condition must always hold true!
b2:
CMPBLE R3, $16, b1
// 2 blocks remaining
// setup [r²,r]
VSLDB $8, R_0, R_0, R_0
VSLDB $8, R_1, R_1, R_1
VSLDB $8, R_2, R_2, R_2
VSLDB $8, R5_1, R5_1, R5_1
VSLDB $8, R5_2, R5_2, R5_2
VLVGG $1, RSAVE_0, R_0
VLVGG $1, RSAVE_1, R_1
VLVGG $1, RSAVE_2, R_2
VLVGG $1, R5SAVE_1, R5_1
VLVGG $1, R5SAVE_2, R5_2
// setup [h0, h1]
VSLDB $8, H0_0, H0_0, H0_0
VSLDB $8, H1_0, H1_0, H1_0
VSLDB $8, H2_0, H2_0, H2_0
VO H0_1, H0_0, H0_0
VO H1_1, H1_0, H1_0
VO H2_1, H2_0, H2_0
VZERO H0_1
VZERO H1_1
VZERO H2_1
VZERO M0
VZERO M1
VZERO M2
VZERO M3
VZERO M4
VZERO M5
// H*[r**2, r]
MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, M2, M3, M4, T_4, T_5, T_2, T_7, T_8, T_9)
VMRHG V0, H0_1, H0_0
VMRHG V0, H1_1, H1_0
VMRHG V0, H2_1, H2_0
VMRLG V0, H0_1, H0_1
VMRLG V0, H1_1, H1_1
VMRLG V0, H2_1, H2_1
// move h to the left and 0s at the right
VSLDB $8, H0_0, H0_0, H0_0
VSLDB $8, H1_0, H1_0, H1_0
VSLDB $8, H2_0, H2_0, H2_0
// get message blocks and append 1 to start
SUB $17, R3
VL (R2), M0
VLL R3, 16(R2), M1
ADD $1, R3
MOVBZ $1, R0
CMPBEQ R3, $16, 2(PC)
VLVGB R3, R0, M1
VZERO T_6
VZERO T_7
VZERO T_8
EXPACC2(M0, T_6, T_7, T_8, T_1, T_2, T_3)
EXPACC2(M1, T_6, T_7, T_8, T_1, T_2, T_3)
VLEIB $2, $1, T_8
CMPBNE R3, $16, 2(PC)
VLEIB $10, $1, T_8
// add [m0, m1] to h
VAG H0_0, T_6, H0_0
VAG H1_0, T_7, H1_0
VAG H2_0, T_8, H2_0
VZERO M2
VZERO M3
VZERO M4
VZERO M5
VZERO T_10
VZERO M0
// at this point R_0 .. R5_2 look like [r**2, r]
MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M2, M3, M4, M5, T_10, M0, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
REDUCE2(H0_0, H1_0, H2_0, M2, M3, M4, M5, T_9, H0_1, H1_1, H2_1, T_10)
SUB $16, R3, R3
CMPBLE R3, $0, next
b1:
CMPBLE R3, $0, next
// 1 block remaining
// setup [r²,r]
VSLDB $8, R_0, R_0, R_0
VSLDB $8, R_1, R_1, R_1
VSLDB $8, R_2, R_2, R_2
VSLDB $8, R5_1, R5_1, R5_1
VSLDB $8, R5_2, R5_2, R5_2
VLVGG $1, RSAVE_0, R_0
VLVGG $1, RSAVE_1, R_1
VLVGG $1, RSAVE_2, R_2
VLVGG $1, R5SAVE_1, R5_1
VLVGG $1, R5SAVE_2, R5_2
// setup [h0, h1]
VSLDB $8, H0_0, H0_0, H0_0
VSLDB $8, H1_0, H1_0, H1_0
VSLDB $8, H2_0, H2_0, H2_0
VO H0_1, H0_0, H0_0
VO H1_1, H1_0, H1_0
VO H2_1, H2_0, H2_0
VZERO H0_1
VZERO H1_1
VZERO H2_1
VZERO M0
VZERO M1
VZERO M2
VZERO M3
VZERO M4
VZERO M5
// H*[r**2, r]
MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, T_9, T_10, H0_1, M5)
// set up [0, m0] limbs
SUB $1, R3
VLL R3, (R2), M0
ADD $1, R3
MOVBZ $1, R0
CMPBEQ R3, $16, 2(PC)
VLVGB R3, R0, M0
VZERO T_1
VZERO T_2
VZERO T_3
EXPACC2(M0, T_1, T_2, T_3, T_4, T_5, T_6)// limbs: [0, m]
CMPBNE R3, $16, 2(PC)
VLEIB $10, $1, T_3
// h+m0
VAQ H0_0, T_1, H0_0
VAQ H1_0, T_2, H1_0
VAQ H2_0, T_3, H2_0
VZERO M0
VZERO M1
VZERO M2
VZERO M3
VZERO M4
VZERO M5
MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, T_9, T_10, H0_1, M5)
BR next
square:
// setup [r²,r]
VSLDB $8, R_0, R_0, R_0
VSLDB $8, R_1, R_1, R_1
VSLDB $8, R_2, R_2, R_2
VSLDB $8, R5_1, R5_1, R5_1
VSLDB $8, R5_2, R5_2, R5_2
VLVGG $1, RSAVE_0, R_0
VLVGG $1, RSAVE_1, R_1
VLVGG $1, RSAVE_2, R_2
VLVGG $1, R5SAVE_1, R5_1
VLVGG $1, R5SAVE_2, R5_2
// setup [h0, h1]
VSLDB $8, H0_0, H0_0, H0_0
VSLDB $8, H1_0, H1_0, H1_0
VSLDB $8, H2_0, H2_0, H2_0
VO H0_1, H0_0, H0_0
VO H1_1, H1_0, H1_0
VO H2_1, H2_0, H2_0
VZERO H0_1
VZERO H1_1
VZERO H2_1
VZERO M0
VZERO M1
VZERO M2
VZERO M3
VZERO M4
VZERO M5
// (h0*r**2) + (h1*r)
MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, T_9, T_10, H0_1, M5)
BR next

124
vendor/golang.org/x/crypto/ripemd160/ripemd160.go generated vendored Normal file
View File

@@ -0,0 +1,124 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package ripemd160 implements the RIPEMD-160 hash algorithm.
//
// Deprecated: RIPEMD-160 is a legacy hash and should not be used for new
// applications. Also, this package does not and will not provide an optimized
// implementation. Instead, use a modern hash like SHA-256 (from crypto/sha256).
package ripemd160 // import "golang.org/x/crypto/ripemd160"
// RIPEMD-160 is designed by Hans Dobbertin, Antoon Bosselaers, and Bart
// Preneel with specifications available at:
// http://homes.esat.kuleuven.be/~cosicart/pdf/AB-9601/AB-9601.pdf.
import (
"crypto"
"hash"
)
func init() {
crypto.RegisterHash(crypto.RIPEMD160, New)
}
// The size of the checksum in bytes.
const Size = 20
// The block size of the hash algorithm in bytes.
const BlockSize = 64
const (
_s0 = 0x67452301
_s1 = 0xefcdab89
_s2 = 0x98badcfe
_s3 = 0x10325476
_s4 = 0xc3d2e1f0
)
// digest represents the partial evaluation of a checksum.
type digest struct {
s [5]uint32 // running context
x [BlockSize]byte // temporary buffer
nx int // index into x
tc uint64 // total count of bytes processed
}
func (d *digest) Reset() {
d.s[0], d.s[1], d.s[2], d.s[3], d.s[4] = _s0, _s1, _s2, _s3, _s4
d.nx = 0
d.tc = 0
}
// New returns a new hash.Hash computing the checksum.
func New() hash.Hash {
result := new(digest)
result.Reset()
return result
}
func (d *digest) Size() int { return Size }
func (d *digest) BlockSize() int { return BlockSize }
func (d *digest) Write(p []byte) (nn int, err error) {
nn = len(p)
d.tc += uint64(nn)
if d.nx > 0 {
n := len(p)
if n > BlockSize-d.nx {
n = BlockSize - d.nx
}
for i := 0; i < n; i++ {
d.x[d.nx+i] = p[i]
}
d.nx += n
if d.nx == BlockSize {
_Block(d, d.x[0:])
d.nx = 0
}
p = p[n:]
}
n := _Block(d, p)
p = p[n:]
if len(p) > 0 {
d.nx = copy(d.x[:], p)
}
return
}
func (d0 *digest) Sum(in []byte) []byte {
// Make a copy of d0 so that caller can keep writing and summing.
d := *d0
// Padding. Add a 1 bit and 0 bits until 56 bytes mod 64.
tc := d.tc
var tmp [64]byte
tmp[0] = 0x80
if tc%64 < 56 {
d.Write(tmp[0 : 56-tc%64])
} else {
d.Write(tmp[0 : 64+56-tc%64])
}
// Length in bits.
tc <<= 3
for i := uint(0); i < 8; i++ {
tmp[i] = byte(tc >> (8 * i))
}
d.Write(tmp[0:8])
if d.nx != 0 {
panic("d.nx != 0")
}
var digest [Size]byte
for i, s := range d.s {
digest[i*4] = byte(s)
digest[i*4+1] = byte(s >> 8)
digest[i*4+2] = byte(s >> 16)
digest[i*4+3] = byte(s >> 24)
}
return append(in, digest[:]...)
}

165
vendor/golang.org/x/crypto/ripemd160/ripemd160block.go generated vendored Normal file
View File

@@ -0,0 +1,165 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// RIPEMD-160 block step.
// In its own file so that a faster assembly or C version
// can be substituted easily.
package ripemd160
import (
"math/bits"
)
// work buffer indices and roll amounts for one line
var _n = [80]uint{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
7, 4, 13, 1, 10, 6, 15, 3, 12, 0, 9, 5, 2, 14, 11, 8,
3, 10, 14, 4, 9, 15, 8, 1, 2, 7, 0, 6, 13, 11, 5, 12,
1, 9, 11, 10, 0, 8, 12, 4, 13, 3, 7, 15, 14, 5, 6, 2,
4, 0, 5, 9, 7, 12, 2, 10, 14, 1, 3, 8, 11, 6, 15, 13,
}
var _r = [80]uint{
11, 14, 15, 12, 5, 8, 7, 9, 11, 13, 14, 15, 6, 7, 9, 8,
7, 6, 8, 13, 11, 9, 7, 15, 7, 12, 15, 9, 11, 7, 13, 12,
11, 13, 6, 7, 14, 9, 13, 15, 14, 8, 13, 6, 5, 12, 7, 5,
11, 12, 14, 15, 14, 15, 9, 8, 9, 14, 5, 6, 8, 6, 5, 12,
9, 15, 5, 11, 6, 8, 13, 12, 5, 12, 13, 14, 11, 8, 5, 6,
}
// same for the other parallel one
var n_ = [80]uint{
5, 14, 7, 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12,
6, 11, 3, 7, 0, 13, 5, 10, 14, 15, 8, 12, 4, 9, 1, 2,
15, 5, 1, 3, 7, 14, 6, 9, 11, 8, 12, 2, 10, 0, 4, 13,
8, 6, 4, 1, 3, 11, 15, 0, 5, 12, 2, 13, 9, 7, 10, 14,
12, 15, 10, 4, 1, 5, 8, 7, 6, 2, 13, 14, 0, 3, 9, 11,
}
var r_ = [80]uint{
8, 9, 9, 11, 13, 15, 15, 5, 7, 7, 8, 11, 14, 14, 12, 6,
9, 13, 15, 7, 12, 8, 9, 11, 7, 7, 12, 7, 6, 15, 13, 11,
9, 7, 15, 11, 8, 6, 6, 14, 12, 13, 5, 14, 13, 13, 7, 5,
15, 5, 8, 11, 14, 14, 6, 14, 6, 9, 12, 9, 12, 5, 15, 8,
8, 5, 12, 9, 12, 5, 14, 6, 8, 13, 6, 5, 15, 13, 11, 11,
}
func _Block(md *digest, p []byte) int {
n := 0
var x [16]uint32
var alpha, beta uint32
for len(p) >= BlockSize {
a, b, c, d, e := md.s[0], md.s[1], md.s[2], md.s[3], md.s[4]
aa, bb, cc, dd, ee := a, b, c, d, e
j := 0
for i := 0; i < 16; i++ {
x[i] = uint32(p[j]) | uint32(p[j+1])<<8 | uint32(p[j+2])<<16 | uint32(p[j+3])<<24
j += 4
}
// round 1
i := 0
for i < 16 {
alpha = a + (b ^ c ^ d) + x[_n[i]]
s := int(_r[i])
alpha = bits.RotateLeft32(alpha, s) + e
beta = bits.RotateLeft32(c, 10)
a, b, c, d, e = e, alpha, b, beta, d
// parallel line
alpha = aa + (bb ^ (cc | ^dd)) + x[n_[i]] + 0x50a28be6
s = int(r_[i])
alpha = bits.RotateLeft32(alpha, s) + ee
beta = bits.RotateLeft32(cc, 10)
aa, bb, cc, dd, ee = ee, alpha, bb, beta, dd
i++
}
// round 2
for i < 32 {
alpha = a + (b&c | ^b&d) + x[_n[i]] + 0x5a827999
s := int(_r[i])
alpha = bits.RotateLeft32(alpha, s) + e
beta = bits.RotateLeft32(c, 10)
a, b, c, d, e = e, alpha, b, beta, d
// parallel line
alpha = aa + (bb&dd | cc&^dd) + x[n_[i]] + 0x5c4dd124
s = int(r_[i])
alpha = bits.RotateLeft32(alpha, s) + ee
beta = bits.RotateLeft32(cc, 10)
aa, bb, cc, dd, ee = ee, alpha, bb, beta, dd
i++
}
// round 3
for i < 48 {
alpha = a + (b | ^c ^ d) + x[_n[i]] + 0x6ed9eba1
s := int(_r[i])
alpha = bits.RotateLeft32(alpha, s) + e
beta = bits.RotateLeft32(c, 10)
a, b, c, d, e = e, alpha, b, beta, d
// parallel line
alpha = aa + (bb | ^cc ^ dd) + x[n_[i]] + 0x6d703ef3
s = int(r_[i])
alpha = bits.RotateLeft32(alpha, s) + ee
beta = bits.RotateLeft32(cc, 10)
aa, bb, cc, dd, ee = ee, alpha, bb, beta, dd
i++
}
// round 4
for i < 64 {
alpha = a + (b&d | c&^d) + x[_n[i]] + 0x8f1bbcdc
s := int(_r[i])
alpha = bits.RotateLeft32(alpha, s) + e
beta = bits.RotateLeft32(c, 10)
a, b, c, d, e = e, alpha, b, beta, d
// parallel line
alpha = aa + (bb&cc | ^bb&dd) + x[n_[i]] + 0x7a6d76e9
s = int(r_[i])
alpha = bits.RotateLeft32(alpha, s) + ee
beta = bits.RotateLeft32(cc, 10)
aa, bb, cc, dd, ee = ee, alpha, bb, beta, dd
i++
}
// round 5
for i < 80 {
alpha = a + (b ^ (c | ^d)) + x[_n[i]] + 0xa953fd4e
s := int(_r[i])
alpha = bits.RotateLeft32(alpha, s) + e
beta = bits.RotateLeft32(c, 10)
a, b, c, d, e = e, alpha, b, beta, d
// parallel line
alpha = aa + (bb ^ cc ^ dd) + x[n_[i]]
s = int(r_[i])
alpha = bits.RotateLeft32(alpha, s) + ee
beta = bits.RotateLeft32(cc, 10)
aa, bb, cc, dd, ee = ee, alpha, bb, beta, dd
i++
}
// combine results
dd += c + md.s[1]
md.s[1] = md.s[2] + d + ee
md.s[2] = md.s[3] + e + aa
md.s[3] = md.s[4] + a + bb
md.s[4] = md.s[0] + b + cc
md.s[0] = dd
p = p[BlockSize:]
n += BlockSize
}
return n
}

144
vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go generated vendored Normal file
View File

@@ -0,0 +1,144 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package salsa provides low-level access to functions in the Salsa family.
package salsa // import "golang.org/x/crypto/salsa20/salsa"
// Sigma is the Salsa20 constant for 256-bit keys.
var Sigma = [16]byte{'e', 'x', 'p', 'a', 'n', 'd', ' ', '3', '2', '-', 'b', 'y', 't', 'e', ' ', 'k'}
// HSalsa20 applies the HSalsa20 core function to a 16-byte input in, 32-byte
// key k, and 16-byte constant c, and puts the result into the 32-byte array
// out.
func HSalsa20(out *[32]byte, in *[16]byte, k *[32]byte, c *[16]byte) {
x0 := uint32(c[0]) | uint32(c[1])<<8 | uint32(c[2])<<16 | uint32(c[3])<<24
x1 := uint32(k[0]) | uint32(k[1])<<8 | uint32(k[2])<<16 | uint32(k[3])<<24
x2 := uint32(k[4]) | uint32(k[5])<<8 | uint32(k[6])<<16 | uint32(k[7])<<24
x3 := uint32(k[8]) | uint32(k[9])<<8 | uint32(k[10])<<16 | uint32(k[11])<<24
x4 := uint32(k[12]) | uint32(k[13])<<8 | uint32(k[14])<<16 | uint32(k[15])<<24
x5 := uint32(c[4]) | uint32(c[5])<<8 | uint32(c[6])<<16 | uint32(c[7])<<24
x6 := uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
x7 := uint32(in[4]) | uint32(in[5])<<8 | uint32(in[6])<<16 | uint32(in[7])<<24
x8 := uint32(in[8]) | uint32(in[9])<<8 | uint32(in[10])<<16 | uint32(in[11])<<24
x9 := uint32(in[12]) | uint32(in[13])<<8 | uint32(in[14])<<16 | uint32(in[15])<<24
x10 := uint32(c[8]) | uint32(c[9])<<8 | uint32(c[10])<<16 | uint32(c[11])<<24
x11 := uint32(k[16]) | uint32(k[17])<<8 | uint32(k[18])<<16 | uint32(k[19])<<24
x12 := uint32(k[20]) | uint32(k[21])<<8 | uint32(k[22])<<16 | uint32(k[23])<<24
x13 := uint32(k[24]) | uint32(k[25])<<8 | uint32(k[26])<<16 | uint32(k[27])<<24
x14 := uint32(k[28]) | uint32(k[29])<<8 | uint32(k[30])<<16 | uint32(k[31])<<24
x15 := uint32(c[12]) | uint32(c[13])<<8 | uint32(c[14])<<16 | uint32(c[15])<<24
for i := 0; i < 20; i += 2 {
u := x0 + x12
x4 ^= u<<7 | u>>(32-7)
u = x4 + x0
x8 ^= u<<9 | u>>(32-9)
u = x8 + x4
x12 ^= u<<13 | u>>(32-13)
u = x12 + x8
x0 ^= u<<18 | u>>(32-18)
u = x5 + x1
x9 ^= u<<7 | u>>(32-7)
u = x9 + x5
x13 ^= u<<9 | u>>(32-9)
u = x13 + x9
x1 ^= u<<13 | u>>(32-13)
u = x1 + x13
x5 ^= u<<18 | u>>(32-18)
u = x10 + x6
x14 ^= u<<7 | u>>(32-7)
u = x14 + x10
x2 ^= u<<9 | u>>(32-9)
u = x2 + x14
x6 ^= u<<13 | u>>(32-13)
u = x6 + x2
x10 ^= u<<18 | u>>(32-18)
u = x15 + x11
x3 ^= u<<7 | u>>(32-7)
u = x3 + x15
x7 ^= u<<9 | u>>(32-9)
u = x7 + x3
x11 ^= u<<13 | u>>(32-13)
u = x11 + x7
x15 ^= u<<18 | u>>(32-18)
u = x0 + x3
x1 ^= u<<7 | u>>(32-7)
u = x1 + x0
x2 ^= u<<9 | u>>(32-9)
u = x2 + x1
x3 ^= u<<13 | u>>(32-13)
u = x3 + x2
x0 ^= u<<18 | u>>(32-18)
u = x5 + x4
x6 ^= u<<7 | u>>(32-7)
u = x6 + x5
x7 ^= u<<9 | u>>(32-9)
u = x7 + x6
x4 ^= u<<13 | u>>(32-13)
u = x4 + x7
x5 ^= u<<18 | u>>(32-18)
u = x10 + x9
x11 ^= u<<7 | u>>(32-7)
u = x11 + x10
x8 ^= u<<9 | u>>(32-9)
u = x8 + x11
x9 ^= u<<13 | u>>(32-13)
u = x9 + x8
x10 ^= u<<18 | u>>(32-18)
u = x15 + x14
x12 ^= u<<7 | u>>(32-7)
u = x12 + x15
x13 ^= u<<9 | u>>(32-9)
u = x13 + x12
x14 ^= u<<13 | u>>(32-13)
u = x14 + x13
x15 ^= u<<18 | u>>(32-18)
}
out[0] = byte(x0)
out[1] = byte(x0 >> 8)
out[2] = byte(x0 >> 16)
out[3] = byte(x0 >> 24)
out[4] = byte(x5)
out[5] = byte(x5 >> 8)
out[6] = byte(x5 >> 16)
out[7] = byte(x5 >> 24)
out[8] = byte(x10)
out[9] = byte(x10 >> 8)
out[10] = byte(x10 >> 16)
out[11] = byte(x10 >> 24)
out[12] = byte(x15)
out[13] = byte(x15 >> 8)
out[14] = byte(x15 >> 16)
out[15] = byte(x15 >> 24)
out[16] = byte(x6)
out[17] = byte(x6 >> 8)
out[18] = byte(x6 >> 16)
out[19] = byte(x6 >> 24)
out[20] = byte(x7)
out[21] = byte(x7 >> 8)
out[22] = byte(x7 >> 16)
out[23] = byte(x7 >> 24)
out[24] = byte(x8)
out[25] = byte(x8 >> 8)
out[26] = byte(x8 >> 16)
out[27] = byte(x8 >> 24)
out[28] = byte(x9)
out[29] = byte(x9 >> 8)
out[30] = byte(x9 >> 16)
out[31] = byte(x9 >> 24)
}

199
vendor/golang.org/x/crypto/salsa20/salsa/salsa208.go generated vendored Normal file
View File

@@ -0,0 +1,199 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package salsa
// Core208 applies the Salsa20/8 core function to the 64-byte array in and puts
// the result into the 64-byte array out. The input and output may be the same array.
func Core208(out *[64]byte, in *[64]byte) {
j0 := uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
j1 := uint32(in[4]) | uint32(in[5])<<8 | uint32(in[6])<<16 | uint32(in[7])<<24
j2 := uint32(in[8]) | uint32(in[9])<<8 | uint32(in[10])<<16 | uint32(in[11])<<24
j3 := uint32(in[12]) | uint32(in[13])<<8 | uint32(in[14])<<16 | uint32(in[15])<<24
j4 := uint32(in[16]) | uint32(in[17])<<8 | uint32(in[18])<<16 | uint32(in[19])<<24
j5 := uint32(in[20]) | uint32(in[21])<<8 | uint32(in[22])<<16 | uint32(in[23])<<24
j6 := uint32(in[24]) | uint32(in[25])<<8 | uint32(in[26])<<16 | uint32(in[27])<<24
j7 := uint32(in[28]) | uint32(in[29])<<8 | uint32(in[30])<<16 | uint32(in[31])<<24
j8 := uint32(in[32]) | uint32(in[33])<<8 | uint32(in[34])<<16 | uint32(in[35])<<24
j9 := uint32(in[36]) | uint32(in[37])<<8 | uint32(in[38])<<16 | uint32(in[39])<<24
j10 := uint32(in[40]) | uint32(in[41])<<8 | uint32(in[42])<<16 | uint32(in[43])<<24
j11 := uint32(in[44]) | uint32(in[45])<<8 | uint32(in[46])<<16 | uint32(in[47])<<24
j12 := uint32(in[48]) | uint32(in[49])<<8 | uint32(in[50])<<16 | uint32(in[51])<<24
j13 := uint32(in[52]) | uint32(in[53])<<8 | uint32(in[54])<<16 | uint32(in[55])<<24
j14 := uint32(in[56]) | uint32(in[57])<<8 | uint32(in[58])<<16 | uint32(in[59])<<24
j15 := uint32(in[60]) | uint32(in[61])<<8 | uint32(in[62])<<16 | uint32(in[63])<<24
x0, x1, x2, x3, x4, x5, x6, x7, x8 := j0, j1, j2, j3, j4, j5, j6, j7, j8
x9, x10, x11, x12, x13, x14, x15 := j9, j10, j11, j12, j13, j14, j15
for i := 0; i < 8; i += 2 {
u := x0 + x12
x4 ^= u<<7 | u>>(32-7)
u = x4 + x0
x8 ^= u<<9 | u>>(32-9)
u = x8 + x4
x12 ^= u<<13 | u>>(32-13)
u = x12 + x8
x0 ^= u<<18 | u>>(32-18)
u = x5 + x1
x9 ^= u<<7 | u>>(32-7)
u = x9 + x5
x13 ^= u<<9 | u>>(32-9)
u = x13 + x9
x1 ^= u<<13 | u>>(32-13)
u = x1 + x13
x5 ^= u<<18 | u>>(32-18)
u = x10 + x6
x14 ^= u<<7 | u>>(32-7)
u = x14 + x10
x2 ^= u<<9 | u>>(32-9)
u = x2 + x14
x6 ^= u<<13 | u>>(32-13)
u = x6 + x2
x10 ^= u<<18 | u>>(32-18)
u = x15 + x11
x3 ^= u<<7 | u>>(32-7)
u = x3 + x15
x7 ^= u<<9 | u>>(32-9)
u = x7 + x3
x11 ^= u<<13 | u>>(32-13)
u = x11 + x7
x15 ^= u<<18 | u>>(32-18)
u = x0 + x3
x1 ^= u<<7 | u>>(32-7)
u = x1 + x0
x2 ^= u<<9 | u>>(32-9)
u = x2 + x1
x3 ^= u<<13 | u>>(32-13)
u = x3 + x2
x0 ^= u<<18 | u>>(32-18)
u = x5 + x4
x6 ^= u<<7 | u>>(32-7)
u = x6 + x5
x7 ^= u<<9 | u>>(32-9)
u = x7 + x6
x4 ^= u<<13 | u>>(32-13)
u = x4 + x7
x5 ^= u<<18 | u>>(32-18)
u = x10 + x9
x11 ^= u<<7 | u>>(32-7)
u = x11 + x10
x8 ^= u<<9 | u>>(32-9)
u = x8 + x11
x9 ^= u<<13 | u>>(32-13)
u = x9 + x8
x10 ^= u<<18 | u>>(32-18)
u = x15 + x14
x12 ^= u<<7 | u>>(32-7)
u = x12 + x15
x13 ^= u<<9 | u>>(32-9)
u = x13 + x12
x14 ^= u<<13 | u>>(32-13)
u = x14 + x13
x15 ^= u<<18 | u>>(32-18)
}
x0 += j0
x1 += j1
x2 += j2
x3 += j3
x4 += j4
x5 += j5
x6 += j6
x7 += j7
x8 += j8
x9 += j9
x10 += j10
x11 += j11
x12 += j12
x13 += j13
x14 += j14
x15 += j15
out[0] = byte(x0)
out[1] = byte(x0 >> 8)
out[2] = byte(x0 >> 16)
out[3] = byte(x0 >> 24)
out[4] = byte(x1)
out[5] = byte(x1 >> 8)
out[6] = byte(x1 >> 16)
out[7] = byte(x1 >> 24)
out[8] = byte(x2)
out[9] = byte(x2 >> 8)
out[10] = byte(x2 >> 16)
out[11] = byte(x2 >> 24)
out[12] = byte(x3)
out[13] = byte(x3 >> 8)
out[14] = byte(x3 >> 16)
out[15] = byte(x3 >> 24)
out[16] = byte(x4)
out[17] = byte(x4 >> 8)
out[18] = byte(x4 >> 16)
out[19] = byte(x4 >> 24)
out[20] = byte(x5)
out[21] = byte(x5 >> 8)
out[22] = byte(x5 >> 16)
out[23] = byte(x5 >> 24)
out[24] = byte(x6)
out[25] = byte(x6 >> 8)
out[26] = byte(x6 >> 16)
out[27] = byte(x6 >> 24)
out[28] = byte(x7)
out[29] = byte(x7 >> 8)
out[30] = byte(x7 >> 16)
out[31] = byte(x7 >> 24)
out[32] = byte(x8)
out[33] = byte(x8 >> 8)
out[34] = byte(x8 >> 16)
out[35] = byte(x8 >> 24)
out[36] = byte(x9)
out[37] = byte(x9 >> 8)
out[38] = byte(x9 >> 16)
out[39] = byte(x9 >> 24)
out[40] = byte(x10)
out[41] = byte(x10 >> 8)
out[42] = byte(x10 >> 16)
out[43] = byte(x10 >> 24)
out[44] = byte(x11)
out[45] = byte(x11 >> 8)
out[46] = byte(x11 >> 16)
out[47] = byte(x11 >> 24)
out[48] = byte(x12)
out[49] = byte(x12 >> 8)
out[50] = byte(x12 >> 16)
out[51] = byte(x12 >> 24)
out[52] = byte(x13)
out[53] = byte(x13 >> 8)
out[54] = byte(x13 >> 16)
out[55] = byte(x13 >> 24)
out[56] = byte(x14)
out[57] = byte(x14 >> 8)
out[58] = byte(x14 >> 16)
out[59] = byte(x14 >> 24)
out[60] = byte(x15)
out[61] = byte(x15 >> 8)
out[62] = byte(x15 >> 16)
out[63] = byte(x15 >> 24)
}

View File

@@ -0,0 +1,23 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build amd64,!appengine,!gccgo
package salsa
//go:noescape
// salsa2020XORKeyStream is implemented in salsa20_amd64.s.
func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte)
// XORKeyStream crypts bytes from in to out using the given key and counters.
// In and out must overlap entirely or not at all. Counter
// contains the raw salsa20 counter bytes (both nonce and block counter).
func XORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) {
if len(in) == 0 {
return
}
_ = out[len(in)-1]
salsa2020XORKeyStream(&out[0], &in[0], uint64(len(in)), &counter[0], &key[0])
}

View File

@@ -0,0 +1,883 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build amd64,!appengine,!gccgo
// This code was translated into a form compatible with 6a from the public
// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
// func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte)
// This needs up to 64 bytes at 360(SP); hence the non-obvious frame size.
TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
MOVQ out+0(FP),DI
MOVQ in+8(FP),SI
MOVQ n+16(FP),DX
MOVQ nonce+24(FP),CX
MOVQ key+32(FP),R8
MOVQ SP,R12
MOVQ SP,R9
ADDQ $31, R9
ANDQ $~31, R9
MOVQ R9, SP
MOVQ DX,R9
MOVQ CX,DX
MOVQ R8,R10
CMPQ R9,$0
JBE DONE
START:
MOVL 20(R10),CX
MOVL 0(R10),R8
MOVL 0(DX),AX
MOVL 16(R10),R11
MOVL CX,0(SP)
MOVL R8, 4 (SP)
MOVL AX, 8 (SP)
MOVL R11, 12 (SP)
MOVL 8(DX),CX
MOVL 24(R10),R8
MOVL 4(R10),AX
MOVL 4(DX),R11
MOVL CX,16(SP)
MOVL R8, 20 (SP)
MOVL AX, 24 (SP)
MOVL R11, 28 (SP)
MOVL 12(DX),CX
MOVL 12(R10),DX
MOVL 28(R10),R8
MOVL 8(R10),AX
MOVL DX,32(SP)
MOVL CX, 36 (SP)
MOVL R8, 40 (SP)
MOVL AX, 44 (SP)
MOVQ $1634760805,DX
MOVQ $857760878,CX
MOVQ $2036477234,R8
MOVQ $1797285236,AX
MOVL DX,48(SP)
MOVL CX, 52 (SP)
MOVL R8, 56 (SP)
MOVL AX, 60 (SP)
CMPQ R9,$256
JB BYTESBETWEEN1AND255
MOVOA 48(SP),X0
PSHUFL $0X55,X0,X1
PSHUFL $0XAA,X0,X2
PSHUFL $0XFF,X0,X3
PSHUFL $0X00,X0,X0
MOVOA X1,64(SP)
MOVOA X2,80(SP)
MOVOA X3,96(SP)
MOVOA X0,112(SP)
MOVOA 0(SP),X0
PSHUFL $0XAA,X0,X1
PSHUFL $0XFF,X0,X2
PSHUFL $0X00,X0,X3
PSHUFL $0X55,X0,X0
MOVOA X1,128(SP)
MOVOA X2,144(SP)
MOVOA X3,160(SP)
MOVOA X0,176(SP)
MOVOA 16(SP),X0
PSHUFL $0XFF,X0,X1
PSHUFL $0X55,X0,X2
PSHUFL $0XAA,X0,X0
MOVOA X1,192(SP)
MOVOA X2,208(SP)
MOVOA X0,224(SP)
MOVOA 32(SP),X0
PSHUFL $0X00,X0,X1
PSHUFL $0XAA,X0,X2
PSHUFL $0XFF,X0,X0
MOVOA X1,240(SP)
MOVOA X2,256(SP)
MOVOA X0,272(SP)
BYTESATLEAST256:
MOVL 16(SP),DX
MOVL 36 (SP),CX
MOVL DX,288(SP)
MOVL CX,304(SP)
SHLQ $32,CX
ADDQ CX,DX
ADDQ $1,DX
MOVQ DX,CX
SHRQ $32,CX
MOVL DX, 292 (SP)
MOVL CX, 308 (SP)
ADDQ $1,DX
MOVQ DX,CX
SHRQ $32,CX
MOVL DX, 296 (SP)
MOVL CX, 312 (SP)
ADDQ $1,DX
MOVQ DX,CX
SHRQ $32,CX
MOVL DX, 300 (SP)
MOVL CX, 316 (SP)
ADDQ $1,DX
MOVQ DX,CX
SHRQ $32,CX
MOVL DX,16(SP)
MOVL CX, 36 (SP)
MOVQ R9,352(SP)
MOVQ $20,DX
MOVOA 64(SP),X0
MOVOA 80(SP),X1
MOVOA 96(SP),X2
MOVOA 256(SP),X3
MOVOA 272(SP),X4
MOVOA 128(SP),X5
MOVOA 144(SP),X6
MOVOA 176(SP),X7
MOVOA 192(SP),X8
MOVOA 208(SP),X9
MOVOA 224(SP),X10
MOVOA 304(SP),X11
MOVOA 112(SP),X12
MOVOA 160(SP),X13
MOVOA 240(SP),X14
MOVOA 288(SP),X15
MAINLOOP1:
MOVOA X1,320(SP)
MOVOA X2,336(SP)
MOVOA X13,X1
PADDL X12,X1
MOVOA X1,X2
PSLLL $7,X1
PXOR X1,X14
PSRLL $25,X2
PXOR X2,X14
MOVOA X7,X1
PADDL X0,X1
MOVOA X1,X2
PSLLL $7,X1
PXOR X1,X11
PSRLL $25,X2
PXOR X2,X11
MOVOA X12,X1
PADDL X14,X1
MOVOA X1,X2
PSLLL $9,X1
PXOR X1,X15
PSRLL $23,X2
PXOR X2,X15
MOVOA X0,X1
PADDL X11,X1
MOVOA X1,X2
PSLLL $9,X1
PXOR X1,X9
PSRLL $23,X2
PXOR X2,X9
MOVOA X14,X1
PADDL X15,X1
MOVOA X1,X2
PSLLL $13,X1
PXOR X1,X13
PSRLL $19,X2
PXOR X2,X13
MOVOA X11,X1
PADDL X9,X1
MOVOA X1,X2
PSLLL $13,X1
PXOR X1,X7
PSRLL $19,X2
PXOR X2,X7
MOVOA X15,X1
PADDL X13,X1
MOVOA X1,X2
PSLLL $18,X1
PXOR X1,X12
PSRLL $14,X2
PXOR X2,X12
MOVOA 320(SP),X1
MOVOA X12,320(SP)
MOVOA X9,X2
PADDL X7,X2
MOVOA X2,X12
PSLLL $18,X2
PXOR X2,X0
PSRLL $14,X12
PXOR X12,X0
MOVOA X5,X2
PADDL X1,X2
MOVOA X2,X12
PSLLL $7,X2
PXOR X2,X3
PSRLL $25,X12
PXOR X12,X3
MOVOA 336(SP),X2
MOVOA X0,336(SP)
MOVOA X6,X0
PADDL X2,X0
MOVOA X0,X12
PSLLL $7,X0
PXOR X0,X4
PSRLL $25,X12
PXOR X12,X4
MOVOA X1,X0
PADDL X3,X0
MOVOA X0,X12
PSLLL $9,X0
PXOR X0,X10
PSRLL $23,X12
PXOR X12,X10
MOVOA X2,X0
PADDL X4,X0
MOVOA X0,X12
PSLLL $9,X0
PXOR X0,X8
PSRLL $23,X12
PXOR X12,X8
MOVOA X3,X0
PADDL X10,X0
MOVOA X0,X12
PSLLL $13,X0
PXOR X0,X5
PSRLL $19,X12
PXOR X12,X5
MOVOA X4,X0
PADDL X8,X0
MOVOA X0,X12
PSLLL $13,X0
PXOR X0,X6
PSRLL $19,X12
PXOR X12,X6
MOVOA X10,X0
PADDL X5,X0
MOVOA X0,X12
PSLLL $18,X0
PXOR X0,X1
PSRLL $14,X12
PXOR X12,X1
MOVOA 320(SP),X0
MOVOA X1,320(SP)
MOVOA X4,X1
PADDL X0,X1
MOVOA X1,X12
PSLLL $7,X1
PXOR X1,X7
PSRLL $25,X12
PXOR X12,X7
MOVOA X8,X1
PADDL X6,X1
MOVOA X1,X12
PSLLL $18,X1
PXOR X1,X2
PSRLL $14,X12
PXOR X12,X2
MOVOA 336(SP),X12
MOVOA X2,336(SP)
MOVOA X14,X1
PADDL X12,X1
MOVOA X1,X2
PSLLL $7,X1
PXOR X1,X5
PSRLL $25,X2
PXOR X2,X5
MOVOA X0,X1
PADDL X7,X1
MOVOA X1,X2
PSLLL $9,X1
PXOR X1,X10
PSRLL $23,X2
PXOR X2,X10
MOVOA X12,X1
PADDL X5,X1
MOVOA X1,X2
PSLLL $9,X1
PXOR X1,X8
PSRLL $23,X2
PXOR X2,X8
MOVOA X7,X1
PADDL X10,X1
MOVOA X1,X2
PSLLL $13,X1
PXOR X1,X4
PSRLL $19,X2
PXOR X2,X4
MOVOA X5,X1
PADDL X8,X1
MOVOA X1,X2
PSLLL $13,X1
PXOR X1,X14
PSRLL $19,X2
PXOR X2,X14
MOVOA X10,X1
PADDL X4,X1
MOVOA X1,X2
PSLLL $18,X1
PXOR X1,X0
PSRLL $14,X2
PXOR X2,X0
MOVOA 320(SP),X1
MOVOA X0,320(SP)
MOVOA X8,X0
PADDL X14,X0
MOVOA X0,X2
PSLLL $18,X0
PXOR X0,X12
PSRLL $14,X2
PXOR X2,X12
MOVOA X11,X0
PADDL X1,X0
MOVOA X0,X2
PSLLL $7,X0
PXOR X0,X6
PSRLL $25,X2
PXOR X2,X6
MOVOA 336(SP),X2
MOVOA X12,336(SP)
MOVOA X3,X0
PADDL X2,X0
MOVOA X0,X12
PSLLL $7,X0
PXOR X0,X13
PSRLL $25,X12
PXOR X12,X13
MOVOA X1,X0
PADDL X6,X0
MOVOA X0,X12
PSLLL $9,X0
PXOR X0,X15
PSRLL $23,X12
PXOR X12,X15
MOVOA X2,X0
PADDL X13,X0
MOVOA X0,X12
PSLLL $9,X0
PXOR X0,X9
PSRLL $23,X12
PXOR X12,X9
MOVOA X6,X0
PADDL X15,X0
MOVOA X0,X12
PSLLL $13,X0
PXOR X0,X11
PSRLL $19,X12
PXOR X12,X11
MOVOA X13,X0
PADDL X9,X0
MOVOA X0,X12
PSLLL $13,X0
PXOR X0,X3
PSRLL $19,X12
PXOR X12,X3
MOVOA X15,X0
PADDL X11,X0
MOVOA X0,X12
PSLLL $18,X0
PXOR X0,X1
PSRLL $14,X12
PXOR X12,X1
MOVOA X9,X0
PADDL X3,X0
MOVOA X0,X12
PSLLL $18,X0
PXOR X0,X2
PSRLL $14,X12
PXOR X12,X2
MOVOA 320(SP),X12
MOVOA 336(SP),X0
SUBQ $2,DX
JA MAINLOOP1
PADDL 112(SP),X12
PADDL 176(SP),X7
PADDL 224(SP),X10
PADDL 272(SP),X4
MOVD X12,DX
MOVD X7,CX
MOVD X10,R8
MOVD X4,R9
PSHUFL $0X39,X12,X12
PSHUFL $0X39,X7,X7
PSHUFL $0X39,X10,X10
PSHUFL $0X39,X4,X4
XORL 0(SI),DX
XORL 4(SI),CX
XORL 8(SI),R8
XORL 12(SI),R9
MOVL DX,0(DI)
MOVL CX,4(DI)
MOVL R8,8(DI)
MOVL R9,12(DI)
MOVD X12,DX
MOVD X7,CX
MOVD X10,R8
MOVD X4,R9
PSHUFL $0X39,X12,X12
PSHUFL $0X39,X7,X7
PSHUFL $0X39,X10,X10
PSHUFL $0X39,X4,X4
XORL 64(SI),DX
XORL 68(SI),CX
XORL 72(SI),R8
XORL 76(SI),R9
MOVL DX,64(DI)
MOVL CX,68(DI)
MOVL R8,72(DI)
MOVL R9,76(DI)
MOVD X12,DX
MOVD X7,CX
MOVD X10,R8
MOVD X4,R9
PSHUFL $0X39,X12,X12
PSHUFL $0X39,X7,X7
PSHUFL $0X39,X10,X10
PSHUFL $0X39,X4,X4
XORL 128(SI),DX
XORL 132(SI),CX
XORL 136(SI),R8
XORL 140(SI),R9
MOVL DX,128(DI)
MOVL CX,132(DI)
MOVL R8,136(DI)
MOVL R9,140(DI)
MOVD X12,DX
MOVD X7,CX
MOVD X10,R8
MOVD X4,R9
XORL 192(SI),DX
XORL 196(SI),CX
XORL 200(SI),R8
XORL 204(SI),R9
MOVL DX,192(DI)
MOVL CX,196(DI)
MOVL R8,200(DI)
MOVL R9,204(DI)
PADDL 240(SP),X14
PADDL 64(SP),X0
PADDL 128(SP),X5
PADDL 192(SP),X8
MOVD X14,DX
MOVD X0,CX
MOVD X5,R8
MOVD X8,R9
PSHUFL $0X39,X14,X14
PSHUFL $0X39,X0,X0
PSHUFL $0X39,X5,X5
PSHUFL $0X39,X8,X8
XORL 16(SI),DX
XORL 20(SI),CX
XORL 24(SI),R8
XORL 28(SI),R9
MOVL DX,16(DI)
MOVL CX,20(DI)
MOVL R8,24(DI)
MOVL R9,28(DI)
MOVD X14,DX
MOVD X0,CX
MOVD X5,R8
MOVD X8,R9
PSHUFL $0X39,X14,X14
PSHUFL $0X39,X0,X0
PSHUFL $0X39,X5,X5
PSHUFL $0X39,X8,X8
XORL 80(SI),DX
XORL 84(SI),CX
XORL 88(SI),R8
XORL 92(SI),R9
MOVL DX,80(DI)
MOVL CX,84(DI)
MOVL R8,88(DI)
MOVL R9,92(DI)
MOVD X14,DX
MOVD X0,CX
MOVD X5,R8
MOVD X8,R9
PSHUFL $0X39,X14,X14
PSHUFL $0X39,X0,X0
PSHUFL $0X39,X5,X5
PSHUFL $0X39,X8,X8
XORL 144(SI),DX
XORL 148(SI),CX
XORL 152(SI),R8
XORL 156(SI),R9
MOVL DX,144(DI)
MOVL CX,148(DI)
MOVL R8,152(DI)
MOVL R9,156(DI)
MOVD X14,DX
MOVD X0,CX
MOVD X5,R8
MOVD X8,R9
XORL 208(SI),DX
XORL 212(SI),CX
XORL 216(SI),R8
XORL 220(SI),R9
MOVL DX,208(DI)
MOVL CX,212(DI)
MOVL R8,216(DI)
MOVL R9,220(DI)
PADDL 288(SP),X15
PADDL 304(SP),X11
PADDL 80(SP),X1
PADDL 144(SP),X6
MOVD X15,DX
MOVD X11,CX
MOVD X1,R8
MOVD X6,R9
PSHUFL $0X39,X15,X15
PSHUFL $0X39,X11,X11
PSHUFL $0X39,X1,X1
PSHUFL $0X39,X6,X6
XORL 32(SI),DX
XORL 36(SI),CX
XORL 40(SI),R8
XORL 44(SI),R9
MOVL DX,32(DI)
MOVL CX,36(DI)
MOVL R8,40(DI)
MOVL R9,44(DI)
MOVD X15,DX
MOVD X11,CX
MOVD X1,R8
MOVD X6,R9
PSHUFL $0X39,X15,X15
PSHUFL $0X39,X11,X11
PSHUFL $0X39,X1,X1
PSHUFL $0X39,X6,X6
XORL 96(SI),DX
XORL 100(SI),CX
XORL 104(SI),R8
XORL 108(SI),R9
MOVL DX,96(DI)
MOVL CX,100(DI)
MOVL R8,104(DI)
MOVL R9,108(DI)
MOVD X15,DX
MOVD X11,CX
MOVD X1,R8
MOVD X6,R9
PSHUFL $0X39,X15,X15
PSHUFL $0X39,X11,X11
PSHUFL $0X39,X1,X1
PSHUFL $0X39,X6,X6
XORL 160(SI),DX
XORL 164(SI),CX
XORL 168(SI),R8
XORL 172(SI),R9
MOVL DX,160(DI)
MOVL CX,164(DI)
MOVL R8,168(DI)
MOVL R9,172(DI)
MOVD X15,DX
MOVD X11,CX
MOVD X1,R8
MOVD X6,R9
XORL 224(SI),DX
XORL 228(SI),CX
XORL 232(SI),R8
XORL 236(SI),R9
MOVL DX,224(DI)
MOVL CX,228(DI)
MOVL R8,232(DI)
MOVL R9,236(DI)
PADDL 160(SP),X13
PADDL 208(SP),X9
PADDL 256(SP),X3
PADDL 96(SP),X2
MOVD X13,DX
MOVD X9,CX
MOVD X3,R8
MOVD X2,R9
PSHUFL $0X39,X13,X13
PSHUFL $0X39,X9,X9
PSHUFL $0X39,X3,X3
PSHUFL $0X39,X2,X2
XORL 48(SI),DX
XORL 52(SI),CX
XORL 56(SI),R8
XORL 60(SI),R9
MOVL DX,48(DI)
MOVL CX,52(DI)
MOVL R8,56(DI)
MOVL R9,60(DI)
MOVD X13,DX
MOVD X9,CX
MOVD X3,R8
MOVD X2,R9
PSHUFL $0X39,X13,X13
PSHUFL $0X39,X9,X9
PSHUFL $0X39,X3,X3
PSHUFL $0X39,X2,X2
XORL 112(SI),DX
XORL 116(SI),CX
XORL 120(SI),R8
XORL 124(SI),R9
MOVL DX,112(DI)
MOVL CX,116(DI)
MOVL R8,120(DI)
MOVL R9,124(DI)
MOVD X13,DX
MOVD X9,CX
MOVD X3,R8
MOVD X2,R9
PSHUFL $0X39,X13,X13
PSHUFL $0X39,X9,X9
PSHUFL $0X39,X3,X3
PSHUFL $0X39,X2,X2
XORL 176(SI),DX
XORL 180(SI),CX
XORL 184(SI),R8
XORL 188(SI),R9
MOVL DX,176(DI)
MOVL CX,180(DI)
MOVL R8,184(DI)
MOVL R9,188(DI)
MOVD X13,DX
MOVD X9,CX
MOVD X3,R8
MOVD X2,R9
XORL 240(SI),DX
XORL 244(SI),CX
XORL 248(SI),R8
XORL 252(SI),R9
MOVL DX,240(DI)
MOVL CX,244(DI)
MOVL R8,248(DI)
MOVL R9,252(DI)
MOVQ 352(SP),R9
SUBQ $256,R9
ADDQ $256,SI
ADDQ $256,DI
CMPQ R9,$256
JAE BYTESATLEAST256
CMPQ R9,$0
JBE DONE
BYTESBETWEEN1AND255:
CMPQ R9,$64
JAE NOCOPY
MOVQ DI,DX
LEAQ 360(SP),DI
MOVQ R9,CX
REP; MOVSB
LEAQ 360(SP),DI
LEAQ 360(SP),SI
NOCOPY:
MOVQ R9,352(SP)
MOVOA 48(SP),X0
MOVOA 0(SP),X1
MOVOA 16(SP),X2
MOVOA 32(SP),X3
MOVOA X1,X4
MOVQ $20,CX
MAINLOOP2:
PADDL X0,X4
MOVOA X0,X5
MOVOA X4,X6
PSLLL $7,X4
PSRLL $25,X6
PXOR X4,X3
PXOR X6,X3
PADDL X3,X5
MOVOA X3,X4
MOVOA X5,X6
PSLLL $9,X5
PSRLL $23,X6
PXOR X5,X2
PSHUFL $0X93,X3,X3
PXOR X6,X2
PADDL X2,X4
MOVOA X2,X5
MOVOA X4,X6
PSLLL $13,X4
PSRLL $19,X6
PXOR X4,X1
PSHUFL $0X4E,X2,X2
PXOR X6,X1
PADDL X1,X5
MOVOA X3,X4
MOVOA X5,X6
PSLLL $18,X5
PSRLL $14,X6
PXOR X5,X0
PSHUFL $0X39,X1,X1
PXOR X6,X0
PADDL X0,X4
MOVOA X0,X5
MOVOA X4,X6
PSLLL $7,X4
PSRLL $25,X6
PXOR X4,X1
PXOR X6,X1
PADDL X1,X5
MOVOA X1,X4
MOVOA X5,X6
PSLLL $9,X5
PSRLL $23,X6
PXOR X5,X2
PSHUFL $0X93,X1,X1
PXOR X6,X2
PADDL X2,X4
MOVOA X2,X5
MOVOA X4,X6
PSLLL $13,X4
PSRLL $19,X6
PXOR X4,X3
PSHUFL $0X4E,X2,X2
PXOR X6,X3
PADDL X3,X5
MOVOA X1,X4
MOVOA X5,X6
PSLLL $18,X5
PSRLL $14,X6
PXOR X5,X0
PSHUFL $0X39,X3,X3
PXOR X6,X0
PADDL X0,X4
MOVOA X0,X5
MOVOA X4,X6
PSLLL $7,X4
PSRLL $25,X6
PXOR X4,X3
PXOR X6,X3
PADDL X3,X5
MOVOA X3,X4
MOVOA X5,X6
PSLLL $9,X5
PSRLL $23,X6
PXOR X5,X2
PSHUFL $0X93,X3,X3
PXOR X6,X2
PADDL X2,X4
MOVOA X2,X5
MOVOA X4,X6
PSLLL $13,X4
PSRLL $19,X6
PXOR X4,X1
PSHUFL $0X4E,X2,X2
PXOR X6,X1
PADDL X1,X5
MOVOA X3,X4
MOVOA X5,X6
PSLLL $18,X5
PSRLL $14,X6
PXOR X5,X0
PSHUFL $0X39,X1,X1
PXOR X6,X0
PADDL X0,X4
MOVOA X0,X5
MOVOA X4,X6
PSLLL $7,X4
PSRLL $25,X6
PXOR X4,X1
PXOR X6,X1
PADDL X1,X5
MOVOA X1,X4
MOVOA X5,X6
PSLLL $9,X5
PSRLL $23,X6
PXOR X5,X2
PSHUFL $0X93,X1,X1
PXOR X6,X2
PADDL X2,X4
MOVOA X2,X5
MOVOA X4,X6
PSLLL $13,X4
PSRLL $19,X6
PXOR X4,X3
PSHUFL $0X4E,X2,X2
PXOR X6,X3
SUBQ $4,CX
PADDL X3,X5
MOVOA X1,X4
MOVOA X5,X6
PSLLL $18,X5
PXOR X7,X7
PSRLL $14,X6
PXOR X5,X0
PSHUFL $0X39,X3,X3
PXOR X6,X0
JA MAINLOOP2
PADDL 48(SP),X0
PADDL 0(SP),X1
PADDL 16(SP),X2
PADDL 32(SP),X3
MOVD X0,CX
MOVD X1,R8
MOVD X2,R9
MOVD X3,AX
PSHUFL $0X39,X0,X0
PSHUFL $0X39,X1,X1
PSHUFL $0X39,X2,X2
PSHUFL $0X39,X3,X3
XORL 0(SI),CX
XORL 48(SI),R8
XORL 32(SI),R9
XORL 16(SI),AX
MOVL CX,0(DI)
MOVL R8,48(DI)
MOVL R9,32(DI)
MOVL AX,16(DI)
MOVD X0,CX
MOVD X1,R8
MOVD X2,R9
MOVD X3,AX
PSHUFL $0X39,X0,X0
PSHUFL $0X39,X1,X1
PSHUFL $0X39,X2,X2
PSHUFL $0X39,X3,X3
XORL 20(SI),CX
XORL 4(SI),R8
XORL 52(SI),R9
XORL 36(SI),AX
MOVL CX,20(DI)
MOVL R8,4(DI)
MOVL R9,52(DI)
MOVL AX,36(DI)
MOVD X0,CX
MOVD X1,R8
MOVD X2,R9
MOVD X3,AX
PSHUFL $0X39,X0,X0
PSHUFL $0X39,X1,X1
PSHUFL $0X39,X2,X2
PSHUFL $0X39,X3,X3
XORL 40(SI),CX
XORL 24(SI),R8
XORL 8(SI),R9
XORL 56(SI),AX
MOVL CX,40(DI)
MOVL R8,24(DI)
MOVL R9,8(DI)
MOVL AX,56(DI)
MOVD X0,CX
MOVD X1,R8
MOVD X2,R9
MOVD X3,AX
XORL 60(SI),CX
XORL 44(SI),R8
XORL 28(SI),R9
XORL 12(SI),AX
MOVL CX,60(DI)
MOVL R8,44(DI)
MOVL R9,28(DI)
MOVL AX,12(DI)
MOVQ 352(SP),R9
MOVL 16(SP),CX
MOVL 36 (SP),R8
ADDQ $1,CX
SHLQ $32,R8
ADDQ R8,CX
MOVQ CX,R8
SHRQ $32,R8
MOVL CX,16(SP)
MOVL R8, 36 (SP)
CMPQ R9,$64
JA BYTESATLEAST65
JAE BYTESATLEAST64
MOVQ DI,SI
MOVQ DX,DI
MOVQ R9,CX
REP; MOVSB
BYTESATLEAST64:
DONE:
MOVQ R12,SP
RET
BYTESATLEAST65:
SUBQ $64,R9
ADDQ $64,DI
ADDQ $64,SI
JMP BYTESBETWEEN1AND255

View File

@@ -0,0 +1,14 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64 appengine gccgo
package salsa
// XORKeyStream crypts bytes from in to out using the given key and counters.
// In and out must overlap entirely or not at all. Counter
// contains the raw salsa20 counter bytes (both nonce and block counter).
func XORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) {
genericXORKeyStream(out, in, counter, key)
}

231
vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go generated vendored Normal file
View File

@@ -0,0 +1,231 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package salsa
const rounds = 20
// core applies the Salsa20 core function to 16-byte input in, 32-byte key k,
// and 16-byte constant c, and puts the result into 64-byte array out.
func core(out *[64]byte, in *[16]byte, k *[32]byte, c *[16]byte) {
j0 := uint32(c[0]) | uint32(c[1])<<8 | uint32(c[2])<<16 | uint32(c[3])<<24
j1 := uint32(k[0]) | uint32(k[1])<<8 | uint32(k[2])<<16 | uint32(k[3])<<24
j2 := uint32(k[4]) | uint32(k[5])<<8 | uint32(k[6])<<16 | uint32(k[7])<<24
j3 := uint32(k[8]) | uint32(k[9])<<8 | uint32(k[10])<<16 | uint32(k[11])<<24
j4 := uint32(k[12]) | uint32(k[13])<<8 | uint32(k[14])<<16 | uint32(k[15])<<24
j5 := uint32(c[4]) | uint32(c[5])<<8 | uint32(c[6])<<16 | uint32(c[7])<<24
j6 := uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
j7 := uint32(in[4]) | uint32(in[5])<<8 | uint32(in[6])<<16 | uint32(in[7])<<24
j8 := uint32(in[8]) | uint32(in[9])<<8 | uint32(in[10])<<16 | uint32(in[11])<<24
j9 := uint32(in[12]) | uint32(in[13])<<8 | uint32(in[14])<<16 | uint32(in[15])<<24
j10 := uint32(c[8]) | uint32(c[9])<<8 | uint32(c[10])<<16 | uint32(c[11])<<24
j11 := uint32(k[16]) | uint32(k[17])<<8 | uint32(k[18])<<16 | uint32(k[19])<<24
j12 := uint32(k[20]) | uint32(k[21])<<8 | uint32(k[22])<<16 | uint32(k[23])<<24
j13 := uint32(k[24]) | uint32(k[25])<<8 | uint32(k[26])<<16 | uint32(k[27])<<24
j14 := uint32(k[28]) | uint32(k[29])<<8 | uint32(k[30])<<16 | uint32(k[31])<<24
j15 := uint32(c[12]) | uint32(c[13])<<8 | uint32(c[14])<<16 | uint32(c[15])<<24
x0, x1, x2, x3, x4, x5, x6, x7, x8 := j0, j1, j2, j3, j4, j5, j6, j7, j8
x9, x10, x11, x12, x13, x14, x15 := j9, j10, j11, j12, j13, j14, j15
for i := 0; i < rounds; i += 2 {
u := x0 + x12
x4 ^= u<<7 | u>>(32-7)
u = x4 + x0
x8 ^= u<<9 | u>>(32-9)
u = x8 + x4
x12 ^= u<<13 | u>>(32-13)
u = x12 + x8
x0 ^= u<<18 | u>>(32-18)
u = x5 + x1
x9 ^= u<<7 | u>>(32-7)
u = x9 + x5
x13 ^= u<<9 | u>>(32-9)
u = x13 + x9
x1 ^= u<<13 | u>>(32-13)
u = x1 + x13
x5 ^= u<<18 | u>>(32-18)
u = x10 + x6
x14 ^= u<<7 | u>>(32-7)
u = x14 + x10
x2 ^= u<<9 | u>>(32-9)
u = x2 + x14
x6 ^= u<<13 | u>>(32-13)
u = x6 + x2
x10 ^= u<<18 | u>>(32-18)
u = x15 + x11
x3 ^= u<<7 | u>>(32-7)
u = x3 + x15
x7 ^= u<<9 | u>>(32-9)
u = x7 + x3
x11 ^= u<<13 | u>>(32-13)
u = x11 + x7
x15 ^= u<<18 | u>>(32-18)
u = x0 + x3
x1 ^= u<<7 | u>>(32-7)
u = x1 + x0
x2 ^= u<<9 | u>>(32-9)
u = x2 + x1
x3 ^= u<<13 | u>>(32-13)
u = x3 + x2
x0 ^= u<<18 | u>>(32-18)
u = x5 + x4
x6 ^= u<<7 | u>>(32-7)
u = x6 + x5
x7 ^= u<<9 | u>>(32-9)
u = x7 + x6
x4 ^= u<<13 | u>>(32-13)
u = x4 + x7
x5 ^= u<<18 | u>>(32-18)
u = x10 + x9
x11 ^= u<<7 | u>>(32-7)
u = x11 + x10
x8 ^= u<<9 | u>>(32-9)
u = x8 + x11
x9 ^= u<<13 | u>>(32-13)
u = x9 + x8
x10 ^= u<<18 | u>>(32-18)
u = x15 + x14
x12 ^= u<<7 | u>>(32-7)
u = x12 + x15
x13 ^= u<<9 | u>>(32-9)
u = x13 + x12
x14 ^= u<<13 | u>>(32-13)
u = x14 + x13
x15 ^= u<<18 | u>>(32-18)
}
x0 += j0
x1 += j1
x2 += j2
x3 += j3
x4 += j4
x5 += j5
x6 += j6
x7 += j7
x8 += j8
x9 += j9
x10 += j10
x11 += j11
x12 += j12
x13 += j13
x14 += j14
x15 += j15
out[0] = byte(x0)
out[1] = byte(x0 >> 8)
out[2] = byte(x0 >> 16)
out[3] = byte(x0 >> 24)
out[4] = byte(x1)
out[5] = byte(x1 >> 8)
out[6] = byte(x1 >> 16)
out[7] = byte(x1 >> 24)
out[8] = byte(x2)
out[9] = byte(x2 >> 8)
out[10] = byte(x2 >> 16)
out[11] = byte(x2 >> 24)
out[12] = byte(x3)
out[13] = byte(x3 >> 8)
out[14] = byte(x3 >> 16)
out[15] = byte(x3 >> 24)
out[16] = byte(x4)
out[17] = byte(x4 >> 8)
out[18] = byte(x4 >> 16)
out[19] = byte(x4 >> 24)
out[20] = byte(x5)
out[21] = byte(x5 >> 8)
out[22] = byte(x5 >> 16)
out[23] = byte(x5 >> 24)
out[24] = byte(x6)
out[25] = byte(x6 >> 8)
out[26] = byte(x6 >> 16)
out[27] = byte(x6 >> 24)
out[28] = byte(x7)
out[29] = byte(x7 >> 8)
out[30] = byte(x7 >> 16)
out[31] = byte(x7 >> 24)
out[32] = byte(x8)
out[33] = byte(x8 >> 8)
out[34] = byte(x8 >> 16)
out[35] = byte(x8 >> 24)
out[36] = byte(x9)
out[37] = byte(x9 >> 8)
out[38] = byte(x9 >> 16)
out[39] = byte(x9 >> 24)
out[40] = byte(x10)
out[41] = byte(x10 >> 8)
out[42] = byte(x10 >> 16)
out[43] = byte(x10 >> 24)
out[44] = byte(x11)
out[45] = byte(x11 >> 8)
out[46] = byte(x11 >> 16)
out[47] = byte(x11 >> 24)
out[48] = byte(x12)
out[49] = byte(x12 >> 8)
out[50] = byte(x12 >> 16)
out[51] = byte(x12 >> 24)
out[52] = byte(x13)
out[53] = byte(x13 >> 8)
out[54] = byte(x13 >> 16)
out[55] = byte(x13 >> 24)
out[56] = byte(x14)
out[57] = byte(x14 >> 8)
out[58] = byte(x14 >> 16)
out[59] = byte(x14 >> 24)
out[60] = byte(x15)
out[61] = byte(x15 >> 8)
out[62] = byte(x15 >> 16)
out[63] = byte(x15 >> 24)
}
// genericXORKeyStream is the generic implementation of XORKeyStream to be used
// when no assembly implementation is available.
func genericXORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) {
var block [64]byte
var counterCopy [16]byte
copy(counterCopy[:], counter[:])
for len(in) >= 64 {
core(&block, &counterCopy, key, &Sigma)
for i, x := range block {
out[i] = in[i] ^ x
}
u := uint32(1)
for i := 8; i < 16; i++ {
u += uint32(counterCopy[i])
counterCopy[i] = byte(u)
u >>= 8
}
in = in[64:]
out = out[64:]
}
if len(in) > 0 {
core(&block, &counterCopy, key, &Sigma)
for i, v := range in {
out[i] = v ^ block[i]
}
}
}

213
vendor/golang.org/x/crypto/scrypt/scrypt.go generated vendored Normal file
View File

@@ -0,0 +1,213 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package scrypt implements the scrypt key derivation function as defined in
// Colin Percival's paper "Stronger Key Derivation via Sequential Memory-Hard
// Functions" (https://www.tarsnap.com/scrypt/scrypt.pdf).
package scrypt // import "golang.org/x/crypto/scrypt"
import (
"crypto/sha256"
"errors"
"math/bits"
"golang.org/x/crypto/pbkdf2"
)
const maxInt = int(^uint(0) >> 1)
// blockCopy copies n numbers from src into dst.
func blockCopy(dst, src []uint32, n int) {
copy(dst, src[:n])
}
// blockXOR XORs numbers from dst with n numbers from src.
func blockXOR(dst, src []uint32, n int) {
for i, v := range src[:n] {
dst[i] ^= v
}
}
// salsaXOR applies Salsa20/8 to the XOR of 16 numbers from tmp and in,
// and puts the result into both tmp and out.
func salsaXOR(tmp *[16]uint32, in, out []uint32) {
w0 := tmp[0] ^ in[0]
w1 := tmp[1] ^ in[1]
w2 := tmp[2] ^ in[2]
w3 := tmp[3] ^ in[3]
w4 := tmp[4] ^ in[4]
w5 := tmp[5] ^ in[5]
w6 := tmp[6] ^ in[6]
w7 := tmp[7] ^ in[7]
w8 := tmp[8] ^ in[8]
w9 := tmp[9] ^ in[9]
w10 := tmp[10] ^ in[10]
w11 := tmp[11] ^ in[11]
w12 := tmp[12] ^ in[12]
w13 := tmp[13] ^ in[13]
w14 := tmp[14] ^ in[14]
w15 := tmp[15] ^ in[15]
x0, x1, x2, x3, x4, x5, x6, x7, x8 := w0, w1, w2, w3, w4, w5, w6, w7, w8
x9, x10, x11, x12, x13, x14, x15 := w9, w10, w11, w12, w13, w14, w15
for i := 0; i < 8; i += 2 {
x4 ^= bits.RotateLeft32(x0+x12, 7)
x8 ^= bits.RotateLeft32(x4+x0, 9)
x12 ^= bits.RotateLeft32(x8+x4, 13)
x0 ^= bits.RotateLeft32(x12+x8, 18)
x9 ^= bits.RotateLeft32(x5+x1, 7)
x13 ^= bits.RotateLeft32(x9+x5, 9)
x1 ^= bits.RotateLeft32(x13+x9, 13)
x5 ^= bits.RotateLeft32(x1+x13, 18)
x14 ^= bits.RotateLeft32(x10+x6, 7)
x2 ^= bits.RotateLeft32(x14+x10, 9)
x6 ^= bits.RotateLeft32(x2+x14, 13)
x10 ^= bits.RotateLeft32(x6+x2, 18)
x3 ^= bits.RotateLeft32(x15+x11, 7)
x7 ^= bits.RotateLeft32(x3+x15, 9)
x11 ^= bits.RotateLeft32(x7+x3, 13)
x15 ^= bits.RotateLeft32(x11+x7, 18)
x1 ^= bits.RotateLeft32(x0+x3, 7)
x2 ^= bits.RotateLeft32(x1+x0, 9)
x3 ^= bits.RotateLeft32(x2+x1, 13)
x0 ^= bits.RotateLeft32(x3+x2, 18)
x6 ^= bits.RotateLeft32(x5+x4, 7)
x7 ^= bits.RotateLeft32(x6+x5, 9)
x4 ^= bits.RotateLeft32(x7+x6, 13)
x5 ^= bits.RotateLeft32(x4+x7, 18)
x11 ^= bits.RotateLeft32(x10+x9, 7)
x8 ^= bits.RotateLeft32(x11+x10, 9)
x9 ^= bits.RotateLeft32(x8+x11, 13)
x10 ^= bits.RotateLeft32(x9+x8, 18)
x12 ^= bits.RotateLeft32(x15+x14, 7)
x13 ^= bits.RotateLeft32(x12+x15, 9)
x14 ^= bits.RotateLeft32(x13+x12, 13)
x15 ^= bits.RotateLeft32(x14+x13, 18)
}
x0 += w0
x1 += w1
x2 += w2
x3 += w3
x4 += w4
x5 += w5
x6 += w6
x7 += w7
x8 += w8
x9 += w9
x10 += w10
x11 += w11
x12 += w12
x13 += w13
x14 += w14
x15 += w15
out[0], tmp[0] = x0, x0
out[1], tmp[1] = x1, x1
out[2], tmp[2] = x2, x2
out[3], tmp[3] = x3, x3
out[4], tmp[4] = x4, x4
out[5], tmp[5] = x5, x5
out[6], tmp[6] = x6, x6
out[7], tmp[7] = x7, x7
out[8], tmp[8] = x8, x8
out[9], tmp[9] = x9, x9
out[10], tmp[10] = x10, x10
out[11], tmp[11] = x11, x11
out[12], tmp[12] = x12, x12
out[13], tmp[13] = x13, x13
out[14], tmp[14] = x14, x14
out[15], tmp[15] = x15, x15
}
func blockMix(tmp *[16]uint32, in, out []uint32, r int) {
blockCopy(tmp[:], in[(2*r-1)*16:], 16)
for i := 0; i < 2*r; i += 2 {
salsaXOR(tmp, in[i*16:], out[i*8:])
salsaXOR(tmp, in[i*16+16:], out[i*8+r*16:])
}
}
func integer(b []uint32, r int) uint64 {
j := (2*r - 1) * 16
return uint64(b[j]) | uint64(b[j+1])<<32
}
func smix(b []byte, r, N int, v, xy []uint32) {
var tmp [16]uint32
x := xy
y := xy[32*r:]
j := 0
for i := 0; i < 32*r; i++ {
x[i] = uint32(b[j]) | uint32(b[j+1])<<8 | uint32(b[j+2])<<16 | uint32(b[j+3])<<24
j += 4
}
for i := 0; i < N; i += 2 {
blockCopy(v[i*(32*r):], x, 32*r)
blockMix(&tmp, x, y, r)
blockCopy(v[(i+1)*(32*r):], y, 32*r)
blockMix(&tmp, y, x, r)
}
for i := 0; i < N; i += 2 {
j := int(integer(x, r) & uint64(N-1))
blockXOR(x, v[j*(32*r):], 32*r)
blockMix(&tmp, x, y, r)
j = int(integer(y, r) & uint64(N-1))
blockXOR(y, v[j*(32*r):], 32*r)
blockMix(&tmp, y, x, r)
}
j = 0
for _, v := range x[:32*r] {
b[j+0] = byte(v >> 0)
b[j+1] = byte(v >> 8)
b[j+2] = byte(v >> 16)
b[j+3] = byte(v >> 24)
j += 4
}
}
// Key derives a key from the password, salt, and cost parameters, returning
// a byte slice of length keyLen that can be used as cryptographic key.
//
// N is a CPU/memory cost parameter, which must be a power of two greater than 1.
// r and p must satisfy r * p < 2³⁰. If the parameters do not satisfy the
// limits, the function returns a nil byte slice and an error.
//
// For example, you can get a derived key for e.g. AES-256 (which needs a
// 32-byte key) by doing:
//
// dk, err := scrypt.Key([]byte("some password"), salt, 32768, 8, 1, 32)
//
// The recommended parameters for interactive logins as of 2017 are N=32768, r=8
// and p=1. The parameters N, r, and p should be increased as memory latency and
// CPU parallelism increases; consider setting N to the highest power of 2 you
// can derive within 100 milliseconds. Remember to get a good random salt.
func Key(password, salt []byte, N, r, p, keyLen int) ([]byte, error) {
if N <= 1 || N&(N-1) != 0 {
return nil, errors.New("scrypt: N must be > 1 and a power of 2")
}
if uint64(r)*uint64(p) >= 1<<30 || r > maxInt/128/p || r > maxInt/256 || N > maxInt/128/r {
return nil, errors.New("scrypt: parameters are too large")
}
xy := make([]uint32, 64*r)
v := make([]uint32, 32*N*r)
b := pbkdf2.Key(password, salt, 1, p*128*r, sha256.New)
for i := 0; i < p; i++ {
smix(b[i*128*r:], r, N, v, xy)
}
return pbkdf2.Key(password, b, 1, keyLen, sha256.New), nil
}