mirror of
https://github.com/minio/minio.git
synced 2025-11-09 05:34:56 -05:00
Further restructure
This commit is contained in:
@@ -1,261 +0,0 @@
|
||||
#ifndef ENTRY
|
||||
#define ENTRY(name) \
|
||||
.globl name ; \
|
||||
.align 4,0x90 ; \
|
||||
name:
|
||||
#endif
|
||||
|
||||
#ifndef END
|
||||
#define END(name) \
|
||||
.size name, .-name
|
||||
#endif
|
||||
|
||||
#ifndef ENDPROC
|
||||
#define ENDPROC(name) \
|
||||
.type name, @function ; \
|
||||
END(name)
|
||||
#endif
|
||||
|
||||
#define NUM_INVALID 100
|
||||
|
||||
#define TYPE_R32 0
|
||||
#define TYPE_R64 1
|
||||
#define TYPE_XMM 2
|
||||
#define TYPE_INVALID 100
|
||||
|
||||
.macro R32_NUM opd r32
|
||||
\opd = NUM_INVALID
|
||||
.ifc \r32,%eax
|
||||
\opd = 0
|
||||
.endif
|
||||
.ifc \r32,%ecx
|
||||
\opd = 1
|
||||
.endif
|
||||
.ifc \r32,%edx
|
||||
\opd = 2
|
||||
.endif
|
||||
.ifc \r32,%ebx
|
||||
\opd = 3
|
||||
.endif
|
||||
.ifc \r32,%esp
|
||||
\opd = 4
|
||||
.endif
|
||||
.ifc \r32,%ebp
|
||||
\opd = 5
|
||||
.endif
|
||||
.ifc \r32,%esi
|
||||
\opd = 6
|
||||
.endif
|
||||
.ifc \r32,%edi
|
||||
\opd = 7
|
||||
.endif
|
||||
#ifdef X86_64
|
||||
.ifc \r32,%r8d
|
||||
\opd = 8
|
||||
.endif
|
||||
.ifc \r32,%r9d
|
||||
\opd = 9
|
||||
.endif
|
||||
.ifc \r32,%r10d
|
||||
\opd = 10
|
||||
.endif
|
||||
.ifc \r32,%r11d
|
||||
\opd = 11
|
||||
.endif
|
||||
.ifc \r32,%r12d
|
||||
\opd = 12
|
||||
.endif
|
||||
.ifc \r32,%r13d
|
||||
\opd = 13
|
||||
.endif
|
||||
.ifc \r32,%r14d
|
||||
\opd = 14
|
||||
.endif
|
||||
.ifc \r32,%r15d
|
||||
\opd = 15
|
||||
.endif
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro R64_NUM opd r64
|
||||
\opd = NUM_INVALID
|
||||
#ifdef X86_64
|
||||
.ifc \r64,%rax
|
||||
\opd = 0
|
||||
.endif
|
||||
.ifc \r64,%rcx
|
||||
\opd = 1
|
||||
.endif
|
||||
.ifc \r64,%rdx
|
||||
\opd = 2
|
||||
.endif
|
||||
.ifc \r64,%rbx
|
||||
\opd = 3
|
||||
.endif
|
||||
.ifc \r64,%rsp
|
||||
\opd = 4
|
||||
.endif
|
||||
.ifc \r64,%rbp
|
||||
\opd = 5
|
||||
.endif
|
||||
.ifc \r64,%rsi
|
||||
\opd = 6
|
||||
.endif
|
||||
.ifc \r64,%rdi
|
||||
\opd = 7
|
||||
.endif
|
||||
.ifc \r64,%r8
|
||||
\opd = 8
|
||||
.endif
|
||||
.ifc \r64,%r9
|
||||
\opd = 9
|
||||
.endif
|
||||
.ifc \r64,%r10
|
||||
\opd = 10
|
||||
.endif
|
||||
.ifc \r64,%r11
|
||||
\opd = 11
|
||||
.endif
|
||||
.ifc \r64,%r12
|
||||
\opd = 12
|
||||
.endif
|
||||
.ifc \r64,%r13
|
||||
\opd = 13
|
||||
.endif
|
||||
.ifc \r64,%r14
|
||||
\opd = 14
|
||||
.endif
|
||||
.ifc \r64,%r15
|
||||
\opd = 15
|
||||
.endif
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro XMM_NUM opd xmm
|
||||
\opd = NUM_INVALID
|
||||
.ifc \xmm,%xmm0
|
||||
\opd = 0
|
||||
.endif
|
||||
.ifc \xmm,%xmm1
|
||||
\opd = 1
|
||||
.endif
|
||||
.ifc \xmm,%xmm2
|
||||
\opd = 2
|
||||
.endif
|
||||
.ifc \xmm,%xmm3
|
||||
\opd = 3
|
||||
.endif
|
||||
.ifc \xmm,%xmm4
|
||||
\opd = 4
|
||||
.endif
|
||||
.ifc \xmm,%xmm5
|
||||
\opd = 5
|
||||
.endif
|
||||
.ifc \xmm,%xmm6
|
||||
\opd = 6
|
||||
.endif
|
||||
.ifc \xmm,%xmm7
|
||||
\opd = 7
|
||||
.endif
|
||||
.ifc \xmm,%xmm8
|
||||
\opd = 8
|
||||
.endif
|
||||
.ifc \xmm,%xmm9
|
||||
\opd = 9
|
||||
.endif
|
||||
.ifc \xmm,%xmm10
|
||||
\opd = 10
|
||||
.endif
|
||||
.ifc \xmm,%xmm11
|
||||
\opd = 11
|
||||
.endif
|
||||
.ifc \xmm,%xmm12
|
||||
\opd = 12
|
||||
.endif
|
||||
.ifc \xmm,%xmm13
|
||||
\opd = 13
|
||||
.endif
|
||||
.ifc \xmm,%xmm14
|
||||
\opd = 14
|
||||
.endif
|
||||
.ifc \xmm,%xmm15
|
||||
\opd = 15
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro TYPE type reg
|
||||
R32_NUM reg_type_r32 \reg
|
||||
R64_NUM reg_type_r64 \reg
|
||||
XMM_NUM reg_type_xmm \reg
|
||||
.if reg_type_r64 <> NUM_INVALID
|
||||
\type = TYPE_R64
|
||||
.elseif reg_type_r32 <> NUM_INVALID
|
||||
\type = TYPE_R32
|
||||
.elseif reg_type_xmm <> NUM_INVALID
|
||||
\type = TYPE_XMM
|
||||
.else
|
||||
\type = TYPE_INVALID
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro PFX_OPD_SIZE
|
||||
.byte 0x66
|
||||
.endm
|
||||
|
||||
.macro PFX_REX opd1 opd2 W=0
|
||||
.if ((\opd1 | \opd2) & 8) || \W
|
||||
.byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3)
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro MODRM mod opd1 opd2
|
||||
.byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3)
|
||||
.endm
|
||||
|
||||
.macro PSHUFB_XMM xmm1 xmm2
|
||||
XMM_NUM pshufb_opd1 \xmm1
|
||||
XMM_NUM pshufb_opd2 \xmm2
|
||||
PFX_OPD_SIZE
|
||||
PFX_REX pshufb_opd1 pshufb_opd2
|
||||
.byte 0x0f, 0x38, 0x00
|
||||
MODRM 0xc0 pshufb_opd1 pshufb_opd2
|
||||
.endm
|
||||
|
||||
.macro PCLMULQDQ imm8 xmm1 xmm2
|
||||
XMM_NUM clmul_opd1 \xmm1
|
||||
XMM_NUM clmul_opd2 \xmm2
|
||||
PFX_OPD_SIZE
|
||||
PFX_REX clmul_opd1 clmul_opd2
|
||||
.byte 0x0f, 0x3a, 0x44
|
||||
MODRM 0xc0 clmul_opd1 clmul_opd2
|
||||
.byte \imm8
|
||||
.endm
|
||||
|
||||
.macro PEXTRD imm8 xmm gpr
|
||||
R32_NUM extrd_opd1 \gpr
|
||||
XMM_NUM extrd_opd2 \xmm
|
||||
PFX_OPD_SIZE
|
||||
PFX_REX extrd_opd1 extrd_opd2
|
||||
.byte 0x0f, 0x3a, 0x16
|
||||
MODRM 0xc0 extrd_opd1 extrd_opd2
|
||||
.byte \imm8
|
||||
.endm
|
||||
|
||||
.macro MOVQ_R64_XMM opd1 opd2
|
||||
TYPE movq_r64_xmm_opd1_type \opd1
|
||||
.if movq_r64_xmm_opd1_type == TYPE_XMM
|
||||
XMM_NUM movq_r64_xmm_opd1 \opd1
|
||||
R64_NUM movq_r64_xmm_opd2 \opd2
|
||||
.else
|
||||
R64_NUM movq_r64_xmm_opd1 \opd1
|
||||
XMM_NUM movq_r64_xmm_opd2 \opd2
|
||||
.endif
|
||||
PFX_OPD_SIZE
|
||||
PFX_REX movq_r64_xmm_opd1 movq_r64_xmm_opd2 1
|
||||
.if movq_r64_xmm_opd1_type == TYPE_XMM
|
||||
.byte 0x0f, 0x7e
|
||||
.else
|
||||
.byte 0x0f, 0x6e
|
||||
.endif
|
||||
MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2
|
||||
.endm
|
||||
@@ -1,85 +0,0 @@
|
||||
/*
|
||||
* Mini Object Storage, (C) 2014 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// +build amd64
|
||||
|
||||
package sha1
|
||||
|
||||
// #include <stdio.h>
|
||||
// #include <stdint.h>
|
||||
// void sha1_transform(int32_t *hash, const char* input, size_t num_blocks);
|
||||
import "C"
|
||||
import (
|
||||
gosha1 "crypto/sha1"
|
||||
"io"
|
||||
)
|
||||
|
||||
/*
|
||||
const (
|
||||
SHA1_BLOCKSIZE = 64
|
||||
SHA1_DIGESTSIZE = 20
|
||||
)
|
||||
|
||||
|
||||
func Sha1(buffer []byte) ([]int32, error) {
|
||||
if cpu.HasAVX2() {
|
||||
var shbuf []int32
|
||||
var cbuffer *C.char
|
||||
|
||||
shbuf = make([]int32, SHA1_DIGESTSIZE)
|
||||
var length = len(buffer)
|
||||
if length == 0 {
|
||||
return []int32{0}, errors.New("Invalid input")
|
||||
}
|
||||
|
||||
rem := length % SHA1_BLOCKSIZE
|
||||
padded_len := length
|
||||
|
||||
if rem > 0 {
|
||||
padded_len = length + (SHA1_BLOCKSIZE - rem)
|
||||
}
|
||||
|
||||
rounds := padded_len / SHA1_BLOCKSIZE
|
||||
pad := padded_len - length
|
||||
if pad > 0 {
|
||||
s := make([]byte, pad)
|
||||
// Expand with new padded blocks to the byte array
|
||||
buffer = append(buffer, s...)
|
||||
}
|
||||
|
||||
cshbuf := (*C.int32_t)(unsafe.Pointer(&shbuf[0]))
|
||||
cbuffer = (*C.char)(unsafe.Pointer(&buffer[0]))
|
||||
C.sha1_transform(cshbuf, cbuffer, C.size_t(rounds))
|
||||
|
||||
return 0, nil
|
||||
}
|
||||
}
|
||||
*/
|
||||
func Sum(reader io.Reader) ([]byte, error) {
|
||||
hash := gosha1.New()
|
||||
var err error
|
||||
for err == nil {
|
||||
length := 0
|
||||
byteBuffer := make([]byte, 1024*1024)
|
||||
length, err = reader.Read(byteBuffer)
|
||||
byteBuffer = byteBuffer[0:length]
|
||||
hash.Write(byteBuffer)
|
||||
}
|
||||
if err != io.EOF {
|
||||
return nil, err
|
||||
}
|
||||
return hash.Sum(nil), nil
|
||||
}
|
||||
@@ -1,702 +0,0 @@
|
||||
/*
|
||||
* Implement fast SHA-1 with AVX2 instructions. (x86_64)
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Ilya Albrekht <ilya.albrekht@intel.com>
|
||||
* Maxim Locktyukhin <maxim.locktyukhin@intel.com>
|
||||
* Ronen Zohar <ronen.zohar@intel.com>
|
||||
* Chandramouli Narayanan <mouli@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* SHA-1 implementation with Intel(R) AVX2 instruction set extensions.
|
||||
*
|
||||
*This implementation is based on the previous SSSE3 release:
|
||||
*Visit http://software.intel.com/en-us/articles/
|
||||
*and refer to improving-the-performance-of-the-secure-hash-algorithm-1/
|
||||
*
|
||||
*Updates 20-byte SHA-1 record in 'hash' for even number of
|
||||
*'num_blocks' consecutive 64-byte blocks
|
||||
*
|
||||
*/
|
||||
|
||||
#ifdef HAS_AVX2
|
||||
#include "asm.S"
|
||||
|
||||
#define CTX %rdi /* arg1 */
|
||||
#define BUF %rsi /* arg2 */
|
||||
#define CNT %rdx /* arg3 */
|
||||
|
||||
#define REG_A %ecx
|
||||
#define REG_B %esi
|
||||
#define REG_C %edi
|
||||
#define REG_D %eax
|
||||
#define REG_E %edx
|
||||
#define REG_TB %ebx
|
||||
#define REG_TA %r12d
|
||||
#define REG_RA %rcx
|
||||
#define REG_RB %rsi
|
||||
#define REG_RC %rdi
|
||||
#define REG_RD %rax
|
||||
#define REG_RE %rdx
|
||||
#define REG_RTA %r12
|
||||
#define REG_RTB %rbx
|
||||
#define REG_T1 %ebp
|
||||
#define xmm_mov vmovups
|
||||
#define avx2_zeroupper vzeroupper
|
||||
#define RND_F1 1
|
||||
#define RND_F2 2
|
||||
#define RND_F3 3
|
||||
|
||||
.macro REGALLOC
|
||||
.set A, REG_A
|
||||
.set B, REG_B
|
||||
.set C, REG_C
|
||||
.set D, REG_D
|
||||
.set E, REG_E
|
||||
.set TB, REG_TB
|
||||
.set TA, REG_TA
|
||||
|
||||
.set RA, REG_RA
|
||||
.set RB, REG_RB
|
||||
.set RC, REG_RC
|
||||
.set RD, REG_RD
|
||||
.set RE, REG_RE
|
||||
|
||||
.set RTA, REG_RTA
|
||||
.set RTB, REG_RTB
|
||||
|
||||
.set T1, REG_T1
|
||||
.endm
|
||||
|
||||
#define K_BASE %r8
|
||||
#define HASH_PTR %r9
|
||||
#define BUFFER_PTR %r10
|
||||
#define BUFFER_PTR2 %r13
|
||||
#define BUFFER_END %r11
|
||||
|
||||
#define PRECALC_BUF %r14
|
||||
#define WK_BUF %r15
|
||||
|
||||
#define W_TMP %xmm0
|
||||
#define WY_TMP %ymm0
|
||||
#define WY_TMP2 %ymm9
|
||||
|
||||
# AVX2 variables
|
||||
#define WY0 %ymm3
|
||||
#define WY4 %ymm5
|
||||
#define WY08 %ymm7
|
||||
#define WY12 %ymm8
|
||||
#define WY16 %ymm12
|
||||
#define WY20 %ymm13
|
||||
#define WY24 %ymm14
|
||||
#define WY28 %ymm15
|
||||
|
||||
#define YMM_SHUFB_BSWAP %ymm10
|
||||
|
||||
/*
|
||||
* Keep 2 iterations precalculated at a time:
|
||||
* - 80 DWORDs per iteration * 2
|
||||
*/
|
||||
#define W_SIZE (80*2*2 +16)
|
||||
|
||||
#define WK(t) ((((t) % 80) / 4)*32 + ( (t) % 4)*4 + ((t)/80)*16 )(WK_BUF)
|
||||
#define PRECALC_WK(t) ((t)*2*2)(PRECALC_BUF)
|
||||
|
||||
|
||||
.macro UPDATE_HASH hash, val
|
||||
add \hash, \val
|
||||
mov \val, \hash
|
||||
.endm
|
||||
|
||||
.macro PRECALC_RESET_WY
|
||||
.set WY_00, WY0
|
||||
.set WY_04, WY4
|
||||
.set WY_08, WY08
|
||||
.set WY_12, WY12
|
||||
.set WY_16, WY16
|
||||
.set WY_20, WY20
|
||||
.set WY_24, WY24
|
||||
.set WY_28, WY28
|
||||
.set WY_32, WY_00
|
||||
.endm
|
||||
|
||||
.macro PRECALC_ROTATE_WY
|
||||
/* Rotate macros */
|
||||
.set WY_32, WY_28
|
||||
.set WY_28, WY_24
|
||||
.set WY_24, WY_20
|
||||
.set WY_20, WY_16
|
||||
.set WY_16, WY_12
|
||||
.set WY_12, WY_08
|
||||
.set WY_08, WY_04
|
||||
.set WY_04, WY_00
|
||||
.set WY_00, WY_32
|
||||
|
||||
/* Define register aliases */
|
||||
.set WY, WY_00
|
||||
.set WY_minus_04, WY_04
|
||||
.set WY_minus_08, WY_08
|
||||
.set WY_minus_12, WY_12
|
||||
.set WY_minus_16, WY_16
|
||||
.set WY_minus_20, WY_20
|
||||
.set WY_minus_24, WY_24
|
||||
.set WY_minus_28, WY_28
|
||||
.set WY_minus_32, WY
|
||||
.endm
|
||||
|
||||
.macro PRECALC_00_15
|
||||
.if (i == 0) # Initialize and rotate registers
|
||||
PRECALC_RESET_WY
|
||||
PRECALC_ROTATE_WY
|
||||
.endif
|
||||
|
||||
/* message scheduling pre-compute for rounds 0-15 */
|
||||
.if ((i & 7) == 0)
|
||||
/*
|
||||
* blended AVX2 and ALU instruction scheduling
|
||||
* 1 vector iteration per 8 rounds
|
||||
*/
|
||||
vmovdqu ((i * 2) + PRECALC_OFFSET)(BUFFER_PTR), W_TMP
|
||||
.elseif ((i & 7) == 1)
|
||||
vinsertf128 $1, (((i-1) * 2)+PRECALC_OFFSET)(BUFFER_PTR2),\
|
||||
WY_TMP, WY_TMP
|
||||
.elseif ((i & 7) == 2)
|
||||
vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY
|
||||
.elseif ((i & 7) == 4)
|
||||
vpaddd K_XMM(K_BASE), WY, WY_TMP
|
||||
.elseif ((i & 7) == 7)
|
||||
vmovdqu WY_TMP, PRECALC_WK(i&~7)
|
||||
|
||||
PRECALC_ROTATE_WY
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro PRECALC_16_31
|
||||
/*
|
||||
* message scheduling pre-compute for rounds 16-31
|
||||
* calculating last 32 w[i] values in 8 XMM registers
|
||||
* pre-calculate K+w[i] values and store to mem
|
||||
* for later load by ALU add instruction
|
||||
*
|
||||
* "brute force" vectorization for rounds 16-31 only
|
||||
* due to w[i]->w[i-3] dependency
|
||||
*/
|
||||
.if ((i & 7) == 0)
|
||||
/*
|
||||
* blended AVX2 and ALU instruction scheduling
|
||||
* 1 vector iteration per 8 rounds
|
||||
*/
|
||||
/* w[i-14] */
|
||||
vpalignr $8, WY_minus_16, WY_minus_12, WY
|
||||
vpsrldq $4, WY_minus_04, WY_TMP /* w[i-3] */
|
||||
.elseif ((i & 7) == 1)
|
||||
vpxor WY_minus_08, WY, WY
|
||||
vpxor WY_minus_16, WY_TMP, WY_TMP
|
||||
.elseif ((i & 7) == 2)
|
||||
vpxor WY_TMP, WY, WY
|
||||
vpslldq $12, WY, WY_TMP2
|
||||
.elseif ((i & 7) == 3)
|
||||
vpslld $1, WY, WY_TMP
|
||||
vpsrld $31, WY, WY
|
||||
.elseif ((i & 7) == 4)
|
||||
vpor WY, WY_TMP, WY_TMP
|
||||
vpslld $2, WY_TMP2, WY
|
||||
.elseif ((i & 7) == 5)
|
||||
vpsrld $30, WY_TMP2, WY_TMP2
|
||||
vpxor WY, WY_TMP, WY_TMP
|
||||
.elseif ((i & 7) == 7)
|
||||
vpxor WY_TMP2, WY_TMP, WY
|
||||
vpaddd K_XMM(K_BASE), WY, WY_TMP
|
||||
vmovdqu WY_TMP, PRECALC_WK(i&~7)
|
||||
|
||||
PRECALC_ROTATE_WY
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro PRECALC_32_79
|
||||
/*
|
||||
* in SHA-1 specification:
|
||||
* w[i] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) rol 1
|
||||
* instead we do equal:
|
||||
* w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2
|
||||
* allows more efficient vectorization
|
||||
* since w[i]=>w[i-3] dependency is broken
|
||||
*/
|
||||
|
||||
.if ((i & 7) == 0)
|
||||
/*
|
||||
* blended AVX2 and ALU instruction scheduling
|
||||
* 1 vector iteration per 8 rounds
|
||||
*/
|
||||
vpalignr $8, WY_minus_08, WY_minus_04, WY_TMP
|
||||
.elseif ((i & 7) == 1)
|
||||
/* W is W_minus_32 before xor */
|
||||
vpxor WY_minus_28, WY, WY
|
||||
.elseif ((i & 7) == 2)
|
||||
vpxor WY_minus_16, WY_TMP, WY_TMP
|
||||
.elseif ((i & 7) == 3)
|
||||
vpxor WY_TMP, WY, WY
|
||||
.elseif ((i & 7) == 4)
|
||||
vpslld $2, WY, WY_TMP
|
||||
.elseif ((i & 7) == 5)
|
||||
vpsrld $30, WY, WY
|
||||
vpor WY, WY_TMP, WY
|
||||
.elseif ((i & 7) == 7)
|
||||
vpaddd K_XMM(K_BASE), WY, WY_TMP
|
||||
vmovdqu WY_TMP, PRECALC_WK(i&~7)
|
||||
|
||||
PRECALC_ROTATE_WY
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro PRECALC r, s
|
||||
.set i, \r
|
||||
|
||||
.if (i < 40)
|
||||
.set K_XMM, 32*0
|
||||
.elseif (i < 80)
|
||||
.set K_XMM, 32*1
|
||||
.elseif (i < 120)
|
||||
.set K_XMM, 32*2
|
||||
.else
|
||||
.set K_XMM, 32*3
|
||||
.endif
|
||||
|
||||
.if (i<32)
|
||||
PRECALC_00_15 \s
|
||||
.elseif (i<64)
|
||||
PRECALC_16_31 \s
|
||||
.elseif (i < 160)
|
||||
PRECALC_32_79 \s
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro ROTATE_STATE
|
||||
.set T_REG, E
|
||||
.set E, D
|
||||
.set D, C
|
||||
.set C, B
|
||||
.set B, TB
|
||||
.set TB, A
|
||||
.set A, T_REG
|
||||
|
||||
.set T_REG, RE
|
||||
.set RE, RD
|
||||
.set RD, RC
|
||||
.set RC, RB
|
||||
.set RB, RTB
|
||||
.set RTB, RA
|
||||
.set RA, T_REG
|
||||
.endm
|
||||
|
||||
/* Macro relies on saved ROUND_Fx */
|
||||
|
||||
.macro RND_FUN f, r
|
||||
.if (\f == RND_F1)
|
||||
ROUND_F1 \r
|
||||
.elseif (\f == RND_F2)
|
||||
ROUND_F2 \r
|
||||
.elseif (\f == RND_F3)
|
||||
ROUND_F3 \r
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro RR r
|
||||
.set round_id, (\r % 80)
|
||||
|
||||
.if (round_id == 0) /* Precalculate F for first round */
|
||||
.set ROUND_FUNC, RND_F1
|
||||
mov B, TB
|
||||
|
||||
rorx $(32-30), B, B /* b>>>2 */
|
||||
andn D, TB, T1
|
||||
and C, TB
|
||||
xor T1, TB
|
||||
.endif
|
||||
|
||||
RND_FUN ROUND_FUNC, \r
|
||||
ROTATE_STATE
|
||||
|
||||
.if (round_id == 18)
|
||||
.set ROUND_FUNC, RND_F2
|
||||
.elseif (round_id == 38)
|
||||
.set ROUND_FUNC, RND_F3
|
||||
.elseif (round_id == 58)
|
||||
.set ROUND_FUNC, RND_F2
|
||||
.endif
|
||||
|
||||
.set round_id, ( (\r+1) % 80)
|
||||
|
||||
RND_FUN ROUND_FUNC, (\r+1)
|
||||
ROTATE_STATE
|
||||
.endm
|
||||
|
||||
.macro ROUND_F1 r
|
||||
add WK(\r), E
|
||||
|
||||
andn C, A, T1 /* ~b&d */
|
||||
lea (RE,RTB), E /* Add F from the previous round */
|
||||
|
||||
rorx $(32-5), A, TA /* T2 = A >>> 5 */
|
||||
rorx $(32-30),A, TB /* b>>>2 for next round */
|
||||
|
||||
PRECALC (\r) /* msg scheduling for next 2 blocks */
|
||||
|
||||
/*
|
||||
* Calculate F for the next round
|
||||
* (b & c) ^ andn[b, d]
|
||||
*/
|
||||
and B, A /* b&c */
|
||||
xor T1, A /* F1 = (b&c) ^ (~b&d) */
|
||||
|
||||
lea (RE,RTA), E /* E += A >>> 5 */
|
||||
.endm
|
||||
|
||||
.macro ROUND_F2 r
|
||||
add WK(\r), E
|
||||
lea (RE,RTB), E /* Add F from the previous round */
|
||||
|
||||
/* Calculate F for the next round */
|
||||
rorx $(32-5), A, TA /* T2 = A >>> 5 */
|
||||
.if ((round_id) < 79)
|
||||
rorx $(32-30), A, TB /* b>>>2 for next round */
|
||||
.endif
|
||||
PRECALC (\r) /* msg scheduling for next 2 blocks */
|
||||
|
||||
.if ((round_id) < 79)
|
||||
xor B, A
|
||||
.endif
|
||||
|
||||
add TA, E /* E += A >>> 5 */
|
||||
|
||||
.if ((round_id) < 79)
|
||||
xor C, A
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro ROUND_F3 r
|
||||
add WK(\r), E
|
||||
PRECALC (\r) /* msg scheduling for next 2 blocks */
|
||||
|
||||
lea (RE,RTB), E /* Add F from the previous round */
|
||||
|
||||
mov B, T1
|
||||
or A, T1
|
||||
|
||||
rorx $(32-5), A, TA /* T2 = A >>> 5 */
|
||||
rorx $(32-30), A, TB /* b>>>2 for next round */
|
||||
|
||||
/* Calculate F for the next round
|
||||
* (b and c) or (d and (b or c))
|
||||
*/
|
||||
and C, T1
|
||||
and B, A
|
||||
or T1, A
|
||||
|
||||
add TA, E /* E += A >>> 5 */
|
||||
|
||||
.endm
|
||||
|
||||
/*
|
||||
* macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining
|
||||
*/
|
||||
.macro SHA1_PIPELINED_MAIN_BODY
|
||||
|
||||
REGALLOC
|
||||
|
||||
mov (HASH_PTR), A
|
||||
mov 4(HASH_PTR), B
|
||||
mov 8(HASH_PTR), C
|
||||
mov 12(HASH_PTR), D
|
||||
mov 16(HASH_PTR), E
|
||||
|
||||
mov %rsp, PRECALC_BUF
|
||||
lea (2*4*80+32)(%rsp), WK_BUF
|
||||
|
||||
# Precalc WK for first 2 blocks
|
||||
PRECALC_OFFSET = 0
|
||||
.set i, 0
|
||||
.rept 160
|
||||
PRECALC i
|
||||
.set i, i + 1
|
||||
.endr
|
||||
PRECALC_OFFSET = 128
|
||||
xchg WK_BUF, PRECALC_BUF
|
||||
|
||||
.align 32
|
||||
_loop:
|
||||
/*
|
||||
* code loops through more than one block
|
||||
* we use K_BASE value as a signal of a last block,
|
||||
* it is set below by: cmovae BUFFER_PTR, K_BASE
|
||||
*/
|
||||
cmp K_BASE, BUFFER_PTR
|
||||
jne _begin
|
||||
.align 32
|
||||
jmp _end
|
||||
.align 32
|
||||
_begin:
|
||||
|
||||
/*
|
||||
* Do first block
|
||||
* rounds: 0,2,4,6,8
|
||||
*/
|
||||
.set j, 0
|
||||
.rept 5
|
||||
RR j
|
||||
.set j, j+2
|
||||
.endr
|
||||
|
||||
jmp _loop0
|
||||
_loop0:
|
||||
|
||||
/*
|
||||
* rounds:
|
||||
* 10,12,14,16,18
|
||||
* 20,22,24,26,28
|
||||
* 30,32,34,36,38
|
||||
* 40,42,44,46,48
|
||||
* 50,52,54,56,58
|
||||
*/
|
||||
.rept 25
|
||||
RR j
|
||||
.set j, j+2
|
||||
.endr
|
||||
|
||||
add $(2*64), BUFFER_PTR /* move to next odd-64-byte block */
|
||||
cmp BUFFER_END, BUFFER_PTR /* is current block the last one? */
|
||||
cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
|
||||
|
||||
/*
|
||||
* rounds
|
||||
* 60,62,64,66,68
|
||||
* 70,72,74,76,78
|
||||
*/
|
||||
.rept 10
|
||||
RR j
|
||||
.set j, j+2
|
||||
.endr
|
||||
|
||||
UPDATE_HASH (HASH_PTR), A
|
||||
UPDATE_HASH 4(HASH_PTR), TB
|
||||
UPDATE_HASH 8(HASH_PTR), C
|
||||
UPDATE_HASH 12(HASH_PTR), D
|
||||
UPDATE_HASH 16(HASH_PTR), E
|
||||
|
||||
cmp K_BASE, BUFFER_PTR /* is current block the last one? */
|
||||
je _loop
|
||||
|
||||
mov TB, B
|
||||
|
||||
/* Process second block */
|
||||
/*
|
||||
* rounds
|
||||
* 0+80, 2+80, 4+80, 6+80, 8+80
|
||||
* 10+80,12+80,14+80,16+80,18+80
|
||||
*/
|
||||
|
||||
.set j, 0
|
||||
.rept 10
|
||||
RR j+80
|
||||
.set j, j+2
|
||||
.endr
|
||||
|
||||
jmp _loop1
|
||||
_loop1:
|
||||
/*
|
||||
* rounds
|
||||
* 20+80,22+80,24+80,26+80,28+80
|
||||
* 30+80,32+80,34+80,36+80,38+80
|
||||
*/
|
||||
.rept 10
|
||||
RR j+80
|
||||
.set j, j+2
|
||||
.endr
|
||||
|
||||
jmp _loop2
|
||||
_loop2:
|
||||
|
||||
/*
|
||||
* rounds
|
||||
* 40+80,42+80,44+80,46+80,48+80
|
||||
* 50+80,52+80,54+80,56+80,58+80
|
||||
*/
|
||||
.rept 10
|
||||
RR j+80
|
||||
.set j, j+2
|
||||
.endr
|
||||
|
||||
add $(2*64), BUFFER_PTR2 /* move to next even-64-byte block */
|
||||
|
||||
cmp BUFFER_END, BUFFER_PTR2 /* is current block the last one */
|
||||
cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
|
||||
|
||||
jmp _loop3
|
||||
_loop3:
|
||||
|
||||
/*
|
||||
* rounds
|
||||
* 60+80,62+80,64+80,66+80,68+80
|
||||
* 70+80,72+80,74+80,76+80,78+80
|
||||
*/
|
||||
.rept 10
|
||||
RR j+80
|
||||
.set j, j+2
|
||||
.endr
|
||||
|
||||
UPDATE_HASH (HASH_PTR), A
|
||||
UPDATE_HASH 4(HASH_PTR), TB
|
||||
UPDATE_HASH 8(HASH_PTR), C
|
||||
UPDATE_HASH 12(HASH_PTR), D
|
||||
UPDATE_HASH 16(HASH_PTR), E
|
||||
|
||||
/* Reset state for AVX2 reg permutation */
|
||||
mov A, TA
|
||||
mov TB, A
|
||||
mov C, TB
|
||||
mov E, C
|
||||
mov D, B
|
||||
mov TA, D
|
||||
|
||||
REGALLOC
|
||||
|
||||
xchg WK_BUF, PRECALC_BUF
|
||||
|
||||
jmp _loop
|
||||
|
||||
.align 32
|
||||
_end:
|
||||
|
||||
.endm
|
||||
|
||||
.section .rodata
|
||||
|
||||
#define K1 0x5a827999
|
||||
#define K2 0x6ed9eba1
|
||||
#define K3 0x8f1bbcdc
|
||||
#define K4 0xca62c1d6
|
||||
|
||||
.align 128
|
||||
K_XMM_AR:
|
||||
.long K1, K1, K1, K1
|
||||
.long K1, K1, K1, K1
|
||||
.long K2, K2, K2, K2
|
||||
.long K2, K2, K2, K2
|
||||
.long K3, K3, K3, K3
|
||||
.long K3, K3, K3, K3
|
||||
.long K4, K4, K4, K4
|
||||
.long K4, K4, K4, K4
|
||||
|
||||
BSWAP_SHUFB_CTL:
|
||||
.long 0x00010203
|
||||
.long 0x04050607
|
||||
.long 0x08090a0b
|
||||
.long 0x0c0d0e0f
|
||||
.long 0x00010203
|
||||
.long 0x04050607
|
||||
.long 0x08090a0b
|
||||
.long 0x0c0d0e0f
|
||||
|
||||
# void sha1_transform(int32_t *hash, const char* input, size_t num_blocks) ;
|
||||
.text
|
||||
ENTRY(sha1_transform)
|
||||
|
||||
push %rbx
|
||||
push %rbp
|
||||
push %r12
|
||||
push %r13
|
||||
push %r14
|
||||
push %r15
|
||||
|
||||
RESERVE_STACK = (W_SIZE*4 + 8+24)
|
||||
|
||||
/* Align stack */
|
||||
mov %rsp, %rbx
|
||||
and $~(0x20-1), %rsp
|
||||
push %rbx
|
||||
sub $RESERVE_STACK, %rsp
|
||||
|
||||
avx2_zeroupper
|
||||
|
||||
lea K_XMM_AR(%rip), K_BASE
|
||||
|
||||
mov CTX, HASH_PTR
|
||||
mov BUF, BUFFER_PTR
|
||||
lea 64(BUF), BUFFER_PTR2
|
||||
|
||||
shl $6, CNT /* mul by 64 */
|
||||
add BUF, CNT
|
||||
add $64, CNT
|
||||
mov CNT, BUFFER_END
|
||||
|
||||
cmp BUFFER_END, BUFFER_PTR2
|
||||
cmovae K_BASE, BUFFER_PTR2
|
||||
|
||||
xmm_mov BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP
|
||||
|
||||
SHA1_PIPELINED_MAIN_BODY
|
||||
|
||||
avx2_zeroupper
|
||||
|
||||
add $RESERVE_STACK, %rsp
|
||||
pop %rsp
|
||||
|
||||
pop %r15
|
||||
pop %r14
|
||||
pop %r13
|
||||
pop %r12
|
||||
pop %rbp
|
||||
pop %rbx
|
||||
|
||||
ret
|
||||
|
||||
ENDPROC(sha1_transform)
|
||||
#endif
|
||||
@@ -1,23 +0,0 @@
|
||||
package sha1
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/hex"
|
||||
"testing"
|
||||
|
||||
. "gopkg.in/check.v1"
|
||||
)
|
||||
|
||||
func Test(t *testing.T) { TestingT(t) }
|
||||
|
||||
type MySuite struct{}
|
||||
|
||||
var _ = Suite(&MySuite{})
|
||||
|
||||
func (s *MySuite) TestStreamingSha1(c *C) {
|
||||
testString := []byte("Test string")
|
||||
expectedHash, _ := hex.DecodeString("18af819125b70879d36378431c4e8d9bfa6a2599")
|
||||
hash, err := Sum(bytes.NewBuffer(testString))
|
||||
c.Assert(err, IsNil)
|
||||
c.Assert(bytes.Equal(expectedHash, hash), Equals, true)
|
||||
}
|
||||
Reference in New Issue
Block a user